# Mirror of https://github.com/azaion/ai-training.git
# Synced 2026-04-22 12:46:36 +00:00 at commit a47fa135de.
# Commit summary: modified .gitignore to include test fixture data while
# excluding test results; updated config.yaml to change the model from
# 'yolo11m.yaml' to 'yolo26m.pt'; enhanced .cursor/rules/coderule.mdc with
# additional guidelines for test environment consistency and infrastructure
# handling; revised autopilot state management in _docs/_autopilot_state.md to
# reflect current progress and tasks; removed outdated augmentation tests and
# adjusted dataset formation tests to align with the new structure. These
# changes streamline the configuration and testing processes, ensuring better
# organization and clarity in the project.
# File stats (viewer metadata): 218 lines, 5.1 KiB, Python.
import shutil
|
|
from os import path as osp
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
import constants as c_mod
|
|
|
|
|
|
def _prepare_form_dataset(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
    count,
    corrupt_stems,
):
    """Populate the processed images/labels dirs with *count* fixture pairs.

    Patches the constants module to point at *tmp_path*, copies the first
    *count* fixture ``.jpg``/``.txt`` pairs into the processed directories,
    and overwrites the label of every stem in *corrupt_stems* with an
    out-of-range bbox line (x-center 1.5 exceeds the normalized [0, 1]
    range) so the pipeline can quarantine it.

    Returns the imported ``train`` module and the path of today's dataset
    directory.  (*monkeypatch* is accepted for fixture-signature symmetry
    but is not used here.)
    """
    constants_patch(tmp_path)
    # Import after patching so train picks up the patched configuration.
    import train

    images_dst = Path(c_mod.config.processed_images_dir)
    labels_dst = Path(c_mod.config.processed_labels_dir)
    for directory in (images_dst, labels_dst):
        directory.mkdir(parents=True, exist_ok=True)

    selected = sorted(fixture_images_dir.glob("*.jpg"))[:count]
    for src in selected:
        name = src.stem
        shutil.copy2(fixture_images_dir / f"{name}.jpg", images_dst / f"{name}.jpg")
        label_copy = labels_dst / f"{name}.txt"
        shutil.copy2(fixture_labels_dir / f"{name}.txt", label_copy)
        if name in corrupt_stems:
            label_copy.write_text("0 1.5 0.5 0.1 0.1\n", encoding="utf-8")

    today_ds = osp.join(c_mod.config.datasets_dir, train.today_folder)
    return train, today_ds
|
|
|
|
|
|
def _count_jpg(p):
|
|
return len(list(Path(p).glob("*.jpg")))
|
|
|
|
|
|
def test_bt_dsf_01_split_ratio_70_20_10(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """Twenty processed pairs must be split 14/4/2 across train/valid/test."""
    # Arrange
    train, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        20,
        set(),
    )
    # Act
    train.form_dataset()
    # Assert
    expected = {"train": 14, "valid": 4, "test": 2}
    for split, want in expected.items():
        assert _count_jpg(Path(today_ds, split, "images")) == want
|
|
|
|
|
|
def test_bt_dsf_02_six_subdirectories(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """form_dataset must create images/ and labels/ under each of the 3 splits."""
    # Arrange
    train, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        20,
        set(),
    )
    # Act
    train.form_dataset()
    # Assert
    base = Path(today_ds)
    for split in ("train", "valid", "test"):
        for kind in ("images", "labels"):
            assert (base / split / kind).is_dir()
|
|
|
|
|
|
def test_bt_dsf_03_total_files_twenty(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """No image may be lost or duplicated: split counts must total 20."""
    # Arrange
    train, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        20,
        set(),
    )
    # Act
    train.form_dataset()
    # Assert
    total = sum(
        _count_jpg(Path(today_ds, split, "images"))
        for split in ("train", "valid", "test")
    )
    assert total == 20
|
|
|
|
|
|
def test_bt_dsf_04_corrupted_labels_quarantined(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """Pairs with invalid labels must be quarantined, not placed in a split."""
    # Arrange: corrupt the labels of the first 5 of 20 fixture stems.
    stems = [p.stem for p in sorted(fixture_images_dir.glob("*.jpg"))[:20]]
    bad = set(stems[:5])
    train, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        20,
        bad,
    )
    # Act
    train.form_dataset()
    # Assert: 15 clean pairs reach the splits, 5 corrupted ones are quarantined.
    kept = sum(
        _count_jpg(Path(today_ds, split, "images"))
        for split in ("train", "valid", "test")
    )
    assert kept == 15
    assert _count_jpg(c_mod.config.corrupted_images_dir) == 5
    quarantined = list(Path(c_mod.config.corrupted_labels_dir).glob("*.txt"))
    assert len(quarantined) == 5
|
|
|
|
|
|
@pytest.mark.resilience
def test_rt_dsf_01_empty_processed_no_crash(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """form_dataset must tolerate empty processed dirs and still make today's dir."""
    # Arrange: zero fixture pairs copied in.
    train, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        0,
        set(),
    )
    # Act: must not raise even with nothing to split.
    train.form_dataset()
    # Assert
    assert Path(today_ds).is_dir()
|
|
|
|
|
|
@pytest.mark.resource_limit
def test_rl_dsf_01_split_ratios_sum_hundred():
    """The configured split percentages must account for every image exactly."""
    import train

    total = train.train_set + train.valid_set + train.test_set
    assert total == 100
|
|
|
|
|
|
@pytest.mark.resource_limit
def test_rl_dsf_02_no_filename_duplication_across_splits(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """Each image file name must appear in exactly one of the three splits."""
    # Arrange
    train, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        20,
        set(),
    )
    # Act
    train.form_dataset()
    # Assert
    base = Path(today_ds)
    names = [
        f.name
        for split in ("train", "valid", "test")
        for f in (base / split / "images").glob("*.jpg")
    ]
    assert len(set(names)) == len(names)
    assert len(names) == 20
|