mirror of
https://github.com/azaion/ai-training.git
synced 2026-04-22 10:56:36 +00:00
142c6c4de8
- Replaced module-level path variables in constants.py with a structured Pydantic Config class. - Updated all relevant modules (train.py, augmentation.py, exports.py, dataset-visualiser.py, manual_run.py) to access paths through the new config structure. - Fixed bugs related to image processing and model saving. - Enhanced test infrastructure to accommodate the new configuration approach. This refactor improves code maintainability and clarity by centralizing configuration management.
235 lines
5.6 KiB
Python
235 lines
5.6 KiB
Python
import shutil
|
|
import sys
|
|
import types
|
|
from os import path as osp
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
import constants as c_mod
|
|
|
|
|
|
def _stub_train_dependencies():
    """Register stand-in modules in ``sys.modules`` for a few third-party packages.

    * ``ultralytics`` receives an empty ``YOLO`` class.
    * ``boto3`` receives a ``client`` factory whose result exposes no-op
      ``upload_fileobj`` and ``download_file`` callables.
    * ``netron`` and ``requests`` are registered as empty modules.

    A module that is already present in ``sys.modules`` is reused rather than
    replaced (its ``YOLO`` / ``client`` attribute is still overwritten).  The
    ``_done`` flag stored on this function turns repeated calls into no-ops.
    """
    if getattr(_stub_train_dependencies, "_done", False):
        return

    def ensure_module(name):
        # Hand back the registered module, creating an empty one on first use.
        return sys.modules.setdefault(name, types.ModuleType(name))

    class YOLO:
        pass

    def fake_client(*_a, **_k):
        # Accepts any arguments and returns an object with inert transfer methods.
        def noop(*_a, **_k):
            return None

        return types.SimpleNamespace(upload_fileobj=noop, download_file=noop)

    ensure_module("ultralytics").YOLO = YOLO
    ensure_module("boto3").client = fake_client
    for bare_name in ("netron", "requests"):
        ensure_module(bare_name)

    _stub_train_dependencies._done = True


# Executed at import time -- presumably so the stubs are in place before any
# test runs ``import train``; confirm against train's import list.
_stub_train_dependencies()
|
|
|
|
|
|
def _prepare_form_dataset(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
    count,
    corrupt_stems,
):
    """Stage fixture images/labels in the processed directories.

    Calls ``constants_patch(tmp_path)``, imports ``train``, creates the
    processed image and label directories taken from ``c_mod.config``, and
    copies the first ``count`` ``*.jpg`` fixtures (sorted by path) together
    with their same-stem ``.txt`` labels into them.  For every stem listed in
    ``corrupt_stems`` the copied label is overwritten with a fixed line.

    ``monkeypatch`` is accepted but not used by this helper.

    Returns a ``(train_module, today_dataset_path)`` tuple, where the path is
    ``config.datasets_dir`` joined with ``train.today_folder``.
    """
    constants_patch(tmp_path)
    import train

    images_out = Path(c_mod.config.processed_images_dir)
    labels_out = Path(c_mod.config.processed_labels_dir)
    for directory in (images_out, labels_out):
        directory.mkdir(parents=True, exist_ok=True)

    selected = sorted(fixture_images_dir.glob("*.jpg"))[:count]
    for image in selected:
        name = image.stem
        jpg_name = f"{name}.jpg"
        txt_name = f"{name}.txt"

        shutil.copy2(fixture_images_dir / jpg_name, images_out / jpg_name)

        label_copy = labels_out / txt_name
        shutil.copy2(fixture_labels_dir / txt_name, label_copy)
        if name in corrupt_stems:
            # NOTE(review): presumably an invalid annotation (second field
            # 1.5 > 1) meant to trip train's label validation -- confirm.
            label_copy.write_text("0 1.5 0.5 0.1 0.1\n", encoding="utf-8")

    return train, osp.join(c_mod.config.datasets_dir, train.today_folder)
|
|
|
|
|
|
def _count_jpg(p):
    """Return how many ``*.jpg`` entries sit directly inside directory ``p``."""
    return sum(1 for _ in Path(p).glob("*.jpg"))
|
|
|
|
|
|
def test_bt_dsf_01_split_ratio_70_20_10(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """With 100 staged images, ``form_dataset`` yields 70/20/10 train/valid/test.

    Assumes the fixture directory provides at least 100 ``*.jpg`` files.
    """
    train_mod, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        100,
        set(),
    )
    train_mod.form_dataset()

    expected_counts = {"train": 70, "valid": 20, "test": 10}
    for split, expected in expected_counts.items():
        assert _count_jpg(Path(today_ds, split, "images")) == expected
|
|
|
|
|
|
def test_bt_dsf_02_six_subdirectories(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """``form_dataset`` creates ``images`` and ``labels`` under each of the three splits."""
    train_mod, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        100,
        set(),
    )
    train_mod.form_dataset()

    root = Path(today_ds)
    for split in ("train", "valid", "test"):
        for kind in ("images", "labels"):
            assert (root / split / kind).is_dir()
|
|
|
|
|
|
def test_bt_dsf_03_total_files_one_hundred(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """The three split image directories together hold all 100 staged images."""
    train_mod, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        100,
        set(),
    )
    train_mod.form_dataset()

    total = sum(
        _count_jpg(Path(today_ds, split, "images"))
        for split in ("train", "valid", "test")
    )
    assert total == 100
|
|
|
|
|
|
def test_bt_dsf_04_corrupted_labels_quarantined(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """Five overwritten labels land in the corrupted dirs; the other 95 images are split."""
    first_hundred = sorted(fixture_images_dir.glob("*.jpg"))[:100]
    corrupt = {image.stem for image in first_hundred[:5]}

    train_mod, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        100,
        corrupt,
    )
    train_mod.form_dataset()

    placed = sum(
        _count_jpg(Path(today_ds, split, "images"))
        for split in ("train", "valid", "test")
    )
    assert placed == 95

    assert _count_jpg(c_mod.config.corrupted_images_dir) == 5
    quarantined_labels = list(Path(c_mod.config.corrupted_labels_dir).glob("*.txt"))
    assert len(quarantined_labels) == 5
|
|
|
|
|
|
@pytest.mark.resilience
def test_rt_dsf_01_empty_processed_no_crash(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """With nothing staged, ``form_dataset`` still runs and today's dataset dir exists."""
    train_mod, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        count=0,
        corrupt_stems=set(),
    )
    train_mod.form_dataset()
    assert osp.isdir(today_ds)
|
|
|
|
|
|
@pytest.mark.resource_limit
def test_rl_dsf_01_split_ratios_sum_hundred():
    """The ``train_set``, ``valid_set`` and ``test_set`` values in ``train`` add up to 100."""
    import train

    ratios = (train.train_set, train.valid_set, train.test_set)
    assert sum(ratios) == 100
|
|
|
|
|
|
@pytest.mark.resource_limit
def test_rl_dsf_02_no_filename_duplication_across_splits(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """No image file name appears in more than one split, and all 100 are present."""
    train_mod, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        100,
        set(),
    )
    train_mod.form_dataset()

    root = Path(today_ds)
    names = [
        image.name
        for split in ("train", "valid", "test")
        for image in (root / split / "images").glob("*.jpg")
    ]
    assert len(names) == len(set(names))
    assert len(names) == 100
|