[AZ-153] [AZ-155] [AZ-156] [AZ-158] Add augmentation, dataset formation, label validation, model split tests

Made-with: Cursor
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-03-26 23:18:17 +02:00
parent 66fe1cc918
commit 41552c5699
7 changed files with 690 additions and 0 deletions
+103
View File
@@ -0,0 +1,103 @@
import shutil
import sys
import time
import types
from os import path as osp
from pathlib import Path
import pytest
import constants as c_mod
def _stub_train_dependencies():
    """Register stand-in modules for heavy third-party packages.

    Ensures ``ultralytics``, ``boto3``, ``netron`` and ``requests`` have an
    entry in ``sys.modules``. An entry that is already present is reused
    as-is; otherwise an empty ``types.ModuleType`` is inserted. In both
    cases ``ultralytics.YOLO`` is replaced by an empty class and
    ``boto3.client`` by a factory whose ``upload_fileobj`` and
    ``download_file`` do nothing.

    The work is done once: a ``_done`` attribute stored on this function
    makes every later call return immediately.

    NOTE(review): presumably this exists so that ``train`` can be imported
    in tests without the real packages or network access - confirm.
    """
    already_stubbed = getattr(_stub_train_dependencies, "_done", False)
    if already_stubbed:
        return

    def ensure_module(module_name):
        # setdefault keeps whatever is already registered under this name
        # and only falls back to the freshly built empty module.
        return sys.modules.setdefault(module_name, types.ModuleType(module_name))

    def do_nothing(*_args, **_kwargs):
        return None

    def fake_client(*_args, **_kwargs):
        # Accepts any arguments and hands back an object exposing the two
        # methods stubbed here, both of which ignore their input.
        return types.SimpleNamespace(
            upload_fileobj=do_nothing,
            download_file=do_nothing,
        )

    class YOLO:
        pass

    ensure_module("ultralytics").YOLO = YOLO
    ensure_module("boto3").client = fake_client

    # These two only need to exist in sys.modules; no attributes are set.
    for bare_name in ("netron", "requests"):
        ensure_module(bare_name)

    _stub_train_dependencies._done = True


# Install the stubs at import time of this test module.
_stub_train_dependencies()
def _prepare_form_dataset(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
    count,
    corrupt_stems,
):
    """Populate the processed image/label folders and point ``train`` at them.

    Steps, in order:

    1. Call ``constants_patch(tmp_path)``, then import ``train``.
    2. Create ``c_mod.processed_images_dir`` and
       ``c_mod.processed_labels_dir``.
    3. Take the first ``count`` ``*.jpg`` files (sorted) from
       ``fixture_images_dir`` and copy each one, together with the
       same-stem ``.txt`` file from ``fixture_labels_dir``, into those
       folders.
    4. For every stem contained in ``corrupt_stems``, overwrite the copied
       label with the single line ``0 1.5 0.5 0.1 0.1``.
    5. Monkeypatch the directory attributes of ``train`` so they match the
       current values in ``c_mod``.

    If fewer than ``count`` images are available, only those are copied.

    NOTE(review): ``constants_patch`` is a fixture defined elsewhere;
    presumably it redirects the ``constants`` paths under ``tmp_path`` -
    confirm. The replacement label line looks like a YOLO-style row with an
    out-of-range coordinate (1.5) - confirm against the label validator.

    Returns:
        The imported ``train`` module with its attributes patched.
    """
    constants_patch(tmp_path)
    import train

    images_out = Path(c_mod.processed_images_dir)
    labels_out = Path(c_mod.processed_labels_dir)
    for target_dir in (images_out, labels_out):
        target_dir.mkdir(parents=True, exist_ok=True)

    selected_images = sorted(fixture_images_dir.glob("*.jpg"))[:count]
    for image_path in selected_images:
        stem = image_path.stem
        image_name = f"{stem}.jpg"
        label_name = f"{stem}.txt"

        shutil.copy2(fixture_images_dir / image_name, images_out / image_name)

        label_copy = labels_out / label_name
        shutil.copy2(fixture_labels_dir / label_name, label_copy)
        if stem in corrupt_stems:
            # Replace the freshly copied label with the corrupt sample row.
            label_copy.write_text("0 1.5 0.5 0.1 0.1\n", encoding="utf-8")

    monkeypatch.setattr(
        train,
        "today_dataset",
        osp.join(c_mod.datasets_dir, train.today_folder),
    )
    # The remaining attributes are mirrored one-to-one from the constants
    # module onto train, under the same names.
    mirrored_names = (
        "processed_images_dir",
        "processed_labels_dir",
        "corrupted_images_dir",
        "corrupted_labels_dir",
        "datasets_dir",
    )
    for attr_name in mirrored_names:
        monkeypatch.setattr(train, attr_name, getattr(c_mod, attr_name))

    return train
@pytest.mark.performance
def test_pt_dsf_01_dataset_formation_under_thirty_seconds(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """PT-DSF-01: ``train.form_dataset()`` finishes within 30 seconds.

    The processed folders are prepared with up to 100 fixture images and
    their labels, none of them corrupted. Only the ``form_dataset()`` call
    itself is timed, using ``time.perf_counter``.
    """
    train_module = _prepare_form_dataset(
        monkeypatch=monkeypatch,
        tmp_path=tmp_path,
        constants_patch=constants_patch,
        fixture_images_dir=fixture_images_dir,
        fixture_labels_dir=fixture_labels_dir,
        count=100,
        corrupt_stems=set(),
    )

    # Setup above is excluded from the measurement on purpose.
    started_at = time.perf_counter()
    train_module.form_dataset()
    duration_s = time.perf_counter() - started_at

    assert duration_s <= 30.0