mirror of
https://github.com/azaion/ai-training.git
synced 2026-04-22 10:36:35 +00:00
243b69656b
- Modified `.gitignore` to reflect the new path for test results. - Updated `docker-compose.test.yml` to mount the correct test results directory. - Adjusted `Dockerfile.test` to set the `PYTHONPATH` and ensure test results are saved in the updated location. - Added `boto3` and `netron` to `requirements-test.txt` to support new functionalities. - Updated `pytest.ini` to include the new `pythonpath` for test discovery. These changes streamline the testing process and ensure compatibility with the updated directory structure.
218 lines
5.1 KiB
Python
218 lines
5.1 KiB
Python
import shutil
|
|
from os import path as osp
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
import constants as c_mod
|
|
|
|
|
|
def _prepare_form_dataset(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
    count,
    corrupt_stems,
):
    """Populate the processed images/labels dirs with *count* fixture pairs.

    Labels whose stem appears in *corrupt_stems* are overwritten with a
    single out-of-range bbox line so form_dataset() can quarantine them.
    Returns the imported ``train`` module and the path of today's dataset
    folder.  (``monkeypatch`` is accepted for fixture symmetry but unused.)
    """
    constants_patch(tmp_path)

    # Imported after patching so train picks up the patched config paths
    # at import time — presumably; verify against train's module body.
    import train

    images_root = Path(c_mod.config.processed_images_dir)
    labels_root = Path(c_mod.config.processed_labels_dir)
    for root in (images_root, labels_root):
        root.mkdir(parents=True, exist_ok=True)

    for img in sorted(fixture_images_dir.glob("*.jpg"))[:count]:
        stem = img.stem
        shutil.copy2(fixture_images_dir / f"{stem}.jpg", images_root / f"{stem}.jpg")
        label_dst = labels_root / f"{stem}.txt"
        shutil.copy2(fixture_labels_dir / f"{stem}.txt", label_dst)
        if stem in corrupt_stems:
            # x-center of 1.5 falls outside the normalized [0, 1] range.
            label_dst.write_text("0 1.5 0.5 0.1 0.1\n", encoding="utf-8")

    today_ds = osp.join(c_mod.config.datasets_dir, train.today_folder)
    return train, today_ds
|
|
|
|
|
|
def _count_jpg(p):
|
|
return len(list(Path(p).glob("*.jpg")))
|
|
|
|
|
|
def test_bt_dsf_01_split_ratio_70_20_10(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """form_dataset() must split 100 images 70/20/10 over train/valid/test."""
    # Arrange
    train, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        100,
        set(),
    )
    # Act
    train.form_dataset()
    # Assert
    expected = {"train": 70, "valid": 20, "test": 10}
    for split, want in expected.items():
        assert _count_jpg(Path(today_ds, split, "images")) == want
|
|
|
|
|
|
def test_bt_dsf_02_six_subdirectories(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """form_dataset() must create images/ and labels/ under each split."""
    # Arrange
    train, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        100,
        set(),
    )
    # Act
    train.form_dataset()
    # Assert: all six split/kind combinations exist as directories.
    base = Path(today_ds)
    for split in ("train", "valid", "test"):
        for kind in ("images", "labels"):
            assert (base / split / kind).is_dir()
|
|
|
|
|
|
def test_bt_dsf_03_total_files_one_hundred(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """No image may be lost or duplicated: split counts must sum to 100."""
    # Arrange
    train, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        100,
        set(),
    )
    # Act
    train.form_dataset()
    # Assert
    total = sum(
        _count_jpg(Path(today_ds, split, "images"))
        for split in ("train", "valid", "test")
    )
    assert total == 100
|
|
|
|
|
|
def test_bt_dsf_04_corrupted_labels_quarantined(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """Pairs with invalid labels must be diverted to the corrupted dirs."""
    # Arrange: corrupt the first 5 of 100 label files.
    stems = [img.stem for img in sorted(fixture_images_dir.glob("*.jpg"))[:100]]
    corrupt = set(stems[:5])
    train, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        100,
        corrupt,
    )
    # Act
    train.form_dataset()
    # Assert: 95 clean pairs split, 5 corrupted pairs quarantined.
    kept = sum(
        _count_jpg(Path(today_ds, split, "images"))
        for split in ("train", "valid", "test")
    )
    assert kept == 95
    assert _count_jpg(c_mod.config.corrupted_images_dir) == 5
    quarantined = list(Path(c_mod.config.corrupted_labels_dir).glob("*.txt"))
    assert len(quarantined) == 5
|
|
|
|
|
|
@pytest.mark.resilience
def test_rt_dsf_01_empty_processed_no_crash(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """With zero processed images, form_dataset() must still succeed."""
    # Arrange: empty processed dirs (count=0).
    train, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        0,
        set(),
    )
    # Act
    train.form_dataset()
    # Assert: the dated dataset folder is created even when empty.
    assert Path(today_ds).is_dir()
|
|
|
|
|
|
@pytest.mark.resource_limit
def test_rl_dsf_01_split_ratios_sum_hundred():
    """The configured split percentages must add up to exactly 100."""
    import train

    total = train.train_set + train.valid_set + train.test_set
    assert total == 100
|
|
|
|
|
|
@pytest.mark.resource_limit
def test_rl_dsf_02_no_filename_duplication_across_splits(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """Each image filename must land in exactly one of the three splits."""
    # Arrange
    train, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        100,
        set(),
    )
    # Act
    train.form_dataset()
    # Assert: collect every jpg name across splits; all unique, none lost.
    base = Path(today_ds)
    names = [
        f.name
        for split in ("train", "valid", "test")
        for f in (base / split / "images").glob("*.jpg")
    ]
    assert len(names) == len(set(names))
    assert len(names) == 100
|