mirror of
https://github.com/azaion/ai-training.git
synced 2026-04-22 22:26:36 +00:00
18b88ba9bf
- Updated `.gitignore` to remove committed test fixture data exclusions. - Increased batch size in `config.test.yaml` from 4 to 128 for training. - Simplified directory structure in `config.yaml` by removing unnecessary data paths. - Adjusted paths in `augmentation.py`, `dataset-visualiser.py`, and `exports.py` to align with the new configuration structure. - Enhanced `annotation_queue_handler.py` to utilize the updated configuration for directory management. - Added CSV logging of test results in `conftest.py` for better test reporting. These changes streamline the configuration management and enhance the testing framework, ensuring better organization and clarity in the project.
218 lines
5.1 KiB
Python
import shutil
|
|
from os import path as osp
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
import constants as c_mod
|
|
|
|
|
|
def _prepare_form_dataset(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
    count,
    corrupt_stems,
):
    """Stage *count* fixture image/label pairs into the patched data dirs.

    Patches the project constants onto ``tmp_path`` (via the
    ``constants_patch`` fixture), copies the first *count* fixture
    ``*.jpg``/``*.txt`` pairs into the configured images/labels
    directories, and overwrites the label of every stem listed in
    *corrupt_stems* with an invalid YOLO line.

    Returns a ``(train_module, today_dataset_path)`` tuple, where the
    path points at today's dataset folder under the configured
    datasets directory.
    """
    constants_patch(tmp_path)
    # Import deferred so the module picks up the patched constants.
    import train

    images_root = Path(c_mod.config.images_dir)
    labels_root = Path(c_mod.config.labels_dir)
    for directory in (images_root, labels_root):
        directory.mkdir(parents=True, exist_ok=True)

    selected = sorted(fixture_images_dir.glob("*.jpg"))[:count]
    for image_path in selected:
        name = image_path.stem
        shutil.copy2(fixture_images_dir / f"{name}.jpg", images_root / f"{name}.jpg")
        label_dst = labels_root / f"{name}.txt"
        shutil.copy2(fixture_labels_dir / f"{name}.txt", label_dst)
        if name in corrupt_stems:
            # x-centre of 1.5 is outside the normalised [0, 1] range,
            # which the pipeline should treat as a corrupted label.
            label_dst.write_text("0 1.5 0.5 0.1 0.1\n", encoding="utf-8")

    dataset_dir = osp.join(c_mod.config.datasets_dir, train.today_folder)
    return train, dataset_dir
|
|
|
|
|
|
def _count_jpg(p):
|
|
return len(list(Path(p).glob("*.jpg")))
|
|
|
|
|
|
def test_bt_dsf_01_split_ratio_70_20_10(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """20 inputs must split 70/20/10 into 14 train, 4 valid, 2 test images."""
    # Arrange
    train, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        20,
        set(),
    )
    # Act
    train.form_dataset()
    # Assert
    expected_counts = {"train": 14, "valid": 4, "test": 2}
    for split, expected in expected_counts.items():
        assert _count_jpg(Path(today_ds, split, "images")) == expected
|
|
|
|
|
|
def test_bt_dsf_02_six_subdirectories(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """form_dataset must create images/ and labels/ under each of the three splits."""
    # Arrange
    train, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        20,
        set(),
    )
    # Act
    train.form_dataset()
    # Assert
    root = Path(today_ds)
    for split in ("train", "valid", "test"):
        for kind in ("images", "labels"):
            assert (root / split / kind).is_dir()
|
|
|
|
|
|
def test_bt_dsf_03_total_files_twenty(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """All 20 staged images must end up distributed across the three splits."""
    # Arrange
    train, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        20,
        set(),
    )
    # Act
    train.form_dataset()
    # Assert
    total = sum(
        _count_jpg(Path(today_ds, split, "images"))
        for split in ("train", "valid", "test")
    )
    assert total == 20
|
|
|
|
|
|
def test_bt_dsf_04_corrupted_labels_quarantined(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """Pairs with out-of-range labels must be quarantined, not split."""
    # Arrange: corrupt the labels of the first 5 of 20 staged stems.
    staged_stems = [p.stem for p in sorted(fixture_images_dir.glob("*.jpg"))[:20]]
    corrupted = set(staged_stems[:5])
    train, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        20,
        corrupted,
    )
    # Act
    train.form_dataset()
    # Assert: only the 15 clean pairs are split; the 5 bad pairs are moved
    # to the configured corrupted-images/labels directories.
    split_total = sum(
        _count_jpg(Path(today_ds, split, "images"))
        for split in ("train", "valid", "test")
    )
    assert split_total == 15
    assert _count_jpg(c_mod.config.corrupted_images_dir) == 5
    quarantined_labels = list(Path(c_mod.config.corrupted_labels_dir).glob("*.txt"))
    assert len(quarantined_labels) == 5
|
|
|
|
|
|
@pytest.mark.resilience
def test_rt_dsf_01_empty_data_no_crash(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """form_dataset must tolerate empty data dirs and still create today's folder."""
    # Arrange: stage zero image/label pairs.
    train, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        0,
        set(),
    )
    # Act: must not raise on empty input.
    train.form_dataset()
    # Assert
    assert Path(today_ds).is_dir()
|
|
|
|
|
|
@pytest.mark.resource_limit
def test_rl_dsf_01_split_ratios_sum_hundred():
    """The configured train/valid/test percentages must total exactly 100."""
    import train

    # Assert
    assert sum((train.train_set, train.valid_set, train.test_set)) == 100
|
|
|
|
|
|
@pytest.mark.resource_limit
def test_rl_dsf_02_no_filename_duplication_across_splits(
    monkeypatch,
    tmp_path,
    constants_patch,
    fixture_images_dir,
    fixture_labels_dir,
):
    """Every image must land in exactly one split — no duplicates, none lost."""
    # Arrange
    train, today_ds = _prepare_form_dataset(
        monkeypatch,
        tmp_path,
        constants_patch,
        fixture_images_dir,
        fixture_labels_dir,
        20,
        set(),
    )
    # Act
    train.form_dataset()
    # Assert
    root = Path(today_ds)
    filenames = [
        f.name
        for split in ("train", "valid", "test")
        for f in (root / split / "images").glob("*.jpg")
    ]
    assert len(filenames) == len(set(filenames))
    assert len(filenames) == 20
|