Refactor configuration and update test structure for improved clarity

- Updated `.gitignore` to remove the negation rules (`!tests/root/`, `!tests/root/**`) that overrode global ignores for committed test fixture data.
- Increased the training batch size in `config.test.yaml` from 4 to 64.
- Simplified the directory structure in `config.yaml` by removing the unnecessary `data_processed`, `images`, and `labels` entries from `dirs`.
- Adjusted paths in `augmentation.py`, `dataset-visualiser.py`, and `exports.py` to align with the new configuration structure.
- Enhanced `annotation_queue_handler.py` to utilize the updated configuration for directory management.
- Added CSV logging of test results in `conftest.py` for better test reporting.

These changes streamline the configuration management and enhance the testing framework, ensuring better organization and clarity in the project.
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-03-28 07:32:40 +02:00
parent a47fa135de
commit 18b88ba9bf
90 changed files with 140 additions and 141 deletions
-4
View File
@@ -25,7 +25,3 @@ venv
# Test results # Test results
tests/test-results/ tests/test-results/
# Test fixture data — override global ignores for committed test data
!tests/root/
!tests/root/**
+1 -1
View File
@@ -5,5 +5,5 @@ flow: existing-code
step: 7 step: 7
name: Refactor name: Refactor
status: in_progress status: in_progress
sub_step: 4 — Execution (Batch 1 done: AZ-165, AZ-166, AZ-167; next: Batch 2 AZ-168) sub_step: 4 — Execution (All batches done: AZ-165, AZ-166, AZ-167, AZ-168, AZ-169; pending: final test run, commit)
retry_count: 0 retry_count: 0
+1 -1
View File
@@ -1,7 +1,7 @@
training: training:
model: 'yolo11n.yaml' model: 'yolo11n.yaml'
epochs: 1 epochs: 1
batch: 4 batch: 64
imgsz: 320 imgsz: 320
save_period: 1 save_period: 1
workers: 0 workers: 0
+1 -3
View File
@@ -14,10 +14,8 @@ dirs:
root: '/azaion' root: '/azaion'
data: 'data' data: 'data'
data_seed: 'data-seed' data_seed: 'data-seed'
data_processed: 'data-processed'
data_deleted: 'data_deleted' data_deleted: 'data_deleted'
images: 'images'
labels: 'labels'
training: training:
model: 'yolo26m.pt' model: 'yolo26m.pt'
@@ -40,36 +40,26 @@ class AnnotationQueueHandler:
class AnnotationName: class AnnotationName:
def __init__(self, h, name): def __init__(self, h, name):
self.img_data = path.join(h.data_dir, h.images_dir, f"{name}{h.JPG_EXT}") self.img_data = path.join(h.cfg.images_dir, f"{name}{h.JPG_EXT}")
self.lbl_data = path.join(h.data_dir, h.labels_dir, f"{name}{h.TXT_EXT}") self.lbl_data = path.join(h.cfg.labels_dir, f"{name}{h.TXT_EXT}")
self.img_seed = path.join(h.data_seed_dir, h.images_dir, f"{name}{h.JPG_EXT}") self.img_seed = path.join(h.cfg.seed_images_dir, f"{name}{h.JPG_EXT}")
self.lbl_seed = path.join(h.data_seed_dir, h.labels_dir, f"{name}{h.TXT_EXT}") self.lbl_seed = path.join(h.cfg.seed_labels_dir, f"{name}{h.TXT_EXT}")
def __init__(self): def __init__(self):
cfg = constants.config self.cfg = constants.config
self.data_dir = path.join(cfg.dirs.root, cfg.dirs.data)
self.data_seed_dir = path.join(cfg.dirs.root, cfg.dirs.data_seed)
self.images_dir = cfg.dirs.images
self.labels_dir = cfg.dirs.labels
self.del_img_dir = path.join(cfg.dirs.root, cfg.dirs.data_deleted, self.images_dir) for d in (self.cfg.images_dir, self.cfg.labels_dir,
self.del_lbl_dir = path.join(cfg.dirs.root, cfg.dirs.data_deleted, self.labels_dir) self.cfg.seed_images_dir, self.cfg.seed_labels_dir,
self.cfg.del_images_dir, self.cfg.del_labels_dir):
makedirs(path.join(self.data_dir, self.images_dir), exist_ok=True) makedirs(d, exist_ok=True)
makedirs(path.join(self.data_dir, self.labels_dir), exist_ok=True)
makedirs(path.join(self.data_seed_dir, self.images_dir), exist_ok=True)
makedirs(path.join(self.data_seed_dir, self.labels_dir), exist_ok=True)
makedirs(self.del_img_dir, exist_ok=True)
makedirs(self.del_lbl_dir, exist_ok=True)
self.consumer = Consumer( self.consumer = Consumer(
host=cfg.queue.host, host=self.cfg.queue.host,
port=cfg.queue.port, port=self.cfg.queue.port,
username=cfg.queue.consumer_user, username=self.cfg.queue.consumer_user,
password=cfg.queue.consumer_pw password=self.cfg.queue.consumer_pw
) )
self.queue_name = cfg.queue.name self.queue_name = self.cfg.queue.name
try: try:
with open(self.OFFSET_FILE, 'r') as f: with open(self.OFFSET_FILE, 'r') as f:
@@ -154,14 +144,14 @@ class AnnotationQueueHandler:
for name in msg.annotation_names: for name in msg.annotation_names:
a = self.AnnotationName(self, name) a = self.AnnotationName(self, name)
if path.exists(a.img_data): if path.exists(a.img_data):
shutil.move(a.img_data, self.del_img_dir) shutil.move(a.img_data, self.cfg.del_images_dir)
if path.exists(a.img_seed): if path.exists(a.img_seed):
shutil.move(a.img_seed, self.del_img_dir) shutil.move(a.img_seed, self.cfg.del_images_dir)
if path.exists(a.lbl_data): if path.exists(a.lbl_data):
shutil.move(a.lbl_data, self.del_lbl_dir) shutil.move(a.lbl_data, self.cfg.del_labels_dir)
if path.exists(a.lbl_seed): if path.exists(a.lbl_seed):
shutil.move(a.lbl_seed, self.del_lbl_dir) shutil.move(a.lbl_seed, self.cfg.del_labels_dir)
if __name__ == '__main__': if __name__ == '__main__':
+3 -3
View File
@@ -95,8 +95,8 @@ class Augmentator:
def augment_annotation(self, image_file): def augment_annotation(self, image_file):
try: try:
image_path = os.path.join(constants.config.data_images_dir, image_file.name) image_path = os.path.join(constants.config.images_dir, image_file.name)
labels_path = os.path.join(constants.config.data_labels_dir, f'{Path(str(image_path)).stem}.txt') labels_path = os.path.join(constants.config.labels_dir, f'{Path(str(image_path)).stem}.txt')
image = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_UNCHANGED) image = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_UNCHANGED)
img_ann = ImageLabel( img_ann = ImageLabel(
@@ -134,7 +134,7 @@ class Augmentator:
processed_images = set(f.name for f in os.scandir(constants.config.processed_images_dir)) processed_images = set(f.name for f in os.scandir(constants.config.processed_images_dir))
images = [] images = []
with os.scandir(constants.config.data_images_dir) as imd: with os.scandir(constants.config.images_dir) as imd:
for image_file in imd: for image_file in imd:
if image_file.is_file() and image_file.name not in processed_images: if image_file.is_file() and image_file.name not in processed_images:
images.append(image_file) images.append(image_file)
+27 -15
View File
@@ -24,10 +24,7 @@ class DirsConfig(BaseModel):
root: str = '/azaion' root: str = '/azaion'
data: str = 'data' data: str = 'data'
data_seed: str = 'data-seed' data_seed: str = 'data-seed'
data_processed: str = 'data-processed'
data_deleted: str = 'data_deleted' data_deleted: str = 'data_deleted'
images: str = 'images'
labels: str = 'labels'
class TrainingConfig(BaseModel): class TrainingConfig(BaseModel):
@@ -60,24 +57,36 @@ class Config(BaseModel):
return path.join(self.dirs.root, self.dirs.data) return path.join(self.dirs.root, self.dirs.data)
@property @property
def data_images_dir(self) -> str: def images_dir(self) -> str:
return path.join(self.data_dir, self.dirs.images) return path.join(self.data_dir, IMAGES_DIR)
@property @property
def data_labels_dir(self) -> str: def labels_dir(self) -> str:
return path.join(self.data_dir, self.dirs.labels) return path.join(self.data_dir, LABELS_DIR)
@property @property
def processed_dir(self) -> str: def seed_dir(self) -> str:
return path.join(self.dirs.root, self.dirs.data_processed) return path.join(self.dirs.root, self.dirs.data_seed)
@property @property
def processed_images_dir(self) -> str: def seed_images_dir(self) -> str:
return path.join(self.processed_dir, self.dirs.images) return path.join(self.seed_dir, IMAGES_DIR)
@property @property
def processed_labels_dir(self) -> str: def seed_labels_dir(self) -> str:
return path.join(self.processed_dir, self.dirs.labels) return path.join(self.seed_dir, LABELS_DIR)
@property
def del_dir(self) -> str:
return path.join(self.dirs.root, self.dirs.data_deleted)
@property
def del_images_dir(self) -> str:
return path.join(self.del_dir, IMAGES_DIR)
@property
def del_labels_dir(self) -> str:
return path.join(self.del_dir, LABELS_DIR)
@property @property
def corrupted_dir(self) -> str: def corrupted_dir(self) -> str:
@@ -85,11 +94,11 @@ class Config(BaseModel):
@property @property
def corrupted_images_dir(self) -> str: def corrupted_images_dir(self) -> str:
return path.join(self.corrupted_dir, self.dirs.images) return path.join(self.corrupted_dir, IMAGES_DIR)
@property @property
def corrupted_labels_dir(self) -> str: def corrupted_labels_dir(self) -> str:
return path.join(self.corrupted_dir, self.dirs.labels) return path.join(self.corrupted_dir, LABELS_DIR)
@property @property
def sample_dir(self) -> str: def sample_dir(self) -> str:
@@ -130,6 +139,9 @@ checkpoint_date_format = '%Y-%m-%d %H:%M:%S'
CONFIG_FILE = 'config.yaml' CONFIG_FILE = 'config.yaml'
IMAGES_DIR = 'images'
LABELS_DIR = 'labels'
JPG_EXT = '.jpg' JPG_EXT = '.jpg'
TXT_EXT = '.txt' TXT_EXT = '.txt'
+3 -3
View File
@@ -33,8 +33,8 @@ def visualise_dataset():
def visualise_processed_folder(): def visualise_processed_folder():
def show_image(img): def show_image(img):
image_path = os.path.join(constants.config.processed_images_dir, img) image_path = os.path.join(constants.config.images_dir, img)
labels_path = os.path.join(constants.config.processed_labels_dir, f'{Path(img).stem}.txt') labels_path = os.path.join(constants.config.labels_dir, f'{Path(img).stem}.txt')
img = ImageLabel( img = ImageLabel(
image_path=image_path, image_path=image_path,
image=cv2.imread(image_path), image=cv2.imread(image_path),
@@ -42,7 +42,7 @@ def visualise_processed_folder():
labels=read_labels(labels_path) labels=read_labels(labels_path)
) )
img.visualize(annotation_classes) img.visualize(annotation_classes)
images = os.listdir(constants.config.processed_images_dir) images = os.listdir(constants.config.images_dir)
cur = 0 cur = 0
show_image(images[cur]) show_image(images[cur])
pass pass
+1 -1
View File
@@ -62,7 +62,7 @@ def export_tensorrt(model_path):
def form_data_sample(destination_path, size=500, write_txt_log=False): def form_data_sample(destination_path, size=500, write_txt_log=False):
images = [] images = []
with scandir(constants.config.processed_images_dir) as imd: with scandir(constants.config.images_dir) as imd:
for image_file in imd: for image_file in imd:
if not image_file.is_file(): if not image_file.is_file():
continue continue
+10 -4
View File
@@ -38,7 +38,7 @@ def form_dataset():
shutil.rmtree(today_dataset, ignore_errors=True) shutil.rmtree(today_dataset, ignore_errors=True)
makedirs(today_dataset) makedirs(today_dataset)
images = [] images = []
with scandir(constants.config.processed_images_dir) as imd: with scandir(constants.config.images_dir) as imd:
for image_file in imd: for image_file in imd:
if not image_file.is_file(): if not image_file.is_file():
continue continue
@@ -60,14 +60,20 @@ def copy_annotations(images, folder):
global total_files_copied global total_files_copied
total_files_copied = 0 total_files_copied = 0
def _link_or_copy(src, dst):
try:
os.link(src, dst)
except OSError:
shutil.copy(src, dst)
def copy_image(image): def copy_image(image):
global total_files_copied global total_files_copied
total_files_copied += 1 total_files_copied += 1
label_name = f'{Path(image.path).stem}.txt' label_name = f'{Path(image.path).stem}.txt'
label_path = path.join(constants.config.processed_labels_dir, label_name) label_path = path.join(constants.config.labels_dir, label_name)
if check_label(label_path): if check_label(label_path):
shutil.copy(image.path, path.join(destination_images, image.name)) _link_or_copy(image.path, path.join(destination_images, image.name))
shutil.copy(label_path, path.join(destination_labels, label_name)) _link_or_copy(label_path, path.join(destination_labels, label_name))
else: else:
shutil.copy(image.path, path.join(constants.config.corrupted_images_dir, image.name)) shutil.copy(image.path, path.join(constants.config.corrupted_images_dir, image.name))
shutil.copy(label_path, path.join(constants.config.corrupted_labels_dir, label_name)) shutil.copy(label_path, path.join(constants.config.corrupted_labels_dir, label_name))
+29
View File
@@ -1,3 +1,4 @@
import csv
import shutil import shutil
from pathlib import Path from pathlib import Path
@@ -14,6 +15,34 @@ _CONFIG_TEST = _PROJECT_ROOT / "config.test.yaml"
collect_ignore = ["security_test.py", "imagelabel_visualize_test.py"] collect_ignore = ["security_test.py", "imagelabel_visualize_test.py"]
_test_results = []
@pytest.hookimpl(tryfirst=True, hookwrapper=True)
def pytest_runtest_makereport(item, call):
outcome = yield
report = outcome.get_result()
if report.when == "call" or (report.when == "setup" and report.skipped):
_test_results.append({
"module": item.nodeid.rsplit("::", 1)[0],
"name": item.name,
"result": report.outcome.upper(),
"duration": round(report.duration, 3),
})
def pytest_sessionfinish(session, exitstatus):
if not _test_results:
return
results_dir = Path(__file__).resolve().parent / "test-results"
results_dir.mkdir(exist_ok=True)
with open(results_dir / "test-results.csv", "w", newline="", encoding="utf-8") as f:
writer = csv.writer(f)
writer.writerow(["module", "test", "result", "duration_s"])
for r in _test_results:
writer.writerow([r["module"], r["name"], r["result"], f"{r['duration']:.3f}"])
def apply_constants_patch(monkeypatch, base: Path): def apply_constants_patch(monkeypatch, base: Path):
import constants as c import constants as c
+6 -6
View File
@@ -20,16 +20,16 @@ def _prepare_form_dataset(
constants_patch(tmp_path) constants_patch(tmp_path)
import train import train
proc_img = Path(c_mod.config.processed_images_dir) data_img = Path(c_mod.config.images_dir)
proc_lbl = Path(c_mod.config.processed_labels_dir) data_lbl = Path(c_mod.config.labels_dir)
proc_img.mkdir(parents=True, exist_ok=True) data_img.mkdir(parents=True, exist_ok=True)
proc_lbl.mkdir(parents=True, exist_ok=True) data_lbl.mkdir(parents=True, exist_ok=True)
imgs = sorted(fixture_images_dir.glob("*.jpg"))[:count] imgs = sorted(fixture_images_dir.glob("*.jpg"))[:count]
for p in imgs: for p in imgs:
stem = p.stem stem = p.stem
shutil.copy2(fixture_images_dir / f"{stem}.jpg", proc_img / f"{stem}.jpg") shutil.copy2(fixture_images_dir / f"{stem}.jpg", data_img / f"{stem}.jpg")
dst = proc_lbl / f"{stem}.txt" dst = data_lbl / f"{stem}.txt"
shutil.copy2(fixture_labels_dir / f"{stem}.txt", dst) shutil.copy2(fixture_labels_dir / f"{stem}.txt", dst)
if stem in corrupt_stems: if stem in corrupt_stems:
dst.write_text("0 1.5 0.5 0.1 0.1\n", encoding="utf-8") dst.write_text("0 1.5 0.5 0.1 0.1\n", encoding="utf-8")
Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.4 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.5 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.4 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 788 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 MiB

@@ -1 +0,0 @@
0 0.47200 0.78007 0.26215 0.42338
@@ -1,2 +0,0 @@
0 0.76062 0.31074 0.08738 0.13238
0 0.79600 0.20352 0.04985 0.08424
@@ -1,6 +0,0 @@
6 0.52646 0.69638 0.04738 0.18161
6 0.56554 0.69856 0.03077 0.16848
6 0.59908 0.69311 0.03754 0.17263
6 0.70000 0.74293 0.04738 0.17176
6 0.77046 0.69638 0.03077 0.14441
6 0.73538 0.70190 0.03077 0.14660
@@ -1,4 +0,0 @@
6 0.63569 0.46827 0.04185 0.13675
6 0.68492 0.38403 0.04800 0.13019
6 0.61569 0.36161 0.03877 0.10065
6 0.56708 0.44147 0.04985 0.12253
@@ -1 +0,0 @@
3 0.74738 0.58588 0.33415 0.49450
@@ -1 +0,0 @@
0 0.91200 0.50492 0.09846 0.08971
@@ -1 +0,0 @@
0 0.87846 0.50930 0.09785 0.07658
@@ -1 +0,0 @@
2 0.53169 0.44475 0.06523 0.12691
@@ -1 +0,0 @@
2 0.51323 0.45679 0.08369 0.08752
@@ -1 +0,0 @@
0 0.90154 0.37309 0.08246 0.10831
@@ -1,4 +0,0 @@
6 0.35723 0.48851 0.03015 0.12253
6 0.21415 0.44147 0.02954 0.11597
6 0.24277 0.44147 0.02769 0.11597
6 0.28892 0.44530 0.02769 0.08205
@@ -1 +0,0 @@
3 0.24338 0.33097 0.16308 0.21443
@@ -1 +0,0 @@
3 0.53200 0.18055 0.07815 0.08862
@@ -1 +0,0 @@
1 0.63538 0.20571 0.05354 0.11050
@@ -1 +0,0 @@
3 0.70092 0.15046 0.04431 0.06564
@@ -1 +0,0 @@
1 0.28431 0.81398 0.12677 0.33805
@@ -1,2 +0,0 @@
0 0.33108 0.75600 0.23262 0.35556
6 0.10862 0.51641 0.03385 0.09080
@@ -1 +0,0 @@
0 0.41138 0.64113 0.29108 0.21552
Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.4 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.5 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.4 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 788 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 MiB

@@ -1 +0,0 @@
0 0.47200 0.78007 0.26215 0.42338
@@ -1,2 +0,0 @@
0 0.76062 0.31074 0.08738 0.13238
0 0.79600 0.20352 0.04985 0.08424
@@ -1,6 +0,0 @@
6 0.52646 0.69638 0.04738 0.18161
6 0.56554 0.69856 0.03077 0.16848
6 0.59908 0.69311 0.03754 0.17263
6 0.70000 0.74293 0.04738 0.17176
6 0.77046 0.69638 0.03077 0.14441
6 0.73538 0.70190 0.03077 0.14660
@@ -1,4 +0,0 @@
6 0.63569 0.46827 0.04185 0.13675
6 0.68492 0.38403 0.04800 0.13019
6 0.61569 0.36161 0.03877 0.10065
6 0.56708 0.44147 0.04985 0.12253
@@ -1 +0,0 @@
3 0.74738 0.58588 0.33415 0.49450
@@ -1 +0,0 @@
0 0.91200 0.50492 0.09846 0.08971
@@ -1 +0,0 @@
0 0.87846 0.50930 0.09785 0.07658
@@ -1 +0,0 @@
2 0.53169 0.44475 0.06523 0.12691
@@ -1 +0,0 @@
2 0.51323 0.45679 0.08369 0.08752
@@ -1 +0,0 @@
0 0.90154 0.37309 0.08246 0.10831
@@ -1,4 +0,0 @@
6 0.35723 0.48851 0.03015 0.12253
6 0.21415 0.44147 0.02954 0.11597
6 0.24277 0.44147 0.02769 0.11597
6 0.28892 0.44530 0.02769 0.08205
@@ -1 +0,0 @@
3 0.24338 0.33097 0.16308 0.21443
@@ -1 +0,0 @@
3 0.53200 0.18055 0.07815 0.08862
@@ -1 +0,0 @@
1 0.63538 0.20571 0.05354 0.11050
@@ -1 +0,0 @@
3 0.70092 0.15046 0.04431 0.06564
@@ -1 +0,0 @@
1 0.28431 0.81398 0.12677 0.33805
@@ -1,2 +0,0 @@
0 0.33108 0.75600 0.23262 0.35556
6 0.10862 0.51641 0.03385 0.09080
@@ -1 +0,0 @@
0 0.41138 0.64113 0.29108 0.21552
+7 -7
View File
@@ -19,16 +19,16 @@ def _prepare_form_dataset(
constants_patch(tmp_path) constants_patch(tmp_path)
import train import train
proc_img = Path(c_mod.config.processed_images_dir) data_img = Path(c_mod.config.images_dir)
proc_lbl = Path(c_mod.config.processed_labels_dir) data_lbl = Path(c_mod.config.labels_dir)
proc_img.mkdir(parents=True, exist_ok=True) data_img.mkdir(parents=True, exist_ok=True)
proc_lbl.mkdir(parents=True, exist_ok=True) data_lbl.mkdir(parents=True, exist_ok=True)
imgs = sorted(fixture_images_dir.glob("*.jpg"))[:count] imgs = sorted(fixture_images_dir.glob("*.jpg"))[:count]
for p in imgs: for p in imgs:
stem = p.stem stem = p.stem
shutil.copy2(fixture_images_dir / f"{stem}.jpg", proc_img / f"{stem}.jpg") shutil.copy2(fixture_images_dir / f"{stem}.jpg", data_img / f"{stem}.jpg")
dst = proc_lbl / f"{stem}.txt" dst = data_lbl / f"{stem}.txt"
shutil.copy2(fixture_labels_dir / f"{stem}.txt", dst) shutil.copy2(fixture_labels_dir / f"{stem}.txt", dst)
if stem in corrupt_stems: if stem in corrupt_stems:
dst.write_text("0 1.5 0.5 0.1 0.1\n", encoding="utf-8") dst.write_text("0 1.5 0.5 0.1 0.1\n", encoding="utf-8")
@@ -156,7 +156,7 @@ def test_bt_dsf_04_corrupted_labels_quarantined(
@pytest.mark.resilience @pytest.mark.resilience
def test_rt_dsf_01_empty_processed_no_crash( def test_rt_dsf_01_empty_data_no_crash(
monkeypatch, monkeypatch,
tmp_path, tmp_path,
constants_patch, constants_patch,
+33 -1
View File
@@ -1,3 +1,4 @@
import os
import shutil import shutil
from os import path from os import path
from pathlib import Path from pathlib import Path
@@ -13,15 +14,42 @@ _TESTS_DIR = Path(__file__).resolve().parent
_TEST_ROOT = _TESTS_DIR / "root" _TEST_ROOT = _TESTS_DIR / "root"
_DATASET_IMAGES = _TEST_ROOT / "data" / "images" _DATASET_IMAGES = _TEST_ROOT / "data" / "images"
_CONFIG_TEST = _TESTS_DIR.parent / "config.test.yaml" _CONFIG_TEST = _TESTS_DIR.parent / "config.test.yaml"
_SOURCE_DATASET = _TESTS_DIR.parent / "_docs" / "00_problem" / "input_data" / "dataset"
def _hardlink_tree(src_dir: Path, dst_dir: Path):
dst_dir.mkdir(parents=True, exist_ok=True)
for f in src_dir.iterdir():
if f.is_file():
target = dst_dir / f.name
if not target.exists():
os.link(f, target)
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def e2e_result(): def e2e_result():
# Arrange
src_images = _SOURCE_DATASET / "images"
src_labels = _SOURCE_DATASET / "labels"
if not src_images.is_dir() or not src_labels.is_dir():
pytest.skip("source dataset not found")
old_config = c.config old_config = c.config
c.config = c.Config.from_yaml(str(_CONFIG_TEST), root=str(_TEST_ROOT)) c.config = c.Config.from_yaml(str(_CONFIG_TEST), root=str(_TEST_ROOT))
dst_images = Path(c.config.images_dir)
dst_labels = Path(c.config.labels_dir)
for d in (dst_images, dst_labels, c.config.datasets_dir, c.config.models_dir, c.config.corrupted_dir):
shutil.rmtree(str(d), ignore_errors=True)
_hardlink_tree(src_images, dst_images)
_hardlink_tree(src_labels, dst_labels)
linked_count = len(list(dst_images.glob("*.jpg")))
Path(c.config.models_dir).mkdir(parents=True, exist_ok=True) Path(c.config.models_dir).mkdir(parents=True, exist_ok=True)
# Act
train_mod.train_dataset() train_mod.train_dataset()
exports_mod.export_onnx(c.config.current_pt_model) exports_mod.export_onnx(c.config.current_pt_model)
@@ -31,8 +59,11 @@ def e2e_result():
yield { yield {
"today_dataset": today_ds, "today_dataset": today_ds,
"linked_count": linked_count,
} }
shutil.rmtree(str(dst_images), ignore_errors=True)
shutil.rmtree(str(dst_labels), ignore_errors=True)
shutil.rmtree(c.config.datasets_dir, ignore_errors=True) shutil.rmtree(c.config.datasets_dir, ignore_errors=True)
shutil.rmtree(c.config.models_dir, ignore_errors=True) shutil.rmtree(c.config.models_dir, ignore_errors=True)
shutil.rmtree(c.config.corrupted_dir, ignore_errors=True) shutil.rmtree(c.config.corrupted_dir, ignore_errors=True)
@@ -42,6 +73,7 @@ def e2e_result():
@pytest.mark.e2e @pytest.mark.e2e
class TestTrainingPipeline: class TestTrainingPipeline:
def test_dataset_formed(self, e2e_result): def test_dataset_formed(self, e2e_result):
# Assert
base = Path(e2e_result["today_dataset"]) base = Path(e2e_result["today_dataset"])
for split in ("train", "valid", "test"): for split in ("train", "valid", "test"):
assert (base / split / "images").is_dir() assert (base / split / "images").is_dir()
@@ -50,7 +82,7 @@ class TestTrainingPipeline:
len(list((base / s / "images").glob("*.jpg"))) len(list((base / s / "images").glob("*.jpg")))
for s in ("train", "valid", "test") for s in ("train", "valid", "test")
) )
assert total == 20 assert 0 < total <= e2e_result["linked_count"]
def test_data_yaml_created(self, e2e_result): def test_data_yaml_created(self, e2e_result):
yaml_path = Path(e2e_result["today_dataset"]) / "data.yaml" yaml_path = Path(e2e_result["today_dataset"]) / "data.yaml"