diff --git a/.cursor/rules/coderule.mdc b/.cursor/rules/coderule.mdc index e7fe6f0..c333ec6 100644 --- a/.cursor/rules/coderule.mdc +++ b/.cursor/rules/coderule.mdc @@ -11,8 +11,11 @@ alwaysApply: true - Write code that takes into account the different environments: development, production - You are careful to make changes that are requested or you are confident the changes are well understood and related to the change being requested - Mocking data is needed only for tests, never mock data for dev or prod env +- Make test environment (files, db and so on) as close as possible to the production environment - When you add new libraries or dependencies make sure you are using the same version of it as other parts of the code - When a test fails due to a missing dependency, install it — do not fake or stub the module system. For normal packages, add them to the project's dependency file (requirements-test.txt, package.json devDependencies, test csproj, etc.) and install. Only consider stubbing if the dependency is heavy (e.g. hardware-specific SDK, large native toolchain) — and even then, ask the user first before choosing to stub. +- Do not solve environment or infrastructure problems (dependency resolution, import paths, service discovery, connection config) by hardcoding workarounds in source code. Fix them at the environment/configuration level. +- Before writing new infrastructure or workaround code, check how the existing codebase already handles the same concern. Follow established project patterns. 
- Focus on the areas of code relevant to the task - Do not touch code that is unrelated to the task diff --git a/.cursor/rules/cursor-meta.mdc b/.cursor/rules/cursor-meta.mdc index 94cc6c5..8cc663a 100644 --- a/.cursor/rules/cursor-meta.mdc +++ b/.cursor/rules/cursor-meta.mdc @@ -17,11 +17,5 @@ globs: [".cursor/**"] ## Agent Files (.cursor/agents/) - Must have `name` and `description` in frontmatter -## User Interaction -- Use the AskQuestion tool for structured choices (A/B/C/D) when available — it provides an interactive UI. Fall back to plain-text questions if the tool is unavailable. - -## Execution Safety -- Never run test suites, builds, Docker commands, or other long-running/resource-heavy/security-risky operations without asking the user first - unlsess it is explicilty stated in skill or agent, or user already asked to do so. - ## Security - All `.cursor/` files must be scanned for hidden Unicode before committing (see cursor-security.mdc) diff --git a/.cursor/rules/meta-rule.mdc b/.cursor/rules/meta-rule.mdc new file mode 100644 index 0000000..b7348e5 --- /dev/null +++ b/.cursor/rules/meta-rule.mdc @@ -0,0 +1,30 @@ +--- +description: "Execution safety, user interaction, and self-improvement protocols for the AI agent" +alwaysApply: true +--- +# Agent Meta Rules + +## Execution Safety +- Never run test suites, builds, Docker commands, or other long-running/resource-heavy/security-risky operations without asking the user first — unless it is explicitly stated in a skill or agent, or the user already asked to do so. + +## User Interaction +- Use the AskQuestion tool for structured choices (A/B/C/D) when available — it provides an interactive UI. Fall back to plain-text questions if the tool is unavailable. + +## Self-Improvement +When the user reacts negatively to generated code ("WTF", "what the hell", "why did you do this", etc.): + +1. **Pause** — do not rush to fix. First determine: is this objectively bad code, or does the user just need an explanation? 
+2. **If the user doesn't understand** — explain the reasoning. That's it. No code change needed. +3. **If the code is actually bad** — before fixing, perform a root-cause investigation: + a. **Why** did this bad code get produced? Identify the reasoning chain or implicit assumption that led to it. + b. **Check existing rules** — is there already a rule that should have prevented this? If so, clarify or strengthen it. + c. **Propose a new rule** if no existing rule covers the failure mode. Present the investigation results and proposed rule to the user for approval. + d. **Only then** fix the code. +4. The rule goes into `coderule.mdc` for coding practices, `meta-rule.mdc` for agent behavior, or a new focused rule file — depending on context. Always check for duplicates or near-duplicates first. + +### Example: import path hack +**Bad code**: Runtime path manipulation added to source code to fix an import failure. +**Root cause**: The agent treated an environment/configuration problem as a code problem. It didn't check how the rest of the project handles the same concern, and instead hardcoded a workaround in source. +**Preventive rules added to coderule.mdc**: +- "Do not solve environment or infrastructure problems by hardcoding workarounds in source code. Fix them at the environment/configuration level." +- "Before writing new infrastructure or workaround code, check how the existing codebase already handles the same concern. Follow established project patterns." 
diff --git a/.cursor/rules/tracker.mdc b/.cursor/rules/tracker.mdc new file mode 100644 index 0000000..285333c --- /dev/null +++ b/.cursor/rules/tracker.mdc @@ -0,0 +1,9 @@ +# Work Item Tracker + +- Use **Jira** as the sole work item tracker (MCP server: `user-Jira-MCP-Server`) +- Do NOT use Azure DevOps for work item management +- Jira cloud ID: `denyspopov.atlassian.net` +- Project key: `AZ` +- Project name: AZAION +- All task IDs follow the format `AZ-<number>` +- Issue types: Epic, Story, Task, Bug, Subtask diff --git a/.cursor/skills/autopilot/state.md b/.cursor/skills/autopilot/state.md index 022ecda..33dd76f 100644 --- a/.cursor/skills/autopilot/state.md +++ b/.cursor/skills/autopilot/state.md @@ -41,7 +41,7 @@ retry_count: 3 ### State File Rules 1. **Create** on the first autopilot invocation (after state detection determines Step 1) -2. **Update** after every step completion, session boundary, or failed retry +2. **Update** after every change — this includes: batch completion, sub-step progress, step completion, session boundary, failed retry, or any meaningful state transition. The state file must always reflect the current reality. 3. **Read** as the first action on every invocation — before folder scanning 4. **Cross-check**: verify against actual `_docs/` folder contents. If they disagree, trust the folder structure and update the state file 5. **Never delete** the state file diff --git a/.cursor/skills/implement/SKILL.md b/.cursor/skills/implement/SKILL.md index 1039d01..e6e62a2 100644 --- a/.cursor/skills/implement/SKILL.md +++ b/.cursor/skills/implement/SKILL.md @@ -128,31 +128,31 @@ Auto-fix loop with bounded retries (max 2 attempts) before escalating to user: Track `auto_fix_attempts` count in the batch report for retrospective analysis. -### 10. 
Test - -- Read and execute `.cursor/skills/test-run/SKILL.md` (detect runner, run suite, diagnose failures, present blocking choices) -- Test failures are a **blocking gate** — do not proceed to commit until the test-run skill completes with a user decision -- Note: the autopilot also runs a separate full test suite after all implementation batches complete (greenfield Step 7, existing-code Steps 6/10). This is intentional — per-batch tests are regression checks, the post-implement run is final validation. - -### 11. Commit and Push +### 10. Commit and Push - After user confirms the batch (explicitly for FAIL, implicitly for PASS/PASS_WITH_WARNINGS): - `git add` all changed files from the batch - `git commit` with a message that includes ALL task IDs (tracker IDs or numeric prefixes) of tasks implemented in the batch, followed by a summary of what was implemented. Format: `[TASK-ID-1] [TASK-ID-2] ... Summary of changes` - `git push` to the remote branch -### 12. Update Tracker Status → In Testing +### 11. Update Tracker Status → In Testing After the batch is committed and pushed, transition the ticket status of each task in the batch to **In Testing** via the configured work item tracker. If `tracker: local`, skip this step. -### 13. Archive Completed Tasks +### 12. Archive Completed Tasks Move each completed task file from `TASKS_DIR/todo/` to `TASKS_DIR/done/`. -### 14. Loop +### 13. Loop - Go back to step 2 until all tasks in `todo/` are done -- When all tasks are complete, report final summary + +### 14. 
Final Test Run + +- After all batches are complete, run the full test suite once +- Read and execute `.cursor/skills/test-run/SKILL.md` (detect runner, run suite, diagnose failures, present blocking choices) +- Test failures are a **blocking gate** — do not proceed until the test-run skill completes with a user decision +- When tests pass, report final summary ## Batch Report Persistence @@ -195,7 +195,7 @@ After each batch, produce a structured report: | Implementer fails same approach 3+ times | Stop it, escalate to user | | Task blocked on external dependency (not in task list) | Report and skip | | File ownership conflict unresolvable | ASK user | -| Any test failure after a batch | Delegate to test-run skill — blocking gate | +| Test failure after final test run | Delegate to test-run skill — blocking gate | | All tasks complete | Report final summary, suggest final commit | | `_dependencies_table.md` missing | STOP — run `/decompose` first | @@ -203,7 +203,7 @@ After each batch, produce a structured report: Each batch commit serves as a rollback checkpoint. If recovery is needed: -- **Tests fail after a batch commit**: `git revert <commit-hash>` using the hash from the batch report in `_docs/03_implementation/` +- **Tests fail after final test run**: `git revert <commit-hash>` using hashes from the batch reports in `_docs/03_implementation/` - **Resuming after interruption**: Read `_docs/03_implementation/batch_*_report.md` files to determine which batches completed, then continue from the next batch - **Multiple consecutive batches fail**: Stop and escalate to user with links to batch reports and commit hashes @@ -212,4 +212,4 @@ Each batch commit serves as a rollback checkpoint. 
If recovery is needed: - Never launch tasks whose dependencies are not yet completed - Never allow two parallel agents to write to the same file - If a subagent fails or is flagged as stuck, stop it and report — do not let it loop indefinitely -- Always run tests after each batch completes +- Always run the full test suite after all batches complete (step 14) diff --git a/.gitignore b/.gitignore index 02f0c7a..717cf1f 100644 --- a/.gitignore +++ b/.gitignore @@ -24,4 +24,8 @@ venv *.png # Test results -tests/test-results/ \ No newline at end of file +tests/test-results/ + +# Test fixture data — override global ignores for committed test data +!tests/root/ +!tests/root/** \ No newline at end of file diff --git a/_docs/02_document/tests/blackbox-tests.md b/_docs/02_document/tests/blackbox-tests.md index 6a78f16..172a8bf 100644 --- a/_docs/02_document/tests/blackbox-tests.md +++ b/_docs/02_document/tests/blackbox-tests.md @@ -1,61 +1,9 @@ # Blackbox Test Scenarios -## BT-AUG: Augmentation Pipeline - -### BT-AUG-01: Single image produces 8 outputs -- **Input**: 1 image + 1 valid label from fixture dataset -- **Action**: Run `Augmentator.augment_inner()` on the image -- **Expected**: Returns list of exactly 8 ImageLabel objects -- **Traces**: AC: 8× augmentation ratio - -### BT-AUG-02: Augmented filenames follow naming convention -- **Input**: Image with stem "test_image" -- **Action**: Run `augment_inner()` -- **Expected**: Output filenames: `test_image.jpg`, `test_image_1.jpg` through `test_image_7.jpg`; matching `.txt` labels -- **Traces**: AC: Augmentation output format - -### BT-AUG-03: All output bounding boxes in valid range -- **Input**: 1 image + label with multiple bboxes -- **Action**: Run `augment_inner()` -- **Expected**: Every bbox coordinate in every output label is in [0.0, 1.0] -- **Traces**: AC: Bounding boxes clipped to [0, 1] - -### BT-AUG-04: Bounding box correction clips edge bboxes -- **Input**: Label with bbox near edge: `0 0.99 0.5 0.2 0.1` -- 
**Action**: Run `correct_bboxes()` -- **Expected**: Width reduced so bbox fits within [margin, 1-margin]; no coordinate exceeds bounds -- **Traces**: AC: Bounding boxes clipped to [0, 1] - -### BT-AUG-05: Tiny bounding boxes removed after correction -- **Input**: Label with tiny bbox that becomes < 0.01 after clipping -- **Action**: Run `correct_bboxes()` -- **Expected**: Bbox removed from output (area < correct_min_bbox_size) -- **Traces**: AC: Bounding boxes with area < 0.01% discarded - -### BT-AUG-06: Empty label produces 8 outputs with empty labels -- **Input**: 1 image + empty label file -- **Action**: Run `augment_inner()` -- **Expected**: 8 ImageLabel objects returned; all have empty labels lists -- **Traces**: AC: Augmentation handles empty annotations - -### BT-AUG-07: Full augmentation pipeline (filesystem integration) -- **Input**: 5 images + labels copied to data/ directory in tmp_path -- **Action**: Run `augment_annotations()` with patched paths -- **Expected**: 40 images (5 × 8) in processed images dir; 40 matching labels in processed labels dir -- **Traces**: AC: 8× augmentation, filesystem output - -### BT-AUG-08: Augmentation skips already-processed images -- **Input**: 5 images in data/; 3 already present in processed/ dir -- **Action**: Run `augment_annotations()` -- **Expected**: Only 2 new images processed (16 new outputs); existing 3 untouched -- **Traces**: AC: Augmentation processes only unprocessed images - ---- - ## BT-DSF: Dataset Formation ### BT-DSF-01: 70/20/10 split ratio -- **Input**: 100 images + labels in processed/ dir +- **Input**: 100 images + labels in data/ dir - **Action**: Run `form_dataset()` with patched paths - **Expected**: train: 70 images+labels, valid: 20, test: 10 - **Traces**: AC: Dataset split 70/20/10 diff --git a/_docs/02_document/tests/performance-tests.md b/_docs/02_document/tests/performance-tests.md index 9c75a48..cef73f3 100644 --- a/_docs/02_document/tests/performance-tests.md +++ 
b/_docs/02_document/tests/performance-tests.md @@ -1,18 +1,5 @@ # Performance Test Scenarios -## PT-AUG-01: Augmentation throughput -- **Input**: 10 images from fixture dataset -- **Action**: Run `augment_annotations()`, measure wall time -- **Expected**: Completes within 60 seconds (10 images × 8 outputs = 80 files) -- **Traces**: Restriction: Augmentation runs continuously -- **Note**: Threshold is generous; actual performance depends on CPU - -## PT-AUG-02: Parallel augmentation speedup -- **Input**: 10 images from fixture dataset -- **Action**: Run with ThreadPoolExecutor vs sequential, compare times -- **Expected**: Parallel is ≥ 1.5× faster than sequential -- **Traces**: AC: Parallelized per-image processing - ## PT-DSF-01: Dataset formation throughput - **Input**: 100 images + labels - **Action**: Run `form_dataset()`, measure wall time diff --git a/_docs/02_document/tests/resilience-tests.md b/_docs/02_document/tests/resilience-tests.md index d36d555..65343ee 100644 --- a/_docs/02_document/tests/resilience-tests.md +++ b/_docs/02_document/tests/resilience-tests.md @@ -1,25 +1,7 @@ # Resilience Test Scenarios -## RT-AUG-01: Augmentation handles corrupted image gracefully -- **Input**: 1 valid image + 1 corrupted image file (truncated JPEG) in data/ dir -- **Action**: Run `augment_annotations()` -- **Expected**: Valid image produces 8 outputs; corrupted image skipped without crashing pipeline; total output: 8 files -- **Traces**: Restriction: Augmentation exception handling per-image - -## RT-AUG-02: Augmentation handles missing label file -- **Input**: 1 image with no matching label file -- **Action**: Run `augment_annotation()` on the image -- **Expected**: Exception caught per-thread; does not crash pipeline -- **Traces**: Restriction: Augmentation exception handling - -## RT-AUG-03: Augmentation transform failure produces fewer variants -- **Input**: 1 image + label that causes some transforms to fail (extremely narrow bbox) -- **Action**: Run 
`augment_inner()` -- **Expected**: Returns 1-8 ImageLabel objects (original always present; failed variants skipped); no crash -- **Traces**: Restriction: Transform failure handling - -## RT-DSF-01: Dataset formation with empty processed directory -- **Input**: Empty processed images dir +## RT-DSF-01: Dataset formation with empty data directory +- **Input**: Empty data images dir - **Action**: Run `form_dataset()` - **Expected**: Creates empty train/valid/test directories; no crash - **Traces**: Restriction: Edge case handling diff --git a/_docs/02_document/tests/resource-limit-tests.md b/_docs/02_document/tests/resource-limit-tests.md index f167a6e..e4624d0 100644 --- a/_docs/02_document/tests/resource-limit-tests.md +++ b/_docs/02_document/tests/resource-limit-tests.md @@ -1,11 +1,5 @@ # Resource Limit Test Scenarios -## RL-AUG-01: Augmentation output count bounded -- **Input**: 1 image -- **Action**: Run `augment_inner()` -- **Expected**: Returns exactly 8 outputs (never more, even with retries) -- **Traces**: AC: 8× augmentation ratio (1 original + 7 augmented) - ## RL-DSF-01: Dataset split ratios sum to 100% - **Input**: Any number of images - **Action**: Check `train_set + valid_set + test_set` diff --git a/_docs/02_document/tests/test-data.md b/_docs/02_document/tests/test-data.md index 392f07e..b98f79b 100644 --- a/_docs/02_document/tests/test-data.md +++ b/_docs/02_document/tests/test-data.md @@ -4,8 +4,8 @@ | ID | Data Item | Source | Format | Preparation | |----|-----------|--------|--------|-------------| -| FD-01 | Annotated images (100) | `_docs/00_problem/input_data/dataset/images/` | JPEG | Copy subset to tmp_path at test start | -| FD-02 | YOLO labels (100) | `_docs/00_problem/input_data/dataset/labels/` | TXT | Copy subset to tmp_path at test start | +| FD-01 | Annotated images (20) | `tests/data/images/` | JPEG | Copy subset to tmp_path at test start | +| FD-02 | YOLO labels (20) | `tests/data/labels/` | TXT | Copy subset to tmp_path at test 
start | | FD-03 | ONNX model | `_docs/00_problem/input_data/azaion.onnx` | ONNX | Read bytes at test start | | FD-04 | Class definitions | `classes.json` (project root) | JSON | Copy to tmp_path at test start | | FD-05 | Corrupted labels | Generated at test time | TXT | Create labels with coords > 1.0 | diff --git a/_docs/02_document/tests/traceability-matrix.md b/_docs/02_document/tests/traceability-matrix.md index 63ce3d1..de8a381 100644 --- a/_docs/02_document/tests/traceability-matrix.md +++ b/_docs/02_document/tests/traceability-matrix.md @@ -4,15 +4,6 @@ | AC / Restriction | Test IDs | Coverage | |------------------|----------|----------| -| 8× augmentation ratio | BT-AUG-01, BT-AUG-06, BT-AUG-07, RL-AUG-01 | Full | -| Augmentation naming convention | BT-AUG-02 | Full | -| Bounding boxes clipped to [0,1] | BT-AUG-03, BT-AUG-04 | Full | -| Tiny bboxes (< 0.01) discarded | BT-AUG-05 | Full | -| Augmentation skips already-processed | BT-AUG-08 | Full | -| Augmentation parallelized | PT-AUG-02 | Full | -| Augmentation handles corrupted images | RT-AUG-01 | Full | -| Augmentation handles missing labels | RT-AUG-02 | Full | -| Transform failure graceful | RT-AUG-03 | Full | | Dataset split 70/20/10 | BT-DSF-01, RL-DSF-01 | Full | | Dataset directory structure | BT-DSF-02 | Full | | Dataset integrity (no data loss) | BT-DSF-03, RL-DSF-02 | Full | @@ -34,6 +25,17 @@ | Static model encryption key | ST-ENC-03 | Full | | Random IV per encryption | ST-ENC-01 | Full | +## Removed (augmentation now built into YOLO training) + +The following tests were removed because external augmentation (`augmentation.py`) is no longer part of the training pipeline. YOLO's built-in augmentation replaces it. 
+ +| Removed Test IDs | Reason | +|-------------------|--------| +| BT-AUG-01 to BT-AUG-08 | External augmentation replaced by YOLO built-in | +| PT-AUG-01, PT-AUG-02 | Augmentation performance no longer relevant | +| RT-AUG-01 to RT-AUG-03 | Augmentation resilience no longer relevant | +| RL-AUG-01 | Augmentation resource limits no longer relevant | + ## Uncovered (Require External Services) | AC / Restriction | Reason | @@ -50,18 +52,18 @@ | Metric | Value | |--------|-------| -| Total AC + Restrictions | 36 | -| Covered by tests | 29 | +| Total AC + Restrictions | 27 | +| Covered by tests | 20 | | Uncovered (external deps) | 7 | -| **Coverage** | **80.6%** | +| **Coverage** | **74.1%** | ## Test Count Summary | Category | Count | |----------|-------| -| Blackbox tests | 32 | -| Performance tests | 5 | -| Resilience tests | 6 | +| Blackbox tests | 21 | +| Performance tests | 3 | +| Resilience tests | 3 | | Security tests | 7 | -| Resource limit tests | 5 | -| **Total** | **55** | +| Resource limit tests | 4 | +| **Total** | **38** | diff --git a/_docs/02_tasks/_dependencies_table.md b/_docs/02_tasks/_dependencies_table.md index 210aa4c..e8fea72 100644 --- a/_docs/02_tasks/_dependencies_table.md +++ b/_docs/02_tasks/_dependencies_table.md @@ -38,8 +38,36 @@ AZ-151 (Epic: Blackbox Tests) └── AZ-163 test_annotation_queue ``` -## Implementation Strategy +--- -- **Batch 1**: AZ-152 (test infrastructure) — must be implemented first -- **Batch 2**: AZ-153 to AZ-163 (all test tasks) — can be implemented in parallel after infrastructure is ready -- **Estimated batches**: 2 +## Refactoring Tasks (Epic: AZ-164) + +**Date**: 2026-03-28 +**Total Tasks**: 5 +**Total Complexity Points**: 13 + +| Task | Name | Complexity | Dependencies | Epic | +|------|------|-----------|-------------|------| +| AZ-165 | refactor_unify_config | 3 | None | AZ-164 | +| AZ-166 | refactor_yolo_model | 2 | None | AZ-164 | +| AZ-167 | refactor_builtin_augmentation | 3 | AZ-166 | AZ-164 | +| AZ-168 | 
refactor_remove_processed_dir | 3 | AZ-167 | AZ-164 | +| AZ-169 | refactor_hard_symlinks | 2 | AZ-168 | AZ-164 | + +### Dependency Graph + +``` +AZ-164 (Epic: Code Improvements Refactoring) +├── AZ-165 refactor_unify_config (independent) +└── AZ-166 refactor_yolo_model + └── AZ-167 refactor_builtin_augmentation + └── AZ-168 refactor_remove_processed_dir + └── AZ-169 refactor_hard_symlinks +``` + +### Implementation Strategy + +- **Batch 1**: AZ-165 (unify config) + AZ-166 (YOLO model) — independent, can be parallel +- **Batch 2**: AZ-167 (built-in aug) + AZ-168 (remove processed dir) — sequential chain +- **Batch 3**: AZ-169 (hard symlinks) — depends on batch 2 +- **Estimated batches**: 3 diff --git a/_docs/02_tasks/todo/AZ-165_refactor_unify_config.md b/_docs/02_tasks/todo/AZ-165_refactor_unify_config.md new file mode 100644 index 0000000..00c36fd --- /dev/null +++ b/_docs/02_tasks/todo/AZ-165_refactor_unify_config.md @@ -0,0 +1,54 @@ +# Unify Configuration + +**Task**: AZ-165_refactor_unify_config +**Name**: Unify configuration — remove annotation-queue/config.yaml +**Description**: Consolidate two config files into one shared Config model +**Complexity**: 3 points +**Dependencies**: None +**Component**: Configuration +**Tracker**: AZ-165 +**Epic**: AZ-164 + +## Problem + +Two separate `config.yaml` files exist (root and `src/annotation-queue/`) with overlapping content but different `dirs` values. The annotation queue handler parses YAML manually instead of using the shared `Config` Pydantic model, creating drift risk. 
+ +## Outcome + +- Single `Config` model in `constants.py` covers all configuration including queue settings +- `annotation_queue_handler.py` uses the shared `Config` instead of parsing its own YAML +- `src/annotation-queue/config.yaml` is deleted + +## Scope + +### Included +- Add Pydantic models for `ApiConfig`, `QueueConfig`; extend `DirsConfig` with all directory fields (data, data_seed, data_processed, data_deleted, images, labels) +- Add these to the `Config` Pydantic model in `constants.py` +- Refactor `annotation_queue_handler.py` constructor to accept/import the shared Pydantic `Config` +- Delete `src/annotation-queue/config.yaml` + +### Excluded +- Changing queue connection logic or message handling +- Modifying root `config.yaml` structure beyond adding queue section (it already has it) + +## Acceptance Criteria + +**AC-1: Single config source** +Given the root `config.yaml` contains queue and dirs settings +When `annotation_queue_handler.py` initializes +Then it reads configuration from the shared `Config` model, not a local YAML file + +**AC-2: No duplicate config file** +Given the refactoring is complete +When listing `src/annotation-queue/` +Then `config.yaml` does not exist + +**AC-3: Annotation queue behavior preserved** +Given the unified configuration +When the annotation queue handler processes messages +Then it uses the correct directory paths from configuration + +## Constraints + +- Root `config.yaml` already has the `queue` section — reuse it +- `annotation_queue_handler.py` runs as a separate process — config import path must work from its working directory diff --git a/_docs/02_tasks/todo/AZ-166_refactor_yolo_model.md b/_docs/02_tasks/todo/AZ-166_refactor_yolo_model.md new file mode 100644 index 0000000..e7cbcf2 --- /dev/null +++ b/_docs/02_tasks/todo/AZ-166_refactor_yolo_model.md @@ -0,0 +1,56 @@ +# Update YOLO Model + +**Task**: AZ-166_refactor_yolo_model +**Name**: Update YOLO model to 26m variant (supports both from-scratch and 
pretrained) +**Description**: Update model references from YOLO11m to YOLO26m; support both training from scratch (`.yaml`) and from pretrained weights (`.pt`) +**Complexity**: 2 points +**Dependencies**: None +**Component**: Training Pipeline +**Tracker**: AZ-166 +**Epic**: AZ-164 + +## Problem + +Current `TrainingConfig.model` is set to `yolo11m.yaml` which defines a YOLO11 architecture. YOLO26m is the latest model variant. The system should support both training modes: +1. **From scratch** — using `yolo26m.yaml` (architecture definition, trains from random weights) +2. **From pretrained** — using `yolo26m.pt` (pretrained weights, faster convergence) + +## Outcome + +- `TrainingConfig` default model updated to `yolo26m.pt` (pretrained, recommended default) +- `config.yaml` updated to `yolo26m.pt` +- Both `yolo26m.pt` and `yolo26m.yaml` work when set in `config.yaml` +- `train_dataset()` and `resume_training()` work with either model reference + +## Scope + +### Included +- Update `TrainingConfig.model` default from `yolo11m.yaml` to `yolo26m.pt` +- Update `config.yaml` training.model from `yolo11m.yaml` to `yolo26m.pt` +- Verify `train_dataset()` works with both `.pt` and `.yaml` model values + +### Excluded +- Changing training hyperparameters (epochs, batch, imgsz) +- Updating ultralytics library version + +## Acceptance Criteria + +**AC-1: Default model config updated** +Given the training configuration +When reading `TrainingConfig.model` +Then the default value is `yolo26m.pt` + +**AC-2: config.yaml updated** +Given the root `config.yaml` +When reading `training.model` +Then the value is `yolo26m.pt` + +**AC-3: From-scratch training supported** +Given `config.yaml` sets `training.model: yolo26m.yaml` +When `YOLO(constants.config.training.model)` is called +Then a YOLO26m model is built from the architecture definition + +**AC-4: Pretrained training supported** +Given `config.yaml` sets `training.model: yolo26m.pt` +When 
`YOLO(constants.config.training.model)` is called +Then a YOLO26m model is loaded from pretrained weights diff --git a/_docs/02_tasks/todo/AZ-167_refactor_builtin_augmentation.md b/_docs/02_tasks/todo/AZ-167_refactor_builtin_augmentation.md new file mode 100644 index 0000000..166e653 --- /dev/null +++ b/_docs/02_tasks/todo/AZ-167_refactor_builtin_augmentation.md @@ -0,0 +1,55 @@ +# Replace External Augmentation with YOLO Built-in + +**Task**: AZ-167_refactor_builtin_augmentation +**Name**: Replace external augmentation with YOLO built-in +**Description**: Remove albumentations pipeline and use YOLO model.train() built-in augmentation parameters +**Complexity**: 3 points +**Dependencies**: AZ-166_refactor_yolo_model +**Component**: Training Pipeline +**Tracker**: AZ-167 +**Epic**: AZ-164 + +## Problem + +`augmentation.py` uses the `albumentations` library to augment images into a `processed_dir` before training. This creates a separate processing step, uses extra disk space (8x original), and adds complexity. YOLO's built-in augmentation applies on-the-fly during training. 
+ +## Outcome + +- `train_dataset()` passes augmentation parameters directly to `model.train()` +- Each augmentation parameter is on its own line with a descriptive comment +- The external augmentation step is removed from the training pipeline +- `augmentation.py` is no longer called during training + +## Scope + +### Included +- Add YOLO built-in augmentation parameters to `model.train()` call in `train_dataset()` +- Parameters to add: hsv_h, hsv_s, hsv_v, degrees, translate, scale, shear, fliplr, mosaic (each with comment) +- Remove call to augmentation from training flow + +### Excluded +- Deleting `augmentation.py` file (may still be useful standalone) +- Changing training hyperparameters unrelated to augmentation + +## Acceptance Criteria + +**AC-1: Built-in augmentation parameters with comments** +Given the `train_dataset()` function +When `model.train()` is called +Then every parameter (including augmentation: hsv_h, hsv_s, hsv_v, degrees, scale, shear, fliplr, mosaic, and training: data, epochs, batch, imgsz, etc.) is on its own line with an inline comment explaining what the parameter controls + +**AC-2: No external augmentation in training flow** +Given the training pipeline +When `train_dataset()` runs +Then it does not call `augment_annotations()` or any albumentations-based augmentation + +## Constraints + +- Every parameter row in the `model.train()` call MUST have an inline comment describing what it does (e.g. 
`hsv_h=0.015, # hue shift fraction of the color wheel`) +- This applies to ALL parameters, not just augmentation — training params (data, epochs, batch, imgsz, save_period, workers) also need comments +- Augmentation parameter values should approximate the current albumentations settings: + - fliplr=0.6 (was HorizontalFlip p=0.6) + - degrees=35.0 (was Affine rotate=(-35,35)) + - shear=10.0 (was Affine shear=(-10,10)) + - hsv_h=0.015, hsv_s=0.7, hsv_v=0.4 (approximate HSV shifts) + - mosaic=1.0 (new YOLO built-in, recommended default) diff --git a/_docs/02_tasks/todo/AZ-168_refactor_remove_processed_dir.md b/_docs/02_tasks/todo/AZ-168_refactor_remove_processed_dir.md new file mode 100644 index 0000000..3045f4d --- /dev/null +++ b/_docs/02_tasks/todo/AZ-168_refactor_remove_processed_dir.md @@ -0,0 +1,60 @@ +# Remove Processed Directory + +**Task**: AZ-168_refactor_remove_processed_dir +**Name**: Remove processed directory — use data dir directly +**Description**: Eliminate processed_dir concept from Config and all consumers; read from data dir directly; update e2e test fixture +**Complexity**: 3 points +**Dependencies**: AZ-167_refactor_builtin_augmentation +**Component**: Training Pipeline, Data Utilities +**Tracker**: AZ-168 +**Epic**: AZ-164 + +## Problem + +`Config` exposes `processed_dir`, `processed_images_dir`, `processed_labels_dir` properties. Multiple files read from the processed directory: `train.py::form_dataset()`, `exports.py::form_data_sample()`, `dataset-visualiser.py::visualise_processed_folder()`. With built-in augmentation, the processed directory is no longer populated. + +The e2e test fixture (`tests/test_training_e2e.py`) currently copies images to both `data_images_dir` and `processed_images_dir` as a workaround — this needs cleanup once `form_dataset()` reads from data dirs. 
+ +## Outcome + +- `Config` no longer has `processed_dir`/`processed_images_dir`/`processed_labels_dir` properties +- `form_dataset()` reads images/labels from `data_images_dir`/`data_labels_dir` +- `form_data_sample()` reads from `data_images_dir` +- `visualise_processed_folder()` reads from `data_images_dir`/`data_labels_dir` +- E2e test fixture copies images only to `data_images_dir`/`data_labels_dir` (no more processed dir population) + +## Scope + +### Included +- Remove `processed_dir`, `processed_images_dir`, `processed_labels_dir` from `Config` +- Update `form_dataset()` in `train.py` to use `data_images_dir` and `data_labels_dir` +- Update `copy_annotations()` in `train.py` to look up labels from `data_labels_dir` instead of `processed_labels_dir` +- Update `form_data_sample()` in `exports.py` to use `data_images_dir` +- Update `visualise_processed_folder()` in `dataset-visualiser.py` +- Update `tests/test_training_e2e.py` e2e fixture: remove processed dir population (only copy to data dirs) + +### Excluded +- Removing `augmentation.py` file +- Changing `corrupted_dir` handling + +## Acceptance Criteria + +**AC-1: No processed dir in Config** +Given the `Config` class +When inspecting its properties +Then `processed_dir`, `processed_images_dir`, `processed_labels_dir` do not exist + +**AC-2: Dataset formation reads data dir** +Given images and labels in `data_images_dir` / `data_labels_dir` +When `form_dataset()` runs +Then it reads from `data_images_dir` and validates labels from `data_labels_dir` + +**AC-3: Data sample reads data dir** +Given images in `data_images_dir` +When `form_data_sample()` runs +Then it reads from `data_images_dir` + +**AC-4: E2e test uses data dirs only** +Given the e2e test fixture +When setting up test data +Then it copies images/labels only to `data_images_dir`/`data_labels_dir` (no processed dir) diff --git a/_docs/02_tasks/todo/AZ-169_refactor_hard_symlinks.md b/_docs/02_tasks/todo/AZ-169_refactor_hard_symlinks.md new file 
mode 100644 index 0000000..88054f4 --- /dev/null +++ b/_docs/02_tasks/todo/AZ-169_refactor_hard_symlinks.md @@ -0,0 +1,42 @@ +# Use Hard Symlinks for Dataset + +**Task**: AZ-169_refactor_hard_symlinks +**Name**: Use hard symlinks instead of file copies for dataset formation +**Description**: Replace shutil.copy() with os.link() in dataset split creation to save disk space +**Complexity**: 2 points +**Dependencies**: AZ-168_refactor_remove_processed_dir +**Component**: Training Pipeline +**Tracker**: AZ-169 +**Epic**: AZ-164 + +## Problem + +`copy_annotations()` in `train.py` uses `shutil.copy()` to duplicate images and labels into train/valid/test splits. For large datasets this wastes significant disk space. + +## Outcome + +- Dataset formation uses `os.link()` (hard links) instead of `shutil.copy()` +- Fallback to `shutil.copy()` when hard links fail (cross-filesystem) +- No change in training behavior — YOLO reads hard-linked files identically + +## Scope + +### Included +- Replace `shutil.copy()` with `os.link()` in `copy_annotations()` inner `copy_image()` function +- Add try/except fallback to `shutil.copy()` for `OSError` (cross-filesystem) + +### Excluded +- Changing `form_data_sample()` in exports.py (separate utility, lower priority) +- Changing corrupted file handling + +## Acceptance Criteria + +**AC-1: Hard links used** +Given images and labels in the data directory +When `copy_annotations()` creates train/valid/test splits +Then files are hard-linked via `os.link()`, not copied + +**AC-2: Fallback on failure** +Given a cross-filesystem scenario where `os.link()` raises `OSError` +When `copy_annotations()` encounters the error +Then it falls back to `shutil.copy()` transparently diff --git a/_docs/04_refactoring/01-code-improvements/analysis/refactoring_roadmap.md b/_docs/04_refactoring/01-code-improvements/analysis/refactoring_roadmap.md new file mode 100644 index 0000000..27291da --- /dev/null +++ 
b/_docs/04_refactoring/01-code-improvements/analysis/refactoring_roadmap.md @@ -0,0 +1,33 @@ +# Refactoring Roadmap + +**Run**: 01-code-improvements +**Date**: 2026-03-28 + +## Execution Order + +All 5 changes are grouped into a single phase (straightforward, low-to-medium risk). + +| Priority | Change | Risk | Effort | +|----------|--------|------|--------| +| 1 | C05: Unify configuration | medium | 3 pts | +| 2 | C01: Update YOLO model | medium | 2 pts | +| 3 | C02: Replace external augmentation | medium | 3 pts | +| 4 | C03: Remove processed directory | medium | 3 pts | +| 5 | C04: Hard symlinks | low | 2 pts | + +**Total estimated effort**: 13 points across 5 tasks + +## Dependency Graph + +``` +C05 (config unification) ─── independent +C01 (YOLO update) ← C02 (built-in aug) ← C03 (remove processed dir) ← C04 (hard symlinks) +``` + +C05 can be done in parallel with the C01→C04 chain. + +## Risk Mitigation + +- Existing test suite (83 tests) provides safety net +- Each change committed separately for easy rollback +- C02 is the highest-risk change (training pipeline behavior change) — validate by running a short training sanity check after implementation diff --git a/_docs/04_refactoring/01-code-improvements/analysis/research_findings.md b/_docs/04_refactoring/01-code-improvements/analysis/research_findings.md new file mode 100644 index 0000000..b037765 --- /dev/null +++ b/_docs/04_refactoring/01-code-improvements/analysis/research_findings.md @@ -0,0 +1,50 @@ +# Research Findings + +**Run**: 01-code-improvements +**Date**: 2026-03-28 + +## Current State Analysis + +### Training Pipeline +- Uses `yolo11m.yaml` (architecture-only config, trains from scratch) +- External augmentation via `albumentations` library in `src/augmentation.py` +- Two-step process: augment → form dataset → train +- Dataset formation copies files with `shutil.copy()`, duplicating ~8x storage + +### Configuration +- Two config files: `config.yaml` (root) and 
`src/annotation-queue/config.yaml` +- Annotation queue handler parses YAML manually instead of using shared `Config` model +- Config drift risk between the two files + +## YOLO 26 Model Update + +Ultralytics YOLO26 is the latest model family. The medium variant `yolo26m.pt` replaces `yolo11m.yaml`: +- Uses pretrained weights (`.pt`) rather than architecture-only (`.yaml`) +- Faster convergence with transfer learning +- Improved accuracy on detection benchmarks + +## Built-in Augmentation Parameters + +YOLO's `model.train()` supports the following augmentation parameters that replace the external `albumentations` pipeline: + +| Parameter | Default | Equivalent to current external aug | +|-----------|---------|-----------------------------------| +| `hsv_h` | 0.015 | HueSaturationValue(hue_shift_limit=10) | +| `hsv_s` | 0.7 | HueSaturationValue(sat_shift_limit=10) | +| `hsv_v` | 0.4 | RandomBrightnessContrast + HSV | +| `degrees` | 0.0 | Affine(rotate=(-35,35)) → set to 35.0 | +| `translate` | 0.1 | Default is sufficient | +| `scale` | 0.5 | Affine(scale=(0.8,1.2)) → default covers this | +| `shear` | 0.0 | Affine(shear=(-10,10)) → set to 10.0 | +| `fliplr` | 0.5 | HorizontalFlip(p=0.6) → set to 0.6 | +| `flipud` | 0.0 | Not used currently | +| `mosaic` | 1.0 | New — YOLO built-in | +| `mixup` | 0.0 | New — optional | + +## Hard Symlinks + +`os.link()` creates hard links sharing the same inode. 
Benefits: +- Zero additional disk usage for dataset splits +- Same read performance as regular files +- Works on same filesystem (which is the case here — all under `/azaion/`) +- Fallback to `shutil.copy()` for cross-filesystem edge cases diff --git a/_docs/04_refactoring/01-code-improvements/baseline_metrics.md b/_docs/04_refactoring/01-code-improvements/baseline_metrics.md new file mode 100644 index 0000000..314d7e2 --- /dev/null +++ b/_docs/04_refactoring/01-code-improvements/baseline_metrics.md @@ -0,0 +1,66 @@ +# Baseline Metrics + +**Run**: 01-code-improvements +**Date**: 2026-03-28 +**Mode**: Guided +**Source**: `_docs/02_document/refactoring_notes.md` + +## Goals + +Apply 5 improvements identified during documentation: +1. Update YOLO to v26m version +2. Replace external augmentation with YOLO built-in augmentation +3. Remove processed folder — use data dir directly +4. Use hard symlinks instead of file copies for dataset formation +5. Unify constants directories — remove `src/annotation-queue/config.yaml` + +## Code Metrics + +| Metric | Value | +|--------|-------| +| Source files (src/) | 24 Python files | +| Source LOC | 2,945 | +| Test files | 21 Python files | +| Test LOC | 1,646 | +| Total tests | 83 (77 blackbox/unit + 6 performance) | +| Test execution time | ~130s (120s unit + 10s perf) | +| Python version | 3.10.8 | +| Ultralytics version | 8.4.30 | +| Pip packages | ~76 | + +## Files Affected by Refactoring + +| File | LOC | Refactoring Items | +|------|-----|-------------------| +| `src/constants.py` | 118 | #3 (remove processed_dir), #5 (unify config) | +| `src/train.py` | 178 | #1 (YOLO version), #2 (built-in aug), #3 (data dir), #4 (symlinks) | +| `src/augmentation.py` | 152 | #2 (replace with YOLO built-in), #3 (processed dir) | +| `src/exports.py` | 118 | #3 (processed dir references) | +| `src/convert-annotations.py` | 119 | #3 (processed dir references) | +| `src/dataset-visualiser.py` | 52 | #3 (processed dir references) | +| 
`src/annotation-queue/annotation_queue_handler.py` | 173 | #5 (remove separate config.yaml) | +| `src/annotation-queue/config.yaml` | 21 | #5 (delete — duplicated config) | +| `config.yaml` | 30 | #5 (single source of truth) | + +## Test Suite Baseline + +``` +77 passed, 0 failed, 0 skipped (blackbox/unit) +6 passed, 0 failed, 0 skipped (performance) +Total: 83 passed in ~130s +``` + +## Functionality Inventory + +| Feature | Status | Affected by Refactoring | +|---------|--------|------------------------| +| Augmentation pipeline | Working | Yes (#2, #3) | +| Dataset formation | Working | Yes (#3, #4) | +| Training | Working | Yes (#1, #2) | +| Model export (ONNX) | Working | No | +| Inference (ONNX/TensorRT) | Working | No | +| Annotation queue | Working | Yes (#5) | +| API client | Working | No | +| CDN manager | Working | No | +| Security/encryption | Working | No | +| Label validation | Working | No | diff --git a/_docs/04_refactoring/01-code-improvements/discovery/components/01_training_pipeline.md b/_docs/04_refactoring/01-code-improvements/discovery/components/01_training_pipeline.md new file mode 100644 index 0000000..8acc8b6 --- /dev/null +++ b/_docs/04_refactoring/01-code-improvements/discovery/components/01_training_pipeline.md @@ -0,0 +1,26 @@ +# Training Pipeline + +## Files +- `src/train.py` (178 LOC) +- `src/augmentation.py` (152 LOC) +- `src/constants.py` (118 LOC) + +## Current Flow + +```mermaid +graph TD + A[augmentation.py] -->|reads from| B[data_dir] + A -->|writes to| C[processed_dir] + D[train.py::form_dataset] -->|reads from| C + D -->|shutil.copy to| E[datasets_dir/today/train,valid,test] + F[train.py::train_dataset] -->|YOLO.train| E +``` + +## Issues +- External augmentation (albumentations) runs as separate step, writing to `processed_dir` +- `form_dataset()` copies files from `processed_dir` to dataset splits using `shutil.copy` +- YOLO has built-in augmentation that runs during training (mosaic, mixup, flips, etc.) 
+- Using built-in aug eliminates need for `processed_dir` and the full `augmentation.py` pipeline +- `copy_annotations()` uses `shutil.copy` — wasteful for large datasets +- Global mutable `total_files_copied` variable in `copy_annotations` +- Model config `yolo11m.yaml` trains from scratch; likely should use pretrained weights or updated variant diff --git a/_docs/04_refactoring/01-code-improvements/discovery/components/02_config_system.md b/_docs/04_refactoring/01-code-improvements/discovery/components/02_config_system.md new file mode 100644 index 0000000..2e8dfd9 --- /dev/null +++ b/_docs/04_refactoring/01-code-improvements/discovery/components/02_config_system.md @@ -0,0 +1,18 @@ +# Configuration System + +## Files +- `src/constants.py` (118 LOC) +- `config.yaml` (root, 30 lines) +- `src/annotation-queue/config.yaml` (21 lines) +- `src/annotation-queue/annotation_queue_handler.py` (173 LOC) + +## Current State +- `constants.py` defines `Config` (Pydantic model) loaded from root `config.yaml` +- `annotation_queue_handler.py` reads its own `config.yaml` with raw `yaml.safe_load` +- Both config files share `api`, `queue`, `dirs` sections but with different `dirs` values +- Annotation queue config has `data: 'data-test'` vs root `data: 'data'` + +## Issues +- Two config files with overlapping content — drift risk +- `annotation_queue_handler.py` parses config manually instead of using `Config` model +- `constants.py` still has `processed_dir` properties that become obsolete after removing external augmentation diff --git a/_docs/04_refactoring/01-code-improvements/discovery/components/03_data_utilities.md b/_docs/04_refactoring/01-code-improvements/discovery/components/03_data_utilities.md new file mode 100644 index 0000000..3e0138a --- /dev/null +++ b/_docs/04_refactoring/01-code-improvements/discovery/components/03_data_utilities.md @@ -0,0 +1,10 @@ +# Data Utilities + +## Files +- `src/exports.py` — `form_data_sample()` reads from `processed_images_dir` +- 
`src/dataset-visualiser.py` — `visualise_processed_folder()` reads from `processed_images_dir`/`processed_labels_dir` + +## Impact +- Both files reference `processed_dir` via `constants.config` +- After removing `processed_dir`, these must switch to `data_images_dir`/`data_labels_dir` +- `form_data_sample()` also uses `shutil.copy` — candidate for hard links diff --git a/_docs/04_refactoring/01-code-improvements/list-of-changes.md b/_docs/04_refactoring/01-code-improvements/list-of-changes.md new file mode 100644 index 0000000..70aa976 --- /dev/null +++ b/_docs/04_refactoring/01-code-improvements/list-of-changes.md @@ -0,0 +1,52 @@ +# List of Changes + +**Run**: 01-code-improvements +**Mode**: guided +**Source**: `_docs/02_document/refactoring_notes.md` +**Date**: 2026-03-28 + +## Summary + +Apply 5 improvements from documentation review: update YOLO model, switch to built-in augmentation, remove processed directory, use hard symlinks for dataset formation, and unify configuration files. 
+ +## Changes + +### C01: Update YOLO model to 26m variant +- **File(s)**: `src/constants.py`, `src/train.py` +- **Problem**: Current model config uses `yolo11m.yaml` which trains from a YAML architecture definition +- **Change**: Update `TrainingConfig.model` to the YOLO 26m variant; ensure `train_dataset()` uses the updated model reference +- **Rationale**: Use updated model version as requested; pretrained weights improve convergence +- **Risk**: medium +- **Dependencies**: None + +### C02: Replace external augmentation with YOLO built-in +- **File(s)**: `src/train.py`, `src/augmentation.py` +- **Problem**: `augmentation.py` uses albumentations to augment images into a separate `processed_dir` before training — adds complexity, disk usage, and a separate processing step +- **Change**: Remove the `augment_annotations()` call from the training pipeline; add YOLO built-in augmentation parameters (hsv_h, hsv_s, hsv_v, degrees, translate, scale, shear, flipud, fliplr, mosaic, mixup) to the `model.train()` call in `train_dataset()`, each on its own line with a descriptive comment; `augmentation.py` remains in codebase but is no longer called during training +- **Rationale**: YOLO's built-in augmentation applies on-the-fly during training, eliminating the pre-processing step and processed directory +- **Risk**: medium +- **Dependencies**: C01 + +### C03: Remove processed directory — use data dir directly +- **File(s)**: `src/constants.py`, `src/train.py`, `src/exports.py`, `src/dataset-visualiser.py` +- **Problem**: `processed_dir`, `processed_images_dir`, `processed_labels_dir` properties in `Config` are no longer needed when built-in augmentation is used; `form_dataset()` reads from processed dir; `form_data_sample()` reads from processed dir; `visualise_processed_folder()` reads from processed dir +- **Change**: Remove `processed_dir`/`processed_images_dir`/`processed_labels_dir` properties from `Config`; update `form_dataset()` to read from 
`data_images_dir`/`data_labels_dir`; update `form_data_sample()` similarly; update `visualise_processed_folder()` similarly +- **Rationale**: Processed directory is unnecessary without external augmentation step +- **Risk**: medium +- **Dependencies**: C02 + +### C04: Use hard symlinks instead of file copies for dataset +- **File(s)**: `src/train.py` +- **Problem**: `copy_annotations()` uses `shutil.copy()` to duplicate images and labels into train/valid/test splits — wastes disk space on large datasets +- **Change**: Replace `shutil.copy()` with `os.link()` to create hard links; add fallback to `shutil.copy()` for cross-filesystem scenarios +- **Rationale**: Hard links share the same inode, saving disk space while maintaining independent directory entries +- **Risk**: low +- **Dependencies**: C03 + +### C05: Unify configuration — remove annotation-queue/config.yaml +- **File(s)**: `src/constants.py`, `src/annotation-queue/annotation_queue_handler.py`, `src/annotation-queue/config.yaml` +- **Problem**: `src/annotation-queue/config.yaml` duplicates root `config.yaml` with different `dirs` values; `annotation_queue_handler.py` parses config manually via `yaml.safe_load` instead of using the shared `Config` model +- **Change**: Extend `Config` in `constants.py` to include queue and annotation-queue directory settings; refactor `annotation_queue_handler.py` to accept a `Config` instance (or import from constants); delete `src/annotation-queue/config.yaml` +- **Rationale**: Single source of truth for configuration eliminates drift risk and inconsistency +- **Risk**: medium +- **Dependencies**: None diff --git a/_docs/_autopilot_state.md b/_docs/_autopilot_state.md index 5bd50d2..87b3c26 100644 --- a/_docs/_autopilot_state.md +++ b/_docs/_autopilot_state.md @@ -2,8 +2,8 @@ ## Current Step flow: existing-code -step: 6 -name: Run Tests +step: 7 +name: Refactor status: in_progress -sub_step: 0 -retry_count: 1 +sub_step: 4 — Execution (Batch 1 done: AZ-165, AZ-166, AZ-167; 
next: Batch 2 AZ-168) +retry_count: 0 diff --git a/config.yaml b/config.yaml index 9b6ba6f..b7fff65 100644 --- a/config.yaml +++ b/config.yaml @@ -20,7 +20,7 @@ dirs: labels: 'labels' training: - model: 'yolo11m.yaml' + model: 'yolo26m.pt' epochs: 120 batch: 11 imgsz: 1280 diff --git a/src/annotation-queue/annotation_queue_handler.py b/src/annotation-queue/annotation_queue_handler.py index 248dd39..543ffc4 100644 --- a/src/annotation-queue/annotation_queue_handler.py +++ b/src/annotation-queue/annotation_queue_handler.py @@ -1,12 +1,13 @@ import os import shutil import sys +from os import path, makedirs +import constants import yaml import asyncio from rstream import Consumer, AMQPMessage, ConsumerOffsetSpecification, OffsetType, MessageContext from rstream.amqp import amqp_decoder -from os import path, makedirs from datetime import datetime, timedelta import logging from logging.handlers import TimedRotatingFileHandler @@ -30,7 +31,6 @@ logger.addHandler(logging.StreamHandler(sys.stdout)) class AnnotationQueueHandler: - CONFIG_FILE = 'config.yaml' OFFSET_FILE = 'offset.yaml' QUEUE_ANNOTATION_STATUS_RECORD = b'AnnotationStatus' QUEUE_EMAIL_RECORD = b'Email' @@ -46,18 +46,14 @@ class AnnotationQueueHandler: self.lbl_seed = path.join(h.data_seed_dir, h.labels_dir, f"{name}{h.TXT_EXT}") def __init__(self): - with open(self.CONFIG_FILE, "r") as f: - config_dict = yaml.safe_load(f) + cfg = constants.config + self.data_dir = path.join(cfg.dirs.root, cfg.dirs.data) + self.data_seed_dir = path.join(cfg.dirs.root, cfg.dirs.data_seed) + self.images_dir = cfg.dirs.images + self.labels_dir = cfg.dirs.labels - root = config_dict['dirs']['root'] - - self.data_dir = path.join(root, config_dict['dirs']['data']) - self.data_seed_dir = path.join(root, config_dict['dirs']['data_seed']) - self.images_dir = config_dict['dirs']['images'] - self.labels_dir = config_dict['dirs']['labels'] - - self.del_img_dir = path.join(root, config_dict['dirs']['data_deleted'], self.images_dir) - 
self.del_lbl_dir = path.join(root, config_dict['dirs']['data_deleted'], self.labels_dir) + self.del_img_dir = path.join(cfg.dirs.root, cfg.dirs.data_deleted, self.images_dir) + self.del_lbl_dir = path.join(cfg.dirs.root, cfg.dirs.data_deleted, self.labels_dir) makedirs(path.join(self.data_dir, self.images_dir), exist_ok=True) makedirs(path.join(self.data_dir, self.labels_dir), exist_ok=True) @@ -68,12 +64,12 @@ class AnnotationQueueHandler: makedirs(self.del_lbl_dir, exist_ok=True) self.consumer = Consumer( - host=config_dict['queue']['host'], - port=config_dict['queue']['port'], - username=config_dict['queue']['consumer_user'], - password=config_dict['queue']['consumer_pw'] + host=cfg.queue.host, + port=cfg.queue.port, + username=cfg.queue.consumer_user, + password=cfg.queue.consumer_pw ) - self.queue_name = config_dict['queue']['name'] + self.queue_name = cfg.queue.name try: with open(self.OFFSET_FILE, 'r') as f: diff --git a/src/annotation-queue/config.yaml b/src/annotation-queue/config.yaml deleted file mode 100644 index febb818..0000000 --- a/src/annotation-queue/config.yaml +++ /dev/null @@ -1,20 +0,0 @@ -api: - url: 'https://api.azaion.com' - email: 'uploader@azaion.com' - password: 'Az@1on_10Upl0@der' - -queue: - host: '188.245.120.247' - port: 5552 - consumer_user: 'azaion_receiver' - consumer_pw: 'Az1onRecce777ve2r' - name: 'azaion-annotations' - -dirs: - root: '/azaion' - data: 'data-test' - data_seed: 'data-test-seed' - data_processed: 'data-test-processed' - data_deleted: 'data-test_deleted' - images: 'images' - labels: 'labels' diff --git a/src/constants.py b/src/constants.py index f83c298..e883be6 100644 --- a/src/constants.py +++ b/src/constants.py @@ -3,13 +3,35 @@ from os import path import yaml from pydantic import BaseModel +_PROJECT_ROOT = path.dirname(path.dirname(path.abspath(__file__))) + + +class ApiConfig(BaseModel): + url: str = '' + email: str = '' + password: str = '' + + +class QueueConfig(BaseModel): + host: str = 'localhost' + port: 
int = 5552 + consumer_user: str = '' + consumer_pw: str = '' + name: str = '' + class DirsConfig(BaseModel): root: str = '/azaion' + data: str = 'data' + data_seed: str = 'data-seed' + data_processed: str = 'data-processed' + data_deleted: str = 'data_deleted' + images: str = 'images' + labels: str = 'labels' class TrainingConfig(BaseModel): - model: str = 'yolo11m.yaml' + model: str = 'yolo26m.pt' epochs: int = 120 batch: int = 11 imgsz: int = 1280 @@ -24,36 +46,38 @@ class ExportConfig(BaseModel): class Config(BaseModel): dirs: DirsConfig = DirsConfig() + api: ApiConfig = ApiConfig() + queue: QueueConfig = QueueConfig() training: TrainingConfig = TrainingConfig() export: ExportConfig = ExportConfig() @property - def azaion(self) -> str: + def root(self) -> str: return self.dirs.root @property def data_dir(self) -> str: - return path.join(self.dirs.root, 'data') + return path.join(self.dirs.root, self.dirs.data) @property def data_images_dir(self) -> str: - return path.join(self.data_dir, 'images') + return path.join(self.data_dir, self.dirs.images) @property def data_labels_dir(self) -> str: - return path.join(self.data_dir, 'labels') + return path.join(self.data_dir, self.dirs.labels) @property def processed_dir(self) -> str: - return path.join(self.dirs.root, 'data-processed') + return path.join(self.dirs.root, self.dirs.data_processed) @property def processed_images_dir(self) -> str: - return path.join(self.processed_dir, 'images') + return path.join(self.processed_dir, self.dirs.images) @property def processed_labels_dir(self) -> str: - return path.join(self.processed_dir, 'labels') + return path.join(self.processed_dir, self.dirs.labels) @property def corrupted_dir(self) -> str: @@ -61,11 +85,11 @@ class Config(BaseModel): @property def corrupted_images_dir(self) -> str: - return path.join(self.corrupted_dir, 'images') + return path.join(self.corrupted_dir, self.dirs.images) @property def corrupted_labels_dir(self) -> str: - return 
path.join(self.corrupted_dir, 'labels') + return path.join(self.corrupted_dir, self.dirs.labels) @property def sample_dir(self) -> str: @@ -115,4 +139,4 @@ SMALL_SIZE_KB = 3 CDN_CONFIG = 'cdn.yaml' MODELS_FOLDER = 'models' -config: Config = Config.from_yaml(CONFIG_FILE) +config: Config = Config.from_yaml(path.join(_PROJECT_ROOT, CONFIG_FILE)) diff --git a/src/train.py b/src/train.py index 486e86a..8c0dc4a 100644 --- a/src/train.py +++ b/src/train.py @@ -132,13 +132,23 @@ def create_yaml(): def resume_training(last_pt_path): model = YOLO(last_pt_path) - results = model.train(data=yaml, - resume=True, - epochs=constants.config.training.epochs, - batch=constants.config.training.batch, - imgsz=constants.config.training.imgsz, - save_period=constants.config.training.save_period, - workers=constants.config.training.workers) + results = model.train(data=yaml, # path to dataset YAML + resume=True, # continue from last checkpoint + epochs=constants.config.training.epochs, # total training epochs + batch=constants.config.training.batch, # batch size per GPU + imgsz=constants.config.training.imgsz, # input image size in pixels + save_period=constants.config.training.save_period, # save checkpoint every N epochs + workers=constants.config.training.workers, # dataloader worker threads + hsv_h=0.015, # hue shift fraction of the color wheel + hsv_s=0.7, # saturation shift fraction + hsv_v=0.4, # value (brightness) shift fraction + degrees=35.0, # max rotation degrees (+/- range) + translate=0.1, # max translation as fraction of image size + scale=0.5, # max scale gain (1 +/- scale) + shear=10.0, # max shear degrees (+/- range) + fliplr=0.6, # probability of horizontal flip + mosaic=1.0, # mosaic augmentation probability + mixup=0.0) # mixup augmentation probability (disabled) def train_dataset(): @@ -147,12 +157,22 @@ def train_dataset(): model = YOLO(constants.config.training.model) today_dataset = path.join(constants.config.datasets_dir, today_folder) - results = 
model.train(data=abspath(path.join(today_dataset, 'data.yaml')), - epochs=constants.config.training.epochs, - batch=constants.config.training.batch, - imgsz=constants.config.training.imgsz, - save_period=constants.config.training.save_period, - workers=constants.config.training.workers) + results = model.train(data=abspath(path.join(today_dataset, 'data.yaml')), # path to dataset YAML + epochs=constants.config.training.epochs, # total training epochs + batch=constants.config.training.batch, # batch size per GPU + imgsz=constants.config.training.imgsz, # input image size in pixels + save_period=constants.config.training.save_period, # save checkpoint every N epochs + workers=constants.config.training.workers, # dataloader worker threads + hsv_h=0.015, # hue shift fraction of the color wheel + hsv_s=0.7, # saturation shift fraction + hsv_v=0.4, # value (brightness) shift fraction + degrees=35.0, # max rotation degrees (+/- range) + translate=0.1, # max translation as fraction of image size + scale=0.5, # max scale gain (1 +/- scale) + shear=10.0, # max shear degrees (+/- range) + fliplr=0.6, # probability of horizontal flip + mosaic=1.0, # mosaic augmentation probability + mixup=0.0) # mixup augmentation probability (disabled) model_dir = path.join(constants.config.models_dir, today_folder) diff --git a/tests/conftest.py b/tests/conftest.py index 0738ee1..45452b5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,8 +4,10 @@ from pathlib import Path import pytest _PROJECT_ROOT = Path(__file__).resolve().parent.parent -_DATASET_IMAGES = _PROJECT_ROOT / "_docs/00_problem/input_data/dataset/images" -_DATASET_LABELS = _PROJECT_ROOT / "_docs/00_problem/input_data/dataset/labels" +_TESTS_DIR = Path(__file__).resolve().parent +_TEST_ROOT = _TESTS_DIR / "root" +_DATASET_IMAGES = _TEST_ROOT / "data" / "images" +_DATASET_LABELS = _TEST_ROOT / "data" / "labels" _ONNX_MODEL = _PROJECT_ROOT / "_docs/00_problem/input_data/azaion.onnx" _CLASSES_JSON = _PROJECT_ROOT / "src" 
/ "classes.json" _CONFIG_TEST = _PROJECT_ROOT / "config.test.yaml" diff --git a/tests/performance/test_augmentation_perf.py b/tests/performance/test_augmentation_perf.py deleted file mode 100644 index a7c62cb..0000000 --- a/tests/performance/test_augmentation_perf.py +++ /dev/null @@ -1,115 +0,0 @@ -import concurrent.futures -import random -import shutil -import time -from pathlib import Path - -import numpy as np -import pytest - -from tests.conftest import apply_constants_patch - - -def _patch_augmentation_paths(monkeypatch, base: Path): - apply_constants_patch(monkeypatch, base) - - -def _augment_annotation_with_total(monkeypatch): - import augmentation as aug - - orig = aug.Augmentator.augment_annotation - - def wrapped(self, image_file): - self.total_to_process = self.total_images_to_process - return orig(self, image_file) - - monkeypatch.setattr(aug.Augmentator, "augment_annotation", wrapped) - - -def _seed(): - random.seed(42) - np.random.seed(42) - - -@pytest.mark.performance -def test_pt_aug_01_throughput_ten_images_sixty_seconds( - tmp_path, monkeypatch, sample_images_labels -): - # Arrange - _patch_augmentation_paths(monkeypatch, tmp_path) - _augment_annotation_with_total(monkeypatch) - _seed() - import constants as c - from augmentation import Augmentator - - img_dir = Path(c.config.data_images_dir) - lbl_dir = Path(c.config.data_labels_dir) - img_dir.mkdir(parents=True, exist_ok=True) - lbl_dir.mkdir(parents=True, exist_ok=True) - src_img, src_lbl = sample_images_labels(10) - for p in src_img.glob("*.jpg"): - shutil.copy2(p, img_dir / p.name) - for p in src_lbl.glob("*.txt"): - shutil.copy2(p, lbl_dir / p.name) - # Act - t0 = time.perf_counter() - Augmentator().augment_annotations() - elapsed = time.perf_counter() - t0 - # Assert - assert elapsed <= 60.0 - - -@pytest.mark.performance -def test_pt_aug_02_parallel_at_least_one_point_five_x_faster( - tmp_path, monkeypatch, sample_images_labels -): - # Arrange - _patch_augmentation_paths(monkeypatch, 
tmp_path) - _augment_annotation_with_total(monkeypatch) - _seed() - import constants as c - from augmentation import Augmentator - - img_dir = Path(c.config.data_images_dir) - lbl_dir = Path(c.config.data_labels_dir) - proc_dir = Path(c.config.processed_dir) - img_dir.mkdir(parents=True, exist_ok=True) - lbl_dir.mkdir(parents=True, exist_ok=True) - src_img, src_lbl = sample_images_labels(10) - for p in src_img.glob("*.jpg"): - shutil.copy2(p, img_dir / p.name) - for p in src_lbl.glob("*.txt"): - shutil.copy2(p, lbl_dir / p.name) - Path(c.config.processed_images_dir).mkdir(parents=True, exist_ok=True) - Path(c.config.processed_labels_dir).mkdir(parents=True, exist_ok=True) - names = sorted(p.name for p in img_dir.glob("*.jpg")) - - class _E: - __slots__ = ("name",) - - def __init__(self, name): - self.name = name - - entries = [_E(n) for n in names] - - # Act - aug_seq = Augmentator() - aug_seq.total_images_to_process = len(entries) - t0 = time.perf_counter() - for e in entries: - aug_seq.augment_annotation(e) - seq_elapsed = time.perf_counter() - t0 - - shutil.rmtree(proc_dir) - Path(c.config.processed_images_dir).mkdir(parents=True, exist_ok=True) - Path(c.config.processed_labels_dir).mkdir(parents=True, exist_ok=True) - - aug_par = Augmentator() - aug_par.total_images_to_process = len(entries) - t0 = time.perf_counter() - with concurrent.futures.ThreadPoolExecutor() as ex: - list(ex.map(aug_par.augment_annotation, entries)) - par_elapsed = time.perf_counter() - t0 - - # Assert - assert seq_elapsed >= par_elapsed * 1.5 diff --git a/tests/root/data-processed/images/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001226.jpg b/tests/root/data-processed/images/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001226.jpg new file mode 100644 index 0000000..87bf488 Binary files /dev/null and b/tests/root/data-processed/images/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001226.jpg differ diff --git 
a/tests/root/data-processed/images/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001278.jpg b/tests/root/data-processed/images/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001278.jpg new file mode 100644 index 0000000..732e64d Binary files /dev/null and b/tests/root/data-processed/images/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001278.jpg differ diff --git a/tests/root/data-processed/images/#Азов_з_неба_000106.jpg b/tests/root/data-processed/images/#Азов_з_неба_000106.jpg new file mode 100644 index 0000000..f316b2b Binary files /dev/null and b/tests/root/data-processed/images/#Азов_з_неба_000106.jpg differ diff --git a/tests/root/data-processed/images/#Азов_з_неба_000113.jpg b/tests/root/data-processed/images/#Азов_з_неба_000113.jpg new file mode 100644 index 0000000..3871e98 Binary files /dev/null and b/tests/root/data-processed/images/#Азов_з_неба_000113.jpg differ diff --git a/tests/root/data-processed/images/#Азов_з_неба_000148.jpg b/tests/root/data-processed/images/#Азов_з_неба_000148.jpg new file mode 100644 index 0000000..8c54eba Binary files /dev/null and b/tests/root/data-processed/images/#Азов_з_неба_000148.jpg differ diff --git a/tests/root/data-processed/images/#Азов_з_неба_000162.jpg b/tests/root/data-processed/images/#Азов_з_неба_000162.jpg new file mode 100644 index 0000000..480b9ac Binary files /dev/null and b/tests/root/data-processed/images/#Азов_з_неба_000162.jpg differ diff --git a/tests/root/data-processed/images/#Азов_з_неба_000173.jpg b/tests/root/data-processed/images/#Азов_з_неба_000173.jpg new file mode 100644 index 0000000..ee949db Binary files /dev/null and b/tests/root/data-processed/images/#Азов_з_неба_000173.jpg differ diff --git a/tests/root/data-processed/images/#Азов_з_неба_000189.jpg b/tests/root/data-processed/images/#Азов_з_неба_000189.jpg new file mode 100644 index 0000000..d9e1058 Binary files /dev/null and b/tests/root/data-processed/images/#Азов_з_неба_000189.jpg differ diff --git 
a/tests/root/data-processed/images/#Азов_з_неба_000210.jpg b/tests/root/data-processed/images/#Азов_з_неба_000210.jpg new file mode 100644 index 0000000..85de419 Binary files /dev/null and b/tests/root/data-processed/images/#Азов_з_неба_000210.jpg differ diff --git a/tests/root/data-processed/images/#Азов_з_неба_000245.jpg b/tests/root/data-processed/images/#Азов_з_неба_000245.jpg new file mode 100644 index 0000000..664dfc3 Binary files /dev/null and b/tests/root/data-processed/images/#Азов_з_неба_000245.jpg differ diff --git a/tests/root/data-processed/images/#Азов_з_неба_000269.jpg b/tests/root/data-processed/images/#Азов_з_неба_000269.jpg new file mode 100644 index 0000000..b765680 Binary files /dev/null and b/tests/root/data-processed/images/#Азов_з_неба_000269.jpg differ diff --git a/tests/root/data-processed/images/#Азов_з_неба_000307.jpg b/tests/root/data-processed/images/#Азов_з_неба_000307.jpg new file mode 100644 index 0000000..1ad21e2 Binary files /dev/null and b/tests/root/data-processed/images/#Азов_з_неба_000307.jpg differ diff --git a/tests/root/data-processed/images/#Азов_з_неба_000314.jpg b/tests/root/data-processed/images/#Азов_з_неба_000314.jpg new file mode 100644 index 0000000..89ecc54 Binary files /dev/null and b/tests/root/data-processed/images/#Азов_з_неба_000314.jpg differ diff --git a/tests/root/data-processed/images/#Азов_з_неба_000320.jpg b/tests/root/data-processed/images/#Азов_з_неба_000320.jpg new file mode 100644 index 0000000..dd66c48 Binary files /dev/null and b/tests/root/data-processed/images/#Азов_з_неба_000320.jpg differ diff --git a/tests/root/data-processed/images/#Азов_з_неба_000338.jpg b/tests/root/data-processed/images/#Азов_з_неба_000338.jpg new file mode 100644 index 0000000..14744f5 Binary files /dev/null and b/tests/root/data-processed/images/#Азов_з_неба_000338.jpg differ diff --git a/tests/root/data-processed/images/#Азов_з_неба_000346.jpg b/tests/root/data-processed/images/#Азов_з_неба_000346.jpg new file mode 
100644 index 0000000..52986da Binary files /dev/null and b/tests/root/data-processed/images/#Азов_з_неба_000346.jpg differ diff --git a/tests/root/data-processed/images/#Азов_з_неба_000347.jpg b/tests/root/data-processed/images/#Азов_з_неба_000347.jpg new file mode 100644 index 0000000..dcedfe7 Binary files /dev/null and b/tests/root/data-processed/images/#Азов_з_неба_000347.jpg differ diff --git a/tests/root/data-processed/images/#Азов_з_неба_000373.jpg b/tests/root/data-processed/images/#Азов_з_неба_000373.jpg new file mode 100644 index 0000000..49ac20a Binary files /dev/null and b/tests/root/data-processed/images/#Азов_з_неба_000373.jpg differ diff --git a/tests/root/data-processed/images/#Азов_з_неба_000380.jpg b/tests/root/data-processed/images/#Азов_з_неба_000380.jpg new file mode 100644 index 0000000..27dd8d8 Binary files /dev/null and b/tests/root/data-processed/images/#Азов_з_неба_000380.jpg differ diff --git a/tests/root/data-processed/images/#Азов_з_неба_000403.jpg b/tests/root/data-processed/images/#Азов_з_неба_000403.jpg new file mode 100644 index 0000000..4774f3c Binary files /dev/null and b/tests/root/data-processed/images/#Азов_з_неба_000403.jpg differ diff --git a/tests/root/data-processed/labels/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001226.txt b/tests/root/data-processed/labels/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001226.txt new file mode 100644 index 0000000..f0231e6 --- /dev/null +++ b/tests/root/data-processed/labels/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001226.txt @@ -0,0 +1 @@ +0 0.63516 0.76054 0.05480 0.09305 \ No newline at end of file diff --git a/tests/root/data-processed/labels/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001278.txt b/tests/root/data-processed/labels/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001278.txt new file mode 100644 index 0000000..e03dfe2 --- /dev/null +++ b/tests/root/data-processed/labels/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001278.txt @@ -0,0 +1 @@ +0 0.61853 
0.77915 0.05973 0.09305 \ No newline at end of file diff --git a/tests/root/data-processed/labels/#Азов_з_неба_000106.txt b/tests/root/data-processed/labels/#Азов_з_неба_000106.txt new file mode 100644 index 0000000..304e549 --- /dev/null +++ b/tests/root/data-processed/labels/#Азов_з_неба_000106.txt @@ -0,0 +1 @@ +0 0.47200 0.78007 0.26215 0.42338 \ No newline at end of file diff --git a/tests/root/data-processed/labels/#Азов_з_неба_000113.txt b/tests/root/data-processed/labels/#Азов_з_неба_000113.txt new file mode 100644 index 0000000..104c11e --- /dev/null +++ b/tests/root/data-processed/labels/#Азов_з_неба_000113.txt @@ -0,0 +1,2 @@ +0 0.76062 0.31074 0.08738 0.13238 +0 0.79600 0.20352 0.04985 0.08424 \ No newline at end of file diff --git a/tests/root/data-processed/labels/#Азов_з_неба_000148.txt b/tests/root/data-processed/labels/#Азов_з_неба_000148.txt new file mode 100644 index 0000000..67e97f5 --- /dev/null +++ b/tests/root/data-processed/labels/#Азов_з_неба_000148.txt @@ -0,0 +1,6 @@ +6 0.52646 0.69638 0.04738 0.18161 +6 0.56554 0.69856 0.03077 0.16848 +6 0.59908 0.69311 0.03754 0.17263 +6 0.70000 0.74293 0.04738 0.17176 +6 0.77046 0.69638 0.03077 0.14441 +6 0.73538 0.70190 0.03077 0.14660 \ No newline at end of file diff --git a/tests/root/data-processed/labels/#Азов_з_неба_000162.txt b/tests/root/data-processed/labels/#Азов_з_неба_000162.txt new file mode 100644 index 0000000..2216408 --- /dev/null +++ b/tests/root/data-processed/labels/#Азов_з_неба_000162.txt @@ -0,0 +1,4 @@ +6 0.63569 0.46827 0.04185 0.13675 +6 0.68492 0.38403 0.04800 0.13019 +6 0.61569 0.36161 0.03877 0.10065 +6 0.56708 0.44147 0.04985 0.12253 \ No newline at end of file diff --git a/tests/root/data-processed/labels/#Азов_з_неба_000173.txt b/tests/root/data-processed/labels/#Азов_з_неба_000173.txt new file mode 100644 index 0000000..08ac396 --- /dev/null +++ b/tests/root/data-processed/labels/#Азов_з_неба_000173.txt @@ -0,0 +1 @@ +3 0.74738 0.58588 0.33415 0.49450 \ No newline at end 
of file diff --git a/tests/root/data-processed/labels/#Азов_з_неба_000189.txt b/tests/root/data-processed/labels/#Азов_з_неба_000189.txt new file mode 100644 index 0000000..546bd29 --- /dev/null +++ b/tests/root/data-processed/labels/#Азов_з_неба_000189.txt @@ -0,0 +1 @@ +0 0.91200 0.50492 0.09846 0.08971 \ No newline at end of file diff --git a/tests/root/data-processed/labels/#Азов_з_неба_000210.txt b/tests/root/data-processed/labels/#Азов_з_неба_000210.txt new file mode 100644 index 0000000..5a5eefa --- /dev/null +++ b/tests/root/data-processed/labels/#Азов_з_неба_000210.txt @@ -0,0 +1 @@ +0 0.87846 0.50930 0.09785 0.07658 \ No newline at end of file diff --git a/tests/root/data-processed/labels/#Азов_з_неба_000245.txt b/tests/root/data-processed/labels/#Азов_з_неба_000245.txt new file mode 100644 index 0000000..7b9d776 --- /dev/null +++ b/tests/root/data-processed/labels/#Азов_з_неба_000245.txt @@ -0,0 +1 @@ +2 0.53169 0.44475 0.06523 0.12691 \ No newline at end of file diff --git a/tests/root/data-processed/labels/#Азов_з_неба_000269.txt b/tests/root/data-processed/labels/#Азов_з_неба_000269.txt new file mode 100644 index 0000000..a6dd311 --- /dev/null +++ b/tests/root/data-processed/labels/#Азов_з_неба_000269.txt @@ -0,0 +1 @@ +2 0.51323 0.45679 0.08369 0.08752 \ No newline at end of file diff --git a/tests/root/data-processed/labels/#Азов_з_неба_000307.txt b/tests/root/data-processed/labels/#Азов_з_неба_000307.txt new file mode 100644 index 0000000..f6614c0 --- /dev/null +++ b/tests/root/data-processed/labels/#Азов_з_неба_000307.txt @@ -0,0 +1 @@ +0 0.90154 0.37309 0.08246 0.10831 \ No newline at end of file diff --git a/tests/root/data-processed/labels/#Азов_з_неба_000314.txt b/tests/root/data-processed/labels/#Азов_з_неба_000314.txt new file mode 100644 index 0000000..dfe2158 --- /dev/null +++ b/tests/root/data-processed/labels/#Азов_з_неба_000314.txt @@ -0,0 +1,4 @@ +6 0.35723 0.48851 0.03015 0.12253 +6 0.21415 0.44147 0.02954 0.11597 +6 0.24277 0.44147 
0.02769 0.11597 +6 0.28892 0.44530 0.02769 0.08205 \ No newline at end of file diff --git a/tests/root/data-processed/labels/#Азов_з_неба_000320.txt b/tests/root/data-processed/labels/#Азов_з_неба_000320.txt new file mode 100644 index 0000000..b9f65f6 --- /dev/null +++ b/tests/root/data-processed/labels/#Азов_з_неба_000320.txt @@ -0,0 +1 @@ +3 0.24338 0.33097 0.16308 0.21443 \ No newline at end of file diff --git a/tests/root/data-processed/labels/#Азов_з_неба_000338.txt b/tests/root/data-processed/labels/#Азов_з_неба_000338.txt new file mode 100644 index 0000000..f15f033 --- /dev/null +++ b/tests/root/data-processed/labels/#Азов_з_неба_000338.txt @@ -0,0 +1 @@ +3 0.53200 0.18055 0.07815 0.08862 \ No newline at end of file diff --git a/tests/root/data-processed/labels/#Азов_з_неба_000346.txt b/tests/root/data-processed/labels/#Азов_з_неба_000346.txt new file mode 100644 index 0000000..cfb31fd --- /dev/null +++ b/tests/root/data-processed/labels/#Азов_з_неба_000346.txt @@ -0,0 +1 @@ +1 0.63538 0.20571 0.05354 0.11050 \ No newline at end of file diff --git a/tests/root/data-processed/labels/#Азов_з_неба_000347.txt b/tests/root/data-processed/labels/#Азов_з_неба_000347.txt new file mode 100644 index 0000000..bccb5e5 --- /dev/null +++ b/tests/root/data-processed/labels/#Азов_з_неба_000347.txt @@ -0,0 +1 @@ +3 0.70092 0.15046 0.04431 0.06564 \ No newline at end of file diff --git a/tests/root/data-processed/labels/#Азов_з_неба_000373.txt b/tests/root/data-processed/labels/#Азов_з_неба_000373.txt new file mode 100644 index 0000000..5415a80 --- /dev/null +++ b/tests/root/data-processed/labels/#Азов_з_неба_000373.txt @@ -0,0 +1 @@ +1 0.28431 0.81398 0.12677 0.33805 \ No newline at end of file diff --git a/tests/root/data-processed/labels/#Азов_з_неба_000380.txt b/tests/root/data-processed/labels/#Азов_з_неба_000380.txt new file mode 100644 index 0000000..194c6ac --- /dev/null +++ b/tests/root/data-processed/labels/#Азов_з_неба_000380.txt @@ -0,0 +1,2 @@ +0 0.33108 0.75600 
0.23262 0.35556 +6 0.10862 0.51641 0.03385 0.09080 \ No newline at end of file diff --git a/tests/root/data-processed/labels/#Азов_з_неба_000403.txt b/tests/root/data-processed/labels/#Азов_з_неба_000403.txt new file mode 100644 index 0000000..035b823 --- /dev/null +++ b/tests/root/data-processed/labels/#Азов_з_неба_000403.txt @@ -0,0 +1 @@ +0 0.41138 0.64113 0.29108 0.21552 \ No newline at end of file diff --git a/tests/root/data/images/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001226.jpg b/tests/root/data/images/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001226.jpg new file mode 100644 index 0000000..87bf488 Binary files /dev/null and b/tests/root/data/images/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001226.jpg differ diff --git a/tests/root/data/images/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001278.jpg b/tests/root/data/images/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001278.jpg new file mode 100644 index 0000000..732e64d Binary files /dev/null and b/tests/root/data/images/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001278.jpg differ diff --git a/tests/root/data/images/#Азов_з_неба_000106.jpg b/tests/root/data/images/#Азов_з_неба_000106.jpg new file mode 100644 index 0000000..f316b2b Binary files /dev/null and b/tests/root/data/images/#Азов_з_неба_000106.jpg differ diff --git a/tests/root/data/images/#Азов_з_неба_000113.jpg b/tests/root/data/images/#Азов_з_неба_000113.jpg new file mode 100644 index 0000000..3871e98 Binary files /dev/null and b/tests/root/data/images/#Азов_з_неба_000113.jpg differ diff --git a/tests/root/data/images/#Азов_з_неба_000148.jpg b/tests/root/data/images/#Азов_з_неба_000148.jpg new file mode 100644 index 0000000..8c54eba Binary files /dev/null and b/tests/root/data/images/#Азов_з_неба_000148.jpg differ diff --git a/tests/root/data/images/#Азов_з_неба_000162.jpg b/tests/root/data/images/#Азов_з_неба_000162.jpg new file mode 100644 index 0000000..480b9ac Binary files /dev/null and 
b/tests/root/data/images/#Азов_з_неба_000162.jpg differ diff --git a/tests/root/data/images/#Азов_з_неба_000173.jpg b/tests/root/data/images/#Азов_з_неба_000173.jpg new file mode 100644 index 0000000..ee949db Binary files /dev/null and b/tests/root/data/images/#Азов_з_неба_000173.jpg differ diff --git a/tests/root/data/images/#Азов_з_неба_000189.jpg b/tests/root/data/images/#Азов_з_неба_000189.jpg new file mode 100644 index 0000000..d9e1058 Binary files /dev/null and b/tests/root/data/images/#Азов_з_неба_000189.jpg differ diff --git a/tests/root/data/images/#Азов_з_неба_000210.jpg b/tests/root/data/images/#Азов_з_неба_000210.jpg new file mode 100644 index 0000000..85de419 Binary files /dev/null and b/tests/root/data/images/#Азов_з_неба_000210.jpg differ diff --git a/tests/root/data/images/#Азов_з_неба_000245.jpg b/tests/root/data/images/#Азов_з_неба_000245.jpg new file mode 100644 index 0000000..664dfc3 Binary files /dev/null and b/tests/root/data/images/#Азов_з_неба_000245.jpg differ diff --git a/tests/root/data/images/#Азов_з_неба_000269.jpg b/tests/root/data/images/#Азов_з_неба_000269.jpg new file mode 100644 index 0000000..b765680 Binary files /dev/null and b/tests/root/data/images/#Азов_з_неба_000269.jpg differ diff --git a/tests/root/data/images/#Азов_з_неба_000307.jpg b/tests/root/data/images/#Азов_з_неба_000307.jpg new file mode 100644 index 0000000..1ad21e2 Binary files /dev/null and b/tests/root/data/images/#Азов_з_неба_000307.jpg differ diff --git a/tests/root/data/images/#Азов_з_неба_000314.jpg b/tests/root/data/images/#Азов_з_неба_000314.jpg new file mode 100644 index 0000000..89ecc54 Binary files /dev/null and b/tests/root/data/images/#Азов_з_неба_000314.jpg differ diff --git a/tests/root/data/images/#Азов_з_неба_000320.jpg b/tests/root/data/images/#Азов_з_неба_000320.jpg new file mode 100644 index 0000000..dd66c48 Binary files /dev/null and b/tests/root/data/images/#Азов_з_неба_000320.jpg differ diff --git 
a/tests/root/data/images/#Азов_з_неба_000338.jpg b/tests/root/data/images/#Азов_з_неба_000338.jpg new file mode 100644 index 0000000..14744f5 Binary files /dev/null and b/tests/root/data/images/#Азов_з_неба_000338.jpg differ diff --git a/tests/root/data/images/#Азов_з_неба_000346.jpg b/tests/root/data/images/#Азов_з_неба_000346.jpg new file mode 100644 index 0000000..52986da Binary files /dev/null and b/tests/root/data/images/#Азов_з_неба_000346.jpg differ diff --git a/tests/root/data/images/#Азов_з_неба_000347.jpg b/tests/root/data/images/#Азов_з_неба_000347.jpg new file mode 100644 index 0000000..dcedfe7 Binary files /dev/null and b/tests/root/data/images/#Азов_з_неба_000347.jpg differ diff --git a/tests/root/data/images/#Азов_з_неба_000373.jpg b/tests/root/data/images/#Азов_з_неба_000373.jpg new file mode 100644 index 0000000..49ac20a Binary files /dev/null and b/tests/root/data/images/#Азов_з_неба_000373.jpg differ diff --git a/tests/root/data/images/#Азов_з_неба_000380.jpg b/tests/root/data/images/#Азов_з_неба_000380.jpg new file mode 100644 index 0000000..27dd8d8 Binary files /dev/null and b/tests/root/data/images/#Азов_з_неба_000380.jpg differ diff --git a/tests/root/data/images/#Азов_з_неба_000403.jpg b/tests/root/data/images/#Азов_з_неба_000403.jpg new file mode 100644 index 0000000..4774f3c Binary files /dev/null and b/tests/root/data/images/#Азов_з_неба_000403.jpg differ diff --git a/tests/root/data/labels/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001226.txt b/tests/root/data/labels/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001226.txt new file mode 100644 index 0000000..f0231e6 --- /dev/null +++ b/tests/root/data/labels/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001226.txt @@ -0,0 +1 @@ +0 0.63516 0.76054 0.05480 0.09305 \ No newline at end of file diff --git a/tests/root/data/labels/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001278.txt b/tests/root/data/labels/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001278.txt new file mode 
100644 index 0000000..e03dfe2 --- /dev/null +++ b/tests/root/data/labels/!Працюєпершийсамохіднийартилерійськийдивізіон.К_001278.txt @@ -0,0 +1 @@ +0 0.61853 0.77915 0.05973 0.09305 \ No newline at end of file diff --git a/tests/root/data/labels/#Азов_з_неба_000106.txt b/tests/root/data/labels/#Азов_з_неба_000106.txt new file mode 100644 index 0000000..304e549 --- /dev/null +++ b/tests/root/data/labels/#Азов_з_неба_000106.txt @@ -0,0 +1 @@ +0 0.47200 0.78007 0.26215 0.42338 \ No newline at end of file diff --git a/tests/root/data/labels/#Азов_з_неба_000113.txt b/tests/root/data/labels/#Азов_з_неба_000113.txt new file mode 100644 index 0000000..104c11e --- /dev/null +++ b/tests/root/data/labels/#Азов_з_неба_000113.txt @@ -0,0 +1,2 @@ +0 0.76062 0.31074 0.08738 0.13238 +0 0.79600 0.20352 0.04985 0.08424 \ No newline at end of file diff --git a/tests/root/data/labels/#Азов_з_неба_000148.txt b/tests/root/data/labels/#Азов_з_неба_000148.txt new file mode 100644 index 0000000..67e97f5 --- /dev/null +++ b/tests/root/data/labels/#Азов_з_неба_000148.txt @@ -0,0 +1,6 @@ +6 0.52646 0.69638 0.04738 0.18161 +6 0.56554 0.69856 0.03077 0.16848 +6 0.59908 0.69311 0.03754 0.17263 +6 0.70000 0.74293 0.04738 0.17176 +6 0.77046 0.69638 0.03077 0.14441 +6 0.73538 0.70190 0.03077 0.14660 \ No newline at end of file diff --git a/tests/root/data/labels/#Азов_з_неба_000162.txt b/tests/root/data/labels/#Азов_з_неба_000162.txt new file mode 100644 index 0000000..2216408 --- /dev/null +++ b/tests/root/data/labels/#Азов_з_неба_000162.txt @@ -0,0 +1,4 @@ +6 0.63569 0.46827 0.04185 0.13675 +6 0.68492 0.38403 0.04800 0.13019 +6 0.61569 0.36161 0.03877 0.10065 +6 0.56708 0.44147 0.04985 0.12253 \ No newline at end of file diff --git a/tests/root/data/labels/#Азов_з_неба_000173.txt b/tests/root/data/labels/#Азов_з_неба_000173.txt new file mode 100644 index 0000000..08ac396 --- /dev/null +++ b/tests/root/data/labels/#Азов_з_неба_000173.txt @@ -0,0 +1 @@ +3 0.74738 0.58588 0.33415 0.49450 \ No newline 
at end of file diff --git a/tests/root/data/labels/#Азов_з_неба_000189.txt b/tests/root/data/labels/#Азов_з_неба_000189.txt new file mode 100644 index 0000000..546bd29 --- /dev/null +++ b/tests/root/data/labels/#Азов_з_неба_000189.txt @@ -0,0 +1 @@ +0 0.91200 0.50492 0.09846 0.08971 \ No newline at end of file diff --git a/tests/root/data/labels/#Азов_з_неба_000210.txt b/tests/root/data/labels/#Азов_з_неба_000210.txt new file mode 100644 index 0000000..5a5eefa --- /dev/null +++ b/tests/root/data/labels/#Азов_з_неба_000210.txt @@ -0,0 +1 @@ +0 0.87846 0.50930 0.09785 0.07658 \ No newline at end of file diff --git a/tests/root/data/labels/#Азов_з_неба_000245.txt b/tests/root/data/labels/#Азов_з_неба_000245.txt new file mode 100644 index 0000000..7b9d776 --- /dev/null +++ b/tests/root/data/labels/#Азов_з_неба_000245.txt @@ -0,0 +1 @@ +2 0.53169 0.44475 0.06523 0.12691 \ No newline at end of file diff --git a/tests/root/data/labels/#Азов_з_неба_000269.txt b/tests/root/data/labels/#Азов_з_неба_000269.txt new file mode 100644 index 0000000..a6dd311 --- /dev/null +++ b/tests/root/data/labels/#Азов_з_неба_000269.txt @@ -0,0 +1 @@ +2 0.51323 0.45679 0.08369 0.08752 \ No newline at end of file diff --git a/tests/root/data/labels/#Азов_з_неба_000307.txt b/tests/root/data/labels/#Азов_з_неба_000307.txt new file mode 100644 index 0000000..f6614c0 --- /dev/null +++ b/tests/root/data/labels/#Азов_з_неба_000307.txt @@ -0,0 +1 @@ +0 0.90154 0.37309 0.08246 0.10831 \ No newline at end of file diff --git a/tests/root/data/labels/#Азов_з_неба_000314.txt b/tests/root/data/labels/#Азов_з_неба_000314.txt new file mode 100644 index 0000000..dfe2158 --- /dev/null +++ b/tests/root/data/labels/#Азов_з_неба_000314.txt @@ -0,0 +1,4 @@ +6 0.35723 0.48851 0.03015 0.12253 +6 0.21415 0.44147 0.02954 0.11597 +6 0.24277 0.44147 0.02769 0.11597 +6 0.28892 0.44530 0.02769 0.08205 \ No newline at end of file diff --git a/tests/root/data/labels/#Азов_з_неба_000320.txt 
b/tests/root/data/labels/#Азов_з_неба_000320.txt new file mode 100644 index 0000000..b9f65f6 --- /dev/null +++ b/tests/root/data/labels/#Азов_з_неба_000320.txt @@ -0,0 +1 @@ +3 0.24338 0.33097 0.16308 0.21443 \ No newline at end of file diff --git a/tests/root/data/labels/#Азов_з_неба_000338.txt b/tests/root/data/labels/#Азов_з_неба_000338.txt new file mode 100644 index 0000000..f15f033 --- /dev/null +++ b/tests/root/data/labels/#Азов_з_неба_000338.txt @@ -0,0 +1 @@ +3 0.53200 0.18055 0.07815 0.08862 \ No newline at end of file diff --git a/tests/root/data/labels/#Азов_з_неба_000346.txt b/tests/root/data/labels/#Азов_з_неба_000346.txt new file mode 100644 index 0000000..cfb31fd --- /dev/null +++ b/tests/root/data/labels/#Азов_з_неба_000346.txt @@ -0,0 +1 @@ +1 0.63538 0.20571 0.05354 0.11050 \ No newline at end of file diff --git a/tests/root/data/labels/#Азов_з_неба_000347.txt b/tests/root/data/labels/#Азов_з_неба_000347.txt new file mode 100644 index 0000000..bccb5e5 --- /dev/null +++ b/tests/root/data/labels/#Азов_з_неба_000347.txt @@ -0,0 +1 @@ +3 0.70092 0.15046 0.04431 0.06564 \ No newline at end of file diff --git a/tests/root/data/labels/#Азов_з_неба_000373.txt b/tests/root/data/labels/#Азов_з_неба_000373.txt new file mode 100644 index 0000000..5415a80 --- /dev/null +++ b/tests/root/data/labels/#Азов_з_неба_000373.txt @@ -0,0 +1 @@ +1 0.28431 0.81398 0.12677 0.33805 \ No newline at end of file diff --git a/tests/root/data/labels/#Азов_з_неба_000380.txt b/tests/root/data/labels/#Азов_з_неба_000380.txt new file mode 100644 index 0000000..194c6ac --- /dev/null +++ b/tests/root/data/labels/#Азов_з_неба_000380.txt @@ -0,0 +1,2 @@ +0 0.33108 0.75600 0.23262 0.35556 +6 0.10862 0.51641 0.03385 0.09080 \ No newline at end of file diff --git a/tests/root/data/labels/#Азов_з_неба_000403.txt b/tests/root/data/labels/#Азов_з_неба_000403.txt new file mode 100644 index 0000000..035b823 --- /dev/null +++ b/tests/root/data/labels/#Азов_з_неба_000403.txt @@ -0,0 +1 @@ +0 
0.41138 0.64113 0.29108 0.21552 \ No newline at end of file diff --git a/tests/test_augmentation.py b/tests/test_augmentation.py deleted file mode 100644 index 88ca3c9..0000000 --- a/tests/test_augmentation.py +++ /dev/null @@ -1,267 +0,0 @@ -import random -import shutil -from pathlib import Path - -import cv2 -import numpy as np - -from tests.conftest import apply_constants_patch - - -def _patch_augmentation_paths(monkeypatch, base: Path): - apply_constants_patch(monkeypatch, base) - - -def _seed(): - random.seed(42) - np.random.seed(42) - - -def _augment_annotation_with_total(monkeypatch): - import augmentation as aug - - orig = aug.Augmentator.augment_annotation - - def wrapped(self, image_file): - self.total_to_process = self.total_images_to_process - return orig(self, image_file) - - monkeypatch.setattr(aug.Augmentator, "augment_annotation", wrapped) - - -def test_bt_aug_01_augment_inner_returns_eight_image_labels( - tmp_path, monkeypatch, fixture_images_dir, fixture_labels_dir -): - # Arrange - _patch_augmentation_paths(monkeypatch, tmp_path) - _seed() - from augmentation import Augmentator - - stem = sorted(fixture_images_dir.glob("*.jpg"))[0].stem - img_path = fixture_images_dir / f"{stem}.jpg" - lbl_path = fixture_labels_dir / f"{stem}.txt" - img = cv2.imdecode(np.fromfile(str(img_path), dtype=np.uint8), cv2.IMREAD_COLOR) - aug = Augmentator() - labels = aug.read_labels(lbl_path) - proc_img = Path(tmp_path) / "azaion" / "data-processed" / "images" / f"{stem}.jpg" - proc_lbl = Path(tmp_path) / "azaion" / "data-processed" / "labels" / f"{stem}.txt" - proc_img.parent.mkdir(parents=True, exist_ok=True) - proc_lbl.parent.mkdir(parents=True, exist_ok=True) - from dto.imageLabel import ImageLabel - - img_ann = ImageLabel( - image_path=str(proc_img), - image=img, - labels_path=str(proc_lbl), - labels=labels, - ) - # Act - out = aug.augment_inner(img_ann) - # Assert - assert len(out) == 8 - - -def test_bt_aug_02_naming_convention(tmp_path, monkeypatch, 
fixture_images_dir, fixture_labels_dir): - # Arrange - _patch_augmentation_paths(monkeypatch, tmp_path) - _seed() - from augmentation import Augmentator - from dto.imageLabel import ImageLabel - - stem = "test_image" - proc_img = Path(tmp_path) / "azaion" / "data-processed" / "images" / f"{stem}.jpg" - proc_lbl = Path(tmp_path) / "azaion" / "data-processed" / "labels" / f"{stem}.txt" - proc_img.parent.mkdir(parents=True, exist_ok=True) - proc_lbl.parent.mkdir(parents=True, exist_ok=True) - src_img = sorted(fixture_images_dir.glob("*.jpg"))[0] - img = cv2.imdecode(np.fromfile(str(src_img), dtype=np.uint8), cv2.IMREAD_COLOR) - lbl_path = fixture_labels_dir / f"{src_img.stem}.txt" - labels = Augmentator().read_labels(lbl_path) - aug = Augmentator() - img_ann = ImageLabel( - image_path=str(proc_img), - image=img, - labels_path=str(proc_lbl), - labels=labels, - ) - # Act - out = aug.augment_inner(img_ann) - # Assert - names = [Path(o.image_path).name for o in out] - expected = [f"{stem}.jpg"] + [f"{stem}_{i}.jpg" for i in range(1, 8)] - assert names == expected - lbl_names = [Path(o.labels_path).name for o in out] - expected_lbl = [f"{stem}.txt"] + [f"{stem}_{i}.txt" for i in range(1, 8)] - assert lbl_names == expected_lbl - - -def _all_coords_in_unit(labels_list): - for row in labels_list: - for j in range(4): - v = float(row[j]) - if v < 0.0 or v > 1.0: - return False - return True - - -def test_bt_aug_03_all_bbox_coords_in_zero_one( - tmp_path, monkeypatch, fixture_images_dir, fixture_labels_dir -): - # Arrange - _patch_augmentation_paths(monkeypatch, tmp_path) - _seed() - from augmentation import Augmentator - from dto.imageLabel import ImageLabel - - stem = sorted(fixture_images_dir.glob("*.jpg"))[0].stem - proc_img = Path(tmp_path) / "azaion" / "data-processed" / "images" / f"{stem}.jpg" - proc_lbl = Path(tmp_path) / "azaion" / "data-processed" / "labels" / f"{stem}.txt" - proc_img.parent.mkdir(parents=True, exist_ok=True) - proc_lbl.parent.mkdir(parents=True, 
exist_ok=True) - img_path = fixture_images_dir / f"{stem}.jpg" - lbl_path = fixture_labels_dir / f"{stem}.txt" - img = cv2.imdecode(np.fromfile(str(img_path), dtype=np.uint8), cv2.IMREAD_COLOR) - aug = Augmentator() - labels = aug.read_labels(lbl_path) - img_ann = ImageLabel( - image_path=str(proc_img), - image=img, - labels_path=str(proc_lbl), - labels=labels, - ) - # Act - out = aug.augment_inner(img_ann) - # Assert - for o in out: - for row in o.labels: - assert len(row) >= 5 - assert _all_coords_in_unit(o.labels) - - -def test_bt_aug_04_correct_bboxes_clips_edge(tmp_path, monkeypatch): - # Arrange - _patch_augmentation_paths(monkeypatch, tmp_path) - from augmentation import Augmentator - - aug = Augmentator() - m = aug.correct_margin - inp = [[0.99, 0.5, 0.2, 0.1, 0]] - # Act - res = aug.correct_bboxes(inp) - # Assert - assert len(res) == 1 - x, y, w, h, _ = res[0] - hw, hh = 0.5 * w, 0.5 * h - assert x - hw >= m - 1e-9 - assert x + hw <= 1.0 - m + 1e-9 - assert y - hh >= m - 1e-9 - assert y + hh <= 1.0 - m + 1e-9 - - -def test_bt_aug_05_tiny_bbox_removed_after_clipping(tmp_path, monkeypatch): - # Arrange - _patch_augmentation_paths(monkeypatch, tmp_path) - from augmentation import Augmentator - - aug = Augmentator() - inp = [[0.995, 0.5, 0.01, 0.5, 0]] - # Act - res = aug.correct_bboxes(inp) - # Assert - assert res == [] - - -def test_bt_aug_06_empty_label_eight_outputs_empty_labels( - tmp_path, monkeypatch, fixture_images_dir -): - # Arrange - _patch_augmentation_paths(monkeypatch, tmp_path) - _seed() - from augmentation import Augmentator - from dto.imageLabel import ImageLabel - - stem = "empty_case" - proc_img = Path(tmp_path) / "azaion" / "data-processed" / "images" / f"{stem}.jpg" - proc_lbl = Path(tmp_path) / "azaion" / "data-processed" / "labels" / f"{stem}.txt" - proc_img.parent.mkdir(parents=True, exist_ok=True) - proc_lbl.parent.mkdir(parents=True, exist_ok=True) - src_img = sorted(fixture_images_dir.glob("*.jpg"))[0] - img = 
cv2.imdecode(np.fromfile(str(src_img), dtype=np.uint8), cv2.IMREAD_COLOR) - aug = Augmentator() - img_ann = ImageLabel( - image_path=str(proc_img), - image=img, - labels_path=str(proc_lbl), - labels=[], - ) - # Act - out = aug.augment_inner(img_ann) - # Assert - assert len(out) == 8 - for o in out: - assert o.labels == [] - - -def test_bt_aug_07_full_pipeline_five_images_forty_outputs( - tmp_path, monkeypatch, sample_images_labels -): - # Arrange - _patch_augmentation_paths(monkeypatch, tmp_path) - _augment_annotation_with_total(monkeypatch) - _seed() - import constants as c - from augmentation import Augmentator - - img_dir = Path(c.config.data_images_dir) - lbl_dir = Path(c.config.data_labels_dir) - img_dir.mkdir(parents=True, exist_ok=True) - lbl_dir.mkdir(parents=True, exist_ok=True) - src_img, src_lbl = sample_images_labels(5) - for p in src_img.glob("*.jpg"): - shutil.copy2(p, img_dir / p.name) - for p in src_lbl.glob("*.txt"): - shutil.copy2(p, lbl_dir / p.name) - # Act - Augmentator().augment_annotations() - # Assert - proc_img = Path(c.config.processed_images_dir) - proc_lbl = Path(c.config.processed_labels_dir) - assert len(list(proc_img.glob("*.jpg"))) == 40 - assert len(list(proc_lbl.glob("*.txt"))) == 40 - - -def test_bt_aug_08_skips_already_processed(tmp_path, monkeypatch, sample_images_labels): - # Arrange - _patch_augmentation_paths(monkeypatch, tmp_path) - _augment_annotation_with_total(monkeypatch) - _seed() - import constants as c - from augmentation import Augmentator - - img_dir = Path(c.config.data_images_dir) - lbl_dir = Path(c.config.data_labels_dir) - proc_img = Path(c.config.processed_images_dir) - proc_lbl = Path(c.config.processed_labels_dir) - img_dir.mkdir(parents=True, exist_ok=True) - lbl_dir.mkdir(parents=True, exist_ok=True) - proc_img.mkdir(parents=True, exist_ok=True) - proc_lbl.mkdir(parents=True, exist_ok=True) - src_img, src_lbl = sample_images_labels(5) - jpgs = sorted(src_img.glob("*.jpg")) - for p in jpgs: - shutil.copy2(p, 
img_dir / p.name) - for p in src_lbl.glob("*.txt"): - shutil.copy2(p, lbl_dir / p.name) - markers = [] - for p in jpgs[:3]: - dst = proc_img / p.name - shutil.copy2(p, dst) - markers.append(dst.read_bytes()) - # Act - Augmentator().augment_annotations() - # Assert - after_jpgs = list(proc_img.glob("*.jpg")) - assert len(after_jpgs) == 19 - assert len(list(proc_lbl.glob("*.txt"))) == 16 - for i, p in enumerate(jpgs[:3]): - assert (proc_img / p.name).read_bytes() == markers[i] diff --git a/tests/test_augmentation_nonfunc.py b/tests/test_augmentation_nonfunc.py deleted file mode 100644 index b2bf7a6..0000000 --- a/tests/test_augmentation_nonfunc.py +++ /dev/null @@ -1,143 +0,0 @@ -import random -import shutil -from pathlib import Path -from types import SimpleNamespace - -import cv2 -import numpy as np -import pytest - -from tests.conftest import apply_constants_patch - - -def _patch_augmentation_paths(monkeypatch, base: Path): - apply_constants_patch(monkeypatch, base) - - -def _augment_annotation_with_total(monkeypatch): - import augmentation as aug - - orig = aug.Augmentator.augment_annotation - - def wrapped(self, image_file): - self.total_to_process = self.total_images_to_process - return orig(self, image_file) - - monkeypatch.setattr(aug.Augmentator, "augment_annotation", wrapped) - - -def _seed(): - random.seed(42) - np.random.seed(42) - - -@pytest.mark.resilience -def test_rt_aug_01_corrupted_image_skipped( - tmp_path, monkeypatch, fixture_images_dir, fixture_labels_dir -): - # Arrange - _patch_augmentation_paths(monkeypatch, tmp_path) - _augment_annotation_with_total(monkeypatch) - _seed() - import constants as c - from augmentation import Augmentator - - img_dir = Path(c.config.data_images_dir) - lbl_dir = Path(c.config.data_labels_dir) - img_dir.mkdir(parents=True, exist_ok=True) - lbl_dir.mkdir(parents=True, exist_ok=True) - stem = sorted(fixture_images_dir.glob("*.jpg"))[0].stem - shutil.copy2(fixture_images_dir / f"{stem}.jpg", img_dir / f"{stem}.jpg") - 
shutil.copy2(fixture_labels_dir / f"{stem}.txt", lbl_dir / f"{stem}.txt") - raw = (fixture_images_dir / f"{stem}.jpg").read_bytes()[:200] - (img_dir / "corrupted_trunc.jpg").write_bytes(raw) - # Act - Augmentator().augment_annotations() - # Assert - proc_img = Path(c.config.processed_images_dir) - assert len(list(proc_img.glob("*.jpg"))) == 8 - - -@pytest.mark.resilience -def test_rt_aug_02_missing_label_no_crash(tmp_path, monkeypatch, fixture_images_dir): - # Arrange - _patch_augmentation_paths(monkeypatch, tmp_path) - _augment_annotation_with_total(monkeypatch) - import constants as c - from augmentation import Augmentator - - img_dir = Path(c.config.data_images_dir) - lbl_dir = Path(c.config.data_labels_dir) - img_dir.mkdir(parents=True, exist_ok=True) - lbl_dir.mkdir(parents=True, exist_ok=True) - stem = "no_label_here" - shutil.copy2(sorted(fixture_images_dir.glob("*.jpg"))[0], img_dir / f"{stem}.jpg") - aug = Augmentator() - aug.total_images_to_process = 1 - # Act - aug.augment_annotation(SimpleNamespace(name=f"{stem}.jpg")) - # Assert - assert len(list(Path(c.config.processed_images_dir).glob("*.jpg"))) == 0 - - -@pytest.mark.resilience -def test_rt_aug_03_narrow_bbox_fewer_or_eight_variants( - tmp_path, monkeypatch, fixture_images_dir -): - # Arrange - _patch_augmentation_paths(monkeypatch, tmp_path) - _seed() - from augmentation import Augmentator - from dto.imageLabel import ImageLabel - - stem = "narrow_bbox" - proc_img = Path(tmp_path) / "azaion" / "data-processed" / "images" / f"{stem}.jpg" - proc_lbl = Path(tmp_path) / "azaion" / "data-processed" / "labels" / f"{stem}.txt" - proc_img.parent.mkdir(parents=True, exist_ok=True) - proc_lbl.parent.mkdir(parents=True, exist_ok=True) - src_img = sorted(fixture_images_dir.glob("*.jpg"))[0] - img = cv2.imdecode(np.fromfile(str(src_img), dtype=np.uint8), cv2.IMREAD_COLOR) - aug = Augmentator() - labels = [[0.5, 0.5, 0.0005, 0.0005, 0]] - img_ann = ImageLabel( - image_path=str(proc_img), - image=img, - 
labels_path=str(proc_lbl), - labels=labels, - ) - # Act - out = aug.augment_inner(img_ann) - # Assert - assert 1 <= len(out) <= 8 - - -@pytest.mark.resource_limit -def test_rl_aug_01_augment_inner_exactly_eight_outputs( - tmp_path, monkeypatch, fixture_images_dir, fixture_labels_dir -): - # Arrange - _patch_augmentation_paths(monkeypatch, tmp_path) - _seed() - from augmentation import Augmentator - from dto.imageLabel import ImageLabel - - stem = sorted(fixture_images_dir.glob("*.jpg"))[0].stem - img_path = fixture_images_dir / f"{stem}.jpg" - lbl_path = fixture_labels_dir / f"{stem}.txt" - img = cv2.imdecode(np.fromfile(str(img_path), dtype=np.uint8), cv2.IMREAD_COLOR) - aug = Augmentator() - labels = aug.read_labels(lbl_path) - proc_img = Path(tmp_path) / "azaion" / "data-processed" / "images" / f"{stem}.jpg" - proc_lbl = Path(tmp_path) / "azaion" / "data-processed" / "labels" / f"{stem}.txt" - proc_img.parent.mkdir(parents=True, exist_ok=True) - proc_lbl.parent.mkdir(parents=True, exist_ok=True) - img_ann = ImageLabel( - image_path=str(proc_img), - image=img, - labels_path=str(proc_lbl), - labels=labels, - ) - # Act - out = aug.augment_inner(img_ann) - # Assert - assert len(out) == 8 diff --git a/tests/test_dataset_formation.py b/tests/test_dataset_formation.py index 46ecb49..4f4ae20 100644 --- a/tests/test_dataset_formation.py +++ b/tests/test_dataset_formation.py @@ -55,15 +55,15 @@ def test_bt_dsf_01_split_ratio_70_20_10( constants_patch, fixture_images_dir, fixture_labels_dir, - 100, + 20, set(), ) # Act train.form_dataset() # Assert - assert _count_jpg(Path(today_ds, "train", "images")) == 70 - assert _count_jpg(Path(today_ds, "valid", "images")) == 20 - assert _count_jpg(Path(today_ds, "test", "images")) == 10 + assert _count_jpg(Path(today_ds, "train", "images")) == 14 + assert _count_jpg(Path(today_ds, "valid", "images")) == 4 + assert _count_jpg(Path(today_ds, "test", "images")) == 2 def test_bt_dsf_02_six_subdirectories( @@ -80,7 +80,7 @@ def 
test_bt_dsf_02_six_subdirectories( constants_patch, fixture_images_dir, fixture_labels_dir, - 100, + 20, set(), ) # Act @@ -95,7 +95,7 @@ def test_bt_dsf_02_six_subdirectories( assert (base / "test" / "labels").is_dir() -def test_bt_dsf_03_total_files_one_hundred( +def test_bt_dsf_03_total_files_twenty( monkeypatch, tmp_path, constants_patch, @@ -109,7 +109,7 @@ def test_bt_dsf_03_total_files_one_hundred( constants_patch, fixture_images_dir, fixture_labels_dir, - 100, + 20, set(), ) # Act @@ -120,7 +120,7 @@ def test_bt_dsf_03_total_files_one_hundred( + _count_jpg(Path(today_ds, "valid", "images")) + _count_jpg(Path(today_ds, "test", "images")) ) - assert n == 100 + assert n == 20 def test_bt_dsf_04_corrupted_labels_quarantined( @@ -131,7 +131,7 @@ def test_bt_dsf_04_corrupted_labels_quarantined( fixture_labels_dir, ): # Arrange - stems = [p.stem for p in sorted(fixture_images_dir.glob("*.jpg"))[:100]] + stems = [p.stem for p in sorted(fixture_images_dir.glob("*.jpg"))[:20]] corrupt = set(stems[:5]) train, today_ds = _prepare_form_dataset( monkeypatch, @@ -139,7 +139,7 @@ def test_bt_dsf_04_corrupted_labels_quarantined( constants_patch, fixture_images_dir, fixture_labels_dir, - 100, + 20, corrupt, ) # Act @@ -150,7 +150,7 @@ def test_bt_dsf_04_corrupted_labels_quarantined( + _count_jpg(Path(today_ds, "valid", "images")) + _count_jpg(Path(today_ds, "test", "images")) ) - assert split_total == 95 + assert split_total == 15 assert _count_jpg(c_mod.config.corrupted_images_dir) == 5 assert len(list(Path(c_mod.config.corrupted_labels_dir).glob("*.txt"))) == 5 @@ -202,7 +202,7 @@ def test_rl_dsf_02_no_filename_duplication_across_splits( constants_patch, fixture_images_dir, fixture_labels_dir, - 100, + 20, set(), ) # Act @@ -214,4 +214,4 @@ def test_rl_dsf_02_no_filename_duplication_across_splits( for f in (base / split / "images").glob("*.jpg"): names.append(f.name) assert len(names) == len(set(names)) - assert len(names) == 100 + assert len(names) == 20 diff --git 
a/tests/test_infrastructure.py b/tests/test_infrastructure.py index 1a7a43b..bb22ec5 100644 --- a/tests/test_infrastructure.py +++ b/tests/test_infrastructure.py @@ -3,12 +3,12 @@ import constants as c def test_fixture_images_dir_has_jpegs(fixture_images_dir): jpgs = list(fixture_images_dir.glob("*.jpg")) - assert len(jpgs) == 100 + assert len(jpgs) == 20 def test_fixture_labels_dir_has_yolo_labels(fixture_labels_dir): txts = list(fixture_labels_dir.glob("*.txt")) - assert len(txts) == 100 + assert len(txts) == 20 def test_fixture_onnx_model_bytes(fixture_onnx_model): @@ -54,6 +54,6 @@ def test_empty_label_file(empty_label): def test_constants_patch_uses_tmp(constants_patch, tmp_path): constants_patch(tmp_path) - assert c.config.azaion.startswith(str(tmp_path)) + assert c.config.root.startswith(str(tmp_path)) assert c.config.data_dir.startswith(str(tmp_path)) assert c.config.current_onnx_model.startswith(str(tmp_path)) diff --git a/tests/test_training_e2e.py b/tests/test_training_e2e.py index 40a6a61..21ba7e4 100644 --- a/tests/test_training_e2e.py +++ b/tests/test_training_e2e.py @@ -9,33 +9,18 @@ import constants as c import train as train_mod import exports as exports_mod -_PROJECT_ROOT = Path(__file__).resolve().parent.parent -_DATASET_IMAGES = _PROJECT_ROOT / "_docs/00_problem/input_data/dataset/images" -_DATASET_LABELS = _PROJECT_ROOT / "_docs/00_problem/input_data/dataset/labels" -_CONFIG_TEST = _PROJECT_ROOT / "config.test.yaml" +_TESTS_DIR = Path(__file__).resolve().parent +_TEST_ROOT = _TESTS_DIR / "root" +_DATASET_IMAGES = _TEST_ROOT / "data" / "images" +_CONFIG_TEST = _TESTS_DIR.parent / "config.test.yaml" @pytest.fixture(scope="module") -def e2e_result(tmp_path_factory): - base = tmp_path_factory.mktemp("e2e") - +def e2e_result(): old_config = c.config - c.config = c.Config.from_yaml(str(_CONFIG_TEST), root=str(base / "azaion")) + c.config = c.Config.from_yaml(str(_CONFIG_TEST), root=str(_TEST_ROOT)) - data_img = Path(c.config.data_images_dir) - 
data_lbl = Path(c.config.data_labels_dir) - data_img.mkdir(parents=True) - data_lbl.mkdir(parents=True) - Path(c.config.models_dir).mkdir(parents=True) - - for img in sorted(_DATASET_IMAGES.glob("*.jpg")): - shutil.copy2(img, data_img / img.name) - lbl = _DATASET_LABELS / f"{img.stem}.txt" - if lbl.exists(): - shutil.copy2(lbl, data_lbl / lbl.name) - - from augmentation import Augmentator - Augmentator().augment_annotations() + Path(c.config.models_dir).mkdir(parents=True, exist_ok=True) train_mod.train_dataset() @@ -48,15 +33,14 @@ def e2e_result(tmp_path_factory): "today_dataset": today_ds, } + shutil.rmtree(c.config.datasets_dir, ignore_errors=True) + shutil.rmtree(c.config.models_dir, ignore_errors=True) + shutil.rmtree(c.config.corrupted_dir, ignore_errors=True) c.config = old_config @pytest.mark.e2e class TestTrainingPipeline: - def test_augmentation_produced_output(self, e2e_result): - proc = Path(c.config.processed_images_dir) - assert len(list(proc.glob("*.jpg"))) == 800 - def test_dataset_formed(self, e2e_result): base = Path(e2e_result["today_dataset"]) for split in ("train", "valid", "test"): @@ -66,7 +50,7 @@ class TestTrainingPipeline: len(list((base / s / "images").glob("*.jpg"))) for s in ("train", "valid", "test") ) - assert total == 800 + assert total == 20 def test_data_yaml_created(self, e2e_result): yaml_path = Path(e2e_result["today_dataset"]) / "data.yaml"