diff --git a/.cursor/README.md b/.cursor/README.md index d9522b4..055abd3 100644 --- a/.cursor/README.md +++ b/.cursor/README.md @@ -7,7 +7,7 @@ Type `/autopilot` to start or continue the full workflow. The orchestrator detects where your project is and picks up from there. ``` -/autopilot — start a new project or continue where you left off +/autopilot (or /auto) — start a new project or continue where you left off ``` If you want to run a specific skill directly (without the orchestrator), use the individual commands: diff --git a/.cursor/skills/autopilot/SKILL.md b/.cursor/skills/autopilot/SKILL.md index 57d39a1..8cec5a5 100644 --- a/.cursor/skills/autopilot/SKILL.md +++ b/.cursor/skills/autopilot/SKILL.md @@ -60,7 +60,7 @@ Every invocation follows this sequence: 3. Cross-check state file against _docs/ folder structure (rules in state.md) 4. Resolve flow (see Flow Resolution above) 5. Resolve current step (detection rules from the active flow file) -6. Present Status Summary (format in protocols.md) +6. Present Status Summary (template in active flow file) 7. Execute: a. Delegate to current skill (see Skill Delegation below) b. If skill returns FAILED → apply Skill Failure Retry Protocol (see protocols.md): @@ -102,37 +102,6 @@ This skill activates when the user wants to: - User wants to document an existing codebase → use `/document` directly - User wants the full guided workflow → use `/autopilot` -## Methodology Quick Reference +## Flow Reference -``` -┌────────────────────────────────────────────────────────────────┐ -│ Autopilot (Auto-Chain Orchestrator) │ -├────────────────────────────────────────────────────────────────┤ -│ EVERY INVOCATION: │ -│ 1. Read state file + module files │ -│ 2. Resolve flow & current step │ -│ 3. 
Status Summary → Execute → Auto-chain (loop) │ -│ │ -│ GREENFIELD FLOW (flows/greenfield.md): │ -│ Step 0 Problem → Step 1 Research → Step 2 Plan │ -│ → 2a UI Design (if UI) → Step 3 Decompose → [SESSION] │ -│ → Step 4 Implement → Step 5 Run Tests │ -│ → 5b Security (opt) → 5c Perf Test (opt) → Step 6 Deploy │ -│ → DONE │ -│ │ -│ EXISTING CODE FLOW (flows/existing-code.md): │ -│ Pre-Step Document → 2b Test Spec → 2c Decompose Tests │ -│ → [SESSION] → 2d Implement Tests → 2e Refactor │ -│ → 2ea UI Design (if UI) → 2f New Task → [SESSION] │ -│ → 2g Implement → 2h Run Tests → 2hb Security (opt) │ -│ → 2hc Perf Test (opt) → 2i Deploy → DONE │ -│ │ -│ STATE: _docs/_autopilot_state.md (see state.md) │ -│ PROTOCOLS: choice format, Jira auth, errors (see protocols.md) │ -│ PAUSE POINTS: sub-skill BLOCKING gates only │ -│ SESSION BREAK: after Decompose/New Task (before Implement) │ -├────────────────────────────────────────────────────────────────┤ -│ Auto-chain · State to file · Rich re-entry · Delegate │ -│ Pause at decisions only · Minimize interruptions │ -└────────────────────────────────────────────────────────────────┘ -``` +See `flows/greenfield.md` and `flows/existing-code.md` for step tables, detection rules, auto-chain rules, and status summary templates. diff --git a/.cursor/skills/autopilot/flows/existing-code.md b/.cursor/skills/autopilot/flows/existing-code.md index 91e120f..ff31c36 100644 --- a/.cursor/skills/autopilot/flows/existing-code.md +++ b/.cursor/skills/autopilot/flows/existing-code.md @@ -1,25 +1,25 @@ # Existing Code Workflow -Workflow for projects with an existing codebase. Starts with documentation, produces test specs, decomposes and implements tests, refactors with that safety net, then adds new functionality and deploys. +Workflow for projects with an existing codebase. Starts with documentation, produces test specs, decomposes and implements tests, verifies them, refactors with that safety net, then adds new functionality and deploys. 
## Step Reference Table -| Step | Name | Sub-Skill | Internal SubSteps | -|------|-------------------------|---------------------------------|---------------------------------------| -| — | Document (pre-step) | document/SKILL.md | Steps 1–8 | -| 2b | Blackbox Test Spec | test-spec/SKILL.md | Phase 1a–1b | -| 2c | Decompose Tests | decompose/SKILL.md (tests-only) | Step 1t + Step 3 + Step 4 | -| 2d | Implement Tests | implement/SKILL.md | (batch-driven, no fixed sub-steps) | -| 2e | Refactor | refactor/SKILL.md | Phases 0–5 (6-phase method) | -| 2ea | UI Design | ui-design/SKILL.md | Phase 0–8 (conditional — UI projects only) | -| 2f | New Task | new-task/SKILL.md | Steps 1–8 (loop) | -| 2g | Implement | implement/SKILL.md | (batch-driven, no fixed sub-steps) | -| 2h | Run Tests | (autopilot-managed) | Unit tests → Blackbox tests | -| 2hb | Security Audit | security/SKILL.md | Phase 1–5 (optional) | -| 2hc | Performance Test | (autopilot-managed) | Load/stress tests (optional) | -| 2i | Deploy | deploy/SKILL.md | Steps 1–7 | +| Step | Name | Sub-Skill | Internal SubSteps | +|------|------|-----------|-------------------| +| 1 | Document | document/SKILL.md | Steps 1–8 | +| 2 | Test Spec | test-spec/SKILL.md | Phase 1a–1b | +| 3 | Decompose Tests | decompose/SKILL.md (tests-only) | Step 1t + Step 3 + Step 4 | +| 4 | Implement Tests | implement/SKILL.md | (batch-driven, no fixed sub-steps) | +| 5 | Run Tests | test-run/SKILL.md | Steps 1–4 | +| 6 | Refactor | refactor/SKILL.md | Phases 0–5 (6-phase method) | +| 7 | New Task | new-task/SKILL.md | Steps 1–8 (loop) | +| 8 | Implement | implement/SKILL.md | (batch-driven, no fixed sub-steps) | +| 9 | Run Tests | test-run/SKILL.md | Steps 1–4 | +| 10 | Security Audit | security/SKILL.md | Phase 1–5 (optional) | +| 11 | Performance Test | (autopilot-managed) | Load/stress tests (optional) | +| 12 | Deploy | deploy/SKILL.md | Steps 1–7 | -After Step 2i, the existing-code workflow is complete. 
+After Step 12, the existing-code workflow is complete. ## Detection Rules @@ -27,30 +27,14 @@ Check rules in order — first match wins. --- -**Pre-Step — Existing Codebase Detection** +**Step 1 — Document** Condition: `_docs/` does not exist AND the workspace contains source code files (e.g., `*.py`, `*.cs`, `*.rs`, `*.ts`, `src/`, `Cargo.toml`, `*.csproj`, `package.json`) -Action: An existing codebase without documentation was detected. Present using Choose format: - -``` -══════════════════════════════════════ - DECISION REQUIRED: Existing codebase detected -══════════════════════════════════════ - A) Start fresh — define the problem from scratch (greenfield workflow) - B) Document existing codebase first — run /document to reverse-engineer docs, then continue -══════════════════════════════════════ - Recommendation: B — the /document skill analyzes your code - bottom-up and produces _docs/ artifacts automatically, - then you can continue with test specs, refactor, and new features. -══════════════════════════════════════ -``` - -- If user picks A → proceed to Step 0 (Problem Gathering) in the greenfield flow -- If user picks B → read and execute `.cursor/skills/document/SKILL.md`. After document skill completes, re-detect state (the produced `_docs/` artifacts will place the project at Step 2b or later). +Action: An existing codebase without documentation was detected. Read and execute `.cursor/skills/document/SKILL.md`. After the document skill completes, re-detect state (the produced `_docs/` artifacts will place the project at Step 2 or later). 
--- -**Step 2b — Blackbox Test Spec** +**Step 2 — Test Spec** Condition: `_docs/02_document/FINAL_report.md` exists AND workspace contains source code files (e.g., `*.py`, `*.cs`, `*.rs`, `*.ts`) AND `_docs/02_document/tests/traceability-matrix.md` does not exist AND the autopilot state shows Document was run (check `Completed Steps` for "Document" entry) Action: Read and execute `.cursor/skills/test-spec/SKILL.md` @@ -59,7 +43,7 @@ This step applies when the codebase was documented via the `/document` skill. Te --- -**Step 2c — Decompose Tests** +**Step 3 — Decompose Tests** Condition: `_docs/02_document/tests/traceability-matrix.md` exists AND workspace contains source code files AND the autopilot state shows Document was run AND (`_docs/02_tasks/` does not exist or has no task files) Action: Read and execute `.cursor/skills/decompose/SKILL.md` in **tests-only mode** (pass `_docs/02_document/tests/` as input). The decompose skill will: @@ -71,8 +55,8 @@ If `_docs/02_tasks/` has some task files already, the decompose skill's resumabi --- -**Step 2d — Implement Tests** -Condition: `_docs/02_tasks/` contains task files AND `_dependencies_table.md` exists AND the autopilot state shows Step 2c (Decompose Tests) is completed AND `_docs/03_implementation/FINAL_implementation_report.md` does not exist +**Step 4 — Implement Tests** +Condition: `_docs/02_tasks/` contains task files AND `_dependencies_table.md` exists AND the autopilot state shows Step 3 (Decompose Tests) is completed AND `_docs/03_implementation/FINAL_implementation_report.md` does not exist Action: Read and execute `.cursor/skills/implement/SKILL.md` @@ -82,8 +66,17 @@ If `_docs/03_implementation/` has batch reports, the implement skill detects com --- -**Step 2e — Refactor** -Condition: `_docs/03_implementation/FINAL_implementation_report.md` exists AND the autopilot state shows Step 2d (Implement Tests) is completed AND `_docs/04_refactoring/FINAL_report.md` does not exist +**Step 5 — Run Tests** 
+Condition: `_docs/03_implementation/FINAL_implementation_report.md` exists AND the autopilot state shows Step 4 (Implement Tests) is completed AND the autopilot state does NOT show Step 5 (Run Tests) as completed + +Action: Read and execute `.cursor/skills/test-run/SKILL.md` + +Verifies the implemented test suite passes before proceeding to refactoring. The tests form the safety net for all subsequent code changes. + +--- + +**Step 6 — Refactor** +Condition: the autopilot state shows Step 5 (Run Tests) is completed AND `_docs/04_refactoring/FINAL_report.md` does not exist Action: Read and execute `.cursor/skills/refactor/SKILL.md` @@ -93,37 +86,8 @@ If `_docs/04_refactoring/` has phase reports, the refactor skill detects complet --- -**Step 2ea — UI Design (conditional)** -Condition: the autopilot state shows Step 2e (Refactor) is completed AND the autopilot state does NOT show Step 2ea (UI Design) as completed or skipped - -**UI Project Detection** — the project is a UI project if ANY of the following are true: -- `package.json` exists in the workspace root or any subdirectory -- `*.html`, `*.jsx`, `*.tsx` files exist in the workspace -- `_docs/02_document/components/` contains a component whose `description.md` mentions UI, frontend, page, screen, dashboard, form, or view -- `_docs/02_document/architecture.md` mentions frontend, UI layer, SPA, or client-side rendering - -If the project is NOT a UI project → mark Step 2ea as `skipped` in the state file and auto-chain to Step 2f. - -If the project IS a UI project → present using Choose format: - -``` -══════════════════════════════════════ - DECISION REQUIRED: UI project detected — generate/update mockups? 
-══════════════════════════════════════ - A) Generate UI mockups before new task planning (recommended) - B) Skip — proceed directly to new task -══════════════════════════════════════ - Recommendation: A — mockups inform better frontend task specs -══════════════════════════════════════ -``` - -- If user picks A → Read and execute `.cursor/skills/ui-design/SKILL.md`. After completion, auto-chain to Step 2f (New Task). -- If user picks B → Mark Step 2ea as `skipped` in the state file, auto-chain to Step 2f (New Task). - ---- - -**Step 2f — New Task** -Condition: (the autopilot state shows Step 2ea (UI Design) is completed or skipped) AND the autopilot state does NOT show Step 2f (New Task) as completed +**Step 7 — New Task** +Condition: the autopilot state shows Step 6 (Refactor) is completed AND the autopilot state does NOT show Step 7 (New Task) as completed Action: Read and execute `.cursor/skills/new-task/SKILL.md` @@ -131,46 +95,26 @@ The new-task skill interactively guides the user through defining new functional --- -**Step 2g — Implement** -Condition: the autopilot state shows Step 2f (New Task) is completed AND `_docs/03_implementation/` does not contain a FINAL report covering the new tasks (check state for distinction between test implementation and feature implementation) +**Step 8 — Implement** +Condition: the autopilot state shows Step 7 (New Task) is completed AND `_docs/03_implementation/` does not contain a FINAL report covering the new tasks (check state for distinction between test implementation and feature implementation) Action: Read and execute `.cursor/skills/implement/SKILL.md` -The implement skill reads the new tasks from `_docs/02_tasks/` and implements them. Tasks already implemented in Step 2d are skipped (the implement skill tracks completed tasks in batch reports). +The implement skill reads the new tasks from `_docs/02_tasks/` and implements them. 
Tasks already implemented in Step 4 are skipped (the implement skill tracks completed tasks in batch reports). If `_docs/03_implementation/` has batch reports from this phase, the implement skill detects completed tasks and continues. --- -**Step 2h — Run Tests** -Condition: the autopilot state shows Step 2g (Implement) is completed AND the autopilot state does NOT show Step 2h (Run Tests) as completed +**Step 9 — Run Tests** +Condition: the autopilot state shows Step 8 (Implement) is completed AND the autopilot state does NOT show Step 9 (Run Tests) as completed -Action: Run the full test suite to verify the implementation before deployment. - -1. If `scripts/run-tests.sh` exists (generated by the test-spec skill Phase 4), execute it -2. Otherwise, detect the project's test runner manually (e.g., `pytest`, `dotnet test`, `cargo test`, `npm test`) and run all unit tests; if `docker-compose.test.yml` or an equivalent test environment exists, spin it up and run the blackbox test suite -3. **Report results**: present a summary of passed/failed/skipped tests - -If all tests pass → auto-chain to Step 2hb (Security Audit). 
- -If tests fail → present using Choose format: - -``` -══════════════════════════════════════ - TEST RESULTS: [N passed, M failed, K skipped] -══════════════════════════════════════ - A) Fix failing tests and re-run - B) Proceed to deploy anyway (not recommended) - C) Abort — fix manually -══════════════════════════════════════ - Recommendation: A — fix failures before deploying -══════════════════════════════════════ -``` +Action: Read and execute `.cursor/skills/test-run/SKILL.md` --- -**Step 2hb — Security Audit (optional)** -Condition: the autopilot state shows Step 2h (Run Tests) is completed AND the autopilot state does NOT show Step 2hb (Security Audit) as completed or skipped AND (`_docs/04_deploy/` does not exist or is incomplete) +**Step 10 — Security Audit (optional)** +Condition: the autopilot state shows Step 9 (Run Tests) is completed AND the autopilot state does NOT show Step 10 (Security Audit) as completed or skipped AND (`_docs/04_deploy/` does not exist or is incomplete) Action: Present using Choose format: @@ -185,13 +129,13 @@ Action: Present using Choose format: ══════════════════════════════════════ ``` -- If user picks A → Read and execute `.cursor/skills/security/SKILL.md`. After completion, auto-chain to Step 2i (Deploy). -- If user picks B → Mark Step 2hb as `skipped` in the state file, auto-chain to Step 2i (Deploy). +- If user picks A → Read and execute `.cursor/skills/security/SKILL.md`. After completion, auto-chain to Step 11 (Performance Test). +- If user picks B → Mark Step 10 as `skipped` in the state file, auto-chain to Step 11 (Performance Test). 
--- -**Step 2hc — Performance Test (optional)** -Condition: the autopilot state shows Step 2hb (Security Audit) is completed or skipped AND the autopilot state does NOT show Step 2hc (Performance Test) as completed or skipped AND (`_docs/04_deploy/` does not exist or is incomplete) +**Step 11 — Performance Test (optional)** +Condition: the autopilot state shows Step 10 (Security Audit) is completed or skipped AND the autopilot state does NOT show Step 11 (Performance Test) as completed or skipped AND (`_docs/04_deploy/` does not exist or is incomplete) Action: Present using Choose format: @@ -212,13 +156,13 @@ Action: Present using Choose format: 2. Otherwise, check if `_docs/02_document/tests/performance-tests.md` exists for test scenarios, detect appropriate load testing tool (k6, locust, artillery, wrk, or built-in benchmarks), and execute performance test scenarios against the running system 3. Present results vs acceptance criteria thresholds 4. If thresholds fail → present Choose format: A) Fix and re-run, B) Proceed anyway, C) Abort - 5. After completion, auto-chain to Step 2i (Deploy) -- If user picks B → Mark Step 2hc as `skipped` in the state file, auto-chain to Step 2i (Deploy). + 5. After completion, auto-chain to Step 12 (Deploy) +- If user picks B → Mark Step 11 as `skipped` in the state file, auto-chain to Step 12 (Deploy). --- -**Step 2i — Deploy** -Condition: the autopilot state shows Step 2h (Run Tests) is completed AND (Step 2hb is completed or skipped) AND (Step 2hc is completed or skipped) AND (`_docs/04_deploy/` does not exist or is incomplete) +**Step 12 — Deploy** +Condition: the autopilot state shows Step 9 (Run Tests) is completed AND (Step 10 is completed or skipped) AND (Step 11 is completed or skipped) AND (`_docs/04_deploy/` does not exist or is incomplete) Action: Read and execute `.cursor/skills/deploy/SKILL.md` @@ -227,7 +171,7 @@ After deployment completes, the existing-code workflow is done. 
--- **Re-Entry After Completion** -Condition: the autopilot state shows `step: done` OR all steps through 2i (Deploy) are completed +Condition: the autopilot state shows `step: done` OR all steps through 12 (Deploy) are completed Action: The project completed a full cycle. Present status and loop back to New Task: @@ -243,22 +187,48 @@ Action: The project completed a full cycle. Present status and loop back to New ══════════════════════════════════════ ``` -- If user picks A → set `step: 2f`, `status: not_started` in the state file, then auto-chain to Step 2f (New Task). Previous cycle history stays in Completed Steps. +- If user picks A → set `step: 7`, `status: not_started` in the state file, then auto-chain to Step 7 (New Task). Previous cycle history stays in Completed Steps. - If user picks B → report final project status and exit. ## Auto-Chain Rules | Completed Step | Next Action | |---------------|-------------| -| Document (existing code) | Auto-chain → Blackbox Test Spec (Step 2b) | -| Blackbox Test Spec (Step 2b) | Auto-chain → Decompose Tests (Step 2c) | -| Decompose Tests (Step 2c) | **Session boundary** — suggest new conversation before Implement Tests | -| Implement Tests (Step 2d) | Auto-chain → Refactor (Step 2e) | -| Refactor (Step 2e) | Auto-chain → UI Design detection (Step 2ea) | -| UI Design (Step 2ea, done or skipped) | Auto-chain → New Task (Step 2f) | -| New Task (Step 2f) | **Session boundary** — suggest new conversation before Implement | -| Implement (Step 2g) | Auto-chain → Run Tests (Step 2h) | -| Run Tests (Step 2h, all pass) | Auto-chain → Security Audit choice (Step 2hb) | -| Security Audit (Step 2hb, done or skipped) | Auto-chain → Performance Test choice (Step 2hc) | -| Performance Test (Step 2hc, done or skipped) | Auto-chain → Deploy (Step 2i) | -| Deploy (Step 2i) | **Workflow complete** — existing-code flow done | +| Document (1) | Auto-chain → Test Spec (2) | +| Test Spec (2) | Auto-chain → Decompose Tests (3) | +| Decompose 
Tests (3) | **Session boundary** — suggest new conversation before Implement Tests | +| Implement Tests (4) | Auto-chain → Run Tests (5) | +| Run Tests (5, all pass) | Auto-chain → Refactor (6) | +| Refactor (6) | Auto-chain → New Task (7) | +| New Task (7) | **Session boundary** — suggest new conversation before Implement | +| Implement (8) | Auto-chain → Run Tests (9) | +| Run Tests (9, all pass) | Auto-chain → Security Audit choice (10) | +| Security Audit (10, done or skipped) | Auto-chain → Performance Test choice (11) | +| Performance Test (11, done or skipped) | Auto-chain → Deploy (12) | +| Deploy (12) | **Workflow complete** — existing-code flow done | + +## Status Summary Template + +``` +═══════════════════════════════════════════════════ + AUTOPILOT STATUS (existing-code) +═══════════════════════════════════════════════════ + Step 1 Document [DONE / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] + Step 2 Test Spec [DONE / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] + Step 3 Decompose Tests [DONE (N tasks) / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] + Step 4 Implement Tests [DONE / IN PROGRESS (batch M) / NOT STARTED / FAILED (retry N/3)] + Step 5 Run Tests [DONE (N passed, M failed) / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] + Step 6 Refactor [DONE / IN PROGRESS (phase N) / NOT STARTED / FAILED (retry N/3)] + Step 7 New Task [DONE (N tasks) / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] + Step 8 Implement [DONE / IN PROGRESS (batch M of ~N) / NOT STARTED / FAILED (retry N/3)] + Step 9 Run Tests [DONE (N passed, M failed) / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] + Step 10 Security Audit [DONE / SKIPPED / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] + Step 11 Performance Test [DONE / SKIPPED / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] + Step 12 Deploy [DONE / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] +═══════════════════════════════════════════════════ + Current: Step N — Name + SubStep: M — [sub-skill internal 
step name] + Retry: [N/3 if retrying, omit if 0] + Action: [what will happen next] +═══════════════════════════════════════════════════ +``` diff --git a/.cursor/skills/autopilot/flows/greenfield.md b/.cursor/skills/autopilot/flows/greenfield.md index 859094d..04bf16f 100644 --- a/.cursor/skills/autopilot/flows/greenfield.md +++ b/.cursor/skills/autopilot/flows/greenfield.md @@ -1,21 +1,21 @@ # Greenfield Workflow -Workflow for new projects built from scratch. Flows linearly: Problem → Research → Plan → UI Design (if applicable) → Decompose → Implement → Run Tests → Security Audit (optional) → Deploy. +Workflow for new projects built from scratch. Flows linearly: Problem → Research → Plan → UI Design (if applicable) → Decompose → Implement → Run Tests → Security Audit (optional) → Performance Test (optional) → Deploy. ## Step Reference Table -| Step | Name | Sub-Skill | Internal SubSteps | -|------|-----------|------------------------|---------------------------------------| -| 0 | Problem | problem/SKILL.md | Phase 1–4 | -| 1 | Research | research/SKILL.md | Mode A: Phase 1–4 · Mode B: Step 0–8 | -| 2 | Plan | plan/SKILL.md | Step 1–6 + Final | -| 2a | UI Design | ui-design/SKILL.md | Phase 0–8 (conditional — UI projects only) | -| 3 | Decompose | decompose/SKILL.md | Step 1–4 | -| 4 | Implement | implement/SKILL.md | (batch-driven, no fixed sub-steps) | -| 5 | Run Tests | (autopilot-managed) | Unit tests → Blackbox tests | -| 5b | Security Audit | security/SKILL.md | Phase 1–5 (optional) | -| 5c | Performance Test | (autopilot-managed) | Load/stress tests (optional) | -| 6 | Deploy | deploy/SKILL.md | Step 1–7 | +| Step | Name | Sub-Skill | Internal SubSteps | +|------|------|-----------|-------------------| +| 1 | Problem | problem/SKILL.md | Phase 1–4 | +| 2 | Research | research/SKILL.md | Mode A: Phase 1–4 · Mode B: Step 0–8 | +| 3 | Plan | plan/SKILL.md | Step 1–6 + Final | +| 4 | UI Design | ui-design/SKILL.md | Phase 0–8 (conditional — UI projects only) | 
+| 5 | Decompose | decompose/SKILL.md | Step 1–4 | +| 6 | Implement | implement/SKILL.md | (batch-driven, no fixed sub-steps) | +| 7 | Run Tests | test-run/SKILL.md | Steps 1–4 | +| 8 | Security Audit | security/SKILL.md | Phase 1–5 (optional) | +| 9 | Performance Test | (autopilot-managed) | Load/stress tests (optional) | +| 10 | Deploy | deploy/SKILL.md | Step 1–7 | ## Detection Rules @@ -23,7 +23,7 @@ Check rules in order — first match wins. --- -**Step 0 — Problem Gathering** +**Step 1 — Problem Gathering** Condition: `_docs/00_problem/` does not exist, OR any of these are missing/empty: - `problem.md` - `restrictions.md` @@ -34,14 +34,14 @@ Action: Read and execute `.cursor/skills/problem/SKILL.md` --- -**Step 1 — Research (Initial)** +**Step 2 — Research (Initial)** Condition: `_docs/00_problem/` is complete AND `_docs/01_solution/` has no `solution_draft*.md` files Action: Read and execute `.cursor/skills/research/SKILL.md` (will auto-detect Mode A) --- -**Step 1b — Research Decision** +**Research Decision** (inline gate between Step 2 and Step 3) Condition: `_docs/01_solution/` contains `solution_draft*.md` files AND `_docs/01_solution/solution.md` does not exist AND `_docs/02_document/architecture.md` does not exist Action: Present the current research state to the user: @@ -63,11 +63,11 @@ Then present using the **Choose format**: ``` - If user picks A → Read and execute `.cursor/skills/research/SKILL.md` (will auto-detect Mode B) -- If user picks B → auto-chain to Step 2 (Plan) +- If user picks B → auto-chain to Step 3 (Plan) --- -**Step 2 — Plan** +**Step 3 — Plan** Condition: `_docs/01_solution/` has `solution_draft*.md` files AND `_docs/02_document/architecture.md` does not exist Action: @@ -78,8 +78,8 @@ If `_docs/02_document/` exists but is incomplete (has some artifacts but no `FIN --- -**Step 2a — UI Design (conditional)** -Condition: `_docs/02_document/architecture.md` exists AND the autopilot state does NOT show Step 2a (UI Design) as completed 
or skipped AND the project is a UI project +**Step 4 — UI Design (conditional)** +Condition: `_docs/02_document/architecture.md` exists AND the autopilot state does NOT show Step 4 (UI Design) as completed or skipped AND the project is a UI project **UI Project Detection** — the project is a UI project if ANY of the following are true: - `package.json` exists in the workspace root or any subdirectory @@ -88,7 +88,7 @@ Condition: `_docs/02_document/architecture.md` exists AND the autopilot state do - `_docs/02_document/architecture.md` mentions frontend, UI layer, SPA, or client-side rendering - `_docs/01_solution/solution.md` mentions frontend, web interface, or user-facing UI -If the project is NOT a UI project → mark Step 2a as `skipped` in the state file and auto-chain to Step 3. +If the project is NOT a UI project → mark Step 4 as `skipped` in the state file and auto-chain to Step 5. If the project IS a UI project → present using Choose format: @@ -104,12 +104,12 @@ If the project IS a UI project → present using Choose format: ══════════════════════════════════════ ``` -- If user picks A → Read and execute `.cursor/skills/ui-design/SKILL.md`. After completion, auto-chain to Step 3 (Decompose). -- If user picks B → Mark Step 2a as `skipped` in the state file, auto-chain to Step 3 (Decompose). +- If user picks A → Read and execute `.cursor/skills/ui-design/SKILL.md`. After completion, auto-chain to Step 5 (Decompose). +- If user picks B → Mark Step 4 as `skipped` in the state file, auto-chain to Step 5 (Decompose). 
--- -**Step 3 — Decompose** +**Step 5 — Decompose** Condition: `_docs/02_document/` contains `architecture.md` AND `_docs/02_document/components/` has at least one component AND `_docs/02_tasks/` does not exist or has no task files (excluding `_dependencies_table.md`) Action: Read and execute `.cursor/skills/decompose/SKILL.md` @@ -118,7 +118,7 @@ If `_docs/02_tasks/` has some task files already, the decompose skill's resumabi --- -**Step 4 — Implement** +**Step 6 — Implement** Condition: `_docs/02_tasks/` contains task files AND `_dependencies_table.md` exists AND `_docs/03_implementation/FINAL_implementation_report.md` does not exist Action: Read and execute `.cursor/skills/implement/SKILL.md` @@ -127,35 +127,15 @@ If `_docs/03_implementation/` has batch reports, the implement skill detects com --- -**Step 5 — Run Tests** -Condition: `_docs/03_implementation/FINAL_implementation_report.md` exists AND the autopilot state does NOT show Step 5 (Run Tests) as completed AND (`_docs/04_deploy/` does not exist or is incomplete) +**Step 7 — Run Tests** +Condition: `_docs/03_implementation/FINAL_implementation_report.md` exists AND the autopilot state does NOT show Step 7 (Run Tests) as completed AND (`_docs/04_deploy/` does not exist or is incomplete) -Action: Run the full test suite to verify the implementation before deployment. - -1. If `scripts/run-tests.sh` exists (generated by the test-spec skill Phase 4), execute it -2. Otherwise, detect the project's test runner manually (e.g., `pytest`, `dotnet test`, `cargo test`, `npm test`) and run all unit tests; if `docker-compose.test.yml` or an equivalent test environment exists, spin it up and run the blackbox test suite -3. **Report results**: present a summary of passed/failed/skipped tests - -If all tests pass → auto-chain to Step 5b (Security Audit). 
- -If tests fail → present using Choose format: - -``` -══════════════════════════════════════ - TEST RESULTS: [N passed, M failed, K skipped] -══════════════════════════════════════ - A) Fix failing tests and re-run - B) Proceed to deploy anyway (not recommended) - C) Abort — fix manually -══════════════════════════════════════ - Recommendation: A — fix failures before deploying -══════════════════════════════════════ -``` +Action: Read and execute `.cursor/skills/test-run/SKILL.md` --- -**Step 5b — Security Audit (optional)** -Condition: the autopilot state shows Step 5 (Run Tests) is completed AND the autopilot state does NOT show Step 5b (Security Audit) as completed or skipped AND (`_docs/04_deploy/` does not exist or is incomplete) +**Step 8 — Security Audit (optional)** +Condition: the autopilot state shows Step 7 (Run Tests) is completed AND the autopilot state does NOT show Step 8 (Security Audit) as completed or skipped AND (`_docs/04_deploy/` does not exist or is incomplete) Action: Present using Choose format: @@ -170,13 +150,13 @@ Action: Present using Choose format: ══════════════════════════════════════ ``` -- If user picks A → Read and execute `.cursor/skills/security/SKILL.md`. After completion, auto-chain to Step 6 (Deploy). -- If user picks B → Mark Step 5b as `skipped` in the state file, auto-chain to Step 6 (Deploy). +- If user picks A → Read and execute `.cursor/skills/security/SKILL.md`. After completion, auto-chain to Step 9 (Performance Test). +- If user picks B → Mark Step 8 as `skipped` in the state file, auto-chain to Step 9 (Performance Test). 
--- -**Step 5c — Performance Test (optional)** -Condition: the autopilot state shows Step 5b (Security Audit) is completed or skipped AND the autopilot state does NOT show Step 5c (Performance Test) as completed or skipped AND (`_docs/04_deploy/` does not exist or is incomplete) +**Step 9 — Performance Test (optional)** +Condition: the autopilot state shows Step 8 (Security Audit) is completed or skipped AND the autopilot state does NOT show Step 9 (Performance Test) as completed or skipped AND (`_docs/04_deploy/` does not exist or is incomplete) Action: Present using Choose format: @@ -197,13 +177,13 @@ Action: Present using Choose format: 2. Otherwise, check if `_docs/02_document/tests/performance-tests.md` exists for test scenarios, detect appropriate load testing tool (k6, locust, artillery, wrk, or built-in benchmarks), and execute performance test scenarios against the running system 3. Present results vs acceptance criteria thresholds 4. If thresholds fail → present Choose format: A) Fix and re-run, B) Proceed anyway, C) Abort - 5. After completion, auto-chain to Step 6 (Deploy) -- If user picks B → Mark Step 5c as `skipped` in the state file, auto-chain to Step 6 (Deploy). + 5. After completion, auto-chain to Step 10 (Deploy) +- If user picks B → Mark Step 9 as `skipped` in the state file, auto-chain to Step 10 (Deploy). --- -**Step 6 — Deploy** -Condition: the autopilot state shows Step 5 (Run Tests) is completed AND (Step 5b is completed or skipped) AND (Step 5c is completed or skipped) AND (`_docs/04_deploy/` does not exist or is incomplete) +**Step 10 — Deploy** +Condition: the autopilot state shows Step 7 (Run Tests) is completed AND (Step 8 is completed or skipped) AND (Step 9 is completed or skipped) AND (`_docs/04_deploy/` does not exist or is incomplete) Action: Read and execute `.cursor/skills/deploy/SKILL.md` @@ -218,14 +198,38 @@ Action: Report project completion with summary. 
If the user runs autopilot again | Completed Step | Next Action | |---------------|-------------| -| Problem Gathering | Auto-chain → Research (Mode A) | -| Research (any round) | Auto-chain → Research Decision (ask user: another round or proceed?) | -| Research Decision → proceed | Auto-chain → Plan | -| Plan | Auto-chain → UI Design detection (Step 2a) | -| UI Design (done or skipped) | Auto-chain → Decompose | -| Decompose | **Session boundary** — suggest new conversation before Implement | -| Implement | Auto-chain → Run Tests (Step 5) | -| Run Tests (all pass) | Auto-chain → Security Audit choice (Step 5b) | -| Security Audit (done or skipped) | Auto-chain → Performance Test choice (Step 5c) | -| Performance Test (done or skipped) | Auto-chain → Deploy (Step 6) | -| Deploy | Report completion | +| Problem (1) | Auto-chain → Research (2) | +| Research (2) | Auto-chain → Research Decision (ask user: another round or proceed?) | +| Research Decision → proceed | Auto-chain → Plan (3) | +| Plan (3) | Auto-chain → UI Design detection (4) | +| UI Design (4, done or skipped) | Auto-chain → Decompose (5) | +| Decompose (5) | **Session boundary** — suggest new conversation before Implement | +| Implement (6) | Auto-chain → Run Tests (7) | +| Run Tests (7, all pass) | Auto-chain → Security Audit choice (8) | +| Security Audit (8, done or skipped) | Auto-chain → Performance Test choice (9) | +| Performance Test (9, done or skipped) | Auto-chain → Deploy (10) | +| Deploy (10) | Report completion | + +## Status Summary Template + +``` +═══════════════════════════════════════════════════ + AUTOPILOT STATUS (greenfield) +═══════════════════════════════════════════════════ + Step 1 Problem [DONE / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] + Step 2 Research [DONE (N drafts) / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] + Step 3 Plan [DONE / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] + Step 4 UI Design [DONE / SKIPPED / IN PROGRESS / NOT STARTED / FAILED (retry 
N/3)] + Step 5 Decompose [DONE (N tasks) / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] + Step 6 Implement [DONE / IN PROGRESS (batch M of ~N) / NOT STARTED / FAILED (retry N/3)] + Step 7 Run Tests [DONE (N passed, M failed) / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] + Step 8 Security Audit [DONE / SKIPPED / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] + Step 9 Performance Test [DONE / SKIPPED / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] + Step 10 Deploy [DONE / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] +═══════════════════════════════════════════════════ + Current: Step N — Name + SubStep: M — [sub-skill internal step name] + Retry: [N/3 if retrying, omit if 0] + Action: [what will happen next] +═══════════════════════════════════════════════════ +``` diff --git a/.cursor/skills/autopilot/protocols.md b/.cursor/skills/autopilot/protocols.md index 18eb731..406bf72 100644 --- a/.cursor/skills/autopilot/protocols.md +++ b/.cursor/skills/autopilot/protocols.md @@ -63,16 +63,16 @@ Several workflow steps create work items (epics, tasks, links). 
The system suppo ### Steps That Require Work Item Tracker -| Step | Sub-Step | Tracker Action | -|------|----------|----------------| -| 2 (Plan) | Step 6 — Epics | Create epics for each component | -| 2c (Decompose Tests) | Step 1t + Step 3 — All test tasks | Create ticket per task, link to epic | -| 2f (New Task) | Step 7 — Ticket | Create ticket per task, link to epic | -| 3 (Decompose) | Step 1–3 — All tasks | Create ticket per task, link to epic | +| Flow | Step | Sub-Step | Tracker Action | +|------|------|----------|----------------| +| greenfield | 3 (Plan) | Step 6 — Epics | Create epics for each component | +| greenfield | 5 (Decompose) | Step 1–3 — All tasks | Create ticket per task, link to epic | +| existing-code | 3 (Decompose Tests) | Step 1t + Step 3 — All test tasks | Create ticket per task, link to epic | +| existing-code | 7 (New Task) | Step 7 — Ticket | Create ticket per task, link to epic | ### Authentication Gate -Before entering **Step 2 (Plan)**, **Step 2c (Decompose Tests)**, **Step 2f (New Task)**, or **Step 3 (Decompose)** for the first time, the autopilot must: +Before entering a step that requires work item tracking (see table above) for the first time, the autopilot must: 1. Call `mcp_auth` on the detected tracker's MCP server 2. If authentication succeeds → proceed normally @@ -306,57 +306,7 @@ For steps that produce `_docs/` artifacts (problem, research, plan, decompose, d ## Status Summary -On every invocation, before executing any skill, present a status summary built from the state file (with folder scan fallback). Use the template matching the active flow (see Flow Resolution in SKILL.md). 
- -### Greenfield Flow - -``` -═══════════════════════════════════════════════════ - AUTOPILOT STATUS (greenfield) -═══════════════════════════════════════════════════ - Step 0 Problem [DONE / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] - Step 1 Research [DONE (N drafts) / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] - Step 2 Plan [DONE / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] - Step 2a UI Design [DONE / SKIPPED / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] - Step 3 Decompose [DONE (N tasks) / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] - Step 4 Implement [DONE / IN PROGRESS (batch M of ~N) / NOT STARTED / FAILED (retry N/3)] - Step 5 Run Tests [DONE (N passed, M failed) / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] - Step 5b Security Audit [DONE / SKIPPED / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] - Step 5c Performance Test [DONE / SKIPPED / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] - Step 6 Deploy [DONE / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] -═══════════════════════════════════════════════════ - Current: Step N — Name - SubStep: M — [sub-skill internal step name] - Retry: [N/3 if retrying, omit if 0] - Action: [what will happen next] -═══════════════════════════════════════════════════ -``` - -### Existing Code Flow - -``` -═══════════════════════════════════════════════════ - AUTOPILOT STATUS (existing-code) -═══════════════════════════════════════════════════ - Pre Document [DONE / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] - Step 2b Blackbox Test Spec [DONE / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] - Step 2c Decompose Tests [DONE (N tasks) / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] - Step 2d Implement Tests [DONE / IN PROGRESS (batch M) / NOT STARTED / FAILED (retry N/3)] - Step 2e Refactor [DONE / IN PROGRESS (phase N) / NOT STARTED / FAILED (retry N/3)] - Step 2ea UI Design [DONE / SKIPPED / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] - Step 2f New Task [DONE (N tasks) / IN PROGRESS 
/ NOT STARTED / FAILED (retry N/3)] - Step 2g Implement [DONE / IN PROGRESS (batch M of ~N) / NOT STARTED / FAILED (retry N/3)] - Step 2h Run Tests [DONE (N passed, M failed) / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] - Step 2hb Security Audit [DONE / SKIPPED / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] - Step 2hc Performance Test [DONE / SKIPPED / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] - Step 2i Deploy [DONE / IN PROGRESS / NOT STARTED / FAILED (retry N/3)] -═══════════════════════════════════════════════════ - Current: Step N — Name - SubStep: M — [sub-skill internal step name] - Retry: [N/3 if retrying, omit if 0] - Action: [what will happen next] -═══════════════════════════════════════════════════ -``` +On every invocation, before executing any skill, present a status summary built from the state file (with folder scan fallback). Use the Status Summary Template from the active flow file (`flows/greenfield.md` or `flows/existing-code.md`). For re-entry (state file exists), also include: - Key decisions from the state file's `Key Decisions` section diff --git a/.cursor/skills/autopilot/state.md b/.cursor/skills/autopilot/state.md index 50650aa..57e6444 100644 --- a/.cursor/skills/autopilot/state.md +++ b/.cursor/skills/autopilot/state.md @@ -10,28 +10,29 @@ The autopilot persists its state to `_docs/_autopilot_state.md`. 
This file is th # Autopilot State ## Current Step -step: [0-6 or "2a" / "2b" / "2c" / "2d" / "2e" / "2ea" / "2f" / "2g" / "2h" / "2hb" / "2hc" / "2i" or "5b" / "5c" or "done"] -name: [Problem / Research / Plan / UI Design / Blackbox Test Spec / Decompose Tests / Implement Tests / Refactor / UI Design / New Task / Implement / Run Tests / Security Audit / Performance Test / Deploy / Decompose / Done] +flow: [greenfield | existing-code] +step: [1-10 for greenfield, 1-12 for existing-code, or "done"] +name: [step name from the active flow's Step Reference Table] status: [not_started / in_progress / completed / skipped / failed] sub_step: [optional — sub-skill internal step number + name if interrupted mid-step] retry_count: [0-3 — number of consecutive auto-retry attempts for current step, reset to 0 on success] -## Step ↔ SubStep Reference -(include the step reference table from the active flow file) - When updating `Current Step`, always write it as: - step: N ← autopilot step (0–6 or 2b/2c/2d/2e/2ea/2f/2g/2h/2hb/2hc/2i or 5b/5c) - sub_step: M ← sub-skill's own internal step/phase number + name - retry_count: 0 ← reset on new step or success; increment on each failed retry + flow: existing-code ← active flow + step: N ← autopilot step (sequential integer) + sub_step: M ← sub-skill's own internal step/phase number + name + retry_count: 0 ← reset on new step or success; increment on each failed retry Example: - step: 2 + flow: greenfield + step: 3 name: Plan status: in_progress sub_step: 4 — Architecture Review & Risk Assessment retry_count: 0 Example (failed after 3 retries): - step: 2b - name: Blackbox Test Spec + flow: existing-code + step: 2 + name: Test Spec status: failed sub_step: 1b — Test Case Generation retry_count: 3 @@ -40,8 +41,8 @@ Example (failed after 3 retries): | Step | Name | Completed | Key Outcome | |------|------|-----------|-------------| -| 0 | Problem | [date] | [one-line summary] | -| 1 | Research | [date] | [N drafts, final approach summary] 
| +| 1 | [name] | [date] | [one-line summary] | +| 2 | [name] | [date] | [one-line summary] | | ... | ... | ... | ... | ## Key Decisions @@ -69,10 +70,10 @@ notes: [any context for next session] ### State File Rules -1. **Create** the state file on the very first autopilot invocation (after state detection determines Step 0) +1. **Create** the state file on the very first autopilot invocation (after state detection determines Step 1) 2. **Update** the state file after every step completion, every session boundary, every BLOCKING gate confirmation, and every failed retry attempt 3. **Read** the state file as the first action on every invocation — before folder scanning -4. **Cross-check**: after reading the state file, verify against actual `_docs/` folder contents. If they disagree (e.g., state file says Step 2 but `_docs/02_document/architecture.md` already exists), trust the folder structure and update the state file to match +4. **Cross-check**: after reading the state file, verify against actual `_docs/` folder contents. If they disagree (e.g., state file says Step 3 but `_docs/02_document/architecture.md` already exists), trust the folder structure and update the state file to match 5. **Never delete** the state file. It accumulates history across the entire project lifecycle 6. **Retry tracking**: increment `retry_count` on each failed auto-retry; reset to `0` when the step succeeds or the user manually resets. If `retry_count` reaches 3, set `status: failed` and add an entry to `Blockers` 7. **Failed state on re-entry**: if the state file shows `status: failed` with `retry_count: 3`, do NOT auto-retry — present the blocker to the user and wait for their decision before proceeding @@ -83,7 +84,7 @@ Read `_docs/_autopilot_state.md` first. If it exists and is consistent with the ### Folder Scan Rules (fallback) -Scan `_docs/` to determine the current workflow position. 
The detection rules are defined in each flow file (`flows/greenfield.md` and `flows/existing-code.md`). Check the existing-code flow first (Pre-Step detection), then greenfield flow rules. First match wins. +Scan `_docs/` to determine the current workflow position. The detection rules are defined in each flow file (`flows/greenfield.md` and `flows/existing-code.md`). Check the existing-code flow first (Step 1 detection), then greenfield flow rules. First match wins. ## Re-Entry Protocol @@ -97,12 +98,12 @@ When the user invokes `/autopilot` and work already exists: ## Session Boundaries -After any decompose/planning step completes (Step 2c, Step 2f, or Step 3), **do not auto-chain to implement**. Instead: +After any decompose/planning step completes, **do not auto-chain to implement**. Instead: 1. Update state file: mark the step as completed, set current step to the next implement step with status `not_started` - - After Step 2c (Decompose Tests) → set current step to 2d (Implement Tests) - - After Step 2f (New Task) → set current step to 2g (Implement) - - After Step 3 (Decompose) → set current step to 4 (Implement) + - Existing-code flow: After Step 3 (Decompose Tests) → set current step to 4 (Implement Tests) + - Existing-code flow: After Step 7 (New Task) → set current step to 8 (Implement) + - Greenfield flow: After Step 5 (Decompose) → set current step to 6 (Implement) 2. Write `Last Session` section: `reason: session boundary`, `notes: Decompose complete, implementation ready` 3. Present a summary: number of tasks, estimated batches, total complexity points 4. Use Choose format: diff --git a/.cursor/skills/test-run/SKILL.md b/.cursor/skills/test-run/SKILL.md new file mode 100644 index 0000000..e8a52c9 --- /dev/null +++ b/.cursor/skills/test-run/SKILL.md @@ -0,0 +1,75 @@ +--- +name: test-run +description: | + Run the project's test suite, report results, and handle failures. 
+ Detects test runners automatically (pytest, dotnet test, cargo test, npm test, make test) + or uses scripts/run-tests.sh if available. + Trigger phrases: + - "run tests", "test suite", "verify tests" +category: build +tags: [testing, verification, test-suite] +disable-model-invocation: true +--- + +# Test Run + +Run the project's test suite and report results. This skill is invoked by the autopilot at verification checkpoints — after implementing tests, after implementing features, or at any point where the test suite must pass before proceeding. + +## Workflow + +### 1. Detect Test Runner + +Check in order — first match wins: + +1. `scripts/run-tests.sh` exists → use it +2. `docker-compose.test.yml` or equivalent test environment exists → spin it up first, then continue to auto-detection (item 3) +3. Auto-detect from project files: + - `pytest.ini`, `pyproject.toml` with `[tool.pytest]`, or `conftest.py` → `pytest` + - `*.csproj` or `*.sln` → `dotnet test` + - `Cargo.toml` → `cargo test` + - `package.json` with test script → `npm test` + - `Makefile` with `test` target → `make test` + +If no runner is detected → report failure and ask the user to specify one. + +### 2. Run Tests + +1. Execute the detected test runner +2. Capture output: passed, failed, skipped, errors +3. If a test environment was spun up, tear it down after tests complete + +### 3. Report Results + +Present a summary: + +``` +══════════════════════════════════════ + TEST RESULTS: [N passed, M failed, K skipped] +══════════════════════════════════════ +``` + +### 4. Handle Outcome + +**All tests pass** → return success to the autopilot for auto-chain. 
+ +**Tests fail** → present using Choose format: + +``` +══════════════════════════════════════ + TEST RESULTS: [N passed, M failed, K skipped] +══════════════════════════════════════ + A) Fix failing tests and re-run + B) Proceed anyway (not recommended) + C) Abort — fix manually +══════════════════════════════════════ + Recommendation: A — fix failures before proceeding +══════════════════════════════════════ +``` + +- If user picks A → attempt to fix failures, then re-run (loop back to step 2) +- If user picks B → return success with warning to the autopilot +- If user picks C → return failure to the autopilot + +## Trigger Conditions + +This skill is invoked by the autopilot at test verification checkpoints. It is not typically invoked directly by the user. diff --git a/.cursor/skills/test-spec/SKILL.md b/.cursor/skills/test-spec/SKILL.md index 54a056d..7dd3e48 100644 --- a/.cursor/skills/test-spec/SKILL.md +++ b/.cursor/skills/test-spec/SKILL.md @@ -59,16 +59,16 @@ Every input data item MUST have a corresponding expected result that defines wha Expected results live inside `_docs/00_problem/input_data/` in one or both of: -1. **Mapping file** (`input_data/expected_results.md`): a table pairing each input with its quantifiable expected output, using the format defined in `.cursor/skills/test-spec/templates/expected-results.md` +1. **Mapping file** (`input_data/expected_results/results_report.md`): a table pairing each input with its quantifiable expected output, using the format defined in `.cursor/skills/test-spec/templates/expected-results.md` 2. **Reference files folder** (`input_data/expected_results/`): machine-readable files (JSON, CSV, etc.) 
containing full expected outputs for complex cases, referenced from the mapping file ``` input_data/ -├── expected_results.md ← required: input→expected result mapping -├── expected_results/ ← optional: complex reference files -│ ├── image_01_detections.json -│ └── batch_A_results.json +├── expected_results/ ← required: expected results folder +│ ├── results_report.md ← required: input→expected result mapping +│ ├── image_01_expected.csv ← per-file expected detections +│ └── video_01_expected.csv ├── image_01.jpg ├── empty_scene.jpg └── data_parameters.md @@ -95,7 +95,7 @@ input_data/ 1. `acceptance_criteria.md` exists and is non-empty — **STOP if missing** 2. `restrictions.md` exists and is non-empty — **STOP if missing** 3. `input_data/` exists and contains at least one file — **STOP if missing** -4. `input_data/expected_results.md` exists and is non-empty — **STOP if missing**. Prompt the user: *"Expected results mapping is required. Please create `_docs/00_problem/input_data/expected_results.md` pairing each input with its quantifiable expected output. Use `.cursor/skills/test-spec/templates/expected-results.md` as the format reference."* +4. `input_data/expected_results/results_report.md` exists and is non-empty — **STOP if missing**. Prompt the user: *"Expected results mapping is required. Please create `_docs/00_problem/input_data/expected_results/results_report.md` pairing each input with its quantifiable expected output. Use `.cursor/skills/test-spec/templates/expected-results.md` as the format reference."* 5. `problem.md` exists and is non-empty — **STOP if missing** 6. `solution.md` exists and is non-empty — **STOP if missing** 7. Create TESTS_OUTPUT_DIR if it does not exist @@ -161,12 +161,12 @@ At the start of execution, create a TodoWrite with all three phases. Update stat 2. Read `acceptance_criteria.md`, `restrictions.md` 3. Read testing strategy from solution.md (if present) 4. 
If `DOCUMENT_DIR/architecture.md` and `DOCUMENT_DIR/system-flows.md` exist, read them for additional context on system interfaces and flows -5. Read `input_data/expected_results.md` and any referenced files in `input_data/expected_results/` +5. Read `input_data/expected_results/results_report.md` and any referenced files in `input_data/expected_results/` 6. Analyze `input_data/` contents against: - Coverage of acceptance criteria scenarios - Coverage of restriction edge cases - Coverage of testing strategy requirements -7. Analyze `input_data/expected_results.md` completeness: +7. Analyze `input_data/expected_results/results_report.md` completeness: - Every input data item has a corresponding expected result row in the mapping - Expected results are quantifiable (contain numeric thresholds, exact values, patterns, or file references — not vague descriptions like "works correctly" or "returns result") - Expected results specify a comparison method (exact match, tolerance range, pattern match, threshold) per the template @@ -178,7 +178,7 @@ At the start of execution, create a TodoWrite with all three phases. Update stat | [file/data] | Yes/No | Yes/No | [missing, vague, no tolerance, etc.] | 9. Threshold: at least 70% coverage of scenarios AND every covered scenario has a quantifiable expected result (see `.cursor/rules/cursor-meta.mdc` Quality Thresholds table) -10. If coverage is low, search the internet for supplementary data, assess quality with user, and if user agrees, add to `input_data/` and update `input_data/expected_results.md` +10. If coverage is low, search the internet for supplementary data, assess quality with user, and if user agrees, add to `input_data/` and update `input_data/expected_results/results_report.md` 11. If expected results are missing or not quantifiable, ask user to provide them before proceeding **BLOCKING**: Do NOT proceed until user confirms both input data coverage AND expected results completeness are sufficient. 
@@ -205,7 +205,7 @@ Based on all acquired data, acceptance_criteria, and restrictions, form detailed **Self-verification**: - [ ] Every acceptance criterion is covered by at least one test scenario - [ ] Every restriction is verified by at least one test scenario -- [ ] Every test scenario has a quantifiable expected result from `input_data/expected_results.md` +- [ ] Every test scenario has a quantifiable expected result from `input_data/expected_results/results_report.md` - [ ] Expected results use comparison methods from `.cursor/skills/test-spec/templates/expected-results.md` - [ ] Positive and negative scenarios are balanced - [ ] Consumer app has no direct access to system internals @@ -251,7 +251,7 @@ For each row where **Input Provided?** is **No** OR **Expected Result Provided?* > **Option A — Provide the missing items**: Supply what is missing: > - **Missing input data**: Place test data files in `_docs/00_problem/input_data/` or indicate the location. -> - **Missing expected result**: Provide the quantifiable expected result for this input. Update `_docs/00_problem/input_data/expected_results.md` with a row mapping the input to its expected output. If the expected result is complex, provide a reference file in `_docs/00_problem/input_data/expected_results/`. Use `.cursor/skills/test-spec/templates/expected-results.md` for format guidance. +> - **Missing expected result**: Provide the quantifiable expected result for this input. Update `_docs/00_problem/input_data/expected_results/results_report.md` with a row mapping the input to its expected output. If the expected result is complex, provide a reference CSV file in `_docs/00_problem/input_data/expected_results/`. Use `.cursor/skills/test-spec/templates/expected-results.md` for format guidance. > > Expected results MUST be quantifiable — the test must be able to programmatically compare actual vs expected. Examples: > - "3 detections with bounding boxes [(x1,y1,x2,y2), ...] 
± 10px" @@ -273,7 +273,7 @@ For each item where the user chose **Option A**: 3. Verify **quantity**: enough data samples to cover the scenario (e.g., at least N images for a batch test, multiple edge-case variants) **Expected result validation**: -4. Verify the expected result exists in `input_data/expected_results.md` or as a referenced file in `input_data/expected_results/` +4. Verify the expected result exists in `input_data/expected_results/results_report.md` or as a referenced file in `input_data/expected_results/` 5. Verify **quantifiability**: the expected result can be evaluated programmatically — it must contain at least one of: - Exact values (counts, strings, status codes) - Numeric values with tolerance (e.g., `± 10px`, `≥ 0.85`) @@ -392,7 +392,7 @@ Create `scripts/run-performance-tests.sh` at the project root. The script must: | Situation | Action | |-----------|--------| | Missing acceptance_criteria.md, restrictions.md, or input_data/ | **STOP** — specification cannot proceed | -| Missing input_data/expected_results.md | **STOP** — ask user to provide expected results mapping using the template | +| Missing input_data/expected_results/results_report.md | **STOP** — ask user to provide expected results mapping using the template | | Ambiguous requirements | ASK user | | Input data coverage below 70% (Phase 1) | Search internet for supplementary data, ASK user to validate | | Expected results missing or not quantifiable (Phase 1) | ASK user to provide quantifiable expected results before proceeding | diff --git a/.cursor/skills/test-spec/templates/expected-results.md b/.cursor/skills/test-spec/templates/expected-results.md index 0700733..315a13a 100644 --- a/.cursor/skills/test-spec/templates/expected-results.md +++ b/.cursor/skills/test-spec/templates/expected-results.md @@ -1,7 +1,7 @@ # Expected Results Template -Save as `_docs/00_problem/input_data/expected_results.md`. 
-For complex expected outputs, create `_docs/00_problem/input_data/expected_results/` and place reference files there. +Save as `_docs/00_problem/input_data/expected_results/results_report.md`. +For complex expected outputs, place reference CSV files alongside it in `_docs/00_problem/input_data/expected_results/`. Referenced by the test-spec skill (`.cursor/skills/test-spec/SKILL.md`). --- diff --git a/.gitignore b/.gitignore index 564adca..7c54af8 100644 --- a/.gitignore +++ b/.gitignore @@ -34,13 +34,14 @@ coverage.xml .hypothesis/ .tox/ -# Binary test data -_docs/00_problem/input_data/*.onnx -_docs/00_problem/input_data/*.jpg -_docs/00_problem/input_data/*.JPG -_docs/00_problem/input_data/*.mp4 -_docs/00_problem/input_data/*.png -_docs/00_problem/input_data/*.avi +# Binary / media / model files +*.onnx +*.mp4 +*.avi +*.jpg +*.JPG +*.jpeg +*.png # Standalone skill output (ephemeral, not part of project) _standalone/ diff --git a/AGENTS.md b/AGENTS.md index 6574f77..b157803 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -18,7 +18,7 @@ See [.cursor/README.md](.cursor/README.md) for the full documentation, including | Command | What it does | |---------|-------------| -| `/autopilot` | Full guided workflow (problem → deploy) | +| `/autopilot`, `/auto` | Full guided workflow (problem → deploy) | | `/problem` | Interactive problem gathering | | `/research` | Deep research methodology | | `/plan` | Architecture and component planning | diff --git a/_docs/00_problem/input_data/azaion.pt b/_docs/00_problem/input_data/azaion.pt new file mode 100644 index 0000000..5cabf08 Binary files /dev/null and b/_docs/00_problem/input_data/azaion.pt differ diff --git a/_docs/00_problem/input_data/expected_results/image_dense01_expected.csv b/_docs/00_problem/input_data/expected_results/image_dense01_expected.csv new file mode 100644 index 0000000..3567276 --- /dev/null +++ b/_docs/00_problem/input_data/expected_results/image_dense01_expected.csv @@ -0,0 +1 @@ 
+center_x,center_y,width,height,label,confidence_min diff --git a/_docs/00_problem/input_data/expected_results/image_dense02_expected.csv b/_docs/00_problem/input_data/expected_results/image_dense02_expected.csv new file mode 100644 index 0000000..3567276 --- /dev/null +++ b/_docs/00_problem/input_data/expected_results/image_dense02_expected.csv @@ -0,0 +1 @@ +center_x,center_y,width,height,label,confidence_min diff --git a/_docs/00_problem/input_data/expected_results/image_different_types_expected.csv b/_docs/00_problem/input_data/expected_results/image_different_types_expected.csv new file mode 100644 index 0000000..3567276 --- /dev/null +++ b/_docs/00_problem/input_data/expected_results/image_different_types_expected.csv @@ -0,0 +1 @@ +center_x,center_y,width,height,label,confidence_min diff --git a/_docs/00_problem/input_data/expected_results/image_empty_scene_expected.csv b/_docs/00_problem/input_data/expected_results/image_empty_scene_expected.csv new file mode 100644 index 0000000..3567276 --- /dev/null +++ b/_docs/00_problem/input_data/expected_results/image_empty_scene_expected.csv @@ -0,0 +1 @@ +center_x,center_y,width,height,label,confidence_min diff --git a/_docs/00_problem/input_data/expected_results/image_large_expected.csv b/_docs/00_problem/input_data/expected_results/image_large_expected.csv new file mode 100644 index 0000000..3567276 --- /dev/null +++ b/_docs/00_problem/input_data/expected_results/image_large_expected.csv @@ -0,0 +1 @@ +center_x,center_y,width,height,label,confidence_min diff --git a/_docs/00_problem/input_data/expected_results/image_small_expected.csv b/_docs/00_problem/input_data/expected_results/image_small_expected.csv new file mode 100644 index 0000000..3567276 --- /dev/null +++ b/_docs/00_problem/input_data/expected_results/image_small_expected.csv @@ -0,0 +1 @@ +center_x,center_y,width,height,label,confidence_min diff --git a/_docs/00_problem/input_data/expected_results/results_report.md 
b/_docs/00_problem/input_data/expected_results/results_report.md new file mode 100644 index 0000000..b298506 --- /dev/null +++ b/_docs/00_problem/input_data/expected_results/results_report.md @@ -0,0 +1,104 @@ +# Expected Results + +Maps every input data item to its quantifiable expected result. +Tests use this mapping to compare actual system output against known-correct answers. + +## Coordinate System + +All bounding box coordinates are **normalized to 0.0–1.0** relative to the full image/frame dimensions, matching the API response format: + +| Field | Meaning | +|-------|---------| +| `center_x` | Horizontal center of bounding box (0.0 = left edge, 1.0 = right edge) | +| `center_y` | Vertical center of bounding box (0.0 = top edge, 1.0 = bottom edge) | +| `width` | Bounding box width as fraction of image width | +| `height` | Bounding box height as fraction of image height | +| `label` | Class name from `classes.json` (e.g., `ArmorVehicle`, `Car`, `Person`) | +| `confidence_min` | Minimum acceptable confidence for this detection (threshold comparison, `≥`) | + +Expected-result CSVs for videos include one additional field: + +| Field | Meaning | +|-------|---------| +| `time_sec` | Timestamp in seconds from video start when this detection is visible | + +## Global Tolerances + +| Parameter | Tolerance | Comparison Method | +|-----------|-----------|-------------------| +| Bounding box coordinates (center_x, center_y, width, height) | ± 0.05 | `numeric_tolerance` | +| Detection count | ± 2 | `numeric_tolerance` | +| Confidence | ≥ `confidence_min` value per row | `threshold_min` | +| Label | exact match | `exact` | +| Video time_sec | ± 1.0s | `numeric_tolerance` | + +## Input → Expected Result Mapping + +### Images + +| # | Input File | Description | Expected Result File | Expected Detection Count | Notes | +|---|------------|-------------|---------------------|-------------------------|-------| +| 1 | `image_small.jpg` | 1280×720 aerial, contains detectable objects | 
`image_small_expected.csv` | ? | Primary test image for single-frame detection | +| 2 | `image_large.JPG` | 6252×4168 aerial, triggers GSD-based tiling | `image_large_expected.csv` | ? | Coordinates normalized to full image (not tile) | +| 3 | `image_dense01.jpg` | 1280×720 dense scene, many clustered objects | `image_dense01_expected.csv` | ? | Used for dedup and max-detection-cap tests | +| 4 | `image_dense02.jpg` | 1920×1080 dense scene variant | `image_dense02_expected.csv` | ? | Borderline tiling, dedup variant | +| 5 | `image_different_types.jpg` | 900×1600, varied object classes | `image_different_types_expected.csv` | ? | Must contain multiple distinct class labels | +| 6 | `image_empty_scene.jpg` | 1920×1080, no detectable objects | `image_empty_scene_expected.csv` | 0 | CSV has headers only — zero detections expected | + +### Videos + +| # | Input File | Description | Expected Result File | Notes | +|---|------------|-------------|---------------------|-------| +| 7 | `video_short01.mp4` | Standard test video | `video_short01_expected.csv` | Primary async/SSE/video test. List key-frame detections. | +| 8 | `video_short02.mp4` | Video variant | `video_short02_expected.csv` | Used for resilience and concurrent tests | +| 9 | `video_long03.mp4` | Long video (288MB), generates >100 SSE events | `video_long03_expected.csv` | SSE overflow test. Only key-frame samples needed. | + +## How to Fill + +### Images + +1. Run the model on each image (or use the detection service) +2. Record every detection the model returns +3. Fill one row per detection in the CSV: + +``` +center_x,center_y,width,height,label,confidence_min +0.45,0.32,0.08,0.12,Car,0.25 +0.71,0.55,0.06,0.09,Person,0.25 +``` + +4. For `image_empty_scene_expected.csv` — leave only the header row (0 detections) + +### Videos + +1. Run the model on the video (or use the detection service with `frame_period_recognition: 1`) +2. For key frames where detections appear, record the timestamp and detections +3. 
Fill one row per detection per timestamp: + +``` +time_sec,center_x,center_y,width,height,label,confidence_min +2.0,0.45,0.32,0.08,0.12,Car,0.25 +2.0,0.71,0.55,0.06,0.09,Person,0.25 +4.0,0.46,0.33,0.08,0.12,Car,0.25 +``` + +4. You don't need every single frame — sample at key moments (e.g., every 2–4 seconds) to validate detection presence and approximate positions + +## Non-Detection Expected Results + +The following test scenarios have expected results that are not per-file detections. These are defined inline in the test specs and do not need CSV files: + +| Scenario | Expected Result | Comparison | Defined In | +|----------|----------------|------------|------------| +| Empty image (FT-N-01) | HTTP 400, `"Image is empty"` | exact | `blackbox-tests.md` | +| Corrupt image (FT-N-02) | HTTP 400 or 422 | exact | `blackbox-tests.md` | +| Engine unavailable (FT-N-03) | HTTP 503 or 422, not 500 | exact | `blackbox-tests.md` | +| Duplicate media_id (FT-N-04) | HTTP 409 | exact | `blackbox-tests.md` | +| Missing classes.json (FT-N-05) | Service fails or returns empty detections | exact | `blackbox-tests.md` | +| Health pre-init (FT-P-01) | `aiAvailability: "None"` | exact | `blackbox-tests.md` | +| Health post-init (FT-P-02) | `aiAvailability` not "None"/"Downloading" | exact | `blackbox-tests.md` | +| Async start (FT-P-08) | `{"status": "started"}`, response < 1s | exact + threshold_max | `blackbox-tests.md` | +| SSE completion (FT-P-09) | Final event: `mediaStatus: "AIProcessed"`, `percent: 100` | exact | `blackbox-tests.md` | +| Max detections (NFT-RES-LIM-03) | `len(detections) ≤ 300` | threshold_max | `resource-limit-tests.md` | +| Single image latency (NFT-PERF-01) | p95 < 5000ms (ONNX CPU) | threshold_max | `performance-tests.md` | +| Log file naming (NFT-RES-LIM-04) | `log_inference_YYYYMMDD.txt` exists | regex | `resource-limit-tests.md` | diff --git a/_docs/00_problem/input_data/expected_results/video_long03_expected.csv
b/_docs/00_problem/input_data/expected_results/video_long03_expected.csv new file mode 100644 index 0000000..4aba659 --- /dev/null +++ b/_docs/00_problem/input_data/expected_results/video_long03_expected.csv @@ -0,0 +1 @@ +time_sec,center_x,center_y,width,height,label,confidence_min diff --git a/_docs/00_problem/input_data/expected_results/video_short01_expected.csv b/_docs/00_problem/input_data/expected_results/video_short01_expected.csv new file mode 100644 index 0000000..4aba659 --- /dev/null +++ b/_docs/00_problem/input_data/expected_results/video_short01_expected.csv @@ -0,0 +1 @@ +time_sec,center_x,center_y,width,height,label,confidence_min diff --git a/_docs/00_problem/input_data/expected_results/video_short02_expected.csv b/_docs/00_problem/input_data/expected_results/video_short02_expected.csv new file mode 100644 index 0000000..4aba659 --- /dev/null +++ b/_docs/00_problem/input_data/expected_results/video_short02_expected.csv @@ -0,0 +1 @@ +time_sec,center_x,center_y,width,height,label,confidence_min diff --git a/_docs/_autopilot_state.md b/_docs/_autopilot_state.md index 6409c8e..63028d9 100644 --- a/_docs/_autopilot_state.md +++ b/_docs/_autopilot_state.md @@ -1,46 +1,29 @@ # Autopilot State ## Current Step -step: 2f -name: Refactor -status: not_started -sub_step: — +flow: existing-code +step: 5 +name: Run Tests +status: in_progress +sub_step: 2 — Run Tests retry_count: 0 -## Step ↔ SubStep Reference -| Step | Name | Sub-Skill | Internal SubSteps | -|------|------------------------|----------------------------------|------------------------------------------| -| 0 | Problem | problem/SKILL.md | Phase 1–4 | -| 1 | Research | research/SKILL.md | Mode A: Phase 1–4 · Mode B: Step 0–8 | -| 2 | Plan | plan/SKILL.md | Step 1–6 | -| 2b | Blackbox Test Spec | blackbox-test-spec/SKILL.md | Phase 1a–1b (existing code path only) | -| 2c | Post-Test-Spec Decision| (autopilot decision gate) | Refactor vs normal workflow | -| 2d | Decompose Tests | decompose/SKILL.md 
(tests-only) | Step 1t + Step 3 + Step 4 | -| 2e | Implement Tests | implement/SKILL.md | (batch-driven, no fixed sub-steps) | -| 2f | Refactor | refactor/SKILL.md | Phases 0–5 (6-phase method) | -| 2g | New Task | new-task/SKILL.md | Steps 1–8 (loop) | -| 2h | Implement | implement/SKILL.md | (batch-driven, no fixed sub-steps) | -| 2i | Run Tests | (autopilot-managed) | Unit + integration tests | -| 2j | Security Audit | security/SKILL.md | Phase 1–5 (optional) | -| 2k | Deploy | deploy/SKILL.md | Step 1–7 | - ## Completed Steps | Step | Name | Completed | Key Outcome | |------|------|-----------|-------------| -| — | Document (pre-step) | 2026-03-21 | 10 modules, 4 components, full _docs/ generated from existing codebase | -| 2b | Blackbox Test Spec | 2026-03-21 | 39 test scenarios (16 positive, 8 negative, 11 non-functional), 85% total coverage, 5 artifacts produced | -| 2c | Post-Test-Spec Decision | 2026-03-22 | User chose refactor path (A) | -| 2d | Decompose Tests | 2026-03-23 | 11 tasks (AZ-138..AZ-148), 35 complexity points, 3 batches. Phase 3 test data gate PASSED: 39/39 scenarios validated, 12 data files provided. | -| 2e | Implement Tests | 2026-03-23 | 11 tasks implemented across 4 batches, 38 tests (2 skipped), all code reviews PASS_WITH_WARNINGS. Commits: 5418bd7, a469579, 861d4f0, f0e3737. | +| 1 | Document | 2026-03-21 | 10 modules, 4 components, full _docs/ generated from existing codebase | +| 2 | Test Spec | 2026-03-21 | 39 test scenarios (16 positive, 8 negative, 11 non-functional), 85% total coverage, 5 artifacts produced | +| 3 | Decompose Tests | 2026-03-23 | 11 tasks (AZ-138..AZ-148), 35 complexity points, 3 batches. Phase 3 test data gate PASSED: 39/39 scenarios validated, 12 data files provided. | +| 4 | Implement Tests | 2026-03-23 | 11 tasks implemented across 4 batches, 38 tests (2 skipped), all code reviews PASS_WITH_WARNINGS. Commits: 5418bd7, a469579, 861d4f0, f0e3737. 
| ## Key Decisions -- User chose B: Document existing codebase before proceeding +- User chose to document existing codebase before proceeding - Component breakdown: 4 components (Domain, Inference Engines, Inference Pipeline, API) - Verification: 4 legacy issues found and documented (unused serialize/from_msgpack, orphaned queue declarations) - Input data coverage approved at ~90% (Phase 1a) - Test coverage approved at 85% (21/22 AC, 13/18 restrictions) with all gaps justified -- User chose A: Refactor path (decompose tests → implement tests → refactor) +- User chose refactor path (decompose tests → implement tests → refactor) - Integration Tests Epic: AZ-137 - Test Infrastructure: AZ-138 (5 pts) - 10 integration test tasks decomposed: AZ-139 through AZ-148 (30 pts) @@ -51,10 +34,10 @@ retry_count: 0 - Jira MCP auth skipped — tickets not transitioned to In Testing ## Last Session -date: 2026-03-23 -ended_at: Step 2e Implement Tests — COMPLETE. All 11 tasks, 38 tests, 4 batches. +date: 2026-03-24 +ended_at: Step 4 Implement Tests — COMPLETE. All 11 tasks, 38 tests, 4 batches. reason: step completed, context limit approaching -notes: All integration tests implemented and committed. Next step: 2f Refactor. The refactor skill runs a 6-phase method using the implemented tests as a safety net. Recommend fresh conversation for better context management. +notes: All integration tests implemented and committed. Next step: 5 Run Tests — verify tests pass before proceeding to refactor. Recommend fresh conversation for better context management. 
## Blockers - none diff --git a/constants_inf.pxd b/constants_inf.pxd index f5573eb..59fb7a0 100644 --- a/constants_inf.pxd +++ b/constants_inf.pxd @@ -21,7 +21,7 @@ cdef log(str log_message) cdef logerror(str error) cdef format_time(int ms) -cdef dict[int, AnnotationClass] annotations_dict +cdef dict annotations_dict cdef class AnnotationClass: cdef public int id diff --git a/constants_inf.pyx b/constants_inf.pyx index 4b515bf..a94e6ef 100644 --- a/constants_inf.pyx +++ b/constants_inf.pyx @@ -70,6 +70,9 @@ logger.add( colorize=True ) +def get_annotations_dict(): + return annotations_dict + cdef log(str log_message): logger.info(log_message) diff --git a/e2e/docker-compose.test.yml b/e2e/docker-compose.test.yml index 4f52a05..324105b 100644 --- a/e2e/docker-compose.test.yml +++ b/e2e/docker-compose.test.yml @@ -65,7 +65,7 @@ services: - ./results:/results networks: - e2e-net - command: ["pytest", "--csv=/results/report.csv", "-v"] + command: ["pytest", "--csv=/results/report.csv", "-v", "-x"] networks: e2e-net: diff --git a/inference.pyx b/inference.pyx index f6153f6..81acee1 100644 --- a/inference.pyx +++ b/inference.pyx @@ -264,7 +264,9 @@ cdef class Inference: if frame is None: raise ValueError("Invalid image data") - input_blob = self.preprocess([frame]) + cdef int bs = self.engine.get_batch_size() + frames = [frame] * bs + input_blob = self.preprocess(frames) outputs = self.engine.run(input_blob) list_detections = self.postprocess(outputs, ai_config) if list_detections: diff --git a/main.py b/main.py index 1e8d3f5..c3f0f26 100644 --- a/main.py +++ b/main.py @@ -109,9 +109,10 @@ class AIConfigDto(BaseModel): def detection_to_dto(det) -> DetectionDto: import constants_inf + ann = constants_inf.get_annotations_dict() label = "" - if det.cls in constants_inf.annotations_dict: - label = constants_inf.annotations_dict[det.cls].name + if det.cls in ann: + label = ann[det.cls].name return DetectionDto( centerX=det.x, centerY=det.y, diff --git a/onnx_engine.pxd 
b/onnx_engine.pxd new file mode 100644 index 0000000..55ab2fa --- /dev/null +++ b/onnx_engine.pxd @@ -0,0 +1,13 @@ +from inference_engine cimport InferenceEngine + + +cdef class OnnxEngine(InferenceEngine): + + cdef public object session + cdef object model_inputs + cdef str input_name + cdef object input_shape + + cdef tuple get_input_shape(self) + cdef int get_batch_size(self) + cdef run(self, input_data)