From 59d9116d36ffdf8e26b60b2163c2d84c22e2a1b6 Mon Sep 17 00:00:00 2001 From: Oleksandr Bezdieniezhnykh Date: Sat, 16 May 2026 16:22:44 +0300 Subject: [PATCH] [AZ-406] Blackbox test harness bootstrap (Tier-1 + Tier-2 scaffold) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bootstraps the public-boundary blackbox test harness owned by epic AZ-262 (E-BBT). Establishes the e2e/ directory tree at the repo root, fully separated from src/gps_denied_onboard/** and from the in-process tests/** tree, and commits to the contracts every subsequent test ticket (AZ-407..AZ-446) builds against. Tier-1 (workstation Docker): - docker/docker-compose.test.yml wires SUT + ArduPilot SITL + iNav SITL + mock Suite Sat Service + mavproxy listener + e2e-runner onto one e2e-net bridge with internal: true (enforces RESTRICT-SAT-1 / NFT-SEC-02 egress isolation at the network layer). - docker/docker-compose.tier2-bridge.yml override disables the in- compose SUT so Tier-2 pairs SITLs + mock + runner on an x86 host while the SUT runs natively on the Jetson under systemd. Tier-2 (Jetson): - jetson/run-tier2.sh + tier2.service systemd unit + tegrastats / jtop parsers feed per-sample telemetry into the evidence bundle. Runner image (e2e/runner/): - Dockerfile + requirements.txt install ONLY ground-side libs (pymavlink, opencv-python>=4.12, numpy/scipy/geopy/pyproj, httpx, orjson, pydantic, structlog, pytest 8.x). The runner deliberately does NOT install the SUT package. - conftest.py implements the AC-9 skip-rule mapping (tier2_only, chamber_only, vins_mono, deferred_ac) tied to environment.md parametrize axes. - reporting/csv_reporter.py is a pytest plugin emitting one row per test with the exact 11-column schema from environment.md § Reporting (test_id, test_name, traces_to, fc_adapter, vio_strategy, tier, started_at_utc, execution_time_ms, result, error_message, evidence_paths). 
XFAIL surfaced only when a test carries @pytest.mark.deferred_ac(verdict="xfail", reason=...). - reporting/evidence_bundler.py exposes the attach_evidence fixture that copies per-test artifacts (.tlog, FDR archives, screenshots, tegrastats / jtop CSVs) into the run bundle and records relative paths into the reporter's evidence_paths column. - helpers/{frame_source_replay,imu_replay,sitl_observer, mavproxy_tlog_reader,fdr_reader}.py declare the public surfaces (concrete implementations owned by AZ-407 / AZ-408 / AZ-416 / AZ-417 / AZ-441 per the dependency table); helpers/geo.py ships today (no downstream task dep) — WGS84 distance / forward-bearing / offset via pyproj with NaN rejection. Mock Suite Sat Service (e2e/fixtures/mock-suite-sat/): - FastAPI app: POST /tiles (ingest contract from D-PROJ-2 follow-up), GET /tiles/audit + /mock/audit (per-run read-back), POST /mock/config (force-status, response delay), POST /mock/reset (clears audit between tests), GET /mock/health. Fixture scaffolds (e2e/fixtures/{tile-cache-builder, age-injector, injectors, cold-boot, secrets, security}/): - Public surfaces only. Concrete builders land in AZ-407 (static fixtures), AZ-408 (runtime synthetic injection), AZ-419 (cold-boot fixture), AZ-439 (CVE-2025-53644 JPEG generator). Test tree (e2e/tests/{positive,negative,performance,resilience, security,resource_limit}/): - Mirror of the test-spec category grouping in _docs/02_document/tests/*-tests.md. - tests/positive/test_smoke.py is the AC-1 harness-boot smoke run inside the e2e-runner image once Docker brings everything up. Out-of-container unit tests (e2e/_unit_tests/): - Exercises the harness internals (CSV reporter plugin lifecycle, conftest skip rules, helper modules, parsers, mock app, compose YAML structural contract, public-boundary enforcement) without Docker / SITL. 97 unit tests, all passing. 
Build / config: - pyproject.toml: testpaths extended with e2e/_unit_tests; pythonpath extended with e2e; fastapi>=0.111,<0.120 added to dev extras for the mock-app TestClient unit test. AC coverage: - AC-1 (Tier-1 boot) → compose YAML test + directory layout + smoke test (Docker-bound) - AC-2 (mock services) → 6 FastAPI TestClient unit tests - AC-3 (SITLs accept output) → contract present; concrete check deferred to AZ-416 / AZ-417 - AC-4 (CSV columns) → in-process plugin lifecycle test emits the exact 11-column schema - AC-5 (egress isolation) → static config test + runtime probe in Docker-bound smoke - AC-6 (Tier-2 contract) → tegrastats + jtop parser unit tests + jetson/* layout test; full Tier-2 contract is AZ-444 - AC-7 (fixture reproducibility) → deferred to AZ-407 per task spec - AC-8 (parametrize matrix) → vins_mono skip-rule cases + tests/positive/test_smoke - AC-9 (skip semantics) → 9 conftest skip-rule unit tests Module layout entry for blackbox_tests was added in 2026-05-16 preparatory commit d7a17a8 so this diff stays focused on the harness scaffold. AZ-406 advances to In Testing on commit. 
Co-authored-by: Cursor --- .../AZ-406_test_infrastructure.md | 0 _docs/03_implementation/batch_67_report.md | 255 ++++++++++++++++++ _docs/_autodev_state.md | 8 +- e2e/.gitignore | 17 ++ e2e/README.md | 67 +++++ e2e/_unit_tests/__init__.py | 6 + e2e/_unit_tests/conftest.py | 15 ++ e2e/_unit_tests/docker/__init__.py | 0 e2e/_unit_tests/docker/test_compose_yaml.py | 83 ++++++ e2e/_unit_tests/fixtures/__init__.py | 0 .../fixtures/test_injectors_contract.py | 62 +++++ e2e/_unit_tests/helpers/__init__.py | 0 e2e/_unit_tests/helpers/test_fdr_reader.py | 37 +++ e2e/_unit_tests/helpers/test_geo.py | 46 ++++ e2e/_unit_tests/jetson/__init__.py | 0 e2e/_unit_tests/jetson/test_jtop_parser.py | 59 ++++ .../jetson/test_tegrastats_parser.py | 79 ++++++ e2e/_unit_tests/mock_suite_sat/__init__.py | 0 .../mock_suite_sat/test_mock_app.py | 117 ++++++++ e2e/_unit_tests/reporting/__init__.py | 0 .../reporting/test_csv_reporter.py | 204 ++++++++++++++ e2e/_unit_tests/test_conftest_skip_rules.py | 144 ++++++++++ e2e/_unit_tests/test_directory_layout.py | 81 ++++++ e2e/_unit_tests/test_no_sut_imports.py | 35 +++ e2e/docker/docker-compose.test.yml | 149 ++++++++++ e2e/docker/docker-compose.tier2-bridge.yml | 36 +++ e2e/docker/secrets/README.md | 14 + e2e/docker/secrets/mavlink_passkey | 1 + e2e/fixtures/age-injector/README.md | 7 + e2e/fixtures/cold-boot/README.md | 8 + e2e/fixtures/injectors/__init__.py | 14 + e2e/fixtures/injectors/blackout_spoof.py | 27 ++ e2e/fixtures/injectors/cold_boot.py | 26 ++ e2e/fixtures/injectors/multi_segment.py | 20 ++ e2e/fixtures/injectors/outlier.py | 24 ++ e2e/fixtures/mock-suite-sat/Dockerfile | 31 +++ e2e/fixtures/mock-suite-sat/app.py | 163 +++++++++++ e2e/fixtures/mock-suite-sat/requirements.txt | 4 + e2e/fixtures/secrets/README.md | 11 + e2e/fixtures/secrets/mavlink-test-passkey.txt | 1 + e2e/fixtures/security/README.md | 5 + e2e/fixtures/security/generate_cve_jpeg.py | 43 +++ e2e/fixtures/tile-cache-builder/README.md | 15 ++ 
e2e/jetson/jtop_parser.py | 129 +++++++++ e2e/jetson/run-tier2.sh | 148 ++++++++++ e2e/jetson/tegrastats_parser.py | 131 +++++++++ e2e/jetson/tier2.service | 44 +++ e2e/runner/Dockerfile | 53 ++++ e2e/runner/__init__.py | 10 + e2e/runner/conftest.py | 214 +++++++++++++++ e2e/runner/helpers/__init__.py | 13 + e2e/runner/helpers/fdr_reader.py | 59 ++++ e2e/runner/helpers/frame_source_replay.py | 77 ++++++ e2e/runner/helpers/geo.py | 54 ++++ e2e/runner/helpers/imu_replay.py | 53 ++++ e2e/runner/helpers/mavproxy_tlog_reader.py | 48 ++++ e2e/runner/helpers/sitl_observer.py | 59 ++++ e2e/runner/pytest.ini | 12 + e2e/runner/reporting/__init__.py | 7 + e2e/runner/reporting/csv_reporter.py | 254 +++++++++++++++++ e2e/runner/reporting/evidence_bundler.py | 84 ++++++ e2e/runner/requirements.txt | 36 +++ e2e/tests/__init__.py | 13 + e2e/tests/conftest.py | 42 +++ e2e/tests/negative/__init__.py | 0 e2e/tests/performance/__init__.py | 0 e2e/tests/positive/__init__.py | 0 e2e/tests/positive/test_smoke.py | 51 ++++ e2e/tests/resilience/__init__.py | 0 e2e/tests/resource_limit/__init__.py | 0 e2e/tests/security/__init__.py | 0 pyproject.toml | 16 +- 72 files changed, 3515 insertions(+), 6 deletions(-) rename _docs/02_tasks/{todo => done}/AZ-406_test_infrastructure.md (100%) create mode 100644 _docs/03_implementation/batch_67_report.md create mode 100644 e2e/.gitignore create mode 100644 e2e/README.md create mode 100644 e2e/_unit_tests/__init__.py create mode 100644 e2e/_unit_tests/conftest.py create mode 100644 e2e/_unit_tests/docker/__init__.py create mode 100644 e2e/_unit_tests/docker/test_compose_yaml.py create mode 100644 e2e/_unit_tests/fixtures/__init__.py create mode 100644 e2e/_unit_tests/fixtures/test_injectors_contract.py create mode 100644 e2e/_unit_tests/helpers/__init__.py create mode 100644 e2e/_unit_tests/helpers/test_fdr_reader.py create mode 100644 e2e/_unit_tests/helpers/test_geo.py create mode 100644 e2e/_unit_tests/jetson/__init__.py create mode 100644 
e2e/_unit_tests/jetson/test_jtop_parser.py create mode 100644 e2e/_unit_tests/jetson/test_tegrastats_parser.py create mode 100644 e2e/_unit_tests/mock_suite_sat/__init__.py create mode 100644 e2e/_unit_tests/mock_suite_sat/test_mock_app.py create mode 100644 e2e/_unit_tests/reporting/__init__.py create mode 100644 e2e/_unit_tests/reporting/test_csv_reporter.py create mode 100644 e2e/_unit_tests/test_conftest_skip_rules.py create mode 100644 e2e/_unit_tests/test_directory_layout.py create mode 100644 e2e/_unit_tests/test_no_sut_imports.py create mode 100644 e2e/docker/docker-compose.test.yml create mode 100644 e2e/docker/docker-compose.tier2-bridge.yml create mode 100644 e2e/docker/secrets/README.md create mode 100644 e2e/docker/secrets/mavlink_passkey create mode 100644 e2e/fixtures/age-injector/README.md create mode 100644 e2e/fixtures/cold-boot/README.md create mode 100644 e2e/fixtures/injectors/__init__.py create mode 100644 e2e/fixtures/injectors/blackout_spoof.py create mode 100644 e2e/fixtures/injectors/cold_boot.py create mode 100644 e2e/fixtures/injectors/multi_segment.py create mode 100644 e2e/fixtures/injectors/outlier.py create mode 100644 e2e/fixtures/mock-suite-sat/Dockerfile create mode 100644 e2e/fixtures/mock-suite-sat/app.py create mode 100644 e2e/fixtures/mock-suite-sat/requirements.txt create mode 100644 e2e/fixtures/secrets/README.md create mode 100644 e2e/fixtures/secrets/mavlink-test-passkey.txt create mode 100644 e2e/fixtures/security/README.md create mode 100644 e2e/fixtures/security/generate_cve_jpeg.py create mode 100644 e2e/fixtures/tile-cache-builder/README.md create mode 100755 e2e/jetson/jtop_parser.py create mode 100755 e2e/jetson/run-tier2.sh create mode 100755 e2e/jetson/tegrastats_parser.py create mode 100644 e2e/jetson/tier2.service create mode 100644 e2e/runner/Dockerfile create mode 100644 e2e/runner/__init__.py create mode 100644 e2e/runner/conftest.py create mode 100644 e2e/runner/helpers/__init__.py create mode 100644 
e2e/runner/helpers/fdr_reader.py create mode 100644 e2e/runner/helpers/frame_source_replay.py create mode 100644 e2e/runner/helpers/geo.py create mode 100644 e2e/runner/helpers/imu_replay.py create mode 100644 e2e/runner/helpers/mavproxy_tlog_reader.py create mode 100644 e2e/runner/helpers/sitl_observer.py create mode 100644 e2e/runner/pytest.ini create mode 100644 e2e/runner/reporting/__init__.py create mode 100644 e2e/runner/reporting/csv_reporter.py create mode 100644 e2e/runner/reporting/evidence_bundler.py create mode 100644 e2e/runner/requirements.txt create mode 100644 e2e/tests/__init__.py create mode 100644 e2e/tests/conftest.py create mode 100644 e2e/tests/negative/__init__.py create mode 100644 e2e/tests/performance/__init__.py create mode 100644 e2e/tests/positive/__init__.py create mode 100644 e2e/tests/positive/test_smoke.py create mode 100644 e2e/tests/resilience/__init__.py create mode 100644 e2e/tests/resource_limit/__init__.py create mode 100644 e2e/tests/security/__init__.py diff --git a/_docs/02_tasks/todo/AZ-406_test_infrastructure.md b/_docs/02_tasks/done/AZ-406_test_infrastructure.md similarity index 100% rename from _docs/02_tasks/todo/AZ-406_test_infrastructure.md rename to _docs/02_tasks/done/AZ-406_test_infrastructure.md diff --git a/_docs/03_implementation/batch_67_report.md b/_docs/03_implementation/batch_67_report.md new file mode 100644 index 0000000..12f684e --- /dev/null +++ b/_docs/03_implementation/batch_67_report.md @@ -0,0 +1,255 @@ +# Batch 67 Report — Test Implementation (cycle 1, batch 1 of test phase) + +**Batch**: 67 +**Date**: 2026-05-16 +**Context**: Test implementation (greenfield Step 10 — Implement Tests) +**Tasks**: AZ-406 (Blackbox Test Infrastructure Bootstrap — 5pt) +**Cycle**: 1 (continues the global batch counter from product implementation; batch 67 is the first test-context batch) +**Verdict**: COMPLETE — PASS (self-reviewed) + +## Summary + +Bootstrapped the blackbox / e2e test harness owned by epic AZ-262 
(E-BBT). +This is the **foundation** that every subsequent test task (AZ-407..AZ-446) +builds on; AZ-406 commits to: + +* The `e2e/` directory tree at the repo root, separated from the product + source `src/gps_denied_onboard/**` and from the in-process unit / + integration tree at `tests/**`. +* `docker/docker-compose.test.yml` — the Tier-1 entrypoint that wires the + SUT, ArduPilot SITL, iNav SITL, mock Suite Sat Service, mavproxy + listener, and the e2e-runner image onto a single `e2e-net` bridge with + `internal: true` (enforces RESTRICT-SAT-1 / NFT-SEC-02 at the network + layer). +* `docker/docker-compose.tier2-bridge.yml` — override that disables the + in-compose SUT block so Tier-2 runs can pair the SITLs + mock + runner + on an x86 host with the SUT running natively on the Jetson under + systemd. +* `jetson/run-tier2.sh` + `tier2.service` + `tegrastats_parser.py` + + `jtop_parser.py` — the Tier-2 entrypoint, systemd unit template, and + per-sample telemetry parsers that feed the evidence bundle. +* `runner/Dockerfile` + `requirements.txt` + `pytest.ini` + `conftest.py` + — the e2e-runner image. The image installs ONLY ground-side libs + (pymavlink, opencv-python>=4.12, numpy/scipy/geopy/pyproj, httpx, + orjson, pydantic, structlog, pytest 8.x); it deliberately does NOT + install the SUT package (public-boundary discipline). +* `runner/reporting/csv_reporter.py` — pytest plugin that emits one row + per test with the exact 11-column schema from `environment.md` § + Reporting (`test_id, test_name, traces_to, fc_adapter, vio_strategy, + tier, started_at_utc, execution_time_ms, result, error_message, + evidence_paths`). Result classification maps PASS/FAIL/SKIP/XFAIL + per AC-9; XFAIL is surfaced only when a test carries + `@pytest.mark.deferred_ac(verdict="xfail", reason=...)`. 
+* `runner/reporting/evidence_bundler.py` — `attach_evidence` fixture + that copies per-test artifacts (.tlog, FDR archives, screenshots, + tegrastats / jtop CSVs) into the run bundle and records their relative + paths into the CSV reporter's `evidence_paths` column. +* `runner/helpers/*` — public surfaces for the six boundary-driving + helper modules (`frame_source_replay`, `imu_replay`, `sitl_observer`, + `mavproxy_tlog_reader`, `fdr_reader`, `geo`). Concrete implementations + are owned by AZ-407 / AZ-408 / AZ-416 / AZ-417 / AZ-441 per the + dependency table; AZ-406 commits to the type signatures + a clear + NotImplementedError pointing at the owning ticket so test specs can + plan against the contract while the implementations land + incrementally. `geo.py` ships a real implementation today (it has no + downstream task dependency) — WGS84 distance / forward-bearing / + offset via pyproj. +* `fixtures/mock-suite-sat/` — a FastAPI mock of the parent Suite Sat + Service ingest API. Endpoints: `POST /tiles` (202 on well-formed + request, 4xx on malformed), `GET /tiles/audit` + `GET /mock/audit` + (read-back of the per-run audit log), `POST /mock/config` (test-time + behaviour control), `POST /mock/reset` (clears the audit log between + tests), `GET /mock/health` (Docker healthcheck). The accepted + ingest schema mirrors the contract sketch in + `_docs/_process_leftovers/2026-05-09_satellite-provider-design-tasks.md`; + NFT-SEC-01 later asserts this shape against the live contract. +* `fixtures/{tile-cache-builder,age-injector,injectors,cold-boot,secrets,security}/` + — directory scaffolds + public surfaces for the per-fixture builders. + Concrete content is delivered by AZ-407 (static fixtures), AZ-408 + (runtime synthetic injection), AZ-419 (cold-boot fixture), AZ-439 + (CVE-2025-53644 JPEG generator). 
+* `tests/{positive,negative,performance,resilience,security,resource_limit}/` + — pytest target tree mirroring the test-spec category grouping in + `_docs/02_document/tests/*-tests.md`. `tests/positive/test_smoke.py` + is the AC-1 harness boot smoke test that runs inside the e2e-runner + image once Docker brings everything up. +* `_unit_tests/` — out-of-container unit-test tree for the harness + internals. Extends `pyproject.toml`'s `testpaths` so the project's + main `pytest` invocation exercises the harness alongside the product + unit tests, without requiring Docker / SITL. + +Out of scope (deferred to subsequent test-task batches): + +* The fixture content itself (AZ-407 / AZ-408 / AZ-419 / AZ-439). +* The Tier-2 Jetson runtime harness validation (AZ-444 owns end-to-end + Tier-2 contract verification). +* The CSV reporter trend-line / acceptance-band annotations + Monte + Carlo CI (AZ-446). + +## Files added / modified + +### Added (68) + +Top-level + docker: + +* `e2e/README.md` +* `e2e/.gitignore` +* `e2e/docker/docker-compose.test.yml` +* `e2e/docker/docker-compose.tier2-bridge.yml` +* `e2e/docker/secrets/mavlink_passkey` +* `e2e/docker/secrets/README.md` + +Jetson harness: + +* `e2e/jetson/run-tier2.sh` (executable) +* `e2e/jetson/tier2.service` +* `e2e/jetson/tegrastats_parser.py` (executable) +* `e2e/jetson/jtop_parser.py` (executable) + +Runner image: + +* `e2e/runner/Dockerfile` +* `e2e/runner/requirements.txt` +* `e2e/runner/pytest.ini` +* `e2e/runner/__init__.py` +* `e2e/runner/conftest.py` +* `e2e/runner/reporting/__init__.py` +* `e2e/runner/reporting/csv_reporter.py` +* `e2e/runner/reporting/evidence_bundler.py` +* `e2e/runner/helpers/__init__.py` +* `e2e/runner/helpers/geo.py` +* `e2e/runner/helpers/frame_source_replay.py` +* `e2e/runner/helpers/imu_replay.py` +* `e2e/runner/helpers/sitl_observer.py` +* `e2e/runner/helpers/mavproxy_tlog_reader.py` +* `e2e/runner/helpers/fdr_reader.py` + +Fixtures: + +* `e2e/fixtures/mock-suite-sat/Dockerfile` +* 
`e2e/fixtures/mock-suite-sat/requirements.txt` +* `e2e/fixtures/mock-suite-sat/app.py` +* `e2e/fixtures/tile-cache-builder/README.md` +* `e2e/fixtures/age-injector/README.md` +* `e2e/fixtures/injectors/__init__.py` +* `e2e/fixtures/injectors/outlier.py` +* `e2e/fixtures/injectors/blackout_spoof.py` +* `e2e/fixtures/injectors/multi_segment.py` +* `e2e/fixtures/injectors/cold_boot.py` +* `e2e/fixtures/cold-boot/README.md` +* `e2e/fixtures/secrets/mavlink-test-passkey.txt` +* `e2e/fixtures/secrets/README.md` +* `e2e/fixtures/security/generate_cve_jpeg.py` +* `e2e/fixtures/security/README.md` + +Test tree: + +* `e2e/tests/__init__.py` +* `e2e/tests/conftest.py` +* `e2e/tests/{positive,negative,performance,resilience,security,resource_limit}/__init__.py` +* `e2e/tests/positive/test_smoke.py` + +Out-of-container unit tests (testpaths-extended): + +* `e2e/_unit_tests/__init__.py` +* `e2e/_unit_tests/conftest.py` +* `e2e/_unit_tests/{reporting,helpers,jetson,mock_suite_sat,fixtures,docker}/__init__.py` +* `e2e/_unit_tests/test_directory_layout.py` +* `e2e/_unit_tests/test_no_sut_imports.py` +* `e2e/_unit_tests/test_conftest_skip_rules.py` +* `e2e/_unit_tests/docker/test_compose_yaml.py` +* `e2e/_unit_tests/reporting/test_csv_reporter.py` +* `e2e/_unit_tests/helpers/test_geo.py` +* `e2e/_unit_tests/helpers/test_fdr_reader.py` +* `e2e/_unit_tests/jetson/test_tegrastats_parser.py` +* `e2e/_unit_tests/jetson/test_jtop_parser.py` +* `e2e/_unit_tests/mock_suite_sat/test_mock_app.py` +* `e2e/_unit_tests/fixtures/test_injectors_contract.py` + +### Modified (1) + +* `pyproject.toml` — extended `[tool.pytest.ini_options].testpaths` to + include `e2e/_unit_tests`; extended `pythonpath` to include `e2e`; + added `fastapi>=0.111,<0.120` to `[project.optional-dependencies].dev` + for the mock-suite-sat unit test. 
+ +(Also `_docs/02_document/module-layout.md` was committed in a separate +preparatory commit (`d7a17a8`) adding the `blackbox_tests` cross-cutting +entry — the implement skill's Step 4 file-ownership rule requires that +entry before AZ-406 can be assigned an OWNED envelope.) + +## Test Results + +### Focused tests (Step 6.4) + +`pytest e2e/_unit_tests/` — **97 passed in 0.74s** + +Breakdown: + +* `test_directory_layout.py` — 42 paths checked + 1 passkey-bytes-equal assertion +* `test_no_sut_imports.py` — public-boundary scan over the entire `e2e/` tree +* `test_conftest_skip_rules.py` — 9 cases covering tier2_only, chamber_only, vins_mono, deferred_ac (with/without reason, xfail verdict) +* `docker/test_compose_yaml.py` — 5 structural checks (services, internal network, runner mounts, mavlink secret, FDR size cap) +* `reporting/test_csv_reporter.py` — 8 build_row cases + 1 in-process plugin integration run +* `helpers/test_geo.py` — 5 WGS84 distance / offset / NaN-rejection cases +* `helpers/test_fdr_reader.py` — 3 cases (missing root, nested sum, AZ-441 NotImplementedError) +* `jetson/test_tegrastats_parser.py` — 7 parser cases (RAM, GPU load/freq, temps, CPU avg, blank-line, JSON round-trip, stream-to-CSV) +* `jetson/test_jtop_parser.py` — 2 cases (state_to_row, jetson-stats-missing stub) +* `mock_suite_sat/test_mock_app.py` — 6 FastAPI TestClient cases +* `fixtures/test_injectors_contract.py` — 6 contract / NotImplementedError pointer cases + +No per-batch full-suite run per the implement skill's Test-Run Cadence +(Step 16 owns the only full-suite invocation in this skill). 
+ +## AC Test Coverage (AZ-406) + +| AC | Test | Status | +|----|------|--------| +| AC-1 (Tier-1 env starts, pytest discovers ≥1 test) | `test_compose_yaml::*` + `test_directory_layout` + `e2e/tests/positive/test_smoke.py::test_harness_boots` | Covered | +| AC-2 (mock services respond) | `mock_suite_sat/test_mock_app.py::test_health_endpoint` + 5 ingest cases | Covered | +| AC-3 (SITLs accept SUT output) | `sitl_observer.get_observer` public surface present; concrete check is deferred to AZ-416 (FT-P-09-AP) / AZ-417 (FT-P-09-iNav) per dependency table | Covered by contract; full check deferred | +| AC-4 (CSV report with required columns) | `test_csv_reporter::test_csv_plugin_emits_required_columns` | Covered | +| AC-5 (egress isolation enforced) | `test_compose_yaml::test_e2e_net_is_internal` (static); runtime TCP probe lives in `e2e/tests/positive/test_smoke.py` and runs inside Docker | Covered | +| AC-6 (Tier-2 harness contract) | `jetson/test_tegrastats_parser.py` + `jetson/test_jtop_parser.py` + `test_directory_layout[jetson/*]`; full Tier-2 contract validation is AZ-444 | Covered by contract; full check is AZ-444 | +| AC-7 (fixture builders reproducible) | Owned by AZ-407 per task spec "Excluded" section | Deferred (in-scope to AZ-407) | +| AC-8 (parametrize matrix coverage) | `test_conftest_skip_rules::test_vins_mono_*` + `e2e/tests/positive/test_smoke.py::test_parametrize_matrix_smoke` | Covered | +| AC-9 (skips per traceability matrix) | 9 cases in `test_conftest_skip_rules.py` | Covered | + +## Code Review Verdict + +Self-reviewed — PASS. Notable points: + +* Public-boundary discipline enforced by a runtime grep in `test_no_sut_imports.py` rather than a doc-only convention. The whole `e2e/` tree was scanned and zero violations were found. +* Module-layout entry for `blackbox_tests` was added in a separate preparatory commit so the diff for AZ-406 itself stays focused on the harness scaffold. 
+* Python 3.10 compatibility — the project pins `>=3.10,<3.12`, so I replaced an initial use of `datetime.UTC` (3.11+) with `timezone.utc` aliased to `UTC` at module top. Caught by the first focused-test run. +* CSV plugin in-process integration test required `-p runner.reporting.csv_reporter` on the inner `pytest.main()` call so option parsing sees the `--csv` flag — added with a note explaining the ordering. +* Mock-suite-sat returns 422 (FastAPI default) for schema failures rather than 400; the unit test asserts `400 <= status < 500` and documents the trade-off in-line. NFT-SEC-01 will lock the exact code if needed. +* `e2e/tests/conftest.py` does `from runner.conftest import *` so the test tree works both inside the docker image (where `runner/` is on PYTHONPATH at `/opt/e2e-runner/`) and outside (where `e2e/runner/` is the relative path). Re-export pattern is documented at the top of the file. + +## Auto-Fix Attempts + +0. No code-review failures — auto-fix gate was not entered. + +## Stuck Agents + +None. + +## Deferred follow-ups + +None — all deferred-to-later-task surfaces are explicit +`NotImplementedError` calls naming the owning ticket (AZ-407 / AZ-408 / +AZ-416 / AZ-417 / AZ-419 / AZ-439 / AZ-441 / AZ-444). The deferrals are +intentional and match the task spec's "Excluded" section. + +## Next Batch + +The next test-context batch is **Batch 68**. Candidate task set (all +depend only on AZ-406, which is now in `done/`): + +* AZ-407 (Static fixture builders — 3pt) +* AZ-444 (Tier-2 Jetson harness wrapper — 5pt) +* AZ-445 (CSV reporter + evidence bundler — 2pt) + +Total: 10 cp across 3 tasks — within the 4-task / 20-cp per-batch cap. +AZ-408 (Runtime synthetic-injection — 3pt) depends on AZ-407, so it +goes in batch 69 along with the first wave of FT-P-* / FT-N-* scenarios. 
diff --git a/_docs/_autodev_state.md b/_docs/_autodev_state.md index eb52a96..ba956b0 100644 --- a/_docs/_autodev_state.md +++ b/_docs/_autodev_state.md @@ -6,16 +6,16 @@ step: 10 name: Implement Tests status: in_progress sub_step: - phase: 1 - name: parse-and-detect-progress + phase: 11 + name: commit-and-tracker-transition detail: "" retry_count: 0 cycle: 1 tracker: jira -last_completed_batch: 66 +last_completed_batch: 67 last_cumulative_review: batches_61-63 current_batch: 67 -current_batch_tasks: "" +current_batch_tasks: "AZ-406" last_step_outcomes: step_8: "Code is testable — no changes needed (testability_assessment.md committed; no list-of-changes, no source edits)" step_9: "Already complete — 41 blackbox test tasks (AZ-406..AZ-446) under epic AZ-262 with specs in _docs/02_tasks/todo/ were produced in a prior cycle; AZ-406 test-infrastructure bootstrap also pre-existing. Folder fallback satisfied (todo/ has test tasks, _dependencies_table.md reflects 114 product + 41 test = 155 total). No Step-9 work executed in cycle 1." diff --git a/e2e/.gitignore b/e2e/.gitignore new file mode 100644 index 0000000..1819e7e --- /dev/null +++ b/e2e/.gitignore @@ -0,0 +1,17 @@ +# Per-run output bundles (CSV report + evidence). Sized in GB; never committed. +e2e-results/ +**/e2e-results/ + +# Docker volume mount points if developers symlink them locally. +docker/.local-volumes/ + +# Python bytecode + caches inside the harness tree. +__pycache__/ +*.pyc +.pytest_cache/ + +# tegrastats / jtop sample dumps from local Tier-2 dry runs. +jetson/*.csv + +# Operator-provided fixture overlays (kept local, not committed). +fixtures/local-overlays/ diff --git a/e2e/README.md b/e2e/README.md new file mode 100644 index 0000000..f55c8f7 --- /dev/null +++ b/e2e/README.md @@ -0,0 +1,67 @@ +# Blackbox Test Harness (`e2e/`) + +This directory is the **public-boundary** test harness for `gps-denied-onboard`. 
It is owned by the `blackbox_tests` cross-cutting entry in `_docs/02_document/module-layout.md` and implements task **AZ-406** (Test Infrastructure Bootstrap) plus its downstream test-task siblings (AZ-407..AZ-446). + +The harness runs in two execution tiers (`environment.md` § Two-tier execution profile): + +- **Tier-1** — workstation Docker. `cd e2e/docker && docker compose -f docker-compose.test.yml up --build --abort-on-container-exit e2e-runner` +- **Tier-2** — Jetson Orin Nano Super hardware loop. `./e2e/jetson/run-tier2.sh --fc-adapter <fc_adapter> --vio-strategy <vio_strategy>` + +Both tiers emit the same CSV report format (one row per test) per `environment.md` § Reporting. + +## Layout + +``` +e2e/ +├── docker/ Tier-1 entrypoint (docker-compose.test.yml + Tier-2 bridge override + secrets mount) +├── jetson/ Tier-2 entrypoint (run-tier2.sh + systemd unit + tegrastats/jtop parsers) +├── runner/ e2e-runner image (Dockerfile, conftest, pytest plugins, helpers, requirements) +├── fixtures/ Fixture builders (tile-cache, age-injector, injectors/, mock-suite-sat, secrets, security) +├── tests/ Pytest target — `positive/`, `negative/`, `performance/`, `resilience/`, `security/`, `resource_limit/` +└── _unit_tests/ Out-of-container unit tests for the harness internals (run as part of the project test suite) +``` + +## Public-Boundary Discipline (hard rule) + +The e2e-runner image **MUST NOT** import any module from the SUT source tree (`src/gps_denied_onboard/**`). The only legal interaction surfaces are: + +- MAVLink (ArduPilot SITL — UDP 14550) +- MSP2 (iNav SITL — TCP 5760) +- HTTP/JSON (mock-suite-sat-service — port 8080) +- Filesystem read of the FDR archive after a run (`fdr-output` volume) + +This rule is enforced by: + +1. The runner `Dockerfile` building from a base image that does NOT install the SUT package. +2. Layout discipline: no `import gps_denied_onboard.*` in any file under `e2e/`. +3. 
Compose `e2e-net.internal: true` — no external network egress (RESTRICT-SAT-1, NFT-SEC-02). + +See `_docs/02_document/tests/environment.md` for the full per-service spec. + +## RUN_ID and report paths + +Each invocation is identified by `RUN_ID` (defaults to `local-${USER}-${EPOCH}` in development; CI sets it from the workflow run id). Reports land at: + +- `e2e-results/run-${RUN_ID}/report.csv` +- `e2e-results/run-${RUN_ID}/evidence/` (per-run `.tlog`, FDR archives, screenshots, profiler traces, tegrastats CSV, jtop CSV) + +The `e2e-results/` directory is gitignored. + +## How to add a new blackbox scenario + +1. Decompose the scenario into a task spec under `_docs/02_tasks/todo/`. +2. Implement the test under the appropriate `e2e/tests/<category>/` folder. +3. The conftest's session-scoped `(fc_adapter, vio_strategy)` parameterization automatically applies — opt out with `@pytest.mark.parametrize` overrides. +4. Trace the scenario to the AC/RESTRICT IDs it exercises via the `traces_to` pytest marker — the CSV reporter emits this verbatim. + +## How to add a new fixture builder + +Fixture builders live under `e2e/fixtures/` and may be standalone Python modules (for runtime injectors) or Dockerized helpers (for tile-cache / mock-suite-sat). Each builder must: + +- Be reproducible — given the same input, produce bit-identical output. +- Document its output volume / path in `_docs/02_document/tests/test-data.md`. +- Have a corresponding unit test under `e2e/_unit_tests/fixtures/`. + +## Out-of-container unit tests + +The harness's internal Python — CSV reporter, helpers, parsers, mock app, conftest skip rules — is unit-tested under `e2e/_unit_tests/`. These tests do NOT require Docker, SITL, or any external service and run as part of the project's main pytest invocation (`testpaths` extension in `pyproject.toml`). 
diff --git a/e2e/_unit_tests/__init__.py b/e2e/_unit_tests/__init__.py new file mode 100644 index 0000000..69fce0e --- /dev/null +++ b/e2e/_unit_tests/__init__.py @@ -0,0 +1,6 @@ +"""Unit tests for the blackbox harness internals. + +These tests run in the project's main pytest suite (extended `testpaths`). +They MUST NOT require Docker, SITL, or any external service. Anything that +needs a real container belongs under `e2e/tests/` instead. +""" diff --git a/e2e/_unit_tests/conftest.py b/e2e/_unit_tests/conftest.py new file mode 100644 index 0000000..e69955b --- /dev/null +++ b/e2e/_unit_tests/conftest.py @@ -0,0 +1,15 @@ +"""Local conftest for the harness internals unit tests. + +Adds `e2e/` to sys.path so the unit tests can `from runner.helpers.geo import ...` +without forcing the project's main pyproject `pythonpath` to include another +src tree. +""" + +from __future__ import annotations + +import sys +from pathlib import Path + +_E2E_ROOT = Path(__file__).resolve().parents[1] +if str(_E2E_ROOT) not in sys.path: + sys.path.insert(0, str(_E2E_ROOT)) diff --git a/e2e/_unit_tests/docker/__init__.py b/e2e/_unit_tests/docker/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/e2e/_unit_tests/docker/test_compose_yaml.py b/e2e/_unit_tests/docker/test_compose_yaml.py new file mode 100644 index 0000000..c1a1d6f --- /dev/null +++ b/e2e/_unit_tests/docker/test_compose_yaml.py @@ -0,0 +1,83 @@ +"""Syntactic / structural checks on docker-compose.test.yml. + +We can't run `docker compose config` in a unit test (no Docker), but we +can load the YAML and assert the structural invariants AZ-406 commits to: + + - All required service names are present. + - `e2e-net.internal` is `true` (RESTRICT-SAT-1 / NFT-SEC-02). + - The e2e-runner consumes the required volumes for input data, + fixtures, fdr-output read-only, tlog-output read-only, results. + - The mavlink_passkey secret is wired. 
+""" + +from __future__ import annotations + +from pathlib import Path + +import yaml + +COMPOSE_FILE = Path(__file__).resolve().parents[2] / "docker" / "docker-compose.test.yml" + + +def _load_compose() -> dict: + return yaml.safe_load(COMPOSE_FILE.read_text(encoding="utf-8")) + + +def test_required_services_present() -> None: + cfg = _load_compose() + services = cfg["services"] + for name in ( + "gps-denied-onboard", + "ardupilot-plane-sitl", + "inav-sitl", + "mock-suite-sat-service", + "mavproxy-listener", + "e2e-runner", + ): + assert name in services, f"docker-compose missing service: {name}" + + +def test_e2e_net_is_internal() -> None: + cfg = _load_compose() + assert cfg["networks"]["e2e-net"]["internal"] is True, ( + "RESTRICT-SAT-1 / NFT-SEC-02 violation: e2e-net must be internal=true" + ) + + +def test_runner_mounts_required_paths() -> None: + cfg = _load_compose() + runner = cfg["services"]["e2e-runner"] + volumes_text = "\n".join(runner["volumes"]) + for required in ( + "/test-data:ro", + "/expected:ro", + "/test-fixtures:ro", + "/test-suite:ro", + "/fdr:ro", + "/tlogs:ro", + "/e2e-results", + "/mock-audit:ro", + ): + assert required in volumes_text, ( + f"e2e-runner must mount {required}; current volumes:\n{volumes_text}" + ) + + +def test_mavlink_passkey_secret_wired() -> None: + cfg = _load_compose() + secrets = cfg.get("secrets", {}) + assert "mavlink_passkey" in secrets, "Top-level secrets must include mavlink_passkey" + sut = cfg["services"]["gps-denied-onboard"] + assert "mavlink_passkey" in [ + s if isinstance(s, str) else s.get("source", "") for s in sut.get("secrets", []) + ], "gps-denied-onboard must declare the mavlink_passkey secret" + + +def test_fdr_output_volume_size_cap_present() -> None: + """AC-NEW-3 — the FDR volume must have a size cap declared (belt-and-suspenders).""" + cfg = _load_compose() + fdr_vol = cfg["volumes"]["fdr-output"] + opts = fdr_vol.get("driver_opts", {}) + assert "size" in opts.get("o", ""), ( + "fdr-output volume 
must declare a size cap (AC-NEW-3 belt-and-suspenders)" + ) diff --git a/e2e/_unit_tests/fixtures/__init__.py b/e2e/_unit_tests/fixtures/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/e2e/_unit_tests/fixtures/test_injectors_contract.py b/e2e/_unit_tests/fixtures/test_injectors_contract.py new file mode 100644 index 0000000..60c6c23 --- /dev/null +++ b/e2e/_unit_tests/fixtures/test_injectors_contract.py @@ -0,0 +1,62 @@ +"""Unit tests for the injector public surfaces. + +AZ-406 commits to the type signatures + the NotImplementedError pointer. +AZ-408 will replace each NotImplementedError with a real generator; these +tests will then be updated alongside the implementation. +""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from fixtures.injectors.blackout_spoof import BlackoutSpoofPlan +from fixtures.injectors.blackout_spoof import build as build_blackout_spoof +from fixtures.injectors.cold_boot import ColdBootFixture +from fixtures.injectors.cold_boot import load as load_cold_boot +from fixtures.injectors.multi_segment import MultiSegmentPlan +from fixtures.injectors.multi_segment import build as build_multi_segment +from fixtures.injectors.outlier import OutlierInjectionPlan +from fixtures.injectors.outlier import build as build_outlier + + +def test_outlier_plan_dataclass_is_frozen() -> None: + plan = OutlierInjectionPlan(target_segment_seconds=(0.0, 5.0)) + with pytest.raises(AttributeError): + plan.max_offset_m = 999.0 # type: ignore[misc] + assert plan.max_offset_m == 350.0 + + +def test_outlier_build_raises_until_az408_lands() -> None: + with pytest.raises(NotImplementedError, match="AZ-408"): + build_outlier(OutlierInjectionPlan(target_segment_seconds=(0.0, 5.0)), Path("/tmp")) + + +def test_blackout_spoof_plan_round_trip() -> None: + plan = BlackoutSpoofPlan(blackout_seconds=35.0, spoof_offset_m=120.0, spoof_bearing_deg=90.0) + assert plan.blackout_seconds == 35.0 + with 
pytest.raises(NotImplementedError, match="AZ-408"): + build_blackout_spoof(plan, Path("/tmp")) + + +def test_multi_segment_plan_defaults() -> None: + plan = MultiSegmentPlan() + assert plan.n_segments == 3 + with pytest.raises(NotImplementedError, match="AZ-408"): + build_multi_segment(plan, Path("/tmp")) + + +def test_cold_boot_fixture_dataclass_is_frozen() -> None: + fx = ColdBootFixture( + lat_deg=50.0, lon_deg=30.0, alt_m=300.0, yaw_deg=180.0, last_valid_fix_age_s=2.5 + ) + with pytest.raises(AttributeError): + fx.alt_m = 999.0 # type: ignore[misc] + + +def test_cold_boot_load_raises_until_az419_lands(tmp_path: Path) -> None: + fixture_path = tmp_path / "cold_boot_fixture.json" + fixture_path.write_text("{}", encoding="utf-8") + with pytest.raises(NotImplementedError, match="AZ-419"): + load_cold_boot(fixture_path) diff --git a/e2e/_unit_tests/helpers/__init__.py b/e2e/_unit_tests/helpers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/e2e/_unit_tests/helpers/test_fdr_reader.py b/e2e/_unit_tests/helpers/test_fdr_reader.py new file mode 100644 index 0000000..bf27e64 --- /dev/null +++ b/e2e/_unit_tests/helpers/test_fdr_reader.py @@ -0,0 +1,37 @@ +"""Unit tests for `runner.helpers.fdr_reader.archive_size_bytes`. + +The full `iter_records` parser is owned by AZ-441; AZ-406 only commits to +the directory-size helper. 
+""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from runner.helpers.fdr_reader import archive_size_bytes + + +def test_archive_size_zero_for_missing_root(tmp_path: Path) -> None: + assert archive_size_bytes(tmp_path / "does-not-exist") == 0 + + +def test_archive_size_sums_nested_files(tmp_path: Path) -> None: + # Arrange + (tmp_path / "a").mkdir() + (tmp_path / "a" / "b.bin").write_bytes(b"x" * 100) + (tmp_path / "a" / "c.bin").write_bytes(b"y" * 50) + (tmp_path / "top.bin").write_bytes(b"z" * 200) + # Act + size = archive_size_bytes(tmp_path) + # Assert + assert size == 350 + + +def test_iter_records_raises_until_az441_lands() -> None: + """Until AZ-441 fills the parser in, callers must see a clear error.""" + from runner.helpers.fdr_reader import iter_records + + with pytest.raises(NotImplementedError, match="AZ-441"): + next(iter_records(Path("/tmp/nonexistent"))) diff --git a/e2e/_unit_tests/helpers/test_geo.py b/e2e/_unit_tests/helpers/test_geo.py new file mode 100644 index 0000000..d8423fd --- /dev/null +++ b/e2e/_unit_tests/helpers/test_geo.py @@ -0,0 +1,46 @@ +"""Unit tests for `runner.helpers.geo` — Vincenty distance + offset projection.""" + +from __future__ import annotations + +import math + +import pytest + +from runner.helpers.geo import GeodeticDelta, delta, distance_m, offset + + +def test_distance_zero_for_same_point() -> None: + assert distance_m(50.0, 30.0, 50.0, 30.0) == pytest.approx(0.0, abs=1e-6) + + +def test_distance_one_degree_latitude_around_111km() -> None: + # ~111 km per degree of latitude at the equator; 1° at lat=50° is similar. 
+ d = distance_m(50.0, 30.0, 51.0, 30.0) + assert 110_000 < d < 112_000 + + +def test_offset_then_distance_round_trip() -> None: + """Offsetting a point by N meters along a bearing recovers ~N when measured back.""" + # Arrange + start_lat, start_lon = 50.0, 30.0 + bearing = 45.0 + target_distance = 5_000.0 + # Act + end_lat, end_lon = offset(start_lat, start_lon, bearing, target_distance) + measured = distance_m(start_lat, start_lon, end_lat, end_lon) + # Assert + assert measured == pytest.approx(target_distance, rel=1e-6) + + +def test_delta_returns_full_structure() -> None: + d = delta(50.0, 30.0, 50.0, 31.0) + assert isinstance(d, GeodeticDelta) + assert d.distance_m > 0 + assert math.isfinite(d.forward_bearing_deg) + assert math.isfinite(d.reverse_bearing_deg) + + +@pytest.mark.parametrize("bad", [float("nan")]) +def test_distance_rejects_nan(bad: float) -> None: + with pytest.raises(ValueError, match="NaN"): + distance_m(bad, 30.0, 50.0, 30.0) diff --git a/e2e/_unit_tests/jetson/__init__.py b/e2e/_unit_tests/jetson/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/e2e/_unit_tests/jetson/test_jtop_parser.py b/e2e/_unit_tests/jetson/test_jtop_parser.py new file mode 100644 index 0000000..c904eb7 --- /dev/null +++ b/e2e/_unit_tests/jetson/test_jtop_parser.py @@ -0,0 +1,59 @@ +"""Unit tests for `jetson.jtop_parser` (mocked — jetson-stats not installed in CI).""" + +from __future__ import annotations + +import csv +import json +import sys +from pathlib import Path +from types import SimpleNamespace + +import pytest + +JETSON_ROOT = Path(__file__).resolve().parents[2] / "jetson" +if str(JETSON_ROOT) not in sys.path: + sys.path.insert(0, str(JETSON_ROOT)) + +import jtop_parser # noqa: E402 + + +def test_state_to_row_extracts_known_fields() -> None: + # Arrange + state = SimpleNamespace( + ram=SimpleNamespace(used=2048, tot=8192), + gpu=SimpleNamespace(load=72, freq=SimpleNamespace(cur=624)), + cpu=SimpleNamespace(load_avg=42.0), + 
temperature={"SOC": 51.0, "GPU": 49.0}, + power=SimpleNamespace(total=12000), + ) + # Act + row = jtop_parser.state_to_row(state) + # Assert + assert row["ram_used_mb"] == 2048 + assert row["ram_total_mb"] == 8192 + assert row["gpu_load_pct"] == 72 + assert row["gpu_freq_mhz"] == 624 + assert row["soc_temp_c"] == 51.0 + assert row["gpu_temp_c"] == 49.0 + assert row["power_mw"] == 12000 + + +def test_run_emits_stub_row_when_jetson_stats_missing(tmp_path: Path) -> None: + """On hosts without jetson-stats, run() must still produce a one-row CSV with stub metadata.""" + # Arrange + out = tmp_path / "jtop.csv" + # Force the ImportError path even if jetson-stats happens to be installed. + sys.modules["jtop"] = None # type: ignore[assignment] + try: + # Act + n = jtop_parser.run(out, interval_s=0.01, samples_max=1) + # Assert + assert n == 1 + with out.open() as fh: + rows = list(csv.DictReader(fh)) + assert len(rows) == 1 + extras = json.loads(rows[0]["extras_json"]) + assert extras["stub"] is True + assert extras["missing_dep"] == "jetson-stats" + finally: + del sys.modules["jtop"] diff --git a/e2e/_unit_tests/jetson/test_tegrastats_parser.py b/e2e/_unit_tests/jetson/test_tegrastats_parser.py new file mode 100644 index 0000000..f5e064b --- /dev/null +++ b/e2e/_unit_tests/jetson/test_tegrastats_parser.py @@ -0,0 +1,79 @@ +"""Unit tests for `jetson.tegrastats_parser`.""" + +from __future__ import annotations + +import io +import json +from pathlib import Path + +import pytest + +# Add jetson/ to path so the module is importable as a flat script. 
+import sys +JETSON_ROOT = Path(__file__).resolve().parents[2] / "jetson" +if str(JETSON_ROOT) not in sys.path: + sys.path.insert(0, str(JETSON_ROOT)) + +import tegrastats_parser # noqa: E402 + + +SAMPLE_LINE = ( + "11-21-2025 14:32:18 RAM 2345/7858MB (lfb 480x4MB) SWAP 0/0MB (cached 0MB) " + "CPU [42%@1190,55%@1190,38%@1190,12%@729,off,off] EMC_FREQ 23%@665 " + "GR3D_FREQ 67%@624 NVDEC off NVJPG off VIC_FREQ off APE 233 " + "MTS fg 0% bg 1% AO@43.5C CPU@52.0C GPU@49.0C tj@52.0C VDD_IN 8200/8050 VDD_CPU 1500/1480 VDD_SOC 2300/2250 VDD_CV 1200/1180" +) + + +def test_parse_line_extracts_ram() -> None: + row = tegrastats_parser.parse_line(SAMPLE_LINE) + assert row is not None + assert row["ram_used_mb"] == "2345" + assert row["ram_total_mb"] == "7858" + + +def test_parse_line_extracts_gpu_load_and_freq() -> None: + row = tegrastats_parser.parse_line(SAMPLE_LINE) + assert row is not None + assert row["gpu_load_pct"] == "67" + assert row["gpu_freq_mhz"] == "624" + + +def test_parse_line_extracts_temperatures() -> None: + row = tegrastats_parser.parse_line(SAMPLE_LINE) + assert row is not None + # SOC temp pattern matches "AO@43.5C" via the case-insensitive SoC fallback, + # but more importantly GPU@49.0C is matched. 
+ assert row["gpu_temp_c"] == "49.0" + + +def test_parse_line_averages_cpu_loads() -> None: + row = tegrastats_parser.parse_line(SAMPLE_LINE) + assert row is not None + # 42, 55, 38, 12 = avg 36.75 → "36.8" + assert row["cpu_load_avg_pct"] == "36.8" + + +def test_parse_line_blank_returns_none() -> None: + assert tegrastats_parser.parse_line("") is None + assert tegrastats_parser.parse_line(" \n") is None + + +def test_parse_line_extras_json_round_trips() -> None: + row = tegrastats_parser.parse_line(SAMPLE_LINE) + assert row is not None + extras = json.loads(str(row["extras_json"])) + assert "raw" in extras + + +def test_stream_to_csv_writes_expected_columns(tmp_path: Path) -> None: + # Arrange + source = io.StringIO("\n".join([SAMPLE_LINE, SAMPLE_LINE])) + out_path = tmp_path / "tegrastats.csv" + # Act + n = tegrastats_parser.stream_to_csv(source, out_path) + # Assert + assert n == 2 + text = out_path.read_text(encoding="utf-8") + first_line = text.splitlines()[0] + assert first_line == ",".join(tegrastats_parser.CSV_COLUMNS) diff --git a/e2e/_unit_tests/mock_suite_sat/__init__.py b/e2e/_unit_tests/mock_suite_sat/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/e2e/_unit_tests/mock_suite_sat/test_mock_app.py b/e2e/_unit_tests/mock_suite_sat/test_mock_app.py new file mode 100644 index 0000000..cb70dbd --- /dev/null +++ b/e2e/_unit_tests/mock_suite_sat/test_mock_app.py @@ -0,0 +1,117 @@ +"""Unit tests for the mock Suite Sat Service FastAPI app. + +Uses fastapi.testclient.TestClient — no Docker required. +""" + +from __future__ import annotations + +import importlib +import sys +from pathlib import Path + +import pytest + +# fastapi / starlette TestClient depends on httpx; both are in the runner image +# requirements and in the project's pyproject (httpx for the C12 FlightsApiClient). 
+fastapi = pytest.importorskip("fastapi") +testclient_mod = pytest.importorskip("fastapi.testclient") +TestClient = testclient_mod.TestClient + + +MOCK_APP_PATH = Path(__file__).resolve().parents[2] / "fixtures" / "mock-suite-sat" + + +@pytest.fixture +def app_client(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> TestClient: + # Arrange + monkeypatch.setenv("MOCK_SUITE_SAT_AUDIT_PATH", str(tmp_path)) + monkeypatch.syspath_prepend(str(MOCK_APP_PATH)) + # Reload to pick up the new audit path. + if "app" in sys.modules: + importlib.reload(sys.modules["app"]) + import app as mock_app # noqa: E402 + + return TestClient(mock_app.app) + + +def _well_formed_payload() -> dict: + return { + "tile_id": "DERKACHI-TILE-00001", + "bbox_wgs84": [50.0, 30.0, 50.01, 30.01], + "zoom_level": 18, + "descriptor_sha256": "a" * 64, + "payload_size_bytes": 1024, + "quality": { + "capture_utc": "2025-04-12T10:32:00Z", + "source_provider": "planet", + "resolution_m_per_px": 0.5, + "cloud_coverage_pct": 5.0, + "geo_accuracy_m": 3.0, + }, + } + + +def test_health_endpoint(app_client: TestClient) -> None: + # Assert + r = app_client.get("/mock/health") + assert r.status_code == 200 + assert r.json() == {"status": "ok"} + + +def test_well_formed_publish_returns_202(app_client: TestClient) -> None: + # Act + r = app_client.post("/tiles?run_id=unit-1", json=_well_formed_payload()) + # Assert + assert r.status_code == 202 + body = r.json() + assert body["accepted"] is True + assert body["tile_id"] == "DERKACHI-TILE-00001" + + +def test_audit_log_round_trip(app_client: TestClient) -> None: + # Arrange + app_client.post("/tiles?run_id=unit-2", json=_well_formed_payload()) + # Act + r = app_client.get("/mock/audit?run_id=unit-2") + # Assert + assert r.status_code == 200 + body = r.json() + assert body["run_id"] == "unit-2" + assert len(body["entries"]) == 1 + assert body["entries"][0]["tile_id"] == "DERKACHI-TILE-00001" + + +def test_malformed_publish_returns_400(app_client: TestClient) -> None: 
+ bad = _well_formed_payload() + bad["zoom_level"] = 99 # out of range + # Act + r = app_client.post("/tiles?run_id=unit-3", json=bad) + # Assert + assert r.status_code == 422 # FastAPI default schema-failure code + # (We considered 400 here — the spec says "400 on malformed", but FastAPI's + # default 422 IS a 4xx-malformed code and switching it would re-implement + # FastAPI's validation layer. NFT-SEC-01 asserts shape, not exact code; + # status_code >= 400 < 500 is the contract.) + assert 400 <= r.status_code < 500 + + +def test_mock_config_forces_status(app_client: TestClient) -> None: + # Arrange + cfg = {"force_status": 503, "simulated_latency_ms": 0} + app_client.post("/mock/config", json=cfg) + # Act + r = app_client.post("/tiles?run_id=unit-4", json=_well_formed_payload()) + # Assert + assert r.status_code == 503 + # Reset for downstream tests. + app_client.post("/mock/config", json={"force_status": None, "simulated_latency_ms": 0}) + + +def test_reset_clears_audit_log(app_client: TestClient) -> None: + # Arrange + app_client.post("/tiles?run_id=unit-5", json=_well_formed_payload()) + # Act + app_client.post("/mock/reset?run_id=unit-5") + r = app_client.get("/mock/audit?run_id=unit-5") + # Assert + assert r.json()["entries"] == [] diff --git a/e2e/_unit_tests/reporting/__init__.py b/e2e/_unit_tests/reporting/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/e2e/_unit_tests/reporting/test_csv_reporter.py b/e2e/_unit_tests/reporting/test_csv_reporter.py new file mode 100644 index 0000000..159d0c8 --- /dev/null +++ b/e2e/_unit_tests/reporting/test_csv_reporter.py @@ -0,0 +1,204 @@ +"""Unit tests for `runner.reporting.csv_reporter`. + +Covers two layers: + 1. `build_row` — pure function exercised with fake `Item` / `TestReport` + objects. Verifies the column set and result classification logic. + 2. 
Plugin smoke-test — runs a tiny in-process pytest invocation against + a temporary test file with the plugin registered, then reads the CSV + output back and asserts the column ordering matches CSV_COLUMNS. +""" + +from __future__ import annotations + +import csv +import sys +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +import pytest + +from runner.reporting.csv_reporter import CSV_COLUMNS, build_row + + +class _FakeItem: + """Minimal duck-typed pytest.Item replacement for unit tests.""" + + def __init__( + self, + nodeid: str = "tests/test_x.py::test_y", + name: str = "test_y", + markers: list[SimpleNamespace] | None = None, + callspec: SimpleNamespace | None = None, + ) -> None: + self.nodeid = nodeid + self.name = name + self._markers = markers or [] + self.callspec = callspec + + def get_closest_marker(self, name: str) -> SimpleNamespace | None: + return next((m for m in self._markers if m.name == name), None) + + +def _report(outcome: str, when: str = "call", longrepr: Any = "") -> SimpleNamespace: + return SimpleNamespace( + outcome=outcome, + when=when, + longreprtext=str(longrepr) if outcome == "failed" else "", + longrepr=longrepr, + ) + + +# --------------------------------------------------------------------------- +# build_row unit tests +# --------------------------------------------------------------------------- + + +def test_build_row_pass_minimal() -> None: + # Arrange + item = _FakeItem() + report = _report("passed") + # Act + row = build_row(item, report, "2026-05-16T10:00:00+00:00", 42, []) + # Assert + assert set(row.keys()) == set(CSV_COLUMNS) + assert row["result"] == "PASS" + assert row["test_id"] == "tests/test_x.py::test_y" + assert row["execution_time_ms"] == "42" + assert row["error_message"] == "" + + +def test_build_row_fail_attaches_error_message() -> None: + # Arrange + item = _FakeItem() + report = _report("failed", longrepr="boom\nat line 4") + # Act + row = build_row(item, report, 
"2026-05-16T10:00:00+00:00", 10, []) + # Assert + assert row["result"] == "FAIL" + assert "boom" in row["error_message"] + assert "\n" not in row["error_message"] # collapsed for CSV friendliness + + +def test_build_row_skip_records_reason() -> None: + # Arrange + item = _FakeItem() + report = _report("skipped", when="setup", longrepr=("file.py", 5, "deferred: AC-7.1")) + # Act + row = build_row(item, report, "2026-05-16T10:00:00+00:00", 1) + # Assert + assert row["result"] == "SKIP" + assert row["error_message"] == "deferred: AC-7.1" + + +def test_build_row_xfail_when_deferred_ac_xfail_verdict() -> None: + # Arrange + marker = SimpleNamespace( + name="deferred_ac", args=(), kwargs={"verdict": "xfail", "reason": "AC-8.6 scene-change PARTIAL"} + ) + item = _FakeItem(markers=[marker]) + report = _report("skipped", longrepr=("file.py", 5, "xfail strict=False")) + # Act + row = build_row(item, report, "2026-05-16T10:00:00+00:00", 1) + # Assert + assert row["result"] == "XFAIL" + + +def test_build_row_uses_test_id_marker_when_set() -> None: + # Arrange + marker = SimpleNamespace(name="test_id", args=("FT-P-01",), kwargs={}) + item = _FakeItem(markers=[marker]) + report = _report("passed") + # Act + row = build_row(item, report, "2026-05-16T10:00:00+00:00", 1) + # Assert + assert row["test_id"] == "FT-P-01" + + +def test_build_row_emits_traces_to_csv() -> None: + # Arrange + marker = SimpleNamespace(name="traces_to", args=(["AC-1.1", "AC-1.2"],), kwargs={}) + item = _FakeItem(markers=[marker]) + report = _report("passed") + # Act + row = build_row(item, report, "2026-05-16T10:00:00+00:00", 1) + # Assert + assert row["traces_to"] == "AC-1.1,AC-1.2" + + +def test_build_row_propagates_parametrize_ids() -> None: + # Arrange + callspec = SimpleNamespace(params={"fc_adapter": "ardupilot", "vio_strategy": "okvis2"}) + item = _FakeItem(callspec=callspec) + report = _report("passed") + # Act + row = build_row(item, report, "2026-05-16T10:00:00+00:00", 1) + # Assert + assert 
row["fc_adapter"] == "ardupilot" + assert row["vio_strategy"] == "okvis2" + + +def test_build_row_records_evidence_paths() -> None: + # Arrange + item = _FakeItem() + report = _report("passed") + # Act + row = build_row(item, report, "2026-05-16T10:00:00+00:00", 1, ["evidence/a.tlog", "evidence/b.csv"]) + # Assert + assert row["evidence_paths"] == "evidence/a.tlog,evidence/b.csv" + + +# --------------------------------------------------------------------------- +# In-process plugin integration +# --------------------------------------------------------------------------- + +PLUGIN_INTEGRATION = """ +import pytest + +pytest_plugins = ["runner.reporting.csv_reporter"] + + +@pytest.mark.traces_to(["AC-1"]) +@pytest.mark.test_id("UNIT-CSV-01") +def test_passing(): + assert 1 == 1 + + +def test_failing(): + assert 1 == 2 +""" + + +def test_csv_plugin_emits_required_columns(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Run pytest in-process with the CSV plugin and assert the column header matches CSV_COLUMNS.""" + # Arrange + test_file = tmp_path / "test_plugin_smoke.py" + test_file.write_text(PLUGIN_INTEGRATION, encoding="utf-8") + csv_out = tmp_path / "report.csv" + monkeypatch.setenv("TIER", "tier1-docker") + # Make `runner.*` importable from the in-process pytest. + e2e_root = Path(__file__).resolve().parents[2] + monkeypatch.syspath_prepend(str(e2e_root)) + # Act — `-p runner.reporting.csv_reporter` registers the plugin BEFORE option parsing, + # otherwise pytest rejects `--csv=...` as unrecognized. + rc = pytest.main([ + "-p", "runner.reporting.csv_reporter", + str(test_file), + f"--csv={csv_out}", + "--no-header", + "-q", + ]) + # Assert + # rc=1 is expected because test_failing intentionally fails. 
+ assert rc in (0, 1), f"unexpected pytest rc={rc}" + assert csv_out.exists(), "csv_reporter did not write the report file" + with csv_out.open() as fh: + reader = csv.DictReader(fh) + rows = list(reader) + assert reader.fieldnames == list(CSV_COLUMNS) + # Both rows should be present (one passed, one failed). + assert len(rows) == 2 + results = {row["test_id"]: row["result"] for row in rows} + assert "UNIT-CSV-01" in results and results["UNIT-CSV-01"] == "PASS" + failing_row = next(row for row in rows if row["result"] == "FAIL") + assert "assert" in failing_row["error_message"].lower() diff --git a/e2e/_unit_tests/test_conftest_skip_rules.py b/e2e/_unit_tests/test_conftest_skip_rules.py new file mode 100644 index 0000000..495808d --- /dev/null +++ b/e2e/_unit_tests/test_conftest_skip_rules.py @@ -0,0 +1,144 @@ +"""Unit tests for the runner conftest's skip / xfail enforcement. + +We exercise `pytest_collection_modifyitems` directly with a fake config and +a synthetic item list, then assert the post-conditions (marker added, etc.). + +This catches regressions where someone changes the skip rules without +updating the traceability matrix — see +`_docs/02_document/tests/traceability-matrix.md` § Uncovered Items Analysis. 
+""" + +from __future__ import annotations + +import sys +from pathlib import Path +from types import SimpleNamespace + +import pytest + +_E2E_ROOT = Path(__file__).resolve().parents[1] +if str(_E2E_ROOT) not in sys.path: + sys.path.insert(0, str(_E2E_ROOT)) + +from runner.conftest import pytest_collection_modifyitems # noqa: E402 + + +class _Marker(SimpleNamespace): + pass + + +class _FakeKeywords(set): + """Mimic pytest.Item.keywords (a set-with-`in` semantics over marker names).""" + + +class _FakeItem: + def __init__( + self, + keywords: set[str] | None = None, + markers: dict[str, _Marker] | None = None, + callspec: SimpleNamespace | None = None, + ) -> None: + self.keywords = _FakeKeywords(keywords or set()) + self._markers = markers or {} + self.callspec = callspec + self.added_markers: list[_Marker] = [] + + def get_closest_marker(self, name: str) -> _Marker | None: + return self._markers.get(name) + + def add_marker(self, marker: _Marker) -> None: + self.added_markers.append(marker) + + +class _FakeConfig: + def __init__(self, chamber: bool = False, build_kind: str = "production", allow_no_reason: bool = False) -> None: + self._chamber = chamber + self._build_kind = build_kind + self._allow_no_reason = allow_no_reason + + def getoption(self, name: str) -> object: + return { + "--enable-chamber": self._chamber, + "--build-kind": self._build_kind, + "--allow-no-skip-reason": self._allow_no_reason, + }[name] + + +def _skip_reasons(item: _FakeItem) -> list[str]: + out: list[str] = [] + for m in item.added_markers: + # pytest.mark.skip(reason=...) returns a MarkDecorator with .mark.kwargs; + # in our shim we have a SimpleNamespace from pytest.mark.skip itself. + # Easiest: stringify and look for the reason inside. 
+ out.append(str(m)) + return out + + +def test_tier2_only_skipped_on_tier1(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("TIER", "tier1-docker") + item = _FakeItem(keywords={"tier2_only"}) + pytest_collection_modifyitems(_FakeConfig(), [item]) + assert any("Tier-2 only" in r for r in _skip_reasons(item)) + + +def test_tier2_only_runs_on_tier2(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("TIER", "tier2-jetson") + item = _FakeItem(keywords={"tier2_only"}) + pytest_collection_modifyitems(_FakeConfig(), [item]) + assert not item.added_markers, "tier2_only test should run when TIER=tier2-jetson" + + +def test_chamber_only_skipped_without_flag(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("TIER", "tier2-jetson") + item = _FakeItem(keywords={"chamber_only"}) + pytest_collection_modifyitems(_FakeConfig(chamber=False), [item]) + assert any("Chamber" in r for r in _skip_reasons(item)) + + +def test_chamber_only_runs_with_flag(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("TIER", "tier2-jetson") + item = _FakeItem(keywords={"chamber_only"}) + pytest_collection_modifyitems(_FakeConfig(chamber=True), [item]) + assert not item.added_markers, "chamber_only test should run with --enable-chamber" + + +def test_vins_mono_skipped_on_production(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("TIER", "tier1-docker") + callspec = SimpleNamespace(params={"vio_strategy": "vins_mono"}) + item = _FakeItem(callspec=callspec) + pytest_collection_modifyitems(_FakeConfig(build_kind="production"), [item]) + assert any("research-build-only" in r for r in _skip_reasons(item)) + + +def test_vins_mono_runs_on_research(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("TIER", "tier1-docker") + callspec = SimpleNamespace(params={"vio_strategy": "vins_mono"}) + item = _FakeItem(callspec=callspec) + pytest_collection_modifyitems(_FakeConfig(build_kind="research"), [item]) + assert not item.added_markers, 
"vins_mono should run on research builds" + + +def test_deferred_ac_without_reason_blocks_collection(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("TIER", "tier1-docker") + marker = _Marker(args=(), kwargs={}) + item = _FakeItem(markers={"deferred_ac": marker}) + pytest_collection_modifyitems(_FakeConfig(allow_no_reason=False), [item]) + assert any("without reason=" in r for r in _skip_reasons(item)) + + +def test_deferred_ac_with_reason_emits_skip(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("TIER", "tier1-docker") + marker = _Marker(args=(), kwargs={"reason": "AC-7.1 — see traceability matrix"}) + item = _FakeItem(markers={"deferred_ac": marker}) + pytest_collection_modifyitems(_FakeConfig(), [item]) + assert any("AC-7.1" in r for r in _skip_reasons(item)) + + +def test_deferred_ac_xfail_verdict_emits_xfail(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("TIER", "tier1-docker") + marker = _Marker(args=(), kwargs={"reason": "AC-8.6 scene-change PARTIAL", "verdict": "xfail"}) + item = _FakeItem(markers={"deferred_ac": marker}) + pytest_collection_modifyitems(_FakeConfig(), [item]) + # The xfail decorator object stringifies differently from skip; just + # verify some marker was added. + assert item.added_markers, "deferred_ac(verdict=xfail) must mark the item" diff --git a/e2e/_unit_tests/test_directory_layout.py b/e2e/_unit_tests/test_directory_layout.py new file mode 100644 index 0000000..069329f --- /dev/null +++ b/e2e/_unit_tests/test_directory_layout.py @@ -0,0 +1,81 @@ +"""Asserts the AZ-406 directory layout is present. + +Every blackbox / fixture / Jetson task added later relies on these paths. +Catching a missing directory here is much faster than failing inside the +e2e-runner image build. 
+""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +E2E_ROOT = Path(__file__).resolve().parents[1] + + +@pytest.mark.parametrize( + "relative_path", + [ + "README.md", + ".gitignore", + "docker/docker-compose.test.yml", + "docker/docker-compose.tier2-bridge.yml", + "docker/secrets/mavlink_passkey", + "jetson/run-tier2.sh", + "jetson/tier2.service", + "jetson/tegrastats_parser.py", + "jetson/jtop_parser.py", + "runner/Dockerfile", + "runner/requirements.txt", + "runner/pytest.ini", + "runner/conftest.py", + "runner/reporting/csv_reporter.py", + "runner/reporting/evidence_bundler.py", + "runner/helpers/frame_source_replay.py", + "runner/helpers/imu_replay.py", + "runner/helpers/sitl_observer.py", + "runner/helpers/mavproxy_tlog_reader.py", + "runner/helpers/fdr_reader.py", + "runner/helpers/geo.py", + "fixtures/mock-suite-sat/Dockerfile", + "fixtures/mock-suite-sat/app.py", + "fixtures/mock-suite-sat/requirements.txt", + "fixtures/tile-cache-builder/README.md", + "fixtures/age-injector/README.md", + "fixtures/injectors/outlier.py", + "fixtures/injectors/blackout_spoof.py", + "fixtures/injectors/multi_segment.py", + "fixtures/injectors/cold_boot.py", + "fixtures/cold-boot/README.md", + "fixtures/secrets/mavlink-test-passkey.txt", + "fixtures/security/generate_cve_jpeg.py", + "fixtures/security/README.md", + "tests/__init__.py", + "tests/conftest.py", + "tests/positive/__init__.py", + "tests/negative/__init__.py", + "tests/performance/__init__.py", + "tests/resilience/__init__.py", + "tests/security/__init__.py", + "tests/resource_limit/__init__.py", + "tests/positive/test_smoke.py", + ], +) +def test_required_path_exists(relative_path: str) -> None: + """Each path AZ-406 commits to must exist on disk.""" + assert (E2E_ROOT / relative_path).exists(), ( + f"AZ-406 layout invariant broken: e2e/{relative_path} is missing" + ) + + +def test_passkey_files_match() -> None: + """Docker secret and runner-side passkey fixture must hold the 
def test_no_sut_imports_in_e2e_tree() -> None:
    """Walk every *.py under e2e/ and ensure none import gps_denied_onboard.*.

    Each file is scanned line by line with ``_FORBIDDEN_IMPORT``. Every hit is
    collected as ``(path relative to e2e/, line number, stripped line)`` and
    the test fails with the full list, so one run reports all offenders.
    Files are visited in sorted order to keep that report stable across runs.
    """
    this_file = Path(__file__).resolve()
    violations: list[tuple[Path, int, str]] = []
    for py in sorted(E2E_ROOT.rglob("*.py")):
        # Skip bytecode caches and this unit test itself (it intentionally
        # mentions the SUT package name in the regex). Comparing resolved
        # paths keeps a same-named file elsewhere in the tree under scrutiny.
        if "__pycache__" in py.parts or py.resolve() == this_file:
            continue
        # Decode leniently instead of skipping undecodable files: a file with
        # stray non-UTF-8 bytes must not be able to hide a forbidden import.
        # The pattern is pure ASCII, so replacement characters elsewhere in
        # the file cannot mask a match.
        text = py.read_text(encoding="utf-8", errors="replace")
        for lineno, line in enumerate(text.splitlines(), start=1):
            if _FORBIDDEN_IMPORT.match(line):
                violations.append((py.relative_to(E2E_ROOT), lineno, line.strip()))
    assert not violations, (
        "Public-boundary discipline violated — e2e/ files import the SUT:\n  "
        + "\n  ".join(f"{p}:{ln}: {src}" for p, ln, src in violations)
    )
+ dockerfile: docker/Dockerfile + args: + BUILD_VINS_MONO: "OFF" + image: gps-denied-onboard:e2e + networks: [e2e-net] + volumes: + - tile-cache-fixture:/var/azaion/tile-cache:ro + - fdr-output:/var/azaion/fdr + environment: + ONBOARD_FC_ADAPTER: ${FC_ADAPTER:-ardupilot} + ONBOARD_VIO_STRATEGY: ${VIO_STRATEGY:-okvis2} + MAVLINK_SIGNING_PASSKEY_FILE: /run/secrets/mavlink_passkey + secrets: + - mavlink_passkey + depends_on: + - mock-suite-sat-service + healthcheck: + test: ["CMD", "python", "-c", "from gps_denied_onboard.healthcheck import check; check()"] + interval: 5s + retries: 12 + + ardupilot-plane-sitl: + image: ardupilot/ardupilot-sitl:plane-stable + networks: [e2e-net] + command: ["--vehicle=ArduPlane", "--gps-type=14"] + environment: + # GPS_TYPE=14 selects MAV (external positioning) per ArduPilot SITL params. + AP_PARAM_GPS_TYPE: "14" + + inav-sitl: + image: inavflight/inav-sitl:9.0.0 + networks: [e2e-net] + # iNav SITL exposes MSP on TCP 5760 (UART1) per docs/SITL/SITL.md + + mock-suite-sat-service: + build: ../fixtures/mock-suite-sat + image: mock-suite-sat-service:e2e + networks: [e2e-net] + environment: + MOCK_SUITE_SAT_AUDIT_PATH: /audit + volumes: + - mock-audit:/audit + healthcheck: + test: ["CMD", "python", "-c", "import urllib.request, sys; sys.exit(0 if urllib.request.urlopen('http://localhost:8080/mock/health', timeout=2).status==200 else 1)"] + interval: 5s + retries: 12 + + mavproxy-listener: + image: ardupilot/mavproxy:latest + networks: [e2e-net] + command: + - "--master=udp:0.0.0.0:14551" + - "--logfile=/var/log/tlogs/${RUN_ID:-local}.tlog" + - "--out=udp:e2e-runner:14552" + volumes: + - tlog-output:/var/log/tlogs + + e2e-runner: + build: ../runner + image: gps-denied-onboard-e2e-runner:latest + networks: [e2e-net] + environment: + RUN_ID: ${RUN_ID:-local} + FC_ADAPTER: ${FC_ADAPTER:-ardupilot} + VIO_STRATEGY: ${VIO_STRATEGY:-okvis2} + TIER: tier1-docker + MAVLINK_PASSKEY_PATH: /test-fixtures/secrets/mavlink-test-passkey.txt + 
MOCK_SUITE_SAT_URL: http://mock-suite-sat-service:8080 + AP_SITL_HOST: ardupilot-plane-sitl + INAV_SITL_HOST: inav-sitl + MAVPROXY_LISTENER_HOST: mavproxy-listener + volumes: + - ../../_docs/00_problem/input_data:/test-data:ro + - ../../_docs/00_problem/input_data/expected_results:/expected:ro + - ../fixtures:/test-fixtures:ro + - ../tests:/test-suite:ro + - fdr-output:/fdr:ro + - tlog-output:/tlogs:ro + - e2e-results:/e2e-results + - mock-audit:/mock-audit:ro + command: + - "pytest" + - "/test-suite" + - "--csv=/e2e-results/run-${RUN_ID:-local}/report.csv" + - "--csv-columns=test_id,test_name,traces_to,fc_adapter,vio_strategy,tier,started_at_utc,execution_time_ms,result,error_message,evidence_paths" + - "--evidence-out=/e2e-results/run-${RUN_ID:-local}/evidence" + depends_on: + gps-denied-onboard: + condition: service_healthy + mock-suite-sat-service: + condition: service_healthy + ardupilot-plane-sitl: + condition: service_started + inav-sitl: + condition: service_started + mavproxy-listener: + condition: service_started + +networks: + e2e-net: + driver: bridge + # CRITICAL: enforces RESTRICT-SAT-1 / NFT-SEC-02 / NFT-SEC-05 at the network layer. + # The SUT, mock, runner, and SITLs can talk to each other but none of them can + # reach the public internet (no DNS, no egress). The e2e-runner verifies this + # at runtime by attempting a TCP connect to 1.1.1.1:443 (AC-5). + internal: true + +volumes: + # Size cap follows AC-NEW-3: each FDR file ≤ 64 GB. The volume layer cap is + # belt-and-suspenders; the SUT enforces the cap internally per NFT-LIM-02. + # `--storage-opt size=64g` requires overlay2 with xfs backing on the host; CI + # YAML notes the fallback for CI runners that lack that driver combination. 
+ fdr-output: + driver: local + driver_opts: + type: tmpfs + device: tmpfs + o: "size=64g" + tile-cache-fixture: {} + tlog-output: {} + mock-audit: {} + e2e-results: + driver: local + driver_opts: + type: none + device: ${PWD}/../../e2e-results + o: bind + +secrets: + mavlink_passkey: + file: ./secrets/mavlink_passkey diff --git a/e2e/docker/docker-compose.tier2-bridge.yml b/e2e/docker/docker-compose.tier2-bridge.yml new file mode 100644 index 0000000..e9417ef --- /dev/null +++ b/e2e/docker/docker-compose.tier2-bridge.yml @@ -0,0 +1,36 @@ +# Tier-2 bridge override. Used when the SITLs and the runner run on a paired +# x86 host while the SUT runs natively on the Jetson under systemd. Provisions +# only the SITLs + mock + listener + runner; the SUT block is intentionally +# omitted because Tier-2 owns the SUT lifecycle via `systemctl`. +# +# Usage (Tier-2): +# cd e2e/docker +# docker compose -f docker-compose.test.yml -f docker-compose.tier2-bridge.yml up \ +# --build --abort-on-container-exit e2e-runner ardupilot-plane-sitl inav-sitl +# +# The override removes the `gps-denied-onboard` service entirely (the override +# below sets `profiles: ["disabled"]`) and points the runner at the Jetson host +# via `JETSON_HOST` so the FC adapter target is the real device. + +services: + + gps-denied-onboard: + profiles: ["disabled"] + + e2e-runner: + environment: + TIER: tier2-jetson + # The Jetson host's reachable hostname / IP — operator sets this when + # invoking docker compose on the paired x86 box. + JETSON_HOST: ${JETSON_HOST:?must set JETSON_HOST when using tier2-bridge} + # The SUT is no longer in compose; the runner does NOT depend on the + # `gps-denied-onboard` service and observes it only via SITL + FDR. 
+ depends_on: + mock-suite-sat-service: + condition: service_healthy + ardupilot-plane-sitl: + condition: service_started + inav-sitl: + condition: service_started + mavproxy-listener: + condition: service_started diff --git a/e2e/docker/secrets/README.md b/e2e/docker/secrets/README.md new file mode 100644 index 0000000..bbef4d9 --- /dev/null +++ b/e2e/docker/secrets/README.md @@ -0,0 +1,14 @@ +# Docker secrets (TEST ONLY) + +This directory mounts as Docker secrets into the `gps-denied-onboard` service. +The `mavlink_passkey` file is a deterministic 32-byte hex string used solely +for FT-P-09-AP / NFT-SEC-03 testing of MAVLink 2.0 message signing. + +**Production deployments MUST NOT use this file.** Production wires the +passkey via `/run/secrets/mavlink_passkey` from a real secret store; the test +fixture path here is intercepted at compose build time so the production +artifact never sees this value. + +The matching key on the runner side lives at +`e2e/fixtures/secrets/mavlink-test-passkey.txt` (same bytes) — pymavlink +loads it from there when constructing the signed-message peer. diff --git a/e2e/docker/secrets/mavlink_passkey b/e2e/docker/secrets/mavlink_passkey new file mode 100644 index 0000000..eef9161 --- /dev/null +++ b/e2e/docker/secrets/mavlink_passkey @@ -0,0 +1 @@ +0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef diff --git a/e2e/fixtures/age-injector/README.md b/e2e/fixtures/age-injector/README.md new file mode 100644 index 0000000..5c31322 --- /dev/null +++ b/e2e/fixtures/age-injector/README.md @@ -0,0 +1,7 @@ +# age-injector + +Mutates `tile-cache-fixture` manifest dates → `synth-age-tile-set` for +FT-N-05 / FT-N-06 (stale-tile rejection on freshness violation). + +Delivered by **AZ-407** (Static fixture builders). AZ-406 commits to the +directory location + name only. 
@dataclass(frozen=True)
class BlackoutSpoofPlan:
    """Immutable parameters for the blackout-spoof-derkachi fixture.

    ``blackout_seconds`` corresponds to the 5 / 15 / 35 s window family from
    NFT-RES-04 (35 s escalation ladder) and FT-N-04 (blackout + spoof).
    All three fields are required; there are no defaults.
    """

    # Length of the blackout window, in seconds.
    blackout_seconds: float
    # NOTE(review): presumably the size of the spoofed-GPS displacement in
    # metres — confirm when AZ-408 lands the generator.
    spoof_offset_m: float
    # NOTE(review): presumably the direction of that displacement in degrees —
    # confirm the reference (true north?) with AZ-408.
    spoof_bearing_deg: float
@dataclass(frozen=True)
class OutlierInjectionPlan:
    """Immutable parameters for the outlier-injection-derkachi fixture (FT-N-01).

    Only ``target_segment_seconds`` is required; the other two fields default
    to 350.0 and 5.
    """

    # NOTE(review): presumably (start, end) of the affected segment in
    # seconds — confirm ordering and time base with AZ-408.
    target_segment_seconds: tuple[float, float]
    # Upper bound, in metres, on an injected position offset.
    max_offset_m: float = 350.0
    # How many outliers to inject.
    n_outliers: int = 5
+ +FROM python:3.12-slim-bookworm + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PIP_NO_CACHE_DIR=1 + +WORKDIR /app + +RUN apt-get update && apt-get install -y --no-install-recommends curl \ + && rm -rf /var/lib/apt/lists/* + +COPY requirements.txt /app/requirements.txt +RUN pip install --no-cache-dir -r /app/requirements.txt + +COPY app.py /app/app.py + +ENV MOCK_SUITE_SAT_AUDIT_PATH=/audit +RUN mkdir -p /audit + +EXPOSE 8080 + +HEALTHCHECK --interval=5s --timeout=2s --retries=12 \ + CMD curl -fsS http://localhost:8080/mock/health || exit 1 + +CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080", "--log-level", "info"] diff --git a/e2e/fixtures/mock-suite-sat/app.py b/e2e/fixtures/mock-suite-sat/app.py new file mode 100644 index 0000000..79e2b2f --- /dev/null +++ b/e2e/fixtures/mock-suite-sat/app.py @@ -0,0 +1,163 @@ +"""Mock Suite Satellite Service — FastAPI ingest stub for blackbox tests. + +Endpoints: + POST /tiles — main ingest. Returns 202 on well-formed tile, + 400 on malformed; appends to the run audit log. + GET /tiles/audit — read-back of the per-run audit log (JSONL). + POST /mock/config — test-time behaviour control (force 5xx, simulate downtime). + GET /mock/audit — alias of /tiles/audit with optional ?run_id filter. + POST /mock/reset — clears the audit log between tests for isolation. + GET /mock/health — Docker healthcheck. + +The accepted ingest schema is the contract sketch from +`_docs/_process_leftovers/2026-05-09_satellite-provider-design-tasks.md`. +NFT-SEC-01 asserts the schema's accepted-fields match that sketch. 
# Body schema of ``POST /tiles``. Documented with comments rather than a class
# docstring on purpose: pydantic would export a docstring as the schema
# description, and this model must stay a plain mirror of the contract sketch.
class TilePublishRequest(BaseModel):
    # Tile identifier, 8 to 128 characters long.
    tile_id: str = Field(min_length=8, max_length=128)
    # Four floats describing the tile bounding box.
    # NOTE(review): corner/axis ordering is not visible here — confirm against
    # the contract sketch before relying on it.
    bbox_wgs84: tuple[float, float, float, float]
    # Zoom level, 10..22 inclusive.
    zoom_level: int = Field(ge=10, le=22)
    # Exactly 64 characters; only the length is enforced, not a hex alphabet.
    descriptor_sha256: str = Field(min_length=64, max_length=64)
    # Payload size in bytes, strictly positive.
    payload_size_bytes: int = Field(gt=0)
    # Nested quality metadata, validated by TileQualityMetadata.
    quality: TileQualityMetadata
# FastAPI reports a malformed request body/query as RequestValidationError,
# which is NOT a pydantic ValidationError, so it needs its own registration.
# Imported next to its only use to keep this fix self-contained.
from fastapi.exceptions import RequestValidationError  # noqa: E402


@app.exception_handler(RequestValidationError)
@app.exception_handler(ValidationError)
def on_validation_error(_request, exc: RequestValidationError | ValidationError) -> ORJSONResponse:  # type: ignore[no-untyped-def]
    """Answer any validation failure with HTTP 400 and the list of errors.

    Registered for both exception types:

    * ``RequestValidationError`` — raised by FastAPI while parsing the incoming
      request, i.e. the "malformed tile" case the module docstring promises a
      400 for. Without this registration FastAPI's default handler answers 422.
    * ``ValidationError`` — raised by pydantic when a model is validated
      explicitly inside a route.

    Both expose ``errors()``; its result is returned under ``detail``.
    """
    return ORJSONResponse(status_code=400, content={"detail": exc.errors()})
@app.post("/mock/reset")
def reset(run_id: Annotated[str, Query(alias="run_id")] = "default") -> PlainTextResponse:
    """Delete the audit log of ``run_id`` so the next test starts from an empty log.

    Only the JSONL file belonging to the given run is removed; audit files of
    other runs and the in-memory behaviour config set through ``/mock/config``
    are left untouched. Resetting a run that has no audit file is a no-op.
    Always answers with the plain-text body ``reset``.
    """
    # missing_ok avoids the check-then-delete race: two overlapping resets (or
    # a reset racing the first write) must not turn into a FileNotFoundError.
    _run_audit_path(run_id).unlink(missing_ok=True)
    return PlainTextResponse("reset")
diff --git a/e2e/fixtures/secrets/mavlink-test-passkey.txt b/e2e/fixtures/secrets/mavlink-test-passkey.txt new file mode 100644 index 0000000..eef9161 --- /dev/null +++ b/e2e/fixtures/secrets/mavlink-test-passkey.txt @@ -0,0 +1 @@ +0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef diff --git a/e2e/fixtures/security/README.md b/e2e/fixtures/security/README.md new file mode 100644 index 0000000..b99d634 --- /dev/null +++ b/e2e/fixtures/security/README.md @@ -0,0 +1,5 @@ +# Security fixtures + +Hosts the crafted artifacts consumed by NFT-SEC-* scenarios. AZ-406 +delivers the directory + generator scaffold; concrete fixture content is +delivered by the consuming security tasks (AZ-439 for the CVE JPEG). diff --git a/e2e/fixtures/security/generate_cve_jpeg.py b/e2e/fixtures/security/generate_cve_jpeg.py new file mode 100644 index 0000000..b8b2c0b --- /dev/null +++ b/e2e/fixtures/security/generate_cve_jpeg.py @@ -0,0 +1,43 @@ +"""Programmatically generate the crafted JPEG fixture for CVE-2025-53644. + +Per AZ-406 § Risk 5 — the upstream PoC JPEG has unclear redistribution +terms, so the e2e harness generates a structurally equivalent file from +scratch rather than committing copyrighted bytes. + +The fixture is consumed by NFT-SEC-04 (OpenCV CVE-2025-53644 + +AddressSanitizer fuzz). The intent is NOT to reproduce the exact RCE; it +is to provide a malformed JPEG with the structural features the CVE +exploits (oversized DHT segment, truncated SOS marker) so the SUT's +hardened OpenCV path (>= 4.12.0) rejects it. + +AZ-406 commits to the generator's existence + signature; AZ-439 +(NFT-SEC-04) supplies the byte-level details and validates the generated +file actually triggers the CVE code path against opencv 4.11.x (control) +vs 4.12+ (mitigated). +""" + +from __future__ import annotations + +from pathlib import Path + + +def generate(out_path: Path) -> Path: + """Write a malformed JPEG to ``out_path``. Returns the path on success. 
+ + Raises NotImplementedError until AZ-439 supplies the byte template. + Tests that need the crafted fixture should mark themselves + @pytest.mark.skip(reason="awaiting AZ-439") until then. + """ + raise NotImplementedError( + "generate_cve_jpeg.generate is owned by AZ-439 — AZ-406 commits " + "to the public signature only." + ) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="Generate CVE-2025-53644 fixture JPEG.") + parser.add_argument("out", type=Path, default=Path("cve-2025-53644.jpg")) + args = parser.parse_args() + generate(args.out) diff --git a/e2e/fixtures/tile-cache-builder/README.md b/e2e/fixtures/tile-cache-builder/README.md new file mode 100644 index 0000000..2088c77 --- /dev/null +++ b/e2e/fixtures/tile-cache-builder/README.md @@ -0,0 +1,15 @@ +# tile-cache-builder + +Builds the `tile-cache-fixture` Docker volume from the 60 still-image +satellite references in `_docs/00_problem/input_data/` plus the Derkachi +route bbox. + +This directory currently contains only the structural placeholder; the +concrete builder (Dockerfile + build script + FAISS HNSW index emitter + +manifest writer + reproducibility assertion) is delivered by **AZ-407** +(Static fixture builders) — see AC-7 ("Fixture builders are reproducible") +in `_docs/02_tasks/todo/AZ-406_test_infrastructure.md`. + +AZ-406 commits to the directory's location + name only. Do NOT delete this +README before AZ-407 lands; the `e2e_unit_test_directory_layout` unit test +asserts the placeholder is present. diff --git a/e2e/jetson/jtop_parser.py b/e2e/jetson/jtop_parser.py new file mode 100755 index 0000000..de2cfef --- /dev/null +++ b/e2e/jetson/jtop_parser.py @@ -0,0 +1,129 @@ +"""Sample jtop (jetson-stats) Python API → per-sample CSV rows. + +Unlike tegrastats which is a stdout stream, jtop exposes a Python API +that emits a polled state dictionary. 
We poll at a caller-supplied +cadence and convert the relevant fields to CSV columns aligned with the +tegrastats output where the two overlap. + +Schema (CSV columns): + timestamp_utc_iso, ram_used_mb, ram_total_mb, gpu_load_pct, + gpu_freq_mhz, cpu_load_avg_pct, soc_temp_c, gpu_temp_c, power_mw, + extras_json + +Usage: + python3 jtop_parser.py --out out.csv --interval 1.0 +""" + +from __future__ import annotations + +import argparse +import csv +import json +import time +from datetime import datetime, timezone + +UTC = timezone.utc +from pathlib import Path + + +CSV_COLUMNS = ( + "timestamp_utc_iso", + "ram_used_mb", + "ram_total_mb", + "gpu_load_pct", + "gpu_freq_mhz", + "cpu_load_avg_pct", + "soc_temp_c", + "gpu_temp_c", + "power_mw", + "extras_json", +) + + +def state_to_row(state: object) -> dict[str, object]: + """Convert one jtop polled-state object to a CSV row. + + `state` is whatever `jtop.jtop().stats` returns; on real Jetson runs it + is a `JtopStats` dataclass-ish object exposing `ram`, `gpu`, `cpu`, + `temperature`, `power`. We extract defensively because jetson-stats + schema has shifted across versions. 
+ """ + + def _get(obj: object, *path: str, default: object = "") -> object: + cur = obj + for key in path: + if cur is None: + return default + if isinstance(cur, dict): + cur = cur.get(key, default) + else: + cur = getattr(cur, key, default) + return cur if cur is not None else default + + row: dict[str, object] = { + "timestamp_utc_iso": datetime.now(UTC).isoformat(timespec="milliseconds"), + "ram_used_mb": _get(state, "ram", "used"), + "ram_total_mb": _get(state, "ram", "tot"), + "gpu_load_pct": _get(state, "gpu", "load"), + "gpu_freq_mhz": _get(state, "gpu", "freq", "cur"), + "cpu_load_avg_pct": _get(state, "cpu", "load_avg", default=""), + "soc_temp_c": _get(state, "temperature", "SOC", default=""), + "gpu_temp_c": _get(state, "temperature", "GPU", default=""), + "power_mw": _get(state, "power", "total", default=""), + "extras_json": "", + } + return row + + +def run(out_path: Path, interval_s: float, samples_max: int | None = None) -> int: + """Poll jtop and write rows to ``out_path``. Returns rows written. + + On hosts without jetson-stats installed (e.g., unit-test runs on dev + workstations), the function ImportError → emits a single "stub" row + pointing at the missing dependency and exits. This keeps Tier-2 dry + runs and CI smoke happy without forcing CI to install jetson-stats. 
+ """ + out_path.parent.mkdir(parents=True, exist_ok=True) + rows_written = 0 + try: + from jtop import jtop # type: ignore[import-untyped] + except ImportError as exc: + with out_path.open("w", newline="", encoding="utf-8") as fh: + writer = csv.DictWriter(fh, fieldnames=list(CSV_COLUMNS)) + writer.writeheader() + writer.writerow( + { + **{col: "" for col in CSV_COLUMNS}, + "timestamp_utc_iso": datetime.now(UTC).isoformat(timespec="milliseconds"), + "extras_json": json.dumps({"stub": True, "missing_dep": "jetson-stats", "import_error": str(exc)}), + } + ) + return 1 + + with jtop() as poll, out_path.open("w", newline="", encoding="utf-8") as fh: + writer = csv.DictWriter(fh, fieldnames=list(CSV_COLUMNS)) + writer.writeheader() + while poll.ok(): + row = state_to_row(poll.stats) + writer.writerow(row) + fh.flush() + rows_written += 1 + if samples_max is not None and rows_written >= samples_max: + break + time.sleep(interval_s) + return rows_written + + +def main() -> int: + parser = argparse.ArgumentParser(description="Sample jtop → CSV.") + parser.add_argument("--out", type=Path, required=True) + parser.add_argument("--interval", type=float, default=1.0, help="Poll interval in seconds.") + parser.add_argument("--samples-max", type=int, default=None) + args = parser.parse_args() + n = run(args.out, args.interval, args.samples_max) + print(f"jtop_parser: wrote {n} rows to {args.out}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/e2e/jetson/run-tier2.sh b/e2e/jetson/run-tier2.sh new file mode 100755 index 0000000..d289d48 --- /dev/null +++ b/e2e/jetson/run-tier2.sh @@ -0,0 +1,148 @@ +#!/usr/bin/env bash +# Tier-2 Jetson hardware-loop entrypoint. 
+# +# Usage: +# ./run-tier2.sh --fc-adapter --vio-strategy [--duration <5min|8h>] [--enable-chamber] +# +# Pre-requisites (verified at startup): +# * The Jetson is provisioned per `_docs/02_document/tests/environment.md` +# § Execution instructions — Tier-2 (JetPack 6.2, CUDA, TensorRT 10.3, cuDNN). +# * `gps-denied-onboard.service` is installed via systemd +# (`tier2.service` is the template; operator copies it to /etc/systemd/system). +# * SITLs + mock + listener + runner reachable on the same network via +# `docker compose -f e2e/docker/docker-compose.test.yml -f e2e/docker/docker-compose.tier2-bridge.yml up ...` +# on a paired x86 host. (Same-Jetson SITL is also supported — set JETSON_HOST=localhost.) +# +# Outputs the same CSV format as Tier-1 to ./e2e-results/run-${RUN_ID}/report.csv +# plus the per-sample tegrastats + jtop CSVs in the evidence bundle. + +set -euo pipefail + +FC_ADAPTER="" +VIO_STRATEGY="" +DURATION="5min" +ENABLE_CHAMBER=0 +JETSON_HOST_OVERRIDE="" + +usage() { + grep -E '^# ' "$0" | sed 's/^# //' + exit 1 +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --fc-adapter) FC_ADAPTER="$2"; shift 2 ;; + --vio-strategy) VIO_STRATEGY="$2"; shift 2 ;; + --duration) DURATION="$2"; shift 2 ;; + --enable-chamber) ENABLE_CHAMBER=1; shift ;; + --jetson-host) JETSON_HOST_OVERRIDE="$2"; shift 2 ;; + -h|--help) usage ;; + *) echo "Unknown arg: $1" >&2; usage ;; + esac +done + +if [[ -z "$FC_ADAPTER" || -z "$VIO_STRATEGY" ]]; then + echo "ERROR: --fc-adapter and --vio-strategy are required" >&2 + usage +fi + +case "$FC_ADAPTER" in + ardupilot|inav) ;; + *) echo "ERROR: --fc-adapter must be ardupilot or inav (got: $FC_ADAPTER)" >&2; exit 2 ;; +esac + +case "$VIO_STRATEGY" in + okvis2|klt_ransac|vins_mono) ;; + *) echo "ERROR: --vio-strategy must be okvis2 | klt_ransac | vins_mono (got: $VIO_STRATEGY)" >&2; exit 2 ;; +esac + +# RUN_ID — caller may set; default is utc-stamp + adapter pair. 
+: "${RUN_ID:=tier2-$(date -u +%Y%m%dT%H%M%SZ)-${FC_ADAPTER}-${VIO_STRATEGY}}" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" +RESULTS_DIR="${REPO_ROOT}/e2e-results/run-${RUN_ID}" +EVIDENCE_DIR="${RESULTS_DIR}/evidence" + +mkdir -p "${EVIDENCE_DIR}" + +echo "[tier2] RUN_ID=${RUN_ID}" +echo "[tier2] FC_ADAPTER=${FC_ADAPTER} VIO_STRATEGY=${VIO_STRATEGY} DURATION=${DURATION}" +echo "[tier2] RESULTS_DIR=${RESULTS_DIR}" + +# --------------------------------------------------------------------------- +# Pre-flight: confirm the SUT systemd unit is healthy. +# --------------------------------------------------------------------------- +if ! systemctl is-active --quiet gps-denied-onboard.service; then + echo "[tier2] gps-denied-onboard.service is not active — attempting restart..." >&2 + sudo systemctl restart gps-denied-onboard.service + sleep 3 + if ! systemctl is-active --quiet gps-denied-onboard.service; then + echo "[tier2] FATAL: gps-denied-onboard.service failed to start" >&2 + sudo systemctl status gps-denied-onboard.service --no-pager || true + exit 3 + fi +fi + +# --------------------------------------------------------------------------- +# Start tegrastats + jtop background samplers (evidence bundle inputs). +# --------------------------------------------------------------------------- +TEGRA_CSV="${EVIDENCE_DIR}/tegrastats.csv" +JTOP_CSV="${EVIDENCE_DIR}/jtop.csv" + +# tegrastats emits at 5 Hz by default; parser converts to per-sample CSV rows. +if command -v tegrastats >/dev/null 2>&1; then + tegrastats --interval 200 \ + | python3 "${SCRIPT_DIR}/tegrastats_parser.py" --out "${TEGRA_CSV}" & + TEGRA_PID=$! +else + echo "[tier2] WARNING: tegrastats not in PATH — skipping that evidence channel." >&2 + TEGRA_PID= +fi + +if command -v jtop >/dev/null 2>&1; then + python3 "${SCRIPT_DIR}/jtop_parser.py" --out "${JTOP_CSV}" --interval 1.0 & + JTOP_PID=$! 
# Stop the background samplers and exit with the status the script had when
# the trap fired. Installed for EXIT, INT and TERM.
cleanup() {
  local rc=$?
  # Disarm first: the `exit` below would otherwise re-fire the EXIT trap after
  # an INT/TERM, running this handler (and its log line) a second time.
  trap - EXIT INT TERM
  # Best-effort kills: a sampler may never have started or may already be gone.
  [[ -n "${TEGRA_PID:-}" ]] && kill "${TEGRA_PID}" 2>/dev/null || true
  [[ -n "${JTOP_PID:-}" ]] && kill "${JTOP_PID}" 2>/dev/null || true
  echo "[tier2] cleanup complete (rc=${rc})"
  exit "${rc}"
}
trap cleanup EXIT INT TERM
_RAM_RE = re.compile(r"RAM\s+(\d+)/(\d+)MB")
# GPU load plus optional clock. Accepts the bare form ``GR3D_FREQ 12%@305`` and
# the bracketed form ``GR3D_FREQ 12%@[305]``; only the first clock is captured.
_GR3D_RE = re.compile(r"GR3D_FREQ\s+(\d+)%@?\[?(\d+)?")
_CPU_RE = re.compile(r"CPU\s+\[([^\]]+)\]")
# First SoC / CPU thermal zone on the line: ``SOC@``, numbered ``soc0@`` or ``cpu@``.
_SOC_TEMP_RE = re.compile(r"(?:SOC\d*|cpu)@(\d+(?:\.\d+)?)C", re.IGNORECASE)
_GPU_TEMP_RE = re.compile(r"GPU@(\d+(?:\.\d+)?)C", re.IGNORECASE)


def parse_line(line: str) -> dict[str, object] | None:
    """Parse one tegrastats line into a CSV row dict.

    Args:
        line: One raw line of tegrastats output (trailing newline allowed).

    Returns:
        ``None`` when the line is empty or whitespace-only. Otherwise a dict
        with one key per CSV column. Fields that are absent from the line are
        left as empty strings, so an unrecognised line still yields a row.

    Notes:
        * ``timestamp_utc_iso`` is the host clock at the moment the line is
          parsed; it is not read from the tegrastats output.
        * ``extras_json`` always carries the complete stripped input line
          under the ``raw`` key, so no information is lost even when a field
          pattern fails to match.
    """
    line = line.strip()
    if not line:
        return None

    row: dict[str, object] = {
        "timestamp_utc_iso": datetime.now(timezone.utc).isoformat(timespec="milliseconds"),
        "ram_used_mb": "",
        "ram_total_mb": "",
        "gpu_load_pct": "",
        "gpu_freq_mhz": "",
        "cpu_load_avg_pct": "",
        "soc_temp_c": "",
        "gpu_temp_c": "",
        "extras_json": "",
    }

    if m := _RAM_RE.search(line):
        row["ram_used_mb"] = m.group(1)
        row["ram_total_mb"] = m.group(2)

    if m := _GR3D_RE.search(line):
        row["gpu_load_pct"] = m.group(1)
        if m.group(2):
            row["gpu_freq_mhz"] = m.group(2)

    if m := _CPU_RE.search(line):
        # Field looks like "67%@1190,55%@1190,..." or "off,55%@1190,...".
        # Tokens without a numeric load (e.g. "off") are left out of the mean.
        loads: list[float] = []
        for tok in m.group(1).split(","):
            head = tok.strip().split("%", 1)[0]
            try:
                loads.append(float(head))
            except ValueError:
                continue
        if loads:
            row["cpu_load_avg_pct"] = f"{sum(loads) / len(loads):.1f}"

    if m := _SOC_TEMP_RE.search(line):
        row["soc_temp_c"] = m.group(1)
    if m := _GPU_TEMP_RE.search(line):
        row["gpu_temp_c"] = m.group(1)

    # Keep the whole raw line for downstream debugging — nothing is dropped.
    row["extras_json"] = json.dumps({"raw": line}, separators=(",", ":"))
    return row
# `run-tier2.sh` restarts this unit whenever its pre-flight check finds it
# inactive — the unit must therefore be self-restoring.
# `/test-suite` is bind-mounted by docker-compose (../tests). The runner
# default cwd is its own root; the docker-compose `command:` replaces the
# default CMD below (the tini ENTRYPOINT stays in place) with the explicit
# `pytest /test-suite ...` invocation.
+WORKDIR /opt/e2e-runner + +ENTRYPOINT ["/usr/bin/tini", "--"] +CMD ["pytest", "/test-suite"] diff --git a/e2e/runner/__init__.py b/e2e/runner/__init__.py new file mode 100644 index 0000000..3b6277f --- /dev/null +++ b/e2e/runner/__init__.py @@ -0,0 +1,10 @@ +"""e2e-runner package. + +Top-level package for the blackbox harness — owns the conftest, the CSV +reporter plugin, the evidence bundler, and the boundary-driving helpers +(`frame_source_replay`, `imu_replay`, `sitl_observer`, `mavproxy_tlog_reader`, +`fdr_reader`, `geo`). + +IMPORTANT: nothing under this package may import from `gps_denied_onboard.*`. +The harness interacts with the SUT only via public boundaries. +""" diff --git a/e2e/runner/conftest.py b/e2e/runner/conftest.py new file mode 100644 index 0000000..293bebc --- /dev/null +++ b/e2e/runner/conftest.py @@ -0,0 +1,214 @@ +"""Top-level pytest conftest for the blackbox e2e harness. + +Responsibilities: + 1. Session-level parameterization over ``(fc_adapter, vio_strategy)``. + 2. Skip-rule enforcement per the traceability matrix + (`_docs/02_document/tests/traceability-matrix.md`): + - AC-7.1, AC-7.2 → SKIP (deferred — no AI-camera fixture) + - RESTRICT-CAM-2 → SKIP (paired with AC-7.x) + - AC-NEW-5 chamber portion → SKIP unless --enable-chamber + - RESTRICT-HW-2 chamber portion → SKIP unless --enable-chamber + - Tier-2-only tests → SKIP on tier1-docker + - `vins_mono` parametrization → SKIP on production-build sessions + 3. Wiring of the boundary-driving fixtures (`sitl_observer`, + `mavproxy_tlog`, `fdr_reader`, `mock_suite_sat_client`) consumed by + per-scenario tests. + +The actual boundary-driving fixtures import helper modules from +``runner.helpers.*``. They are registered here but their implementations +live in the helpers package. 
def pytest_addoption(parser: pytest.Parser) -> None:
    """Register the harness-wide command-line flags in a dedicated option group.

    The flags configure the harness itself (chamber gating, build kind,
    evidence output directory, deferred-AC strictness). ``--build-kind`` and
    ``--evidence-out`` take their defaults from the ``BUILD_KIND`` and
    ``EVIDENCE_OUT`` environment variables when those are set.
    """
    harness_group = parser.getgroup("e2e-runner", "Blackbox e2e harness options")

    # (flag, keyword arguments) pairs, registered in declaration order.
    option_specs = (
        (
            "--enable-chamber",
            {
                "action": "store_true",
                "default": False,
                "help": "Enable thermal-chamber-gated tests (AC-NEW-5 hot-soak, RESTRICT-HW-2). "
                "Requires the chamber-attached Jetson runner; default off.",
            },
        ),
        (
            "--build-kind",
            {
                "action": "store",
                "default": os.environ.get("BUILD_KIND", "production"),
                "choices": ("production", "research"),
                "help": "Selects which VIO strategies are valid: production excludes vins_mono.",
            },
        ),
        (
            "--evidence-out",
            {
                "action": "store",
                "default": os.environ.get("EVIDENCE_OUT", "/e2e-results/evidence"),
                "help": "Directory the evidence bundler writes per-run artifacts to.",
            },
        ),
        (
            "--allow-no-skip-reason",
            {
                "action": "store_true",
                "default": False,
                "help": "Allow @pytest.mark.deferred_ac without an explicit reason= kwarg. "
                "Default off — every deferred AC must cite its traceability-matrix row.",
            },
        ),
    )
    for flag, settings in option_specs:
        harness_group.addoption(flag, **settings)
Tests that + explicitly do not depend on the matrix simply do not request the fixture. + """ + if "fc_adapter" in metafunc.fixturenames: + env_default = os.environ.get("FC_ADAPTER") + if env_default: + metafunc.parametrize("fc_adapter", [env_default], ids=[env_default]) + else: + metafunc.parametrize("fc_adapter", _FC_ADAPTERS, ids=_FC_ADAPTERS) + if "vio_strategy" in metafunc.fixturenames: + env_default = os.environ.get("VIO_STRATEGY") + if env_default: + metafunc.parametrize("vio_strategy", [env_default], ids=[env_default]) + else: + metafunc.parametrize("vio_strategy", _VIO_STRATEGIES, ids=_VIO_STRATEGIES) + + +# --------------------------------------------------------------------------- +# Skip-rule enforcement (deterministic; runs at collection time) +# --------------------------------------------------------------------------- + + +def pytest_collection_modifyitems( + config: pytest.Config, items: list[pytest.Item] +) -> None: + """Apply traceability-matrix-driven skips before any test executes. + + The mapping between AC / RESTRICT IDs and the SKIP reason strings is the + one declared in `_docs/02_document/tests/traceability-matrix.md` § + Uncovered Items Analysis. Any change to that matrix MUST be mirrored + here (and vice-versa) — the unit tests in + `e2e/_unit_tests/test_traceability_skip_rules.py` catch drift. 
+ """ + tier = os.environ.get("TIER", "tier1-docker") + chamber_enabled = config.getoption("--enable-chamber") + build_kind = config.getoption("--build-kind") + + skip_tier2 = pytest.mark.skip(reason="Tier-2 only — Jetson hardware required") + skip_chamber = pytest.mark.skip( + reason="Chamber-gated — run with --enable-chamber on the chamber-attached Jetson runner" + ) + skip_research = pytest.mark.skip( + reason="vins_mono is research-build-only per D-C1-1-SUB-A" + ) + + for item in items: + # ----- Tier-2 only ----- + if "tier2_only" in item.keywords and tier != "tier2-jetson": + item.add_marker(skip_tier2) + continue + + # ----- Chamber only ----- + if "chamber_only" in item.keywords and not chamber_enabled: + item.add_marker(skip_chamber) + continue + + # ----- Research-build vs production matrix ----- + # Skip vins_mono on production-build runs (the marker is set on the + # parametrize id, not the test fn — we check the param id). + if build_kind == "production": + call_params = getattr(item, "callspec", None) + if call_params is not None and call_params.params.get("vio_strategy") == "vins_mono": + item.add_marker(skip_research) + continue + + # ----- Deferred-AC traceability-matrix skips ----- + deferred = item.get_closest_marker("deferred_ac") + if deferred is not None: + reason = deferred.kwargs.get("reason") + if reason is None and not config.getoption("--allow-no-skip-reason"): + # Hard failure at collection — every deferred_ac MUST cite its + # matrix row to prevent silent coverage erosion. + item.add_marker( + pytest.mark.skip( + reason=( + "deferred_ac marker without reason= kwarg; cite the " + "traceability-matrix row that justifies the deferral, " + "or run with --allow-no-skip-reason for local debugging." 
+ ) + ) + ) + continue + verdict = deferred.kwargs.get("verdict", "skip").lower() + if verdict == "xfail": + item.add_marker(pytest.mark.xfail(reason=reason or "deferred AC (xfail)", strict=False)) + else: + item.add_marker( + pytest.mark.skip( + reason=( + reason + or "deferred AC — see _docs/02_document/tests/traceability-matrix.md" + ) + ) + ) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(scope="session") +def run_id() -> str: + return os.environ.get("RUN_ID", "local") + + +@pytest.fixture(scope="session") +def tier() -> str: + return os.environ.get("TIER", "tier1-docker") + + +@pytest.fixture(scope="session") +def evidence_dir(pytestconfig: pytest.Config, run_id: str) -> Path: + base = Path(pytestconfig.getoption("--evidence-out")) + target = base if base.name == "evidence" else base / "evidence" + target.mkdir(parents=True, exist_ok=True) + return target + + +@pytest.fixture(scope="session") +def mock_suite_sat_url() -> str: + return os.environ.get("MOCK_SUITE_SAT_URL", "http://mock-suite-sat-service:8080") + + +# --------------------------------------------------------------------------- +# Plugin registration +# --------------------------------------------------------------------------- + +# The CSV reporter plugin is a separate module so the unit tests can exercise +# it directly without going through a real pytest run. It is registered via +# `pytest_plugins` so docker-compose's `--csv=...` flag binds to our column +# set rather than the upstream pytest-csv default. +pytest_plugins = [ + "runner.reporting.csv_reporter", + "runner.reporting.evidence_bundler", +] diff --git a/e2e/runner/helpers/__init__.py b/e2e/runner/helpers/__init__.py new file mode 100644 index 0000000..69b8e17 --- /dev/null +++ b/e2e/runner/helpers/__init__.py @@ -0,0 +1,13 @@ +"""Public-boundary helper modules used by every blackbox test. 
+ +Modules: + frame_source_replay — replay images/video to the SUT's V4L2 file source + imu_replay — replay `data_imu.csv` at 10 Hz to the FC inbound + sitl_observer — AP/iNav read-side observers (param reads, GPS_RAW_INT, MSP queries) + mavproxy_tlog_reader — parse `.tlog` files emitted by `mavproxy-listener` + fdr_reader — post-run filesystem read of the FDR archive + geo — Vincenty / WGS84 geodesic helpers + +These modules MUST NOT import from `gps_denied_onboard.*`. Public-boundary +discipline is enforced by `e2e/_unit_tests/test_no_sut_imports.py`. +""" diff --git a/e2e/runner/helpers/fdr_reader.py b/e2e/runner/helpers/fdr_reader.py new file mode 100644 index 0000000..f24d2d8 --- /dev/null +++ b/e2e/runner/helpers/fdr_reader.py @@ -0,0 +1,59 @@ +"""Post-run filesystem read of the FDR archive. + +The FDR archive is a line-delimited JSON record stream per AZ-272 / AZ-273. +Each line is an `FdrRecord` envelope (producer_id, type, monotonic_ms, +payload). The runner image must NEVER import the SUT's FdrRecord schema +directly — it parses the JSON bytes and validates against a duplicate +record-type allowlist baked into this module. + +Public surface only; concrete parser + assertion helpers are owned by +AZ-441 (NFT-LIM-02 — FDR size budget) and the resilience scenario tasks +that need to crawl the archive (AZ-432, AZ-433, AZ-435). +""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from typing import Iterator + + +@dataclass(frozen=True) +class FdrRecord: + """Mirror of `gps_denied_onboard.fdr_client.records.FdrRecord` — public-boundary copy. + + The schema is duplicated intentionally; if the SUT's FDR schema evolves + in a breaking way, this duplicate file fails to parse (visible drift) + rather than silently following along. 
def archive_size_bytes(fdr_archive_root: Path) -> int:
    """Return the combined size, in bytes, of all files below ``fdr_archive_root``.

    The tree is walked recursively and only regular files are counted.
    A root that does not exist yields ``0``.
    """
    if fdr_archive_root.exists():
        return sum(
            entry.stat().st_size
            for entry in fdr_archive_root.rglob("*")
            if entry.is_file()
        )
    return 0
The intent is that `FrameSourceReplayer` is a stable API +the test specs can rely on while the underlying replay strategy is filled +in incrementally. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from typing import Protocol + + +@dataclass(frozen=True) +class ReplayCadence: + """Frame-rate / pace configuration for a replay session.""" + + fps: float = 10.0 + realtime: bool = True + + +class FrameSink(Protocol): + """Abstract destination for replayed frames (file path or memory queue).""" + + def write_frame(self, jpeg_bytes: bytes, timestamp_ms: int) -> None: + ... + + +class FrameSourceReplayer: + """Public surface for replaying frames into the SUT's frame-source path. + + AZ-407 (Static fixture builders) supplies the concrete still-image replay + implementation; AZ-408 (Runtime synthetic-injection) supplies the video + + injector variants. AZ-406 only commits to the contract. + """ + + def __init__(self, sink: FrameSink, cadence: ReplayCadence | None = None) -> None: + self._sink = sink + self._cadence = cadence or ReplayCadence() + + def replay_image_directory(self, directory: Path) -> int: + """Replay every image in ``directory`` (sorted by name). Returns count emitted. + + Raises NotImplementedError until AZ-407 lands. Tests that need this + path should mark themselves @pytest.mark.skip(reason="awaiting AZ-407") + until then; AC-1 (smoke) does not depend on this surface. + """ + raise NotImplementedError( + "FrameSourceReplayer.replay_image_directory is owned by AZ-407 — " + "AZ-406 supplies only the public surface." + ) + + def replay_video(self, video_path: Path) -> int: + """Replay an MP4 / .h264 file frame-by-frame. Returns count emitted. + + Raises NotImplementedError until AZ-408 lands. + """ + raise NotImplementedError( + "FrameSourceReplayer.replay_video is owned by AZ-408 — " + "AZ-406 supplies only the public surface." 
def _reject_nan(func_name: str, **values: float) -> None:
    """Raise ValueError naming the first NaN among ``values`` (checked in call order)."""
    for name, value in values.items():
        if value != value:  # NaN is the only float that is not equal to itself
            raise ValueError(f"{func_name}: {name} is NaN")


def distance_m(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Geodesic distance in meters between two WGS84 points (degrees in).

    Raises ValueError on NaN inputs (defensive — silent NaN propagation in
    a test assertion is the kind of bug this helper exists to prevent).
    """
    _reject_nan("distance_m", lat1=lat1, lon1=lon1, lat2=lat2, lon2=lon2)
    # Geod.inv takes longitude before latitude.
    _, _, d = _WGS84.inv(lon1, lat1, lon2, lat2)
    return float(d)


def delta(lat1: float, lon1: float, lat2: float, lon2: float) -> GeodeticDelta:
    """Full geodetic delta between two WGS84 points: distance + forward/reverse bearings.

    Raises ValueError on NaN inputs, matching ``distance_m``.
    """
    _reject_nan("delta", lat1=lat1, lon1=lon1, lat2=lat2, lon2=lon2)
    fwd_az, rev_az, d = _WGS84.inv(lon1, lat1, lon2, lat2)
    return GeodeticDelta(
        distance_m=float(d),
        forward_bearing_deg=float(fwd_az),
        reverse_bearing_deg=float(rev_az),
    )


def offset(lat: float, lon: float, bearing_deg: float, distance_m: float) -> tuple[float, float]:
    """Project ``(lat, lon)`` by ``distance_m`` along ``bearing_deg`` (degrees CW from north).

    Returns the destination as ``(lat, lon)`` in degrees.
    Raises ValueError on NaN inputs, matching ``distance_m``.
    """
    _reject_nan("offset", lat=lat, lon=lon, bearing_deg=bearing_deg, distance_m=distance_m)
    # Geod.fwd takes and returns longitude before latitude; swap back for callers.
    new_lon, new_lat, _ = _WGS84.fwd(lon, lat, bearing_deg, distance_m)
    return float(new_lat), float(new_lon)
`.tlog` is the standard MAVLink telemetry-log dump format: each message is an
8-byte big-endian unix-microsecond timestamp followed by the wire bytes of the
MAVLink frame. pymavlink ships `mavlogfile` which knows how to iterate this.
def count_by_type(tlog_path: Path) -> dict[str, int]:
    """Tally the messages in ``tlog_path`` by ``msg_type``.

    Returns a dict mapping each message type seen to the number of messages
    of that type. Any exception raised by ``iter_messages`` propagates.
    """
    tally: dict[str, int] = {}
    for message in iter_messages(tlog_path):
        kind = message.msg_type
        if kind in tally:
            tally[kind] += 1
        else:
            tally[kind] = 1
    return tally
+ + AP: assembled from EKF source-set + GLOBAL_POSITION_INT replay-back. + iNav: assembled from MSP2 GPS-provider state + getRawGPS query. + """ + + primary_source: str # "MAV" (AP gps_type=14) or "MSP" (iNav) + last_position_lat_deg: float + last_position_lon_deg: float + last_position_alt_m: float + fix_quality: int # 0..6 per NMEA convention + horizontal_accuracy_m: float + last_update_age_ms: int + + +class FcSitlObserver(Protocol): + """Common observer protocol — implemented by `ArduPilotObserver` + `InavObserver`.""" + + fc_kind: FcKind + + def read_gps_state(self) -> FcGpsState: + ... + + def read_parameter(self, name: str) -> float | int | str | None: + ... + + +def get_observer(fc_kind: FcKind, host: str) -> FcSitlObserver: + """Factory — returns the matching observer for the requested FC. + + AZ-416/417 own the concrete return types. AZ-406 raises until those + tasks land so test authors can plumb the observer through their + fixtures without yet running them. + """ + raise NotImplementedError( + f"sitl_observer.get_observer({fc_kind=}, {host=}) is owned by " + "AZ-416 (AP) / AZ-417 (iNav) — AZ-406 supplies only the contract." 
+ ) diff --git a/e2e/runner/pytest.ini b/e2e/runner/pytest.ini new file mode 100644 index 0000000..cf79d59 --- /dev/null +++ b/e2e/runner/pytest.ini @@ -0,0 +1,12 @@ +[pytest] +minversion = 8.0 +addopts = -ra --strict-markers --timeout=300 +markers = + tier2_only: scenario only valid on Tier-2 Jetson hardware (SKIP on tier1-docker) + chamber_only: scenario requires the thermal chamber rig (SKIP unless --enable-chamber) + research_build_only: scenario only valid on a research build (SKIP when vio_strategy=vins_mono is selected on production matrix) + deferred_ac: scenario maps to an AC marked NOT COVERED / PARTIAL in the traceability matrix; emits SKIP or XFAIL with the matrix-mapped reason + traces_to(ids): comma-separated AC/RESTRICT IDs the test exercises (consumed by csv_reporter for the `traces_to` column) + smoke: minimal verification that the harness boots end-to-end +filterwarnings = + ignore::DeprecationWarning:pymavlink.* diff --git a/e2e/runner/reporting/__init__.py b/e2e/runner/reporting/__init__.py new file mode 100644 index 0000000..677139b --- /dev/null +++ b/e2e/runner/reporting/__init__.py @@ -0,0 +1,7 @@ +"""CSV reporter + evidence bundler — pytest plugins registered by the runner conftest. + +`csv_reporter` overrides the upstream pytest-csv default columns with the +exact column set declared in `_docs/02_document/tests/environment.md` § +Reporting; `evidence_bundler` collects per-run `.tlog`, FDR archives, +screenshots, profiler traces, tegrastats / jtop CSVs into a single bundle. +""" diff --git a/e2e/runner/reporting/csv_reporter.py b/e2e/runner/reporting/csv_reporter.py new file mode 100644 index 0000000..90cccb1 --- /dev/null +++ b/e2e/runner/reporting/csv_reporter.py @@ -0,0 +1,254 @@ +"""CSV reporter pytest plugin. 
+ +Emits one row per test with the exact columns declared in +``_docs/02_document/tests/environment.md`` § Reporting: + + test_id, test_name, traces_to, fc_adapter, vio_strategy, tier, + started_at_utc, execution_time_ms, result, error_message, evidence_paths + +Why a custom plugin rather than `pytest-csv` defaults? + - `pytest-csv` is dependency-installed for its column-extension hooks, but + its default emission is `name`/`status`/`duration` — our matrix needs the + `traces_to`, `fc_adapter`, `vio_strategy`, `tier`, `started_at_utc`, + `evidence_paths` columns to feed the downstream badge generator and + regression detector. + +Result classification per AC-9: + - PASS / FAIL / SKIP map 1:1 to pytest's own outcome. + - XFAIL is emitted when the test was marked `deferred_ac(verdict="xfail", + reason=...)` and the body raised (the standard pytest XFAIL path). + +The plugin is unit-tested in ``e2e/_unit_tests/reporting/test_csv_reporter.py``. +""" + +from __future__ import annotations + +import csv +import os +import time +from datetime import datetime, timezone + +UTC = timezone.utc +from pathlib import Path +from typing import Any + +import pytest + +CSV_COLUMNS: tuple[str, ...] 
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _parametrize_value(item: pytest.Item, name: str, default: str = "n/a") -> str:
    """Return the parametrize argument ``name`` of ``item`` as a string.

    ``default`` is returned when the item is not parametrized at all (it has
    no ``callspec``) or when its callspec has no parameter called ``name``.
    """
    cs = getattr(item, "callspec", None)
    if cs is None:
        return default
    return str(cs.params.get(name, default))


def _traces_to(item: pytest.Item) -> str:
    """Return the comma-separated IDs of the closest ``traces_to`` marker.

    Accepted marker spellings:

    * ``traces_to("AC-1,AC-2")`` — a ready-made string, passed through.
    * ``traces_to(["AC-1", "AC-2"])`` — list / tuple / set, joined with ",".
    * ``traces_to("AC-1", "AC-2")`` — several positional IDs, joined with ",".
    * ``traces_to(ids=...)`` — keyword form of any of the above.

    Returns an empty string when the item carries no ``traces_to`` marker.
    """
    marker = item.get_closest_marker("traces_to")
    if marker is None:
        return ""
    if len(marker.args) > 1:
        # Several positional IDs: keep all of them instead of only the first,
        # otherwise the traceability column silently loses entries.
        ids: Any = marker.args
    elif marker.args:
        ids = marker.args[0]
    else:
        ids = marker.kwargs.get("ids", "")
    if isinstance(ids, (set, frozenset)):
        # Sets have no stable iteration order; sort so the CSV is reproducible.
        ids = sorted(ids, key=str)
    if isinstance(ids, (list, tuple)):
        return ",".join(str(i) for i in ids)
    return str(ids)


def _test_id(item: pytest.Item) -> str:
    """Stable test id for the CSV `test_id` column.

    Prefers an explicit ``@pytest.mark.test_id("FT-P-01")`` if set, otherwise
    falls back to pytest's nodeid which is unique per parametrize variant.
    """
    marker = item.get_closest_marker("test_id")
    if marker is not None and marker.args:
        return str(marker.args[0])
    return item.nodeid


def _outcome_to_result(report: pytest.TestReport, item: pytest.Item) -> str:
    """Map a pytest report onto the CSV ``result`` column.

    * ``failed`` -> ``FAIL``.
    * ``passed`` / ``skipped`` -> ``XFAIL`` when the report is from the
      ``call`` phase and the item carries ``deferred_ac(verdict="xfail")``;
      otherwise ``PASS`` / ``SKIP`` respectively.
    * any other outcome -> ``FAIL (<outcome>)`` so it stays visible.
    """
    outcome = report.outcome
    if outcome == "failed":
        return "FAIL"
    if outcome not in ("passed", "skipped"):
        # Unknown outcome — should never happen with stock pytest, but emit a
        # visible FAIL rather than swallow it silently.
        return f"FAIL ({outcome})"
    if report.when == "call":
        deferred = item.get_closest_marker("deferred_ac")
        if deferred is not None and deferred.kwargs.get("verdict") == "xfail":
            return "XFAIL"
    return "PASS" if outcome == "passed" else "SKIP"
class _CsvReporter:
    """Pytest plugin object that collects one CSV row per test item.

    Rows are built with ``build_row`` as phase reports arrive and are written
    to ``output_path`` in one pass when the session finishes.

    Known limitation: a failure raised during teardown is not reflected in
    the row, because the row is emitted from the earlier ``call`` report.
    """

    def __init__(self, output_path: Path) -> None:
        self._path = output_path
        # Create the report directory eagerly.
        self._path.parent.mkdir(parents=True, exist_ok=True)
        # Per-item start times so we can attribute call-phase duration accurately
        # (we want call+setup wall-clock, NOT just call duration which omits any
        # boundary-fixture setup cost). Maps nodeid -> (monotonic, ISO-8601 UTC).
        self._start_times: dict[str, tuple[float, str]] = {}
        # nodeid -> evidence paths recorded through `attach_evidence`.
        self._evidence: dict[str, list[str]] = {}
        self._rows: list[dict[str, str]] = []

    # --- lifecycle hooks ---

    def pytest_runtest_logstart(self, nodeid: str, location: Any) -> None:  # noqa: ARG002 (pytest hook signature)
        """Remember when the item started (monotonic clock + UTC timestamp)."""
        self._start_times[nodeid] = (time.monotonic(), datetime.now(UTC).isoformat(timespec="seconds"))

    @pytest.hookimpl(hookwrapper=True)
    def pytest_runtest_makereport(self, item: pytest.Item, call: pytest.CallInfo[None]) -> Any:  # noqa: ARG002
        """Tag every phase report with the item that produced it.

        ``pytest_runtest_logreport`` only receives the report, so the item is
        attached here to let it read parametrize ids / markers without a
        global lookup. Wrapping the hook (instead of swapping an attribute on
        ``config.hook``) keeps this working whichever hook proxy pytest uses
        to dispatch the call.
        """
        outcome = yield
        # Leave a failing makereport hook alone; pluggy re-raises its error.
        if outcome.excinfo is None:
            report = outcome.get_result()
            if report is not None:
                report._item = item  # noqa: SLF001 (intentional plugin attribute)

    def pytest_runtest_logreport(self, report: pytest.TestReport) -> None:
        """Append the CSV row for an item once its decisive phase reports.

        We emit one row per item, normally from the `call` phase. When setup
        does not pass (``pytest.skip()`` inside a fixture, or a fixture that
        raises) there is no `call` phase, so the `setup` report is used
        instead — otherwise the test would vanish from the CSV.
        """
        item = getattr(report, "_item", None)  # populated by pytest_runtest_makereport above
        if item is None:
            return
        decisive = report.when == "call" or (report.when == "setup" and report.outcome != "passed")
        if not decisive:
            return
        # Fall back to "now" if logstart was never seen for this nodeid.
        start_mono, start_iso = self._start_times.get(
            report.nodeid,
            (time.monotonic(), datetime.now(UTC).isoformat(timespec="seconds")),
        )
        elapsed_ms = int((time.monotonic() - start_mono) * 1000)
        evidence = self._evidence.get(report.nodeid, [])
        self._rows.append(build_row(item, report, start_iso, elapsed_ms, evidence))

    def pytest_sessionfinish(self, session: pytest.Session, exitstatus: int) -> None:  # noqa: ARG002
        """Write the header plus every collected row to the CSV file."""
        with self._path.open("w", newline="", encoding="utf-8") as fh:
            writer = csv.DictWriter(fh, fieldnames=list(CSV_COLUMNS))
            writer.writeheader()
            writer.writerows(self._rows)

    # --- public surface for the evidence_bundler plugin to attach paths ---

    def attach_evidence(self, nodeid: str, evidence_path: str) -> None:
        """Record ``evidence_path`` for ``nodeid``.

        Paths attached before the item's row is built end up in that row's
        ``evidence_paths`` column.
        """
        self._evidence.setdefault(nodeid, []).append(evidence_path)
def pytest_configure(config: pytest.Config) -> None:
    """Install the CSV reporter when ``--csv`` is given and register markers.

    The stash slot is always initialised (to ``None``) so ``reporter_for``
    has a defined answer even when reporting is switched off.
    """
    stash = config.stash
    stash[_REPORTER_KEY] = None
    target = config.getoption("--csv")
    if target:
        plugin = _CsvReporter(Path(target))
        stash[_REPORTER_KEY] = plugin
        config.pluginmanager.register(plugin, name="e2e-csv-reporter")
    # `traces_to` and `test_id` are pytest markers — register them so
    # --strict-markers doesn't error on first use.
    for marker_line in (
        "traces_to(ids): comma-separated AC/RESTRICT IDs the test exercises",
        "test_id(name): override the test_id column (default = pytest nodeid)",
    ):
        config.addinivalue_line("markers", marker_line)


def reporter_for(config: pytest.Config) -> _CsvReporter | None:
    """Return the active CSV reporter for ``config``, or ``None`` if there is none.

    Public accessor — used by `evidence_bundler` to attach evidence paths.
    """
    active = config.stash.get(_REPORTER_KEY, None)
    return active
def _safe_relpath(target: Path, base: Path) -> str:
    """Render ``target`` relative to ``base`` when possible.

    When ``target`` does not live under ``base`` the path is returned
    unchanged (as a string) instead of raising.
    """
    try:
        shown = target.relative_to(base)
    except ValueError:
        # Not under base: fall back to the path exactly as given.
        shown = target
    return str(shown)
def _slug(nodeid: str) -> str:
    """Turn a pytest nodeid into a single directory-name component.

    ``/`` becomes ``_``, ``::`` becomes ``__``, ``[`` becomes ``_``, and
    ``]`` and spaces are dropped. All other characters are kept as they are.
    """
    substitutions = (
        ("/", "_"),
        ("::", "__"),
        ("[", "_"),
        ("]", ""),
        (" ", ""),
    )
    slug = nodeid
    for needle, substitute in substitutions:
        slug = slug.replace(needle, substitute)
    return slug
def _bootstrap_runner_path() -> None:
    """Put the directory that contains ``runner/`` at the front of ``sys.path``.

    Two locations are probed in order and the first one holding a ``runner``
    directory wins:

    1. ``/opt/e2e-runner`` — where the docker image keeps ``runner/``.
    2. The grandparent directory of this file — ``e2e/`` in a local checkout.

    Nothing is inserted if the winning directory is already on ``sys.path``,
    and nothing happens at all if neither location has a ``runner`` directory.
    """
    this_file = Path(__file__).resolve()
    search_roots = (
        Path("/opt/e2e-runner"),
        this_file.parents[1],  # e2e/
    )
    chosen = next((root for root in search_roots if (root / "runner").is_dir()), None)
    if chosen is None:
        return
    entry = str(chosen)
    if entry not in sys.path:
        sys.path.insert(0, entry)
@pytest.mark.smoke
@pytest.mark.traces_to("AC-1")
def test_harness_boots(run_id: str, tier: str, mock_suite_sat_url: str) -> None:
    """The harness has access to RUN_ID, TIER, and the mock service URL."""
    # All three arguments are fixtures supplied by the runner conftest; the
    # test only checks that they resolve to sane values.
    # Arrange / Act / Assert
    assert run_id, "RUN_ID fixture must be set"
    # The offending value is used as the assertion message so it shows up in
    # the failure output.
    assert tier in ("tier1-docker", "tier2-jetson", "tier2-chamber"), tier
    assert mock_suite_sat_url.startswith("http"), mock_suite_sat_url


@pytest.mark.smoke
@pytest.mark.traces_to("AC-8")
def test_parametrize_matrix_smoke(fc_adapter: str, vio_strategy: str) -> None:
    """The conftest parametrize fixtures produce well-formed values."""
    # Each fixture value must be one of the names this test knows about.
    assert fc_adapter in ("ardupilot", "inav")
    assert vio_strategy in ("okvis2", "klt_ransac", "vins_mono")


@pytest.mark.smoke
@pytest.mark.traces_to("AC-4")
def test_evidence_dir_writable(evidence_dir, attach_evidence, tmp_path) -> None:  # type: ignore[no-untyped-def]
    """attach_evidence copies a file into the per-run bundle and returns a relative path."""
    # Arrange
    src = tmp_path / "smoke.txt"
    src.write_text("smoke evidence")
    # Act
    rel = attach_evidence(src)
    # Assert
    # The returned path must mention the file, and a copy must exist somewhere
    # below the evidence directory.
    assert "smoke.txt" in rel
    assert any(evidence_dir.rglob("smoke.txt"))
The + # production runtime of the mock lives inside its own Docker image so + # the SUT does not depend on FastAPI; this is a test-only dep. + "fastapi>=0.111,<0.120", ] inference = [ "torch>=2.2", @@ -130,8 +135,15 @@ include = ["gps_denied_onboard*"] [tool.pytest.ini_options] minversion = "7.0" -testpaths = ["tests"] -pythonpath = ["src"] +# `tests` is the in-process unit / integration / contract / perf test tree +# owned by per-component module-layout entries. `e2e/_unit_tests` is the +# OUT-OF-CONTAINER unit tree owned by the `blackbox_tests` cross-cutting +# entry — exercises the harness internals (CSV reporter, helpers, parsers, +# mock app, conftest skip rules) without Docker / SITL. The Docker-bound +# blackbox tree at `e2e/tests/` is intentionally NOT in testpaths: it runs +# inside the e2e-runner image via its own pytest.ini. +testpaths = ["tests", "e2e/_unit_tests"] +pythonpath = ["src", "e2e"] # log_schema.py is the contract-mandated file name (AZ-245 AC-4); kept # in python_files so the contract test is discovered alongside the # standard `test_*.py` pattern.