mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-21 22:01:13 +00:00
d7e6b0959e
Batch 63 of /autodev replay slice. Adds the AZ-404 E2E test harness against the Derkachi fixture and resolves the AZ-389 dependency phantom (closing AZ-559 Won't Fix). E2E test (AZ-404) - tests/e2e/replay/_tlog_synth.py: deterministic CSV->tlog generator (the original Derkachi tlog is not in repo; data_imu.csv is its export, so we round-trip the CSV through pymavlink). Verified: SCALED_IMU2 + ATTITUDE + GPS_RAW_INT + HEARTBEAT round-trip cleanly through mavutil.mavlink_connection. - tests/e2e/replay/_helpers.py: parse_jsonl, l2_horizontal_m (haversine), match_percentage, CapturingMavlinkTransport (ready for AZ-558 unblock), GroundTruthRow + load_ground_truth_csv. - tests/e2e/replay/conftest.py: derkachi_replay_inputs (session scope), replay_runner (subprocess fixture per AZ-402 CLI), operator_pre_flight_setup placeholder. - tests/e2e/replay/test_derkachi_1min.py: 9 tests covering AC-1..AC-8 with AC-7 skip-gate self-check + AC-4a mode-agnosticism AST scan (passes unconditionally, confirms ADR-011 holding). - tests/e2e/replay/test_helpers.py: 14 unit tests covering AC-9 helper L2 correctness + match_percentage + parse_jsonl + CapturingMavlinkTransport (all unconditional). - tests/e2e/replay/README.md: AC matrix, fixture state, runtime budget, failure cookbook (AC-10). AC matrix - AC-1, AC-2, AC-5, AC-6 implemented and Tier-1 gated on RUN_REPLAY_E2E=1. - AC-3 (<=100m for 80%) xfail until real Topotek KHP20S30 calibration ships (camera_info.md states intrinsics are unknown). - AC-4a (mode-agnosticism AST scan) PASSES unconditionally. - AC-4b (encoder byte-equality) skip until AZ-558 routes C8 bytes through MavlinkTransport. - AC-7 (skip-gate self-check) PASSES unconditionally. - AC-8 (operator workflow rehearsal) skip until D-PROJ-2 mock-suite-sat-service implements tile-fetch + index-build endpoints. - AC-9 (helper L2 correctness) 14 PASSES unconditionally. AZ-389 housekeeping - AZ-559 closed Won't Fix: investigation against c6_tile_cache/_types.py confirmed TileSource.ONBOARD_INGEST + TileMetadata.quality_metadata + write_tile's FreshnessRejectionError already cover the mid-flight ingest semantic. The "missing API" was a spec-vs-impl naming mismatch. - AZ-389 spec rewritten to consume the existing write_tile API + catch FreshnessRejectionError per AC-NEW-3 opportunistic emission. - _dependencies_table.md reverted: AZ-389 deps -> AZ-303 (was AZ-559 in the previous commit on this branch); total 150 / 497 pts. Tests - Full regression: 2099 passed (+14 new e2e/replay), 94 skipped (incl. 8 e2e/replay heavy-tier + documented blocker skips), 3 perf-microbench flakes deselected (test_cli_cold_start_under_2s, test_cold_start_under_500ms_p99, test_nfr_perf_sign_microbench; all pass in isolation - pre-existing under-load flakes on dev macOS). Reviews - _docs/03_implementation/reviews/batch_63_review.md: code review PASS_WITH_WARNINGS (3 documented spec-gap deferrals: AC-3, AC-4b, AC-8). - _docs/03_implementation/cumulative_review_batches_61-63_cycle1_report.md: cumulative review PASS_WITH_WARNINGS. Action items: prioritise AZ-558 (closes AZ-401 AC-9 + AZ-404 AC-4b); consider 2pt hygiene PBI for Protocol-completeness AST scan to catch the AZ-389 / AZ-559 phantom-API pattern at task-prep time. Architecture invariants observably holding - ADR-011 (replay-as-configuration): AC-4a's AST scan over src/gps_denied_onboard/components/**/*.py finds zero violations - components branch on neither config.mode nor any synonym. - Single composition root (replay protocol Invariant 11): AZ-402 CLI dispatches to runtime_root.main(config); does not call compose_root directly. Co-authored-by: Cursor <cursoragent@cursor.com>
235 lines
7.7 KiB
Python
235 lines
7.7 KiB
Python
"""Pytest fixtures for the AZ-404 E2E replay tests.
|
|
|
|
The fixtures are import-clean on dev macOS — the heavy work
|
|
(synthesizing the tlog, invoking the airborne CLI in a subprocess)
|
|
runs only when ``RUN_REPLAY_E2E=1`` is set in the environment.
|
|
Without the env var, the test module's collection-time skip marker
|
|
prevents the fixtures from being requested.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
from collections.abc import Iterator
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import pytest
|
|
|
|
from tests.e2e.replay._helpers import GroundTruthRow, load_ground_truth_csv
|
|
from tests.e2e.replay._tlog_synth import synthesize_tlog
|
|
|
|
|
|
# Derkachi clip range — anchored at the start of the data_imu.csv
|
|
# (Time=0.0). The fixture clip is deliberately the first 60 s rather
|
|
# than a mid-flight slice: the take-off region exercises the AZ-405
|
|
# IMU-take-off auto-sync detector, and the steady cruise that follows
|
|
# stresses the satellite-anchor + VIO drift-correction path. The
|
|
# trim is documented in `tests/e2e/replay/README.md`.
|
|
_CLIP_START_S: float = 0.0
|
|
_CLIP_END_S: float = 60.0
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Path helpers
|
|
|
|
|
|
def _repo_root() -> Path:
|
|
return Path(__file__).resolve().parents[3]
|
|
|
|
|
|
def _derkachi_dir() -> Path:
|
|
return _repo_root() / "_docs" / "00_problem" / "input_data" / "flight_derkachi"
|
|
|
|
|
|
def _calibration_path() -> Path:
|
|
# Placeholder calibration: the real Topotek KHP20S30 intrinsics
|
|
# are unknown per `_docs/00_problem/input_data/flight_derkachi/
|
|
# camera_info.md`. AC-3 is `xfail`ed until a real calibration
|
|
# ships; AC-1 / AC-2 / AC-5 / AC-6 do not depend on intrinsics
|
|
# accuracy.
|
|
return _repo_root() / "tests" / "fixtures" / "calibration" / "adti26.json"
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Fixtures
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class DerkachiReplayInputs:
|
|
"""Bundle of paths the AZ-402 CLI consumes for a Derkachi replay run."""
|
|
|
|
video_path: Path
|
|
tlog_path: Path
|
|
calibration_path: Path
|
|
config_path: Path
|
|
signing_key_path: Path
|
|
output_path: Path
|
|
ground_truth: list[GroundTruthRow]
|
|
|
|
|
|
@pytest.fixture(scope="session")
|
|
def derkachi_replay_inputs(tmp_path_factory: pytest.TempPathFactory) -> DerkachiReplayInputs:
|
|
"""Materialise Derkachi inputs + a synthesized tlog for the e2e run.
|
|
|
|
Session-scoped so the tlog synthesizer runs once across the whole
|
|
e2e collection. The tlog is cached at
|
|
``tmp_path_factory.mktemp("derkachi") / "synth.tlog"`` so each
|
|
pytest invocation gets a fresh copy; the synthesizer is fast
|
|
enough (~1 s for 60 s of data) that disk caching across invocations
|
|
is unnecessary.
|
|
"""
|
|
derkachi = _derkachi_dir()
|
|
csv_path = derkachi / "data_imu.csv"
|
|
video_path = derkachi / "flight_derkachi.mp4"
|
|
if not csv_path.is_file():
|
|
pytest.fail(
|
|
f"Derkachi fixture missing: {csv_path} — see "
|
|
"_docs/00_problem/input_data/flight_derkachi/README.md"
|
|
)
|
|
if not video_path.is_file():
|
|
pytest.fail(f"Derkachi fixture missing: {video_path}")
|
|
|
|
work_dir = tmp_path_factory.mktemp("derkachi")
|
|
tlog_path = work_dir / "synth.tlog"
|
|
synthesize_tlog(csv_path, tlog_path)
|
|
|
|
# Empty signing key — the airborne replay path runs the signing
|
|
# handshake against `NoopMavlinkTransport`, so the key contents do
|
|
# not affect any wire output. We still need a real file because
|
|
# the CLI's path-validation gate requires it.
|
|
signing_key_path = work_dir / "signing_key.bin"
|
|
signing_key_path.write_bytes(b"\x00" * 32)
|
|
|
|
config_path = work_dir / "config.yaml"
|
|
config_path.write_text(
|
|
# Replay-specific overrides; the rest comes from the env vars
|
|
# the airborne binary's `load_config` honours by default.
|
|
"mode: replay\n"
|
|
"replay:\n"
|
|
" pace: asap\n"
|
|
" target_fc_dialect: ardupilot_plane\n"
|
|
)
|
|
|
|
output_path = work_dir / "estimator_output.jsonl"
|
|
|
|
ground_truth_full = load_ground_truth_csv(csv_path)
|
|
ground_truth = [
|
|
r for r in ground_truth_full if _CLIP_START_S <= r.t_s <= _CLIP_END_S
|
|
]
|
|
|
|
return DerkachiReplayInputs(
|
|
video_path=video_path,
|
|
tlog_path=tlog_path,
|
|
calibration_path=_calibration_path(),
|
|
config_path=config_path,
|
|
signing_key_path=signing_key_path,
|
|
output_path=output_path,
|
|
ground_truth=ground_truth,
|
|
)
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class ReplayRunResult:
|
|
"""Outcome of a single ``gps-denied-replay`` subprocess run."""
|
|
|
|
returncode: int
|
|
stdout: str
|
|
stderr: str
|
|
output_path: Path
|
|
wall_clock_s: float
|
|
|
|
|
|
@pytest.fixture
|
|
def replay_runner(derkachi_replay_inputs: DerkachiReplayInputs) -> Any:
|
|
"""Return a callable that invokes the ``gps-denied-replay`` console-script.
|
|
|
|
The callable accepts keyword overrides for ``pace`` and
|
|
``time_offset_ms``; everything else is taken from
|
|
``derkachi_replay_inputs``. Output is written to a fresh path per
|
|
invocation so determinism comparisons (AC-5) get two independent
|
|
files.
|
|
"""
|
|
|
|
binary = shutil.which("gps-denied-replay")
|
|
if binary is None:
|
|
venv_bin = Path(sys.executable).parent / "gps-denied-replay"
|
|
if venv_bin.exists():
|
|
binary = str(venv_bin)
|
|
if binary is None:
|
|
pytest.skip(
|
|
"gps-denied-replay console-script not on PATH; "
|
|
"install the package in the test venv"
|
|
)
|
|
|
|
invocation_count = {"n": 0}
|
|
|
|
def _run(*, pace: str = "asap", time_offset_ms: int | None = None) -> ReplayRunResult:
|
|
import time
|
|
|
|
invocation_count["n"] += 1
|
|
out_path = derkachi_replay_inputs.output_path.with_name(
|
|
f"estimator_output_{invocation_count['n']}.jsonl"
|
|
)
|
|
argv = [
|
|
binary,
|
|
"--video",
|
|
str(derkachi_replay_inputs.video_path),
|
|
"--tlog",
|
|
str(derkachi_replay_inputs.tlog_path),
|
|
"--output",
|
|
str(out_path),
|
|
"--camera-calibration",
|
|
str(derkachi_replay_inputs.calibration_path),
|
|
"--config",
|
|
str(derkachi_replay_inputs.config_path),
|
|
"--mavlink-signing-key",
|
|
str(derkachi_replay_inputs.signing_key_path),
|
|
"--pace",
|
|
pace,
|
|
]
|
|
if time_offset_ms is not None:
|
|
argv.extend(["--time-offset-ms", str(time_offset_ms)])
|
|
t0 = time.monotonic()
|
|
completed = subprocess.run(
|
|
argv,
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=180,
|
|
)
|
|
wall_s = time.monotonic() - t0
|
|
return ReplayRunResult(
|
|
returncode=completed.returncode,
|
|
stdout=completed.stdout,
|
|
stderr=completed.stderr,
|
|
output_path=out_path,
|
|
wall_clock_s=wall_s,
|
|
)
|
|
|
|
return _run
|
|
|
|
|
|
@pytest.fixture
|
|
def operator_pre_flight_setup(tmp_path: Path) -> Iterator[Path]:
|
|
"""Operator C12 pre-flight rehearsal stub.
|
|
|
|
Per AZ-404's spec this fixture should run the operator's full
|
|
C10/C11/C12 pre-flight against a ``mock-suite-sat-service``
|
|
fixture and yield the populated cache directory. The current
|
|
``tests/fixtures/mock-suite-sat-service`` is a bootstrap stub
|
|
(only ``GET /healthz`` per its README) — the full D-PROJ-2
|
|
contract is not implemented. Until that ships, AC-8 (operator
|
|
workflow rehearsal) is skipped at the test level; this fixture
|
|
yields a placeholder cache directory so test bodies that
|
|
request it can fail-fast with a documented reason rather than a
|
|
surprise ImportError.
|
|
"""
|
|
cache_dir = tmp_path / "operator_cache"
|
|
cache_dir.mkdir()
|
|
yield cache_dir
|