mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-21 07:01:14 +00:00
ade0c86f2b
Wraps the AZ-699 verdict-report path with the AZ-839 operator_pre_flight_setup C3 fixture so a single Tier-2 test takes only (tlog, video, calibration) and runs the full 7-step pipeline on the Jetson harness without operator hand-curation. New surface (tests-only, no src/ changes): - tests/e2e/replay/_e2e_orchestrator.py — orchestrator with OrchestratorStep enum, OrchestrationFailure exception (step prefix per AC-5), OrchestrationReport dataclass, write_effective_replay_config helper, and run_e2e_orchestration entry point covering steps 1-2-6-7. - tests/e2e/replay/test_e2e_orchestrator_unit.py — 17 unit tests covering each failure mode + happy path with mocked subprocess + ground-truth loader (AC-8). - tests/e2e/replay/test_az835_e2e_real_flight.py — Tier-2 + RUN_REPLAY_E2E gated integration test asserting verdict report exists, 15-min budget held (AC-1, AC-2, AC-3, AC-4, AC-6). The effective config write overlays c6_tile_cache.root_dir onto the static operator YAML at runtime so the airborne subprocess shares the cache_root the C3 fixture chose. Field- level merge — every other operator-config block stays verbatim. The static YAML on disk is never touched. Test run: tests/e2e/replay 45 passed, 10 skipped (10 skips were 9 pre-existing + 1 new tier2). No src/ touched, no AZ-839 driver changes; AC-7 (AZ-699 still passes) holds by inspection. Co-authored-by: Cursor <cursoragent@cursor.com>
183 lines
6.6 KiB
Python
183 lines
6.6 KiB
Python
"""AZ-840 — E2E orchestrator integration test (AC-1 / AC-2 / AC-3 / AC-4 / AC-6).
|
|
|
|
The Tier-2 entry point that closes Epic AZ-835's narrative: from a
|
|
``(tlog, video, calibration)`` triple, run the full 7-step pipeline
|
|
end-to-end on the Jetson harness without operator hand-curation
|
|
between steps.
|
|
|
|
The test consumes:
|
|
|
|
* :func:`tests.e2e.replay.conftest.operator_pre_flight_setup` —
|
|
the AZ-839 C3 fixture that owns steps 3-5 (route extraction +
|
|
satellite-provider seeding + FAISS index build) and yields a
|
|
:class:`PopulatedC6Cache` keyed off a freshly-mktemp'd
|
|
``cache_root``.
|
|
* :func:`tests.e2e.replay.conftest.derkachi_replay_inputs` — the
|
|
shared session fixture that materialises the Derkachi tlog +
|
|
video + factory-sheet calibration + signing-key file.
|
|
* :func:`tests.e2e.replay._e2e_orchestrator.run_e2e_orchestration`
|
|
— the AC-1 driver that wires everything below the C3 fixture.
|
|
|
|
The driver writes a fresh effective replay config per session
|
|
(merging the static operator YAML with the cache_root override),
|
|
invokes ``gps-denied-replay --auto-trim``, parses the JSONL
|
|
emissions, computes the horizontal-error distribution, and writes
|
|
the verdict markdown under ``_docs/06_metrics/`` (AC-2).
|
|
|
|
Skip gates (in evaluation order):
|
|
|
|
1. ``@pytest.mark.tier2`` — the per-suite Tier-2 plugin gates this
|
|
off on dev macOS (matches the AZ-839 / AZ-699 contract).
|
|
2. ``RUN_REPLAY_E2E`` not in ``{1, true, yes, on}``.
3. ``GPS_DENIED_OPERATOR_CONFIG_PATH`` unset or blank.
4. ``gps-denied-replay`` console-script found neither on ``PATH`` nor
   next to the running interpreter.
5. Real video missing or placeholder-sized (mirrors AZ-699's gate).
6. ``operator_pre_flight_setup`` fixture itself skipped — the
   downstream consumer inherits the SKIP automatically (pytest's
   fixture-skip propagation).
|
|
|
|
AC-7 (AZ-699 continues to pass) is satisfied by inspection: this
|
|
test does not modify ``test_derkachi_real_tlog.py`` and writes its
|
|
report to the same path (``real_flight_validation_<date>.md``) but
|
|
in an idempotent way — both tests writing PASS or both writing
|
|
FAIL is the expected joint outcome on a given clip.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import shutil
|
|
import sys
|
|
from collections.abc import Iterator
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from tests.e2e.replay._e2e_orchestrator import (
|
|
OrchestrationReport,
|
|
run_e2e_orchestration,
|
|
)
|
|
from tests.e2e.replay._operator_pre_flight import PopulatedC6Cache
|
|
from tests.e2e.replay.conftest import DerkachiReplayInputs
|
|
|
|
|
|
def _repo_root() -> Path:
    """Return the directory treated as the repository root.

    This is the fourth ancestor of this file's resolved path, i.e. three
    levels above the directory that contains the file.
    """
    this_file = Path(__file__).resolve()
    # parents[0] is the containing directory, so parents[3] sits three
    # levels above it.
    return this_file.parents[3]
|
|
|
|
|
|
def _derkachi_dir() -> Path:
    """Return the ``flight_derkachi`` input-data directory under ``_docs``."""
    relative_parts = ("_docs", "00_problem", "input_data", "flight_derkachi")
    return _repo_root().joinpath(*relative_parts)
|
|
|
|
|
|
# Size floor in bytes for the Derkachi video: the skip gate reports a smaller
# file as a placeholder rather than a real recording.
_MIN_REAL_VIDEO_BYTES: int = 1_000_000
|
|
|
|
|
|
def _replay_binary() -> Path | None:
    """Locate the ``gps-denied-replay`` console script.

    ``PATH`` is searched first via :func:`shutil.which`; failing that, the
    directory holding the running interpreter is checked. The original
    author documents this as the same lookup order AZ-699 uses.

    Returns:
        The path to the script, or ``None`` when neither location has it.
    """
    script_name = "gps-denied-replay"

    on_path = shutil.which(script_name)
    if on_path is not None:
        return Path(on_path)

    # Fallback: a console script installed next to the interpreter, which
    # may not be on PATH when the environment was never activated.
    beside_interpreter = Path(sys.executable).parent / script_name
    return beside_interpreter if beside_interpreter.exists() else None
|
|
|
|
|
|
def _orchestrator_skip_reason() -> str | None:
    """Return a SKIP message when env / inputs preclude a Jetson run.

    Gates are checked in this order and the first failing one wins:

    1. ``RUN_REPLAY_E2E`` must be one of ``1`` / ``true`` / ``yes`` / ``on``
       (case-insensitive, surrounding whitespace ignored).
    2. ``GPS_DENIED_OPERATOR_CONFIG_PATH`` must be set and non-blank.
    3. ``_replay_binary()`` must find the ``gps-denied-replay`` script.
    4. ``flight_derkachi.mp4`` must exist as a file in ``_derkachi_dir()``.
    5. That file must be at least ``_MIN_REAL_VIDEO_BYTES`` bytes.

    Returns:
        The human-readable skip reason, or ``None`` when every gate passes.
    """
    opt_in = os.environ.get("RUN_REPLAY_E2E", "").strip().lower()
    if opt_in not in {"1", "true", "yes", "on"}:
        return "AZ-840 e2e orchestrator gated by RUN_REPLAY_E2E=1"

    if not os.environ.get("GPS_DENIED_OPERATOR_CONFIG_PATH", "").strip():
        return (
            "AZ-840 e2e orchestrator requires GPS_DENIED_OPERATOR_CONFIG_PATH "
            "(same env var the C3 fixture consumes)"
        )

    if _replay_binary() is None:
        return "gps-denied-replay console-script not installed"

    video = _derkachi_dir() / "flight_derkachi.mp4"
    if not video.is_file():
        return f"Derkachi video missing: {video}"

    # Read the size once so the value compared against the floor and the
    # value reported in the message cannot disagree.
    video_bytes = video.stat().st_size
    if video_bytes < _MIN_REAL_VIDEO_BYTES:
        return (
            f"Derkachi video at {video} is only {video_bytes} "
            "bytes — placeholder, not a real recording"
        )
    return None
|
|
|
|
|
|
@pytest.fixture(scope="session")
def az840_skip_gate() -> Iterator[None]:
    """Skip-gate the orchestrator test before any heavy fixtures resolve.

    Session-scoped on purpose: pytest instantiates higher-scoped fixtures
    before lower-scoped ones regardless of their position in the test
    signature. A function-scoped gate would therefore only run after the
    session-scoped ``derkachi_replay_inputs`` had already been built. At
    session scope, and listed first in the test signature, the gate is
    evaluated ahead of the other non-autouse fixtures the test requests.

    NOTE(review): the scope of ``operator_pre_flight_setup`` is not visible
    from this module; confirm it is not wider than session-level ordering
    allows for.

    Yields:
        ``None`` — the fixture carries no value; it either skips or passes.
    """
    reason = _orchestrator_skip_reason()
    if reason is not None:
        pytest.skip(reason)
    yield
|
|
|
|
|
|
@pytest.mark.tier2
def test_az840_e2e_real_flight_orchestration(
    az840_skip_gate: None,
    operator_pre_flight_setup: PopulatedC6Cache,
    derkachi_replay_inputs: DerkachiReplayInputs,
    tmp_path: Path,
) -> None:
    """Run the orchestrator on the Derkachi inputs and check its outputs.

    Covers the verdict report's existence and content, the replay time
    budget, and the effective config referencing the fixture's cache root.
    """
    # Arrange — all inputs except the per-test output locations come from
    # fixtures; the output JSONL and effective config live under tmp_path.
    replay_bin = _replay_binary()
    assert replay_bin is not None, "skip gate already verified the binary exists"
    cache = operator_pre_flight_setup
    inputs = derkachi_replay_inputs
    operator_config = Path(os.environ["GPS_DENIED_OPERATOR_CONFIG_PATH"])
    effective_config_path = tmp_path / "operator_config_effective.yaml"

    # Act
    report = run_e2e_orchestration(
        populated_cache=cache,
        base_config_path=operator_config,
        tlog_path=inputs.tlog_path,
        video_path=inputs.video_path,
        calibration_path=inputs.calibration_path,
        signing_key_path=inputs.signing_key_path,
        replay_binary=replay_bin,
        output_path=tmp_path / "estimator_output.jsonl",
        report_dir=_repo_root() / "_docs" / "06_metrics",
        effective_config_path=effective_config_path,
    )

    # Assert AC-2 — the verdict report exists and has the expected sections.
    assert isinstance(report, OrchestrationReport)
    assert report.report_path.is_file()
    report_markdown = report.report_path.read_text()
    assert "## Horizontal error (metres)" in report_markdown
    assert "## Threshold-hit share" in report_markdown
    assert "Mean" in report_markdown
    for threshold in (10, 25, 50, 100):
        assert f"| {threshold} |" in report_markdown, (
            f"threshold {threshold} m row missing from report"
        )

    # Assert AC-4 — the budget is applied to the replay subprocess time;
    # total wall clock is only required to be at least that long.
    assert report.replay_subprocess_seconds <= 900.0, (
        "AZ-840 AC-4: replay subprocess exceeded 15-min soft target"
    )
    assert report.wall_clock_s >= report.replay_subprocess_seconds
    assert report.distribution.count > 0, (
        "no emissions paired with ground truth — orchestration produced "
        "data but every emission fell outside the tlog GPS window"
    )

    # Assert AC-3 — the effective config was written and mentions the
    # cache_root supplied by the C3 fixture.
    assert effective_config_path.is_file()
    written_config = effective_config_path.read_text()
    assert str(cache.cache_root) in written_config
|