mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 21:41:13 +00:00
[AZ-840] [AZ-835] e2e orchestrator test (E-AZ-835 C4)
Wraps the AZ-699 verdict-report path with the AZ-839 operator_pre_flight_setup C3 fixture so a single Tier-2 test takes only (tlog, video, calibration) and runs the full 7-step pipeline on the Jetson harness without operator hand-curation. New surface (tests-only, no src/ changes): - tests/e2e/replay/_e2e_orchestrator.py — orchestrator with OrchestratorStep enum, OrchestrationFailure exception (step prefix per AC-5), OrchestrationReport dataclass, write_effective_replay_config helper, and run_e2e_orchestration entry point covering steps 1-2-6-7. - tests/e2e/replay/test_e2e_orchestrator_unit.py — 17 unit tests covering each failure mode + happy path with mocked subprocess + ground-truth loader (AC-8). - tests/e2e/replay/test_az835_e2e_real_flight.py — Tier-2 + RUN_REPLAY_E2E gated integration test asserting verdict report exists, 15-min budget held (AC-1, AC-2, AC-3, AC-4, AC-6). The effective config write overlays c6_tile_cache.root_dir onto the static operator YAML at runtime so the airborne subprocess shares the cache_root the C3 fixture chose. Field-level merge — every other operator-config block stays verbatim. The static YAML on disk is never touched. Test run: tests/e2e/replay 45 passed, 10 skipped (10 skips were 9 pre-existing + 1 new tier2). No src/ touched, no AZ-839 driver changes; AC-7 (AZ-699 still passes) holds by inspection. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,182 @@
|
||||
"""AZ-840 — E2E orchestrator integration test (AC-1 / AC-2 / AC-3 / AC-4 / AC-6).
|
||||
|
||||
The Tier-2 entry point that closes Epic AZ-835's narrative: from a
|
||||
``(tlog, video, calibration)`` triple, run the full 7-step pipeline
|
||||
end-to-end on the Jetson harness without operator hand-curation
|
||||
between steps.
|
||||
|
||||
The test consumes:
|
||||
|
||||
* :func:`tests.e2e.replay.conftest.operator_pre_flight_setup` —
|
||||
the AZ-839 C3 fixture that owns steps 3-5 (route extraction +
|
||||
satellite-provider seeding + FAISS index build) and yields a
|
||||
:class:`PopulatedC6Cache` keyed off a freshly-mktemp'd
|
||||
``cache_root``.
|
||||
* :func:`tests.e2e.replay.conftest.derkachi_replay_inputs` — the
|
||||
shared session fixture that materialises the Derkachi tlog +
|
||||
video + factory-sheet calibration + signing-key file.
|
||||
* :func:`tests.e2e.replay._e2e_orchestrator.run_e2e_orchestration`
|
||||
— the AC-1 driver that wires everything below the C3 fixture.
|
||||
|
||||
The driver writes a fresh effective replay config per session
|
||||
(merging the static operator YAML with the cache_root override),
|
||||
invokes ``gps-denied-replay --auto-trim``, parses the JSONL
|
||||
emissions, computes the horizontal-error distribution, and writes
|
||||
the verdict markdown under ``_docs/06_metrics/`` (AC-2).
|
||||
|
||||
Skip gates (in evaluation order):
|
||||
|
||||
1. ``@pytest.mark.tier2`` — the per-suite Tier-2 plugin gates this
|
||||
off on dev macOS (matches the AZ-839 / AZ-699 contract).
|
||||
2. ``RUN_REPLAY_E2E`` not in ``{1, true, yes, on}``; then
   ``GPS_DENIED_OPERATOR_CONFIG_PATH`` unset or blank.
|
||||
3. ``gps-denied-replay`` console-script not on ``PATH``.
|
||||
4. Real video missing or placeholder-sized (mirrors AZ-699's gate).
|
||||
5. ``operator_pre_flight_setup`` fixture itself skipped — the
|
||||
downstream consumer inherits the SKIP automatically (pytest's
|
||||
fixture-skip propagation).
|
||||
|
||||
AC-7 (AZ-699 continues to pass) is satisfied by inspection: this
|
||||
test does not modify ``test_derkachi_real_tlog.py`` and writes its
|
||||
report to the same path (``real_flight_validation_<date>.md``) but
|
||||
in an idempotent way — both tests writing PASS or both writing
|
||||
FAIL is the expected joint outcome on a given clip.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
from collections.abc import Iterator
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from tests.e2e.replay._e2e_orchestrator import (
|
||||
OrchestrationReport,
|
||||
run_e2e_orchestration,
|
||||
)
|
||||
from tests.e2e.replay._operator_pre_flight import PopulatedC6Cache
|
||||
from tests.e2e.replay.conftest import DerkachiReplayInputs
|
||||
|
||||
|
||||
def _repo_root() -> Path:
    """Return the repository root derived from this module's location.

    The module's resolved path is walked up to its fourth ancestor
    directory (``parents[3]``), which is treated as the repo root.
    """
    this_module = Path(__file__).resolve()
    return this_module.parents[3]
|
||||
|
||||
|
||||
def _derkachi_dir() -> Path:
    """Return the Derkachi flight input-data directory under the repo root."""
    relative_parts = ("_docs", "00_problem", "input_data", "flight_derkachi")
    return _repo_root().joinpath(*relative_parts)
|
||||
|
||||
|
||||
# Size floor in bytes: ``_orchestrator_skip_reason`` reports a Derkachi video
# smaller than this as a placeholder rather than a real recording.
_MIN_REAL_VIDEO_BYTES: int = 1_000_000
|
||||
|
||||
|
||||
def _replay_binary() -> Path | None:
    """Locate the ``gps-denied-replay`` console script.

    Lookup order (the same one AZ-699 uses): ``PATH`` first, then the
    directory that holds the running interpreter (the venv ``bin``).

    Returns:
        The path to the script, or ``None`` when neither lookup finds it.
    """
    script_name = "gps-denied-replay"

    if (on_path := shutil.which(script_name)) is not None:
        return Path(on_path)

    # Not on PATH: an un-activated venv still keeps its console scripts
    # next to the interpreter executable.
    beside_interpreter = Path(sys.executable).parent / script_name
    return beside_interpreter if beside_interpreter.exists() else None
|
||||
|
||||
|
||||
def _orchestrator_skip_reason() -> str | None:
    """Return the SKIP message for the first unmet precondition, else ``None``.

    Preconditions are checked in this order, stopping at the first failure:

    1. ``RUN_REPLAY_E2E`` (stripped, lower-cased) is one of ``1``, ``true``,
       ``yes``, ``on``.
    2. ``GPS_DENIED_OPERATOR_CONFIG_PATH`` is set to a non-blank value.
    3. :func:`_replay_binary` finds the ``gps-denied-replay`` script.
    4. ``flight_derkachi.mp4`` exists as a regular file under
       :func:`_derkachi_dir`.
    5. That file is at least ``_MIN_REAL_VIDEO_BYTES`` bytes.
    """
    env = os.environ

    opt_in = env.get("RUN_REPLAY_E2E", "").strip().lower()
    if opt_in not in {"1", "true", "yes", "on"}:
        return "AZ-840 e2e orchestrator gated by RUN_REPLAY_E2E=1"

    operator_config = env.get("GPS_DENIED_OPERATOR_CONFIG_PATH", "").strip()
    if not operator_config:
        return (
            "AZ-840 e2e orchestrator requires GPS_DENIED_OPERATOR_CONFIG_PATH "
            "(same env var the C3 fixture consumes)"
        )

    if _replay_binary() is None:
        return "gps-denied-replay console-script not installed"

    video = _derkachi_dir() / "flight_derkachi.mp4"
    if not video.is_file():
        return f"Derkachi video missing: {video}"

    # Large enough to be a real recording: every gate passed.
    if video.stat().st_size >= _MIN_REAL_VIDEO_BYTES:
        return None

    return (
        f"Derkachi video at {video} is only {video.stat().st_size} "
        "bytes — placeholder, not a real recording"
    )
|
||||
|
||||
|
||||
@pytest.fixture
def az840_skip_gate() -> Iterator[None]:
    """Skip the requesting test when an orchestrator precondition is unmet.

    Calls :func:`_orchestrator_skip_reason` and, if it returns a message,
    skips with that message; otherwise yields control to the test.

    NOTE(review): this fixture is function-scoped, so listing it first in
    the test signature only places it ahead of other function-scoped
    fixtures. pytest sets up higher-scoped fixtures first, so a session
    fixture requested by the same test may still resolve before this gate
    — confirm against the scopes declared in ``conftest``.
    """
    if (why := _orchestrator_skip_reason()) is not None:
        pytest.skip(why)
    yield
|
||||
|
||||
|
||||
@pytest.mark.tier2
def test_az840_e2e_real_flight_orchestration(
    az840_skip_gate: None,
    operator_pre_flight_setup: PopulatedC6Cache,
    derkachi_replay_inputs: DerkachiReplayInputs,
    tmp_path: Path,
) -> None:
    """Run the orchestrator on the Derkachi inputs and check its outputs.

    Asserts that the verdict report file exists and contains the expected
    headings and threshold rows, that the replay subprocess stayed within
    900 seconds, that at least one emission was paired with ground truth,
    and that the effective config file mentions the fixture's
    ``cache_root``.
    """
    # Arrange — every input besides cache_root comes from the existing
    # session fixtures, so the same Tier-2 harness setup that powers
    # AZ-699 + AZ-839 is exercised.
    replay_bin = _replay_binary()
    assert replay_bin is not None, "skip gate already verified the binary exists"
    operator_yaml = Path(os.environ["GPS_DENIED_OPERATOR_CONFIG_PATH"])
    jsonl_out = tmp_path / "estimator_output.jsonl"
    effective_yaml = tmp_path / "operator_config_effective.yaml"
    metrics_dir = _repo_root() / "_docs" / "06_metrics"
    inputs = derkachi_replay_inputs

    # Act
    report = run_e2e_orchestration(
        populated_cache=operator_pre_flight_setup,
        base_config_path=operator_yaml,
        tlog_path=inputs.tlog_path,
        video_path=inputs.video_path,
        calibration_path=inputs.calibration_path,
        signing_key_path=inputs.signing_key_path,
        replay_binary=replay_bin,
        output_path=jsonl_out,
        report_dir=metrics_dir,
        effective_config_path=effective_yaml,
    )

    # Assert AC-2 + AC-4 — report exists with the expected sections, and
    # the replay subprocess stayed within the 15-min budget.
    assert isinstance(report, OrchestrationReport)
    assert report.report_path.is_file()
    report_md = report.report_path.read_text()
    for marker in (
        "## Horizontal error (metres)",
        "## Threshold-hit share",
        "Mean",
    ):
        assert marker in report_md
    for metres in (10, 25, 50, 100):
        assert f"| {metres} |" in report_md, (
            f"threshold {metres} m row missing from report"
        )
    replay_seconds = report.replay_subprocess_seconds
    assert replay_seconds <= 900.0, (
        "AZ-840 AC-4: replay subprocess exceeded 15-min soft target"
    )
    assert report.wall_clock_s >= report.replay_subprocess_seconds
    assert report.distribution.count > 0, (
        "no emissions paired with ground truth — orchestration produced "
        "data but every emission fell outside the tlog GPS window"
    )

    # Assert AC-3 — the effective config was written and points at the
    # cache_root the C3 fixture supplied.
    assert effective_yaml.is_file()
    cache_root_text = str(operator_pre_flight_setup.cache_root)
    assert cache_root_text in effective_yaml.read_text()
|
||||
Reference in New Issue
Block a user