"""Unit tests for the AZ-840 e2e orchestrator (AC-8).

The end-to-end happy path is the Tier-2 integration test in
``test_az835_e2e_real_flight.py`` (AC-1 / AC-2). This module covers the
orchestration helper layer in isolation:

* Param validation — every required path must exist before the airborne
  subprocess is spawned (AC-5 fails LOUD). The validation tests inject a
  fake runner and assert it is never invoked.
* Effective-config merge — the ``c6_tile_cache.root_dir`` override is
  written to YAML and ``c6_tile_cache.faiss_index_path`` is blanked; the
  rest of the base config is preserved.
* Error propagation per step — every documented failure surfaces as
  :class:`OrchestrationFailure` with the correct
  :class:`OrchestratorStep` label.
* Happy path — when the runner returns success and the JSONL + ground
  truth align, :class:`OrchestrationReport` carries a written report
  path and an honest verdict (AC-2: report exists PASS or FAIL).

The tests inject a fake ``runner`` so no real ``gps-denied-replay``
subprocess is spawned. Real binary execution is exercised on the Jetson
harness via the AC-1 integration test.
"""

from __future__ import annotations

import json
import subprocess
from pathlib import Path
from unittest.mock import MagicMock

import pytest
import yaml

from gps_denied_onboard.helpers.accuracy_report import (
    AC3_GATE_THRESHOLD_M,
)
from gps_denied_onboard.replay_input.tlog_route import RouteSpec
from tests.e2e.replay._e2e_orchestrator import (
    OrchestrationFailure,
    OrchestrationReport,
    OrchestratorStep,
    read_calibration_acquisition_method,
    run_e2e_orchestration,
    write_effective_replay_config,
)
from tests.e2e.replay._operator_pre_flight import PopulatedC6Cache


# ----------------------------------------------------------------------
# Helpers


def _build_populated_cache(tmp_path: Path) -> PopulatedC6Cache:
    """Construct a synthetic :class:`PopulatedC6Cache`.

    The orchestrator only consumes ``cache_root`` from the cache, so the
    FAISS sidecar paths are placeholders. The route_spec is a minimal
    one-waypoint instance — no AZ-836 invariants are re-asserted by AZ-840.
    """
    cache_root = tmp_path / "cache_root"
    cache_root.mkdir()
    return PopulatedC6Cache(
        cache_root=cache_root,
        tile_store_path=cache_root / "tiles",
        faiss_index_path=cache_root / "descriptor.index",
        faiss_sidecar_sha256_path=cache_root / "descriptor.index.sha256",
        faiss_sidecar_meta_path=cache_root / "descriptor.index.meta.json",
        route_spec=RouteSpec(
            waypoints=((50.10, 36.10),),
            suggested_region_size_meters=500.0,
            source_tlog=Path("test.tlog"),
            source_segment=(0, 100),
            total_distance_meters=0.0,
        ),
        tile_count=1,
        elapsed_seconds=0.0,
    )


def _stage_inputs(tmp_path: Path) -> dict[str, Path]:
    """Write touch-files for every input path the orchestrator validates.

    The base config YAML carries one stub block so the merge step has a
    real document to overlay on.

    Returns a mapping whose keys match the orchestrator's keyword
    arguments, so tests can splat it straight into
    ``run_e2e_orchestration(**inputs)``.
    """
    base_config = tmp_path / "operator_config.yaml"
    base_config.write_text(
        yaml.safe_dump(
            {
                "mode": "replay",
                "c6_tile_cache": {
                    "store_runtime": "postgres_filesystem",
                    "metadata_runtime": "postgres_filesystem",
                    "descriptor_index_runtime": "faiss_hnsw",
                    "root_dir": "/var/lib/gps-denied/tiles",
                    "faiss_index_path": "/some/static/path/descriptor.index",
                },
            }
        )
    )
    tlog = tmp_path / "input.tlog"
    tlog.write_bytes(b"\x00")
    video = tmp_path / "input.mp4"
    video.write_bytes(b"\x00")
    calibration = tmp_path / "calibration.json"
    calibration.write_text(json.dumps({"acquisition_method": "factory-sheet"}))
    signing_key = tmp_path / "signing_key.bin"
    signing_key.write_bytes(b"\x00" * 32)
    binary = tmp_path / "gps-denied-replay"
    binary.write_text("")
    return {
        "base_config_path": base_config,
        "tlog_path": tlog,
        "video_path": video,
        "calibration_path": calibration,
        "signing_key_path": signing_key,
        "replay_binary": binary,
    }


def _ground_truth_tlog_loader(
    monkeypatch: pytest.MonkeyPatch,
    *,
    times_s: tuple[float, ...] = (0.0, 1.0, 2.0),
    lat_deg: float = 50.10,
    lon_deg: float = 36.10,
    alt_m: float = 100.0,
) -> None:
    """Stub the orchestrator's ground-truth loader so unit tests skip MAVLink.

    The orchestrator imports ``load_tlog_ground_truth`` from
    ``gps_denied_onboard.replay_input``; patching the symbol *as bound on
    the orchestrator module* keeps the patch local to the unit suite (no
    cross-test bleed).

    Every stub fix shares the same position; only the timestamp (seconds
    converted to integer nanoseconds) varies across ``times_s``.
    """
    fixes = [
        _StubGpsFix(
            ts_ns=int(t * 1e9),
            lat_deg=lat_deg,
            lon_deg=lon_deg,
            alt_m=alt_m,
        )
        for t in times_s
    ]
    series = _StubGpsSeries(records=tuple(fixes))
    monkeypatch.setattr(
        "tests.e2e.replay._e2e_orchestrator.load_tlog_ground_truth",
        lambda *_args, **_kwargs: series,
    )


class _StubGpsFix:
    """Mirrors the fields the orchestrator reads from each tlog row."""

    __slots__ = ("ts_ns", "lat_deg", "lon_deg", "alt_m")

    def __init__(
        self, *, ts_ns: int, lat_deg: float, lon_deg: float, alt_m: float
    ) -> None:
        self.ts_ns = ts_ns
        self.lat_deg = lat_deg
        self.lon_deg = lon_deg
        self.alt_m = alt_m


class _StubGpsSeries:
    """Drop-in replacement for :class:`TlogGroundTruth`."""

    def __init__(self, *, records: tuple[_StubGpsFix, ...]) -> None:
        self.records = records


def _build_runner_emitting(
    output_path: Path,
    *,
    rows: list[dict[str, object]],
    returncode: int = 0,
    stdout: str = "",
    stderr: str = "",
) -> MagicMock:
    """Return a fake ``subprocess.run`` that writes JSONL on call.

    When ``rows`` is empty nothing is written, so ``output_path`` is left
    untouched. The returned mock records its calls, letting tests inspect
    the argv that the orchestrator passed.
    """

    def _run(argv, **kwargs):  # type: ignore[no-untyped-def]
        if rows:
            output_path.parent.mkdir(parents=True, exist_ok=True)
            output_path.write_text(
                "\n".join(json.dumps(row) for row in rows) + "\n"
            )
        return subprocess.CompletedProcess(
            args=argv,
            returncode=returncode,
            stdout=stdout,
            stderr=stderr,
        )

    return MagicMock(side_effect=_run)


# ----------------------------------------------------------------------
# write_effective_replay_config


def test_write_effective_replay_config_overlays_root_dir(
    tmp_path: Path,
) -> None:
    """The merge overrides ``root_dir``, blanks ``faiss_index_path``, keeps the rest."""
    # Arrange
    inputs = _stage_inputs(tmp_path)
    cache_root = tmp_path / "cache"
    cache_root.mkdir()
    output_path = tmp_path / "effective.yaml"

    # Act
    written_path = write_effective_replay_config(
        base_config_path=inputs["base_config_path"],
        cache_root=cache_root,
        output_path=output_path,
    )

    # Assert
    assert written_path == output_path
    merged = yaml.safe_load(output_path.read_text())
    assert merged["c6_tile_cache"]["root_dir"] == str(cache_root)
    assert merged["c6_tile_cache"]["faiss_index_path"] == ""
    assert merged["mode"] == "replay"
    assert (
        merged["c6_tile_cache"]["store_runtime"] == "postgres_filesystem"
    ), "non-overridden c6_tile_cache fields must survive"


def test_write_effective_replay_config_creates_block_when_absent(
    tmp_path: Path,
) -> None:
    """A base config without ``c6_tile_cache`` gains the block with ``root_dir``."""
    # Arrange
    base = tmp_path / "operator.yaml"
    base.write_text(yaml.safe_dump({"mode": "replay"}))
    cache_root = tmp_path / "cache"
    cache_root.mkdir()

    # Act
    write_effective_replay_config(
        base_config_path=base,
        cache_root=cache_root,
        output_path=tmp_path / "effective.yaml",
    )

    # Assert
    merged = yaml.safe_load((tmp_path / "effective.yaml").read_text())
    assert merged["c6_tile_cache"]["root_dir"] == str(cache_root)


def test_write_effective_replay_config_malformed_yaml_fails(
    tmp_path: Path,
) -> None:
    """Unparseable base YAML surfaces as a WRITE_EFFECTIVE_CONFIG failure."""
    # Arrange
    base = tmp_path / "bad.yaml"
    base.write_text(":\n  : not yaml:")
    cache_root = tmp_path / "cache"
    cache_root.mkdir()

    # Act + Assert
    with pytest.raises(OrchestrationFailure) as exc_info:
        write_effective_replay_config(
            base_config_path=base,
            cache_root=cache_root,
            output_path=tmp_path / "effective.yaml",
        )
    assert exc_info.value.step is OrchestratorStep.WRITE_EFFECTIVE_CONFIG


def test_write_effective_replay_config_non_mapping_top_level_fails(
    tmp_path: Path,
) -> None:
    """A base config whose top level is a list (not a mapping) is rejected."""
    # Arrange
    base = tmp_path / "bad.yaml"
    base.write_text("- not a mapping\n")
    cache_root = tmp_path / "cache"
    cache_root.mkdir()

    # Act + Assert
    with pytest.raises(OrchestrationFailure) as exc_info:
        write_effective_replay_config(
            base_config_path=base,
            cache_root=cache_root,
            output_path=tmp_path / "effective.yaml",
        )
    assert exc_info.value.step is OrchestratorStep.WRITE_EFFECTIVE_CONFIG


# ----------------------------------------------------------------------
# read_calibration_acquisition_method


def test_read_calibration_acquisition_method_returns_field_when_present(
    tmp_path: Path,
) -> None:
    """The ``acquisition_method`` value is returned verbatim when present."""
    # Arrange
    path = tmp_path / "cal.json"
    path.write_text(json.dumps({"acquisition_method": "factory-sheet"}))

    # Assert
    assert read_calibration_acquisition_method(path) == "factory-sheet"


def test_read_calibration_acquisition_method_returns_unknown_on_missing(
    tmp_path: Path,
) -> None:
    """Valid JSON without the field yields ``"unknown"``."""
    # Arrange
    path = tmp_path / "cal.json"
    path.write_text(json.dumps({"some_other_field": True}))

    # Assert
    assert read_calibration_acquisition_method(path) == "unknown"


def test_read_calibration_acquisition_method_returns_unknown_on_malformed(
    tmp_path: Path,
) -> None:
    """Malformed JSON yields ``"unknown"`` rather than raising."""
    # Arrange
    path = tmp_path / "cal.json"
    path.write_text("{not valid json")

    # Assert
    assert read_calibration_acquisition_method(path) == "unknown"


# ----------------------------------------------------------------------
# run_e2e_orchestration — param validation (AC-5)


def test_run_e2e_orchestration_missing_tlog_fails_loud(
    tmp_path: Path,
) -> None:
    """A missing tlog fails at VALIDATE_INPUTS without invoking the runner."""
    # Arrange
    cache = _build_populated_cache(tmp_path)
    inputs = _stage_inputs(tmp_path)
    inputs["tlog_path"].unlink()
    # Fake runner: guarantees no real subprocess is ever spawned and lets
    # the test pin "validation happens before the spawn".
    runner = MagicMock()

    # Act + Assert
    with pytest.raises(OrchestrationFailure) as exc_info:
        run_e2e_orchestration(
            populated_cache=cache,
            output_path=tmp_path / "out.jsonl",
            report_dir=tmp_path / "metrics",
            effective_config_path=tmp_path / "eff.yaml",
            runner=runner,
            **inputs,  # type: ignore[arg-type]
        )
    assert exc_info.value.step is OrchestratorStep.VALIDATE_INPUTS
    assert "tlog_path" in str(exc_info.value)
    runner.assert_not_called()


def test_run_e2e_orchestration_missing_binary_fails_loud(
    tmp_path: Path,
) -> None:
    """A missing replay binary fails at VALIDATE_INPUTS without invoking the runner."""
    # Arrange
    cache = _build_populated_cache(tmp_path)
    inputs = _stage_inputs(tmp_path)
    inputs["replay_binary"].unlink()
    # Fake runner: see test_run_e2e_orchestration_missing_tlog_fails_loud.
    runner = MagicMock()

    # Act + Assert
    with pytest.raises(OrchestrationFailure) as exc_info:
        run_e2e_orchestration(
            populated_cache=cache,
            output_path=tmp_path / "out.jsonl",
            report_dir=tmp_path / "metrics",
            effective_config_path=tmp_path / "eff.yaml",
            runner=runner,
            **inputs,  # type: ignore[arg-type]
        )
    assert exc_info.value.step is OrchestratorStep.VALIDATE_INPUTS
    assert "replay_binary" in str(exc_info.value)
    runner.assert_not_called()


# ----------------------------------------------------------------------
# run_e2e_orchestration — subprocess error propagation (AC-5)


def test_run_e2e_orchestration_replay_nonzero_exit_fails_loud(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A non-zero exit is reported with the exit code and captured stderr."""
    # Arrange
    cache = _build_populated_cache(tmp_path)
    inputs = _stage_inputs(tmp_path)
    output_path = tmp_path / "out.jsonl"
    runner = MagicMock(
        return_value=subprocess.CompletedProcess(
            args=[],
            returncode=1,
            stdout="",
            stderr="boom",
        )
    )
    _ground_truth_tlog_loader(monkeypatch)

    # Act + Assert
    with pytest.raises(OrchestrationFailure) as exc_info:
        run_e2e_orchestration(
            populated_cache=cache,
            output_path=output_path,
            report_dir=tmp_path / "metrics",
            effective_config_path=tmp_path / "eff.yaml",
            runner=runner,
            **inputs,  # type: ignore[arg-type]
        )
    assert exc_info.value.step is OrchestratorStep.AIRBORNE_PIPELINE
    assert "exited 1" in str(exc_info.value)
    assert "boom" in str(exc_info.value)


def test_run_e2e_orchestration_replay_timeout_fails_loud(
    tmp_path: Path,
) -> None:
    """``subprocess.TimeoutExpired`` from the runner maps to AIRBORNE_PIPELINE."""
    # Arrange
    cache = _build_populated_cache(tmp_path)
    inputs = _stage_inputs(tmp_path)

    def _timeout(*_args, **_kwargs):
        raise subprocess.TimeoutExpired(cmd=["replay"], timeout=0.1)

    runner = MagicMock(side_effect=_timeout)

    # Act + Assert
    with pytest.raises(OrchestrationFailure) as exc_info:
        run_e2e_orchestration(
            populated_cache=cache,
            output_path=tmp_path / "out.jsonl",
            report_dir=tmp_path / "metrics",
            effective_config_path=tmp_path / "eff.yaml",
            runner=runner,
            max_seconds=0.1,
            **inputs,  # type: ignore[arg-type]
        )
    assert exc_info.value.step is OrchestratorStep.AIRBORNE_PIPELINE
    assert "timed out" in str(exc_info.value)


def test_run_e2e_orchestration_replay_oserror_fails_loud(
    tmp_path: Path,
) -> None:
    """An ``OSError`` from the runner maps to AIRBORNE_PIPELINE with its message."""
    # Arrange
    cache = _build_populated_cache(tmp_path)
    inputs = _stage_inputs(tmp_path)

    def _oserror(*_args, **_kwargs):
        raise OSError("permission denied")

    runner = MagicMock(side_effect=_oserror)

    # Act + Assert
    with pytest.raises(OrchestrationFailure) as exc_info:
        run_e2e_orchestration(
            populated_cache=cache,
            output_path=tmp_path / "out.jsonl",
            report_dir=tmp_path / "metrics",
            effective_config_path=tmp_path / "eff.yaml",
            runner=runner,
            **inputs,  # type: ignore[arg-type]
        )
    assert exc_info.value.step is OrchestratorStep.AIRBORNE_PIPELINE
    assert "permission denied" in str(exc_info.value)


# ----------------------------------------------------------------------
# run_e2e_orchestration — empty / malformed JSONL (AC-5)


def test_run_e2e_orchestration_empty_jsonl_fails_loud(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A JSONL file containing only blank lines fails at PARSE_EMISSIONS."""
    # Arrange
    cache = _build_populated_cache(tmp_path)
    inputs = _stage_inputs(tmp_path)
    output_path = tmp_path / "out.jsonl"

    def _runner(argv, **_kwargs):  # type: ignore[no-untyped-def]
        output_path.write_text("\n\n")  # only blanks
        return subprocess.CompletedProcess(
            args=argv, returncode=0, stdout="", stderr=""
        )

    runner = MagicMock(side_effect=_runner)
    _ground_truth_tlog_loader(monkeypatch)

    # Act + Assert
    with pytest.raises(OrchestrationFailure) as exc_info:
        run_e2e_orchestration(
            populated_cache=cache,
            output_path=output_path,
            report_dir=tmp_path / "metrics",
            effective_config_path=tmp_path / "eff.yaml",
            runner=runner,
            **inputs,  # type: ignore[arg-type]
        )
    assert exc_info.value.step is OrchestratorStep.PARSE_EMISSIONS


def test_run_e2e_orchestration_malformed_jsonl_fails_loud(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """One valid row followed by a non-JSON line fails at PARSE_EMISSIONS."""
    # Arrange
    cache = _build_populated_cache(tmp_path)
    inputs = _stage_inputs(tmp_path)
    output_path = tmp_path / "out.jsonl"

    def _runner(argv, **_kwargs):  # type: ignore[no-untyped-def]
        output_path.write_text('{"valid": true}\nnot a json line\n')
        return subprocess.CompletedProcess(
            args=argv, returncode=0, stdout="", stderr=""
        )

    runner = MagicMock(side_effect=_runner)
    _ground_truth_tlog_loader(monkeypatch)

    # Act + Assert
    with pytest.raises(OrchestrationFailure) as exc_info:
        run_e2e_orchestration(
            populated_cache=cache,
            output_path=output_path,
            report_dir=tmp_path / "metrics",
            effective_config_path=tmp_path / "eff.yaml",
            runner=runner,
            **inputs,  # type: ignore[arg-type]
        )
    assert exc_info.value.step is OrchestratorStep.PARSE_EMISSIONS


# ----------------------------------------------------------------------
# run_e2e_orchestration — ground truth loader failure (AC-5)


def test_run_e2e_orchestration_ground_truth_loader_failure_fails_loud(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A loader exception maps to LOAD_GROUND_TRUTH and keeps its message."""
    # Arrange
    cache = _build_populated_cache(tmp_path)
    inputs = _stage_inputs(tmp_path)
    output_path = tmp_path / "out.jsonl"
    runner = _build_runner_emitting(
        output_path,
        rows=[
            {
                "emitted_at": int(0.5 * 1e9),
                "position_wgs84": {
                    "lat_deg": 50.10,
                    "lon_deg": 36.10,
                    "alt_m": 100.0,
                },
            }
        ],
    )

    def _raise(*_args, **_kwargs):
        raise ValueError("tlog corrupt")

    monkeypatch.setattr(
        "tests.e2e.replay._e2e_orchestrator.load_tlog_ground_truth",
        _raise,
    )

    # Act + Assert
    with pytest.raises(OrchestrationFailure) as exc_info:
        run_e2e_orchestration(
            populated_cache=cache,
            output_path=output_path,
            report_dir=tmp_path / "metrics",
            effective_config_path=tmp_path / "eff.yaml",
            runner=runner,
            **inputs,  # type: ignore[arg-type]
        )
    assert exc_info.value.step is OrchestratorStep.LOAD_GROUND_TRUTH
    assert "tlog corrupt" in str(exc_info.value)


# ----------------------------------------------------------------------
# run_e2e_orchestration — happy path (AC-1 / AC-2)


def test_run_e2e_orchestration_happy_path_writes_report(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Emissions matching ground truth yield a PASS verdict and a written report."""
    # Arrange
    cache = _build_populated_cache(tmp_path)
    inputs = _stage_inputs(tmp_path)
    output_path = tmp_path / "out.jsonl"
    report_dir = tmp_path / "metrics"
    effective_config_path = tmp_path / "eff.yaml"
    rows = [
        {
            "emitted_at": int(0.5 * 1e9),
            "position_wgs84": {"lat_deg": 50.10, "lon_deg": 36.10, "alt_m": 100.0},
        },
        {
            "emitted_at": int(1.5 * 1e9),
            "position_wgs84": {"lat_deg": 50.10, "lon_deg": 36.10, "alt_m": 100.0},
        },
    ]
    runner = _build_runner_emitting(output_path, rows=rows)
    _ground_truth_tlog_loader(monkeypatch)

    # Act
    report = run_e2e_orchestration(
        populated_cache=cache,
        output_path=output_path,
        report_dir=report_dir,
        effective_config_path=effective_config_path,
        runner=runner,
        run_date_utc="2026-05-23",
        **inputs,  # type: ignore[arg-type]
    )

    # Assert
    assert isinstance(report, OrchestrationReport)
    assert report.report_path.is_file()
    assert report.emissions_count == 2
    assert report.distribution.count == 2
    assert report.verdict_passed is True
    body = report.report_path.read_text()
    assert "## Horizontal error (metres)" in body
    assert "## Threshold-hit share" in body
    assert f"| {AC3_GATE_THRESHOLD_M:g} |" in body
    runner.assert_called_once()
    # The runner receives argv as its first positional argument.
    argv_passed = runner.call_args.args[0]
    assert str(effective_config_path) in argv_passed
    assert "--auto-trim" in argv_passed
    merged = yaml.safe_load(effective_config_path.read_text())
    assert merged["c6_tile_cache"]["root_dir"] == str(cache.cache_root)


def test_run_e2e_orchestration_writes_report_even_on_fail_verdict(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A FAIL verdict still produces a report file that mentions ``FAIL``."""
    # Arrange — emissions are 1 km from ground truth, far above the 100 m gate.
    cache = _build_populated_cache(tmp_path)
    inputs = _stage_inputs(tmp_path)
    output_path = tmp_path / "out.jsonl"
    report_dir = tmp_path / "metrics"
    rows = [
        {
            "emitted_at": int(0.5 * 1e9),
            "position_wgs84": {"lat_deg": 50.110, "lon_deg": 36.110, "alt_m": 100.0},
        },
        {
            "emitted_at": int(1.5 * 1e9),
            "position_wgs84": {"lat_deg": 50.110, "lon_deg": 36.110, "alt_m": 100.0},
        },
    ]
    runner = _build_runner_emitting(output_path, rows=rows)
    _ground_truth_tlog_loader(monkeypatch)

    # Act
    report = run_e2e_orchestration(
        populated_cache=cache,
        output_path=output_path,
        report_dir=report_dir,
        effective_config_path=tmp_path / "eff.yaml",
        runner=runner,
        run_date_utc="2026-05-23",
        **inputs,  # type: ignore[arg-type]
    )

    # Assert — AC-2: report exists regardless of PASS/FAIL.
    assert report.verdict_passed is False
    assert report.report_path.is_file()
    assert "FAIL" in report.report_path.read_text()