Files
gps-denied-onboard/tests/e2e/replay/test_e2e_orchestrator_unit.py
T
Oleksandr Bezdieniezhnykh ade0c86f2b [AZ-840] [AZ-835] e2e orchestrator test (E-AZ-835 C4)
Wraps the AZ-699 verdict-report path with the AZ-839
operator_pre_flight_setup C3 fixture so a single Tier-2 test
takes only (tlog, video, calibration) and runs the full 7-step
pipeline on the Jetson harness without operator hand-curation.

New surface (tests-only, no src/ changes):
- tests/e2e/replay/_e2e_orchestrator.py — orchestrator with
  OrchestratorStep enum, OrchestrationFailure exception (step
  prefix per AC-5), OrchestrationReport dataclass,
  write_effective_replay_config helper, and
  run_e2e_orchestration entry point covering steps 1-2-6-7.
- tests/e2e/replay/test_e2e_orchestrator_unit.py — 17 unit
  tests covering each failure mode + happy path with mocked
  subprocess + ground-truth loader (AC-8).
- tests/e2e/replay/test_az835_e2e_real_flight.py — Tier-2 +
  RUN_REPLAY_E2E gated integration test asserting verdict
  report exists, 15-min budget held (AC-1, AC-2, AC-3, AC-4,
  AC-6).

The effective config write overlays c6_tile_cache.root_dir
onto the static operator YAML at runtime so the airborne
subprocess shares the cache_root the C3 fixture chose. Field-
level merge — every other operator-config block stays
verbatim. The static YAML on disk is never touched.

Test run: tests/e2e/replay 45 passed, 10 skipped (10 skips
were 9 pre-existing + 1 new tier2). No src/ touched, no
AZ-839 driver changes; AC-7 (AZ-699 still passes) holds by
inspection.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-23 15:27:41 +03:00

672 lines
21 KiB
Python

"""Unit tests for the AZ-840 e2e orchestrator (AC-8).
The end-to-end happy path is the Tier-2 integration test in
``test_az835_e2e_real_flight.py`` (AC-1 / AC-2). This module covers
the orchestration helper layer in isolation:
* Param validation — every required path must exist before the
airborne subprocess is spawned (AC-5 fails LOUD).
* Effective-config merge — the ``c6_tile_cache.root_dir`` override
is written to YAML; the rest of the base config is preserved.
* Error propagation per step — every documented failure surfaces
as :class:`OrchestrationFailure` with the correct
:class:`OrchestratorStep` label.
* Happy path — when the runner returns success and the JSONL +
ground truth align, :class:`OrchestrationReport` carries a
written report path and an honest verdict (AC-2: report exists
PASS or FAIL).
The tests inject a fake ``runner`` so no real
``gps-denied-replay`` subprocess is spawned. Real binary execution
is exercised on the Jetson harness via the AC-1 integration test.
"""
from __future__ import annotations
import json
import subprocess
from pathlib import Path
from unittest.mock import MagicMock
import pytest
import yaml
from gps_denied_onboard.helpers.accuracy_report import (
AC3_GATE_THRESHOLD_M,
)
from gps_denied_onboard.replay_input.tlog_route import RouteSpec
from tests.e2e.replay._e2e_orchestrator import (
OrchestrationFailure,
OrchestrationReport,
OrchestratorStep,
read_calibration_acquisition_method,
run_e2e_orchestration,
write_effective_replay_config,
)
from tests.e2e.replay._operator_pre_flight import PopulatedC6Cache
# ----------------------------------------------------------------------
# Helpers
def _build_populated_cache(tmp_path: Path) -> PopulatedC6Cache:
    """Build a throwaway :class:`PopulatedC6Cache` rooted under ``tmp_path``.

    Only the ``cache_root`` directory is created on disk; the tile-store
    and FAISS index / sidecar paths simply point inside it and are never
    written here. Per the original author's note the orchestrator reads
    nothing but ``cache_root`` from this object, and the route spec is a
    one-waypoint placeholder (AZ-836 invariants are not re-checked by
    AZ-840).
    """
    root = tmp_path / "cache_root"
    root.mkdir()

    # Minimal single-waypoint route; the values are arbitrary test data.
    placeholder_route = RouteSpec(
        waypoints=((50.10, 36.10),),
        suggested_region_size_meters=500.0,
        source_tlog=Path("test.tlog"),
        source_segment=(0, 100),
        total_distance_meters=0.0,
    )

    index_path = root / "descriptor.index"
    return PopulatedC6Cache(
        cache_root=root,
        tile_store_path=root / "tiles",
        faiss_index_path=index_path,
        faiss_sidecar_sha256_path=root / "descriptor.index.sha256",
        faiss_sidecar_meta_path=root / "descriptor.index.meta.json",
        route_spec=placeholder_route,
        tile_count=1,
        elapsed_seconds=0.0,
    )
def _stage_inputs(tmp_path: Path) -> dict[str, Path]:
    """Create placeholder files for every path-valued orchestrator input.

    The operator config is a real YAML document containing ``mode`` and a
    ``c6_tile_cache`` block, so the effective-config merge has something to
    overlay. The remaining files carry minimal dummy payloads.

    Returns:
        A mapping from the keyword names used when calling
        ``run_e2e_orchestration`` (``base_config_path``, ``tlog_path``,
        ``video_path``, ``calibration_path``, ``signing_key_path``,
        ``replay_binary``) to the files written under ``tmp_path``.
    """
    operator_document = {
        "mode": "replay",
        "c6_tile_cache": {
            "store_runtime": "postgres_filesystem",
            "metadata_runtime": "postgres_filesystem",
            "descriptor_index_runtime": "faiss_hnsw",
            "root_dir": "/var/lib/gps-denied/tiles",
            "faiss_index_path": "/some/static/path/descriptor.index",
        },
    }

    staged: dict[str, Path] = {
        "base_config_path": tmp_path / "operator_config.yaml",
        "tlog_path": tmp_path / "input.tlog",
        "video_path": tmp_path / "input.mp4",
        "calibration_path": tmp_path / "calibration.json",
        "signing_key_path": tmp_path / "signing_key.bin",
        "replay_binary": tmp_path / "gps-denied-replay",
    }

    staged["base_config_path"].write_text(yaml.safe_dump(operator_document))
    staged["tlog_path"].write_bytes(b"\x00")
    staged["video_path"].write_bytes(b"\x00")
    staged["calibration_path"].write_text(
        json.dumps({"acquisition_method": "factory-sheet"})
    )
    # 32 zero bytes stand in for the signing key.
    staged["signing_key_path"].write_bytes(b"\x00" * 32)
    # Empty file: the binary only needs to exist, it is never executed here.
    staged["replay_binary"].write_text("")
    return staged
def _ground_truth_tlog_loader(
    monkeypatch: pytest.MonkeyPatch,
    *,
    times_s: tuple[float, ...] = (0.0, 1.0, 2.0),
    lat_deg: float = 50.10,
    lon_deg: float = 36.10,
    alt_m: float = 100.0,
) -> None:
    """Replace the orchestrator's ground-truth loader with a canned series.

    One stub fix is produced per entry of ``times_s`` (converted from
    seconds to integer nanoseconds); every fix shares the same
    ``lat_deg`` / ``lon_deg`` / ``alt_m``. The patch targets the name
    ``load_tlog_ground_truth`` as bound on the orchestrator module, and is
    undone by ``monkeypatch`` at test teardown.
    """
    stub_records = tuple(
        _StubGpsFix(
            ts_ns=int(seconds * 1e9),
            lat_deg=lat_deg,
            lon_deg=lon_deg,
            alt_m=alt_m,
        )
        for seconds in times_s
    )
    canned_series = _StubGpsSeries(records=stub_records)

    def _fake_loader(*_args, **_kwargs):  # type: ignore[no-untyped-def]
        # Ignores whatever the orchestrator passes and returns the canned data.
        return canned_series

    monkeypatch.setattr(
        "tests.e2e.replay._e2e_orchestrator.load_tlog_ground_truth",
        _fake_loader,
    )
class _StubGpsFix:
    """Minimal GPS-fix record used as a stand-in for one tlog row.

    Holds exactly four attributes — ``ts_ns``, ``lat_deg``, ``lon_deg``
    and ``alt_m`` — and declares ``__slots__`` so no other attribute can
    be attached.
    """

    __slots__ = ("ts_ns", "lat_deg", "lon_deg", "alt_m")

    def __init__(
        self,
        *,
        ts_ns: int,
        lat_deg: float,
        lon_deg: float,
        alt_m: float,
    ) -> None:
        # All arguments are keyword-only and stored verbatim.
        self.ts_ns, self.lat_deg = ts_ns, lat_deg
        self.lon_deg, self.alt_m = lon_deg, alt_m
class _StubGpsSeries:
    """Test double used in place of :class:`TlogGroundTruth`.

    Exposes a single attribute, ``records``, holding the tuple of stub
    fixes it was constructed with.
    """

    def __init__(self, *, records: tuple[_StubGpsFix, ...]) -> None:
        # Stored as given; no copying or validation is performed.
        self.records = records
def _build_runner_emitting(
    output_path: Path,
    *,
    rows: list[dict[str, object]],
    returncode: int = 0,
    stdout: str = "",
    stderr: str = "",
) -> MagicMock:
    """Create a mock runner that imitates ``subprocess.run``.

    Each call to the returned mock:

    * writes ``rows`` to ``output_path`` as JSON Lines (one JSON object
      per line, newline-terminated), creating parent directories as
      needed — but only when ``rows`` is non-empty;
    * returns a :class:`subprocess.CompletedProcess` echoing the argv it
      was called with plus the configured ``returncode`` / ``stdout`` /
      ``stderr``.

    Because the result is a :class:`MagicMock`, callers can also inspect
    ``call_args`` or use ``assert_called_once()``.
    """

    def _fake_run(argv, **_ignored):  # type: ignore[no-untyped-def]
        if rows:
            output_path.parent.mkdir(parents=True, exist_ok=True)
            jsonl_body = "".join(f"{json.dumps(row)}\n" for row in rows)
            output_path.write_text(jsonl_body)
        return subprocess.CompletedProcess(argv, returncode, stdout, stderr)

    return MagicMock(side_effect=_fake_run)
# ----------------------------------------------------------------------
# write_effective_replay_config
def test_write_effective_replay_config_overlays_root_dir(
    tmp_path: Path,
) -> None:
    """The effective config points ``root_dir`` at the supplied cache root.

    Also checks that the helper returns the output path, that
    ``faiss_index_path`` is written as an empty string, and that fields
    outside the override (``mode``, ``store_runtime``) keep their staged
    values.
    """
    # Arrange
    staged = _stage_inputs(tmp_path)
    cache_dir = tmp_path / "cache"
    cache_dir.mkdir()
    effective_yaml = tmp_path / "effective.yaml"

    # Act
    returned_path = write_effective_replay_config(
        base_config_path=staged["base_config_path"],
        cache_root=cache_dir,
        output_path=effective_yaml,
    )

    # Assert
    assert returned_path == effective_yaml
    document = yaml.safe_load(effective_yaml.read_text())
    tile_cache = document["c6_tile_cache"]
    assert tile_cache["root_dir"] == str(cache_dir)
    assert tile_cache["faiss_index_path"] == ""
    assert document["mode"] == "replay"
    assert tile_cache["store_runtime"] == "postgres_filesystem", (
        "non-overridden c6_tile_cache fields must survive"
    )
def test_write_effective_replay_config_creates_block_when_absent(
    tmp_path: Path,
) -> None:
    """A base config without ``c6_tile_cache`` still gets ``root_dir`` set."""
    # Arrange — the base document deliberately has no c6_tile_cache block.
    base_yaml = tmp_path / "operator.yaml"
    base_yaml.write_text(yaml.safe_dump({"mode": "replay"}))
    cache_dir = tmp_path / "cache"
    cache_dir.mkdir()
    effective_yaml = tmp_path / "effective.yaml"

    # Act
    write_effective_replay_config(
        base_config_path=base_yaml,
        cache_root=cache_dir,
        output_path=effective_yaml,
    )

    # Assert
    document = yaml.safe_load(effective_yaml.read_text())
    assert document["c6_tile_cache"]["root_dir"] == str(cache_dir)
def test_write_effective_replay_config_malformed_yaml_fails(
    tmp_path: Path,
) -> None:
    """Unparseable base YAML raises with the WRITE_EFFECTIVE_CONFIG step."""
    # Arrange
    broken_yaml = tmp_path / "bad.yaml"
    broken_yaml.write_text(":\n : not yaml:")
    cache_dir = tmp_path / "cache"
    cache_dir.mkdir()

    # Act + Assert
    with pytest.raises(OrchestrationFailure) as caught:
        write_effective_replay_config(
            base_config_path=broken_yaml,
            cache_root=cache_dir,
            output_path=tmp_path / "effective.yaml",
        )
    failure = caught.value
    assert failure.step is OrchestratorStep.WRITE_EFFECTIVE_CONFIG
def test_write_effective_replay_config_non_mapping_top_level_fails(
    tmp_path: Path,
) -> None:
    """A base YAML whose top level is a list, not a mapping, is rejected."""
    # Arrange — valid YAML, but the document root is a sequence.
    list_rooted_yaml = tmp_path / "bad.yaml"
    list_rooted_yaml.write_text("- not a mapping\n")
    cache_dir = tmp_path / "cache"
    cache_dir.mkdir()

    # Act + Assert
    with pytest.raises(OrchestrationFailure) as caught:
        write_effective_replay_config(
            base_config_path=list_rooted_yaml,
            cache_root=cache_dir,
            output_path=tmp_path / "effective.yaml",
        )
    failure = caught.value
    assert failure.step is OrchestratorStep.WRITE_EFFECTIVE_CONFIG
# ----------------------------------------------------------------------
# read_calibration_acquisition_method
def test_read_calibration_acquisition_method_returns_field_when_present(
    tmp_path: Path,
) -> None:
    """The ``acquisition_method`` value in the JSON file is returned as-is."""
    # Arrange
    calibration_file = tmp_path / "cal.json"
    calibration_file.write_text(
        json.dumps({"acquisition_method": "factory-sheet"})
    )

    # Act
    method = read_calibration_acquisition_method(calibration_file)

    # Assert
    assert method == "factory-sheet"
def test_read_calibration_acquisition_method_returns_unknown_on_missing(
    tmp_path: Path,
) -> None:
    """Valid JSON lacking ``acquisition_method`` yields ``"unknown"``."""
    # Arrange
    calibration_file = tmp_path / "cal.json"
    calibration_file.write_text(json.dumps({"some_other_field": True}))

    # Act
    method = read_calibration_acquisition_method(calibration_file)

    # Assert
    assert method == "unknown"
def test_read_calibration_acquisition_method_returns_unknown_on_malformed(
    tmp_path: Path,
) -> None:
    """A file that is not valid JSON yields ``"unknown"`` instead of raising."""
    # Arrange
    calibration_file = tmp_path / "cal.json"
    calibration_file.write_text("{not valid json")

    # Act
    method = read_calibration_acquisition_method(calibration_file)

    # Assert
    assert method == "unknown"
# ----------------------------------------------------------------------
# run_e2e_orchestration — param validation (AC-5)
def test_run_e2e_orchestration_missing_tlog_fails_loud(
    tmp_path: Path,
) -> None:
    """A missing tlog aborts at VALIDATE_INPUTS before the runner is used.

    The failure message must name the offending parameter
    (``tlog_path``), and the injected runner must never be invoked.
    """
    # Arrange
    cache = _build_populated_cache(tmp_path)
    inputs = _stage_inputs(tmp_path)
    inputs["tlog_path"].unlink()
    # Inject a mock runner so the test stays hermetic: if validation ever
    # regressed, nothing real would be spawned and the final assertion
    # would flag the premature call.
    runner = MagicMock()

    # Act + Assert
    with pytest.raises(OrchestrationFailure) as exc_info:
        run_e2e_orchestration(
            populated_cache=cache,
            output_path=tmp_path / "out.jsonl",
            report_dir=tmp_path / "metrics",
            effective_config_path=tmp_path / "eff.yaml",
            runner=runner,
            **inputs,  # type: ignore[arg-type]
        )
    assert exc_info.value.step is OrchestratorStep.VALIDATE_INPUTS
    assert "tlog_path" in str(exc_info.value)
    runner.assert_not_called()
def test_run_e2e_orchestration_missing_binary_fails_loud(
    tmp_path: Path,
) -> None:
    """A missing replay binary aborts at VALIDATE_INPUTS before any spawn.

    The failure message must name the offending parameter
    (``replay_binary``), and the injected runner must never be invoked.
    """
    # Arrange
    cache = _build_populated_cache(tmp_path)
    inputs = _stage_inputs(tmp_path)
    inputs["replay_binary"].unlink()
    # Inject a mock runner so the test stays hermetic: if validation ever
    # regressed, nothing real would be spawned and the final assertion
    # would flag the premature call.
    runner = MagicMock()

    # Act + Assert
    with pytest.raises(OrchestrationFailure) as exc_info:
        run_e2e_orchestration(
            populated_cache=cache,
            output_path=tmp_path / "out.jsonl",
            report_dir=tmp_path / "metrics",
            effective_config_path=tmp_path / "eff.yaml",
            runner=runner,
            **inputs,  # type: ignore[arg-type]
        )
    assert exc_info.value.step is OrchestratorStep.VALIDATE_INPUTS
    assert "replay_binary" in str(exc_info.value)
    runner.assert_not_called()
# ----------------------------------------------------------------------
# run_e2e_orchestration — subprocess error propagation (AC-5)
def test_run_e2e_orchestration_replay_nonzero_exit_fails_loud(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A runner exit code of 1 surfaces as an AIRBORNE_PIPELINE failure.

    The failure message must include both the exit status and the
    runner's stderr text.
    """
    # Arrange
    cache = _build_populated_cache(tmp_path)
    staged = _stage_inputs(tmp_path)
    jsonl_path = tmp_path / "out.jsonl"
    failed_process = subprocess.CompletedProcess(
        args=[],
        returncode=1,
        stdout="",
        stderr="boom",
    )
    runner = MagicMock(return_value=failed_process)
    _ground_truth_tlog_loader(monkeypatch)

    # Act + Assert
    with pytest.raises(OrchestrationFailure) as caught:
        run_e2e_orchestration(
            populated_cache=cache,
            output_path=jsonl_path,
            report_dir=tmp_path / "metrics",
            effective_config_path=tmp_path / "eff.yaml",
            runner=runner,
            **staged,  # type: ignore[arg-type]
        )
    failure = caught.value
    assert failure.step is OrchestratorStep.AIRBORNE_PIPELINE
    assert "exited 1" in str(failure)
    assert "boom" in str(failure)
def test_run_e2e_orchestration_replay_timeout_fails_loud(
    tmp_path: Path,
) -> None:
    """A ``TimeoutExpired`` from the runner becomes an AIRBORNE_PIPELINE failure."""
    # Arrange
    cache = _build_populated_cache(tmp_path)
    staged = _stage_inputs(tmp_path)
    # An exception given as side_effect is raised when the mock is called.
    runner = MagicMock(
        side_effect=subprocess.TimeoutExpired(cmd=["replay"], timeout=0.1)
    )

    # Act + Assert
    with pytest.raises(OrchestrationFailure) as caught:
        run_e2e_orchestration(
            populated_cache=cache,
            output_path=tmp_path / "out.jsonl",
            report_dir=tmp_path / "metrics",
            effective_config_path=tmp_path / "eff.yaml",
            runner=runner,
            max_seconds=0.1,
            **staged,  # type: ignore[arg-type]
        )
    failure = caught.value
    assert failure.step is OrchestratorStep.AIRBORNE_PIPELINE
    assert "timed out" in str(failure)
def test_run_e2e_orchestration_replay_oserror_fails_loud(
    tmp_path: Path,
) -> None:
    """An ``OSError`` from the runner becomes an AIRBORNE_PIPELINE failure.

    The original error text must be carried into the failure message.
    """
    # Arrange
    cache = _build_populated_cache(tmp_path)
    staged = _stage_inputs(tmp_path)
    # An exception given as side_effect is raised when the mock is called.
    runner = MagicMock(side_effect=OSError("permission denied"))

    # Act + Assert
    with pytest.raises(OrchestrationFailure) as caught:
        run_e2e_orchestration(
            populated_cache=cache,
            output_path=tmp_path / "out.jsonl",
            report_dir=tmp_path / "metrics",
            effective_config_path=tmp_path / "eff.yaml",
            runner=runner,
            **staged,  # type: ignore[arg-type]
        )
    failure = caught.value
    assert failure.step is OrchestratorStep.AIRBORNE_PIPELINE
    assert "permission denied" in str(failure)
# ----------------------------------------------------------------------
# run_e2e_orchestration — empty / malformed JSONL (AC-5)
def test_run_e2e_orchestration_empty_jsonl_fails_loud(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A JSONL output containing only blank lines fails at PARSE_EMISSIONS."""
    # Arrange
    cache = _build_populated_cache(tmp_path)
    staged = _stage_inputs(tmp_path)
    jsonl_path = tmp_path / "out.jsonl"

    def _write_blank_lines(argv, **_kwargs):  # type: ignore[no-untyped-def]
        # The runner "succeeds" but emits nothing except newlines.
        jsonl_path.write_text("\n\n")
        return subprocess.CompletedProcess(
            args=argv, returncode=0, stdout="", stderr=""
        )

    runner = MagicMock(side_effect=_write_blank_lines)
    _ground_truth_tlog_loader(monkeypatch)

    # Act + Assert
    with pytest.raises(OrchestrationFailure) as caught:
        run_e2e_orchestration(
            populated_cache=cache,
            output_path=jsonl_path,
            report_dir=tmp_path / "metrics",
            effective_config_path=tmp_path / "eff.yaml",
            runner=runner,
            **staged,  # type: ignore[arg-type]
        )
    failure = caught.value
    assert failure.step is OrchestratorStep.PARSE_EMISSIONS
def test_run_e2e_orchestration_malformed_jsonl_fails_loud(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A JSONL output with a non-JSON line fails at PARSE_EMISSIONS."""
    # Arrange
    cache = _build_populated_cache(tmp_path)
    staged = _stage_inputs(tmp_path)
    jsonl_path = tmp_path / "out.jsonl"

    def _write_corrupt_jsonl(argv, **_kwargs):  # type: ignore[no-untyped-def]
        # First line parses as JSON, second one does not.
        jsonl_path.write_text('{"valid": true}\nnot a json line\n')
        return subprocess.CompletedProcess(
            args=argv, returncode=0, stdout="", stderr=""
        )

    runner = MagicMock(side_effect=_write_corrupt_jsonl)
    _ground_truth_tlog_loader(monkeypatch)

    # Act + Assert
    with pytest.raises(OrchestrationFailure) as caught:
        run_e2e_orchestration(
            populated_cache=cache,
            output_path=jsonl_path,
            report_dir=tmp_path / "metrics",
            effective_config_path=tmp_path / "eff.yaml",
            runner=runner,
            **staged,  # type: ignore[arg-type]
        )
    failure = caught.value
    assert failure.step is OrchestratorStep.PARSE_EMISSIONS
# ----------------------------------------------------------------------
# run_e2e_orchestration — ground truth loader failure (AC-5)
def test_run_e2e_orchestration_ground_truth_loader_failure_fails_loud(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A ``ValueError`` from the ground-truth loader fails at LOAD_GROUND_TRUTH.

    The runner itself succeeds and emits one well-formed row; only the
    patched loader raises. The loader's message must appear in the
    resulting failure.
    """
    # Arrange
    cache = _build_populated_cache(tmp_path)
    staged = _stage_inputs(tmp_path)
    jsonl_path = tmp_path / "out.jsonl"
    single_emission = {
        "emitted_at": int(0.5 * 1e9),
        "position_wgs84": {
            "lat_deg": 50.10,
            "lon_deg": 36.10,
            "alt_m": 100.0,
        },
    }
    runner = _build_runner_emitting(jsonl_path, rows=[single_emission])
    # Swap the loader, as bound on the orchestrator module, for a mock
    # that raises whenever it is called.
    monkeypatch.setattr(
        "tests.e2e.replay._e2e_orchestrator.load_tlog_ground_truth",
        MagicMock(side_effect=ValueError("tlog corrupt")),
    )

    # Act + Assert
    with pytest.raises(OrchestrationFailure) as caught:
        run_e2e_orchestration(
            populated_cache=cache,
            output_path=jsonl_path,
            report_dir=tmp_path / "metrics",
            effective_config_path=tmp_path / "eff.yaml",
            runner=runner,
            **staged,  # type: ignore[arg-type]
        )
    failure = caught.value
    assert failure.step is OrchestratorStep.LOAD_GROUND_TRUTH
    assert "tlog corrupt" in str(failure)
# ----------------------------------------------------------------------
# run_e2e_orchestration — happy path (AC-1 / AC-2)
def test_run_e2e_orchestration_happy_path_writes_report(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Successful run: a report is written and the verdict passes.

    Two emissions are placed at exactly the stubbed ground-truth
    position. Beyond the report contents, the test checks that the runner
    was called once with the effective config path and ``--auto-trim`` in
    its argv, and that the effective config carries the cache root.
    """
    # Arrange
    cache = _build_populated_cache(tmp_path)
    staged = _stage_inputs(tmp_path)
    jsonl_path = tmp_path / "out.jsonl"
    metrics_dir = tmp_path / "metrics"
    effective_yaml = tmp_path / "eff.yaml"
    emissions = [
        {
            "emitted_at": int(seconds * 1e9),
            "position_wgs84": {"lat_deg": 50.10, "lon_deg": 36.10, "alt_m": 100.0},
        }
        for seconds in (0.5, 1.5)
    ]
    runner = _build_runner_emitting(jsonl_path, rows=emissions)
    _ground_truth_tlog_loader(monkeypatch)

    # Act
    report = run_e2e_orchestration(
        populated_cache=cache,
        output_path=jsonl_path,
        report_dir=metrics_dir,
        effective_config_path=effective_yaml,
        runner=runner,
        run_date_utc="2026-05-23",
        **staged,  # type: ignore[arg-type]
    )

    # Assert — report object and file contents.
    assert isinstance(report, OrchestrationReport)
    assert report.report_path.is_file()
    assert report.emissions_count == 2
    assert report.distribution.count == 2
    assert report.verdict_passed is True
    report_text = report.report_path.read_text()
    assert "## Horizontal error (metres)" in report_text
    assert "## Threshold-hit share" in report_text
    assert f"| {AC3_GATE_THRESHOLD_M:g} |" in report_text

    # Assert — how the runner was invoked (argv is the first positional).
    runner.assert_called_once()
    argv = runner.call_args[0][0]
    assert str(effective_yaml) in argv
    assert "--auto-trim" in argv

    # Assert — the effective config was written with the cache root.
    effective_document = yaml.safe_load(effective_yaml.read_text())
    assert effective_document["c6_tile_cache"]["root_dir"] == str(cache.cache_root)
def test_run_e2e_orchestration_writes_report_even_on_fail_verdict(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """AC-2: the report file is produced even when the verdict is FAIL."""
    # Arrange — both emissions are offset by 0.01 degrees in latitude and
    # longitude from the stubbed ground truth (on the order of a
    # kilometre), which the original author notes is far above the
    # 100 m gate.
    cache = _build_populated_cache(tmp_path)
    staged = _stage_inputs(tmp_path)
    jsonl_path = tmp_path / "out.jsonl"
    metrics_dir = tmp_path / "metrics"
    emissions = [
        {
            "emitted_at": int(seconds * 1e9),
            "position_wgs84": {"lat_deg": 50.110, "lon_deg": 36.110, "alt_m": 100.0},
        }
        for seconds in (0.5, 1.5)
    ]
    runner = _build_runner_emitting(jsonl_path, rows=emissions)
    _ground_truth_tlog_loader(monkeypatch)

    # Act
    report = run_e2e_orchestration(
        populated_cache=cache,
        output_path=jsonl_path,
        report_dir=metrics_dir,
        effective_config_path=tmp_path / "eff.yaml",
        runner=runner,
        run_date_utc="2026-05-23",
        **staged,  # type: ignore[arg-type]
    )

    # Assert — AC-2: report exists regardless of PASS/FAIL.
    assert report.verdict_passed is False
    written_report = report.report_path
    assert written_report.is_file()
    assert "FAIL" in written_report.read_text()