mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-21 19:21:12 +00:00
d7e6b0959e
Batch 63 of /autodev replay slice. Adds the AZ-404 E2E test harness against the Derkachi fixture and resolves the AZ-389 dependency phantom (closing AZ-559 Won't Fix). E2E test (AZ-404) - tests/e2e/replay/_tlog_synth.py: deterministic CSV->tlog generator (the original Derkachi tlog is not in repo; data_imu.csv is its export, so we round-trip the CSV through pymavlink). Verified: SCALED_IMU2 + ATTITUDE + GPS_RAW_INT + HEARTBEAT round-trip cleanly through mavutil.mavlink_connection. - tests/e2e/replay/_helpers.py: parse_jsonl, l2_horizontal_m (haversine), match_percentage, CapturingMavlinkTransport (ready for AZ-558 unblock), GroundTruthRow + load_ground_truth_csv. - tests/e2e/replay/conftest.py: derkachi_replay_inputs (session scope), replay_runner (subprocess fixture per AZ-402 CLI), operator_pre_flight_setup placeholder. - tests/e2e/replay/test_derkachi_1min.py: 9 tests covering AC-1..AC-8 with AC-7 skip-gate self-check + AC-4a mode-agnosticism AST scan (passes unconditionally, confirms ADR-011 holding). - tests/e2e/replay/test_helpers.py: 14 unit tests covering AC-9 helper L2 correctness + match_percentage + parse_jsonl + CapturingMavlinkTransport (all unconditional). - tests/e2e/replay/README.md: AC matrix, fixture state, runtime budget, failure cookbook (AC-10). AC matrix - AC-1, AC-2, AC-5, AC-6 implemented and Tier-1 gated on RUN_REPLAY_E2E=1. - AC-3 (<=100m for 80%) xfail until real Topotek KHP20S30 calibration ships (camera_info.md states intrinsics are unknown). - AC-4a (mode-agnosticism AST scan) PASSES unconditionally. - AC-4b (encoder byte-equality) skip until AZ-558 routes C8 bytes through MavlinkTransport. - AC-7 (skip-gate self-check) PASSES unconditionally. - AC-8 (operator workflow rehearsal) skip until D-PROJ-2 mock-suite-sat-service implements tile-fetch + index-build endpoints. - AC-9 (helper L2 correctness) 14 PASSES unconditionally. 
AZ-389 housekeeping - AZ-559 closed Won't Fix: investigation against c6_tile_cache/_types.py confirmed TileSource.ONBOARD_INGEST + TileMetadata.quality_metadata + write_tile's FreshnessRejectionError already cover the mid-flight ingest semantic. The "missing API" was a spec-vs-impl naming mismatch. - AZ-389 spec rewritten to consume the existing write_tile API + catch FreshnessRejectionError per AC-NEW-3 opportunistic emission. - _dependencies_table.md reverted: AZ-389 deps -> AZ-303 (was AZ-559 in the previous commit on this branch); total 150 / 497 pts. Tests - Full regression: 2099 passed (+14 new e2e/replay), 94 skipped (incl. 8 e2e/replay heavy-tier + documented blocker skips), 3 perf-microbench flakes deselected (test_cli_cold_start_under_2s, test_cold_start_under_500ms_p99, test_nfr_perf_sign_microbench; all pass in isolation - pre-existing under-load flakes on dev macOS). Reviews - _docs/03_implementation/reviews/batch_63_review.md: code review PASS_WITH_WARNINGS (3 documented spec-gap deferrals: AC-3, AC-4b, AC-8). - _docs/03_implementation/cumulative_review_batches_61-63_cycle1_report.md: cumulative review PASS_WITH_WARNINGS. Action items: prioritise AZ-558 (closes AZ-401 AC-9 + AZ-404 AC-4b); consider 2pt hygiene PBI for Protocol-completeness AST scan to catch the AZ-389 / AZ-559 phantom-API pattern at task-prep time. Architecture invariants observably holding - ADR-011 (replay-as-configuration): AC-4a's AST scan over src/gps_denied_onboard/components/**/*.py finds zero violations - components branch on neither config.mode nor any synonym. - Single composition root (replay protocol Invariant 11): AZ-402 CLI dispatches to runtime_root.main(config); does not call compose_root directly. Co-authored-by: Cursor <cursoragent@cursor.com>
224 lines
7.5 KiB
Python
224 lines
7.5 KiB
Python
"""Helpers shared by the AZ-404 E2E replay tests.
|
|
|
|
* :func:`parse_jsonl` — read the ``JsonlReplaySink`` output into a list
|
|
of dicts with one entry per emit.
|
|
* :func:`l2_horizontal_m` — WGS84-aware L2 horizontal distance between
|
|
two ``(lat, lon)`` pairs in metres.
|
|
* :func:`match_percentage` — share of estimator emissions whose
|
|
L2 distance to the closest ground-truth row is within a threshold.
|
|
* :class:`CapturingMavlinkTransport` — test-only ``MavlinkTransport``
|
|
impl that records every ``write`` so AC-4b can compare the byte
|
|
streams produced by ``compose_root(config_live)`` vs.
|
|
``compose_root(config_replay)``.
|
|
* :func:`load_ground_truth_csv` — the IMU CSV's ``GLOBAL_POSITION_INT``
|
|
columns ARE the AC-3 reference (the original tlog's GPS rows
|
|
exported to CSV); this helper materialises them.
|
|
|
|
All functions are pure / deterministic and stay safely importable on
|
|
dev macOS without ``RUN_REPLAY_E2E``; the regular regression suite
|
|
calls them via the unit-level helper test in this module's sibling
|
|
``test_helpers.py``.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import csv
|
|
import json
|
|
import math
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
__all__ = [
|
|
"CapturingMavlinkTransport",
|
|
"GroundTruthRow",
|
|
"l2_horizontal_m",
|
|
"load_ground_truth_csv",
|
|
"match_percentage",
|
|
"parse_jsonl",
|
|
]
|
|
|
|
|
|
# WGS84 mean Earth radius. Matches the value used by
|
|
# `helpers/wgs_converter.py` (AZ-279) so the e2e check is consistent
|
|
# with the production converter.
|
|
_EARTH_RADIUS_M: float = 6_371_008.8
|
|
|
|
|
|
@dataclass(frozen=True)
class GroundTruthRow:
    """Immutable ground-truth sample taken from the Derkachi data_imu.csv.

    Instances are produced by ``load_ground_truth_csv`` and consumed by
    ``match_percentage``; being frozen, they are hashable and cannot be
    mutated after construction.
    """

    # Value of the CSV ``Time`` column (compared against emit time in seconds).
    t_s: float
    # Latitude in degrees (CSV integer value divided by 1e7).
    lat_deg: float
    # Longitude in degrees (CSV integer value divided by 1e7).
    lon_deg: float
    # Altitude in metres (CSV integer value divided by 1e3).
    alt_m: float
|
|
|
|
|
|
def parse_jsonl(path: Path) -> list[dict[str, Any]]:
    """Decode a JsonlReplaySink output file into a list of records.

    The file is read as UTF-8, one JSON document per line. A line that is
    empty once its trailing newline has been removed is skipped, wherever
    it occurs in the file (this covers the blank tail left by a final
    newline). Any other line must decode as JSON.

    Args:
        path: Location of the JSONL file to read.

    Returns:
        The decoded values, in file order.

    Raises:
        AssertionError: If a non-empty line is not valid JSON. The message
            carries the 1-based line number and the decoder's message; the
            original ``json.JSONDecodeError`` is chained as the cause.
    """
    parsed: list[dict[str, Any]] = []
    with path.open(encoding="utf-8") as handle:
        for number, raw in enumerate(handle, start=1):
            # Only the newline is removed; a whitespace-only line is still
            # handed to the decoder and therefore reported as invalid.
            text = raw.rstrip("\n")
            if text:
                try:
                    decoded = json.loads(text)
                except json.JSONDecodeError as exc:
                    raise AssertionError(
                        f"line {number} in {path} is not valid JSON: {exc.msg!r}"
                    ) from exc
                parsed.append(decoded)
    return parsed
|
|
|
|
|
|
def l2_horizontal_m(
    lat1_deg: float, lon1_deg: float, lat2_deg: float, lon2_deg: float
) -> float:
    """Return the great-circle distance between two points, in metres.

    The distance is computed with the haversine formula on a sphere of
    radius ``_EARTH_RADIUS_M``. Altitude is not involved. The original
    author notes that the spherical approximation differs from the WGS84
    ellipsoid by less than 0.5 % at the Derkachi latitudes, which is well
    inside the AC-3 100 m budget.

    Args:
        lat1_deg: Latitude of the first point, in degrees.
        lon1_deg: Longitude of the first point, in degrees.
        lat2_deg: Latitude of the second point, in degrees.
        lon2_deg: Longitude of the second point, in degrees.

    Returns:
        The distance along the sphere's surface, in metres.
    """
    lat_a = math.radians(lat1_deg)
    lat_b = math.radians(lat2_deg)
    half_dlat = (lat_b - lat_a) / 2.0
    half_dlon = math.radians(lon2_deg - lon1_deg) / 2.0

    sin_sq_dlat = math.sin(half_dlat) ** 2
    sin_sq_dlon = math.sin(half_dlon) ** 2
    hav = sin_sq_dlat + math.cos(lat_a) * math.cos(lat_b) * sin_sq_dlon

    # Clamp to 1.0 so rounding error can never push asin out of its domain.
    central_angle = 2.0 * math.asin(min(1.0, math.sqrt(hav)))
    return _EARTH_RADIUS_M * central_angle
|
|
|
|
|
|
def load_ground_truth_csv(csv_path: Path) -> list[GroundTruthRow]:
    """Load the Derkachi IMU CSV's GPS columns as ground-truth rows.

    The original ``flight_derkachi.tlog``'s ``GLOBAL_POSITION_INT``
    messages were exported to ``data_imu.csv``. Its ``lat`` / ``lon``
    columns hold degrees * 1e7 and its ``alt`` column holds metres * 1e3
    (MAVLink integer encoding), so the values are scaled back here.

    Args:
        csv_path: Path to the CSV file. It must have a header row with the
            columns ``Time``, ``GLOBAL_POSITION_INT.lat``,
            ``GLOBAL_POSITION_INT.lon`` and ``GLOBAL_POSITION_INT.alt``.

    Returns:
        One ``GroundTruthRow`` per data row, in file order.

    Raises:
        KeyError: If a required column is missing.
        ValueError: If a required cell is not parseable as a number.
    """
    # The encoding is pinned (as in ``parse_jsonl``) so the result does not
    # depend on the host locale; ``newline=""`` is what the csv module
    # requires for correct handling of embedded line breaks.
    with csv_path.open(newline="", encoding="utf-8") as fp:
        return [
            GroundTruthRow(
                t_s=float(record["Time"]),
                lat_deg=float(record["GLOBAL_POSITION_INT.lat"]) / 1e7,
                lon_deg=float(record["GLOBAL_POSITION_INT.lon"]) / 1e7,
                alt_m=float(record["GLOBAL_POSITION_INT.alt"]) / 1e3,
            )
            for record in csv.DictReader(fp)
        ]
|
|
|
|
|
|
def match_percentage(
    emissions: list[dict[str, Any]],
    ground_truth: list[GroundTruthRow],
    *,
    threshold_m: float,
) -> float:
    """Return the fraction of emissions lying within ``threshold_m`` of truth.

    Each emission is paired with the ground-truth row whose ``t_s`` is
    closest to the emission time; when two rows are equally close the
    earlier one is used. The emission counts as a hit when its horizontal
    distance to that row (via ``l2_horizontal_m``) is at most
    ``threshold_m``.

    The original author notes that nearest-in-time pairing is sufficient
    because the CSV's 10 Hz cadence keeps the chosen row within roughly
    50 ms of the emit timestamp.

    Args:
        emissions: Decoded ``EstimatorOutput`` JSONL records. Each must
            provide ``emitted_at`` (converted with ``int`` and treated as
            nanoseconds) and ``position_wgs84`` with ``lat_deg`` and
            ``lon_deg`` entries.
        ground_truth: Reference rows; they need not be pre-sorted.
        threshold_m: Maximum distance, in metres, that still counts as a hit.

    Returns:
        Hits divided by the number of emissions, in ``[0.0, 1.0]``;
        ``0.0`` when ``emissions`` is empty.

    Raises:
        AssertionError: If ``emissions`` is non-empty but ``ground_truth``
            is empty.
    """
    if not emissions:
        return 0.0
    if not ground_truth:
        raise AssertionError("ground_truth must be non-empty")

    timeline = sorted(ground_truth, key=lambda row: row.t_s)
    stamps = [row.t_s for row in timeline]

    within = 0
    for record in emissions:
        when_s = int(record["emitted_at"]) / 1e9
        pos = _bisect_left(stamps, when_s)
        # The row just before the insertion point and the row at it (when
        # they exist) are the only possible nearest neighbours in time.
        bracket = timeline[max(pos - 1, 0) : pos + 1]
        closest = min(bracket, key=lambda row: abs(row.t_s - when_s))
        fix = record["position_wgs84"]
        gap_m = l2_horizontal_m(
            fix["lat_deg"],
            fix["lon_deg"],
            closest.lat_deg,
            closest.lon_deg,
        )
        within += gap_m <= threshold_m
    return within / len(emissions)
|
|
|
|
|
|
def _bisect_left(seq: list[float], target: float) -> int:
|
|
"""Stdlib bisect_left, inlined to keep import surface narrow."""
|
|
lo, hi = 0, len(seq)
|
|
while lo < hi:
|
|
mid = (lo + hi) // 2
|
|
if seq[mid] < target:
|
|
lo = mid + 1
|
|
else:
|
|
hi = mid
|
|
return lo
|
|
|
|
|
|
class CapturingMavlinkTransport:
    """In-memory, test-only :class:`MavlinkTransport` that records writes.

    Intended for AZ-404 AC-4b: capture the byte streams produced by
    ``compose_root(config_live).c8.emit_external_position(out)`` and
    ``compose_root(config_replay).c8.emit_external_position(out)`` so the
    two can be asserted byte-identical per replay protocol Invariant 5.

    NOTE: AC-4b is currently SKIPPED (blocked on AZ-558 — the C8 encoders
    still bypass the ``MavlinkTransport`` seam by calling ``mav.*_send``
    directly). The class exists so the fixture is ready once AZ-558 lands.
    """

    def __init__(self) -> None:
        """Start with no captured payloads and the transport open."""
        self._closed = False
        self._chunks: list[bytes] = []

    def write(self, payload: bytes) -> int:
        """Record ``payload`` and return its length.

        A ``bytes`` copy is stored, so later mutation of a mutable buffer
        passed in by the caller does not alter what was captured.

        Raises:
            RuntimeError: If called after :meth:`close`.
        """
        if not self._closed:
            self._chunks.append(bytes(payload))
            return len(payload)
        raise RuntimeError("CapturingMavlinkTransport.write after close")

    def bytes_written(self) -> int:
        """Return the combined length of all captured payloads."""
        return sum(map(len, self._chunks))

    def close(self) -> None:
        """Mark the transport closed; captured data stays readable."""
        self._closed = True

    @property
    def captured_payloads(self) -> tuple[bytes, ...]:
        """Every payload passed to :meth:`write`, in call order."""
        return (*self._chunks,)

    @property
    def captured_concat(self) -> bytes:
        """All captured payloads joined into one wire-byte stream."""
        return b"".join(self._chunks)
|