mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-04-23 01:16:38 +00:00
feat(testing): per-frame JSONL trace in E2EHarness
Opt-in trace_path parameter dumps one JSON record per processed frame with the fields diagnostics need: frame_idx, timestamp_ns, vo_success, alignment_success, tracking_state, confidence, eskf_initialized, eskf_position_enu (or None), eskf_pos_sigma_m, estimate_lat/lon, gt_lat/lon/alt. No perf cost when trace_path is None. The file is opened in write mode, so it is truncated and rewritten on every run — safe to point at /tmp/foo.jsonl for ad-hoc debugging. First real run on EuRoC MH_01 (100 frames) immediately exposes the concrete divergence: vo_success=0/100 (VO never engages on EuRoC grayscale imagery with current SP+LG adapter), eskf_initialized=0/100, alignment_success=77/100 (satellite-fallback path fires). Diagnosis that was hidden behind a single "ATE=10.9 km" number is now machine-readable per frame. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -13,7 +13,9 @@ What the harness does NOT do:
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from unittest.mock import AsyncMock, MagicMock
|
from unittest.mock import AsyncMock, MagicMock
|
||||||
|
|
||||||
@@ -29,7 +31,11 @@ from gps_denied.core.processor import FlightProcessor
|
|||||||
from gps_denied.core.recovery import FailureRecoveryCoordinator
|
from gps_denied.core.recovery import FailureRecoveryCoordinator
|
||||||
from gps_denied.core.vo import SequentialVisualOdometry
|
from gps_denied.core.vo import SequentialVisualOdometry
|
||||||
from gps_denied.schemas.graph import FactorGraphConfig
|
from gps_denied.schemas.graph import FactorGraphConfig
|
||||||
from gps_denied.testing.datasets.base import DatasetAdapter, PlatformClass
|
from gps_denied.testing.datasets.base import (
|
||||||
|
DatasetAdapter,
|
||||||
|
DatasetPose,
|
||||||
|
PlatformClass,
|
||||||
|
)
|
||||||
|
|
||||||
EARTH_R = 6_378_137.0
|
EARTH_R = 6_378_137.0
|
||||||
|
|
||||||
@@ -59,10 +65,12 @@ class E2EHarness:
|
|||||||
adapter: DatasetAdapter,
|
adapter: DatasetAdapter,
|
||||||
flight_id: str = "e2e-flight",
|
flight_id: str = "e2e-flight",
|
||||||
max_frames: Optional[int] = None,
|
max_frames: Optional[int] = None,
|
||||||
|
trace_path: Optional[Path] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
self._adapter = adapter
|
self._adapter = adapter
|
||||||
self._flight_id = flight_id
|
self._flight_id = flight_id
|
||||||
self._max_frames = max_frames
|
self._max_frames = max_frames
|
||||||
|
self._trace_path: Optional[Path] = Path(trace_path) if trace_path else None
|
||||||
self._estimates: list[tuple[int, Optional[tuple[float, float, float]]]] = []
|
self._estimates: list[tuple[int, Optional[tuple[float, float, float]]]] = []
|
||||||
|
|
||||||
async def run(self) -> HarnessResult:
|
async def run(self) -> HarnessResult:
|
||||||
@@ -74,15 +82,35 @@ class E2EHarness:
|
|||||||
frames = frames[: self._max_frames]
|
frames = frames[: self._max_frames]
|
||||||
gt_poses = gt_poses[: self._max_frames]
|
gt_poses = gt_poses[: self._max_frames]
|
||||||
|
|
||||||
for frame in frames:
|
# Align GT by index so trace records can pair each frame with the
|
||||||
image = self._load_or_synth_image(frame.image_path)
|
# corresponding pose without an expensive timestamp search.
|
||||||
result = await processor.process_frame(
|
gt_by_idx: dict[int, DatasetPose] = {}
|
||||||
self._flight_id, frame.frame_idx, image
|
for i, pose in enumerate(gt_poses):
|
||||||
)
|
gt_by_idx[i] = pose
|
||||||
est = None
|
|
||||||
if result.gps is not None:
|
trace_fh = None
|
||||||
est = (result.gps.lat, result.gps.lon, 0.0) # alt not returned here
|
if self._trace_path is not None:
|
||||||
self._estimates.append((frame.frame_idx, est))
|
self._trace_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
trace_fh = self._trace_path.open("w")
|
||||||
|
|
||||||
|
try:
|
||||||
|
for frame in frames:
|
||||||
|
image = self._load_or_synth_image(frame.image_path)
|
||||||
|
result = await processor.process_frame(
|
||||||
|
self._flight_id, frame.frame_idx, image
|
||||||
|
)
|
||||||
|
est = None
|
||||||
|
if result.gps is not None:
|
||||||
|
est = (result.gps.lat, result.gps.lon, 0.0)
|
||||||
|
self._estimates.append((frame.frame_idx, est))
|
||||||
|
|
||||||
|
if trace_fh is not None:
|
||||||
|
gt = gt_by_idx.get(frame.frame_idx)
|
||||||
|
record = self._trace_record(processor, frame, result, gt)
|
||||||
|
trace_fh.write(json.dumps(record) + "\n")
|
||||||
|
finally:
|
||||||
|
if trace_fh is not None:
|
||||||
|
trace_fh.close()
|
||||||
|
|
||||||
gt_enu = self._poses_to_enu(gt_poses)
|
gt_enu = self._poses_to_enu(gt_poses)
|
||||||
est_enu = self._estimates_to_enu(gt_poses[0] if gt_poses else None)
|
est_enu = self._estimates_to_enu(gt_poses[0] if gt_poses else None)
|
||||||
@@ -96,6 +124,51 @@ class E2EHarness:
|
|||||||
platform_class=self._adapter.capabilities.platform_class,
|
platform_class=self._adapter.capabilities.platform_class,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _trace_record(
    self,
    processor: FlightProcessor,
    frame,
    result,
    gt: Optional[DatasetPose],
) -> dict:
    """Build one JSONL record describing the product's state after a frame.

    Captures VO success, ESKF state (position plus a scalar sigma derived
    from the trace of the covariance's top-left 3x3 block), and the
    estimate/GT pair. Enough to diagnose *where* the pipeline diverges
    without dumping raw images or covariance matrices.

    Args:
        processor: Processor whose per-flight ESKF table (``_eskf``) is
            inspected for ``self._flight_id``.
        frame: Frame just processed; ``frame_idx`` and ``timestamp_ns``
            are copied into the record.
        result: Return value of ``process_frame`` for that frame.
        gt: Ground-truth pose paired with the frame, or ``None`` when no
            pose is available.

    Returns:
        A dict ready for ``json.dumps``. Values this method converts with
        ``float()`` (confidence, ESKF position components, sigma) are
        replaced by ``None`` when they are NaN or infinite, because
        ``json.dumps`` would otherwise emit bare ``NaN``/``Infinity``
        tokens that strict JSON parsers reject.
    """

    def _finite_or_none(value) -> Optional[float]:
        # Keep each JSONL line strictly valid JSON: no NaN/Infinity tokens.
        as_float = float(value)
        return as_float if np.isfinite(as_float) else None

    eskf = processor._eskf.get(self._flight_id)  # noqa: SLF001 — test harness
    eskf_initialized = eskf is not None and bool(eskf.initialized)

    eskf_position = None
    eskf_sigma = None
    if eskf_initialized:
        pos = eskf.position
        eskf_position = [_finite_or_none(pos[axis]) for axis in range(3)]
        cov = eskf.covariance
        if cov is not None:
            # Scalar position-uncertainty summary: RMS of the three
            # position variances. A negative trace (numerically broken
            # covariance) yields NaN here and is reported as None.
            eskf_sigma = _finite_or_none(
                np.sqrt(np.trace(cov[0:3, 0:3]) / 3.0)
            )

    gps = result.gps
    has_gps = gps is not None
    has_gt = gt is not None

    return {
        "frame_idx": frame.frame_idx,
        "timestamp_ns": frame.timestamp_ns,
        "vo_success": bool(result.vo_success),
        "alignment_success": bool(result.alignment_success),
        "tracking_state": result.tracking_state.value,
        "confidence": _finite_or_none(result.confidence),
        "eskf_initialized": eskf_initialized,
        "eskf_position_enu": eskf_position,
        "eskf_pos_sigma_m": eskf_sigma,
        "estimate_lat": gps.lat if has_gps else None,
        "estimate_lon": gps.lon if has_gps else None,
        "gt_lat": gt.lat if has_gt else None,
        "gt_lon": gt.lon if has_gt else None,
        "gt_alt": gt.alt if has_gt else None,
    }
|
||||||
|
|
||||||
def _build_processor(self) -> FlightProcessor:
|
def _build_processor(self) -> FlightProcessor:
|
||||||
repo = MagicMock()
|
repo = MagicMock()
|
||||||
streamer = MagicMock()
|
streamer = MagicMock()
|
||||||
|
|||||||
@@ -64,3 +64,61 @@ async def test_harness_max_frames_larger_than_dataset_is_harmless():
|
|||||||
harness = E2EHarness(adapter, max_frames=100)
|
harness = E2EHarness(adapter, max_frames=100)
|
||||||
result = await harness.run()
|
result = await harness.run()
|
||||||
assert result.num_frames_submitted == 4
|
assert result.num_frames_submitted == 4
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_harness_trace_path_none_does_not_emit(tmp_path):
    """Running without ``trace_path`` must not leave any file behind."""
    # Harness is built with defaults only, i.e. trace_path stays None.
    harness = E2EHarness(SyntheticAdapter(num_frames=3, fps=5.0))
    await harness.run()

    # Sanity check: the pytest-provided directory was never handed to the
    # harness, so it has to be empty after the run.
    leftovers = list(tmp_path.iterdir())
    assert leftovers == []
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_harness_trace_path_writes_jsonl_with_expected_fields(tmp_path):
    """With ``trace_path`` set, each submitted frame yields one full record.

    Every line of the trace file must parse as JSON and carry the complete
    key set, so downstream tooling can rely on the schema for all frames
    rather than only the first one.
    """
    import json

    trace = tmp_path / "run.jsonl"
    adapter = SyntheticAdapter(num_frames=4, fps=5.0)
    harness = E2EHarness(adapter, trace_path=trace)
    await harness.run()

    assert trace.is_file()
    lines = trace.read_text().splitlines()
    # One JSON record per submitted frame
    assert len(lines) == 4

    # Contract: every record carries these keys so downstream tooling can
    # depend on them across datasets.
    expected_keys = {
        "frame_idx",
        "timestamp_ns",
        "vo_success",
        "alignment_success",
        "tracking_state",
        "confidence",
        "eskf_initialized",
        "eskf_position_enu",  # list[3] or None when not initialized
        "eskf_pos_sigma_m",  # float or None
        "estimate_lat",  # float or None
        "estimate_lon",
        "gt_lat",
        "gt_lon",
        "gt_alt",
    }
    # Check each line, not just the first, so a record that loses a key
    # mid-run is caught and reported with its position in the file.
    for record_no, line in enumerate(lines):
        record = json.loads(line)
        missing = expected_keys - set(record.keys())
        assert not missing, (
            f"trace record {record_no} missing keys: {missing}"
        )
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_harness_trace_truncates_with_max_frames(tmp_path):
    """A ``max_frames`` cap limits the trace to exactly that many lines."""
    trace_file = tmp_path / "run.jsonl"
    harness = E2EHarness(
        SyntheticAdapter(num_frames=10, fps=5.0),
        max_frames=3,
        trace_path=trace_file,
    )
    await harness.run()

    # 10 frames available, capped at 3 -> 3 trace lines.
    record_count = len(trace_file.read_text().splitlines())
    assert record_count == 3
|
||||||
|
|||||||
Reference in New Issue
Block a user