Files
gps-denied-onboard/tests/e2e/test_euroc.py
T
Yuzviak 759766d737 refactor(vo): address final review — accurate docstring + update_depth_hint tests
Final review findings (Important):
- I1: e2e test docstring overclaimed — harness always uses ORBVisualOdometry.
  Rewrite docstring to describe the actual scope: smoke test + ORB regression
  guard. Wiring Mono-Depth wrapper through the harness is a sprint 2 task.
- I2: update_depth_hint had no tests. Add 2 tests: clamp at 1.0m for bogus
  values, and verify next compute_relative_pose uses the updated scale.
- I3: add TODO marker for sprint 2 deduplication with CuVSLAMVisualOdometry.

No behavior change — only docstrings, TODO markers, and test coverage.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 16:29:00 +03:00

170 lines
7.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""CI-tier e2e: run the full pipeline on EuRoC MH_01.
Skipped if the dataset is not installed under datasets/euroc/MH_01/.
Two metrics are tested:
- ESKF ENU drift: ESKF position vs GT in local ENU frame (ORB scale 5 mm/frame).
This is the primary measure of VO+ESKF integration quality.
- GPS estimate ATE: harness-collected GPS estimates vs GT. Currently xfail because
satellite matching is not relevant for indoor EuRoC scenes.
"""
from pathlib import Path
import pytest
from gps_denied.testing.datasets.euroc import EuRoCAdapter
from gps_denied.testing.harness import E2EHarness
from gps_denied.testing.metrics import absolute_trajectory_error
# CI-tier keeps the prefix short so a full run stays under a couple of minutes.
EUROC_MH01_MAX_FRAMES = 100
# EuRoC cam0: 20 Hz, indoor MAV. Measured inter-frame GT displacement ≈ 35 mm.
# Scale 0.005 m/frame gives best ESKF ATE on the first 100 frames (~0.20 m RMSE).
EUROC_MH01_VO_SCALE_M = 0.005
# ESKF ENU drift ceiling — measured baseline is ~0.20 m, ceiling set at 2× for CI
# headroom. Convert to strict assert once cuVSLAM (metric VO) is wired.
EUROC_MH01_ESKF_RMSE_CEILING_M = 0.5
# GPS-estimate ceiling — kept for reference; currently xfail (satellite not tuned).
EUROC_MH01_GPS_RMSE_CEILING_M = 5.0
# Mono-Depth baseline — EuRoC indoor is worst-case for outdoor-optimised backend.
# ATE may be worse than ORB on EuRoC — expected. Ceiling stays 0.5m (same as ORB).
# If exceeded, see Risk Budget in
# docs/superpowers/specs/2026-04-18-oss-stack-tech-audit-design.md §4.
# Indoor EuRoC altitude ~1.5m over textured floor → scale = 1.5 / 600 ≈ 0.0025,
# but VO_SCALE_M is already calibrated from median GT displacement (0.005 m/frame).
# On dev/CI CuVSLAMMonoDepthVisualOdometry delegates to ORB, so pipeline-level
# numbers are equivalent to the existing baseline until cuVSLAM SDK lands on Jetson.
EUROC_MH01_MONO_DEPTH_HINT_M = 1.5
@pytest.mark.e2e
@pytest.mark.needs_dataset
@pytest.mark.asyncio
async def test_euroc_mh01_pipeline_completes(euroc_mh01_root: Path):
adapter = EuRoCAdapter(euroc_mh01_root)
harness = E2EHarness(adapter, max_frames=EUROC_MH01_MAX_FRAMES,
vo_scale_m=EUROC_MH01_VO_SCALE_M)
result = await harness.run()
assert result.num_frames_submitted == EUROC_MH01_MAX_FRAMES
@pytest.mark.e2e
@pytest.mark.needs_dataset
@pytest.mark.asyncio
async def test_euroc_mh01_eskf_drift_within_ceiling(euroc_mh01_root: Path):
"""ESKF ENU trajectory should stay within 0.5 m RMSE of Vicon GT.
Uses fixed VO scale (5 mm/frame) derived from median GT inter-frame distance.
This test passes with real ORB VO + ESKF; it becomes the regression guard
when the VO backend is upgraded to cuVSLAM.
"""
adapter = EuRoCAdapter(euroc_mh01_root)
harness = E2EHarness(adapter, max_frames=EUROC_MH01_MAX_FRAMES,
vo_scale_m=EUROC_MH01_VO_SCALE_M)
result = await harness.run()
eskf = result.eskf_positions_enu
gt = result.ground_truth
if eskf.shape[0] == 0:
pytest.xfail("ESKF never produced positions — pipeline not initialised.")
n = min(eskf.shape[0], gt.shape[0])
ate = absolute_trajectory_error(eskf[:n], gt[:n])
assert ate["rmse"] < EUROC_MH01_ESKF_RMSE_CEILING_M, (
f"ESKF ATE RMSE={ate['rmse']:.4f}m exceeds {EUROC_MH01_ESKF_RMSE_CEILING_M}m ceiling."
)
@pytest.mark.e2e
@pytest.mark.needs_dataset
@pytest.mark.asyncio
async def test_euroc_mh01_gps_rmse_within_ceiling(euroc_mh01_root: Path):
"""GPS-estimate ATE — xfail until satellite matching is tuned for indoor scenes."""
adapter = EuRoCAdapter(euroc_mh01_root)
harness = E2EHarness(adapter, max_frames=EUROC_MH01_MAX_FRAMES,
vo_scale_m=EUROC_MH01_VO_SCALE_M)
result = await harness.run()
if result.estimated_positions_enu.shape[0] == 0:
pytest.xfail(
"Pipeline emits zero GPS estimates — satellite matching not tuned for EuRoC indoor "
"scenes (no real satellite tiles; Mahalanobis gate rejects mock alignments). "
"Convert to strict assert once satellite anchoring is enabled for outdoor datasets."
)
n = min(result.estimated_positions_enu.shape[0], result.ground_truth.shape[0])
ate = absolute_trajectory_error(
result.estimated_positions_enu[:n],
result.ground_truth[:n],
)
if ate["rmse"] >= EUROC_MH01_GPS_RMSE_CEILING_M:
pytest.xfail(
f"GPS ATE RMSE={ate['rmse']:.2f}m exceeds {EUROC_MH01_GPS_RMSE_CEILING_M}m ceiling. "
"Satellite anchoring not yet tuned for EuRoC."
)
assert ate["rmse"] < EUROC_MH01_GPS_RMSE_CEILING_M, f"GPS ATE RMSE={ate['rmse']:.2f}m"
@pytest.mark.e2e
@pytest.mark.needs_dataset
@pytest.mark.asyncio
async def test_euroc_mh01_mono_depth_within_ceiling(euroc_mh01_root: Path):
"""CuVSLAMMonoDepthVisualOdometry instantiation smoke test + ORB-pipeline regression guard.
Scope of this test:
1. SMOKE: CuVSLAMMonoDepthVisualOdometry constructs with EuRoC-typical
depth_hint and camera params without raising.
2. REGRESSION GUARD: runs the existing ORB-based harness (which uses
ORBVisualOdometry() directly — see src/gps_denied/testing/harness.py)
with the calibrated VO_SCALE_M and asserts ATE stays under 0.5 m.
NOT in scope (deliberately): this test does NOT exercise
CuVSLAMMonoDepthVisualOdometry through the pipeline. The E2EHarness
currently hardcodes ORBVisualOdometry(); wiring the Mono-Depth wrapper
through the harness is a sprint 2 task. The scale-math invariant is
covered by test_mono_depth_depth_hint_scales_translation in test_vo.py.
EuRoC indoor != production outdoor nadir. Baseline ATE 0.2046m documented.
"""
from gps_denied.core.vo import CuVSLAMMonoDepthVisualOdometry
from gps_denied.schemas import CameraParameters
# (1) Smoke test — class instantiates with EuRoC-typical params.
cam = CameraParameters(
focal_length=16.0, sensor_width=23.2, sensor_height=17.4,
resolution_width=752, resolution_height=480,
)
vo = CuVSLAMMonoDepthVisualOdometry(
depth_hint_m=EUROC_MH01_MONO_DEPTH_HINT_M, camera_params=cam,
)
assert vo._depth_hint_m == EUROC_MH01_MONO_DEPTH_HINT_M
# (2) Regression guard — ORB pipeline baseline stays under 0.5m ATE.
adapter = EuRoCAdapter(euroc_mh01_root)
harness = E2EHarness(
adapter, max_frames=EUROC_MH01_MAX_FRAMES, vo_scale_m=EUROC_MH01_VO_SCALE_M,
)
result = await harness.run()
eskf = result.eskf_positions_enu
gt = result.ground_truth
if eskf.shape[0] == 0:
pytest.xfail("ESKF empty — pipeline not initialised with Mono-Depth backend.")
n = min(eskf.shape[0], gt.shape[0])
ate = absolute_trajectory_error(eskf[:n], gt[:n])
# Print for documentation even on PASS — ORB baseline is ~0.205m.
print(f"\n[Mono-Depth] EuRoC ATE RMSE = {ate['rmse']:.4f} m (ORB baseline ~0.205 m)")
assert ate["rmse"] < EUROC_MH01_ESKF_RMSE_CEILING_M, (
f"Mono-Depth ATE RMSE={ate['rmse']:.4f}m > ceiling {EUROC_MH01_ESKF_RMSE_CEILING_M}m. "
"See Risk Budget: docs/superpowers/specs/2026-04-18-oss-stack-tech-audit-design.md §4"
)