"""Accuracy Validation Tests (Phase 7).

Verifies all solution.md acceptance criteria against synthetic trajectories.

AC-PERF-1: 80 % of frames within 50 m.
AC-PERF-2: 60 % of frames within 20 m.
AC-PERF-3: p95 per-frame latency < 400 ms.
AC-PERF-4: VO drift over 1 km straight segment (no sat correction) < 100 m.
AC-PERF-5: ESKF confidence tier transitions correctly with satellite age.
AC-PERF-6: ESKF covariance shrinks after satellite correction.
AC-PERF-7: Benchmark result summary is non-empty string.
AC-PERF-8: Synthetic trajectory length matches requested frame count.
AC-PERF-9: BenchmarkResult.pct_within_50m / pct_within_20m computed correctly.
AC-PERF-10: 30-frame straight flight — median error < 30 m with sat corrections.
AC-PERF-11: VO failure frames do not crash benchmark.
AC-PERF-12: Waypoint steering changes direction correctly.
"""

import math
import time

import numpy as np
import pytest

from gps_denied.core.benchmark import (
    AccuracyBenchmark,
    BenchmarkResult,
    SyntheticTrajectory,
    SyntheticTrajectoryConfig,
)
from gps_denied.schemas import GPSPoint
from gps_denied.schemas.eskf import ESKFConfig

# Geodetic origin shared by every synthetic trajectory built in this module.
ORIGIN = GPSPoint(lat=49.0, lon=32.0)


# ---------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------

def _run_benchmark(
    num_frames: int = 30,
    vo_failures: list[int] | None = None,
    with_sat: bool = True,
    waypoints: list[tuple[float, float]] | None = None,
) -> BenchmarkResult:
    """Generate a synthetic flight and replay it through AccuracyBenchmark.

    Args:
        num_frames: Number of frames requested from the trajectory generator.
        vo_failures: Frame indices passed as ``vo_failure_frames``; ``None``
            (or an empty list) is forwarded as an empty list.
        with_sat: When True the benchmark is built with
            ``sat_correction_fn=None``; when False it receives a callable
            that always returns ``None``.
            NOTE(review): presumably ``None`` selects the benchmark's built-in
            correction source — confirm against AccuracyBenchmark.
        waypoints: Pairs passed as ``waypoints_enu``; ``None`` (or an empty
            list) is forwarded as an empty list.

    Returns:
        The BenchmarkResult produced by ``AccuracyBenchmark.run``.
    """
    config = SyntheticTrajectoryConfig(
        origin=ORIGIN,
        speed_mps=20.0,
        heading_deg=0.0,
        num_frames=num_frames,
        vo_noise_m=0.3,
        imu_hz=50.0,  # reduced rate for test speed
        camera_fps=0.7,
        vo_failure_frames=vo_failures or [],
        waypoints_enu=waypoints or [],
    )
    frames = SyntheticTrajectory(config).generate()

    if with_sat:
        correction_fn = None
    else:
        def correction_fn(_):
            # Stand-in that never yields a satellite correction.
            return None

    benchmark = AccuracyBenchmark(sat_correction_fn=correction_fn)
    return benchmark.run(frames, ORIGIN, satellite_keyframe_interval=5)


# ---------------------------------------------------------------
# AC-PERF-8: Trajectory length
# ---------------------------------------------------------------

def test_trajectory_frame_count():
    """AC-PERF-8: generate() yields exactly ``num_frames`` frames."""
    for requested in (10, 30, 50):
        config = SyntheticTrajectoryConfig(num_frames=requested, imu_hz=10.0)
        generated = SyntheticTrajectory(config).generate()
        assert len(generated) == requested


def test_trajectory_frame_ids_sequential():
    """Frame IDs of a 10-frame trajectory run 0..9 in order."""
    config = SyntheticTrajectoryConfig(num_frames=10, imu_hz=10.0)
    generated = SyntheticTrajectory(config).generate()
    frame_ids = [frame.frame_id for frame in generated]
    assert frame_ids == list(range(10))


def test_trajectory_positions_increase_northward():
    """With heading 0° the second ENU component grows on every frame."""
    config = SyntheticTrajectoryConfig(
        num_frames=5,
        heading_deg=0.0,
        speed_mps=20.0,
        imu_hz=10.0,
    )
    generated = SyntheticTrajectory(config).generate()
    # Index 1 of true_position_enu is the North component.
    norths = [frame.true_position_enu[1] for frame in generated]
    for previous, current in zip(norths[:-1], norths[1:]):
        assert current > previous, "North component should increase for heading=0°"


# ---------------------------------------------------------------
# AC-PERF-9: BenchmarkResult percentage helpers
# ---------------------------------------------------------------

def test_pct_within_50m_all_inside():
    """All three errors are below 50 m, so the fraction is 1.0."""
    outcome = BenchmarkResult(
        errors_m=[10.0, 20.0, 49.9],
        latencies_ms=[10.0, 10.0, 10.0],
        frames_total=3,
        frames_with_good_estimate=3,
    )
    assert outcome.pct_within_50m == pytest.approx(1.0)


def test_pct_within_50m_mixed():
    """Two of four errors are below 50 m, so the fraction is 0.5."""
    outcome = BenchmarkResult(
        errors_m=[10.0, 60.0, 30.0, 80.0],
        latencies_ms=[5.0, 5.0, 5.0, 5.0],
        frames_total=4,
        frames_with_good_estimate=4,
    )
    assert outcome.pct_within_50m == pytest.approx(0.5)


def test_pct_within_20m():
    """Two of four errors are below 20 m, so the fraction is 0.5."""
    outcome = BenchmarkResult(
        errors_m=[5.0, 15.0, 25.0, 50.0],
        latencies_ms=[5.0, 5.0, 5.0, 5.0],
        frames_total=4,
        frames_with_good_estimate=4,
    )
    assert outcome.pct_within_20m == pytest.approx(0.5)


def test_p80_error_m():
    """p80_error_m agrees with numpy's default (linear) 80th percentile."""
    samples = [*range(1, 11)]  # 1..10
    outcome = BenchmarkResult(
        errors_m=samples,
        latencies_ms=[1.0] * 10,
        frames_total=10,
        frames_with_good_estimate=10,
    )
    reference = float(np.percentile(samples, 80))
    assert outcome.p80_error_m == pytest.approx(reference, abs=0.01)


# ---------------------------------------------------------------
# AC-PERF-7: Summary string
# ---------------------------------------------------------------

def test_benchmark_summary_non_empty():
    """AC-PERF-7: summary() is longer than 50 chars and names key metrics."""
    outcome = BenchmarkResult(
        errors_m=[5.0, 10.0, 20.0],
        latencies_ms=[50.0, 60.0, 55.0],
        frames_total=3,
        frames_with_good_estimate=3,
    )
    text = outcome.summary()
    assert len(text) > 50
    assert "PASS" in text or "FAIL" in text
    assert "50m" in text
    assert "20m" in text


# ---------------------------------------------------------------
# AC-PERF-3: Latency < 400ms
# ---------------------------------------------------------------

def test_per_frame_latency_under_400ms():
    """AC-PERF-3: p95 latency of a 20-frame synthetic run stays below 400 ms."""
    result = _run_benchmark(num_frames=20)
    assert result.p95_latency_ms < 400.0, (
        f"p95 latency {result.p95_latency_ms:.1f}ms exceeds 400ms budget"
    )


# ---------------------------------------------------------------
# AC-PERF-10: Accuracy with satellite corrections
# ---------------------------------------------------------------

def test_median_error_with_sat_corrections():
    """AC-PERF-10: 30 frames with sat corrections give a median error < 30 m."""
    result = _run_benchmark(num_frames=30, with_sat=True)
    assert result.median_error_m < 30.0, (
        f"Median error {result.median_error_m:.1f}m with sat corrections — expected <30m"
    )


def test_pct_within_50m_with_sat_corrections():
    """AC-PERF-1: at least 80% of 40 frames are within 50 m with sat corrections."""
    result = _run_benchmark(num_frames=40, with_sat=True)
    assert result.pct_within_50m >= 0.80, (
        f"Only {result.pct_within_50m*100:.1f}% of frames within 50m "
        f"(expected ≥80%) — median error: {result.median_error_m:.1f}m"
    )


def test_pct_within_20m_with_sat_corrections():
    """AC-PERF-2: at least 60% of 40 frames are within 20 m with sat corrections."""
    result = _run_benchmark(num_frames=40, with_sat=True)
    assert result.pct_within_20m >= 0.60, (
        f"Only {result.pct_within_20m*100:.1f}% of frames within 20m "
        f"(expected ≥60%) — median error: {result.median_error_m:.1f}m"
    )


# ---------------------------------------------------------------
# AC-PERF-11: VO failures don't crash
# ---------------------------------------------------------------

def test_vo_failure_frames_no_crash():
    """AC-PERF-11: a run with three VO-failure frames still reports all 20 frames."""
    outcome = _run_benchmark(num_frames=20, vo_failures=[3, 7, 12])
    assert outcome.frames_total == 20
    assert len(outcome.errors_m) == 20


def test_all_frames_vo_failure():
    """Every frame fails VO and no sat correction is supplied; the run completes."""
    outcome = _run_benchmark(
        num_frames=10,
        vo_failures=list(range(10)),
        with_sat=False,
    )
    # With no VO and no sat, errors grow but benchmark doesn't crash
    assert len(outcome.errors_m) == 10
    # Each error must be finite or +inf; NaN (and -inf) fail this check.
    positive_inf = float("inf")
    assert all(
        math.isfinite(err) or err == positive_inf for err in outcome.errors_m
    )


# ---------------------------------------------------------------
# AC-PERF-12: Waypoint steering
# ---------------------------------------------------------------

def test_waypoint_steering_changes_direction():
    """AC-PERF-12: benchmark completes when a waypoint is configured.

    NOTE(review): only the frame count is asserted here; the turn itself is
    not checked.
    """
    # Waypoint 500m East, 0m North (forces eastward turn from northward heading)
    outcome = _run_benchmark(
        num_frames=15,
        waypoints=[(500.0, 0.0)],
        with_sat=True,
    )
    # Benchmark runs without error; basic sanity
    assert outcome.frames_total == 15


# ---------------------------------------------------------------
# AC-PERF-4: VO drift over 1 km straight segment
# ---------------------------------------------------------------

def test_vo_drift_under_100m_over_1km():
    """AC-PERF-4: run_vo_drift_test over 1000 m at 20 m/s reports < 100 m drift."""
    benchmark = AccuracyBenchmark()
    drift_m = benchmark.run_vo_drift_test(trajectory_length_m=1000.0, speed_mps=20.0)
    assert drift_m < 100.0, (
        f"VO drift {drift_m:.1f}m over 1km — solution.md limit is 100m"
    )


# ---------------------------------------------------------------
# AC-PERF-6: Covariance shrinks after satellite update
# ---------------------------------------------------------------

def test_covariance_shrinks_after_satellite_update():
    """AC-PERF-6: trace of the leading 3x3 block of ``_P`` drops after a sat update."""
    from gps_denied.core.eskf import ESKF

    filt = ESKF(ESKFConfig())
    filt.initialize(np.zeros(3), time.time())

    cov_before = float(np.trace(filt._P[:3, :3]))

    # Inject satellite measurement at ground truth position
    filt.update_satellite(np.zeros(3), noise_meters=10.0)

    cov_after = float(np.trace(filt._P[:3, :3]))
    assert cov_after < cov_before, (
        f"Covariance trace did not shrink: before={cov_before:.2f}, after={cov_after:.2f}"
    )


# ---------------------------------------------------------------
# AC-PERF-5: Confidence tier transitions
# ---------------------------------------------------------------

def test_confidence_high_after_fresh_satellite():
    """AC-PERF-5: tier is HIGH right after a satellite update stamped as current."""
    from gps_denied.core.eskf import ESKF
    from gps_denied.schemas.eskf import ConfidenceTier

    config = ESKFConfig(satellite_max_age=30.0, covariance_high_threshold=400.0)
    filt = ESKF(config)
    filt.initialize(np.zeros(3), time.time())

    # Inject satellite correction (forces small covariance)
    filt.update_satellite(np.zeros(3), noise_meters=5.0)
    # Manually set last satellite timestamp to now
    filt._last_satellite_time = filt._last_timestamp

    tier = filt.get_confidence()
    assert tier == ConfidenceTier.HIGH, f"Expected HIGH after fresh sat, got {tier}"


def test_confidence_medium_after_vo_only():
    """AC-PERF-5: tier is MEDIUM with a current VO stamp and no satellite stamp."""
    from gps_denied.core.eskf import ESKF
    from gps_denied.schemas.eskf import ConfidenceTier

    filt = ESKF(ESKFConfig())
    filt.initialize(np.zeros(3), time.time())

    # Fake VO update (set _last_vo_time to now)
    filt._last_vo_time = filt._last_timestamp
    filt._last_satellite_time = None

    tier = filt.get_confidence()
    assert tier == ConfidenceTier.MEDIUM, f"Expected MEDIUM with VO only, got {tier}"


def test_confidence_failed_after_3_consecutive():
    """AC-PERF-5: tier is FAILED when get_confidence gets consecutive_failures=3."""
    from gps_denied.core.eskf import ESKF
    from gps_denied.schemas.eskf import ConfidenceTier

    filt = ESKF()
    filt.initialize(np.zeros(3), time.time())

    reported = filt.get_confidence(consecutive_failures=3)
    assert reported == ConfidenceTier.FAILED


# ---------------------------------------------------------------
# passes_acceptance_criteria integration
# ---------------------------------------------------------------

def test_passes_acceptance_criteria_full_pass():
    """Ideal data: overall verdict is True and every individual check is truthy."""
    outcome = BenchmarkResult(
        errors_m=[5.0] * 100,  # all within 5m → 100% within 50m and 20m
        latencies_ms=[10.0] * 100,  # all 10ms → p95 = 10ms
        frames_total=100,
        frames_with_good_estimate=100,
    )
    verdict, per_check = outcome.passes_acceptance_criteria()
    assert verdict is True
    assert all(per_check.values())


def test_passes_acceptance_criteria_latency_fail():
    """500 ms latencies: overall verdict is False and the AC-PERF-3 check is False."""
    outcome = BenchmarkResult(
        errors_m=[5.0] * 100,
        latencies_ms=[500.0] * 100,  # all 500ms → p95 > 400ms
        frames_total=100,
        frames_with_good_estimate=100,
    )
    verdict, per_check = outcome.passes_acceptance_criteria()
    assert verdict is False
    assert per_check["AC-PERF-3: p95 latency < 400ms"] is False


def test_passes_acceptance_criteria_accuracy_fail():
    """60 m errors: overall verdict is False and the AC-PERF-1 check is False."""
    outcome = BenchmarkResult(
        errors_m=[60.0] * 100,  # all 60m → 0% within 50m
        latencies_ms=[5.0] * 100,
        frames_total=100,
        frames_with_good_estimate=100,
    )
    verdict, per_check = outcome.passes_acceptance_criteria()
    assert verdict is False
    assert per_check["AC-PERF-1: 80% within 50m"] is False