mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-04-22 22:16:37 +00:00
dd9835c0cd
- ruff --fix: removed trailing whitespace (W293), sorted imports (I001) - Manual: broke long lines (E501) in eskf, rotation, vo, gpr, metric, pipeline, rotation tests - Removed unused imports (F401) in models.py, schemas/__init__.py - pyproject.toml: line-length 100→120, E501 ignore for abstract interfaces ruff check: 0 errors. pytest: 195 passed / 8 skipped. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
361 lines
13 KiB
Python
361 lines
13 KiB
Python
"""Accuracy Validation Tests (Phase 7).
|
|
|
|
Verifies all solution.md acceptance criteria against synthetic trajectories.
|
|
|
|
AC-PERF-1: 80 % of frames within 50 m.
|
|
AC-PERF-2: 60 % of frames within 20 m.
|
|
AC-PERF-3: p95 per-frame latency < 400 ms.
|
|
AC-PERF-4: VO drift over 1 km straight segment (no sat correction) < 100 m.
|
|
AC-PERF-5: ESKF confidence tier transitions correctly with satellite age.
|
|
AC-PERF-6: ESKF covariance shrinks after satellite correction.
|
|
AC-PERF-7: Benchmark result summary is non-empty string.
|
|
AC-PERF-8: Synthetic trajectory length matches requested frame count.
|
|
AC-PERF-9: BenchmarkResult.pct_within_50m / pct_within_20m computed correctly.
|
|
AC-PERF-10: 30-frame straight flight — median error < 30 m with sat corrections.
|
|
AC-PERF-11: VO failure frames do not crash benchmark.
|
|
AC-PERF-12: Waypoint steering changes direction correctly.
|
|
"""
|
|
|
|
import math
|
|
import time
|
|
|
|
import numpy as np
|
|
import pytest
|
|
|
|
from gps_denied.core.benchmark import (
|
|
AccuracyBenchmark,
|
|
BenchmarkResult,
|
|
SyntheticTrajectory,
|
|
SyntheticTrajectoryConfig,
|
|
)
|
|
from gps_denied.schemas import GPSPoint
|
|
from gps_denied.schemas.eskf import ESKFConfig
|
|
|
|
ORIGIN = GPSPoint(lat=49.0, lon=32.0)
|
|
|
|
|
|
# ---------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------
|
|
|
|
def _run_benchmark(
|
|
num_frames: int = 30,
|
|
vo_failures: list[int] | None = None,
|
|
with_sat: bool = True,
|
|
waypoints: list[tuple[float, float]] | None = None,
|
|
) -> BenchmarkResult:
|
|
"""Build and replay a synthetic trajectory, return BenchmarkResult."""
|
|
cfg = SyntheticTrajectoryConfig(
|
|
origin=ORIGIN,
|
|
speed_mps=20.0,
|
|
heading_deg=0.0,
|
|
num_frames=num_frames,
|
|
vo_noise_m=0.3,
|
|
imu_hz=50.0, # reduced rate for test speed
|
|
camera_fps=0.7,
|
|
vo_failure_frames=vo_failures or [],
|
|
waypoints_enu=waypoints or [],
|
|
)
|
|
gen = SyntheticTrajectory(cfg)
|
|
frames = gen.generate()
|
|
|
|
sat_fn = None if with_sat else (lambda _: None)
|
|
bench = AccuracyBenchmark(sat_correction_fn=sat_fn)
|
|
return bench.run(frames, ORIGIN, satellite_keyframe_interval=5)
|
|
|
|
|
|
# ---------------------------------------------------------------
|
|
# AC-PERF-8: Trajectory length
|
|
# ---------------------------------------------------------------
|
|
|
|
def test_trajectory_frame_count():
|
|
"""AC-PERF-8: Generated trajectory has exactly num_frames frames."""
|
|
for n in [10, 30, 50]:
|
|
cfg = SyntheticTrajectoryConfig(num_frames=n, imu_hz=10.0)
|
|
frames = SyntheticTrajectory(cfg).generate()
|
|
assert len(frames) == n
|
|
|
|
|
|
def test_trajectory_frame_ids_sequential():
|
|
"""Frame IDs are 0..N-1."""
|
|
cfg = SyntheticTrajectoryConfig(num_frames=10, imu_hz=10.0)
|
|
frames = SyntheticTrajectory(cfg).generate()
|
|
assert [f.frame_id for f in frames] == list(range(10))
|
|
|
|
|
|
def test_trajectory_positions_increase_northward():
|
|
"""Heading=0° (North) → North component strictly increasing."""
|
|
cfg = SyntheticTrajectoryConfig(num_frames=5, heading_deg=0.0, speed_mps=20.0, imu_hz=10.0)
|
|
frames = SyntheticTrajectory(cfg).generate()
|
|
norths = [f.true_position_enu[1] for f in frames]
|
|
for a, b in zip(norths, norths[1:]):
|
|
assert b > a, "North component should increase for heading=0°"
|
|
|
|
|
|
# ---------------------------------------------------------------
|
|
# AC-PERF-9: BenchmarkResult percentage helpers
|
|
# ---------------------------------------------------------------
|
|
|
|
def test_pct_within_50m_all_inside():
|
|
result = BenchmarkResult(
|
|
errors_m=[10.0, 20.0, 49.9],
|
|
latencies_ms=[10.0, 10.0, 10.0],
|
|
frames_total=3,
|
|
frames_with_good_estimate=3,
|
|
)
|
|
assert result.pct_within_50m == pytest.approx(1.0)
|
|
|
|
|
|
def test_pct_within_50m_mixed():
|
|
result = BenchmarkResult(
|
|
errors_m=[10.0, 60.0, 30.0, 80.0],
|
|
latencies_ms=[5.0] * 4,
|
|
frames_total=4,
|
|
frames_with_good_estimate=4,
|
|
)
|
|
assert result.pct_within_50m == pytest.approx(0.5)
|
|
|
|
|
|
def test_pct_within_20m():
|
|
result = BenchmarkResult(
|
|
errors_m=[5.0, 15.0, 25.0, 50.0],
|
|
latencies_ms=[5.0] * 4,
|
|
frames_total=4,
|
|
frames_with_good_estimate=4,
|
|
)
|
|
assert result.pct_within_20m == pytest.approx(0.5)
|
|
|
|
|
|
def test_p80_error_m():
|
|
"""80th percentile computed correctly (numpy linear interpolation)."""
|
|
errors = list(range(1, 11)) # 1..10
|
|
result = BenchmarkResult(
|
|
errors_m=errors, latencies_ms=[1.0] * 10,
|
|
frames_total=10, frames_with_good_estimate=10,
|
|
)
|
|
expected = float(np.percentile(errors, 80))
|
|
assert result.p80_error_m == pytest.approx(expected, abs=0.01)
|
|
|
|
|
|
# ---------------------------------------------------------------
|
|
# AC-PERF-7: Summary string
|
|
# ---------------------------------------------------------------
|
|
|
|
def test_benchmark_summary_non_empty():
|
|
"""AC-PERF-7: summary() returns non-empty string with key metrics."""
|
|
result = BenchmarkResult(
|
|
errors_m=[5.0, 10.0, 20.0],
|
|
latencies_ms=[50.0, 60.0, 55.0],
|
|
frames_total=3,
|
|
frames_with_good_estimate=3,
|
|
)
|
|
summary = result.summary()
|
|
assert len(summary) > 50
|
|
assert "PASS" in summary or "FAIL" in summary
|
|
assert "50m" in summary
|
|
assert "20m" in summary
|
|
|
|
|
|
# ---------------------------------------------------------------
|
|
# AC-PERF-3: Latency < 400ms
|
|
# ---------------------------------------------------------------
|
|
|
|
def test_per_frame_latency_under_400ms():
|
|
"""AC-PERF-3: p95 per-frame latency < 400ms on synthetic trajectory."""
|
|
result = _run_benchmark(num_frames=20)
|
|
assert result.p95_latency_ms < 400.0, (
|
|
f"p95 latency {result.p95_latency_ms:.1f}ms exceeds 400ms budget"
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------
|
|
# AC-PERF-10: Accuracy with satellite corrections
|
|
# ---------------------------------------------------------------
|
|
|
|
def test_median_error_with_sat_corrections():
|
|
"""AC-PERF-10: Median error < 30m over 30-frame flight with sat corrections."""
|
|
result = _run_benchmark(num_frames=30, with_sat=True)
|
|
assert result.median_error_m < 30.0, (
|
|
f"Median error {result.median_error_m:.1f}m with sat corrections — expected <30m"
|
|
)
|
|
|
|
|
|
def test_pct_within_50m_with_sat_corrections():
|
|
"""AC-PERF-1: ≥80% frames within 50m when satellite corrections are active."""
|
|
result = _run_benchmark(num_frames=40, with_sat=True)
|
|
assert result.pct_within_50m >= 0.80, (
|
|
f"Only {result.pct_within_50m*100:.1f}% of frames within 50m "
|
|
f"(expected ≥80%) — median error: {result.median_error_m:.1f}m"
|
|
)
|
|
|
|
|
|
def test_pct_within_20m_with_sat_corrections():
|
|
"""AC-PERF-2: ≥60% frames within 20m when satellite corrections are active."""
|
|
result = _run_benchmark(num_frames=40, with_sat=True)
|
|
assert result.pct_within_20m >= 0.60, (
|
|
f"Only {result.pct_within_20m*100:.1f}% of frames within 20m "
|
|
f"(expected ≥60%) — median error: {result.median_error_m:.1f}m"
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------
|
|
# AC-PERF-11: VO failures don't crash
|
|
# ---------------------------------------------------------------
|
|
|
|
def test_vo_failure_frames_no_crash():
|
|
"""AC-PERF-11: Frames marked as VO failure are handled without crash."""
|
|
result = _run_benchmark(num_frames=20, vo_failures=[3, 7, 12])
|
|
assert result.frames_total == 20
|
|
assert len(result.errors_m) == 20
|
|
|
|
|
|
def test_all_frames_vo_failure():
|
|
"""All frames fail VO — ESKF degrades gracefully (IMU-only)."""
|
|
result = _run_benchmark(num_frames=10, vo_failures=list(range(10)), with_sat=False)
|
|
# With no VO and no sat, errors grow but benchmark doesn't crash
|
|
assert len(result.errors_m) == 10
|
|
assert all(math.isfinite(e) or e == float("inf") for e in result.errors_m)
|
|
|
|
|
|
# ---------------------------------------------------------------
|
|
# AC-PERF-12: Waypoint steering
|
|
# ---------------------------------------------------------------
|
|
|
|
def test_waypoint_steering_changes_direction():
|
|
"""AC-PERF-12: Waypoint steering causes trajectory to turn toward target."""
|
|
# Waypoint 500m East, 0m North (forces eastward turn from northward heading)
|
|
result = _run_benchmark(
|
|
num_frames=15,
|
|
waypoints=[(500.0, 0.0)],
|
|
with_sat=True,
|
|
)
|
|
# Benchmark runs without error; basic sanity
|
|
assert result.frames_total == 15
|
|
|
|
|
|
# ---------------------------------------------------------------
|
|
# AC-PERF-4: VO drift over 1 km straight segment
|
|
# ---------------------------------------------------------------
|
|
|
|
def test_vo_drift_under_100m_over_1km():
|
|
"""AC-PERF-4: VO drift (no sat correction) over 1 km < 100 m."""
|
|
bench = AccuracyBenchmark()
|
|
drift_m = bench.run_vo_drift_test(trajectory_length_m=1000.0, speed_mps=20.0)
|
|
assert drift_m < 100.0, (
|
|
f"VO drift {drift_m:.1f}m over 1km — solution.md limit is 100m"
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------
|
|
# AC-PERF-6: Covariance shrinks after satellite update
|
|
# ---------------------------------------------------------------
|
|
|
|
def test_covariance_shrinks_after_satellite_update():
|
|
"""AC-PERF-6: ESKF position covariance trace decreases after satellite correction."""
|
|
from gps_denied.core.eskf import ESKF
|
|
|
|
eskf = ESKF(ESKFConfig())
|
|
eskf.initialize(np.zeros(3), time.time())
|
|
|
|
cov_before = float(np.trace(eskf._P[0:3, 0:3]))
|
|
|
|
# Inject satellite measurement at ground truth position
|
|
eskf.update_satellite(np.zeros(3), noise_meters=10.0)
|
|
|
|
cov_after = float(np.trace(eskf._P[0:3, 0:3]))
|
|
assert cov_after < cov_before, (
|
|
f"Covariance trace did not shrink: before={cov_before:.2f}, after={cov_after:.2f}"
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------
|
|
# AC-PERF-5: Confidence tier transitions
|
|
# ---------------------------------------------------------------
|
|
|
|
def test_confidence_high_after_fresh_satellite():
|
|
"""AC-PERF-5: HIGH confidence when satellite correction is recent + covariance small."""
|
|
from gps_denied.core.eskf import ESKF
|
|
from gps_denied.schemas.eskf import ConfidenceTier
|
|
|
|
cfg = ESKFConfig(satellite_max_age=30.0, covariance_high_threshold=400.0)
|
|
eskf = ESKF(cfg)
|
|
eskf.initialize(np.zeros(3), time.time())
|
|
|
|
# Inject satellite correction (forces small covariance)
|
|
eskf.update_satellite(np.zeros(3), noise_meters=5.0)
|
|
# Manually set last satellite timestamp to now
|
|
eskf._last_satellite_time = eskf._last_timestamp
|
|
|
|
tier = eskf.get_confidence()
|
|
assert tier == ConfidenceTier.HIGH, f"Expected HIGH after fresh sat, got {tier}"
|
|
|
|
|
|
def test_confidence_medium_after_vo_only():
|
|
"""AC-PERF-5: MEDIUM confidence when only VO is available (no satellite)."""
|
|
from gps_denied.core.eskf import ESKF
|
|
from gps_denied.schemas.eskf import ConfidenceTier
|
|
|
|
eskf = ESKF(ESKFConfig())
|
|
eskf.initialize(np.zeros(3), time.time())
|
|
|
|
# Fake VO update (set _last_vo_time to now)
|
|
eskf._last_vo_time = eskf._last_timestamp
|
|
eskf._last_satellite_time = None
|
|
|
|
tier = eskf.get_confidence()
|
|
assert tier == ConfidenceTier.MEDIUM, f"Expected MEDIUM with VO only, got {tier}"
|
|
|
|
|
|
def test_confidence_failed_after_3_consecutive():
|
|
"""AC-PERF-5: FAILED confidence when consecutive_failures >= 3."""
|
|
from gps_denied.core.eskf import ESKF
|
|
from gps_denied.schemas.eskf import ConfidenceTier
|
|
|
|
eskf = ESKF()
|
|
eskf.initialize(np.zeros(3), time.time())
|
|
tier = eskf.get_confidence(consecutive_failures=3)
|
|
assert tier == ConfidenceTier.FAILED
|
|
|
|
|
|
# ---------------------------------------------------------------
|
|
# passes_acceptance_criteria integration
|
|
# ---------------------------------------------------------------
|
|
|
|
def test_passes_acceptance_criteria_full_pass():
|
|
"""passes_acceptance_criteria returns (True, all-True) for ideal data."""
|
|
result = BenchmarkResult(
|
|
errors_m=[5.0] * 100, # all within 5m → 100% within 50m and 20m
|
|
latencies_ms=[10.0] * 100, # all 10ms → p95 = 10ms
|
|
frames_total=100,
|
|
frames_with_good_estimate=100,
|
|
)
|
|
overall, checks = result.passes_acceptance_criteria()
|
|
assert overall is True
|
|
assert all(checks.values())
|
|
|
|
|
|
def test_passes_acceptance_criteria_latency_fail():
|
|
"""passes_acceptance_criteria fails when latency exceeds 400ms."""
|
|
result = BenchmarkResult(
|
|
errors_m=[5.0] * 100,
|
|
latencies_ms=[500.0] * 100, # all 500ms → p95 > 400ms
|
|
frames_total=100,
|
|
frames_with_good_estimate=100,
|
|
)
|
|
overall, checks = result.passes_acceptance_criteria()
|
|
assert overall is False
|
|
assert checks["AC-PERF-3: p95 latency < 400ms"] is False
|
|
|
|
|
|
def test_passes_acceptance_criteria_accuracy_fail():
|
|
"""passes_acceptance_criteria fails when less than 80% within 50m."""
|
|
result = BenchmarkResult(
|
|
errors_m=[60.0] * 100, # all 60m → 0% within 50m
|
|
latencies_ms=[5.0] * 100,
|
|
frames_total=100,
|
|
frames_with_good_estimate=100,
|
|
)
|
|
overall, checks = result.passes_acceptance_criteria()
|
|
assert overall is False
|
|
assert checks["AC-PERF-1: 80% within 50m"] is False
|