# gps-denied-onboard/tests/test_accuracy.py

"""Accuracy Validation Tests (Phase 7).

Verifies all solution.md acceptance criteria against synthetic trajectories.

AC-PERF-1: 80 % of frames within 50 m.
AC-PERF-2: 60 % of frames within 20 m.
AC-PERF-3: p95 per-frame latency < 400 ms.
AC-PERF-4: VO drift over 1 km straight segment (no sat correction) < 100 m.
AC-PERF-5: ESKF confidence tier transitions correctly with satellite age.
AC-PERF-6: ESKF covariance shrinks after satellite correction.
AC-PERF-7: Benchmark result summary is non-empty string.
AC-PERF-8: Synthetic trajectory length matches requested frame count.
AC-PERF-9: BenchmarkResult.pct_within_50m / pct_within_20m computed correctly.
AC-PERF-10: 30-frame straight flight — median error < 30 m with sat corrections.
AC-PERF-11: VO failure frames do not crash benchmark.
AC-PERF-12: Waypoint steering changes direction correctly.
"""

import math
import time

import numpy as np
import pytest

from gps_denied.core.benchmark import (
    AccuracyBenchmark,
    BenchmarkResult,
    SyntheticTrajectory,
    SyntheticTrajectoryConfig,
)
from gps_denied.schemas import GPSPoint
from gps_denied.schemas.eskf import ESKFConfig

ORIGIN = GPSPoint(lat=49.0, lon=32.0)


# ---------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------

def _run_benchmark(
    num_frames: int = 30,
    vo_failures: list[int] | None = None,
    with_sat: bool = True,
    waypoints: list[tuple[float, float]] | None = None,
) -> BenchmarkResult:
    """Generate a synthetic flight and replay it through AccuracyBenchmark.

    Args:
        num_frames: Number of frames requested from the trajectory generator.
        vo_failures: Frame indices forwarded as ``vo_failure_frames``;
            ``None`` (or an empty list) means no failures.
        with_sat: When False, the benchmark gets a correction callback that
            always returns ``None``. When True, ``None`` is passed as the
            callback (presumably selecting the benchmark's built-in
            behaviour — confirm in AccuracyBenchmark).
        waypoints: Pairs forwarded as ``waypoints_enu``; ``None`` (or an
            empty list) means no waypoints.

    Returns:
        The BenchmarkResult produced by ``AccuracyBenchmark.run``.
    """
    # Pick the satellite-correction callback first; it does not depend on
    # the trajectory.
    if with_sat:
        correction_fn = None
    else:
        correction_fn = lambda _: None  # noqa: E731

    failure_ids = vo_failures if vo_failures else []
    waypoint_list = waypoints if waypoints else []

    trajectory_cfg = SyntheticTrajectoryConfig(
        origin=ORIGIN,
        speed_mps=20.0,
        heading_deg=0.0,
        num_frames=num_frames,
        vo_noise_m=0.3,
        imu_hz=50.0,   # reduced rate for test speed
        camera_fps=0.7,
        vo_failure_frames=failure_ids,
        waypoints_enu=waypoint_list,
    )
    synthetic_frames = SyntheticTrajectory(trajectory_cfg).generate()

    benchmark = AccuracyBenchmark(sat_correction_fn=correction_fn)
    return benchmark.run(synthetic_frames, ORIGIN, satellite_keyframe_interval=5)


# ---------------------------------------------------------------
# AC-PERF-8: Trajectory length
# ---------------------------------------------------------------

def test_trajectory_frame_count():
    """AC-PERF-8: the generator yields exactly as many frames as requested."""
    for requested in (10, 30, 50):
        generator = SyntheticTrajectory(
            SyntheticTrajectoryConfig(num_frames=requested, imu_hz=10.0)
        )
        produced = generator.generate()
        assert len(produced) == requested


def test_trajectory_frame_ids_sequential():
    """Generated frames carry the IDs 0, 1, ..., N-1 in order."""
    generator = SyntheticTrajectory(
        SyntheticTrajectoryConfig(num_frames=10, imu_hz=10.0)
    )
    observed_ids = [frame.frame_id for frame in generator.generate()]
    assert observed_ids == list(range(10))


def test_trajectory_positions_increase_northward():
    """With heading 0° (North), each frame lies strictly north of the previous one."""
    generator = SyntheticTrajectory(
        SyntheticTrajectoryConfig(
            num_frames=5,
            heading_deg=0.0,
            speed_mps=20.0,
            imu_hz=10.0,
        )
    )
    # Index 1 of the ENU position is the North component.
    north_track = [frame.true_position_enu[1] for frame in generator.generate()]
    for earlier, later in zip(north_track[:-1], north_track[1:]):
        assert later > earlier, "North component should increase for heading=0°"


# ---------------------------------------------------------------
# AC-PERF-9: BenchmarkResult percentage helpers
# ---------------------------------------------------------------

def test_pct_within_50m_all_inside():
    """AC-PERF-9: pct_within_50m is 1.0 when every error is below 50 m."""
    errors_below_limit = [10.0, 20.0, 49.9]
    sample_count = len(errors_below_limit)
    result = BenchmarkResult(
        errors_m=errors_below_limit,
        latencies_ms=[10.0] * sample_count,
        frames_total=sample_count,
        frames_with_good_estimate=sample_count,
    )
    assert result.pct_within_50m == pytest.approx(1.0)


def test_pct_within_50m_mixed():
    """AC-PERF-9: two of four errors below 50 m gives pct_within_50m == 0.5."""
    mixed_errors = [10.0, 60.0, 30.0, 80.0]  # 10 and 30 are inside, 60 and 80 outside
    sample_count = len(mixed_errors)
    result = BenchmarkResult(
        errors_m=mixed_errors,
        latencies_ms=[5.0] * sample_count,
        frames_total=sample_count,
        frames_with_good_estimate=sample_count,
    )
    assert result.pct_within_50m == pytest.approx(0.5)


def test_pct_within_20m():
    """AC-PERF-9: two of four errors below 20 m gives pct_within_20m == 0.5."""
    mixed_errors = [5.0, 15.0, 25.0, 50.0]  # 5 and 15 are inside, 25 and 50 outside
    sample_count = len(mixed_errors)
    result = BenchmarkResult(
        errors_m=mixed_errors,
        latencies_ms=[5.0] * sample_count,
        frames_total=sample_count,
        frames_with_good_estimate=sample_count,
    )
    assert result.pct_within_20m == pytest.approx(0.5)


def test_p80_error_m():
    """p80_error_m agrees with numpy's 80th percentile to within 0.01."""
    ramp = list(range(1, 11))  # 1..10
    reference_p80 = float(np.percentile(ramp, 80))

    result = BenchmarkResult(
        errors_m=ramp,
        latencies_ms=[1.0] * 10,
        frames_total=10,
        frames_with_good_estimate=10,
    )
    assert result.p80_error_m == pytest.approx(reference_p80, abs=0.01)


# ---------------------------------------------------------------
# AC-PERF-7: Summary string
# ---------------------------------------------------------------

def test_benchmark_summary_non_empty():
    """AC-PERF-7: summary() yields a substantial string naming the key metrics."""
    report = BenchmarkResult(
        errors_m=[5.0, 10.0, 20.0],
        latencies_ms=[50.0, 60.0, 55.0],
        frames_total=3,
        frames_with_good_estimate=3,
    ).summary()

    assert len(report) > 50
    # A verdict marker must appear, whichever way the criteria come out.
    assert any(verdict in report for verdict in ("PASS", "FAIL"))
    # Both accuracy thresholds must be mentioned.
    for threshold_label in ("50m", "20m"):
        assert threshold_label in report


# ---------------------------------------------------------------
# AC-PERF-3: Latency < 400ms
# ---------------------------------------------------------------

def test_per_frame_latency_under_400ms():
    """AC-PERF-3: p95 per-frame latency of a 20-frame synthetic run is below 400 ms."""
    outcome = _run_benchmark(num_frames=20)
    within_budget = outcome.p95_latency_ms < 400.0
    assert within_budget, (
        f"p95 latency {outcome.p95_latency_ms:.1f}ms exceeds 400ms budget"
    )


# ---------------------------------------------------------------
# AC-PERF-1 / AC-PERF-2 / AC-PERF-10: Accuracy with satellite corrections
# ---------------------------------------------------------------

def test_median_error_with_sat_corrections():
    """AC-PERF-10: 30-frame flight with sat corrections has median error below 30 m."""
    outcome = _run_benchmark(num_frames=30, with_sat=True)
    median_ok = outcome.median_error_m < 30.0
    assert median_ok, (
        f"Median error {outcome.median_error_m:.1f}m with sat corrections — expected <30m"
    )


def test_pct_within_50m_with_sat_corrections():
    """AC-PERF-1: at least 80% of 40 frames are within 50 m with sat corrections."""
    outcome = _run_benchmark(num_frames=40, with_sat=True)
    meets_target = outcome.pct_within_50m >= 0.80
    assert meets_target, (
        f"Only {outcome.pct_within_50m*100:.1f}% of frames within 50m "
        f"(expected ≥80%) — median error: {outcome.median_error_m:.1f}m"
    )


def test_pct_within_20m_with_sat_corrections():
    """AC-PERF-2: at least 60% of 40 frames are within 20 m with sat corrections."""
    outcome = _run_benchmark(num_frames=40, with_sat=True)
    meets_target = outcome.pct_within_20m >= 0.60
    assert meets_target, (
        f"Only {outcome.pct_within_20m*100:.1f}% of frames within 20m "
        f"(expected ≥60%) — median error: {outcome.median_error_m:.1f}m"
    )


# ---------------------------------------------------------------
# AC-PERF-11: VO failures don't crash
# ---------------------------------------------------------------

def test_vo_failure_frames_no_crash():
    """AC-PERF-11: a run with some VO-failure frames still reports every frame."""
    frame_count = 20
    failing_frames = [3, 7, 12]
    outcome = _run_benchmark(num_frames=frame_count, vo_failures=failing_frames)

    assert outcome.frames_total == frame_count
    # One error sample is expected per frame, failed or not.
    assert len(outcome.errors_m) == frame_count


def test_all_frames_vo_failure():
    """Every frame fails VO and no sat fix is supplied; the run still completes.

    Errors may grow, even to ``+inf``, but NaN and ``-inf`` are rejected.
    """
    frame_count = 10
    outcome = _run_benchmark(
        num_frames=frame_count,
        vo_failures=list(range(frame_count)),
        with_sat=False,
    )
    # With no VO and no sat, errors grow but benchmark doesn't crash
    assert len(outcome.errors_m) == frame_count
    unacceptable = [
        err
        for err in outcome.errors_m
        if not (math.isfinite(err) or err == float("inf"))
    ]
    assert not unacceptable


# ---------------------------------------------------------------
# AC-PERF-12: Waypoint steering
# ---------------------------------------------------------------

def test_waypoint_steering_changes_direction():
    """AC-PERF-12: Waypoint steering causes trajectory to turn toward target.

    Two checks are made:

    1. The ground-truth trajectory generated with a waypoint due East ends
       further East than the same flight without waypoints. This is the
       actual direction-change check.
    2. The benchmark replays the steered trajectory and reports all frames.
    """
    # Waypoint 500m East, 0m North (forces eastward turn from northward heading)
    waypoint_enu = (500.0, 0.0)
    frame_count = 15

    def _final_east(waypoints):
        """Return the East coordinate of the last ground-truth frame."""
        # Same generator settings as _run_benchmark, so both checks
        # describe the same flight.
        cfg = SyntheticTrajectoryConfig(
            origin=ORIGIN,
            speed_mps=20.0,
            heading_deg=0.0,
            num_frames=frame_count,
            vo_noise_m=0.3,
            imu_hz=50.0,
            camera_fps=0.7,
            vo_failure_frames=[],
            waypoints_enu=waypoints,
        )
        frames = SyntheticTrajectory(cfg).generate()
        # ENU ordering: index 1 is North elsewhere in this file, so index 0
        # is taken to be East.
        return frames[-1].true_position_enu[0]

    steered_east = _final_east([waypoint_enu])
    straight_east = _final_east([])
    assert steered_east > straight_east, (
        f"Waypoint to the East did not pull the trajectory eastward: "
        f"steered east={steered_east:.1f}m, straight east={straight_east:.1f}m"
    )

    # Benchmark runs without error on the steered flight; basic sanity
    result = _run_benchmark(
        num_frames=frame_count,
        waypoints=[waypoint_enu],
        with_sat=True,
    )
    assert result.frames_total == frame_count


# ---------------------------------------------------------------
# AC-PERF-4: VO drift over 1 km straight segment
# ---------------------------------------------------------------

def test_vo_drift_under_100m_over_1km():
    """AC-PERF-4: the 1 km VO drift test reports a drift below 100 m."""
    drift_m = AccuracyBenchmark().run_vo_drift_test(
        trajectory_length_m=1000.0,
        speed_mps=20.0,
    )
    drift_ok = drift_m < 100.0
    assert drift_ok, (
        f"VO drift {drift_m:.1f}m over 1km — solution.md limit is 100m"
    )


# ---------------------------------------------------------------
# AC-PERF-6: Covariance shrinks after satellite update
# ---------------------------------------------------------------

def test_covariance_shrinks_after_satellite_update():
    """AC-PERF-6: the trace of the leading 3x3 block of ``_P`` drops after a sat update."""
    from gps_denied.core.eskf import ESKF

    def _position_trace(filter_):
        # Leading 3x3 block of the private covariance matrix.
        return float(np.trace(filter_._P[:3, :3]))

    eskf = ESKF(ESKFConfig())
    start_position = np.zeros(3)
    eskf.initialize(start_position, time.time())

    cov_before = _position_trace(eskf)

    # Feed a satellite measurement located exactly at the initial position.
    eskf.update_satellite(np.zeros(3), noise_meters=10.0)

    cov_after = _position_trace(eskf)
    assert cov_after < cov_before, (
        f"Covariance trace did not shrink: before={cov_before:.2f}, after={cov_after:.2f}"
    )


# ---------------------------------------------------------------
# AC-PERF-5: Confidence tier transitions
# ---------------------------------------------------------------

def test_confidence_high_after_fresh_satellite():
    """AC-PERF-5: the tier is HIGH right after a satellite correction."""
    from gps_denied.core.eskf import ESKF
    from gps_denied.schemas.eskf import ConfidenceTier

    eskf = ESKF(
        ESKFConfig(satellite_max_age=30.0, covariance_high_threshold=400.0)
    )
    eskf.initialize(np.zeros(3), time.time())

    # Apply a satellite fix at the initial position (intended to shrink covariance).
    eskf.update_satellite(np.zeros(3), noise_meters=5.0)
    # Force the fix to be "fresh": stamp it with the filter's own latest time.
    eskf._last_satellite_time = eskf._last_timestamp

    observed_tier = eskf.get_confidence()
    assert observed_tier == ConfidenceTier.HIGH, f"Expected HIGH after fresh sat, got {observed_tier}"


def test_confidence_medium_after_vo_only():
    """AC-PERF-5: the tier is MEDIUM when a VO time is set and no satellite time exists."""
    from gps_denied.core.eskf import ESKF
    from gps_denied.schemas.eskf import ConfidenceTier

    eskf = ESKF(ESKFConfig())
    eskf.initialize(np.zeros(3), time.time())

    # No satellite correction has ever arrived...
    eskf._last_satellite_time = None
    # ...but pretend a VO update just happened by stamping it with "now".
    eskf._last_vo_time = eskf._last_timestamp

    observed_tier = eskf.get_confidence()
    assert observed_tier == ConfidenceTier.MEDIUM, f"Expected MEDIUM with VO only, got {observed_tier}"


def test_confidence_failed_after_3_consecutive():
    """AC-PERF-5: get_confidence(consecutive_failures=3) reports FAILED."""
    from gps_denied.core.eskf import ESKF
    from gps_denied.schemas.eskf import ConfidenceTier

    # Default-constructed filter, initialised at the origin.
    eskf = ESKF()
    start_position = np.zeros(3)
    eskf.initialize(start_position, time.time())

    observed_tier = eskf.get_confidence(consecutive_failures=3)
    assert observed_tier == ConfidenceTier.FAILED


# ---------------------------------------------------------------
# passes_acceptance_criteria integration
# ---------------------------------------------------------------

def test_passes_acceptance_criteria_full_pass():
    """Ideal data makes passes_acceptance_criteria return True with every check truthy."""
    sample_count = 100
    ideal = BenchmarkResult(
        errors_m=[5.0] * sample_count,       # every error is 5 m, inside both the 50 m and 20 m limits
        latencies_ms=[10.0] * sample_count,  # every latency is 10 ms, so p95 is 10 ms
        frames_total=sample_count,
        frames_with_good_estimate=sample_count,
    )
    overall, checks = ideal.passes_acceptance_criteria()

    assert overall is True
    failed_checks = [name for name, passed in checks.items() if not passed]
    assert not failed_checks


def test_passes_acceptance_criteria_latency_fail():
    """A 500 ms latency on every frame fails the overall verdict and the AC-PERF-3 check."""
    sample_count = 100
    slow = BenchmarkResult(
        errors_m=[5.0] * sample_count,
        latencies_ms=[500.0] * sample_count,  # every latency is 500 ms, so p95 exceeds 400 ms
        frames_total=sample_count,
        frames_with_good_estimate=sample_count,
    )
    overall, checks = slow.passes_acceptance_criteria()

    assert overall is False
    latency_check = checks["AC-PERF-3: p95 latency < 400ms"]
    assert latency_check is False


def test_passes_acceptance_criteria_accuracy_fail():
    """A 60 m error on every frame fails the overall verdict and the AC-PERF-1 check."""
    sample_count = 100
    inaccurate = BenchmarkResult(
        errors_m=[60.0] * sample_count,      # every error is 60 m, so none are within 50 m
        latencies_ms=[5.0] * sample_count,
        frames_total=sample_count,
        frames_with_good_estimate=sample_count,
    )
    overall, checks = inaccurate.passes_acceptance_criteria()

    assert overall is False
    accuracy_check = checks["AC-PERF-1: 80% within 50m"]
    assert accuracy_check is False