Files
gps-denied-onboard/scripts/benchmark_accuracy.py
T
Yuzviak 78dcf7b4e7 fix: post-audit — runtime bugs, functional gaps, docs, hardening
Phase A — Runtime bugs:
  - SSE: add push_event() method to SSEEventStreamer (was missing, masked by mocks)
  - MAVLink: satellites_visible=10 (was 0, triggers ArduPilot failsafe)
  - MAVLink: horiz_accuracy=sqrt(P[0,0]+P[1,1]) per spec (was sqrt(avg))
  - MAVLink: MEDIUM confidence → fix_type=3 per solution.md (was 2)

Phase B — Functional gaps:
  - handle_user_fix() injects operator GPS into ESKF with noise=500m
  - app.py uses create_vo_backend() factory (was hardcoded SequentialVO)
  - ESKF: Mahalanobis gating on satellite updates (rejects outliers >5σ)
  - ESKF: public accessors (position, quaternion, covariance, last_timestamp)
  - Processor: no more private ESKF field access

Phase C — Documentation:
  - README: correct API endpoints, CLI command, 40+ env vars documented
  - Dockerfile: ENV prefixes match pydantic-settings (DB_, SATELLITE_, MAVLINK_)
  - tech_stack.md marked ARCHIVED (contradicts solution.md)

Phase D — Hardening:
  - JWT auth middleware (AUTH_ENABLED=false default, verify_token on /flights)
  - TLS config env vars (AUTH_SSL_CERTFILE, AUTH_SSL_KEYFILE)
  - SHA-256 tile manifest verification in SatelliteDataManager
  - AuthConfig, ESKFSettings, MAVLinkConfig, SatelliteConfig in config.py

Also: conftest.py shared fixtures, download_tiles.py, convert_to_trt.py scripts,
config wiring into app.py lifespan, config-driven ESKF, calculate_precise_angle fix.

Tests: 196 passed / 8 skipped. Ruff clean.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-02 18:27:35 +03:00

207 lines
6.7 KiB
Python

#!/usr/bin/env python3
"""Accuracy Validation CLI (Phase 7).
Runs the AccuracyBenchmark against synthetic flight trajectories and
prints results against solution.md acceptance criteria.
Usage:
python scripts/benchmark_accuracy.py
python scripts/benchmark_accuracy.py --frames 100 --scenario all
python scripts/benchmark_accuracy.py --scenario vo_drift
python scripts/benchmark_accuracy.py --scenario sat_corrections --frames 50
Scenarios:
sat_corrections — Full flight with IMU + VO + satellite corrections.
Validates AC-PERF-1, AC-PERF-2, AC-PERF-3.
vo_only — No satellite corrections; shows VO-only accuracy.
vo_drift — 1 km straight flight with no satellite corrections.
Validates AC-PERF-4 (drift < 100m).
tracking_loss — VO failures injected at random frames.
all — Run all scenarios (default).
"""
from __future__ import annotations
import argparse
# Ensure src/ is on the path when running from the repo root
import os
import sys
import time
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
from gps_denied.core.benchmark import AccuracyBenchmark, SyntheticTrajectory, SyntheticTrajectoryConfig
from gps_denied.schemas import GPSPoint
ORIGIN = GPSPoint(lat=49.0, lon=32.0)
_SEP = "" * 60
def _header(title: str) -> None:
print(f"\n{_SEP}")
print(f" {title}")
print(_SEP)
def run_sat_corrections(num_frames: int = 50) -> bool:
"""Scenario: full flight with satellite corrections (AC-PERF-1/2/3)."""
_header(f"Scenario: satellite corrections ({num_frames} frames)")
cfg = SyntheticTrajectoryConfig(
origin=ORIGIN,
num_frames=num_frames,
speed_mps=20.0,
heading_deg=45.0,
imu_hz=100.0,
vo_noise_m=0.3,
)
frames = SyntheticTrajectory(cfg).generate()
bench = AccuracyBenchmark()
result = bench.run(frames, ORIGIN, satellite_keyframe_interval=5)
print(result.summary())
overall, _ = result.passes_acceptance_criteria()
return overall
def run_vo_only(num_frames: int = 50) -> bool:
"""Scenario: VO only, no satellite corrections."""
_header(f"Scenario: VO only (no satellite, {num_frames} frames)")
cfg = SyntheticTrajectoryConfig(
origin=ORIGIN,
num_frames=num_frames,
speed_mps=20.0,
imu_hz=100.0,
vo_noise_m=0.3,
)
frames = SyntheticTrajectory(cfg).generate()
bench = AccuracyBenchmark(sat_correction_fn=lambda _: None)
result = bench.run(frames, ORIGIN, satellite_keyframe_interval=9999)
print(result.summary())
# VO-only is expected to fail AC-PERF-1/2; show stats without hard fail
return True # informational only
def run_vo_drift() -> bool:
"""Scenario: 1 km straight flight, no satellite (AC-PERF-4)."""
_header("Scenario: VO drift over 1 km straight (AC-PERF-4)")
t0 = time.perf_counter()
bench = AccuracyBenchmark()
drift_m = bench.run_vo_drift_test(trajectory_length_m=1000.0, speed_mps=20.0)
elapsed = time.perf_counter() - t0
limit = 100.0
passed = drift_m < limit
status = "PASS" if passed else "FAIL"
print(f" Final drift: {drift_m:.1f} m (limit: {limit:.0f} m)")
print(f" Run time: {elapsed*1000:.0f} ms")
print(f"\n {status} AC-PERF-4: VO drift over 1 km < {limit:.0f} m")
return passed
def run_tracking_loss(num_frames: int = 40) -> bool:
"""Scenario: Random VO failure frames injected."""
_header(f"Scenario: VO failures ({num_frames} frames, 25% failure rate)")
failure_frames = list(range(2, num_frames, 4)) # every 4th frame
cfg = SyntheticTrajectoryConfig(
origin=ORIGIN,
num_frames=num_frames,
speed_mps=20.0,
imu_hz=100.0,
vo_noise_m=0.3,
vo_failure_frames=failure_frames,
)
frames = SyntheticTrajectory(cfg).generate()
bench = AccuracyBenchmark()
result = bench.run(frames, ORIGIN, satellite_keyframe_interval=5)
print(result.summary())
print(f"\n VO failure frames injected: {len(failure_frames)}/{num_frames}")
overall, _ = result.passes_acceptance_criteria()
return overall
def run_waypoint_mission(num_frames: int = 60) -> bool:
"""Scenario: Multi-waypoint mission with direction changes."""
_header(f"Scenario: Waypoint mission ({num_frames} frames)")
cfg = SyntheticTrajectoryConfig(
origin=ORIGIN,
num_frames=num_frames,
speed_mps=20.0,
heading_deg=0.0,
imu_hz=100.0,
vo_noise_m=0.3,
waypoints_enu=[
(500.0, 500.0), # NE leg
(0.0, 1000.0), # N leg
(-500.0, 500.0), # NW leg
],
)
frames = SyntheticTrajectory(cfg).generate()
bench = AccuracyBenchmark()
result = bench.run(frames, ORIGIN, satellite_keyframe_interval=5)
print(result.summary())
overall, _ = result.passes_acceptance_criteria()
return overall
def main() -> int:
parser = argparse.ArgumentParser(
description="GPS-Denied accuracy validation benchmark",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=__doc__,
)
parser.add_argument(
"--scenario",
choices=["sat_corrections", "vo_only", "vo_drift", "tracking_loss",
"waypoint", "all"],
default="all",
)
parser.add_argument("--frames", type=int, default=50,
help="Number of camera frames per scenario")
args = parser.parse_args()
print("\nGPS-Denied Onboard — Accuracy Validation Benchmark")
print(f"Origin: lat={ORIGIN.lat}° lon={ORIGIN.lon}° | Frames: {args.frames}")
results: list[tuple[str, bool]] = []
if args.scenario in ("sat_corrections", "all"):
ok = run_sat_corrections(args.frames)
results.append(("sat_corrections", ok))
if args.scenario in ("vo_only", "all"):
ok = run_vo_only(args.frames)
results.append(("vo_only", ok))
if args.scenario in ("vo_drift", "all"):
ok = run_vo_drift()
results.append(("vo_drift", ok))
if args.scenario in ("tracking_loss", "all"):
ok = run_tracking_loss(args.frames)
results.append(("tracking_loss", ok))
if args.scenario in ("waypoint", "all"):
ok = run_waypoint_mission(args.frames)
results.append(("waypoint", ok))
# Final summary
print(f"\n{_SEP}")
print(" BENCHMARK SUMMARY")
print(_SEP)
all_pass = True
for name, ok in results:
status = "PASS" if ok else "FAIL"
print(f" {status} {name}")
if not ok:
all_pass = False
print(_SEP)
print(f" Overall: {'PASS' if all_pass else 'FAIL'}")
print(_SEP)
return 0 if all_pass else 1
if __name__ == "__main__":
sys.exit(main())