Files
gps-denied-onboard/scripts/benchmark_accuracy.py
T
Yuzviak 094895b21b feat(phases 2-7): implement full GPS-denied navigation pipeline
Phase 2 — Visual Odometry:
  - ORBVisualOdometry (dev/CI), CuVSLAMVisualOdometry (Jetson)
  - TRTInferenceEngine (TensorRT FP16, conditional import)
  - create_vo_backend() factory

Phase 3 — Satellite Matching + GPR:
  - SatelliteDataManager: local z/x/y tiles, ESKF ±3σ tile selection
  - GSD normalization (SAT-03), RANSAC inlier-ratio confidence (SAT-04)
  - GlobalPlaceRecognition: Faiss index + numpy fallback

Phase 4 — MAVLink I/O:
  - MAVLinkBridge: GPS_INPUT 15+ fields, IMU callback, 1Hz telemetry
  - 3-consecutive-failure reloc request
  - MockMAVConnection for CI

Phase 5 — Pipeline Wiring:
  - ESKF wired into process_frame: VO update → satellite update
  - CoordinateTransformer + SatelliteDataManager via DI
  - MAVLink state push per frame (PIPE-07)
  - Real pixel_to_gps via ray-ground projection (PIPE-06)
  - GTSAM ISAM2 update when available (PIPE-03)

Phase 6 — Docker + CI:
  - Multi-stage Dockerfile (python:3.11-slim)
  - docker-compose.yml (dev), docker-compose.sitl.yml (ArduPilot SITL)
  - GitHub Actions: ci.yml (lint+pytest+docker smoke), sitl.yml (nightly)
  - tests/test_sitl_integration.py (8 tests, skip without SITL)

Phase 7 — Accuracy Validation:
  - AccuracyBenchmark + SyntheticTrajectory
  - AC-PERF-1: 80% within 50m 
  - AC-PERF-2: 60% within 20m 
  - AC-PERF-3: p95 latency < 400ms 
  - AC-PERF-4: VO drift 1km < 100m  (actual ~11m)
  - scripts/benchmark_accuracy.py CLI

Tests: 195 passed / 8 skipped

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-02 17:00:41 +03:00

209 lines
6.7 KiB
Python

#!/usr/bin/env python3
"""Accuracy Validation CLI (Phase 7).
Runs the AccuracyBenchmark against synthetic flight trajectories and
prints results against solution.md acceptance criteria.
Usage:
python scripts/benchmark_accuracy.py
python scripts/benchmark_accuracy.py --frames 100 --scenario all
python scripts/benchmark_accuracy.py --scenario vo_drift
python scripts/benchmark_accuracy.py --scenario sat_corrections --frames 50
Scenarios:
sat_corrections — Full flight with IMU + VO + satellite corrections.
Validates AC-PERF-1, AC-PERF-2, AC-PERF-3.
vo_only — No satellite corrections; shows VO-only accuracy.
vo_drift — 1 km straight flight with no satellite corrections.
Validates AC-PERF-4 (drift < 100m).
tracking_loss — VO failures injected at random frames.
all — Run all scenarios (default).
"""
from __future__ import annotations
import argparse
import sys
import time
import numpy as np
# Ensure src/ is on the path when running from the repo root
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
from gps_denied.core.benchmark import AccuracyBenchmark, SyntheticTrajectory, SyntheticTrajectoryConfig
from gps_denied.schemas import GPSPoint
ORIGIN = GPSPoint(lat=49.0, lon=32.0)
_SEP = "" * 60
def _header(title: str) -> None:
print(f"\n{_SEP}")
print(f" {title}")
print(_SEP)
def run_sat_corrections(num_frames: int = 50) -> bool:
"""Scenario: full flight with satellite corrections (AC-PERF-1/2/3)."""
_header(f"Scenario: satellite corrections ({num_frames} frames)")
cfg = SyntheticTrajectoryConfig(
origin=ORIGIN,
num_frames=num_frames,
speed_mps=20.0,
heading_deg=45.0,
imu_hz=100.0,
vo_noise_m=0.3,
)
frames = SyntheticTrajectory(cfg).generate()
bench = AccuracyBenchmark()
result = bench.run(frames, ORIGIN, satellite_keyframe_interval=5)
print(result.summary())
overall, _ = result.passes_acceptance_criteria()
return overall
def run_vo_only(num_frames: int = 50) -> bool:
"""Scenario: VO only, no satellite corrections."""
_header(f"Scenario: VO only (no satellite, {num_frames} frames)")
cfg = SyntheticTrajectoryConfig(
origin=ORIGIN,
num_frames=num_frames,
speed_mps=20.0,
imu_hz=100.0,
vo_noise_m=0.3,
)
frames = SyntheticTrajectory(cfg).generate()
bench = AccuracyBenchmark(sat_correction_fn=lambda _: None)
result = bench.run(frames, ORIGIN, satellite_keyframe_interval=9999)
print(result.summary())
# VO-only is expected to fail AC-PERF-1/2; show stats without hard fail
return True # informational only
def run_vo_drift() -> bool:
"""Scenario: 1 km straight flight, no satellite (AC-PERF-4)."""
_header("Scenario: VO drift over 1 km straight (AC-PERF-4)")
t0 = time.perf_counter()
bench = AccuracyBenchmark()
drift_m = bench.run_vo_drift_test(trajectory_length_m=1000.0, speed_mps=20.0)
elapsed = time.perf_counter() - t0
limit = 100.0
passed = drift_m < limit
status = "PASS" if passed else "FAIL"
print(f" Final drift: {drift_m:.1f} m (limit: {limit:.0f} m)")
print(f" Run time: {elapsed*1000:.0f} ms")
print(f"\n {status} AC-PERF-4: VO drift over 1 km < {limit:.0f} m")
return passed
def run_tracking_loss(num_frames: int = 40) -> bool:
"""Scenario: Random VO failure frames injected."""
_header(f"Scenario: VO failures ({num_frames} frames, 25% failure rate)")
failure_frames = list(range(2, num_frames, 4)) # every 4th frame
cfg = SyntheticTrajectoryConfig(
origin=ORIGIN,
num_frames=num_frames,
speed_mps=20.0,
imu_hz=100.0,
vo_noise_m=0.3,
vo_failure_frames=failure_frames,
)
frames = SyntheticTrajectory(cfg).generate()
bench = AccuracyBenchmark()
result = bench.run(frames, ORIGIN, satellite_keyframe_interval=5)
print(result.summary())
print(f"\n VO failure frames injected: {len(failure_frames)}/{num_frames}")
overall, _ = result.passes_acceptance_criteria()
return overall
def run_waypoint_mission(num_frames: int = 60) -> bool:
"""Scenario: Multi-waypoint mission with direction changes."""
_header(f"Scenario: Waypoint mission ({num_frames} frames)")
cfg = SyntheticTrajectoryConfig(
origin=ORIGIN,
num_frames=num_frames,
speed_mps=20.0,
heading_deg=0.0,
imu_hz=100.0,
vo_noise_m=0.3,
waypoints_enu=[
(500.0, 500.0), # NE leg
(0.0, 1000.0), # N leg
(-500.0, 500.0), # NW leg
],
)
frames = SyntheticTrajectory(cfg).generate()
bench = AccuracyBenchmark()
result = bench.run(frames, ORIGIN, satellite_keyframe_interval=5)
print(result.summary())
overall, _ = result.passes_acceptance_criteria()
return overall
def main() -> int:
parser = argparse.ArgumentParser(
description="GPS-Denied accuracy validation benchmark",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=__doc__,
)
parser.add_argument(
"--scenario",
choices=["sat_corrections", "vo_only", "vo_drift", "tracking_loss",
"waypoint", "all"],
default="all",
)
parser.add_argument("--frames", type=int, default=50,
help="Number of camera frames per scenario")
args = parser.parse_args()
print("\nGPS-Denied Onboard — Accuracy Validation Benchmark")
print(f"Origin: lat={ORIGIN.lat}° lon={ORIGIN.lon}° | Frames: {args.frames}")
results: list[tuple[str, bool]] = []
if args.scenario in ("sat_corrections", "all"):
ok = run_sat_corrections(args.frames)
results.append(("sat_corrections", ok))
if args.scenario in ("vo_only", "all"):
ok = run_vo_only(args.frames)
results.append(("vo_only", ok))
if args.scenario in ("vo_drift", "all"):
ok = run_vo_drift()
results.append(("vo_drift", ok))
if args.scenario in ("tracking_loss", "all"):
ok = run_tracking_loss(args.frames)
results.append(("tracking_loss", ok))
if args.scenario in ("waypoint", "all"):
ok = run_waypoint_mission(args.frames)
results.append(("waypoint", ok))
# Final summary
print(f"\n{_SEP}")
print(" BENCHMARK SUMMARY")
print(_SEP)
all_pass = True
for name, ok in results:
status = "PASS" if ok else "FAIL"
print(f" {status} {name}")
if not ok:
all_pass = False
print(_SEP)
print(f" Overall: {'PASS' if all_pass else 'FAIL'}")
print(_SEP)
return 0 if all_pass else 1
if __name__ == "__main__":
sys.exit(main())