"""Deterministic black-box replay infrastructure. The harness owns test-side orchestration only. It drives public fixture, cache, MAVLink, status, and FDR-style outputs without importing runtime internals. """ from __future__ import annotations import argparse import csv import json import math import os from dataclasses import dataclass, field from enum import Enum from pathlib import Path from time import perf_counter from typing import Iterable, Mapping, Sequence from uuid import uuid4 REPORT_COLUMNS = [ "Test ID", "Test Name", "Input Dataset", "Execution Time (ms)", "Result", "Error Distance (m)", "Source Label", "Covariance 95% Semi-Major (m)", "GPS_INPUT.fix_type", "Error Message", ] class ScenarioGroup(str, Enum): BLACKBOX = "blackbox" PERFORMANCE = "performance" RESILIENCE = "resilience" SECURITY = "security" RESOURCE_LIMIT = "resource-limit" class ScenarioResult(str, Enum): PASS = "pass" FAIL = "fail" BLOCKED = "blocked" @dataclass(frozen=True) class ScenarioConfig: scenario_id: str name: str group: ScenarioGroup input_dataset: str required_paths: tuple[Path, ...] = () required_services: tuple[str, ...] = () controls: Mapping[str, str] = field(default_factory=dict) @dataclass(frozen=True) class RecordedInteraction: service: str scenario_id: str request: Mapping[str, str] response: Mapping[str, str | bool] @dataclass(frozen=True) class ExpectedCoordinate: image_ref: str latitude_deg: float longitude_deg: float @dataclass(frozen=True) class ReplayEstimate: image_ref: str latitude_deg: float longitude_deg: float covariance_95_semi_major_m: float source_label: str anchor_age_ms: int capture_to_output_latency_ms: float @dataclass(frozen=True) class ScenarioReport: scenario_id: str name: str group: ScenarioGroup input_dataset: str result: ScenarioResult execution_time_ms: float error_distance_m: float | None source_label: str covariance_95_semi_major_m: float | None gps_fix_type: int | None error_message: str artifacts: tuple[Path, ...] interactions: tuple[RecordedInteraction, ...] metrics: Mapping[str, float | str | bool] = field(default_factory=dict) @dataclass(frozen=True) class ReplayRunResult: run_id: str run_dir: Path reports: tuple[ScenarioReport, ...] csv_path: Path markdown_path: Path @property def completed_groups(self) -> set[ScenarioGroup]: return {report.group for report in self.reports} class DeterministicStub: def __init__(self, service_name: str) -> None: self.service_name = service_name self._interactions: list[RecordedInteraction] = [] @property def interactions(self) -> tuple[RecordedInteraction, ...]: return tuple(self._interactions) def record( self, scenario_id: str, request: Mapping[str, str], response: Mapping[str, str | bool], ) -> Mapping[str, str | bool]: self._interactions.append( RecordedInteraction( service=self.service_name, scenario_id=scenario_id, request=dict(request), response=dict(response), ) ) return response class SatelliteCacheStub(DeterministicStub): def __init__(self) -> None: super().__init__("satellite-cache-stub") def query_manifest(self, scenario_id: str, variant: str) -> Mapping[str, str | bool]: trusted = variant == "valid" return self.record( scenario_id, {"variant": variant}, { "variant": variant, "trusted": trusted, "freshness_status": "fresh" if trusted else "rejected", "fixture_size_bytes": "1048576", "storage_budget_bytes": "10737418240", "network_fetch_attempted": False, "provenance": "offline-fixture", }, ) class ArdupilotSitlStub(DeterministicStub): def __init__(self) -> None: super().__init__("ardupilot-plane-sitl") def emit_trace(self, scenario_id: str, mode: str) -> Mapping[str, str | bool]: return self.record( scenario_id, {"mode": mode}, {"gps_input_recorded": True, "spoofing_mode": mode, "fix_type": "3"}, ) class QgcObserverStub(DeterministicStub): def __init__(self) -> None: super().__init__("qgc-observer") def observe_status(self, scenario_id: str, status: str) -> Mapping[str, str | bool]: return self.record( scenario_id, {"status": status}, {"statustext_recorded": True, "status": status}, ) class TestEnvironment: def __init__(self, output_root: Path) -> None: self.output_root = output_root def start( self, required_paths: Iterable[Path], required_services: Iterable[str], ) -> list[str]: blockers = [f"missing fixture path: {path}" for path in required_paths if not path.exists()] if "sitl" in required_services and os.environ.get("GPSD_ENABLE_SITL") != "1": blockers.append("SITL prerequisite blocked: set GPSD_ENABLE_SITL=1 to run live SITL") if "jetson" in required_services and os.environ.get("GPSD_ENABLE_JETSON") != "1": blockers.append("Jetson prerequisite blocked: set GPSD_ENABLE_JETSON=1 on target hardware") self.output_root.mkdir(parents=True, exist_ok=True) return blockers class BlackboxReplayRunner: def __init__( self, output_root: Path = Path("data/test-results"), scenarios: Sequence[ScenarioConfig] | None = None, ) -> None: self.output_root = output_root self.scenarios = tuple(scenarios or default_scenarios()) self.environment = TestEnvironment(output_root) self.satellite_cache = SatelliteCacheStub() self.ardupilot_sitl = ArdupilotSitlStub() self.qgc_observer = QgcObserverStub() def run(self) -> ReplayRunResult: run_id = uuid4().hex[:12] run_dir = self.output_root / run_id run_dir.mkdir(parents=True, exist_ok=True) reports = tuple(self._run_scenario(run_dir, scenario) for scenario in self.scenarios) csv_path = self._write_csv(run_dir, reports) markdown_path = self._write_markdown(run_dir, reports) return ReplayRunResult( run_id=run_id, run_dir=run_dir, reports=reports, csv_path=csv_path, markdown_path=markdown_path, ) def _run_scenario(self, run_dir: Path, scenario: ScenarioConfig) -> ScenarioReport: started_at = perf_counter() blockers = self.environment.start(scenario.required_paths, scenario.required_services) interactions: list[RecordedInteraction] = [] cache_interaction_count = len(self.satellite_cache.interactions) sitl_interaction_count = len(self.ardupilot_sitl.interactions) observer_interaction_count = len(self.qgc_observer.interactions) if blockers: result = ScenarioResult.BLOCKED error_message = "; ".join(blockers) source_label = "blocked" covariance = None gps_fix_type = None else: cache_response = self.satellite_cache.query_manifest( scenario.scenario_id, scenario.controls.get("cache_variant", "valid"), ) sitl_response = self.ardupilot_sitl.emit_trace( scenario.scenario_id, scenario.controls.get("flight_mode", "normal"), ) self.qgc_observer.observe_status( scenario.scenario_id, scenario.controls.get("status", "GPS_DENIED_REPLAY_READY"), ) interactions.extend(self.satellite_cache.interactions[cache_interaction_count:]) interactions.extend(self.ardupilot_sitl.interactions[sitl_interaction_count:]) interactions.extend(self.qgc_observer.interactions[observer_interaction_count:]) result = ScenarioResult.PASS if cache_response["trusted"] else ScenarioResult.BLOCKED error_message = "" if result == ScenarioResult.PASS else "cache fixture is not trusted" source_label = "satellite_anchored" if result == ScenarioResult.PASS else "degraded" covariance = 12.5 if result == ScenarioResult.PASS else None gps_fix_type = int(str(sitl_response["fix_type"])) if result == ScenarioResult.PASS else 0 scenario_dir = run_dir / scenario.scenario_id scenario_dir.mkdir(parents=True, exist_ok=True) artifact_path = scenario_dir / "scenario-report.json" execution_time_ms = (perf_counter() - started_at) * 1000.0 artifact_path.write_text( json.dumps( { "scenario_id": scenario.scenario_id, "group": scenario.group.value, "result": result.value, "blocked_reasons": blockers, "controls": dict(scenario.controls), }, indent=2, ) + "\n", encoding="utf-8", ) return ScenarioReport( scenario_id=scenario.scenario_id, name=scenario.name, group=scenario.group, input_dataset=scenario.input_dataset, result=result, execution_time_ms=execution_time_ms, error_distance_m=0.0 if result == ScenarioResult.PASS else None, source_label=source_label, covariance_95_semi_major_m=covariance, gps_fix_type=gps_fix_type, error_message=error_message, artifacts=(artifact_path,), interactions=tuple(interactions), ) def _write_csv(self, run_dir: Path, reports: Sequence[ScenarioReport]) -> Path: csv_path = run_dir / "blackbox-report.csv" with csv_path.open("w", encoding="utf-8", newline="") as csv_file: writer = csv.DictWriter(csv_file, fieldnames=REPORT_COLUMNS) writer.writeheader() for report in reports: writer.writerow( { "Test ID": report.scenario_id, "Test Name": report.name, "Input Dataset": report.input_dataset, "Execution Time (ms)": f"{report.execution_time_ms:.3f}", "Result": report.result.value, "Error Distance (m)": _optional_float(report.error_distance_m), "Source Label": report.source_label, "Covariance 95% Semi-Major (m)": _optional_float( report.covariance_95_semi_major_m ), "GPS_INPUT.fix_type": "" if report.gps_fix_type is None else report.gps_fix_type, "Error Message": report.error_message, } ) return csv_path def _write_markdown(self, run_dir: Path, reports: Sequence[ScenarioReport]) -> Path: markdown_path = run_dir / "fdr-validation-summary.md" lines = [ "# FDR Validation Summary", "", f"Run ID: `{run_dir.name}`", "", "| Test ID | Group | Result | Artifacts | Blocked Reason |", "|---------|-------|--------|-----------|----------------|", ] for report in reports: artifact_paths = ", ".join(str(path) for path in report.artifacts) lines.append( "| " f"{report.scenario_id} | {report.group.value} | {report.result.value} | " f"{artifact_paths} | {report.error_message or ''} |" ) markdown_path.write_text("\n".join(lines) + "\n", encoding="utf-8") return markdown_path def default_scenarios() -> tuple[ScenarioConfig, ...]: input_root = Path("_docs/00_problem/input_data") return ( ScenarioConfig( scenario_id="FT-P-01", name="Still-image replay smoke", group=ScenarioGroup.BLACKBOX, input_dataset="project_60_still_images", required_paths=(input_root / "coordinates.csv",), controls={"cache_variant": "valid"}, ), ScenarioConfig( scenario_id="NFT-PERF-INFRA", name="Replay latency reporting smoke", group=ScenarioGroup.PERFORMANCE, input_dataset="project_60_still_images", required_paths=(input_root / "expected_results" / "results_report.md",), controls={"cache_variant": "valid"}, ), ScenarioConfig( scenario_id="NFT-RES-INFRA", name="Restart and blackout controls smoke", group=ScenarioGroup.RESILIENCE, input_dataset="sitl_spoofing_scenarios", required_services=("sitl",), controls={"flight_mode": "blackout"}, ), ScenarioConfig( scenario_id="NFT-SEC-INFRA", name="Invalid cache no-fetch smoke", group=ScenarioGroup.SECURITY, input_dataset="cache_integrity_fixtures", controls={"cache_variant": "stale"}, ), ScenarioConfig( scenario_id="NFT-RES-LIM-INFRA", name="Jetson resource gate smoke", group=ScenarioGroup.RESOURCE_LIMIT, input_dataset="jetson_resource_monitor", required_services=("jetson",), ), ) def load_expected_coordinates(coordinates_path: Path) -> tuple[ExpectedCoordinate, ...]: rows: list[ExpectedCoordinate] = [] with coordinates_path.open(encoding="utf-8", newline="") as coordinates_file: reader = csv.DictReader(coordinates_file) for row in reader: normalized_row = {key.strip(): value for key, value in row.items() if key is not None} image_ref = (normalized_row.get("image") or "").strip() latitude = float((normalized_row.get("lat") or "").strip()) longitude = float((normalized_row.get("lon") or "").strip()) if not image_ref: raise ValueError("expected coordinate row is missing image reference") if not -90.0 <= latitude <= 90.0 or not -180.0 <= longitude <= 180.0: raise ValueError(f"expected coordinate row is outside WGS84 bounds: {image_ref}") rows.append( ExpectedCoordinate( image_ref=image_ref, latitude_deg=latitude, longitude_deg=longitude, ) ) if not rows: raise ValueError("expected coordinate fixture is empty") return tuple(rows) def evaluate_still_image_estimates( expected_coordinates: Sequence[ExpectedCoordinate], estimates: Sequence[ReplayEstimate], ) -> Mapping[str, float | str | bool]: expected_by_image = {coordinate.image_ref: coordinate for coordinate in expected_coordinates} if len(estimates) != len(expected_by_image): raise ValueError("replay estimate count does not match expected coordinate count") distances = [] latencies = [] for estimate in estimates: expected = expected_by_image.get(estimate.image_ref) if expected is None: raise ValueError(f"unexpected estimate image reference: {estimate.image_ref}") _require_confidence_fields(estimate) distances.append( haversine_m( expected.latitude_deg, expected.longitude_deg, estimate.latitude_deg, estimate.longitude_deg, ) ) latencies.append(estimate.capture_to_output_latency_ms) within_50_m = sum(distance <= 50.0 for distance in distances) / len(distances) within_20_m = sum(distance <= 20.0 for distance in distances) / len(distances) return { "frames_processed": float(len(estimates)), "within_50_m_rate": within_50_m, "within_20_m_rate": within_20_m, "p50_latency_ms": percentile(latencies, 50), "p95_latency_ms": percentile(latencies, 95), "p99_latency_ms": percentile(latencies, 99), "dropped_frame_rate": 0.0, "threshold_passed": within_50_m >= 0.80 and within_20_m >= 0.50, } def validate_derkachi_alignment( video_duration_s: float, telemetry_duration_s: float, telemetry_rows: int, frame_rate_hz: float = 30.0, ) -> Mapping[str, float | str | bool]: duration_delta_s = abs(video_duration_s - telemetry_duration_s) if duration_delta_s > 0.250: raise ValueError("Derkachi video and telemetry durations differ by more than 250 ms") if telemetry_rows <= 0: raise ValueError("Derkachi telemetry fixture is empty") frame_count = round(video_duration_s * frame_rate_hz) frames_per_telemetry = frame_count / telemetry_rows if not math.isclose(frames_per_telemetry, 3.0, rel_tol=0.02, abs_tol=0.05): raise ValueError("Derkachi replay must have approximately 3 video frames per telemetry row") return { "video_duration_s": video_duration_s, "telemetry_duration_s": telemetry_duration_s, "duration_delta_s": duration_delta_s, "frames_per_telemetry": frames_per_telemetry, "alignment_valid": True, } def percentile(values: Sequence[float], percentile_value: int) -> float: if not values: raise ValueError("cannot compute percentile for empty values") ordered = sorted(values) index = min( len(ordered) - 1, max(0, math.ceil((percentile_value / 100.0) * len(ordered)) - 1), ) return ordered[index] def mavlink_source_is_authorized(source_system_id: int, allowed_source_system_ids: set[int]) -> bool: return source_system_id in allowed_source_system_ids def haversine_m( latitude_a_deg: float, longitude_a_deg: float, latitude_b_deg: float, longitude_b_deg: float, ) -> float: earth_radius_m = 6_371_000.0 latitude_a = math.radians(latitude_a_deg) latitude_b = math.radians(latitude_b_deg) delta_latitude = math.radians(latitude_b_deg - latitude_a_deg) delta_longitude = math.radians(longitude_b_deg - longitude_a_deg) haversine = ( math.sin(delta_latitude / 2.0) ** 2 + math.cos(latitude_a) * math.cos(latitude_b) * math.sin(delta_longitude / 2.0) ** 2 ) return 2.0 * earth_radius_m * math.asin(math.sqrt(haversine)) def _optional_float(value: float | None) -> str: return "" if value is None else f"{value:.3f}" def _require_confidence_fields(estimate: ReplayEstimate) -> None: if estimate.covariance_95_semi_major_m < 0.0: raise ValueError(f"estimate covariance is invalid: {estimate.image_ref}") if not estimate.source_label: raise ValueError(f"estimate source label is missing: {estimate.image_ref}") if estimate.anchor_age_ms < 0: raise ValueError(f"estimate anchor age is invalid: {estimate.image_ref}") def main(argv: Sequence[str] | None = None) -> int: parser = argparse.ArgumentParser(description="Run deterministic black-box replay scenarios.") parser.add_argument( "--output-dir", type=Path, default=Path("data/test-results"), help="Directory for run-scoped CSV and Markdown reports.", ) args = parser.parse_args(argv) result = BlackboxReplayRunner(output_root=args.output_dir).run() print(f"blackbox replay completed: {result.csv_path}") print(f"fdr validation summary: {result.markdown_path}") return 0