[AZ-234] [AZ-235] [AZ-236] [AZ-237] Add replay tests

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-05 06:24:10 +03:00
parent c30fd4f67d
commit 5acd14b792
12 changed files with 616 additions and 3 deletions
+155
View File
@@ -9,6 +9,7 @@ from __future__ import annotations
import argparse
import csv
import json
import math
import os
from dataclasses import dataclass, field
from enum import Enum
@@ -65,6 +66,24 @@ class RecordedInteraction:
response: Mapping[str, str | bool]
@dataclass(frozen=True)
class ExpectedCoordinate:
    """Reference position recorded for one image.

    Instances are immutable. ``load_expected_coordinates`` creates them from
    the coordinate fixture after checking the values against WGS84 bounds.
    """

    # Image identifier; used as the lookup key when pairing with estimates.
    image_ref: str
    # Latitude in decimal degrees.
    latitude_deg: float
    # Longitude in decimal degrees.
    longitude_deg: float
@dataclass(frozen=True)
class ReplayEstimate:
    """Position estimate produced by a replay run for one image.

    Instances are immutable. ``evaluate_still_image_estimates`` compares them
    with the expected coordinates and ``_require_confidence_fields`` checks
    the confidence-related fields.
    """

    # Image identifier; must match an expected coordinate's ``image_ref``.
    image_ref: str
    # Estimated latitude in decimal degrees.
    latitude_deg: float
    # Estimated longitude in decimal degrees.
    longitude_deg: float
    # Confidence measure attached to the estimate; must not be negative.
    covariance_95_semi_major_m: float
    # Label of whatever produced the estimate; must not be empty.
    source_label: str
    # Age of the anchor behind the estimate; must not be negative.
    anchor_age_ms: int
    # Latency sample that feeds the p50/p95/p99 latency metrics.
    capture_to_output_latency_ms: float
@dataclass(frozen=True)
class ScenarioReport:
scenario_id: str
@@ -80,6 +99,7 @@ class ScenarioReport:
error_message: str
artifacts: tuple[Path, ...]
interactions: tuple[RecordedInteraction, ...]
metrics: Mapping[str, float | str | bool] = field(default_factory=dict)
@dataclass(frozen=True)
@@ -133,6 +153,9 @@ class SatelliteCacheStub(DeterministicStub):
{
"variant": variant,
"trusted": trusted,
"freshness_status": "fresh" if trusted else "rejected",
"fixture_size_bytes": "1048576",
"storage_budget_bytes": "10737418240",
"network_fetch_attempted": False,
"provenance": "offline-fixture",
},
@@ -374,10 +397,142 @@ def default_scenarios() -> tuple[ScenarioConfig, ...]:
)
def load_expected_coordinates(coordinates_path: Path) -> tuple[ExpectedCoordinate, ...]:
    """Load the expected-coordinate fixture from a CSV file.

    The file needs a header row containing ``image``, ``lat`` and ``lon``
    columns. Whitespace around header names and cell values is ignored, and a
    leading UTF-8 byte-order mark is tolerated.

    Args:
        coordinates_path: Location of the CSV fixture.

    Returns:
        One ``ExpectedCoordinate`` per data row, in file order.

    Raises:
        ValueError: If a row has no image reference, a coordinate is missing
            or not numeric, a coordinate lies outside WGS84 bounds, or the
            file contains no data rows.
    """
    rows: list[ExpectedCoordinate] = []
    # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading BOM, which
    # would otherwise stick to the first header name and hide the column.
    with coordinates_path.open(encoding="utf-8-sig", newline="") as coordinates_file:
        reader = csv.DictReader(coordinates_file)
        for row in reader:
            # DictReader stores surplus cells under a None key; ignore them.
            normalized_row = {key.strip(): value for key, value in row.items() if key is not None}
            image_ref = (normalized_row.get("image") or "").strip()
            # Identify the row before parsing numbers so that every later
            # error message can name the offending image.
            if not image_ref:
                raise ValueError("expected coordinate row is missing image reference")

            parsed: dict[str, float] = {}
            for column in ("lat", "lon"):
                # Short rows yield None for the missing cells.
                raw_value = (normalized_row.get(column) or "").strip()
                try:
                    parsed[column] = float(raw_value)
                except ValueError as error:
                    raise ValueError(
                        f"expected coordinate row has invalid {column} value {raw_value!r}: {image_ref}"
                    ) from error
            latitude = parsed["lat"]
            longitude = parsed["lon"]

            # The chained comparisons are False for NaN, so NaN is rejected too.
            if not -90.0 <= latitude <= 90.0 or not -180.0 <= longitude <= 180.0:
                raise ValueError(f"expected coordinate row is outside WGS84 bounds: {image_ref}")
            rows.append(
                ExpectedCoordinate(
                    image_ref=image_ref,
                    latitude_deg=latitude,
                    longitude_deg=longitude,
                )
            )
    if not rows:
        raise ValueError("expected coordinate fixture is empty")
    return tuple(rows)
def evaluate_still_image_estimates(
    expected_coordinates: Sequence[ExpectedCoordinate],
    estimates: Sequence[ReplayEstimate],
) -> Mapping[str, float | str | bool]:
    """Score replay estimates against the expected still-image coordinates.

    Each estimate is paired with the expected coordinate sharing its
    ``image_ref``; the great-circle error is computed with ``haversine_m``.

    Args:
        expected_coordinates: Reference positions, keyed by ``image_ref``.
        estimates: Exactly one estimate per expected image.

    Returns:
        A metrics mapping with the frame count, the share of estimates within
        50 m and within 20 m, the p50/p95/p99 latency, a dropped-frame rate
        (always 0.0, because a count mismatch is rejected), and
        ``threshold_passed`` (at least 80 % within 50 m and 50 % within 20 m).

    Raises:
        ValueError: If the counts differ, there is nothing to evaluate, an
            estimate names an unknown image, an image is estimated more than
            once, or an estimate fails ``_require_confidence_fields``.
    """
    expected_by_image = {coordinate.image_ref: coordinate for coordinate in expected_coordinates}
    if len(estimates) != len(expected_by_image):
        raise ValueError("replay estimate count does not match expected coordinate count")
    if not estimates:
        # Matching counts of zero would otherwise divide by zero below.
        raise ValueError("cannot evaluate an empty set of replay estimates")

    # Equal counts alone do not prove a one-to-one pairing: a duplicated
    # estimate could stand in for an image that was never estimated.
    seen_image_refs: set[str] = set()
    for estimate in estimates:
        if estimate.image_ref not in expected_by_image:
            raise ValueError(f"unexpected estimate image reference: {estimate.image_ref}")
        if estimate.image_ref in seen_image_refs:
            raise ValueError(f"duplicate estimate image reference: {estimate.image_ref}")
        seen_image_refs.add(estimate.image_ref)

    distances = []
    latencies = []
    for estimate in estimates:
        _require_confidence_fields(estimate)
        expected = expected_by_image[estimate.image_ref]
        distances.append(
            haversine_m(
                expected.latitude_deg,
                expected.longitude_deg,
                estimate.latitude_deg,
                estimate.longitude_deg,
            )
        )
        latencies.append(estimate.capture_to_output_latency_ms)

    frame_count = len(distances)
    within_50_m = sum(distance <= 50.0 for distance in distances) / frame_count
    within_20_m = sum(distance <= 20.0 for distance in distances) / frame_count
    return {
        "frames_processed": float(len(estimates)),
        "within_50_m_rate": within_50_m,
        "within_20_m_rate": within_20_m,
        "p50_latency_ms": percentile(latencies, 50),
        "p95_latency_ms": percentile(latencies, 95),
        "p99_latency_ms": percentile(latencies, 99),
        "dropped_frame_rate": 0.0,
        "threshold_passed": within_50_m >= 0.80 and within_20_m >= 0.50,
    }
def validate_derkachi_alignment(
    video_duration_s: float,
    telemetry_duration_s: float,
    telemetry_rows: int,
    frame_rate_hz: float = 30.0,
) -> Mapping[str, float | str | bool]:
    """Check that the Derkachi video and telemetry fixtures line up.

    Args:
        video_duration_s: Length of the video in seconds.
        telemetry_duration_s: Time span covered by the telemetry in seconds.
        telemetry_rows: Number of telemetry rows in the fixture.
        frame_rate_hz: Video frame rate used to derive the frame count.

    Returns:
        A mapping with both durations, their absolute difference, the derived
        frames-per-telemetry-row ratio and ``alignment_valid`` set to True.

    Raises:
        ValueError: If a duration or the frame rate is not finite, the
            durations differ by more than 250 ms, the telemetry is empty, or
            the ratio is not approximately 3 frames per telemetry row.
    """
    # Reject NaN/inf up front: NaN compares False against the 250 ms limit and
    # infinity makes round() raise OverflowError instead of ValueError.
    if not all(
        math.isfinite(value) for value in (video_duration_s, telemetry_duration_s, frame_rate_hz)
    ):
        raise ValueError("Derkachi durations and frame rate must be finite numbers")
    duration_delta_s = abs(video_duration_s - telemetry_duration_s)
    if duration_delta_s > 0.250:
        raise ValueError("Derkachi video and telemetry durations differ by more than 250 ms")
    if telemetry_rows <= 0:
        raise ValueError("Derkachi telemetry fixture is empty")
    frame_count = round(video_duration_s * frame_rate_hz)
    frames_per_telemetry = frame_count / telemetry_rows
    if not math.isclose(frames_per_telemetry, 3.0, rel_tol=0.02, abs_tol=0.05):
        raise ValueError("Derkachi replay must have approximately 3 video frames per telemetry row")
    return {
        "video_duration_s": video_duration_s,
        "telemetry_duration_s": telemetry_duration_s,
        "duration_delta_s": duration_delta_s,
        "frames_per_telemetry": frames_per_telemetry,
        "alignment_valid": True,
    }
def percentile(values: Sequence[float], percentile_value: int) -> float:
    """Return the nearest-rank percentile of ``values``.

    The result is always an element of ``values``: the one at 1-based rank
    ``ceil(percentile_value / 100 * len(values))`` in sorted order, with the
    rank clamped to the valid range.

    Args:
        values: Samples to rank; must not be empty.
        percentile_value: Requested percentile, nominally 0-100.

    Returns:
        The selected sample.

    Raises:
        ValueError: If ``values`` is empty.
    """
    if not values:
        raise ValueError("cannot compute percentile for empty values")
    ordered = sorted(values)
    # Multiply before dividing: (p / 100.0) * n can round just above an
    # integer (0.07 * 100 == 7.000000000000001), which would make ceil()
    # overshoot by one rank. p * n is exact for integer percentiles.
    rank = math.ceil(percentile_value * len(ordered) / 100)
    index = min(len(ordered) - 1, max(0, rank - 1))
    return ordered[index]
def mavlink_source_is_authorized(source_system_id: int, allowed_source_system_ids: set[int]) -> bool:
    """Report whether ``source_system_id`` is one of the allowed ids.

    Args:
        source_system_id: Identifier to look up.
        allowed_source_system_ids: Identifiers that are accepted.

    Returns:
        True when the identifier is a member of the allowed set, else False.
    """
    is_allowed = source_system_id in allowed_source_system_ids
    return is_allowed
def haversine_m(
    latitude_a_deg: float,
    longitude_a_deg: float,
    latitude_b_deg: float,
    longitude_b_deg: float,
) -> float:
    """Return the great-circle distance in metres between two points.

    Applies the haversine formula on a sphere of radius 6,371,000 m.

    Args:
        latitude_a_deg: Latitude of the first point in degrees.
        longitude_a_deg: Longitude of the first point in degrees.
        latitude_b_deg: Latitude of the second point in degrees.
        longitude_b_deg: Longitude of the second point in degrees.

    Returns:
        The distance along the sphere's surface in metres.
    """
    earth_radius_m = 6_371_000.0
    latitude_a = math.radians(latitude_a_deg)
    latitude_b = math.radians(latitude_b_deg)
    delta_latitude = math.radians(latitude_b_deg - latitude_a_deg)
    delta_longitude = math.radians(longitude_b_deg - longitude_a_deg)
    haversine = (
        math.sin(delta_latitude / 2.0) ** 2
        + math.cos(latitude_a) * math.cos(latitude_b) * math.sin(delta_longitude / 2.0) ** 2
    )
    # For (near-)antipodal points rounding can leave the term a hair above
    # 1.0, which would make asin() raise a domain error. Only the upper bound
    # is clamped, and with a comparison rather than min(), so NaN inputs still
    # propagate as NaN.
    if haversine > 1.0:
        haversine = 1.0
    return 2.0 * earth_radius_m * math.asin(math.sqrt(haversine))
def _optional_float(value: float | None) -> str:
    """Render an optional number as text.

    Returns:
        An empty string for ``None``; otherwise the value in fixed-point
        notation with three decimal places.
    """
    if value is None:
        return ""
    return format(value, ".3f")
def _require_confidence_fields(estimate: ReplayEstimate) -> None:
    """Validate the confidence-related fields of a replay estimate.

    Args:
        estimate: Estimate to check.

    Raises:
        ValueError: If the covariance is negative or NaN, the source label is
            empty, or the anchor age is negative.
    """
    # Written as "not >= 0" rather than "< 0" so that NaN, which compares
    # False either way, is rejected instead of slipping through.
    if not estimate.covariance_95_semi_major_m >= 0.0:
        raise ValueError(f"estimate covariance is invalid: {estimate.image_ref}")
    if not estimate.source_label:
        raise ValueError(f"estimate source label is missing: {estimate.image_ref}")
    if estimate.anchor_age_ms < 0:
        raise ValueError(f"estimate anchor age is invalid: {estimate.image_ref}")
def main(argv: Sequence[str] | None = None) -> int:
parser = argparse.ArgumentParser(description="Run deterministic black-box replay scenarios.")
parser.add_argument(