Files
gps-denied-onboard/e2e/runner/helpers/multi_segment_evaluator.py
T
Oleksandr Bezdieniezhnykh c6e6cba237 [AZ-414] [AZ-415] [AZ-418] Test batch 71: sharp turn + multi-segment + smoothing
- AZ-414 (FT-P-07 + FT-N-02): sharp_turn_detector helper covering
  AC-1 (gyro_z run detection + synthetic-overlay fallback),
  AC-2/AC-3 (FT-N-02 during-turn label + monotonic covariance),
  AC-4/AC-5/AC-6 (FT-P-07 recovery lag/drift/heading); twin scenario
  files under positive/ and negative/.
- AZ-415 (FT-P-08): multi_segment_evaluator helper + scenario.
- AZ-418 (FT-P-10): smoothing_evaluator helper covering AC-1 (raw +
  smoothed pose pairing), AC-2 (improvement rate >= 0.80), AC-3
  (mean improvement >= 5 m); scenario file.
- All scenarios skip-gated on upstream frame_source_replay /
  imu_replay / fdr_reader stubs (auto-activate when AZ-441 + AZ-407
  leftovers land).
- +68 unit tests; full e2e unit suite: 393 passed.

See _docs/03_implementation/batch_71_report.md and
_docs/03_implementation/reviews/batch_71_review.md.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-17 07:12:24 +03:00

288 lines
9.4 KiB
Python

"""Multi-segment relocalisation evaluation for FT-P-08 (AZ-415 / AC-3.3).
The ``multi-segment-derkachi`` fixture (AZ-408) writes a ``schedule.json``
naming ≥3 disjoint blackout windows. During replay the SUT MUST:
* AC-2: emit ``source_label = dead_reckoned`` for every estimate inside
every blackout window.
* AC-3: emit the next ``source_label = satellite_anchored`` within
≤3 frames of each blackout's ``end_ms`` (target frame cadence = 3 fps
per the runtime profile in `_docs/02_document/tests/blackbox-tests.md`).
* AC-4: trajectory continuity — the geodesic distance between the last
pre-recovery estimate (at or before ``end_ms``) and the first
post-recovery anchor must be ≤100 m.
The aggregate passes only when ALL ≥3 windows satisfy ALL three checks.
Public-boundary discipline: this module does NOT import any
``src/gps_denied_onboard`` symbol.
"""
from __future__ import annotations
import csv
import json
from dataclasses import dataclass, field
from pathlib import Path
from typing import Iterable, Mapping, Sequence
from .geo import distance_m
# ``source_label`` values recognised on outbound estimates.
DEAD_RECKONED = "dead_reckoned"
SATELLITE_ANCHORED = "satellite_anchored"
VISUAL_PROPAGATED = "visual_propagated"
# Complete set of accepted labels; ``evaluate`` raises ValueError for any
# sample whose label is not in this set.
ALLOWED_SOURCE_LABELS = {SATELLITE_ANCHORED, VISUAL_PROPAGATED, DEAD_RECKONED}

# Pass/fail thresholds from the FT-P-08 spec (segment count, AC-3 recovery
# budget, AC-4 trajectory jump).
# NOTE(review): this comment previously cited "AC-3 / AC-4 / AC-5", but the
# rest of this module only refers to AC-1 .. AC-4 -- confirm against the spec.
# NOTE(review): MAX_RECOVERY_FRAMES is not read anywhere in the visible code;
# ``PerWindowReport.passes_recovery`` compares against the millisecond budget.
MAX_RECOVERY_FRAMES = 3
MAX_RECOVERY_FRAMES_SAFETY_MS = 1100  # 3 frames @ ~3 fps; +100 ms scheduling slack
# Upper bound checked by ``PerWindowReport.passes_jump``.
MAX_TRAJECTORY_JUMP_M = 100.0
# Lower bound checked by ``MultiSegmentReport.passes_segment_count``.
MIN_SEGMENTS_REQUIRED = 3
@dataclass(frozen=True)
class BlackoutWindow:
    """A single blackout interval taken from the injector's ``schedule.json``.

    The four fields mirror the keys of one ``segments`` entry in that file:
    the interval bounds (``start_ms`` / ``end_ms``) and the frame indices
    stored next to them (``first_frame_idx`` / ``last_frame_idx``).
    """

    start_ms: int
    end_ms: int
    first_frame_idx: int
    last_frame_idx: int

    @property
    def duration_ms(self) -> int:
        """Length of the window, i.e. ``end_ms`` minus ``start_ms``."""
        span = self.end_ms - self.start_ms
        return span
@dataclass(frozen=True)
class EstimateSample:
    """A single outbound estimate captured while the replay was running.

    Scenarios assemble the sample list either from the SITL listener (the
    primary path) or from the post-run FDR archive (the offline audit).
    Both of those sources are public boundaries.
    """

    monotonic_ms: int
    lat_deg: float
    lon_deg: float
    source_label: str
@dataclass(frozen=True)
class PerWindowReport:
    """Outcome of ``evaluate_window`` for a single blackout window.

    The stored fields are raw observations (sample counts, offending labels,
    recovery timing, trajectory jump); the ``passes_*`` properties turn them
    into the AC-2 / AC-3 / AC-4 verdicts.
    """

    window_index: int
    start_ms: int
    end_ms: int
    samples_inside: int
    dead_reckoned_inside: int
    label_violations: tuple[str, ...]
    recovery_anchor_ms: int | None
    recovery_lag_ms: int | None
    trajectory_jump_m: float | None

    @property
    def passes_label(self) -> bool:
        """AC-2: the window holds samples and all of them are dead_reckoned."""
        if self.samples_inside <= 0:
            # An empty window cannot demonstrate the label requirement.
            return False
        if self.dead_reckoned_inside != self.samples_inside:
            return False
        return not self.label_violations

    @property
    def passes_recovery(self) -> bool:
        """AC-3: a satellite_anchored emission arrived within the budget."""
        lag = self.recovery_lag_ms
        if lag is None:
            return False
        return lag <= MAX_RECOVERY_FRAMES_SAFETY_MS

    @property
    def passes_jump(self) -> bool:
        """AC-4: trajectory jump ≤100 m."""
        jump = self.trajectory_jump_m
        if jump is None:
            return False
        return jump <= MAX_TRAJECTORY_JUMP_M

    @property
    def passes(self) -> bool:
        """True only when the label, recovery and jump checks all hold."""
        if not self.passes_label:
            return False
        if not self.passes_recovery:
            return False
        return self.passes_jump
@dataclass(frozen=True)
class MultiSegmentReport:
    """Roll-up of every per-window report; the scenario asserts on ``passes``."""

    per_window: tuple[PerWindowReport, ...]
    failed_windows: tuple[int, ...] = field(default_factory=tuple)

    @property
    def window_count(self) -> int:
        """Number of per-window reports held."""
        return len(self.per_window)

    @property
    def passes_segment_count(self) -> bool:
        """Whether at least ``MIN_SEGMENTS_REQUIRED`` windows were evaluated."""
        return MIN_SEGMENTS_REQUIRED <= self.window_count

    @property
    def passes(self) -> bool:
        """True when enough windows exist, each passes, and none is listed as failed."""
        if not self.passes_segment_count:
            return False
        for report in self.per_window:
            if not report.passes:
                return False
        return not self.failed_windows
def load_schedule(schedule_json: Path) -> list[BlackoutWindow]:
    """Read the multi_segment injector's ``schedule.json``.

    Shape (per AZ-408 multi_segment._write_schedule)::

        {"segments": [{"start_ms": int, "end_ms": int,
                       "first_frame_idx": int, "last_frame_idx": int}, ...]}

    Args:
        schedule_json: Path of the schedule file to read.

    Returns:
        One ``BlackoutWindow`` per ``segments`` entry, in file order, with
        each of the four fields coerced through ``int``.

    Raises:
        FileNotFoundError: ``schedule_json`` does not exist.
        ValueError: the payload has no ``segments`` key (or the file is not
            valid JSON -- ``json.JSONDecodeError`` is a ``ValueError``).
        KeyError: a segment entry lacks one of the four required fields.
    """
    if not schedule_json.exists():
        # Adjacent literals are concatenated verbatim, so the separator
        # between the path and the hint must be part of the first literal.
        raise FileNotFoundError(
            f"multi-segment schedule.json not found at {schedule_json} — "
            "build the multi-segment-derkachi fixture first"
        )
    # Decode explicitly as UTF-8 instead of relying on the locale default.
    payload = json.loads(schedule_json.read_text(encoding="utf-8"))
    if "segments" not in payload:
        raise ValueError(
            f"schedule.json missing 'segments' key — found {list(payload)}"
        )
    return [
        BlackoutWindow(
            start_ms=int(seg["start_ms"]),
            end_ms=int(seg["end_ms"]),
            first_frame_idx=int(seg["first_frame_idx"]),
            last_frame_idx=int(seg["last_frame_idx"]),
        )
        for seg in payload["segments"]
    ]
def evaluate_window(
    window: BlackoutWindow,
    window_index: int,
    samples: Sequence[EstimateSample],
) -> PerWindowReport:
    """Evaluate AC-2 / AC-3 / AC-4 for one blackout window.

    A sample counts as inside the window when
    ``start_ms <= monotonic_ms < end_ms`` (inclusive start, exclusive end);
    the recovery search starts at ``end_ms`` and looks forward in time.

    ``samples`` may arrive in any order: both scans below run over a
    time-ordered copy, so an out-of-order capture can no longer cut the
    pre-recovery scan short or pick a later anchor ahead of an earlier one.
    The sort is stable, so samples sharing a timestamp keep their input order.

    Args:
        window: The blackout window under evaluation.
        window_index: Index of ``window``, copied into the report.
        samples: All estimates observed during the replay (not only those
            inside this window).

    Returns:
        A ``PerWindowReport`` with the inside-window counts, the sorted set
        of non-``dead_reckoned`` labels seen inside, the recovery anchor
        timestamp and lag (``None`` when no anchor follows the window), and
        the trajectory jump in metres as returned by ``distance_m``
        (``None`` when either endpoint is missing).
    """
    ordered = sorted(samples, key=lambda sample: sample.monotonic_ms)

    # AC-2 inputs: how many samples fall inside, and which labels break the rule.
    inside = [s for s in ordered if window.start_ms <= s.monotonic_ms < window.end_ms]
    dead_reckoned_inside = sum(1 for s in inside if s.source_label == DEAD_RECKONED)
    label_violations = tuple(
        sorted({s.source_label for s in inside if s.source_label != DEAD_RECKONED})
    )

    # AC-3 recovery search: earliest satellite_anchored emission at or after end_ms.
    recovery: EstimateSample | None = next(
        (
            s
            for s in ordered
            if s.monotonic_ms >= window.end_ms
            and s.source_label == SATELLITE_ANCHORED
        ),
        None,
    )

    # AC-4 trajectory jump: latest estimate strictly before end_ms vs the
    # recovery anchor.
    # NOTE(review): the module docstring words this as "at or before end_ms";
    # the strict comparison is kept so that an anchor landing exactly on
    # end_ms is never measured against itself.
    pre_recovery: EstimateSample | None = None
    for s in ordered:
        if s.monotonic_ms >= window.end_ms:
            break
        pre_recovery = s

    jump_m: float | None = None
    if recovery is not None and pre_recovery is not None:
        jump_m = distance_m(
            pre_recovery.lat_deg,
            pre_recovery.lon_deg,
            recovery.lat_deg,
            recovery.lon_deg,
        )

    return PerWindowReport(
        window_index=window_index,
        start_ms=window.start_ms,
        end_ms=window.end_ms,
        samples_inside=len(inside),
        dead_reckoned_inside=dead_reckoned_inside,
        label_violations=label_violations,
        recovery_anchor_ms=recovery.monotonic_ms if recovery is not None else None,
        recovery_lag_ms=(
            recovery.monotonic_ms - window.end_ms if recovery is not None else None
        ),
        trajectory_jump_m=jump_m,
    )
def evaluate(
    windows: Sequence[BlackoutWindow],
    samples: Sequence[EstimateSample],
) -> MultiSegmentReport:
    """Run ``evaluate_window`` over every window and aggregate the results.

    Every sample's label is checked up front; the first label outside
    ``ALLOWED_SOURCE_LABELS`` aborts the evaluation with ``ValueError``.
    Windows are numbered by their position in ``windows``, and the indices
    of those whose report does not pass are collected in ``failed_windows``.
    """
    for s in samples:
        if s.source_label in ALLOWED_SOURCE_LABELS:
            continue
        raise ValueError(
            f"unknown source_label '{s.source_label}' at {s.monotonic_ms} ms — "
            f"allowed: {sorted(ALLOWED_SOURCE_LABELS)}"
        )

    reports: list[PerWindowReport] = []
    for index, window in enumerate(windows):
        reports.append(evaluate_window(window, index, samples))

    failing = [report.window_index for report in reports if not report.passes]
    return MultiSegmentReport(
        per_window=tuple(reports),
        failed_windows=tuple(failing),
    )
def write_csv_evidence(out_path: Path, report: MultiSegmentReport) -> Path:
    """Dump the FT-P-08 per-window evidence as a CSV file.

    Columns, in order: ``window_index, start_ms, end_ms, samples_inside,
    dead_reckoned_inside, label_violations, recovery_lag_ms,
    trajectory_jump_m, passes_label, passes_recovery, passes_jump, passes``.

    Label violations are joined with ``|``, a missing lag or jump becomes an
    empty cell, the jump is printed with three decimals, and booleans are
    written as ``true`` / ``false``.  Parent directories of ``out_path`` are
    created when absent.  Returns ``out_path``.
    """
    columns = (
        "window_index",
        "start_ms",
        "end_ms",
        "samples_inside",
        "dead_reckoned_inside",
        "label_violations",
        "recovery_lag_ms",
        "trajectory_jump_m",
        "passes_label",
        "passes_recovery",
        "passes_jump",
        "passes",
    )

    def _flag(value) -> str:
        # Lower-case literals rather than Python's "True"/"False".
        return "true" if value else "false"

    def _row(w) -> list:
        lag = w.recovery_lag_ms
        jump = w.trajectory_jump_m
        return [
            w.window_index,
            w.start_ms,
            w.end_ms,
            w.samples_inside,
            w.dead_reckoned_inside,
            "|".join(w.label_violations or ()),
            lag if lag is not None else "",
            f"{jump:.3f}" if jump is not None else "",
            _flag(w.passes_label),
            _flag(w.passes_recovery),
            _flag(w.passes_jump),
            _flag(w.passes),
        ]

    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", newline="") as fh:
        writer = csv.writer(fh)
        writer.writerow(columns)
        writer.writerows(_row(w) for w in report.per_window)
    return out_path