# gps-denied-onboard/e2e/runner/helpers/multi_segment_evaluator.py

"""Multi-segment relocalisation evaluation for FT-P-08 (AZ-415 / AC-3.3).

The ``multi-segment-derkachi`` fixture (AZ-408) writes a ``schedule.json``
naming ≥3 disjoint blackout windows. During replay the SUT MUST:

* AC-2: emit ``source_label = dead_reckoned`` for every estimate inside
  every blackout window.
* AC-3: emit the next ``source_label = satellite_anchored`` within
  ≤3 frames of each blackout's ``end_ms`` (target frame cadence = 3 fps
  per the runtime profile in `_docs/02_document/tests/blackbox-tests.md`).
* AC-4: trajectory continuity — the geodesic distance between the last
  pre-recovery estimate (at or before ``end_ms``) and the first
  post-recovery anchor must be ≤100 m.

The aggregate passes only when ALL ≥3 windows satisfy ALL three checks.

Public-boundary discipline: this module does NOT import any
``src/gps_denied_onboard`` symbol.
"""

from __future__ import annotations

import csv
import json
from dataclasses import dataclass, field
from pathlib import Path
from typing import Iterable, Mapping, Sequence

from .geo import distance_m

# Values of ``EstimateSample.source_label`` that this evaluator recognises.
DEAD_RECKONED = "dead_reckoned"
SATELLITE_ANCHORED = "satellite_anchored"
VISUAL_PROPAGATED = "visual_propagated"

# Closed set of accepted labels; ``evaluate`` raises ValueError for anything else.
ALLOWED_SOURCE_LABELS = {SATELLITE_ANCHORED, VISUAL_PROPAGATED, DEAD_RECKONED}

# AC-3 / AC-4 / AC-5 thresholds from the FT-P-08 spec.
# NOTE(review): the module docstring only describes AC-2..AC-4 — confirm the
# "AC-5" reference against the spec.
MAX_RECOVERY_FRAMES = 3  # AC-3 budget in frames; the checks in this module use the ms budget below
MAX_RECOVERY_FRAMES_SAFETY_MS = 1100  # 3 frames @ ~3 fps; +100 ms scheduling slack
MAX_TRAJECTORY_JUMP_M = 100.0  # AC-4: upper bound for ``PerWindowReport.trajectory_jump_m``
MIN_SEGMENTS_REQUIRED = 3  # minimum window count for ``MultiSegmentReport.passes_segment_count``


@dataclass(frozen=True)
class BlackoutWindow:
    """A single blackout interval read from the injector's ``schedule.json``.

    ``start_ms`` and ``end_ms`` are compared directly against
    ``EstimateSample.monotonic_ms`` during evaluation; the two frame
    indices are carried over from the schedule entry unchanged.
    Instances are immutable.
    """

    start_ms: int
    end_ms: int
    first_frame_idx: int
    last_frame_idx: int

    @property
    def duration_ms(self) -> int:
        """Length of the window: ``end_ms`` minus ``start_ms``."""
        begin, finish = self.start_ms, self.end_ms
        return finish - begin


@dataclass(frozen=True)
class EstimateSample:
    """One outbound estimate observed during replay.

    The scenario builds this list from the SITL listener (for the
    primary path) or from the post-run FDR archive (for the offline
    audit). Either source is a public boundary.

    Instances are immutable (``frozen=True``).
    """

    # Timestamp in ms; compared directly against ``BlackoutWindow.start_ms`` / ``end_ms``.
    monotonic_ms: int
    # Position handed to ``distance_m`` for the AC-4 jump check (degrees, per the field names).
    lat_deg: float
    lon_deg: float
    # Expected to be one of ``ALLOWED_SOURCE_LABELS``; ``evaluate`` rejects any other value.
    source_label: str


@dataclass(frozen=True)
class PerWindowReport:
    """Outcome of ``evaluate_window`` for one blackout window.

    The stored fields are raw measurements; the ``passes_*`` properties
    turn them into the AC-2 / AC-3 / AC-4 verdicts.
    """

    window_index: int
    start_ms: int
    end_ms: int
    # Number of samples that fell inside the window, and how many of those
    # carried the ``dead_reckoned`` label.
    samples_inside: int
    dead_reckoned_inside: int
    # Sorted, de-duplicated non-dead_reckoned labels seen inside the window.
    label_violations: tuple[str, ...]
    # Timestamp of the recovery anchor and its distance in ms from ``end_ms``;
    # both are None when no anchor was found.
    recovery_anchor_ms: int | None
    recovery_lag_ms: int | None
    # Distance in metres between the pre-recovery estimate and the anchor;
    # None when either end of the comparison is missing.
    trajectory_jump_m: float | None

    @property
    def passes_label(self) -> bool:
        """AC-2: the window has samples and all of them are dead_reckoned."""
        if self.samples_inside <= 0:
            return False
        if self.label_violations:
            return False
        return self.dead_reckoned_inside == self.samples_inside

    @property
    def passes_recovery(self) -> bool:
        """AC-3: an anchor was found and its lag fits the recovery budget."""
        lag = self.recovery_lag_ms
        if lag is None:
            return False
        return lag <= MAX_RECOVERY_FRAMES_SAFETY_MS

    @property
    def passes_jump(self) -> bool:
        """AC-4: a jump was measured and it is ≤100 m."""
        jump = self.trajectory_jump_m
        if jump is None:
            return False
        return jump <= MAX_TRAJECTORY_JUMP_M

    @property
    def passes(self) -> bool:
        """True only when the label, recovery and jump checks all pass."""
        if not self.passes_label:
            return False
        if not self.passes_recovery:
            return False
        return self.passes_jump


@dataclass(frozen=True)
class MultiSegmentReport:
    """Roll-up of every per-window report; the scenario asserts on ``passes``."""

    per_window: tuple[PerWindowReport, ...]
    # Indices of windows that did not pass; defaults to an empty tuple.
    failed_windows: tuple[int, ...] = field(default_factory=tuple)

    @property
    def window_count(self) -> int:
        """Number of evaluated windows."""
        return len(self.per_window)

    @property
    def passes_segment_count(self) -> bool:
        """True when at least ``MIN_SEGMENTS_REQUIRED`` windows were evaluated."""
        return MIN_SEGMENTS_REQUIRED <= self.window_count

    @property
    def passes(self) -> bool:
        """True when there are enough windows and none of them failed."""
        if not self.passes_segment_count:
            return False
        if self.failed_windows:
            return False
        return all(report.passes for report in self.per_window)


def load_schedule(schedule_json: Path) -> list[BlackoutWindow]:
    """Read the multi_segment injector's ``schedule.json``.

    Shape (per AZ-408 multi_segment._write_schedule):
        {"segments": [{"start_ms": int, "end_ms": int,
                       "first_frame_idx": int, "last_frame_idx": int}, ...]}

    Args:
        schedule_json: Path to the fixture's ``schedule.json``.

    Returns:
        One ``BlackoutWindow`` per entry of ``segments``, in file order.

    Raises:
        FileNotFoundError: The file does not exist.
        ValueError: The content is not valid JSON, is not a JSON object,
            or has no ``segments`` key.
        KeyError: A segment entry lacks one of the four required keys.
    """
    try:
        # Read directly instead of exists()-then-read (no race), and pin the
        # encoding so parsing does not depend on the host locale.
        raw = schedule_json.read_text(encoding="utf-8")
    except FileNotFoundError as err:
        raise FileNotFoundError(
            f"multi-segment schedule.json not found at {schedule_json} — "
            "build the multi-segment-derkachi fixture first"
        ) from err

    payload = json.loads(raw)
    if not isinstance(payload, dict):
        # A scalar payload would otherwise fail with an opaque TypeError on
        # the membership test below.
        raise ValueError(
            "schedule.json must be a JSON object with a 'segments' key — "
            f"found {type(payload).__name__}"
        )
    if "segments" not in payload:
        raise ValueError(
            f"schedule.json missing 'segments' key — found {list(payload)}"
        )

    return [
        BlackoutWindow(
            start_ms=int(seg["start_ms"]),
            end_ms=int(seg["end_ms"]),
            first_frame_idx=int(seg["first_frame_idx"]),
            last_frame_idx=int(seg["last_frame_idx"]),
        )
        for seg in payload["segments"]
    ]


def evaluate_window(
    window: BlackoutWindow,
    window_index: int,
    samples: Sequence[EstimateSample],
) -> PerWindowReport:
    """Evaluate AC-2 / AC-3 / AC-4 for one blackout window.

    Sample-window classification (inclusive of ``start_ms``, exclusive of
    ``end_ms``) — the recovery search starts at ``end_ms`` and looks
    forward.

    ``samples`` may arrive in any order. They are sorted by
    ``monotonic_ms`` first (stable sort, so equal timestamps keep their
    input order); for already-sorted input the result is unchanged.

    Args:
        window: The blackout window under evaluation.
        window_index: Position of ``window`` in the schedule; copied into
            the report.
        samples: Every estimate observed during the replay, not just the
            ones inside this window.

    Returns:
        A ``PerWindowReport`` holding the raw measurements. The recovery
        and jump fields are ``None`` when no anchor follows the window;
        the jump is also ``None`` when no sample precedes ``end_ms``.
    """
    # The scans below rely on time order; sort so that merged or
    # out-of-order captures cannot pick the wrong reference or anchor.
    timeline = sorted(samples, key=lambda s: s.monotonic_ms)

    # AC-2 bookkeeping over the half-open interval [start_ms, end_ms).
    inside = [
        s for s in timeline if window.start_ms <= s.monotonic_ms < window.end_ms
    ]
    dead_reckoned_inside = sum(1 for s in inside if s.source_label == DEAD_RECKONED)
    label_violations = tuple(
        sorted({s.source_label for s in inside if s.source_label != DEAD_RECKONED})
    )

    # Single pass over the ordered timeline:
    # * AC-4 reference: the last estimate strictly before end_ms. A sample
    #   stamped exactly at end_ms is a recovery candidate, so it must not
    #   double as the pre-recovery reference.
    # * AC-3 anchor: the first satellite_anchored emission at or after end_ms.
    pre_recovery: EstimateSample | None = None
    recovery: EstimateSample | None = None
    for s in timeline:
        if s.monotonic_ms < window.end_ms:
            pre_recovery = s
        elif s.source_label == SATELLITE_ANCHORED:
            recovery = s
            break

    # AC-4 trajectory jump needs both ends of the comparison.
    jump_m: float | None = None
    if recovery is not None and pre_recovery is not None:
        jump_m = distance_m(
            pre_recovery.lat_deg,
            pre_recovery.lon_deg,
            recovery.lat_deg,
            recovery.lon_deg,
        )

    return PerWindowReport(
        window_index=window_index,
        start_ms=window.start_ms,
        end_ms=window.end_ms,
        samples_inside=len(inside),
        dead_reckoned_inside=dead_reckoned_inside,
        label_violations=label_violations,
        recovery_anchor_ms=recovery.monotonic_ms if recovery is not None else None,
        recovery_lag_ms=(recovery.monotonic_ms - window.end_ms) if recovery is not None else None,
        trajectory_jump_m=jump_m,
    )


def evaluate(
    windows: Sequence[BlackoutWindow],
    samples: Sequence[EstimateSample],
) -> MultiSegmentReport:
    """Run ``evaluate_window`` on each window and collect the aggregate report.

    Every sample's ``source_label`` is checked against
    ``ALLOWED_SOURCE_LABELS`` before any window is evaluated.

    Raises:
        ValueError: The first sample (in input order) whose label is not
            in ``ALLOWED_SOURCE_LABELS``.
    """
    offender = next(
        (s for s in samples if s.source_label not in ALLOWED_SOURCE_LABELS),
        None,
    )
    if offender is not None:
        raise ValueError(
            f"unknown source_label '{offender.source_label}' at {offender.monotonic_ms} ms — "
            f"allowed: {sorted(ALLOWED_SOURCE_LABELS)}"
        )

    reports = tuple(
        evaluate_window(win, idx, samples) for idx, win in enumerate(windows)
    )
    failing = tuple(rep.window_index for rep in reports if not rep.passes)
    return MultiSegmentReport(per_window=reports, failed_windows=failing)


def write_csv_evidence(out_path: Path, report: MultiSegmentReport) -> Path:
    """Write FT-P-08 per-window evidence CSV.

    Header: ``window_index, start_ms, end_ms, samples_inside,
    dead_reckoned_inside, label_violations, recovery_lag_ms,
    trajectory_jump_m, passes_label, passes_recovery, passes_jump, passes``.

    Cell formats: label violations are joined with ``|``; a missing lag or
    jump is written as an empty cell; the jump is written with three
    decimals; the verdicts are written as ``true`` / ``false``.

    Args:
        out_path: Destination file. Missing parent directories are created
            and an existing file is overwritten.
        report: Aggregate report whose ``per_window`` rows are written.

    Returns:
        ``out_path``, unchanged.
    """
    header = [
        "window_index",
        "start_ms",
        "end_ms",
        "samples_inside",
        "dead_reckoned_inside",
        "label_violations",
        "recovery_lag_ms",
        "trajectory_jump_m",
        "passes_label",
        "passes_recovery",
        "passes_jump",
        "passes",
    ]
    out_path.parent.mkdir(parents=True, exist_ok=True)
    # newline="" leaves line endings to the csv module; the explicit encoding
    # keeps the evidence file independent of the host locale.
    with out_path.open("w", newline="", encoding="utf-8") as fh:
        writer = csv.writer(fh)
        writer.writerow(header)
        for w in report.per_window:
            writer.writerow(
                [
                    w.window_index,
                    w.start_ms,
                    w.end_ms,
                    w.samples_inside,
                    w.dead_reckoned_inside,
                    "|".join(w.label_violations) if w.label_violations else "",
                    "" if w.recovery_lag_ms is None else w.recovery_lag_ms,
                    "" if w.trajectory_jump_m is None else f"{w.trajectory_jump_m:.3f}",
                    "true" if w.passes_label else "false",
                    "true" if w.passes_recovery else "false",
                    "true" if w.passes_jump else "false",
                    "true" if w.passes else "false",
                ]
            )
    return out_path