# gps-denied-onboard/e2e/runner/helpers/companion_reboot_evaluator.py

"""Companion-process reboot recovery evaluator for NFT-RES-02 (AZ-433 / AC-5.2 + AC-5.3).

Mid-flight, the runner issues a restart command (``docker compose
restart gps-denied-onboard`` on Tier-1, ``systemctl restart
gps-denied-onboard`` on Tier-2). The SUT must:

* AC-1 — actually restart within ``RESTART_TRIGGER_BUDGET_S`` (≤ 5 s) of
  the restart command.
* AC-2 — emit its first post-restart outbound estimate within
  ``RESUME_BUDGET_S`` (≤ 30 s) of the restart command.
* AC-3 — that first post-restart estimate must be within
  ``ACCURACY_BUDGET_M`` (≤ 100 m) of ground truth at that timestamp.

This module owns the pure-logic side of those budgets + CSV evidence.
The scenario test owns the orchestration (issue restart, capture
timestamps, query GT).

Public-boundary discipline: does NOT import any
``src/gps_denied_onboard`` symbol.
"""

from __future__ import annotations

import csv
from dataclasses import dataclass
from pathlib import Path

from .geo import distance_m

# Default budgets for the three acceptance checks. ``evaluate`` uses them as
# keyword-argument defaults, so a caller can override any of them per run.
RESTART_TRIGGER_BUDGET_S = 5.0  # AC-1: max seconds from restart command to process restart
RESUME_BUDGET_S = 30.0  # AC-2: max seconds from restart command to first post-restart estimate
ACCURACY_BUDGET_M = 100.0  # AC-3: max metres between that first estimate and ground truth


@dataclass(frozen=True)
class GeoFix:
    """A WGS84 fix at a monotonic-ms timestamp.

    Immutable value object; ``evaluate`` reads only ``lat_deg`` and
    ``lon_deg`` when measuring the distance between two fixes.
    """

    # Timestamp of the fix, in milliseconds on a monotonic clock.
    monotonic_ms: int
    # Latitude in degrees.
    lat_deg: float
    # Longitude in degrees.
    lon_deg: float


@dataclass(frozen=True)
class RestartEvidence:
    """Captured timestamps + first post-restart fix vs GT.

    All ``*_monotonic_ms`` fields share the runner's monotonic clock so
    deltas are well-defined. ``first_post_restart_estimate`` and
    ``ground_truth_at_first_emission`` are both captured at
    ``first_post_restart_emission_monotonic_ms``.

    ``process_restarted_monotonic_ms`` is the moment (on that same
    monotonic clock) the SUT process is observed to have come back up
    (e.g., first PID write, health probe transition). Used for AC-1 only.
    ``None`` means the restart was never observed, which fails AC-1.

    ``first_post_restart_emission_monotonic_ms`` is the moment the runner
    captures the first outbound estimate AFTER the restart command. Used
    for AC-2 + AC-3. May be ``None`` if no emission arrives in the budget
    window — counted as AC-2 + AC-3 failures.
    """

    # When the runner issued the restart command; the zero point for AC-1/AC-2 deltas.
    restart_command_monotonic_ms: int
    # When the SUT process was observed back up, or None if never observed.
    process_restarted_monotonic_ms: int | None
    # When the first post-restart estimate was captured, or None if none arrived.
    first_post_restart_emission_monotonic_ms: int | None
    # The SUT's first post-restart position estimate, or None if none arrived.
    first_post_restart_estimate: GeoFix | None
    # Ground-truth position matching that first estimate, or None if unavailable.
    ground_truth_at_first_emission: GeoFix | None


@dataclass(frozen=True)
class CompanionRebootReport:
    """Per-run NFT-RES-02 verdict: three measurements and their budgets.

    A measurement of ``None`` means the corresponding event was never
    observed; the matching ``passes_*`` property is then ``False``.
    Budgets are inclusive: a measurement equal to its budget passes.
    """

    restart_trigger_latency_s: float | None
    resume_time_s: float | None
    first_emission_accuracy_m: float | None
    restart_trigger_budget_s: float
    resume_budget_s: float
    accuracy_budget_m: float

    @property
    def passes_restart_trigger(self) -> bool:
        """AC-1: the restart was observed and landed within its budget."""
        latency_s = self.restart_trigger_latency_s
        if latency_s is None:
            return False
        return latency_s <= self.restart_trigger_budget_s

    @property
    def passes_resume_time(self) -> bool:
        """AC-2: a post-restart estimate arrived within the resume budget."""
        resumed_after_s = self.resume_time_s
        if resumed_after_s is None:
            return False
        return resumed_after_s <= self.resume_budget_s

    @property
    def passes_first_emission_accuracy(self) -> bool:
        """AC-3: the first post-restart estimate is within the accuracy budget."""
        error_m = self.first_emission_accuracy_m
        if error_m is None:
            return False
        return error_m <= self.accuracy_budget_m

    @property
    def passes(self) -> bool:
        """Overall verdict: every individual check must pass."""
        return all(
            (
                self.passes_restart_trigger,
                self.passes_resume_time,
                self.passes_first_emission_accuracy,
            )
        )


def _elapsed_since_command_s(
    command_ms: int,
    event_ms: int | None,
    precedes_error: str,
) -> float | None:
    """Return seconds from the restart command to an event, or ``None`` if unobserved.

    Raises:
        ValueError: with ``precedes_error`` as its message when the event is
            stamped before the restart command.
    """
    if event_ms is None:
        return None
    delta_ms = event_ms - command_ms
    if delta_ms < 0:
        raise ValueError(precedes_error)
    return delta_ms / 1000.0


def evaluate(
    evidence: RestartEvidence,
    *,
    restart_trigger_budget_s: float = RESTART_TRIGGER_BUDGET_S,
    resume_budget_s: float = RESUME_BUDGET_S,
    accuracy_budget_m: float = ACCURACY_BUDGET_M,
) -> CompanionRebootReport:
    """Compute the AC-1 + AC-2 + AC-3 verdict from captured restart evidence.

    Args:
        evidence: Timestamps and fixes captured by the scenario test.
        restart_trigger_budget_s: AC-1 budget in seconds.
        resume_budget_s: AC-2 budget in seconds.
        accuracy_budget_m: AC-3 budget in metres.

    Returns:
        A report holding the three measurements (``None`` where the
        corresponding event was not observed) together with the budgets
        they are judged against.

    Raises:
        ValueError: if the process-restarted or first-emission timestamp
            precedes the restart command, which indicates a capture bug.
    """
    command_ms = evidence.restart_command_monotonic_ms
    restarted_ms = evidence.process_restarted_monotonic_ms
    emission_ms = evidence.first_post_restart_emission_monotonic_ms

    # AC-1: restart command -> process observed back up.
    trigger_latency = _elapsed_since_command_s(
        command_ms,
        restarted_ms,
        "process_restarted precedes restart_command — clock-skew bug? "
        f"command={command_ms} "
        f"restarted={restarted_ms}",
    )

    # AC-2: restart command -> first post-restart estimate. The message now
    # carries the offending timestamps, as the AC-1 message already does.
    resume_time = _elapsed_since_command_s(
        command_ms,
        emission_ms,
        "first_post_restart_emission precedes restart_command — "
        "ordering bug; an emission BEFORE the restart command "
        "cannot be the 'first post-restart' emission "
        f"command={command_ms} emission={emission_ms}",
    )

    # AC-3: only meaningful when an emission was actually captured. Without an
    # emission timestamp there is no "first post-restart emission", so no
    # accuracy is reported and AC-3 fails, even if fixes were supplied.
    estimate = evidence.first_post_restart_estimate
    truth = evidence.ground_truth_at_first_emission
    accuracy_m: float | None = None
    if emission_ms is not None and estimate is not None and truth is not None:
        accuracy_m = distance_m(
            estimate.lat_deg,
            estimate.lon_deg,
            truth.lat_deg,
            truth.lon_deg,
        )

    return CompanionRebootReport(
        restart_trigger_latency_s=trigger_latency,
        resume_time_s=resume_time,
        first_emission_accuracy_m=accuracy_m,
        restart_trigger_budget_s=restart_trigger_budget_s,
        resume_budget_s=resume_budget_s,
        accuracy_budget_m=accuracy_budget_m,
    )


def write_csv_evidence(out_path: Path, report: CompanionRebootReport) -> Path:
    """Write the aggregate-summary CSV (header plus one row for this run).

    Missing measurements are written as empty cells, numbers with three
    decimals, and verdicts as ``true``/``false``. Parent directories are
    created as needed and an existing file is overwritten.

    Args:
        out_path: Destination CSV file.
        report: Verdict to serialise.

    Returns:
        ``out_path``, unchanged.
    """

    def fmt_number(value: float | None) -> str:
        # An unobserved measurement becomes an empty cell.
        return "" if value is None else f"{value:.3f}"

    def fmt_flag(flag: bool) -> str:
        return "true" if flag else "false"

    # Build the whole row before touching the filesystem so a failure here
    # cannot leave a truncated file behind.
    columns = [
        ("restart_trigger_latency_s", fmt_number(report.restart_trigger_latency_s)),
        ("restart_trigger_budget_s", fmt_number(report.restart_trigger_budget_s)),
        ("ac1_passes", fmt_flag(report.passes_restart_trigger)),
        ("resume_time_s", fmt_number(report.resume_time_s)),
        ("resume_budget_s", fmt_number(report.resume_budget_s)),
        ("ac2_passes", fmt_flag(report.passes_resume_time)),
        ("first_emission_accuracy_m", fmt_number(report.first_emission_accuracy_m)),
        ("accuracy_budget_m", fmt_number(report.accuracy_budget_m)),
        ("ac3_passes", fmt_flag(report.passes_first_emission_accuracy)),
        ("passes", fmt_flag(report.passes)),
    ]

    out_path.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding keeps the evidence file independent of the host
    # locale; newline="" lets the csv module control line endings.
    with out_path.open("w", newline="", encoding="utf-8") as fh:
        writer = csv.writer(fh)
        writer.writerow([name for name, _ in columns])
        writer.writerow([cell for _, cell in columns])
    return out_path