mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 14:51:13 +00:00
330893be5c
Batch 86: 4 NFT-RES blackbox scenarios + 4 helper evaluators + 74 unit tests + directory-layout registration. * AZ-432 NFT-RES-01: 30 s IMU-only fallback drift bound (AC-3.5 + AC-NEW-7); two sub-cases (no_imu ≤100m, good_imu_combined_factor ≤50m). * AZ-433 NFT-RES-02: companion mid-flight reboot (AC-5.2 + AC-5.3); resume ≤30s + first-emission accuracy ≤100m. * AZ-434 NFT-RES-03: 100-iteration Monte Carlo envelope (AC-NEW-4); iteration-count + master-seed determinism + envelope ratio ≥0.95. Canonical-param by default; E2E_NFT_RES_03_FULL_MATRIX=1 unlocks matrix. * AZ-435 NFT-RES-04: 35s blackout+spoof escalation ladder (AC-NEW-8); AC-1 (cov-2d→fix-degrade ≤500ms) + AC-2 (failsafe→999+STATUSTEXT ≤500ms) + AC-ORDER (strict ordering). Verdict: PASS_WITH_WARNINGS (0 Critical, 0 High, 0 Medium, 5 Low). F5 documents intentional threshold duplication with blackout_spoof evaluator (prevents contract drift between FT-N-04 and NFT-RES-04). Co-authored-by: Cursor <cursoragent@cursor.com>
202 lines
7.0 KiB
Python
202 lines
7.0 KiB
Python
"""Companion-process reboot recovery evaluator for NFT-RES-02 (AZ-433 / AC-5.2 + AC-5.3).

Mid-flight, the runner issues a restart command (``docker compose
restart gps-denied-onboard`` on Tier-1, ``systemctl restart
gps-denied-onboard`` on Tier-2). The SUT must:

* AC-1 — actually restart within ≤``RESTART_TRIGGER_BUDGET_S`` (5 s).
* AC-2 — emit its first post-restart outbound estimate within
  ≤``RESUME_BUDGET_S`` (30 s) of the restart command.
* AC-3 — that first post-restart estimate must be within
  ≤``ACCURACY_BUDGET_M`` (100 m) of ground truth at that timestamp.

This module owns the pure-logic side of those budgets + CSV evidence.
The scenario test owns the orchestration (issue restart, capture
timestamps, query GT).

Public-boundary discipline: does NOT import any
``src/gps_denied_onboard`` symbol.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import csv
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
from .geo import distance_m
|
|
|
|
# Default NFT-RES-02 budgets; ``evaluate`` uses them unless a caller overrides.
# AC-1: seconds allowed between the restart command and the observed restart.
RESTART_TRIGGER_BUDGET_S = 5.0
# AC-2: seconds allowed between the restart command and the first
# post-restart outbound estimate.
RESUME_BUDGET_S = 30.0
# AC-3: metres allowed between that first estimate and ground truth.
ACCURACY_BUDGET_M = 100.0
|
|
|
|
|
|
@dataclass(frozen=True)
class GeoFix:
    """Immutable WGS84 position sample stamped with a monotonic-clock time.

    ``monotonic_ms`` is the sample time in milliseconds; ``lat_deg`` and
    ``lon_deg`` are the latitude and longitude in degrees.
    """

    monotonic_ms: int
    lat_deg: float
    lon_deg: float
|
|
|
|
|
|
@dataclass(frozen=True)
class RestartEvidence:
    """Raw observations from one restart run, as captured by the runner.

    Every ``*_monotonic_ms`` field is read from the runner's monotonic
    clock, so subtracting one from another yields a meaningful duration.

    ``restart_command_monotonic_ms`` marks when the restart command was
    issued; it is the reference point for both latency measurements.

    ``process_restarted_monotonic_ms`` marks when the SUT process was
    observed to be back up (e.g., first PID write, health probe
    transition). Only AC-1 uses it; ``None`` means it was not observed.

    ``first_post_restart_emission_monotonic_ms`` marks when the runner
    captured the first outbound estimate AFTER the restart command. It
    feeds AC-2 + AC-3. ``None`` means no emission arrived in the budget
    window, which counts as an AC-2 + AC-3 failure.

    ``first_post_restart_estimate`` and ``ground_truth_at_first_emission``
    are the SUT estimate and the matching ground truth, both taken at
    ``first_post_restart_emission_monotonic_ms``.
    """

    restart_command_monotonic_ms: int
    process_restarted_monotonic_ms: int | None
    first_post_restart_emission_monotonic_ms: int | None
    first_post_restart_estimate: GeoFix | None
    ground_truth_at_first_emission: GeoFix | None
|
|
|
|
|
|
@dataclass(frozen=True)
class CompanionRebootReport:
    """Measured values and budgets for one NFT-RES-02 run, plus verdicts.

    The three measurement fields are ``None`` when the corresponding
    evidence was missing; a missing measurement always fails its check.
    Each budget is inclusive: a measurement equal to its budget passes.
    """

    restart_trigger_latency_s: float | None
    resume_time_s: float | None
    first_emission_accuracy_m: float | None
    restart_trigger_budget_s: float
    resume_budget_s: float
    accuracy_budget_m: float

    @property
    def passes_restart_trigger(self) -> bool:
        """AC-1: restart latency was measured and is within its budget."""
        latency_s = self.restart_trigger_latency_s
        if latency_s is None:
            return False
        return latency_s <= self.restart_trigger_budget_s

    @property
    def passes_resume_time(self) -> bool:
        """AC-2: resume time was measured and is within its budget."""
        resumed_after_s = self.resume_time_s
        if resumed_after_s is None:
            return False
        return resumed_after_s <= self.resume_budget_s

    @property
    def passes_first_emission_accuracy(self) -> bool:
        """AC-3: first-emission error was measured and is within its budget."""
        error_m = self.first_emission_accuracy_m
        if error_m is None:
            return False
        return error_m <= self.accuracy_budget_m

    @property
    def passes(self) -> bool:
        """Overall verdict: true only when AC-1, AC-2 and AC-3 all pass."""
        if not self.passes_restart_trigger:
            return False
        if not self.passes_resume_time:
            return False
        return self.passes_first_emission_accuracy
|
|
|
|
|
|
def evaluate(
    evidence: RestartEvidence,
    *,
    restart_trigger_budget_s: float = RESTART_TRIGGER_BUDGET_S,
    resume_budget_s: float = RESUME_BUDGET_S,
    accuracy_budget_m: float = ACCURACY_BUDGET_M,
) -> CompanionRebootReport:
    """Turn captured restart evidence into the AC-1 + AC-2 + AC-3 report.

    Both latencies are measured from ``restart_command_monotonic_ms`` and
    converted from milliseconds to seconds. A measurement whose evidence
    is ``None`` stays ``None`` on the report. The three budgets are copied
    onto the report unchanged; the pass/fail comparison happens there.

    Raises ``ValueError`` when the process-restart timestamp or the
    first-emission timestamp is earlier than the restart command.

    NOTE(review): the accuracy is computed whenever both fixes are
    present, regardless of whether the emission timestamp is ``None`` —
    confirm callers never supply fixes without that timestamp.
    """
    command_ms = evidence.restart_command_monotonic_ms

    # AC-1 input: command -> process observed back up.
    restarted_ms = evidence.process_restarted_monotonic_ms
    if restarted_ms is None:
        trigger_latency_s: float | None = None
    else:
        trigger_gap_ms = restarted_ms - command_ms
        if trigger_gap_ms < 0:
            raise ValueError(
                "process_restarted precedes restart_command — clock-skew bug? "
                f"command={command_ms} "
                f"restarted={restarted_ms}"
            )
        trigger_latency_s = trigger_gap_ms / 1000.0

    # AC-2 input: command -> first outbound estimate after the restart.
    emission_ms = evidence.first_post_restart_emission_monotonic_ms
    if emission_ms is None:
        resume_time_s: float | None = None
    else:
        resume_gap_ms = emission_ms - command_ms
        if resume_gap_ms < 0:
            raise ValueError(
                "first_post_restart_emission precedes restart_command — "
                "ordering bug; an emission BEFORE the restart command "
                "cannot be the 'first post-restart' emission"
            )
        resume_time_s = resume_gap_ms / 1000.0

    # AC-3 input: separation between the estimate and ground truth.
    # presumably ``distance_m`` returns metres — verify against ``.geo``.
    estimate = evidence.first_post_restart_estimate
    truth = evidence.ground_truth_at_first_emission
    error_m: float | None = None
    if not (estimate is None or truth is None):
        error_m = distance_m(
            estimate.lat_deg,
            estimate.lon_deg,
            truth.lat_deg,
            truth.lon_deg,
        )

    return CompanionRebootReport(
        restart_trigger_latency_s=trigger_latency_s,
        resume_time_s=resume_time_s,
        first_emission_accuracy_m=error_m,
        restart_trigger_budget_s=restart_trigger_budget_s,
        resume_budget_s=resume_budget_s,
        accuracy_budget_m=accuracy_budget_m,
    )
|
|
|
|
|
|
def write_csv_evidence(out_path: Path, report: CompanionRebootReport) -> Path:
    """Write the one-row aggregate summary CSV for a run; return ``out_path``.

    Missing parent directories are created and an existing file is
    overwritten. The file holds a header row and one data row. Numbers are
    rendered with three decimals, a ``None`` measurement becomes an empty
    cell, and verdicts are written as ``true`` / ``false``.
    """

    def _measured(value):
        # Optional measurement: blank cell when absent.
        return "" if value is None else f"{value:.3f}"

    def _verdict(passed):
        return "true" if passed else "false"

    header = [
        "restart_trigger_latency_s",
        "restart_trigger_budget_s",
        "ac1_passes",
        "resume_time_s",
        "resume_budget_s",
        "ac2_passes",
        "first_emission_accuracy_m",
        "accuracy_budget_m",
        "ac3_passes",
        "passes",
    ]

    out_path.parent.mkdir(parents=True, exist_ok=True)
    # newline="" leaves line termination to the csv writer.
    with out_path.open("w", newline="") as fh:
        sink = csv.writer(fh)
        sink.writerow(header)
        # Cells are listed in the same order as ``header`` above.
        sink.writerow(
            [
                _measured(report.restart_trigger_latency_s),
                f"{report.restart_trigger_budget_s:.3f}",
                _verdict(report.passes_restart_trigger),
                _measured(report.resume_time_s),
                f"{report.resume_budget_s:.3f}",
                _verdict(report.passes_resume_time),
                _measured(report.first_emission_accuracy_m),
                f"{report.accuracy_budget_m:.3f}",
                _verdict(report.passes_first_emission_accuracy),
                _verdict(report.passes),
            ]
        )
    return out_path
|