mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 09:01:14 +00:00
[AZ-432] [AZ-433] [AZ-434] [AZ-435] Add NFT-RES-01..04 resilience scenarios
Batch 86: 4 NFT-RES blackbox scenarios + 4 helper evaluators + 74 unit tests + directory-layout registration. * AZ-432 NFT-RES-01: 30 s IMU-only fallback drift bound (AC-3.5 + AC-NEW-7); two sub-cases (no_imu ≤100m, good_imu_combined_factor ≤50m). * AZ-433 NFT-RES-02: companion mid-flight reboot (AC-5.2 + AC-5.3); resume ≤30s + first-emission accuracy ≤100m. * AZ-434 NFT-RES-03: 100-iteration Monte Carlo envelope (AC-NEW-4); iteration-count + master-seed determinism + envelope ratio ≥0.95. Canonical-param by default; E2E_NFT_RES_03_FULL_MATRIX=1 unlocks matrix. * AZ-435 NFT-RES-04: 35s blackout+spoof escalation ladder (AC-NEW-8); AC-1 (cov-2d→fix-degrade ≤500ms) + AC-2 (failsafe→999+STATUSTEXT ≤500ms) + AC-ORDER (strict ordering). Verdict: PASS_WITH_WARNINGS (0 Critical, 0 High, 0 Medium, 5 Low). F5 documents intentional threshold duplication with blackout_spoof evaluator (prevents contract drift between FT-N-04 and NFT-RES-04). Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,201 @@
|
||||
"""Companion-process reboot recovery evaluator for NFT-RES-02 (AZ-433 / AC-5.2 + AC-5.3).
|
||||
|
||||
Mid-flight, the runner issues a restart command (``docker compose
|
||||
restart gps-denied-onboard`` on Tier-1, ``systemctl restart
|
||||
gps-denied-onboard`` on Tier-2). The SUT must:
|
||||
|
||||
* AC-1 — actually restart within ≤``RESTART_TRIGGER_BUDGET_S`` (5 s).
|
||||
* AC-2 — emit its first post-restart outbound estimate within
|
||||
≤``RESUME_BUDGET_S`` (30 s) of the restart command.
|
||||
* AC-3 — that first post-restart estimate must be within
|
||||
≤``ACCURACY_BUDGET_M`` (100 m) of ground truth at that timestamp.
|
||||
|
||||
This module owns the pure-logic side of those budgets + CSV evidence.
|
||||
The scenario test owns the orchestration (issue restart, capture
|
||||
timestamps, query GT).
|
||||
|
||||
Public-boundary discipline: does NOT import any
|
||||
``src/gps_denied_onboard`` symbol.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from .geo import distance_m
|
||||
|
||||
# Default NFT-RES-02 budgets; each one is the default of the matching
# keyword argument of ``evaluate`` in this module.
RESTART_TRIGGER_BUDGET_S = 5.0  # AC-1: restart command -> process back up (s)
RESUME_BUDGET_S = 30.0  # AC-2: restart command -> first post-restart estimate (s)
ACCURACY_BUDGET_M = 100.0  # AC-3: first post-restart estimate vs ground truth (m)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class GeoFix:
    """Immutable WGS84 position stamped with a monotonic-clock time in ms."""

    monotonic_ms: int  # timestamp on the monotonic clock, milliseconds
    lat_deg: float  # latitude, degrees
    lon_deg: float  # longitude, degrees
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class RestartEvidence:
    """Raw observations of one restart, as captured by the scenario runner.

    Every ``*_monotonic_ms`` value is expected to come from the runner's
    single monotonic clock, so that subtracting two of them yields a
    meaningful elapsed time.

    * ``restart_command_monotonic_ms`` — when the restart command was
      issued; the reference point for both latency measurements.
    * ``process_restarted_monotonic_ms`` — when the SUT process was
      observed to be back up (e.g. first PID write, health-probe
      transition). Feeds AC-1 only; ``None`` when never observed.
    * ``first_post_restart_emission_monotonic_ms`` — when the runner saw
      the first outbound estimate after the restart command. Feeds AC-2;
      ``None`` when no emission arrived.
    * ``first_post_restart_estimate`` / ``ground_truth_at_first_emission``
      — the SUT fix and the matching ground-truth fix, both taken at the
      first-emission timestamp. Feed AC-3; either may be ``None``.
    """

    restart_command_monotonic_ms: int
    process_restarted_monotonic_ms: int | None
    first_post_restart_emission_monotonic_ms: int | None
    first_post_restart_estimate: GeoFix | None
    ground_truth_at_first_emission: GeoFix | None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class CompanionRebootReport:
    """Measured values, budgets and per-AC verdicts for one NFT-RES-02 run.

    A measurement of ``None`` means the quantity could not be observed
    and always counts as a failure of the corresponding AC.
    """

    restart_trigger_latency_s: float | None  # AC-1 measurement, seconds
    resume_time_s: float | None  # AC-2 measurement, seconds
    first_emission_accuracy_m: float | None  # AC-3 measurement, metres
    restart_trigger_budget_s: float  # AC-1 limit (inclusive)
    resume_budget_s: float  # AC-2 limit (inclusive)
    accuracy_budget_m: float  # AC-3 limit (inclusive)

    @property
    def passes_restart_trigger(self) -> bool:
        """AC-1: restart latency was measured and is within budget."""
        if self.restart_trigger_latency_s is None:
            return False
        return self.restart_trigger_latency_s <= self.restart_trigger_budget_s

    @property
    def passes_resume_time(self) -> bool:
        """AC-2: resume time was measured and is within budget."""
        if self.resume_time_s is None:
            return False
        return self.resume_time_s <= self.resume_budget_s

    @property
    def passes_first_emission_accuracy(self) -> bool:
        """AC-3: first-emission error was measured and is within budget."""
        if self.first_emission_accuracy_m is None:
            return False
        return self.first_emission_accuracy_m <= self.accuracy_budget_m

    @property
    def passes(self) -> bool:
        """Overall verdict: AC-1, AC-2 and AC-3 must all pass."""
        verdicts = (
            self.passes_restart_trigger,
            self.passes_resume_time,
            self.passes_first_emission_accuracy,
        )
        return all(verdicts)
|
||||
|
||||
|
||||
def evaluate(
    evidence: RestartEvidence,
    *,
    restart_trigger_budget_s: float = RESTART_TRIGGER_BUDGET_S,
    resume_budget_s: float = RESUME_BUDGET_S,
    accuracy_budget_m: float = ACCURACY_BUDGET_M,
) -> CompanionRebootReport:
    """Turn captured restart evidence into the AC-1 / AC-2 / AC-3 report.

    Args:
        evidence: Timestamps and fixes captured by the scenario runner.
        restart_trigger_budget_s: AC-1 limit stored on the report.
        resume_budget_s: AC-2 limit stored on the report.
        accuracy_budget_m: AC-3 limit stored on the report.

    Returns:
        A report whose measurements are ``None`` wherever the matching
        evidence is absent.

    Raises:
        ValueError: If the process-restarted or first-emission timestamp
            lies before the restart command timestamp.
    """
    command_ms = evidence.restart_command_monotonic_ms

    # AC-1: restart command -> process observed back up.
    restarted_ms = evidence.process_restarted_monotonic_ms
    if restarted_ms is None:
        trigger_latency = None
    elif restarted_ms < command_ms:
        raise ValueError(
            "process_restarted precedes restart_command — clock-skew bug? "
            f"command={evidence.restart_command_monotonic_ms} "
            f"restarted={evidence.process_restarted_monotonic_ms}"
        )
    else:
        trigger_latency = (restarted_ms - command_ms) / 1000.0

    # AC-2: restart command -> first outbound estimate.
    emission_ms = evidence.first_post_restart_emission_monotonic_ms
    if emission_ms is None:
        resume_time = None
    elif emission_ms < command_ms:
        raise ValueError(
            "first_post_restart_emission precedes restart_command — "
            "ordering bug; an emission BEFORE the restart command "
            "cannot be the 'first post-restart' emission"
        )
    else:
        resume_time = (emission_ms - command_ms) / 1000.0

    # AC-3: distance between the first estimate and ground truth; needs both.
    estimate = evidence.first_post_restart_estimate
    truth = evidence.ground_truth_at_first_emission
    accuracy_m = None
    if not (estimate is None or truth is None):
        accuracy_m = distance_m(
            estimate.lat_deg,
            estimate.lon_deg,
            truth.lat_deg,
            truth.lon_deg,
        )

    return CompanionRebootReport(
        restart_trigger_latency_s=trigger_latency,
        resume_time_s=resume_time,
        first_emission_accuracy_m=accuracy_m,
        restart_trigger_budget_s=restart_trigger_budget_s,
        resume_budget_s=resume_budget_s,
        accuracy_budget_m=accuracy_budget_m,
    )
|
||||
|
||||
|
||||
def write_csv_evidence(out_path: Path, report: CompanionRebootReport) -> Path:
    """Write the run summary as a two-line CSV (header + one data row).

    Missing parent directories are created. Measurements and budgets are
    written with three decimals, absent measurements as an empty cell,
    verdicts as ``true`` / ``false``. Returns ``out_path``.
    """

    def _flag(ok: bool) -> str:
        return "true" if ok else "false"

    def _opt(value: float | None) -> str:
        return "" if value is None else f"{value:.3f}"

    header = [
        "restart_trigger_latency_s",
        "restart_trigger_budget_s",
        "ac1_passes",
        "resume_time_s",
        "resume_budget_s",
        "ac2_passes",
        "first_emission_accuracy_m",
        "accuracy_budget_m",
        "ac3_passes",
        "passes",
    ]
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", newline="") as fh:
        writer = csv.writer(fh)
        writer.writerow(header)
        writer.writerow(
            [
                _opt(report.restart_trigger_latency_s),
                f"{report.restart_trigger_budget_s:.3f}",
                _flag(report.passes_restart_trigger),
                _opt(report.resume_time_s),
                f"{report.resume_budget_s:.3f}",
                _flag(report.passes_resume_time),
                _opt(report.first_emission_accuracy_m),
                f"{report.accuracy_budget_m:.3f}",
                _flag(report.passes_first_emission_accuracy),
                _flag(report.passes),
            ]
        )
    return out_path
|
||||
@@ -0,0 +1,324 @@
|
||||
"""Escalation-ladder evaluator for NFT-RES-04 (AZ-435 / AC-NEW-8 escalation order).
|
||||
|
||||
FT-N-04 already proves the 35 s blackout-with-spoof window's per-AC
|
||||
thresholds (see ``blackout_spoof_evaluator``). NFT-RES-04 is the
|
||||
*resilience-tier* scenario: it asserts the **full ladder fires in
|
||||
observable order** within tight latency budgets:
|
||||
|
||||
* AC-1 — when the SUT's reported 95 % covariance crosses
|
||||
``COV_2D_THRESHOLD_M`` (100 m), MAVLink fix-quality degrades to
|
||||
≤``FIX_TYPE_2D`` within ≤``ESCALATION_LATENCY_MS`` (500 ms) of the
|
||||
crossing.
|
||||
* AC-2 — when covariance crosses ``COV_FAILSAFE_THRESHOLD_M`` (500 m)
|
||||
OR blackout duration exceeds ``DURATION_FAILSAFE_S`` (30 s), the
|
||||
outbound ``horiz_accuracy`` becomes ``HORIZ_ACCURACY_FAILSAFE``
|
||||
(999.0) AND a ``VISUAL_BLACKOUT_FAILSAFE`` STATUSTEXT is emitted
|
||||
within ≤``ESCALATION_LATENCY_MS`` of the trigger.
|
||||
* AC-ORDER — the AC-1 fix-degrade crossing must precede the AC-2
|
||||
failsafe trigger in observed time. A later-than-AC-2 cov-2d crossing
|
||||
is a strict-monotonicity bug because the SUT cannot un-cross 100 m
|
||||
on its way past 500 m.
|
||||
|
||||
This evaluator deliberately re-defines the thresholds locally rather
|
||||
than importing them from ``blackout_spoof_evaluator`` so a future
|
||||
contract drift in either evaluator does not silently propagate.
|
||||
|
||||
Public-boundary discipline: does NOT import any
|
||||
``src/gps_denied_onboard`` symbol.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Sequence
|
||||
|
||||
# AC-1 trigger: covariance (m) at or above which fix quality must degrade.
COV_2D_THRESHOLD_M = 100.0
# AC-2 triggers: covariance (m) or blackout duration (s) that force failsafe.
COV_FAILSAFE_THRESHOLD_M = 500.0
DURATION_FAILSAFE_S = 30.0
FIX_TYPE_2D = 2  # MAVLink GPS_FIX_TYPE_2D
# Expected failsafe outputs: horiz_accuracy sentinel and STATUSTEXT marker.
HORIZ_ACCURACY_FAILSAFE = 999.0
STATUSTEXT_FAILSAFE = "VISUAL_BLACKOUT_FAILSAFE"
# Default latency budget (ms) from a trigger to its observable reaction.
ESCALATION_LATENCY_MS = 500
# Nominal blackout window length and accepted deviation (s).
EXPECTED_WINDOW_S = 35.0
WINDOW_TOLERANCE_S = 2.0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class BlackoutWindow:
    """Blackout interval (monotonic ms) over which the ladder is judged."""

    onset_monotonic_ms: int  # window start, monotonic milliseconds
    end_monotonic_ms: int  # window end, monotonic milliseconds

    @property
    def duration_s(self) -> float:
        """Window length in seconds (end minus onset)."""
        elapsed_ms = self.end_monotonic_ms - self.onset_monotonic_ms
        return elapsed_ms / 1000.0

    @property
    def is_35s(self) -> bool:
        """True when the length is within ``WINDOW_TOLERANCE_S`` of ``EXPECTED_WINDOW_S``."""
        deviation_s = abs(self.duration_s - EXPECTED_WINDOW_S)
        return deviation_s <= WINDOW_TOLERANCE_S
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class EstimateSample:
    """A single outbound estimate observed at a monotonic-ms timestamp."""

    monotonic_ms: int  # observation time, monotonic milliseconds
    cov_semi_major_m: float  # compared against the covariance thresholds (m)
    horiz_accuracy: float  # compared against HORIZ_ACCURACY_FAILSAFE
    fix_type: int  # compared against FIX_TYPE_2D
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class StatustextSample:
    """A single STATUSTEXT message observed at a monotonic-ms timestamp."""

    monotonic_ms: int  # observation time, monotonic milliseconds
    text: str  # message text; searched for the failsafe marker
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class FixDegradeReport:
    """AC-1 outcome: latency from the cov-2d crossing to the fix-type degrade."""

    cov2d_crossed_at_ms: int | None  # when the covariance threshold was crossed
    fix_degraded_at_ms: int | None  # when the degraded fix type was seen
    latency_ms: int | None  # degrade time minus crossing time
    budget_ms: int  # inclusive latency limit

    @property
    def passes(self) -> bool:
        """True only if all three observations exist and latency is within budget."""
        observed = (
            self.cov2d_crossed_at_ms,
            self.fix_degraded_at_ms,
            self.latency_ms,
        )
        if any(value is None for value in observed):
            return False
        return self.latency_ms <= self.budget_ms
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class FailsafeReport:
    """AC-2 outcome: latencies from the failsafe trigger to both reactions."""

    failsafe_trigger_at_ms: int | None  # when the failsafe condition tripped
    horiz_999_at_ms: int | None  # when the failsafe horiz_accuracy was seen
    horiz_999_latency_ms: int | None  # horiz_999 time minus trigger time
    statustext_at_ms: int | None  # when the failsafe STATUSTEXT was seen
    statustext_latency_ms: int | None  # STATUSTEXT time minus trigger time
    budget_ms: int  # inclusive latency limit for both reactions

    @property
    def horiz_999_passes(self) -> bool:
        """Trigger and horiz-accuracy latency both known, latency within budget."""
        if self.failsafe_trigger_at_ms is None or self.horiz_999_latency_ms is None:
            return False
        return self.horiz_999_latency_ms <= self.budget_ms

    @property
    def statustext_passes(self) -> bool:
        """Trigger and STATUSTEXT latency both known, latency within budget."""
        if self.failsafe_trigger_at_ms is None or self.statustext_latency_ms is None:
            return False
        return self.statustext_latency_ms <= self.budget_ms

    @property
    def passes(self) -> bool:
        """AC-2 passes only when both reactions pass."""
        if not self.horiz_999_passes:
            return False
        return self.statustext_passes
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class OrderingReport:
    """AC-ORDER outcome: the cov-2d crossing must come before the failsafe trigger."""

    cov2d_at_ms: int | None  # time of the cov-2d crossing
    failsafe_trigger_at_ms: int | None  # time of the failsafe trigger

    @property
    def passes(self) -> bool:
        """True only when both times exist and the crossing is strictly earlier."""
        crossing, trigger = self.cov2d_at_ms, self.failsafe_trigger_at_ms
        if crossing is not None and trigger is not None:
            return crossing < trigger
        # With either time missing the ordering cannot be checked, so it fails.
        return False
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class EscalationLadderReport:
    """Combined NFT-RES-04 verdict for a single blackout window."""

    window: BlackoutWindow  # the evaluated window
    fix_degrade: FixDegradeReport  # AC-1 result
    failsafe: FailsafeReport  # AC-2 result
    ordering: OrderingReport  # AC-ORDER result

    @property
    def passes_window(self) -> bool:
        """Whether the window has the expected length (delegates to ``window.is_35s``)."""
        return self.window.is_35s

    @property
    def passes(self) -> bool:
        """Overall verdict: window length, AC-1, AC-2 and AC-ORDER all pass."""
        verdicts = (
            self.passes_window,
            self.fix_degrade.passes,
            self.failsafe.passes,
            self.ordering.passes,
        )
        return all(verdicts)
|
||||
|
||||
|
||||
def _samples_in_window(
|
||||
window: BlackoutWindow, samples: Sequence[EstimateSample]
|
||||
) -> list[EstimateSample]:
|
||||
return [
|
||||
s
|
||||
for s in samples
|
||||
if window.onset_monotonic_ms <= s.monotonic_ms <= window.end_monotonic_ms
|
||||
]
|
||||
|
||||
|
||||
def _first_cov_crossing(
    window: BlackoutWindow,
    samples: Sequence[EstimateSample],
    threshold_m: float,
) -> int | None:
    """Timestamp of the first in-window sample with covariance >= ``threshold_m``.

    "First" means first in the order of ``samples``; returns ``None`` when
    no in-window sample reaches the threshold.
    """
    crossings = (
        sample.monotonic_ms
        for sample in _samples_in_window(window, samples)
        if sample.cov_semi_major_m >= threshold_m
    )
    return next(crossings, None)
|
||||
|
||||
|
||||
def _first_fix_degrade(
    samples: Sequence[EstimateSample], from_ms: int
) -> int | None:
    """Timestamp of the first sample at/after ``from_ms`` with a degraded fix type.

    A fix type counts as degraded when it lies in ``[0, FIX_TYPE_2D]``.
    "First" follows the order of ``samples``; ``None`` when nothing matches.
    """
    degraded = (
        sample.monotonic_ms
        for sample in samples
        if sample.monotonic_ms >= from_ms and 0 <= sample.fix_type <= FIX_TYPE_2D
    )
    return next(degraded, None)
|
||||
|
||||
|
||||
def _first_horiz_999(samples: Sequence[EstimateSample], from_ms: int) -> int | None:
    """Timestamp of the first sample at/after ``from_ms`` reporting the failsafe accuracy.

    Matches ``horiz_accuracy == HORIZ_ACCURACY_FAILSAFE`` exactly. "First"
    follows the order of ``samples``; ``None`` when nothing matches.
    """
    hits = (
        sample.monotonic_ms
        for sample in samples
        if sample.monotonic_ms >= from_ms
        and sample.horiz_accuracy == HORIZ_ACCURACY_FAILSAFE
    )
    return next(hits, None)
|
||||
|
||||
|
||||
def _first_failsafe_statustext(
    statustexts: Sequence[StatustextSample], from_ms: int
) -> int | None:
    """Timestamp of the first STATUSTEXT at/after ``from_ms`` containing the failsafe marker.

    The marker ``STATUSTEXT_FAILSAFE`` is matched as a substring. "First"
    follows the order of ``statustexts``; ``None`` when nothing matches.
    """
    hits = (
        message.monotonic_ms
        for message in statustexts
        if message.monotonic_ms >= from_ms and STATUSTEXT_FAILSAFE in message.text
    )
    return next(hits, None)
|
||||
|
||||
|
||||
def evaluate(
    window: BlackoutWindow,
    *,
    estimates: Sequence[EstimateSample],
    statustexts: Sequence[StatustextSample],
    budget_ms: int = ESCALATION_LATENCY_MS,
) -> EscalationLadderReport:
    """Build the AC-1, AC-2 and AC-ORDER verdicts for one blackout window.

    Args:
        window: The blackout interval being judged.
        estimates: Outbound estimates; "first" lookups follow their order.
        statustexts: Observed STATUSTEXT messages.
        budget_ms: Latency limit applied to AC-1 and to both AC-2 reactions.

    Returns:
        The aggregate report; any timestamp or latency that could not be
        determined is ``None``.
    """
    # AC-1: cov-2d crossing inside the window, then the first fix degrade
    # at or after that crossing.
    cov2d_at = _first_cov_crossing(window, estimates, COV_2D_THRESHOLD_M)
    fix_degraded_at: int | None = None
    fix_latency: int | None = None
    if cov2d_at is not None:
        fix_degraded_at = _first_fix_degrade(estimates, cov2d_at)
        if fix_degraded_at is not None:
            fix_latency = fix_degraded_at - cov2d_at
    fix_report = FixDegradeReport(
        cov2d_crossed_at_ms=cov2d_at,
        fix_degraded_at_ms=fix_degraded_at,
        latency_ms=fix_latency,
        budget_ms=budget_ms,
    )

    # AC-2 trigger: whichever happens first — the failsafe covariance
    # crossing, or the duration limit elapsing (only if the window is
    # long enough to reach it).
    triggers: list[int] = []
    cov500_at = _first_cov_crossing(window, estimates, COV_FAILSAFE_THRESHOLD_M)
    if cov500_at is not None:
        triggers.append(cov500_at)
    if window.duration_s >= DURATION_FAILSAFE_S:
        triggers.append(window.onset_monotonic_ms + int(DURATION_FAILSAFE_S * 1000))
    failsafe_trigger_at: int | None = min(triggers, default=None)

    # AC-2 reactions: both are searched at or after the trigger.
    horiz_999_at: int | None = None
    horiz_latency: int | None = None
    statustext_at: int | None = None
    statustext_latency: int | None = None
    if failsafe_trigger_at is not None:
        horiz_999_at = _first_horiz_999(estimates, failsafe_trigger_at)
        statustext_at = _first_failsafe_statustext(statustexts, failsafe_trigger_at)
        if horiz_999_at is not None:
            horiz_latency = horiz_999_at - failsafe_trigger_at
        if statustext_at is not None:
            statustext_latency = statustext_at - failsafe_trigger_at
    failsafe_report = FailsafeReport(
        failsafe_trigger_at_ms=failsafe_trigger_at,
        horiz_999_at_ms=horiz_999_at,
        horiz_999_latency_ms=horiz_latency,
        statustext_at_ms=statustext_at,
        statustext_latency_ms=statustext_latency,
        budget_ms=budget_ms,
    )

    return EscalationLadderReport(
        window=window,
        fix_degrade=fix_report,
        failsafe=failsafe_report,
        ordering=OrderingReport(
            cov2d_at_ms=cov2d_at,
            failsafe_trigger_at_ms=failsafe_trigger_at,
        ),
    )
|
||||
|
||||
|
||||
def write_csv_evidence(out_path: Path, report: EscalationLadderReport) -> Path:
    """Write the window summary as a two-line CSV (header + one data row).

    Missing parent directories are created. Absent timestamps and
    latencies become empty cells; verdicts are ``true`` / ``false``.
    Returns ``out_path``.
    """

    def _flag(ok: bool) -> str:
        return "true" if ok else "false"

    def _opt(value: int | None) -> int | str:
        return "" if value is None else value

    header = [
        "window_duration_s",
        "window_is_35s",
        "ac1_cov2d_at_ms",
        "ac1_fix_degraded_at_ms",
        "ac1_latency_ms",
        "ac1_passes",
        "ac2_failsafe_trigger_at_ms",
        "ac2_horiz_999_latency_ms",
        "ac2_statustext_latency_ms",
        "ac2_passes",
        "ac_order_passes",
        "passes",
    ]
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", newline="") as fh:
        writer = csv.writer(fh)
        writer.writerow(header)
        writer.writerow(
            [
                f"{report.window.duration_s:.3f}",
                _flag(report.window.is_35s),
                _opt(report.fix_degrade.cov2d_crossed_at_ms),
                _opt(report.fix_degrade.fix_degraded_at_ms),
                _opt(report.fix_degrade.latency_ms),
                _flag(report.fix_degrade.passes),
                _opt(report.failsafe.failsafe_trigger_at_ms),
                _opt(report.failsafe.horiz_999_latency_ms),
                _opt(report.failsafe.statustext_latency_ms),
                _flag(report.failsafe.passes),
                _flag(report.ordering.passes),
                _flag(report.passes),
            ]
        )
    return out_path
|
||||
@@ -0,0 +1,227 @@
|
||||
"""IMU-only fallback drift evaluator for NFT-RES-01 (AZ-432 / AC-3.5 + AC-NEW-7).
|
||||
|
||||
A pure-vision-blackout (no spoof) lasting 30 s is injected by
|
||||
``fixtures/injectors/blackout_spoof.py --no-spoof``. The SUT must
|
||||
fall back to IMU-only dead reckoning. AC-3.5 + AC-NEW-7 prescribe two
|
||||
drift budgets at the end of the blackout, depending on whether the
|
||||
CombinedImuFactor (PreintegratedCombinedMeasurements) is active:
|
||||
|
||||
* sub-case (a) — no good IMU → ``drift ≤ NO_IMU_BUDGET_M`` (100 m).
|
||||
* sub-case (b) — CombinedImuFactor active (SUT default config) →
|
||||
``drift ≤ GOOD_IMU_BUDGET_M`` (50 m).
|
||||
|
||||
Drift is the Vincenty distance between the SUT's last estimate at
|
||||
blackout end and the ground-truth position at the same timestamp.
|
||||
|
||||
The scenario test owns the orchestration (window injection,
|
||||
sub-case selection, fixture loading). This module owns the pure
|
||||
arithmetic + CSV evidence.
|
||||
|
||||
Public-boundary discipline: does NOT import any
|
||||
``src/gps_denied_onboard`` symbol; consumes only typed samples that
|
||||
the scenario adapter projects out of the boundary observers.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Sequence
|
||||
|
||||
from .geo import distance_m
|
||||
|
||||
# AC-2 / AC-3 — default drift budgets (m), selected by sub-case name.
NO_IMU_BUDGET_M = 100.0
GOOD_IMU_BUDGET_M = 50.0
# AC-1 — the window must last the nominal 30 s, give or take 2 s.
WINDOW_NOMINAL_S = 30.0
WINDOW_TOLERANCE_S = 2.0

# Recognised sub-case names; any other name is rejected by ``evaluate_subcase``.
SUBCASE_NO_IMU = "no_imu"
SUBCASE_GOOD_IMU = "good_imu_combined_factor"
ALLOWED_SUBCASES = (SUBCASE_NO_IMU, SUBCASE_GOOD_IMU)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class PositionSample:
    """A WGS84 position stamped with a monotonic-clock time in milliseconds.

    The same type carries both SUT estimates and ground-truth points; the
    two streams are expected to share one monotonic clock so that samples
    can be matched by timestamp.
    """

    monotonic_ms: int  # timestamp on the monotonic clock, milliseconds
    lat_deg: float  # latitude, degrees
    lon_deg: float  # longitude, degrees
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class BlackoutWindow:
    """Blackout interval (monotonic ms) that the drift evaluation is tied to."""

    onset_monotonic_ms: int  # window start, monotonic milliseconds
    end_monotonic_ms: int  # window end, monotonic milliseconds

    @property
    def duration_s(self) -> float:
        """Window length in seconds (end minus onset)."""
        elapsed_ms = self.end_monotonic_ms - self.onset_monotonic_ms
        return elapsed_ms / 1000.0

    @property
    def window_in_spec(self) -> bool:
        """AC-1: length is within ``WINDOW_TOLERANCE_S`` of ``WINDOW_NOMINAL_S``."""
        deviation_s = abs(self.duration_s - WINDOW_NOMINAL_S)
        return deviation_s <= WINDOW_TOLERANCE_S
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SubCaseReport:
    """Drift outcome for one sub-case."""

    subcase: str  # sub-case name
    drift_m: float | None  # measured drift (m); None when it could not be computed
    budget_m: float  # inclusive drift limit (m)
    estimate_at_end: PositionSample | None  # SUT sample used for the drift
    gt_at_end: PositionSample | None  # ground-truth sample used for the drift

    @property
    def passes(self) -> bool:
        """True when a drift was measured and it does not exceed the budget."""
        if self.drift_m is None:
            return False
        return self.drift_m <= self.budget_m
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ImuFallbackReport:
    """Aggregate NFT-RES-01 result for one parameterization."""

    window: BlackoutWindow  # the evaluated blackout window
    sub_cases: tuple[SubCaseReport, ...]  # one entry per evaluated sub-case

    @property
    def passes_window(self) -> bool:
        """AC-1: whether the window length is in spec (``window.window_in_spec``)."""
        return self.window.window_in_spec

    @property
    def passes(self) -> bool:
        """Overall verdict: window in spec and every evaluated sub-case passes.

        A report without any sub-case never passes: ``all()`` over an
        empty tuple is vacuously True, which would otherwise turn "no
        drift evidence at all" into a green result.
        """
        if not self.sub_cases:
            return False
        return self.passes_window and all(s.passes for s in self.sub_cases)

    def by_subcase(self, subcase: str) -> SubCaseReport:
        """Return the first entry named ``subcase``.

        Raises:
            KeyError: If no entry with that name is present.
        """
        for s in self.sub_cases:
            if s.subcase == subcase:
                return s
        raise KeyError(f"sub-case {subcase!r} not present in report")
|
||||
|
||||
|
||||
def _pick_at_or_before(
|
||||
samples: Sequence[PositionSample], t_ms: int
|
||||
) -> PositionSample | None:
|
||||
"""Return the latest sample with ``monotonic_ms ≤ t_ms`` (None if none qualify).
|
||||
|
||||
Tests against the closest sample on the "left" of the boundary —
|
||||
drift evaluation must NOT extrapolate past the captured window.
|
||||
"""
|
||||
chosen: PositionSample | None = None
|
||||
for s in samples:
|
||||
if s.monotonic_ms <= t_ms:
|
||||
if chosen is None or s.monotonic_ms > chosen.monotonic_ms:
|
||||
chosen = s
|
||||
return chosen
|
||||
|
||||
|
||||
def evaluate_subcase(
    window: BlackoutWindow,
    estimates: Sequence[PositionSample],
    ground_truth: Sequence[PositionSample],
    *,
    subcase: str,
    budget_m: float | None = None,
) -> SubCaseReport:
    """Measure end-of-blackout drift for a single sub-case.

    Args:
        window: Blackout window; only its end timestamp is used here.
        estimates: SUT position samples.
        ground_truth: Ground-truth position samples.
        subcase: One of ``ALLOWED_SUBCASES``.
        budget_m: Drift limit; when omitted it is ``NO_IMU_BUDGET_M`` for
            ``no_imu`` and ``GOOD_IMU_BUDGET_M`` for the other sub-case.

    Returns:
        A report whose ``drift_m`` is ``None`` when either stream has no
        sample at or before the window end.

    Raises:
        ValueError: If ``subcase`` is not an allowed name, so that a typo
            cannot silently select a budget.
    """
    if subcase not in ALLOWED_SUBCASES:
        raise ValueError(
            f"subcase must be one of {ALLOWED_SUBCASES}; got {subcase!r}"
        )
    if budget_m is None:
        if subcase == SUBCASE_NO_IMU:
            budget_m = NO_IMU_BUDGET_M
        else:
            budget_m = GOOD_IMU_BUDGET_M

    end_ms = window.end_monotonic_ms
    estimate_end = _pick_at_or_before(estimates, end_ms)
    gt_end = _pick_at_or_before(ground_truth, end_ms)

    # Drift needs one sample from each stream; otherwise it stays unknown.
    drift: float | None = None
    if estimate_end is not None and gt_end is not None:
        drift = distance_m(
            estimate_end.lat_deg,
            estimate_end.lon_deg,
            gt_end.lat_deg,
            gt_end.lon_deg,
        )

    return SubCaseReport(
        subcase=subcase,
        drift_m=drift,
        budget_m=budget_m,
        estimate_at_end=estimate_end,
        gt_at_end=gt_end,
    )
|
||||
|
||||
|
||||
def evaluate(
    window: BlackoutWindow,
    *,
    sub_cases: Iterable[tuple[str, Sequence[PositionSample], Sequence[PositionSample]]],
) -> ImuFallbackReport:
    """Evaluate every supplied sub-case and bundle the results.

    Each element of ``sub_cases`` is ``(subcase_name, estimates,
    ground_truth)`` and is passed to ``evaluate_subcase`` with the default
    budget for its name. The report holds one entry per supplied element,
    in the same order; a sub-case that is not supplied has no entry.
    """
    reports = tuple(
        evaluate_subcase(window, estimates, ground_truth, subcase=subcase)
        for subcase, estimates, ground_truth in sub_cases
    )
    return ImuFallbackReport(window=window, sub_cases=reports)
|
||||
|
||||
|
||||
def write_csv_evidence(out_path: Path, report: ImuFallbackReport) -> Path:
    """Write a header plus one CSV row per sub-case.

    Missing parent directories are created. Drift and budget use three
    decimals, coordinates seven; anything absent becomes an empty cell;
    verdicts are ``true`` / ``false``. The window columns repeat on every
    row. Returns ``out_path``.
    """

    def _flag(ok: bool) -> str:
        return "true" if ok else "false"

    def _coords(sample: PositionSample | None) -> tuple[str, str]:
        if sample is None:
            return "", ""
        return f"{sample.lat_deg:.7f}", f"{sample.lon_deg:.7f}"

    header = [
        "subcase",
        "window_duration_s",
        "window_in_spec",
        "drift_m",
        "budget_m",
        "estimate_end_lat",
        "estimate_end_lon",
        "gt_end_lat",
        "gt_end_lon",
        "passes",
    ]
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", newline="") as fh:
        writer = csv.writer(fh)
        writer.writerow(header)
        for sub in report.sub_cases:
            est_lat, est_lon = _coords(sub.estimate_at_end)
            gt_lat, gt_lon = _coords(sub.gt_at_end)
            writer.writerow(
                [
                    sub.subcase,
                    f"{report.window.duration_s:.3f}",
                    _flag(report.passes_window),
                    "" if sub.drift_m is None else f"{sub.drift_m:.3f}",
                    f"{sub.budget_m:.3f}",
                    est_lat,
                    est_lon,
                    gt_lat,
                    gt_lon,
                    _flag(sub.passes),
                ]
            )
    return out_path
|
||||
@@ -0,0 +1,228 @@
|
||||
"""Monte Carlo statistical-envelope evaluator for NFT-RES-03 (AZ-434 / AC-NEW-4).
|
||||
|
||||
The SUT promises an *honest* covariance: across many runs with seeded
|
||||
perturbations (gain noise, IMU bias, frame-drop pattern, outlier
|
||||
injection), the actual error distribution should stay within the
|
||||
``1.96 × cov_semi_major_m`` envelope at the 95th percentile. The
|
||||
runner drives N iterations and feeds this module the per-iteration
|
||||
per-frame ``(error_m, cov_semi_major_m)`` pairs.
|
||||
|
||||
ACs evaluated (per AZ-434):
|
||||
|
||||
* AC-1 — iteration_count ≥ ``MIN_ITERATION_COUNT`` (100). Partial
|
||||
completion is a hard failure; the runner is responsible for
|
||||
re-running missing iterations rather than passing a short list.
|
||||
* AC-2 — determinism check: re-running with the same master_seed
|
||||
produces bit-identical iteration outcomes. This module records the
|
||||
master_seed and a SHA-256 of the per-iteration ``(error_m,
|
||||
cov_semi_major_m)`` tuples; the scenario harness compares the seed
|
||||
+ hash across two runs (the comparison itself is not a method on
|
||||
this evaluator — it's a scenario-level check).
|
||||
* AC-3 — global aggregate envelope: across all 100 × N_frames
|
||||
samples, ``count(error_m ≤ 1.96 × cov_semi_major_m) / total ≥
|
||||
0.95``.
|
||||
|
||||
Public-boundary discipline: does NOT import any
|
||||
``src/gps_denied_onboard`` symbol.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import hashlib
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Sequence
|
||||
|
||||
# Defaults for the matching keyword arguments of ``evaluate`` in this module.
MIN_ITERATION_COUNT = 100  # AC-1: fewest iterations a run may contain
ENVELOPE_MULTIPLIER = 1.96  # 95th-percentile envelope on a normal cov_semi_major
ENVELOPE_RATIO_BUDGET = 0.95  # AC-3: least acceptable share of covered samples
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class FrameSample:
    """Error and self-reported uncertainty of the SUT for one frame.

    A frame counts as covered when ``error_m`` does not exceed the
    envelope multiplier times ``cov_semi_major_m``.
    """

    error_m: float  # distance between the SUT estimate and ground truth (m)
    cov_semi_major_m: float  # SUT-reported uncertainty semi-major axis (m)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class IterationOutcome:
    """Result of a single Monte Carlo iteration."""

    iteration_id: str  # label of the iteration
    iteration_seed: int  # seed of the iteration
    samples: tuple[FrameSample, ...]  # per-frame samples, in frame order

    @property
    def frame_count(self) -> int:
        """Number of per-frame samples in this iteration."""
        return len(self.samples)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class MonteCarloReport:
    """Aggregate NFT-RES-03 outcome across all iterations of one run."""

    iterations: tuple[IterationOutcome, ...]  # the evaluated iterations
    master_seed: int  # seed recorded for the whole run
    iteration_count: int  # number of iterations evaluated
    total_samples: int  # number of per-frame samples over all iterations
    covered_samples: int  # samples whose error stayed inside the envelope
    envelope_ratio: float | None  # covered / total; None when there are no samples
    min_iteration_count: int  # AC-1 lower bound (inclusive)
    envelope_ratio_budget: float  # AC-3 lower bound (inclusive)

    @property
    def passes_iteration_count(self) -> bool:
        """AC-1: at least ``min_iteration_count`` iterations were evaluated."""
        return self.iteration_count >= self.min_iteration_count

    @property
    def passes_envelope(self) -> bool:
        """AC-3: a ratio exists and reaches ``envelope_ratio_budget``."""
        if self.envelope_ratio is None:
            return False
        return self.envelope_ratio >= self.envelope_ratio_budget

    @property
    def passes(self) -> bool:
        """Overall verdict: AC-1 and AC-3 both pass."""
        if not self.passes_iteration_count:
            return False
        return self.passes_envelope
|
||||
|
||||
|
||||
def iteration_hash(iteration: IterationOutcome) -> str:
    """Return the SHA-256 hex digest that identifies one iteration's outcome.

    The digest covers ``iteration_id``, ``iteration_seed`` and, in order,
    the ``repr`` of every sample's ``error_m`` and ``cov_semi_major_m``.
    It backs the AC-2 determinism check: two runs of the same iteration
    must produce the same digest.

    Text is encoded as UTF-8 so that an ``iteration_id`` containing
    non-ASCII characters is hashed instead of raising
    ``UnicodeEncodeError``. For ASCII-only ids the bytes, and therefore
    the digest, are the same as with ASCII encoding.
    """
    h = hashlib.sha256()
    h.update(f"{iteration.iteration_id}\n{iteration.iteration_seed}\n".encode("utf-8"))
    for s in iteration.samples:
        # repr() gives the shortest string that round-trips the float exactly.
        h.update(f"{s.error_m!r}|{s.cov_semi_major_m!r}\n".encode("utf-8"))
    return h.hexdigest()
|
||||
|
||||
|
||||
def determinism_fingerprint(report: MonteCarloReport) -> str:
    """Return one SHA-256 hex digest for a whole run, for AC-2 comparison.

    The digest covers the master seed followed by ``iteration_hash`` of
    every iteration, in report order.
    """
    digest = hashlib.sha256(f"master_seed={report.master_seed}\n".encode("ascii"))
    for outcome in report.iterations:
        digest.update(f"{iteration_hash(outcome)}\n".encode("ascii"))
    return digest.hexdigest()
|
||||
|
||||
|
||||
def evaluate(
    iterations: Sequence[IterationOutcome],
    *,
    master_seed: int,
    min_iteration_count: int = MIN_ITERATION_COUNT,
    envelope_ratio_budget: float = ENVELOPE_RATIO_BUDGET,
    envelope_multiplier: float = ENVELOPE_MULTIPLIER,
) -> MonteCarloReport:
    """Count envelope coverage over all iterations and build the AC-1 / AC-3 report.

    A sample is covered when ``error_m <= envelope_multiplier *
    cov_semi_major_m``. The ratio is ``None`` when there are no samples
    at all.

    AC-2 (determinism) is not decided here; the scenario evaluates two
    runs with the same master seed and compares their
    ``determinism_fingerprint`` values.
    """
    total = 0
    covered = 0
    for outcome in iterations:
        total += len(outcome.samples)
        # Each True counts as 1.
        covered += sum(
            frame.error_m <= envelope_multiplier * frame.cov_semi_major_m
            for frame in outcome.samples
        )
    ratio: float | None = covered / total if total else None
    return MonteCarloReport(
        iterations=tuple(iterations),
        master_seed=master_seed,
        iteration_count=len(iterations),
        total_samples=total,
        covered_samples=covered,
        envelope_ratio=ratio,
        min_iteration_count=min_iteration_count,
        envelope_ratio_budget=envelope_ratio_budget,
    )
|
||||
|
||||
|
||||
def write_csv_evidence(out_path: Path, report: MonteCarloReport) -> Path:
    """Write the run summary as a two-line CSV (header + one data row).

    Missing parent directories are created. Ratios use six decimals, a
    missing ratio becomes an empty cell, verdicts are ``true`` /
    ``false``, and the last column is the run's determinism fingerprint.
    Returns ``out_path``.
    """

    def _flag(ok: bool) -> str:
        return "true" if ok else "false"

    header = [
        "master_seed",
        "iteration_count",
        "min_iteration_count",
        "total_samples",
        "covered_samples",
        "envelope_ratio",
        "envelope_ratio_budget",
        "ac1_iteration_count_passes",
        "ac3_envelope_passes",
        "passes",
        "fingerprint_sha256",
    ]
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", newline="") as fh:
        writer = csv.writer(fh)
        writer.writerow(header)
        ratio = report.envelope_ratio
        writer.writerow(
            [
                report.master_seed,
                report.iteration_count,
                report.min_iteration_count,
                report.total_samples,
                report.covered_samples,
                "" if ratio is None else f"{ratio:.6f}",
                f"{report.envelope_ratio_budget:.6f}",
                _flag(report.passes_iteration_count),
                _flag(report.passes_envelope),
                _flag(report.passes),
                determinism_fingerprint(report),
            ]
        )
    return out_path
|
||||
|
||||
|
||||
def write_per_iteration_csv(
    out_path: Path,
    report: MonteCarloReport,
    *,
    envelope_multiplier: float | None = None,
) -> Path:
    """Write a header plus one CSV row per iteration, for AC-3 breach investigation.

    Args:
        out_path: Destination file; missing parent directories are created.
        report: The run whose iterations are listed.
        envelope_multiplier: Multiplier used to decide whether a sample is
            covered. Defaults to ``ENVELOPE_MULTIPLIER``. Pass the value
            that was given to ``evaluate`` when it was overridden there;
            otherwise the per-iteration counts would be computed with a
            different envelope than the aggregate verdict.

    Returns:
        ``out_path``.
    """
    # Resolved here rather than in the signature so that None can mean "default".
    if envelope_multiplier is None:
        envelope_multiplier = ENVELOPE_MULTIPLIER
    out_path.parent.mkdir(parents=True, exist_ok=True)
    # Explicit UTF-8: iteration_id is free-form text and must not depend on
    # the platform's default encoding.
    with out_path.open("w", newline="", encoding="utf-8") as fh:
        writer = csv.writer(fh)
        writer.writerow(
            [
                "iteration_id",
                "iteration_seed",
                "frame_count",
                "covered_count",
                "envelope_ratio",
                "iteration_hash_sha256",
            ]
        )
        for it in report.iterations:
            covered = sum(
                1
                for s in it.samples
                if s.error_m <= envelope_multiplier * s.cov_semi_major_m
            )
            # An iteration without frames has no ratio; leave the cell empty.
            ratio = (covered / it.frame_count) if it.frame_count else None
            writer.writerow(
                [
                    it.iteration_id,
                    it.iteration_seed,
                    it.frame_count,
                    covered,
                    "" if ratio is None else f"{ratio:.6f}",
                    iteration_hash(it),
                ]
            )
    return out_path
|
||||
Reference in New Issue
Block a user