[AZ-432] [AZ-433] [AZ-434] [AZ-435] Add NFT-RES-01..04 resilience scenarios

Batch 86: 4 NFT-RES blackbox scenarios + 4 helper evaluators + 74 unit
tests + directory-layout registration.

* AZ-432 NFT-RES-01: 30 s IMU-only fallback drift bound (AC-3.5 + AC-NEW-7);
  two sub-cases (no_imu ≤100m, good_imu_combined_factor ≤50m).
* AZ-433 NFT-RES-02: companion mid-flight reboot (AC-5.2 + AC-5.3); resume
  ≤30s + first-emission accuracy ≤100m.
* AZ-434 NFT-RES-03: 100-iteration Monte Carlo envelope (AC-NEW-4);
  iteration-count + master-seed determinism + envelope ratio ≥0.95.
  Canonical-param by default; E2E_NFT_RES_03_FULL_MATRIX=1 unlocks matrix.
* AZ-435 NFT-RES-04: 35s blackout+spoof escalation ladder (AC-NEW-8);
  AC-1 (cov-2d→fix-degrade ≤500ms) + AC-2 (failsafe→999+STATUSTEXT
  ≤500ms) + AC-ORDER (strict ordering).

Verdict: PASS_WITH_WARNINGS (0 Critical, 0 High, 0 Medium, 5 Low).
F5 documents intentional threshold duplication with blackout_spoof
evaluator (prevents contract drift between FT-N-04 and NFT-RES-04).

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-17 17:09:04 +03:00
parent 23640a784f
commit 330893be5c
15 changed files with 3325 additions and 0 deletions
@@ -0,0 +1,228 @@
"""Monte Carlo statistical-envelope evaluator for NFT-RES-03 (AZ-434 / AC-NEW-4).
The SUT promises an *honest* covariance: across many runs with seeded
perturbations (gain noise, IMU bias, frame-drop pattern, outlier
injection), the actual error distribution should stay within the
``1.96 × cov_semi_major_m`` envelope at the 95th percentile. The
runner drives N iterations and feeds this module the per-iteration
per-frame ``(error_m, cov_semi_major_m)`` pairs.
ACs evaluated (per AZ-434):
* AC-1 — iteration_count ≥ ``MIN_ITERATION_COUNT`` (100). Partial
completion is a hard failure; the runner is responsible for
re-running missing iterations rather than passing a short list.
* AC-2 — determinism check: re-running with the same master_seed
produces bit-identical iteration outcomes. This module records the
master_seed and a SHA-256 of the per-iteration ``(error_m,
cov_semi_major_m)`` tuples; the scenario harness compares the seed
+ hash across two runs (the comparison itself is not a method on
this evaluator — it's a scenario-level check).
* AC-3 — global aggregate envelope: across all 100 × N_frames
samples, ``count(error_m ≤ 1.96 × cov_semi_major_m) / total ≥
0.95``.
Public-boundary discipline: does NOT import any
``src/gps_denied_onboard`` symbol.
"""
from __future__ import annotations
import csv
import hashlib
from dataclasses import dataclass
from pathlib import Path
from typing import Sequence
# AC-1 floor: default for ``evaluate(min_iteration_count=...)``; a report with
# fewer iterations than this fails ``passes_iteration_count``.
MIN_ITERATION_COUNT = 100
# A sample counts as "covered" when
# ``error_m <= ENVELOPE_MULTIPLIER * cov_semi_major_m``.
# 1.96 is the two-sided 95 % quantile of a standard normal — presumably
# cov_semi_major_m is a 1-sigma figure; TODO confirm against the SUT contract.
ENVELOPE_MULTIPLIER = 1.96  # default for ``evaluate(envelope_multiplier=...)``
# AC-3 budget: minimum fraction of covered samples for ``passes_envelope``.
ENVELOPE_RATIO_BUDGET = 0.95
@dataclass(frozen=True)
class FrameSample:
    """Error / self-reported-uncertainty pair observed at a single frame.

    Both values are expressed in metres and refer to the same frame.
    Instances are immutable.
    """

    # WGS84 Vincenty distance between the SUT's estimate and ground truth.
    error_m: float
    # Semi-major axis of the uncertainty the SUT reported for this frame.
    cov_semi_major_m: float
@dataclass(frozen=True)
class IterationOutcome:
    """Result of a single Monte Carlo iteration.

    Bundles the iteration's identifier, the seed it ran with, and its
    per-frame samples in frame order. Instances are immutable.
    """

    iteration_id: str
    iteration_seed: int
    samples: tuple[FrameSample, ...]

    @property
    def frame_count(self) -> int:
        """Number of per-frame samples recorded for this iteration."""
        n_frames = len(self.samples)
        return n_frames
@dataclass(frozen=True)
class MonteCarloReport:
    """Immutable NFT-RES-03 summary of one Monte Carlo run.

    Holds the evaluated iterations, the aggregate sample counts and the
    thresholds the run is judged against; the ``passes*`` properties
    derive the verdicts from those stored values.
    """

    iterations: tuple[IterationOutcome, ...]
    master_seed: int
    iteration_count: int
    total_samples: int
    covered_samples: int
    # ``None`` when the run contained no samples at all.
    envelope_ratio: float | None
    min_iteration_count: int
    envelope_ratio_budget: float

    @property
    def passes_iteration_count(self) -> bool:
        """True when at least ``min_iteration_count`` iterations were run."""
        return self.min_iteration_count <= self.iteration_count

    @property
    def passes_envelope(self) -> bool:
        """True when a ratio exists and meets ``envelope_ratio_budget``."""
        ratio = self.envelope_ratio
        if ratio is None:
            # No samples means no evidence, which is treated as a failure.
            return False
        return ratio >= self.envelope_ratio_budget

    @property
    def passes(self) -> bool:
        """Overall verdict: iteration-count check and envelope check both hold."""
        if not self.passes_iteration_count:
            return False
        return self.passes_envelope
def iteration_hash(iteration: IterationOutcome) -> str:
    """Return the SHA-256 hex digest identifying one iteration's outcome.

    The digest covers ``iteration_id``, ``iteration_seed`` and every
    sample's ``(error_m, cov_semi_major_m)`` pair, in sample order. It is
    used to certify AC-2 determinism: two runs of the same iteration with
    the same ``iteration_seed`` must produce the same hash.

    Text is encoded as UTF-8 so that an ``iteration_id`` containing
    non-ASCII characters is hashed instead of raising
    ``UnicodeEncodeError``. For pure-ASCII ids the bytes fed to the hash,
    and therefore the digest, are the same as with an ASCII encoding.

    Args:
        iteration: The iteration whose id, seed and samples are hashed.

    Returns:
        The hexadecimal SHA-256 digest.
    """
    digest = hashlib.sha256()
    header = f"{iteration.iteration_id}\n{iteration.iteration_seed}\n"
    digest.update(header.encode("utf-8"))
    for sample in iteration.samples:
        # ``!r`` is used so each value is hashed via its repr() text.
        line = f"{sample.error_m!r}|{sample.cov_semi_major_m!r}\n"
        digest.update(line.encode("utf-8"))
    return digest.hexdigest()
def determinism_fingerprint(report: MonteCarloReport) -> str:
    """Return a single SHA-256 hex digest summarising a whole MC run.

    The digest is built from the report's ``master_seed`` followed by the
    ``iteration_hash`` of every iteration, in report order. Comparing the
    fingerprints of two runs is the AC-2 cross-run determinism check.
    """
    digest = hashlib.sha256(f"master_seed={report.master_seed}\n".encode("ascii"))
    # One newline-terminated hash per iteration, fed in report order.
    hash_lines = "".join(f"{iteration_hash(outcome)}\n" for outcome in report.iterations)
    digest.update(hash_lines.encode("ascii"))
    return digest.hexdigest()
def evaluate(
    iterations: Sequence[IterationOutcome],
    *,
    master_seed: int,
    min_iteration_count: int = MIN_ITERATION_COUNT,
    envelope_ratio_budget: float = ENVELOPE_RATIO_BUDGET,
    envelope_multiplier: float = ENVELOPE_MULTIPLIER,
) -> MonteCarloReport:
    """Build the report carrying the AC-1 and AC-3 verdicts.

    Every sample of every iteration is checked against the envelope
    ``error_m <= envelope_multiplier * cov_semi_major_m``; the fraction
    of samples inside the envelope becomes ``envelope_ratio``.

    AC-2 (determinism) is not decided here. The scenario evaluates two
    runs made with the same master_seed and compares their
    ``determinism_fingerprint`` values.

    Args:
        iterations: Outcomes of the Monte Carlo iterations to aggregate.
        master_seed: Seed of the run, stored on the report unchanged.
        min_iteration_count: Threshold stored for the AC-1 check.
        envelope_ratio_budget: Threshold stored for the AC-3 check.
        envelope_multiplier: Factor applied to ``cov_semi_major_m``.

    Returns:
        A ``MonteCarloReport``; its ``envelope_ratio`` is ``None`` when
        the iterations contain no samples.
    """
    # One flag per sample, flattened across all iterations.
    inside_flags = [
        sample.error_m <= envelope_multiplier * sample.cov_semi_major_m
        for outcome in iterations
        for sample in outcome.samples
    ]
    sample_total = len(inside_flags)
    sample_covered = sum(1 for flag in inside_flags if flag)

    # With no samples there is nothing to divide by; report "no ratio".
    envelope_ratio: float | None = (
        sample_covered / sample_total if sample_total else None
    )

    return MonteCarloReport(
        iterations=tuple(iterations),
        master_seed=master_seed,
        iteration_count=len(iterations),
        total_samples=sample_total,
        covered_samples=sample_covered,
        envelope_ratio=envelope_ratio,
        min_iteration_count=min_iteration_count,
        envelope_ratio_budget=envelope_ratio_budget,
    )
def write_csv_evidence(out_path: Path, report: MonteCarloReport) -> Path:
    """Write the run-level summary CSV: a header row plus one data row.

    Missing parent directories of ``out_path`` are created and an existing
    file is overwritten.

    Args:
        out_path: Destination of the CSV file.
        report: The Monte Carlo report to summarise.

    Returns:
        ``out_path``, unchanged.
    """

    def _flag(ok: bool) -> str:
        # Booleans are serialised as lowercase words.
        return "true" if ok else "false"

    columns = (
        "master_seed",
        "iteration_count",
        "min_iteration_count",
        "total_samples",
        "covered_samples",
        "envelope_ratio",
        "envelope_ratio_budget",
        "ac1_iteration_count_passes",
        "ac3_envelope_passes",
        "passes",
        "fingerprint_sha256",
    )

    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", newline="") as handle:
        out = csv.writer(handle)
        out.writerow(columns)

        # An absent ratio (no samples) is written as an empty cell.
        if report.envelope_ratio is None:
            ratio_cell = ""
        else:
            ratio_cell = f"{report.envelope_ratio:.6f}"

        out.writerow(
            (
                report.master_seed,
                report.iteration_count,
                report.min_iteration_count,
                report.total_samples,
                report.covered_samples,
                ratio_cell,
                f"{report.envelope_ratio_budget:.6f}",
                _flag(report.passes_iteration_count),
                _flag(report.passes_envelope),
                _flag(report.passes),
                determinism_fingerprint(report),
            )
        )
    return out_path
def write_per_iteration_csv(
    out_path: Path,
    report: MonteCarloReport,
    *,
    envelope_multiplier: float | None = None,
) -> Path:
    """Write one CSV row per iteration, for AC-3 envelope-breach investigation.

    Each row carries the iteration's id, seed, frame count, number of
    samples inside the envelope, the per-iteration envelope ratio (empty
    when the iteration has no frames) and the iteration hash.

    Missing parent directories of ``out_path`` are created and an existing
    file is overwritten. The file is written as UTF-8 so that a non-ASCII
    ``iteration_id`` does not depend on the platform's default encoding.

    Args:
        out_path: Destination of the CSV file.
        report: The Monte Carlo report whose iterations are listed.
        envelope_multiplier: Factor applied to ``cov_semi_major_m`` when
            deciding whether a sample is covered. ``None`` (the default)
            uses the module-level ``ENVELOPE_MULTIPLIER``. Pass the same
            value that was given to ``evaluate()`` if that call used a
            custom multiplier, so these ratios agree with the aggregate.

    Returns:
        ``out_path``, unchanged.
    """
    multiplier = (
        ENVELOPE_MULTIPLIER if envelope_multiplier is None else envelope_multiplier
    )

    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", newline="", encoding="utf-8") as fh:
        writer = csv.writer(fh)
        writer.writerow(
            [
                "iteration_id",
                "iteration_seed",
                "frame_count",
                "covered_count",
                "envelope_ratio",
                "iteration_hash_sha256",
            ]
        )
        for it in report.iterations:
            covered = sum(
                1
                for s in it.samples
                if s.error_m <= multiplier * s.cov_semi_major_m
            )
            # An iteration without frames has no ratio; leave the cell empty.
            ratio = (covered / it.frame_count) if it.frame_count else None
            writer.writerow(
                [
                    it.iteration_id,
                    it.iteration_seed,
                    it.frame_count,
                    covered,
                    "" if ratio is None else f"{ratio:.6f}",
                    iteration_hash(it),
                ]
            )
    return out_path