[AZ-436] [AZ-437] [AZ-438] [AZ-439] Add NFT-SEC-01..05 security scenarios

Batch 87: 6 NFT-SEC blackbox scenarios + 5 helper evaluators + 75 unit
tests + cumulative review batches 85-87.

* AZ-436 NFT-SEC-01: cache-poisoning safety budget (AC-NEW-9); aggregate
  false_trust_count ≤ N×1e-6; zero-tolerance default. Canonical-only by
  default; E2E_NFT_SEC_01_RELEASE_GATE=1 unlocks full matrix.
* AZ-437 NFT-SEC-02 + NFT-SEC-05: shared egress-observation evaluator
  (AC-NEW-10); SEC-02 = 0 packets to non-e2e-net over 5min replay;
  SEC-05 = DNS-blackhole sidecar healthy + lookup fails + UDP-53 silent.
* AZ-438 NFT-SEC-03: AP-only signing rejection (AC-NEW-11); 3 sub-cases
  (unsigned/wrong-key/replayed) each reject ≤500ms + no position drift.
* AZ-439 NFT-SEC-04: probe (always-run) = no-crash + deterministic
  decode outcome; ASan-fuzz (release-gate) = 0 findings ≥4h; AC-3
  corpus floor informational only per spec.

Verdict per-batch: PASS_WITH_WARNINGS (5 Low). Cumulative review for
batches 85-87 (K=3 window) also PASS_WITH_WARNINGS with 5 cross-batch
findings — recommends hygiene PBIs for write_csv_evidence duplication
(13 helpers) and _resolve_fixture_path duplication (13 scenarios), plus
new tickets for AZ-595 fixture builder + DNS-blackhole sidecar service.

Also adds _docs/LESSONS.md documenting the Jira transition-ID lesson
(always call getTransitionsForJiraIssue first, never memorize numeric
IDs across sessions).

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-17 17:33:22 +03:00
parent de19e716d8
commit c56d4584e6
21 changed files with 3510 additions and 0 deletions
+169
View File
@@ -0,0 +1,169 @@
"""AddressSanitizer fuzz evaluator for NFT-SEC-04 (AZ-439 / RESTRICT-CVE-1 release-gate).
Companion to ``cve_probe_evaluator``: while the probe asserts a single
crafted JPEG does not crash the SUT, the fuzz scenario runs the
``build_kind=ASan`` SUT image under random JPEG inputs for ≥4 h and
asserts:
* AC-2: 0 ASan findings (``heap-buffer-overflow``, ``use-after-free``,
``stack-buffer-overflow``, ``heap-use-after-free``, etc.) in the
captured stderr / ASan log;
* AC-3 (informational only — no hard threshold): the harness reached
≥``MIN_CORPUS_COVERAGE`` unique JPEG inputs.
ASan-finding categories follow the canonical sanitizer wording. The
classifier matches a curated, non-exhaustive set; an *unknown* match
is bucketed into ``OTHER_FINDING`` and still fails AC-2. Unknown
findings are surfaced in the CSV evidence so a regression triage knows
to extend the canonical set.
Public-boundary discipline: does NOT import any
``src/gps_denied_onboard`` symbol.
"""
from __future__ import annotations
import csv
import re
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import Sequence
# Minimum fuzz wall-clock time in seconds (4 h); AsanFuzzReport.passes_duration
# compares the measured duration against it.
MIN_FUZZ_DURATION_SECONDS = 4 * 3600 # AC-2 — release-gate minimum
# Corpus-size floor read by AsanFuzzReport.reached_corpus_floor; it is reported
# in the evidence but never contributes to the pass/fail verdict.
MIN_CORPUS_COVERAGE = 1000 # AC-3 — informational only
class AsanFindingCategory(str, Enum):
    """Buckets for AddressSanitizer error reports found in the fuzz log.

    The enum values are the category labels written to the CSV evidence.
    """

    HEAP_BUFFER_OVERFLOW = "heap-buffer-overflow"
    HEAP_USE_AFTER_FREE = "heap-use-after-free"
    STACK_BUFFER_OVERFLOW = "stack-buffer-overflow"
    STACK_USE_AFTER_RETURN = "stack-use-after-return"
    GLOBAL_BUFFER_OVERFLOW = "global-buffer-overflow"
    USE_AFTER_FREE = "use-after-free"
    DOUBLE_FREE = "double-free"
    OTHER_FINDING = "other-finding"  # canonical unknown ASan match


# Each entry is (regex, category). Matched in order — first hit wins.
# The sanitizer reports a double free as "attempting double-free on 0x…", so
# that pattern tolerates the optional "attempting" prefix; without it a real
# double free would be bucketed as OTHER_FINDING.
_KNOWN_PATTERNS: tuple[tuple[str, AsanFindingCategory], ...] = (
    (r"ERROR: AddressSanitizer:\s*heap-buffer-overflow", AsanFindingCategory.HEAP_BUFFER_OVERFLOW),
    (r"ERROR: AddressSanitizer:\s*heap-use-after-free", AsanFindingCategory.HEAP_USE_AFTER_FREE),
    (r"ERROR: AddressSanitizer:\s*stack-buffer-overflow", AsanFindingCategory.STACK_BUFFER_OVERFLOW),
    (r"ERROR: AddressSanitizer:\s*stack-use-after-return", AsanFindingCategory.STACK_USE_AFTER_RETURN),
    (r"ERROR: AddressSanitizer:\s*global-buffer-overflow", AsanFindingCategory.GLOBAL_BUFFER_OVERFLOW),
    (r"ERROR: AddressSanitizer:\s*use-after-free", AsanFindingCategory.USE_AFTER_FREE),
    (
        r"ERROR: AddressSanitizer:\s*(?:attempting\s+)?double-free",
        AsanFindingCategory.DOUBLE_FREE,
    ),
)
# Compiled once at import time so per-line classification does no regex parsing.
_KNOWN_COMPILED = tuple((re.compile(pat), cat) for pat, cat in _KNOWN_PATTERNS)
# Catch-all: any ASan error header, whatever the error kind.
_ANY_ASAN_RE = re.compile(r"ERROR: AddressSanitizer:")


def classify_asan_line(line: str) -> AsanFindingCategory | None:
    """Classify one stderr line.

    Args:
        line: A single line of captured stderr / ASan log output.

    Returns:
        The first category whose pattern is found in ``line``;
        ``OTHER_FINDING`` when the line carries an ASan error header that no
        known pattern recognises; ``None`` if it's not an ASan finding.
    """
    for regex, category in _KNOWN_COMPILED:
        if regex.search(line):
            return category
    if _ANY_ASAN_RE.search(line):
        return AsanFindingCategory.OTHER_FINDING
    return None
@dataclass(frozen=True)
class AsanFinding:
    """One classified finding (one line OR one synthesized event)."""

    # Bucket the source line was classified into.
    category: AsanFindingCategory
    # Text of the log line that produced the finding.
    snippet: str # the matched line; truncated to ≤200 chars in evidence
@dataclass(frozen=True)
class AsanFuzzReport:
    """Outcome of a single fuzz run: how long it ran, how many inputs, what ASan reported."""

    duration_seconds: float
    corpus_size: int
    findings: Sequence[AsanFinding]

    @property
    def passes_duration(self) -> bool:
        """True when the run lasted at least ``MIN_FUZZ_DURATION_SECONDS``."""
        return self.duration_seconds >= MIN_FUZZ_DURATION_SECONDS

    @property
    def passes_findings(self) -> bool:
        """True when no finding was recorded."""
        return not len(self.findings)

    @property
    def reached_corpus_floor(self) -> bool:
        """True when the corpus holds at least ``MIN_CORPUS_COVERAGE`` inputs.

        Informational only — ``passes`` does not consult it.
        """
        return self.corpus_size >= MIN_CORPUS_COVERAGE

    @property
    def passes(self) -> bool:
        """Overall verdict: long enough AND finding-free."""
        if not self.passes_duration:
            return False
        return self.passes_findings
def evaluate(
    asan_log_lines: Sequence[str],
    *,
    duration_seconds: float,
    corpus_size: int,
) -> AsanFuzzReport:
    """Build the fuzz report from the captured log and run metadata.

    Every line is classified; lines that are not ASan findings are dropped.
    Each kept line is stored stripped of surrounding whitespace and cut to at
    most 200 characters.
    """
    classified = ((classify_asan_line(raw), raw) for raw in asan_log_lines)
    hits = tuple(
        AsanFinding(category=kind, snippet=raw.strip()[:200])
        for kind, raw in classified
        if kind is not None
    )
    return AsanFuzzReport(
        duration_seconds=duration_seconds,
        corpus_size=corpus_size,
        findings=hits,
    )
def write_csv_evidence(out_path: Path, report: AsanFuzzReport) -> Path:
    """Write the fuzz verdict to ``out_path`` as CSV and return the path.

    Layout: a header row and one summary row; when findings exist, a blank
    row, a findings header, and one row per finding. Parent directories are
    created as needed.

    The file is written as UTF-8 explicitly: snippets are arbitrary stderr
    text, and relying on the locale's default encoding can raise
    ``UnicodeEncodeError`` on hosts whose default codec cannot represent them.
    """
    # Per-category counts, rendered as "cat=count" pairs in category order.
    breakdown: dict[str, int] = {}
    for f in report.findings:
        breakdown[f.category.value] = breakdown.get(f.category.value, 0) + 1
    breakdown_str = ";".join(
        f"{cat}={count}" for cat, count in sorted(breakdown.items())
    )

    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", newline="", encoding="utf-8") as fh:
        writer = csv.writer(fh)
        writer.writerow(
            [
                "duration_seconds",
                "passes_duration",
                "corpus_size",
                "reached_corpus_floor",
                "finding_count",
                "passes_findings",
                "passes",
                "finding_breakdown",
            ]
        )
        writer.writerow(
            [
                f"{report.duration_seconds:.0f}",
                "true" if report.passes_duration else "false",
                report.corpus_size,
                "true" if report.reached_corpus_floor else "false",
                len(report.findings),
                "true" if report.passes_findings else "false",
                "true" if report.passes else "false",
                breakdown_str,
            ]
        )
        if report.findings:
            # Blank separator row, then the per-finding detail table.
            writer.writerow([])
            writer.writerow(["finding_index", "category", "snippet"])
            for idx, f in enumerate(report.findings):
                writer.writerow([idx, f.category.value, f.snippet])
    return out_path
@@ -0,0 +1,238 @@
"""Cache-poisoning safety-budget evaluator for NFT-SEC-01 (AZ-436 / AC-NEW-9).
The contract: across ``N`` synthetic flights — each carrying ``1-5 %`` of
*poisoned* tiles (signing-cert mismatch, freshness violation, or voting
disagreement) — the SUT MUST NOT silently emit a ``satellite_anchored``
estimate that traces back to a poisoned tile (a *false-trust event*).
Aggregate budget (Mode B Fact #103):
total_false_trust_events <= N * 1e-6
At default CI N=1000 the budget is 0.001 expected events; the test
therefore enforces the strict ``count == 0`` zero-tolerance default. The
``release-gate`` N=10000 run keeps the same zero-tolerance default (the
budget allows 0.01 events; one event is already a regression).
Per-flight invariants additionally guarded by this evaluator:
* poison ratio in ``[POISON_RATIO_MIN, POISON_RATIO_MAX]`` (AC-2);
* at least one poisoned tile per **defense layer** in every flight
(AC-2 — each layer must be exercised so a per-layer regression cannot
hide behind layer-confusion);
* per-event ``rejection_reason`` is one of the documented categories
(so an unlabeled false-positive cannot pass as a "rejection" in CSV
evidence).
Public-boundary discipline: does NOT import any
``src/gps_denied_onboard`` symbol. The evaluator only consumes
runner-collected counter objects.
"""
from __future__ import annotations
import csv
from dataclasses import dataclass, field
from pathlib import Path
from typing import Sequence
# Allowed false-trust events per flight; CachePoisoningReport.budget multiplies
# it by the flight count.
FALSE_TRUST_BUDGET_PER_FLIGHT = 1e-6
# Inclusive bounds on a flight's poison ratio (poisoned tiles / total tiles).
POISON_RATIO_MIN = 0.01
POISON_RATIO_MAX = 0.05
# Labels of the three defense layers a poisoned tile can target.
DEFENSE_LAYER_SIGNING = "signing_cert_mismatch"
DEFENSE_LAYER_FRESHNESS = "freshness_violation"
DEFENSE_LAYER_VOTING = "voting_disagreement"
# Every layer that must appear at least once among a flight's poisoned tiles.
DEFENSE_LAYERS: tuple[str, ...] = (
    DEFENSE_LAYER_SIGNING,
    DEFENSE_LAYER_FRESHNESS,
    DEFENSE_LAYER_VOTING,
)
# Accepted keys for FlightOutcome.rejection_reasons: the three layer labels plus
# two downgrade labels. Any other key marks the flight as using an unknown reason.
REJECTION_REASONS: frozenset[str] = frozenset(
    {
        DEFENSE_LAYER_SIGNING,
        DEFENSE_LAYER_FRESHNESS,
        DEFENSE_LAYER_VOTING,
        "freshness_gate_downgrade",
        "service_voting_downgrade",
    }
)
@dataclass(frozen=True)
class PoisonedTileSpec:
    """One poisoned tile slotted into a flight's tile cache."""

    # Identifier of the crafted tile.
    tile_id: str
    # Layer the tile is meant to exercise; the value is not validated here.
    defense_layer: str # MUST be one of ``DEFENSE_LAYERS``
@dataclass(frozen=True)
class FalseTrustEvent:
    """A frame where the SUT emitted ``satellite_anchored`` traced to a poisoned tile."""

    # Flight in which the event was observed.
    flight_id: str
    # Poisoned tile the estimate traces back to.
    tile_id: str
    # Timestamp of the offending frame, in milliseconds.
    monotonic_ms: int
    # Defense layer associated with the event.
    defense_layer: str
@dataclass(frozen=True)
class FlightOutcome:
    """Result of one synthetic flight.

    ``total_tile_count`` is the flight's cache size and is the denominator of
    the poison ratio. ``poisoned_tiles`` lists the crafted tiles that were
    injected, and ``false_trust_events`` the frames in which the SUT trusted
    one of them.

    ``rejection_reasons`` maps each rejection-reason label to how many times it
    fired. The per-flight counts are written to the evidence CSV, and the keys
    are checked against ``REJECTION_REASONS``.
    """

    flight_id: str
    total_tile_count: int
    poisoned_tiles: Sequence[PoisonedTileSpec]
    false_trust_events: Sequence[FalseTrustEvent]
    rejection_reasons: dict[str, int] = field(default_factory=dict)

    @property
    def poison_ratio(self) -> float:
        """Poisoned tiles divided by total tiles; ``0.0`` for a non-positive total."""
        return (
            0.0
            if self.total_tile_count <= 0
            else len(self.poisoned_tiles) / self.total_tile_count
        )

    @property
    def defense_layers_present(self) -> set[str]:
        """Distinct ``defense_layer`` labels among the poisoned tiles."""
        return set(spec.defense_layer for spec in self.poisoned_tiles)

    @property
    def false_trust_count(self) -> int:
        """Number of false-trust events recorded for this flight."""
        return len(self.false_trust_events)

    def has_unknown_rejection_reasons(self) -> bool:
        """True when at least one rejection-reason key is outside ``REJECTION_REASONS``."""
        return not all(
            reason in REJECTION_REASONS for reason in self.rejection_reasons
        )
@dataclass(frozen=True)
class CachePoisoningReport:
    """Verdict over a whole batch of flights, with the offending flight ids per check."""

    flights: Sequence[FlightOutcome]
    flight_count: int
    total_false_trust: int
    flights_with_bad_poison_ratio: Sequence[str]
    flights_missing_defense_layers: Sequence[tuple[str, list[str]]]
    flights_with_unknown_rejection_reasons: Sequence[str]

    @property
    def budget(self) -> float:
        """Aggregate false-trust budget: flight count × per-flight budget."""
        return self.flight_count * FALSE_TRUST_BUDGET_PER_FLIGHT

    @property
    def passes_budget(self) -> bool:
        """True only when no false-trust event was observed at all.

        Zero tolerance: the verdict does not consult ``budget``; a single
        observed event fails.
        """
        return self.total_false_trust == 0

    @property
    def passes_ratio(self) -> bool:
        """True when every flight's poison ratio was inside the allowed band."""
        return not len(self.flights_with_bad_poison_ratio)

    @property
    def passes_layer_coverage(self) -> bool:
        """True when no flight is missing a defense layer."""
        return not len(self.flights_missing_defense_layers)

    @property
    def passes_rejection_reason_vocabulary(self) -> bool:
        """True when no flight reported an unknown rejection reason."""
        return not len(self.flights_with_unknown_rejection_reasons)

    @property
    def passes(self) -> bool:
        """Overall verdict: all four checks must hold."""
        gates = (
            self.passes_budget,
            self.passes_ratio,
            self.passes_layer_coverage,
            self.passes_rejection_reason_vocabulary,
        )
        return all(gates)
def evaluate(flights: Sequence[FlightOutcome]) -> CachePoisoningReport:
    """Aggregate the per-flight checks into one report.

    For every flight this sums false-trust events, flags a poison ratio
    outside ``[POISON_RATIO_MIN, POISON_RATIO_MAX]``, lists the defense layers
    (sorted) that had no poisoned tile, and flags unknown rejection reasons.
    """
    required_layers = set(DEFENSE_LAYERS)

    false_trust_total = sum(f.false_trust_count for f in flights)
    out_of_band = tuple(
        f.flight_id
        for f in flights
        if not (POISON_RATIO_MIN <= f.poison_ratio <= POISON_RATIO_MAX)
    )
    layer_gaps = (
        (f.flight_id, sorted(required_layers - f.defense_layers_present))
        for f in flights
    )
    uncovered = tuple((fid, absent) for fid, absent in layer_gaps if absent)
    off_vocabulary = tuple(
        f.flight_id for f in flights if f.has_unknown_rejection_reasons()
    )

    return CachePoisoningReport(
        flights=tuple(flights),
        flight_count=len(flights),
        total_false_trust=false_trust_total,
        flights_with_bad_poison_ratio=out_of_band,
        flights_missing_defense_layers=uncovered,
        flights_with_unknown_rejection_reasons=off_vocabulary,
    )
def write_csv_evidence(out_path: Path, report: CachePoisoningReport) -> Path:
    """Per-flight CSV — one row per flight + an aggregate footer row.

    The footer carries every sub-verdict that feeds ``report.passes`` —
    budget, ratio, layer coverage and rejection-reason vocabulary — followed
    by the overall verdict, so a failing run can be diagnosed from the
    evidence file alone. The two trailing footer cells are appended after the
    existing ones so earlier column positions are unchanged.

    Parent directories are created as needed. The file is written as UTF-8
    explicitly so flight ids never depend on the host locale's codec.

    Returns:
        ``out_path``.
    """

    def _flag(value: bool) -> str:
        # Lower-case boolean spelling used throughout the evidence files.
        return "true" if value else "false"

    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", newline="", encoding="utf-8") as fh:
        writer = csv.writer(fh)
        writer.writerow(
            [
                "flight_id",
                "total_tile_count",
                "poisoned_tile_count",
                "poison_ratio",
                "defense_layers_present",
                "false_trust_count",
                "rejection_reason_breakdown",
            ]
        )
        for flight in report.flights:
            # "<none>" keeps the cell non-empty when no layer was exercised.
            layers_present = ",".join(sorted(flight.defense_layers_present)) or "<none>"
            breakdown = ";".join(
                f"{reason}={count}"
                for reason, count in sorted(flight.rejection_reasons.items())
            )
            writer.writerow(
                [
                    flight.flight_id,
                    flight.total_tile_count,
                    len(flight.poisoned_tiles),
                    f"{flight.poison_ratio:.4f}",
                    layers_present,
                    flight.false_trust_count,
                    breakdown,
                ]
            )
        # Blank separator row, then the aggregate footer.
        writer.writerow([])
        writer.writerow(
            [
                "AGGREGATE",
                f"flight_count={report.flight_count}",
                f"total_false_trust={report.total_false_trust}",
                f"budget={report.budget:g}",
                f"passes_budget={_flag(report.passes_budget)}",
                f"passes_ratio={_flag(report.passes_ratio)}",
                f"passes_layer_coverage={_flag(report.passes_layer_coverage)}",
                "passes_rejection_reason_vocabulary="
                f"{_flag(report.passes_rejection_reason_vocabulary)}",
                f"passes={_flag(report.passes)}",
            ]
        )
    return out_path
+143
View File
@@ -0,0 +1,143 @@
"""OpenCV CVE-2025-53644 probe evaluator for NFT-SEC-04 (AZ-439 / RESTRICT-CVE-1).
The probe feeds the canonical ``cve-jpeg-fixture`` (a single crafted
JPEG known to trigger the upstream OpenCV vulnerability) into the SUT's
nav-camera input and asserts:
* AC-1a: the SUT process remains **alive** after the frame (no crash,
no abort, no segfault — the FDR archive contains AT LEAST one record
whose monotonic_ms strictly post-dates the probe);
* AC-1b: the FDR contains a deterministic outcome for the probe frame
— either a ``decode-success`` record (the patched OpenCV decoded
the crafted JPEG) OR a ``frame-decode-error`` record (the SUT
gracefully rejected the malformed payload). A silent drop (no FDR
record at all) is a failure, because it means the SUT swallowed the
frame without honoring its error-reporting contract.
Public-boundary discipline: does NOT import any
``src/gps_denied_onboard`` symbol. The evaluator consumes only the
runner-collected ``FdrSurvivalRecord`` summaries the FDR-reader helper
already produces for other scenarios.
"""
from __future__ import annotations
import csv
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import Sequence
class ProbeFrameOutcome(str, Enum):
    """The deterministic per-frame outcome the SUT must record."""

    # The probe frame was decoded.
    DECODE_SUCCESS = "decode-success"
    # The probe frame was rejected with a recorded decode error.
    FRAME_DECODE_ERROR = "frame-decode-error"
    # No success/error record was found for the probe frame.
    MISSING = "missing" # silent drop — fails AC-1b
@dataclass(frozen=True)
class FdrSurvivalRecord:
    """One FDR record from the runner-collected archive."""

    # Record timestamp in milliseconds; compared against the probe injection time.
    monotonic_ms: int
    # Record type label; the classifier looks for the substrings
    # "decode-success" and "decode-error" inside it.
    kind: str # e.g. "frame-decode-success" or "frame-decode-error"
@dataclass(frozen=True)
class CveProbeReport:
    """Outcome of injecting the ``cve-jpeg-fixture`` once."""

    probe_injected_at_ms: int
    last_fdr_record_at_ms: int | None
    probe_outcome: ProbeFrameOutcome

    @property
    def passes_no_crash(self) -> bool:
        """True when the newest FDR record is at or after the probe injection.

        The comparison is inclusive (``>=``).
        NOTE(review): the module docstring words this as "strictly
        post-dates" — confirm which boundary is intended.
        """
        newest = self.last_fdr_record_at_ms
        if newest is None:
            return False
        return newest >= self.probe_injected_at_ms

    @property
    def passes_graceful_outcome(self) -> bool:
        """True when the probe frame was decoded or rejected with a recorded error."""
        acceptable = (
            ProbeFrameOutcome.DECODE_SUCCESS,
            ProbeFrameOutcome.FRAME_DECODE_ERROR,
        )
        return self.probe_outcome in acceptable

    @property
    def passes(self) -> bool:
        """Overall verdict: survived AND recorded a deterministic outcome."""
        if not self.passes_no_crash:
            return False
        return self.passes_graceful_outcome
def classify_probe_outcome(
    fdr_records: Sequence[FdrSurvivalRecord],
    *,
    probe_injected_at_ms: int,
    tolerance_ms: int = 50,
) -> ProbeFrameOutcome:
    """Pick the FDR record nearest the probe injection and classify it.

    A record is considered ``for the probe`` if its monotonic timestamp
    lies within ``[probe_injected_at_ms, probe_injected_at_ms + tolerance_ms]``
    (both ends inclusive). In-window records are examined in ascending
    timestamp order, so the verdict comes from the classifiable record closest
    to the injection regardless of the order the archive was handed over in.
    In-window records whose ``kind`` names neither outcome are skipped.

    Args:
        fdr_records: Records collected from the FDR archive.
        probe_injected_at_ms: Timestamp at which the probe frame was injected.
        tolerance_ms: Width of the acceptance window after the injection.

    Returns:
        ``DECODE_SUCCESS`` or ``FRAME_DECODE_ERROR`` for the nearest
        classifiable record; ``MISSING`` if no record falls in the window —
        which fails AC-1b regardless of the no-crash check.
    """
    window_end_ms = probe_injected_at_ms + tolerance_ms
    in_window = (
        record
        for record in fdr_records
        if probe_injected_at_ms <= record.monotonic_ms <= window_end_ms
    )
    # sorted() is stable: records sharing a timestamp keep their archive order.
    for record in sorted(in_window, key=lambda r: r.monotonic_ms):
        if "decode-success" in record.kind:
            return ProbeFrameOutcome.DECODE_SUCCESS
        if "decode-error" in record.kind:
            return ProbeFrameOutcome.FRAME_DECODE_ERROR
    return ProbeFrameOutcome.MISSING
def evaluate(
    fdr_records: Sequence[FdrSurvivalRecord],
    *,
    probe_injected_at_ms: int,
    tolerance_ms: int = 50,
) -> CveProbeReport:
    """Assemble the probe report from the FDR archive.

    Records the largest record timestamp (``None`` for an empty archive) and
    the outcome classified for the probe frame.
    """
    timestamps = [record.monotonic_ms for record in fdr_records]
    newest_at = max(timestamps) if timestamps else None
    return CveProbeReport(
        probe_injected_at_ms=probe_injected_at_ms,
        last_fdr_record_at_ms=newest_at,
        probe_outcome=classify_probe_outcome(
            fdr_records,
            probe_injected_at_ms=probe_injected_at_ms,
            tolerance_ms=tolerance_ms,
        ),
    )
def write_csv_evidence(out_path: Path, report: CveProbeReport) -> Path:
    """Write the probe verdict as a two-row CSV (header + values) and return the path.

    Parent directories are created as needed. A missing last-record timestamp
    is written as an empty cell.
    """

    def _flag(value: bool) -> str:
        return "true" if value else "false"

    header = [
        "probe_injected_at_ms",
        "last_fdr_record_at_ms",
        "probe_outcome",
        "passes_no_crash",
        "passes_graceful_outcome",
        "passes",
    ]
    newest = report.last_fdr_record_at_ms
    values = [
        report.probe_injected_at_ms,
        "" if newest is None else newest,
        report.probe_outcome.value,
        _flag(report.passes_no_crash),
        _flag(report.passes_graceful_outcome),
        _flag(report.passes),
    ]

    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", newline="") as fh:
        sink = csv.writer(fh)
        sink.writerow(header)
        sink.writerow(values)
    return out_path
+219
View File
@@ -0,0 +1,219 @@
"""Egress-observation evaluator shared by NFT-SEC-02 and NFT-SEC-05 (AZ-437).
Both scenarios verify the same invariant: **no packets leave the e2e
Docker network** from the SUT container. NFT-SEC-02 reads the Docker
network-stats counter over a 5-min Derkachi replay. NFT-SEC-05 runs a
``nslookup`` probe inside the SUT container's network namespace and
checks that (a) the lookup fails and (b) no UDP-53 packets escape the
host's outbound interface during the probe.
The observation pattern is identical in both cases: take a *before*
counter snapshot, run the workload, take an *after* snapshot, assert
``after - before == 0`` for the relevant counter family. The runner is
responsible for the actual ``docker network inspect`` / ``ip -s link``
collection; this helper only performs the delta + verdict logic so the
scenario code stays tight and the verdict logic is unit-testable in
isolation.
DNS-resolution outcome categories follow the spec's wording (NXDOMAIN,
timeout, "no servers can be reached") + a generic "other failure" bucket
for resolver implementations that emit a different string but still
fail. A *success* outcome — i.e. an actual A record returned — is the
only failing case.
Public-boundary discipline: does NOT import any
``src/gps_denied_onboard`` symbol.
"""
from __future__ import annotations
import csv
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
class DnsLookupOutcome(str, Enum):
    """The runner-classified outcome of a ``nslookup`` probe."""

    # The first four members are the ways a lookup can fail; all of them are
    # listed in FAILING_DNS_OUTCOMES.
    NXDOMAIN = "nxdomain"
    TIMEOUT = "timeout"
    NO_SERVERS = "no_servers_can_be_reached"
    OTHER_FAILURE = "other_failure"
    # The lookup resolved; not a member of FAILING_DNS_OUTCOMES.
    SUCCESS = "success" # the only outcome that fails the AC
# Outcomes in which the DNS lookup itself failed. "Failing" describes the
# lookup, not the test: DnsBlackholeReport.passes_lookup is True exactly when
# the observed outcome is in this set.
FAILING_DNS_OUTCOMES: frozenset[DnsLookupOutcome] = frozenset(
    {
        DnsLookupOutcome.NXDOMAIN,
        DnsLookupOutcome.TIMEOUT,
        DnsLookupOutcome.NO_SERVERS,
        DnsLookupOutcome.OTHER_FAILURE,
    }
)
@dataclass(frozen=True)
class EgressCounterSnapshot:
    """A single reading of the three egress packet counters.

    Raises:
        ValueError: on construction, if any counter is negative. Counters are
            checked in declaration order and the first offender is reported.
    """

    egress_packets_to_internal_net: int
    egress_packets_to_other_destinations: int
    udp53_egress_packets: int

    def __post_init__(self) -> None:
        counter_names = (
            "egress_packets_to_internal_net",
            "egress_packets_to_other_destinations",
            "udp53_egress_packets",
        )
        offender = next(
            (name for name in counter_names if getattr(self, name) < 0),
            None,
        )
        if offender is not None:
            reading = getattr(self, offender)
            raise ValueError(
                f"egress counter {offender!r} cannot be negative; got {reading}"
            )
@dataclass(frozen=True)
class NoEgressReport:
    """NFT-SEC-02 verdict built from a before/after pair of counter snapshots."""

    before: EgressCounterSnapshot
    after: EgressCounterSnapshot
    window_label: str # e.g. "5min-derkachi-replay"

    @property
    def delta_other_destinations(self) -> int:
        """Growth of the non-internal egress counter across the window."""
        start = self.before.egress_packets_to_other_destinations
        end = self.after.egress_packets_to_other_destinations
        return end - start

    @property
    def delta_internal(self) -> int:
        """Growth of the internal-network egress counter across the window."""
        start = self.before.egress_packets_to_internal_net
        end = self.after.egress_packets_to_internal_net
        return end - start

    @property
    def passes(self) -> bool:
        """True only when the non-internal counter did not change at all."""
        return self.delta_other_destinations == 0
@dataclass(frozen=True)
class DnsBlackholeReport:
    """NFT-SEC-05 verdict: sidecar health, lookup outcome, and UDP-53 counter change."""

    before: EgressCounterSnapshot
    after: EgressCounterSnapshot
    lookup_outcome: DnsLookupOutcome
    sidecar_healthy: bool

    @property
    def delta_udp53(self) -> int:
        """Growth of the UDP-53 egress counter between the two snapshots."""
        start = self.before.udp53_egress_packets
        end = self.after.udp53_egress_packets
        return end - start

    @property
    def passes_lookup(self) -> bool:
        """True when the lookup ended in one of ``FAILING_DNS_OUTCOMES``."""
        return self.lookup_outcome in FAILING_DNS_OUTCOMES

    @property
    def passes_udp_silence(self) -> bool:
        """True when the UDP-53 counter did not change."""
        return self.delta_udp53 == 0

    @property
    def passes(self) -> bool:
        """Overall verdict: healthy sidecar, failed lookup, and silent UDP-53."""
        return (
            self.sidecar_healthy
            and self.passes_lookup
            and self.passes_udp_silence
        )
def evaluate_no_egress(
    before: EgressCounterSnapshot,
    after: EgressCounterSnapshot,
    *,
    window_label: str,
) -> NoEgressReport:
    """Bundle the two snapshots and the window label into the NFT-SEC-02 report.

    The deltas and the verdict are computed by the report's properties.
    """
    report = NoEgressReport(
        before=before,
        after=after,
        window_label=window_label,
    )
    return report
def evaluate_dns_blackhole(
    before: EgressCounterSnapshot,
    after: EgressCounterSnapshot,
    *,
    lookup_outcome: DnsLookupOutcome,
    sidecar_healthy: bool,
) -> DnsBlackholeReport:
    """Bundle the snapshots, lookup outcome and sidecar health into the NFT-SEC-05 report.

    The UDP-53 delta and the verdicts are computed by the report's properties.
    """
    report = DnsBlackholeReport(
        before=before,
        after=after,
        lookup_outcome=lookup_outcome,
        sidecar_healthy=sidecar_healthy,
    )
    return report
def write_no_egress_csv_evidence(out_path: Path, report: NoEgressReport) -> Path:
    """Write the no-egress verdict as a two-row CSV (header + values) and return the path.

    Parent directories are created as needed.
    """
    header = [
        "window_label",
        "before_other",
        "after_other",
        "delta_other",
        "before_internal",
        "after_internal",
        "delta_internal",
        "passes",
    ]
    start, end = report.before, report.after
    verdict = "true" if report.passes else "false"
    values = [
        report.window_label,
        start.egress_packets_to_other_destinations,
        end.egress_packets_to_other_destinations,
        report.delta_other_destinations,
        start.egress_packets_to_internal_net,
        end.egress_packets_to_internal_net,
        report.delta_internal,
        verdict,
    ]

    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", newline="") as fh:
        sink = csv.writer(fh)
        sink.writerow(header)
        sink.writerow(values)
    return out_path
def write_dns_blackhole_csv_evidence(
    out_path: Path, report: DnsBlackholeReport
) -> Path:
    """Write the DNS-blackhole verdict as a two-row CSV (header + values) and return the path.

    Parent directories are created as needed.
    """

    def _flag(value: bool) -> str:
        return "true" if value else "false"

    header = [
        "sidecar_healthy",
        "lookup_outcome",
        "passes_lookup",
        "before_udp53",
        "after_udp53",
        "delta_udp53",
        "passes_udp_silence",
        "passes",
    ]
    values = [
        _flag(report.sidecar_healthy),
        report.lookup_outcome.value,
        _flag(report.passes_lookup),
        report.before.udp53_egress_packets,
        report.after.udp53_egress_packets,
        report.delta_udp53,
        _flag(report.passes_udp_silence),
        _flag(report.passes),
    ]

    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", newline="") as fh:
        sink = csv.writer(fh)
        sink.writerow(header)
        sink.writerow(values)
    return out_path
@@ -0,0 +1,217 @@
"""MAVLink 2.0 signing-rejection evaluator for NFT-SEC-03 (AZ-438 / AC-NEW-11, D-C8-9).
For each of the three injection sub-cases — unsigned, signed-with-wrong-key,
replayed-from-tlog — AP MUST:
* emit a ``BAD_SIGNATURE`` STATUSTEXT within ≤``REJECTION_LATENCY_MS``
(500 ms) of the injected message;
* NOT update its ``GLOBAL_POSITION_INT`` from the injected message
(i.e. the GPS position remains anchored to whatever the last legitimate
emission established).
The "rejection STATUSTEXT" regex matches the canonical AP wording (e.g.
``MAVLink: BAD_SIGNATURE``) plus an "equivalent" wildcard that the
spec carves out for AP variants that emit a slightly different phrase
("Bad signature received", "signature rejected", etc.) so this evaluator
does not lock to one exact build.
Public-boundary discipline: does NOT import any
``src/gps_denied_onboard`` symbol.
"""
from __future__ import annotations
import csv
import re
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import Sequence
# Default latency budget in milliseconds between an injection and the matching
# rejection STATUSTEXT; used as the default for SubCaseRejectionReport.budget_ms.
REJECTION_LATENCY_MS = 500
# Largest position drift, in meters, still treated as "position not updated".
POSITION_DRIFT_TOLERANCE_M = 1.0
class SubCase(str, Enum):
    # The three injection sub-cases; the values are the labels written to the
    # evidence CSV.
    UNSIGNED = "unsigned"
    WRONG_KEY = "wrong_key"
    REPLAYED = "replayed"
# Canonical wording + variants observed across AP builds. Matched
# case-insensitively. Extending this set is a deliberate decision; the
# regression risk is that a future AP build emits a brand-new phrase
# and the runner silently treats injections as accepted — guard against
# that by surfacing the seen STATUSTEXTs in the CSV evidence row.
# NOTE(review): the CSV writer in this module records only the matched
# rejection text, so STATUSTEXTs that did not match are not surfaced there —
# confirm whether the runner captures them elsewhere.
# Because matching is case-insensitive, the first pattern already accepts
# "bad signature" (with or without "received"); the third pattern is kept as
# explicit documentation of that variant.
BAD_SIGNATURE_PATTERNS: tuple[str, ...] = (
    r"\bBAD[_\s]?SIGNATURE\b",
    r"\bsignature\s+rejected\b",
    r"\bbad\s+signature\s+received\b",
)
# Single alternation of all patterns, compiled once at import time.
_BAD_SIGNATURE_RE = re.compile("|".join(BAD_SIGNATURE_PATTERNS), re.IGNORECASE)
def is_bad_signature_statustext(text: str) -> bool:
    """Report whether ``text`` contains any of the documented rejection phrases.

    The search is case-insensitive and matches anywhere inside ``text``.
    """
    hit = _BAD_SIGNATURE_RE.search(text)
    return hit is not None
@dataclass(frozen=True)
class InjectionEvent:
    """One runner-issued injection in a sub-case."""

    # Which of the three sub-cases this injection belongs to.
    sub_case: SubCase
    # Injection timestamp in milliseconds; rejection latency is measured from it.
    injected_at_ms: int
@dataclass(frozen=True)
class StatustextSample:
    # One captured STATUSTEXT message.
    # Capture timestamp in milliseconds, compared against injection timestamps.
    monotonic_ms: int
    # Message text, matched against the bad-signature patterns.
    text: str
@dataclass(frozen=True)
class PositionSample:
    """AP ``GLOBAL_POSITION_INT`` sample with latitude/longitude in degrees × 1e7.

    The coordinates are NOT in meters: ``position_drift_m`` scales them by
    ``1e-7`` to obtain degrees before converting the difference to meters.
    """

    # Sample timestamp in milliseconds.
    monotonic_ms: int
    # Latitude in degrees × 1e7.
    lat_e7: int
    # Longitude in degrees × 1e7.
    lon_e7: int
def position_drift_m(samples: Sequence[PositionSample], around_ms: int) -> float:
    """Equirectangular drift (m) between the last sample before and first after ``around_ms``.

    "Before" is the sample with the largest timestamp ``<= around_ms`` and
    "after" the sample with the smallest timestamp ``> around_ms``. Both are
    selected by timestamp, so the result does not depend on the order of
    ``samples``.

    A tiny budget (1 m by default) tolerates the per-frame jitter the
    autopilot's own EKF produces; the absolute test is that the drift
    is NOT on the order of the injected message's lat/lon magnitude
    (which would be several-meters to kilometers).

    Args:
        samples: Position samples with ``monotonic_ms``, ``lat_e7`` and
            ``lon_e7`` (degrees × 1e7).
        around_ms: Timestamp that splits "before" from "after".

    Returns:
        The drift in meters, or ``0.0`` when there is no sample on one side
        of ``around_ms`` (including an empty ``samples``). Callers therefore
        cannot tell "no drift" from "no data" by this value alone.
    """
    import math  # function-scope: ``math`` is not a top-level import of this module

    meters_per_degree = 111_320.0

    before: PositionSample | None = None
    after: PositionSample | None = None
    for s in samples:
        if s.monotonic_ms <= around_ms:
            # Latest sample at or before the split; the first of equal timestamps wins.
            if before is None or s.monotonic_ms > before.monotonic_ms:
                before = s
        elif after is None or s.monotonic_ms < after.monotonic_ms:
            # Earliest sample after the split; the first of equal timestamps wins.
            after = s
    if before is None or after is None:
        return 0.0

    dlat_m = (after.lat_e7 - before.lat_e7) * 1e-7 * meters_per_degree
    # Longitude degrees shrink with the cosine of latitude; use the mean latitude.
    avg_lat_rad = math.radians(((after.lat_e7 + before.lat_e7) / 2.0) * 1e-7)
    dlon_m = (
        (after.lon_e7 - before.lon_e7) * 1e-7 * meters_per_degree * math.cos(avg_lat_rad)
    )
    return math.hypot(dlat_m, dlon_m)
@dataclass(frozen=True)
class SubCaseRejectionReport:
    """Verdict for a single injection sub-case.

    The three ``rejection_*`` fields are ``None`` when no rejection STATUSTEXT
    was found for the injection.
    """

    sub_case: SubCase
    rejection_at_ms: int | None
    rejection_text: str | None
    rejection_latency_ms: int | None
    position_drift_m: float
    budget_ms: int = REJECTION_LATENCY_MS

    @property
    def passes_rejection(self) -> bool:
        """True when a rejection was seen and its latency is within ``budget_ms``."""
        if self.rejection_at_ms is None or self.rejection_latency_ms is None:
            return False
        return self.rejection_latency_ms <= self.budget_ms

    @property
    def passes_no_position_update(self) -> bool:
        """True when the drift does not exceed ``POSITION_DRIFT_TOLERANCE_M``."""
        return self.position_drift_m <= POSITION_DRIFT_TOLERANCE_M

    @property
    def passes(self) -> bool:
        """Overall sub-case verdict: timely rejection AND no position update."""
        if not self.passes_rejection:
            return False
        return self.passes_no_position_update
@dataclass(frozen=True)
class SigningRejectionReport:
    """Combined verdict over every evaluated sub-case."""

    sub_cases: Sequence[SubCaseRejectionReport]

    @property
    def passes(self) -> bool:
        """True when every sub-case passes (vacuously true with no sub-cases)."""
        for verdict in self.sub_cases:
            if not verdict.passes:
                return False
        return True
def evaluate_subcase(
    injection: InjectionEvent,
    statustexts: Sequence[StatustextSample],
    positions: Sequence[PositionSample],
) -> SubCaseRejectionReport:
    """Produce the verdict for one injection against the captured traffic.

    The rejection is the first STATUSTEXT, in the order given, that is not
    older than the injection and matches a bad-signature phrase. Latency is
    its timestamp minus the injection timestamp. Drift is measured around the
    injection timestamp. When no STATUSTEXT qualifies, the three rejection
    fields are ``None``.
    """
    verdict_msg = next(
        (
            msg
            for msg in statustexts
            if not msg.monotonic_ms < injection.injected_at_ms
            and is_bad_signature_statustext(msg.text)
        ),
        None,
    )
    if verdict_msg is None:
        seen_at = seen_text = latency = None
    else:
        seen_at = verdict_msg.monotonic_ms
        seen_text = verdict_msg.text
        latency = verdict_msg.monotonic_ms - injection.injected_at_ms

    return SubCaseRejectionReport(
        sub_case=injection.sub_case,
        rejection_at_ms=seen_at,
        rejection_text=seen_text,
        rejection_latency_ms=latency,
        position_drift_m=position_drift_m(positions, injection.injected_at_ms),
    )
def evaluate(
    injections: Sequence[InjectionEvent],
    *,
    statustexts: Sequence[StatustextSample],
    positions: Sequence[PositionSample],
) -> SigningRejectionReport:
    """Evaluate every injection against the same captures and collect the verdicts.

    Sub-case reports appear in the same order as ``injections``.
    """
    verdicts = tuple(
        evaluate_subcase(inj, statustexts=statustexts, positions=positions)
        for inj in injections
    )
    return SigningRejectionReport(sub_cases=verdicts)
def write_csv_evidence(out_path: Path, report: SigningRejectionReport) -> Path:
    """Write one CSV row per sub-case (after a header row) and return the path.

    Missing rejection fields are written as empty cells; drift is written
    with four decimals. Parent directories are created as needed.

    The file is written as UTF-8 explicitly: ``rejection_text`` is whatever
    the autopilot emitted, and relying on the locale's default encoding can
    raise ``UnicodeEncodeError`` on hosts whose default codec cannot
    represent it.
    """

    def _flag(value: bool) -> str:
        # Lower-case boolean spelling used throughout the evidence files.
        return "true" if value else "false"

    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", newline="", encoding="utf-8") as fh:
        writer = csv.writer(fh)
        writer.writerow(
            [
                "sub_case",
                "rejection_at_ms",
                "rejection_latency_ms",
                "rejection_text",
                "position_drift_m",
                "passes_rejection",
                "passes_no_position_update",
                "passes",
            ]
        )
        for sc in report.sub_cases:
            writer.writerow(
                [
                    sc.sub_case.value,
                    # Explicit None checks so a legitimate 0 is still written.
                    "" if sc.rejection_at_ms is None else sc.rejection_at_ms,
                    "" if sc.rejection_latency_ms is None else sc.rejection_latency_ms,
                    sc.rejection_text or "",
                    f"{sc.position_drift_m:.4f}",
                    _flag(sc.passes_rejection),
                    _flag(sc.passes_no_position_update),
                    _flag(sc.passes),
                ]
            )
    return out_path