Files
gps-denied-onboard/e2e/runner/helpers/blackout_spoof_evaluator.py
T
Oleksandr Bezdieniezhnykh 2d6d44af5d [AZ-424] [AZ-425] [AZ-426] Implement negatives set (FT-N-01/03/04)
Adds three pure-logic evaluators + scenarios + unit tests covering the
project's failure-mode robustness ladder (AC-3.1, AC-3.4, AC-3.5,
AC-NEW-8):

* outlier_tolerance_evaluator (AZ-424 / FT-N-01): per-event 50 m drift
  bound + 3-frame covariance-monotonic window over the AZ-408 outlier
  injector's medium-density manifest.
* outage_request_evaluator (AZ-425 / FT-N-03): detects 3+ consecutive
  missing-frame windows; validates OPERATOR_RELOC_REQUEST STATUSTEXT
  arrives at 2 s ±500 ms, dead_reckoned label during outage, and no
  FC EKF divergence.
* blackout_spoof_evaluator (AZ-426 / FT-N-04): eight-AC ladder across
  the 5 s / 15 s / 35 s sub-windows — switch latency, spoof rejection,
  monotonic covariance, honest horiz_accuracy, STATUSTEXT 1-2 Hz,
  35 s escalation thresholds, and recovery gate.

Each scenario is skip-gated on the AZ-441 / AZ-407 / AZ-416 replay /
SITL / mavproxy helpers; unit tests (14 + 18 + 29 = 61) cover the
AC logic today. Full e2e unit-test suite: 527 passed (+67).

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-17 08:26:16 +03:00

558 lines
19 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Blackout-spoof evaluation for FT-N-04 (AZ-426 / AC-3.5 + AC-NEW-8).
Three-window ladder (5 s / 15 s / 35 s) with the
``blackout_spoof.py`` injector + FC-inbound spoof proxy. The
evaluator validates per AZ-426:
* AC-1: switch latency — within ≤1 frame OR ≤``SWITCH_LATENCY_MS``
(whichever is shorter), the first outbound estimate after blackout
onset carries ``source_label = dead_reckoned``.
* AC-2: spoof rejection — at least one FDR ``spoof-rejected`` event
is observed during the blackout window AND zero spoofed GPS records
are consumed into the estimator (label never returns to
``satellite_anchored`` during the window).
* AC-3: monotonic covariance — ``cov_semi_major_m`` is non-decreasing
across consecutive emissions inside the window.
* AC-4: honest horiz_accuracy —
``horiz_accuracy ≥ HONEST_ACCURACY_RATIO × cov_semi_major_m``
for every emission.
* AC-5: STATUSTEXT 1-2 Hz —
``VISUAL_BLACKOUT_IMU_ONLY`` STATUSTEXT rate is in
``[STATUSTEXT_RATE_MIN_HZ, STATUSTEXT_RATE_MAX_HZ]`` throughout the
window.
* AC-6 (35 s only): when 95 % covariance crosses
``ESCALATION_COV_2D_M``, fix_type degrades to ≤``ESCALATION_FIX_TYPE_2D``.
* AC-7 (35 s only): when 95 % covariance crosses
``ESCALATION_COV_FAILSAFE_M`` OR window duration exceeds
``ESCALATION_DURATION_FAILSAFE_S``, ``horiz_accuracy ==
HORIZ_ACCURACY_FAILSAFE`` AND ``VISUAL_BLACKOUT_FAILSAFE``
STATUSTEXT is emitted within ≤``ESCALATION_LATENCY_MS`` of the
crossing.
* AC-8: recovery gate — after blackout end, label only returns to
``satellite_anchored`` once both (a) FC GPS-health is stable +
non-spoofed for ≥``RECOVERY_STABLE_S`` AND (b) a
visual/satellite consistency check succeeds.
Public-boundary discipline: does NOT import any
``src/gps_denied_onboard`` symbol.
"""
from __future__ import annotations
import csv
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable, Sequence
# AC-1: latency budget (ms) for the first ``dead_reckoned`` estimate after
# blackout onset; evaluate_switch_latency() uses the smaller of this value
# and one frame period.
SWITCH_LATENCY_MS = 400
# AC-2: ``source_label`` values the evaluators compare estimates against.
DEAD_RECKONED_LABEL = "dead_reckoned"
SATELLITE_ANCHORED_LABEL = "satellite_anchored"
# AC-4: horiz_accuracy must be at least this fraction of cov_semi_major_m.
HONEST_ACCURACY_RATIO = 0.95
# AC-5: STATUSTEXT substring that is counted, and the accepted rate band (Hz).
STATUSTEXT_IMU_ONLY = "VISUAL_BLACKOUT_IMU_ONLY"
STATUSTEXT_RATE_MIN_HZ = 1.0
STATUSTEXT_RATE_MAX_HZ = 2.0
# AC-6 / AC-7: escalation thresholds, enforced only for the 35 s sub-case.
STATUSTEXT_FAILSAFE = "VISUAL_BLACKOUT_FAILSAFE"
ESCALATION_COV_2D_M = 100.0  # cov_semi_major_m at which fix_type must degrade (AC-6)
ESCALATION_COV_FAILSAFE_M = 500.0  # cov_semi_major_m that triggers the failsafe (AC-7)
ESCALATION_DURATION_FAILSAFE_S = 30.0  # window duration that triggers the failsafe (AC-7)
ESCALATION_FIX_TYPE_2D = 2  # MAVLink GPS_FIX_TYPE_2D
HORIZ_ACCURACY_FAILSAFE = 999.0  # horiz_accuracy required on samples after the trigger
ESCALATION_LATENCY_MS = 500  # max delay (ms) from trigger to the failsafe STATUSTEXT
# AC-8: how long (s) FC GPS health must stay clean before re-anchoring.
RECOVERY_STABLE_S = 10.0
@dataclass(frozen=True)
class BlackoutWindow:
    """Blackout interval emitted by the injector; every check is scoped to it."""

    # Both bounds are timestamps in milliseconds on the monotonic clock.
    onset_monotonic_ms: int
    end_monotonic_ms: int

    @property
    def duration_s(self) -> float:
        """Window length in seconds (end minus onset, converted from ms)."""
        elapsed_ms = self.end_monotonic_ms - self.onset_monotonic_ms
        return elapsed_ms / 1000.0
@dataclass(frozen=True)
class OutboundEstimateSample:
    """A single outbound estimate, reduced to the fields the FT-N-04 ACs read."""

    monotonic_ms: int  # emission timestamp (ms, monotonic clock)
    source_label: str  # compared against the *_LABEL constants
    cov_semi_major_m: float  # covariance semi-major axis (m)
    horiz_accuracy: float  # AP GPS_INPUT.horiz_accuracy (m)
    fix_type: int  # MAVLink GPS fix type (0..6); -1 if unavailable
@dataclass(frozen=True)
class StatustextSample:
    """One observed STATUSTEXT message: when it was seen and what it said."""

    monotonic_ms: int  # timestamp (ms, monotonic clock)
    text: str  # message text; evaluators match it by substring
@dataclass(frozen=True)
class SpoofRejectedEvent:
    """A single `spoof-rejected` event taken from the FDR."""

    monotonic_ms: int  # event timestamp (ms, monotonic clock)
    reason: str  # recorded reason; not interpreted by the evaluators in this file
@dataclass(frozen=True)
class GpsHealthSample:
    """GPS health as seen on the FC side; feeds the post-blackout recovery gate."""

    monotonic_ms: int  # sample timestamp (ms, monotonic clock)
    healthy: bool
    spoofed: bool
@dataclass(frozen=True)
class ConsistencyCheckEvent:
    """Result of one post-blackout visual/satellite consistency check."""

    monotonic_ms: int  # when the check completed (ms, monotonic clock)
    passed: bool
@dataclass(frozen=True)
class SwitchLatencyReport:
    """Outcome of the AC-1 switch-latency check."""

    # Delay from window onset to the first dead_reckoned estimate, in ms;
    # None when no such estimate was found.
    first_dead_reckoned_offset_ms: int | None
    frame_period_ms: int
    passes: bool
@dataclass(frozen=True)
class SpoofRejectionReport:
    """Outcome of the AC-2 spoof-rejection check."""

    spoof_rejected_count: int  # spoof-rejected events seen inside the window
    satellite_anchored_inside_window: int  # in-window estimates labelled satellite_anchored
    passes: bool
@dataclass(frozen=True)
class CovarianceMonotonicReport:
    """Outcome of the AC-3 monotonic-covariance check."""

    # Timestamp (ms) of the first estimate whose covariance dropped below its
    # predecessor's; None when no decrease was observed.
    first_decreasing_at_ms: int | None
    sample_count: int  # number of in-window estimates examined
    passes: bool
@dataclass(frozen=True)
class HonestAccuracyReport:
    """Outcome of the AC-4 honest-horiz_accuracy check."""

    violation_count: int  # in-window estimates that under-reported accuracy
    sample_count: int  # number of in-window estimates examined
    passes: bool
@dataclass(frozen=True)
class StatustextRateReport:
    """Outcome of the AC-5 rate check on VISUAL_BLACKOUT_IMU_ONLY messages."""

    observed_hz: float | None  # None when no rate could be computed
    count: int  # matching STATUSTEXT messages inside the window
    passes: bool
@dataclass(frozen=True)
class EscalationReport:
    """Combined AC-6 / AC-7 outcome.

    Only the 35 s window is actually judged; for other windows the evaluator
    fills in a passing report.
    """

    # --- AC-6: 2D-fix degradation ---
    cov2d_crossed: bool
    cov2d_crossed_at_ms: int | None
    fix_type_degraded: bool  # AC-6 satisfied
    # --- AC-7: failsafe escalation ---
    cov500_or_30s_crossed: bool
    cov500_or_30s_crossed_at_ms: int | None
    horiz_accuracy_999: bool  # AC-7 part 1
    failsafe_statustext_offset_ms: int | None
    failsafe_statustext_in_time: bool  # AC-7 part 2
    # --- verdicts ---
    passes_ac6: bool
    passes_ac7: bool

    @property
    def passes(self) -> bool:
        """True only when both the AC-6 and the AC-7 verdicts are true."""
        if not self.passes_ac6:
            return self.passes_ac6
        return self.passes_ac7
@dataclass(frozen=True)
class RecoveryGateReport:
    """Outcome of the AC-8 recovery-gate check."""

    recovery_at_ms: int | None  # None when no recovery was observed
    stable_period_s: float | None  # None when no recovery was observed
    consistency_check_passed: bool
    passes: bool
@dataclass(frozen=True)
class BlackoutSpoofReport:
    """All FT-N-04 per-AC reports for a single blackout window."""

    window: BlackoutWindow
    switch_latency: SwitchLatencyReport  # AC-1
    spoof_rejection: SpoofRejectionReport  # AC-2
    covariance_monotonic: CovarianceMonotonicReport  # AC-3
    honest_accuracy: HonestAccuracyReport  # AC-4
    statustext_rate: StatustextRateReport  # AC-5
    escalation: EscalationReport  # AC-6 + AC-7
    recovery_gate: RecoveryGateReport  # AC-8

    @property
    def passes(self) -> bool:
        """True when every per-AC report passes."""
        verdicts = (
            self.switch_latency.passes,
            self.spoof_rejection.passes,
            self.covariance_monotonic.passes,
            self.honest_accuracy.passes,
            self.statustext_rate.passes,
            self.escalation.passes,
            self.recovery_gate.passes,
        )
        for verdict in verdicts:
            if not verdict:
                return False
        return True
def _inside_window(window: BlackoutWindow, t_ms: int) -> bool:
return window.onset_monotonic_ms <= t_ms <= window.end_monotonic_ms
def _samples_inside_window(
    window: BlackoutWindow, samples: Iterable[OutboundEstimateSample]
) -> list[OutboundEstimateSample]:
    """Return the samples timestamped inside the window, in their input order."""
    kept: list[OutboundEstimateSample] = []
    for sample in samples:
        if _inside_window(window, sample.monotonic_ms):
            kept.append(sample)
    return kept
def evaluate_switch_latency(
    window: BlackoutWindow,
    estimates: Sequence[OutboundEstimateSample],
    frame_period_ms: int,
) -> SwitchLatencyReport:
    """AC-1: check how quickly the label switches to dead_reckoned.

    The allowed delay is the smaller of ``SWITCH_LATENCY_MS`` and
    ``frame_period_ms``. The delay measured is that of the first estimate, in
    the order given, that is timestamped at or after window onset and carries
    ``DEAD_RECKONED_LABEL``. The check fails when no such estimate exists.
    """
    allowed_ms = min(SWITCH_LATENCY_MS, frame_period_ms)
    onset_ms = window.onset_monotonic_ms
    first_hit = next(
        (
            sample
            for sample in estimates
            if sample.monotonic_ms >= onset_ms
            and sample.source_label == DEAD_RECKONED_LABEL
        ),
        None,
    )
    latency_ms: int | None = None
    if first_hit is not None:
        latency_ms = first_hit.monotonic_ms - onset_ms
    return SwitchLatencyReport(
        first_dead_reckoned_offset_ms=latency_ms,
        frame_period_ms=frame_period_ms,
        passes=latency_ms is not None and latency_ms <= allowed_ms,
    )
def evaluate_spoof_rejection(
    window: BlackoutWindow,
    estimates: Sequence[OutboundEstimateSample],
    spoof_events: Sequence[SpoofRejectedEvent],
) -> SpoofRejectionReport:
    """AC-2: require spoof rejections and no satellite_anchored label in-window.

    Passes when at least one ``spoof_events`` entry is timestamped inside the
    window and none of the in-window estimates carries
    ``SATELLITE_ANCHORED_LABEL``.
    """
    in_window_events = [
        event for event in spoof_events if _inside_window(window, event.monotonic_ms)
    ]
    rejection_count = len(in_window_events)

    anchored_count = 0
    for sample in _samples_inside_window(window, estimates):
        if sample.source_label == SATELLITE_ANCHORED_LABEL:
            anchored_count += 1

    return SpoofRejectionReport(
        spoof_rejected_count=rejection_count,
        satellite_anchored_inside_window=anchored_count,
        passes=rejection_count >= 1 and anchored_count == 0,
    )
def evaluate_covariance_monotonic(
    window: BlackoutWindow, estimates: Sequence[OutboundEstimateSample]
) -> CovarianceMonotonicReport:
    """AC-3: covariance must never shrink between consecutive in-window estimates.

    Estimates are compared pairwise in the order given; the timestamp of the
    first estimate whose ``cov_semi_major_m`` is lower than its predecessor's
    is reported. An empty window fails.
    """
    in_window = _samples_inside_window(window, estimates)
    drop_at_ms = next(
        (
            current.monotonic_ms
            for previous, current in zip(in_window, in_window[1:])
            if current.cov_semi_major_m < previous.cov_semi_major_m
        ),
        None,
    )
    examined = len(in_window)
    return CovarianceMonotonicReport(
        first_decreasing_at_ms=drop_at_ms,
        sample_count=examined,
        passes=drop_at_ms is None and examined >= 1,
    )
def evaluate_honest_accuracy(
    window: BlackoutWindow, estimates: Sequence[OutboundEstimateSample]
) -> HonestAccuracyReport:
    """AC-4: reported horiz_accuracy must not understate the covariance.

    An in-window estimate is a violation when its ``horiz_accuracy`` is below
    ``HONEST_ACCURACY_RATIO * cov_semi_major_m``. Passes with zero violations
    and at least one in-window estimate.
    """
    in_window = _samples_inside_window(window, estimates)
    understated = [
        sample
        for sample in in_window
        if sample.horiz_accuracy < HONEST_ACCURACY_RATIO * sample.cov_semi_major_m
    ]
    violation_total = len(understated)
    examined = len(in_window)
    return HonestAccuracyReport(
        violation_count=violation_total,
        sample_count=examined,
        passes=violation_total == 0 and examined >= 1,
    )
def evaluate_statustext_rate(
    window: BlackoutWindow, statustexts: Sequence[StatustextSample]
) -> StatustextRateReport:
    """AC-5: average VISUAL_BLACKOUT_IMU_ONLY rate must lie in the allowed band.

    The rate is the number of in-window messages whose text contains
    ``STATUSTEXT_IMU_ONLY`` divided by the window duration in seconds. A
    non-positive duration, or no matching message, fails with
    ``observed_hz=None``.
    """
    matching: list[StatustextSample] = []
    for message in statustexts:
        if STATUSTEXT_IMU_ONLY not in message.text:
            continue
        if _inside_window(window, message.monotonic_ms):
            matching.append(message)
    message_count = len(matching)

    span_s = window.duration_s
    if span_s <= 0 or message_count == 0:
        return StatustextRateReport(observed_hz=None, count=message_count, passes=False)

    hz = message_count / span_s
    in_band = STATUSTEXT_RATE_MIN_HZ <= hz <= STATUSTEXT_RATE_MAX_HZ
    return StatustextRateReport(observed_hz=hz, count=message_count, passes=in_band)
def _first_cov_crossing_ms(
    window: BlackoutWindow,
    estimates: Sequence[OutboundEstimateSample],
    threshold_m: float,
) -> int | None:
    """Timestamp of the first in-window estimate with covariance >= threshold.

    Estimates are scanned in the order given; returns None when none of them
    reaches ``threshold_m``.
    """
    return next(
        (
            sample.monotonic_ms
            for sample in _samples_inside_window(window, estimates)
            if sample.cov_semi_major_m >= threshold_m
        ),
        None,
    )
def evaluate_escalation(
    window: BlackoutWindow,
    estimates: Sequence[OutboundEstimateSample],
    statustexts: Sequence[StatustextSample],
    *,
    is_35s_window: bool,
) -> EscalationReport:
    """AC-6 + AC-7: escalation checks, enforced only for the 35 s sub-case.

    The failsafe trigger is the earlier of (a) the first in-window estimate
    whose covariance reaches ``ESCALATION_COV_FAILSAFE_M`` and (b) onset plus
    ``ESCALATION_DURATION_FAILSAFE_S``, the latter only when the window is at
    least that long.

    When ``is_35s_window`` is false the crossings are still reported, but
    every verdict field is set to True (informational only).

    AC-6: once covariance reaches ``ESCALATION_COV_2D_M``, every later
    in-window estimate must have ``fix_type <= ESCALATION_FIX_TYPE_2D``.
    AC-7: from the trigger on, every in-window estimate must report
    ``HORIZ_ACCURACY_FAILSAFE``, and a STATUSTEXT containing
    ``STATUSTEXT_FAILSAFE`` must appear no later than
    ``ESCALATION_LATENCY_MS`` after the trigger.
    """
    cov2d_at = _first_cov_crossing_ms(window, estimates, ESCALATION_COV_2D_M)
    cov500_at = _first_cov_crossing_ms(window, estimates, ESCALATION_COV_FAILSAFE_M)

    # Collect whichever failsafe triggers fired, then keep the earliest.
    trigger_candidates: list[int] = []
    if cov500_at is not None:
        trigger_candidates.append(cov500_at)
    if window.duration_s >= ESCALATION_DURATION_FAILSAFE_S:
        trigger_candidates.append(
            window.onset_monotonic_ms + int(ESCALATION_DURATION_FAILSAFE_S * 1000)
        )
    trigger_at: int | None = min(trigger_candidates) if trigger_candidates else None

    crossed_2d = cov2d_at is not None
    triggered = trigger_at is not None

    if not is_35s_window:
        # Shorter windows: report crossings but never fail on them.
        return EscalationReport(
            cov2d_crossed=crossed_2d,
            cov2d_crossed_at_ms=cov2d_at,
            fix_type_degraded=True,
            cov500_or_30s_crossed=triggered,
            cov500_or_30s_crossed_at_ms=trigger_at,
            horiz_accuracy_999=True,
            failsafe_statustext_offset_ms=None,
            failsafe_statustext_in_time=True,
            passes_ac6=True,
            passes_ac7=True,
        )

    in_window = _samples_inside_window(window, estimates)

    # AC-6: no estimate at/after the 2D crossing may exceed the 2D fix type.
    if cov2d_at is None:
        fix_ok = True
    else:
        fix_ok = not any(
            sample.fix_type > ESCALATION_FIX_TYPE_2D
            for sample in in_window
            if sample.monotonic_ms >= cov2d_at
        )
    ac6_ok = fix_ok  # with no crossing fix_ok is True, so AC-6 passes vacuously

    # AC-7: failsafe accuracy value plus a timely failsafe STATUSTEXT.
    if trigger_at is None:
        accuracy_ok = True
        statustext_delay_ms: int | None = None
        statustext_ok = True
    else:
        accuracy_ok = not any(
            sample.horiz_accuracy != HORIZ_ACCURACY_FAILSAFE
            for sample in in_window
            if sample.monotonic_ms >= trigger_at
        )
        delays = [
            message.monotonic_ms - trigger_at
            for message in statustexts
            if STATUSTEXT_FAILSAFE in message.text and message.monotonic_ms >= trigger_at
        ]
        statustext_delay_ms = min(delays) if delays else None
        statustext_ok = (
            statustext_delay_ms is not None
            and statustext_delay_ms <= ESCALATION_LATENCY_MS
        )
    ac7_ok = trigger_at is None or (accuracy_ok and statustext_ok)

    return EscalationReport(
        cov2d_crossed=crossed_2d,
        cov2d_crossed_at_ms=cov2d_at,
        fix_type_degraded=fix_ok,
        cov500_or_30s_crossed=triggered,
        cov500_or_30s_crossed_at_ms=trigger_at,
        horiz_accuracy_999=accuracy_ok,
        failsafe_statustext_offset_ms=statustext_delay_ms,
        failsafe_statustext_in_time=statustext_ok,
        passes_ac6=ac6_ok,
        passes_ac7=ac7_ok,
    )
def evaluate_recovery_gate(
    window: BlackoutWindow,
    estimates: Sequence[OutboundEstimateSample],
    gps_health: Sequence[GpsHealthSample],
    consistency_checks: Sequence[ConsistencyCheckEvent],
) -> RecoveryGateReport:
    """AC-8: re-anchoring needs a clean GPS streak and a consistency pass.

    The claimed recovery moment is the earliest estimate timestamped after the
    window end that carries ``SATELLITE_ANCHORED_LABEL``. If there is none the
    gate passes vacuously (``recovery_at_ms`` and ``stable_period_s`` are None).

    Otherwise both conditions must hold:

    (a) The GPS-health samples between window end and recovery (inclusive)
        end in an uninterrupted healthy, non-spoofed streak that began at
        least ``RECOVERY_STABLE_S`` before recovery. Only the streak that
        immediately precedes recovery counts: unhealthy or spoofed samples
        older than that streak (for example flags lingering right after the
        blackout ends) do not fail the gate by themselves, but any such sample
        inside the final ``RECOVERY_STABLE_S`` does.
    (b) At least one passed consistency check lies between window end and
        recovery (inclusive).

    The result does not depend on the order of the input sequences.

    Args:
        window: Blackout window; only ``end_monotonic_ms`` is used.
        estimates: Outbound estimates, searched for the recovery moment.
        gps_health: FC-side GPS health samples.
        consistency_checks: Visual/satellite consistency check outcomes.

    Returns:
        A ``RecoveryGateReport``. ``stable_period_s`` is the length of the
        clean streak that ends at recovery (0.0 when there is none).
    """
    window_end_ms = window.end_monotonic_ms

    # Earliest post-window satellite_anchored sample marks the claimed recovery.
    recovery_at = min(
        (
            sample.monotonic_ms
            for sample in estimates
            if sample.monotonic_ms > window_end_ms
            and sample.source_label == SATELLITE_ANCHORED_LABEL
        ),
        default=None,
    )
    if recovery_at is None:
        # No recovery attempted: vacuously passing for this gate; the caller
        # can still flag it via window-level coverage.
        return RecoveryGateReport(
            recovery_at_ms=None,
            stable_period_s=None,
            consistency_check_passed=False,
            passes=True,
        )

    # (a) Length of the clean streak that ends at recovery_at.
    post_window = [
        health
        for health in gps_health
        if window_end_ms <= health.monotonic_ms <= recovery_at
    ]
    last_bad_ms = max(
        (
            health.monotonic_ms
            for health in post_window
            if not health.healthy or health.spoofed
        ),
        default=None,
    )
    # Samples strictly after the last bad one are all clean by construction.
    # A clean sample sharing a timestamp with a bad one is excluded, which
    # keeps the check conservative.
    stable_since_ms = min(
        (
            health.monotonic_ms
            for health in post_window
            if last_bad_ms is None or health.monotonic_ms > last_bad_ms
        ),
        default=None,
    )
    cutoff_ms = recovery_at - int(RECOVERY_STABLE_S * 1000)
    if stable_since_ms is None:
        stable_period_s = 0.0
        stable_long_enough = False
    else:
        stable_period_s = (recovery_at - stable_since_ms) / 1000.0
        stable_long_enough = stable_since_ms <= cutoff_ms

    # (b) A consistency check passed between window end and recovery_at.
    consistency_passed = any(
        check.passed and window_end_ms <= check.monotonic_ms <= recovery_at
        for check in consistency_checks
    )

    return RecoveryGateReport(
        recovery_at_ms=recovery_at,
        stable_period_s=stable_period_s,
        consistency_check_passed=consistency_passed,
        passes=stable_long_enough and consistency_passed,
    )
def evaluate(
    window: BlackoutWindow,
    *,
    estimates: Sequence[OutboundEstimateSample],
    statustexts: Sequence[StatustextSample],
    spoof_events: Sequence[SpoofRejectedEvent],
    gps_health: Sequence[GpsHealthSample],
    consistency_checks: Sequence[ConsistencyCheckEvent],
    frame_period_ms: int,
    is_35s_window: bool,
) -> BlackoutSpoofReport:
    """Evaluate AC-1 through AC-8 for one window and bundle the results.

    Each per-AC evaluator is called once with the inputs it needs;
    ``is_35s_window`` is forwarded to the escalation check only.
    """
    ac1_report = evaluate_switch_latency(window, estimates, frame_period_ms)
    ac2_report = evaluate_spoof_rejection(window, estimates, spoof_events)
    ac3_report = evaluate_covariance_monotonic(window, estimates)
    ac4_report = evaluate_honest_accuracy(window, estimates)
    ac5_report = evaluate_statustext_rate(window, statustexts)
    ac6_ac7_report = evaluate_escalation(
        window, estimates, statustexts, is_35s_window=is_35s_window
    )
    ac8_report = evaluate_recovery_gate(
        window, estimates, gps_health, consistency_checks
    )
    return BlackoutSpoofReport(
        window=window,
        switch_latency=ac1_report,
        spoof_rejection=ac2_report,
        covariance_monotonic=ac3_report,
        honest_accuracy=ac4_report,
        statustext_rate=ac5_report,
        escalation=ac6_ac7_report,
        recovery_gate=ac8_report,
    )
def write_csv_evidence(out_path: Path, report: BlackoutSpoofReport) -> Path:
    """Write the FT-N-04 evidence CSV: a header row plus one summary row.

    Parent directories are created as needed and an existing file is
    overwritten. Booleans are written as ``true`` / ``false``, missing values
    as empty cells, and the window duration and observed rate with three
    decimals. Returns ``out_path``.
    """

    def flag(verdict: bool) -> str:
        # CSV spelling of a pass/fail verdict.
        return "true" if verdict else "false"

    def cell(value):
        # Absent measurements become empty cells.
        return "" if value is None else value

    header = [
        "window_duration_s",
        "ac1_switch_latency_ms",
        "ac1_passes",
        "ac2_spoof_rejected_count",
        "ac2_re_anchored_count",
        "ac2_passes",
        "ac3_first_decreasing_at_ms",
        "ac3_passes",
        "ac4_violation_count",
        "ac4_passes",
        "ac5_observed_hz",
        "ac5_passes",
        "ac6_cov2d_at_ms",
        "ac6_passes",
        "ac7_failsafe_trigger_at_ms",
        "ac7_passes",
        "ac8_recovery_at_ms",
        "ac8_passes",
        "passes",
    ]

    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", newline="") as fh:
        writer = csv.writer(fh)
        writer.writerow(header)

        ac1 = report.switch_latency
        ac2 = report.spoof_rejection
        ac3 = report.covariance_monotonic
        ac4 = report.honest_accuracy
        ac5 = report.statustext_rate
        esc = report.escalation
        ac8 = report.recovery_gate

        observed_hz = ac5.observed_hz
        hz_cell = "" if observed_hz is None else f"{observed_hz:.3f}"

        summary = [
            f"{report.window.duration_s:.3f}",
            cell(ac1.first_dead_reckoned_offset_ms),
            flag(ac1.passes),
            ac2.spoof_rejected_count,
            ac2.satellite_anchored_inside_window,
            flag(ac2.passes),
            cell(ac3.first_decreasing_at_ms),
            flag(ac3.passes),
            ac4.violation_count,
            flag(ac4.passes),
            hz_cell,
            flag(ac5.passes),
            cell(esc.cov2d_crossed_at_ms),
            flag(esc.passes_ac6),
            cell(esc.cov500_or_30s_crossed_at_ms),
            flag(esc.passes_ac7),
            cell(ac8.recovery_at_ms),
            flag(ac8.passes),
            flag(report.passes),
        ]
        writer.writerow(summary)
    return out_path