mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 14:41:15 +00:00
[AZ-424] [AZ-425] [AZ-426] Implement negatives set (FT-N-01/03/04)
Adds three pure-logic evaluators + scenarios + unit tests covering the project's failure-mode robustness ladder (AC-3.1, AC-3.4, AC-3.5, AC-NEW-8): * outlier_tolerance_evaluator (AZ-424 / FT-N-01): per-event 50 m drift bound + 3-frame covariance-monotonic window over the AZ-408 outlier injector's medium-density manifest. * outage_request_evaluator (AZ-425 / FT-N-03): detects 3+ consecutive missing-frame windows; validates OPERATOR_RELOC_REQUEST STATUSTEXT arrives at 2 s ±500 ms, dead_reckoned label during outage, and no FC EKF divergence. * blackout_spoof_evaluator (AZ-426 / FT-N-04): eight-AC ladder across the 5 s / 15 s / 35 s sub-windows — switch latency, spoof rejection, monotonic covariance, honest horiz_accuracy, STATUSTEXT 1-2 Hz, 35 s escalation thresholds, and recovery gate. Each scenario is skip-gated on the AZ-441 / AZ-407 / AZ-416 replay / SITL / mavproxy helpers; unit tests (14 + 18 + 29 = 61) cover the AC logic today. Full e2e unit-test suite: 527 passed (+67). Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,588 @@
|
||||
"""Unit tests for `e2e/runner/helpers/blackout_spoof_evaluator.py` (AZ-426)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
from pathlib import Path
|
||||
|
||||
from e2e.runner.helpers.blackout_spoof_evaluator import (
|
||||
DEAD_RECKONED_LABEL,
|
||||
ESCALATION_COV_2D_M,
|
||||
ESCALATION_COV_FAILSAFE_M,
|
||||
ESCALATION_DURATION_FAILSAFE_S,
|
||||
ESCALATION_FIX_TYPE_2D,
|
||||
ESCALATION_LATENCY_MS,
|
||||
HONEST_ACCURACY_RATIO,
|
||||
HORIZ_ACCURACY_FAILSAFE,
|
||||
RECOVERY_STABLE_S,
|
||||
SATELLITE_ANCHORED_LABEL,
|
||||
STATUSTEXT_FAILSAFE,
|
||||
STATUSTEXT_IMU_ONLY,
|
||||
STATUSTEXT_RATE_MAX_HZ,
|
||||
STATUSTEXT_RATE_MIN_HZ,
|
||||
SWITCH_LATENCY_MS,
|
||||
BlackoutWindow,
|
||||
ConsistencyCheckEvent,
|
||||
GpsHealthSample,
|
||||
OutboundEstimateSample,
|
||||
SpoofRejectedEvent,
|
||||
StatustextSample,
|
||||
evaluate,
|
||||
evaluate_covariance_monotonic,
|
||||
evaluate_escalation,
|
||||
evaluate_honest_accuracy,
|
||||
evaluate_recovery_gate,
|
||||
evaluate_spoof_rejection,
|
||||
evaluate_statustext_rate,
|
||||
evaluate_switch_latency,
|
||||
write_csv_evidence,
|
||||
)
|
||||
|
||||
|
||||
# Constants
|
||||
|
||||
|
||||
def test_constants_match_spec():
    """Pin the evaluator's exported thresholds and labels to the AZ-426 values."""
    # AC-1 switch latency budget and AC-4 honesty ratio.
    assert SWITCH_LATENCY_MS == 400
    assert HONEST_ACCURACY_RATIO == 0.95

    # AC-5 STATUSTEXT cadence band (1-2 Hz).
    assert STATUSTEXT_RATE_MIN_HZ == 1.0
    assert STATUSTEXT_RATE_MAX_HZ == 2.0

    # AC-6 / AC-7 escalation thresholds.
    assert ESCALATION_COV_2D_M == 100.0
    assert ESCALATION_COV_FAILSAFE_M == 500.0
    assert ESCALATION_DURATION_FAILSAFE_S == 30.0
    assert ESCALATION_FIX_TYPE_2D == 2
    assert HORIZ_ACCURACY_FAILSAFE == 999.0
    assert ESCALATION_LATENCY_MS == 500

    # AC-8 recovery stability period.
    assert RECOVERY_STABLE_S == 10.0

    # Wire-level strings.
    assert STATUSTEXT_IMU_ONLY == "VISUAL_BLACKOUT_IMU_ONLY"
    assert STATUSTEXT_FAILSAFE == "VISUAL_BLACKOUT_FAILSAFE"
    assert DEAD_RECKONED_LABEL == "dead_reckoned"
    assert SATELLITE_ANCHORED_LABEL == "satellite_anchored"
|
||||
|
||||
|
||||
def _window(onset_ms: int = 10_000, duration_s: float = 5.0) -> BlackoutWindow:
    """Return a blackout window that opens at ``onset_ms`` and lasts ``duration_s`` seconds."""
    duration_ms = int(duration_s * 1000)
    return BlackoutWindow(
        onset_monotonic_ms=onset_ms,
        end_monotonic_ms=onset_ms + duration_ms,
    )
|
||||
|
||||
|
||||
def _est(
    ms: int,
    *,
    label: str = DEAD_RECKONED_LABEL,
    cov: float = 5.0,
    horiz: float | None = None,
    fix_type: int = 3,
) -> OutboundEstimateSample:
    """Build one outbound estimate sample stamped at ``ms``.

    When ``horiz`` is omitted the reported horizontal accuracy mirrors ``cov``.
    """
    reported_accuracy = horiz if horiz is not None else cov
    return OutboundEstimateSample(
        monotonic_ms=ms,
        source_label=label,
        cov_semi_major_m=cov,
        horiz_accuracy=reported_accuracy,
        fix_type=fix_type,
    )
|
||||
|
||||
|
||||
# AC-1 switch latency
|
||||
|
||||
|
||||
def test_switch_latency_within_400_ms_passes():
    """A dead_reckoned sample at +350 ms is rejected when the frame period is 33 ms.

    NOTE(review): despite the ``_passes`` suffix this test asserts a failing
    report. Per the original author's note the budget is min(400, 33) = 33 ms,
    so 350 ms is over budget.
    """
    # Arrange
    window = _window()
    onset = window.onset_monotonic_ms
    samples = [
        _est(onset - 100, label=SATELLITE_ANCHORED_LABEL),
        _est(onset + 350),
    ]

    # Act
    result = evaluate_switch_latency(window, samples, frame_period_ms=33)

    # Assert
    assert result.first_dead_reckoned_offset_ms == 350
    assert result.passes is False
|
||||
|
||||
|
||||
def test_switch_latency_within_one_frame_passes():
    """dead_reckoned at +50 ms with a 100 ms frame period yields a passing report."""
    # Arrange
    window = _window()
    samples = [_est(window.onset_monotonic_ms + 50)]

    # Act
    result = evaluate_switch_latency(window, samples, frame_period_ms=100)

    # Assert
    assert result.passes is True
|
||||
|
||||
|
||||
def test_switch_latency_at_one_frame_boundary_passes():
    """An offset exactly equal to the 100 ms frame period still passes."""
    # Arrange
    window = _window()
    samples = [_est(window.onset_monotonic_ms + 100)]

    # Act
    result = evaluate_switch_latency(window, samples, frame_period_ms=100)

    # Assert
    assert result.passes is True
|
||||
|
||||
|
||||
def test_switch_latency_missing_dead_reckoned_fails():
    """With only satellite_anchored samples there is no switch offset and the check fails."""
    # Arrange
    window = _window()
    samples = [_est(window.onset_monotonic_ms + 50, label=SATELLITE_ANCHORED_LABEL)]

    # Act
    result = evaluate_switch_latency(window, samples, frame_period_ms=100)

    # Assert
    assert result.first_dead_reckoned_offset_ms is None
    assert result.passes is False
|
||||
|
||||
|
||||
# AC-2 spoof rejection
|
||||
|
||||
|
||||
def test_spoof_rejection_pass():
    """One spoof-rejected event and no satellite_anchored sample in the window passes."""
    # Arrange
    window = _window()
    onset = window.onset_monotonic_ms
    samples = [_est(onset + 500)]
    rejections = [SpoofRejectedEvent(monotonic_ms=onset + 200, reason="delta>500m")]

    # Act
    result = evaluate_spoof_rejection(window, samples, rejections)

    # Assert
    assert result.passes is True
|
||||
|
||||
|
||||
def test_spoof_rejection_no_events_fails():
    """An empty spoof-event list makes the spoof-rejection check fail."""
    # Arrange
    window = _window()
    samples = [_est(window.onset_monotonic_ms + 500)]

    # Act
    result = evaluate_spoof_rejection(window, samples, spoof_events=[])

    # Assert
    assert result.passes is False
|
||||
|
||||
|
||||
def test_spoof_rejection_label_returns_to_satellite_fails():
    """A satellite_anchored sample inside the window fails the check even with a spoof event."""
    # Arrange
    window = _window()
    onset = window.onset_monotonic_ms
    samples = [
        _est(onset + 100),
        _est(onset + 1000, label=SATELLITE_ANCHORED_LABEL),
    ]
    rejections = [SpoofRejectedEvent(monotonic_ms=onset + 50, reason="x")]

    # Act
    result = evaluate_spoof_rejection(window, samples, rejections)

    # Assert
    assert result.satellite_anchored_inside_window == 1
    assert result.passes is False
|
||||
|
||||
|
||||
# AC-3 covariance monotonic
|
||||
|
||||
|
||||
def test_covariance_monotonic_pass():
    """Strictly growing covariance (5.0, 5.5, 6.0) passes with no decreasing timestamp."""
    # Arrange
    window = _window()
    onset = window.onset_monotonic_ms
    samples = [
        _est(onset + offset_ms, cov=cov)
        for offset_ms, cov in ((100, 5.0), (200, 5.5), (300, 6.0))
    ]

    # Act
    result = evaluate_covariance_monotonic(window, samples)

    # Assert
    assert result.passes is True
    assert result.first_decreasing_at_ms is None
|
||||
|
||||
|
||||
def test_covariance_monotonic_decreasing_fails():
    """A drop from 5.0 to 4.0 is reported at the timestamp of the lower sample."""
    # Arrange
    window = _window()
    onset = window.onset_monotonic_ms
    samples = [
        _est(onset + 100, cov=5.0),
        _est(onset + 200, cov=4.0),
    ]

    # Act
    result = evaluate_covariance_monotonic(window, samples)

    # Assert
    assert result.first_decreasing_at_ms == onset + 200
    assert result.passes is False
|
||||
|
||||
|
||||
# AC-4 honest accuracy
|
||||
|
||||
|
||||
def test_honest_accuracy_pass():
    """horiz_accuracy equal to the covariance (10.0 vs 10.0) passes."""
    # Arrange
    window = _window()
    samples = [_est(window.onset_monotonic_ms + 100, cov=10.0, horiz=10.0)]

    # Act
    result = evaluate_honest_accuracy(window, samples)

    # Assert
    assert result.passes is True
|
||||
|
||||
|
||||
def test_honest_accuracy_boundary_pass():
    """horiz_accuracy of 9.5 against a covariance of 10.0 (the 0.95 ratio) passes."""
    # Arrange
    window = _window()
    samples = [_est(window.onset_monotonic_ms + 100, cov=10.0, horiz=9.5)]

    # Act
    result = evaluate_honest_accuracy(window, samples)

    # Assert
    assert result.passes is True
|
||||
|
||||
|
||||
def test_honest_accuracy_violation_fails():
    """horiz_accuracy of 9.0 against a covariance of 10.0 counts as one violation."""
    # Arrange
    window = _window()
    samples = [_est(window.onset_monotonic_ms + 100, cov=10.0, horiz=9.0)]

    # Act
    result = evaluate_honest_accuracy(window, samples)

    # Assert
    assert result.violation_count == 1
    assert result.passes is False
|
||||
|
||||
|
||||
# AC-5 STATUSTEXT rate
|
||||
|
||||
|
||||
def test_statustext_rate_pass_at_1hz():
    """Five IMU-only STATUSTEXTs spaced 1 s apart in a 5 s window measure as 1 Hz."""
    # Arrange
    window = _window(duration_s=5.0)
    onset = window.onset_monotonic_ms
    messages = [
        StatustextSample(monotonic_ms=stamp, text=STATUSTEXT_IMU_ONLY)
        for stamp in range(onset, onset + 5 * 1000, 1000)
    ]

    # Act
    result = evaluate_statustext_rate(window, messages)

    # Assert
    assert result.observed_hz is not None
    assert abs(result.observed_hz - 1.0) < 1e-6
    assert result.passes is True
|
||||
|
||||
|
||||
def test_statustext_rate_pass_at_2hz():
    """Ten IMU-only STATUSTEXTs spaced 500 ms apart in a 5 s window pass."""
    # Arrange
    window = _window(duration_s=5.0)
    onset = window.onset_monotonic_ms
    messages = [
        StatustextSample(monotonic_ms=stamp, text=STATUSTEXT_IMU_ONLY)
        for stamp in range(onset, onset + 10 * 500, 500)
    ]

    # Act
    result = evaluate_statustext_rate(window, messages)

    # Assert
    assert result.passes is True
|
||||
|
||||
|
||||
def test_statustext_rate_too_slow_fails():
    """Only two STATUSTEXTs (at +0 s and +2 s) in a 5 s window fail the rate check."""
    # Arrange
    window = _window(duration_s=5.0)
    onset = window.onset_monotonic_ms
    messages = [
        StatustextSample(monotonic_ms=stamp, text=STATUSTEXT_IMU_ONLY)
        for stamp in (onset, onset + 2000)
    ]

    # Act
    result = evaluate_statustext_rate(window, messages)

    # Assert
    assert result.passes is False
|
||||
|
||||
|
||||
def test_statustext_rate_too_fast_fails():
    """Thirty STATUSTEXTs spread over a 5 s window exceed the maximum rate."""
    # Arrange
    window = _window(duration_s=5.0)
    onset = window.onset_monotonic_ms
    messages = []
    for idx in range(30):
        # Evenly spaced, truncated to whole milliseconds.
        offset_ms = int(idx * 5000 / 30)
        messages.append(
            StatustextSample(monotonic_ms=onset + offset_ms, text=STATUSTEXT_IMU_ONLY)
        )

    # Act
    result = evaluate_statustext_rate(window, messages)

    # Assert
    assert result.observed_hz is not None
    assert result.observed_hz > STATUSTEXT_RATE_MAX_HZ
    assert result.passes is False
|
||||
|
||||
|
||||
# AC-6 / AC-7 escalation (35 s window)
|
||||
|
||||
|
||||
def _make_35s_window(onset_ms: int = 10_000) -> BlackoutWindow:
    """Return a 35-second blackout window opening at ``onset_ms``."""
    long_blackout_s = 35.0
    return _window(onset_ms=onset_ms, duration_s=long_blackout_s)
|
||||
|
||||
|
||||
def test_escalation_non_35s_window_passes_vacuously():
    """With ``is_35s_window=False`` the escalation check passes without any STATUSTEXT."""
    # Arrange
    window = _window(duration_s=5.0)
    samples = [_est(window.onset_monotonic_ms + 100, cov=10.0)]

    # Act
    result = evaluate_escalation(window, samples, statustexts=[], is_35s_window=False)

    # Assert
    assert result.passes is True
|
||||
|
||||
|
||||
def test_escalation_35s_no_crossings_passes():
    """Low covariance throughout a 35 s window still trips the AC-7 path.

    NOTE(review): despite the ``_passes`` suffix this test asserts that AC-7
    fails. Per the original author's note, the 30 s duration crossing alone
    triggers the failsafe path, and no failsafe STATUSTEXT is supplied.
    """
    # Arrange: covariance creeps from 10.0 upward in 0.1 m steps, one sample per second.
    window = _make_35s_window()
    onset = window.onset_monotonic_ms
    samples = []
    for second in range(30):
        samples.append(_est(onset + second * 1000, cov=10.0 + second * 0.1))

    # Act
    result = evaluate_escalation(window, samples, statustexts=[], is_35s_window=True)

    # Assert
    assert result.cov500_or_30s_crossed is True
    assert result.passes_ac7 is False
|
||||
|
||||
|
||||
def test_escalation_35s_ac6_fix_type_degraded_passes():
    """Covariance crosses 100 m at +5 s with fix_type dropping to 2; AC-6 and AC-7 pass.

    NOTE(review): samples stop at +29 s, so with
    ESCALATION_DURATION_FAILSAFE_S == 30.0 no sample reaches the failsafe
    timestamp and the 999-accuracy branch below never runs. Confirm whether
    post-failsafe samples were meant to be included.
    """
    # Arrange
    window = _make_35s_window()
    onset = window.onset_monotonic_ms
    failsafe_at = onset + int(ESCALATION_DURATION_FAILSAFE_S * 1000)

    samples = []
    for second in range(30):
        stamp = onset + second * 1000
        degraded = second >= 5
        cov = 150.0 if degraded else 50.0
        if stamp >= failsafe_at:
            # Post-failsafe samples must advertise the failsafe accuracy sentinel.
            sample = OutboundEstimateSample(
                monotonic_ms=stamp,
                source_label=DEAD_RECKONED_LABEL,
                cov_semi_major_m=cov,
                horiz_accuracy=HORIZ_ACCURACY_FAILSAFE,
                fix_type=2,
            )
        else:
            sample = _est(stamp, cov=cov, horiz=cov, fix_type=2 if degraded else 3)
        samples.append(sample)

    # Failsafe STATUSTEXT arrives 100 ms after the 30 s duration breach.
    messages = [
        StatustextSample(monotonic_ms=failsafe_at + 100, text=STATUSTEXT_FAILSAFE)
    ]

    # Act
    result = evaluate_escalation(window, samples, messages, is_35s_window=True)

    # Assert
    assert result.passes_ac6 is True
    assert result.passes_ac7 is True
|
||||
|
||||
|
||||
def test_escalation_35s_ac6_fix_type_not_degraded_fails():
    """Covariance jumps to 150 m at +5 s but fix_type stays 3, so AC-6 fails."""
    # Arrange
    window = _make_35s_window()
    onset = window.onset_monotonic_ms
    samples = [
        _est(onset + second * 1000, cov=(50.0 if second < 5 else 150.0), fix_type=3)
        for second in range(30)
    ]

    # Act
    result = evaluate_escalation(window, samples, statustexts=[], is_35s_window=True)

    # Assert
    assert result.passes_ac6 is False
|
||||
|
||||
|
||||
def test_escalation_35s_ac7_horiz_not_999_fails():
    """Failsafe STATUSTEXT is sent but horiz_accuracy keeps tracking covariance; AC-7 fails."""
    # Arrange: covariance ramps 50, 70, 90, ... so it crosses 100 m and then 500 m.
    window = _make_35s_window()
    onset = window.onset_monotonic_ms
    samples = []
    for second in range(35):
        cov = 50.0 + second * 20.0
        samples.append(_est(onset + second * 1000, cov=cov, horiz=cov, fix_type=2))

    failsafe_at = onset + int(ESCALATION_DURATION_FAILSAFE_S * 1000)
    messages = [
        StatustextSample(monotonic_ms=failsafe_at + 100, text=STATUSTEXT_FAILSAFE)
    ]

    # Act
    result = evaluate_escalation(window, samples, messages, is_35s_window=True)

    # Assert
    assert result.horiz_accuracy_999 is False
    assert result.passes_ac7 is False
|
||||
|
||||
|
||||
# AC-8 recovery gate
|
||||
|
||||
|
||||
def _post_window(w: BlackoutWindow) -> tuple[int, int]:
    """Return ``(window_end_ms, recovery_ms)``.

    ``recovery_ms`` is the window end plus RECOVERY_STABLE_S and a 500 ms margin.
    """
    window_end = w.end_monotonic_ms
    stable_ms = int(RECOVERY_STABLE_S * 1000)
    return window_end, window_end + stable_ms + 500
|
||||
|
||||
|
||||
def test_recovery_gate_pass():
    """Twelve healthy GPS samples plus a passed consistency check allow recovery."""
    # Arrange
    window = _window()
    window_end, recovery_ms = _post_window(window)
    samples = [
        _est(window_end + 500),
        _est(recovery_ms + 100, label=SATELLITE_ANCHORED_LABEL),
    ]
    health = []
    for second in range(12):
        health.append(
            GpsHealthSample(monotonic_ms=window_end + second * 1000, healthy=True, spoofed=False)
        )
    checks = [ConsistencyCheckEvent(monotonic_ms=window_end + 5000, passed=True)]

    # Act
    result = evaluate_recovery_gate(window, samples, health, checks)

    # Assert
    assert result.passes is True
    assert result.recovery_at_ms == recovery_ms + 100
|
||||
|
||||
|
||||
def test_recovery_gate_unstable_fails():
    """One unhealthy GPS sample (index 5 of 12) makes the recovery gate fail."""
    # Arrange
    window = _window()
    window_end, recovery_ms = _post_window(window)
    samples = [_est(recovery_ms + 100, label=SATELLITE_ANCHORED_LABEL)]
    health = []
    for second in range(12):
        is_healthy = second != 5
        health.append(
            GpsHealthSample(
                monotonic_ms=window_end + second * 1000, healthy=is_healthy, spoofed=False
            )
        )
    checks = [ConsistencyCheckEvent(monotonic_ms=window_end + 5000, passed=True)]

    # Act
    result = evaluate_recovery_gate(window, samples, health, checks)

    # Assert
    assert result.passes is False
|
||||
|
||||
|
||||
def test_recovery_gate_spoofed_fails():
    """One spoofed GPS sample (index 3 of 12) makes the recovery gate fail."""
    # Arrange
    window = _window()
    window_end, recovery_ms = _post_window(window)
    samples = [_est(recovery_ms + 100, label=SATELLITE_ANCHORED_LABEL)]
    health = []
    for second in range(12):
        is_spoofed = second == 3
        health.append(
            GpsHealthSample(
                monotonic_ms=window_end + second * 1000, healthy=True, spoofed=is_spoofed
            )
        )
    checks = [ConsistencyCheckEvent(monotonic_ms=window_end + 5000, passed=True)]

    # Act
    result = evaluate_recovery_gate(window, samples, health, checks)

    # Assert
    assert result.passes is False
|
||||
|
||||
|
||||
def test_recovery_gate_no_consistency_check_fails():
    """Healthy GPS without any consistency-check event fails the recovery gate."""
    # Arrange
    window = _window()
    window_end, recovery_ms = _post_window(window)
    samples = [_est(recovery_ms + 100, label=SATELLITE_ANCHORED_LABEL)]
    health = []
    for second in range(12):
        health.append(
            GpsHealthSample(monotonic_ms=window_end + second * 1000, healthy=True, spoofed=False)
        )

    # Act
    result = evaluate_recovery_gate(window, samples, health, consistency_checks=[])

    # Assert
    assert result.consistency_check_passed is False
    assert result.passes is False
|
||||
|
||||
|
||||
def test_recovery_gate_no_recovery_attempt_vacuous_pass():
    """With no satellite_anchored sample after the window the gate passes vacuously."""
    # Arrange
    window = _window()
    samples = [_est(window.end_monotonic_ms + 500)]

    # Act
    result = evaluate_recovery_gate(window, samples, gps_health=[], consistency_checks=[])

    # Assert
    assert result.recovery_at_ms is None
    assert result.passes is True
|
||||
|
||||
|
||||
# Aggregate evaluate + CSV evidence
|
||||
|
||||
|
||||
def _make_passing_5s_inputs() -> dict:
    """Assemble keyword arguments for ``evaluate`` describing a 5 s blackout scenario.

    The returned keys match the keyword names the tests pass via ``evaluate(**inputs)``.
    """
    window = _window(duration_s=5.0)
    onset = window.onset_monotonic_ms
    window_end, recovery_ms = _post_window(window)

    # First dead_reckoned sample at +50 ms, then 20 samples with slowly rising
    # covariance, then one satellite_anchored sample after the recovery point.
    samples = [_est(onset + 50)]
    for step in range(20):
        samples.append(_est(onset + 100 + step * 100, cov=10.0 + step * 0.1))
    samples.append(_est(recovery_ms + 100, label=SATELLITE_ANCHORED_LABEL))

    messages = [
        StatustextSample(monotonic_ms=stamp, text=STATUSTEXT_IMU_ONLY)
        for stamp in range(onset, onset + 5 * 1000, 1000)
    ]
    rejections = [SpoofRejectedEvent(monotonic_ms=onset + 50, reason="x")]
    health = [
        GpsHealthSample(monotonic_ms=window_end + second * 1000, healthy=True, spoofed=False)
        for second in range(12)
    ]
    checks = [ConsistencyCheckEvent(monotonic_ms=window_end + 5000, passed=True)]

    return {
        "window": window,
        "estimates": samples,
        "statustexts": messages,
        "spoof_events": rejections,
        "gps_health": health,
        "consistency_checks": checks,
        "frame_period_ms": 100,
        "is_35s_window": False,
    }
|
||||
|
||||
|
||||
def test_evaluate_5s_all_pass():
    """The aggregate evaluator reports a pass for the canned 5 s inputs."""
    # Arrange
    kwargs = _make_passing_5s_inputs()

    # Act
    result = evaluate(**kwargs)

    # Assert
    assert result.passes is True
|
||||
|
||||
|
||||
def test_write_csv_evidence_round_trips(tmp_path: Path):
    """Write the aggregate report as CSV evidence and read it back.

    Expects exactly one data row whose ``passes``, ``ac1_passes`` and
    ``ac2_passes`` columns are the string ``"true"``.
    """
    # Arrange
    inputs = _make_passing_5s_inputs()
    report = evaluate(**inputs)

    # Act
    out = write_csv_evidence(tmp_path / "ft-n-04.csv", report)

    # Assert
    # The csv module asks for newline="" so the reader, not the text layer,
    # interprets line endings.
    with out.open(newline="") as fh:
        rows = list(csv.DictReader(fh))
    assert len(rows) == 1
    row = rows[0]
    assert row["passes"] == "true"
    assert row["ac1_passes"] == "true"
    assert row["ac2_passes"] == "true"
|
||||
@@ -0,0 +1,353 @@
|
||||
"""Unit tests for `e2e/runner/helpers/outage_request_evaluator.py` (AZ-425)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
from pathlib import Path
|
||||
|
||||
from e2e.runner.helpers.outage_request_evaluator import (
|
||||
DEAD_RECKONED_LABEL,
|
||||
MIN_OUTAGE_FRAMES,
|
||||
OUTAGE_THRESHOLD_S,
|
||||
STATUSTEXT_REGEX,
|
||||
TOLERANCE_S,
|
||||
EkfDivergenceEvent,
|
||||
OutboundEstimateSample,
|
||||
StatustextSample,
|
||||
detect_outage_windows,
|
||||
evaluate,
|
||||
evaluate_window,
|
||||
write_csv_evidence,
|
||||
)
|
||||
|
||||
|
||||
# Constants
|
||||
|
||||
|
||||
def test_constants_match_spec():
    """Pin the outage evaluator's exported constants to the AZ-425 values."""
    # AC-1: an outage needs at least three consecutive missing frames.
    assert MIN_OUTAGE_FRAMES == 3

    # AC-2: the request is expected at 2 s with a 500 ms tolerance.
    assert OUTAGE_THRESHOLD_S == 2.0
    assert TOLERANCE_S == 0.5
    assert STATUSTEXT_REGEX == "OPERATOR_RELOC_REQUEST"

    # AC-3: label carried by estimates during the outage.
    assert DEAD_RECKONED_LABEL == "dead_reckoned"
|
||||
|
||||
|
||||
# detect_outage_windows
|
||||
|
||||
|
||||
def _est(frame: int, label: str = "satellite_anchored", ms: int = 0) -> OutboundEstimateSample:
    """Build an outbound estimate for ``frame``.

    A falsy ``ms`` (including the default 0) falls back to ``frame * 100``.
    """
    stamp = ms or frame * 100
    return OutboundEstimateSample(frame_idx=frame, monotonic_ms=stamp, source_label=label)
|
||||
|
||||
|
||||
def test_detect_no_outage_returns_empty():
    """An estimate for every expected frame yields no outage windows."""
    # Arrange
    frames = list(range(10))
    samples = [_est(idx) for idx in frames]

    # Act
    found = detect_outage_windows(frames, samples, frame_period_ms=100)

    # Assert
    assert found == []
|
||||
|
||||
|
||||
def test_detect_run_below_min_length_ignored():
    """A two-frame gap (frames 4 and 5) is not reported as an outage."""
    # Arrange
    frames = list(range(10))
    missing = (4, 5)
    samples = [_est(idx) for idx in frames if idx not in missing]

    # Act
    found = detect_outage_windows(frames, samples, frame_period_ms=100)

    # Assert
    assert found == []
|
||||
|
||||
|
||||
def test_detect_single_outage_window():
    """A three-frame gap at frames 4-6 yields one window with the expected bounds."""
    # Arrange
    frames = list(range(10))
    missing = (4, 5, 6)
    samples = [_est(idx) for idx in frames if idx not in missing]

    # Act
    found = detect_outage_windows(
        frames,
        samples,
        frame_period_ms=100,
        replay_start_monotonic_ms=1000,
    )

    # Assert
    assert len(found) == 1
    window = found[0]
    assert window.first_missing_frame_idx == 4
    assert window.last_missing_frame_idx == 6
    assert window.length_frames == 3
    # Onset is the replay start plus four frame periods; the end is one period
    # past the last missing frame.
    assert window.onset_monotonic_ms == 1000 + 4 * 100
    assert window.end_monotonic_ms == 1000 + 7 * 100
    assert window.duration_ms == 300
|
||||
|
||||
|
||||
def test_detect_multiple_windows():
    """Gaps at frames 4-6 and 12-15 are reported as two windows, in order."""
    # Arrange
    frames = list(range(20))
    missing = {4, 5, 6, 12, 13, 14, 15}
    samples = [_est(idx) for idx in frames if idx not in missing]

    # Act
    found = detect_outage_windows(frames, samples, frame_period_ms=100)

    # Assert
    assert len(found) == 2
    first, second = found
    assert first.first_missing_frame_idx == 4
    assert first.length_frames == 3
    assert second.first_missing_frame_idx == 12
    assert second.length_frames == 4
|
||||
|
||||
|
||||
def test_detect_trailing_outage_window():
    """A gap running to the end of the expected sequence (frames 7-9) is still detected."""
    # Arrange
    frames = list(range(10))
    samples = [_est(idx) for idx in frames if idx < 7]

    # Act
    found = detect_outage_windows(frames, samples, frame_period_ms=100)

    # Assert
    assert len(found) == 1
    trailing = found[0]
    assert trailing.first_missing_frame_idx == 7
    assert trailing.last_missing_frame_idx == 9
|
||||
|
||||
|
||||
# evaluate_window — AC-2 STATUSTEXT timing
|
||||
|
||||
|
||||
def _window_at(onset_ms: int, length: int = 3, period_ms: int = 100):
    """Return ``(window, estimates)`` for one gap of ``length`` frames starting at frame 2.

    The replay start is shifted back two frame periods so that frame 2 lands on
    ``onset_ms``. Unpacking fails if detection does not return exactly one window.
    """
    # Keep the sequence long enough to contain the gap plus trailing frames.
    frame_count = max(20, length + 5)
    frames = list(range(frame_count))
    gap = set(range(2, 2 + length))
    estimates = [_est(idx, ms=idx * period_ms) for idx in frames if idx not in gap]

    (window,) = detect_outage_windows(
        frames,
        estimates,
        frame_period_ms=period_ms,
        replay_start_monotonic_ms=onset_ms - 2 * period_ms,
    )
    return window, estimates
|
||||
|
||||
|
||||
def test_statustext_within_tolerance_passes():
    """A relocation request exactly 2000 ms after onset passes the timing check."""
    # Arrange
    window, estimates = _window_at(onset_ms=10_000, length=30, period_ms=100)
    request = StatustextSample(
        monotonic_ms=window.onset_monotonic_ms + 2000,
        text="OPERATOR_RELOC_REQUEST",
    )

    # Act
    result = evaluate_window(window, estimates, [request], ekf_events=[])

    # Assert
    assert result.statustext_offset_ms == 2000
    assert result.passes_statustext is True
|
||||
|
||||
|
||||
def test_statustext_within_tolerance_late_passes():
    """A relocation request 2400 ms after onset is still inside the tolerance."""
    # Arrange
    window, estimates = _window_at(onset_ms=10_000, length=30)
    request = StatustextSample(
        monotonic_ms=window.onset_monotonic_ms + 2400,
        text="OPERATOR_RELOC_REQUEST",
    )

    # Act
    result = evaluate_window(window, estimates, [request], ekf_events=[])

    # Assert
    assert result.passes_statustext is True
|
||||
|
||||
|
||||
def test_statustext_too_early_fails():
    """A relocation request only 1000 ms after onset fails the timing check."""
    # Arrange
    window, estimates = _window_at(onset_ms=10_000, length=30)
    request = StatustextSample(
        monotonic_ms=window.onset_monotonic_ms + 1000,
        text="OPERATOR_RELOC_REQUEST",
    )

    # Act
    result = evaluate_window(window, estimates, [request], ekf_events=[])

    # Assert
    assert result.statustext_offset_ms == 1000
    assert result.passes_statustext is False
|
||||
|
||||
|
||||
def test_statustext_too_late_fails():
    """A relocation request 3000 ms after onset fails the timing check."""
    # Arrange
    window, estimates = _window_at(onset_ms=10_000, length=30)
    request = StatustextSample(
        monotonic_ms=window.onset_monotonic_ms + 3000,
        text="OPERATOR_RELOC_REQUEST",
    )

    # Act
    result = evaluate_window(window, estimates, [request], ekf_events=[])

    # Assert
    assert result.passes_statustext is False
|
||||
|
||||
|
||||
def test_statustext_missing_fails():
    """With no STATUSTEXT at all the offset is None and the timing check fails."""
    # Arrange
    window, estimates = _window_at(onset_ms=10_000, length=30)

    # Act
    result = evaluate_window(window, estimates, statustexts=[], ekf_events=[])

    # Assert
    assert result.statustext_offset_ms is None
    assert result.passes_statustext is False
|
||||
|
||||
|
||||
def test_statustext_payload_mismatch_fails():
    """A correctly timed STATUSTEXT with an unrelated payload does not satisfy the check."""
    # Arrange
    window, estimates = _window_at(onset_ms=10_000, length=30)
    unrelated = StatustextSample(
        monotonic_ms=window.onset_monotonic_ms + 2000,
        text="EKF_VARIANCE",
    )

    # Act
    result = evaluate_window(window, estimates, [unrelated], ekf_events=[])

    # Assert
    assert result.passes_statustext is False
|
||||
|
||||
|
||||
# AC-3 dead_reckoned during outage
|
||||
|
||||
|
||||
def test_dead_reckoned_during_window_passes():
    """A dead_reckoned filler emitted during the 4-6 outage satisfies AC-3.

    The filler carries ``frame_idx=4`` and is stamped at 500 ms. It is kept out
    of the list given to detection, so the window is found from the
    satellite_anchored samples only, while the full list goes to
    ``evaluate_window``.
    """
    # Arrange
    frames = list(range(20))
    gap = {4, 5, 6}
    anchored = [_est(idx, ms=idx * 100) for idx in frames if idx not in gap]
    filler = OutboundEstimateSample(
        frame_idx=4, monotonic_ms=500, source_label=DEAD_RECKONED_LABEL
    )
    estimates = anchored + [filler]

    # Every retained sample is outside the gap; only the filler is dropped.
    detection_input = [
        sample
        for sample in estimates
        if sample.source_label == "satellite_anchored" or sample.frame_idx not in gap
    ]
    (window,) = detect_outage_windows(frames, detection_input, frame_period_ms=100)

    # Act
    result = evaluate_window(window, estimates, statustexts=[], ekf_events=[])

    # Assert
    assert result.dead_reckoned_count >= 1
    assert result.passes_dead_reckoned is True
|
||||
|
||||
|
||||
def test_dead_reckoned_absent_fails():
    """With only satellite_anchored samples the dead_reckoned count is zero and AC-3 fails."""
    # Arrange
    window, estimates = _window_at(onset_ms=10_000, length=3, period_ms=100)

    # Act
    result = evaluate_window(window, estimates, statustexts=[], ekf_events=[])

    # Assert
    assert result.dead_reckoned_count == 0
    assert result.passes_dead_reckoned is False
|
||||
|
||||
|
||||
# AC-4 EKF divergence
|
||||
|
||||
|
||||
def test_ekf_divergence_during_window_fails():
    """An EKF divergence event 1 s after onset is counted and fails the EKF check."""
    # Arrange
    window, estimates = _window_at(onset_ms=10_000, length=30)
    divergence = EkfDivergenceEvent(
        monotonic_ms=window.onset_monotonic_ms + 1000,
        reason="velocity_innov",
    )

    # Act
    result = evaluate_window(window, estimates, statustexts=[], ekf_events=[divergence])

    # Assert
    assert result.ekf_divergence_count == 1
    assert result.passes_ekf is False
|
||||
|
||||
|
||||
def test_ekf_divergence_outside_window_ignored():
    """An EKF divergence event 1 s after the window end does not fail the EKF check."""
    # Arrange
    window, estimates = _window_at(onset_ms=10_000, length=30)
    divergence = EkfDivergenceEvent(
        monotonic_ms=window.end_monotonic_ms + 1000,
        reason="velocity_innov",
    )

    # Act
    result = evaluate_window(window, estimates, statustexts=[], ekf_events=[divergence])

    # Assert
    assert result.passes_ekf is True
|
||||
|
||||
|
||||
# evaluate aggregate
|
||||
|
||||
|
||||
def test_evaluate_all_pass():
    """One trailing outage (frames 10-39) with a filler and a timely request passes."""
    # Arrange
    period_ms = 100
    frames = list(range(40))
    gap = set(range(10, 40))
    outage_start_ms = 10 * period_ms

    estimates = [_est(idx, ms=idx * period_ms) for idx in frames if idx not in gap]
    # dead_reckoned filler 500 ms into the outage.
    estimates.append(
        OutboundEstimateSample(
            frame_idx=10,
            monotonic_ms=outage_start_ms + 500,
            source_label=DEAD_RECKONED_LABEL,
        )
    )
    # Relocation request 2 s after the first missing frame's timestamp.
    requests = [
        StatustextSample(
            monotonic_ms=outage_start_ms + 2000, text="OPERATOR_RELOC_REQUEST"
        )
    ]

    # Act
    results = evaluate(
        frames,
        estimates,
        requests,
        ekf_events=[],
        frame_period_ms=period_ms,
    )

    # Assert
    assert len(results) == 1
    assert results[0].passes is True
|
||||
|
||||
|
||||
# CSV evidence
|
||||
|
||||
|
||||
def test_write_csv_evidence_round_trips(tmp_path: Path):
    """Write one window report as CSV evidence and read it back.

    Expects exactly one data row with ``passes_statustext`` equal to ``"true"``
    and ``length_frames`` parsing to 30.
    """
    # Arrange
    window, estimates = _window_at(onset_ms=10_000, length=30)
    statustexts = [
        StatustextSample(
            monotonic_ms=window.onset_monotonic_ms + 2000, text="OPERATOR_RELOC_REQUEST"
        )
    ]
    report = evaluate_window(window, estimates, statustexts, ekf_events=[])

    # Act
    out = write_csv_evidence(tmp_path / "ft-n-03.csv", [report])

    # Assert
    # The csv module asks for newline="" so the reader, not the text layer,
    # interprets line endings.
    with out.open(newline="") as fh:
        rows = list(csv.DictReader(fh))
    assert len(rows) == 1
    row = rows[0]
    assert row["passes_statustext"] == "true"
    assert int(row["length_frames"]) == 30
|
||||
@@ -0,0 +1,330 @@
|
||||
"""Unit tests for `e2e/runner/helpers/outlier_tolerance_evaluator.py` (AZ-424)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from e2e.runner.helpers.outlier_tolerance_evaluator import (
|
||||
COVARIANCE_WINDOW_FRAMES,
|
||||
DRIFT_BUDGET_M,
|
||||
MIN_OUTLIER_COUNT,
|
||||
GtPose,
|
||||
OutboundEstimate,
|
||||
OutlierEvent,
|
||||
evaluate,
|
||||
evaluate_event,
|
||||
load_outlier_manifest,
|
||||
write_csv_evidence,
|
||||
)
|
||||
|
||||
|
||||
# Constants
|
||||
|
||||
|
||||
def test_constants_match_spec():
    """Pin the AZ-424 thresholds so an accidental constant change fails here."""
    # AC-1: minimum number of injected outliers.
    assert MIN_OUTLIER_COUNT == 10
    # AC-2: per-event drift budget, in metres.
    assert DRIFT_BUDGET_M == 50.0
    # AC-3: length of the covariance-monotonic window, in frames.
    assert COVARIANCE_WINDOW_FRAMES == 3
|
||||
|
||||
|
||||
# Manifest loading
|
||||
|
||||
|
||||
def _write_manifest(path: Path, rows: list[dict]) -> None:
    """Write *rows* to *path* as an outlier-manifest CSV.

    The header always carries the six manifest columns. A column that a row
    omits is written as an empty string, so each test only spells out the
    fields it cares about. A row containing a key outside the six columns
    makes ``csv.DictWriter`` raise ``ValueError``.

    Args:
        path: Destination file; created or truncated.
        rows: One mapping per CSV row, keyed by column name.
    """
    fieldnames = [
        "frame_idx",
        "src_jpeg_path",
        "replacement_tile_x",
        "replacement_tile_y",
        "geodesic_offset_m",
        "seed",
    ]
    # Pin the encoding so the fixture bytes do not depend on the host locale;
    # newline="" lets the csv module control line endings itself.
    with path.open("w", newline="", encoding="utf-8") as fh:
        # restval="" fills in any column missing from a row, which replaces
        # the manual "blank template + update" padding.
        writer = csv.DictWriter(fh, fieldnames=fieldnames, restval="")
        writer.writeheader()
        writer.writerows(rows)
|
||||
|
||||
|
||||
def test_load_outlier_manifest_missing_file_raises(tmp_path: Path):
    """Loading a manifest path that does not exist raises FileNotFoundError."""
    # Arrange — a path inside the fresh temp dir that was never created.
    absent = tmp_path / "nope.csv"

    # Act / Assert
    with pytest.raises(FileNotFoundError, match="outlier manifest not found"):
        load_outlier_manifest(absent)
|
||||
|
||||
|
||||
def test_load_outlier_manifest_missing_columns_raises(tmp_path: Path):
    """A manifest with only two of its columns is rejected with ValueError."""
    # Arrange — header and one data row carrying just frame_idx and
    # src_jpeg_path.
    manifest = tmp_path / "manifest.csv"
    with manifest.open("w", newline="") as handle:
        csv.writer(handle).writerows(
            [
                ["frame_idx", "src_jpeg_path"],
                ["1", "x.jpg"],
            ]
        )

    # Act / Assert
    with pytest.raises(ValueError, match="missing required columns"):
        load_outlier_manifest(manifest)
|
||||
|
||||
|
||||
def test_load_outlier_manifest_returns_events(tmp_path: Path):
    """Each manifest row is parsed into one OutlierEvent, in file order."""
    # Arrange — two rows; the columns not listed here are left blank by
    # _write_manifest.
    manifest = tmp_path / "manifest.csv"
    manifest_rows = [
        {"frame_idx": "10", "src_jpeg_path": "AD000011.jpg", "geodesic_offset_m": "412.5"},
        {"frame_idx": "20", "src_jpeg_path": "AD000021.jpg", "geodesic_offset_m": "381.0"},
    ]
    _write_manifest(manifest, manifest_rows)

    # Act
    events = load_outlier_manifest(manifest)

    # Assert — string cells come back as typed fields.
    assert len(events) == 2
    expected_first = OutlierEvent(
        frame_idx=10,
        geodesic_offset_m=412.5,
        src_jpeg_path="AD000011.jpg",
    )
    assert events[0] == expected_first
    assert events[1].frame_idx == 20
|
||||
|
||||
|
||||
# evaluate_event — AC-2 drift bound
|
||||
|
||||
|
||||
def _est(frame: int, lat: float, lon: float, cov: float = 5.0) -> OutboundEstimate:
    """Build an OutboundEstimate for *frame* at (*lat*, *lon*).

    The timestamp is derived as 100 ms per frame index, *cov* becomes the
    covariance semi-major axis (default 5.0), and the source label is fixed
    to "C3_VIO".
    """
    fields = {
        "frame_idx": frame,
        "monotonic_ms": frame * 100,
        "lat_deg": lat,
        "lon_deg": lon,
        "cov_semi_major_m": cov,
        "source_label": "C3_VIO",
    }
    return OutboundEstimate(**fields)
|
||||
|
||||
|
||||
def _gt(frame: int, lat: float, lon: float) -> GtPose:
    """Build the ground-truth pose for *frame* at (*lat*, *lon*)."""
    pose = GtPose(
        frame_idx=frame,
        lat_deg=lat,
        lon_deg=lon,
    )
    return pose
|
||||
|
||||
|
||||
def test_evaluate_event_drift_within_budget():
    """An event whose reported drift stays within DRIFT_BUDGET_M passes the drift gate."""
    # Arrange — frame 9 sits exactly on its GT pose, frame 11 is 0.0001° off
    # in each axis, and the outlier frame 10 is the one that jumps.
    outlier = OutlierEvent(frame_idx=10, geodesic_offset_m=412.5, src_jpeg_path="x.jpg")
    truth = {
        9: _gt(9, 50.0000, 30.0000),
        10: _gt(10, 50.0001, 30.0001),
        11: _gt(11, 50.0002, 30.0002),
    }
    reported = {
        9: _est(9, 50.0000, 30.0000, cov=4.0),
        10: _est(10, 50.0050, 30.0050, cov=5.0),  # outlier frame
        11: _est(11, 50.0001, 30.0001, cov=5.0),
    }

    # Act
    report = evaluate_event(outlier, reported, truth)

    # Assert
    assert report.frame_idx == 10
    assert report.drift_m is not None
    assert report.drift_m <= DRIFT_BUDGET_M
    assert report.passes_drift is True
|
||||
|
||||
|
||||
def test_evaluate_event_drift_exceeds_budget_fails():
    """A drift above DRIFT_BUDGET_M fails the drift gate and the whole event."""
    # Arrange — frame 9 is on its GT pose, but frame 11 lands 0.001° away
    # from GT in both axes (well over 100 m).
    outlier = OutlierEvent(frame_idx=10, geodesic_offset_m=400.0, src_jpeg_path="x.jpg")
    truth = {
        9: _gt(9, 50.0000, 30.0000),
        10: _gt(10, 50.0001, 30.0001),
        11: _gt(11, 50.0000, 30.0000),
    }
    reported = {
        9: _est(9, 50.0000, 30.0000),
        10: _est(10, 50.0050, 30.0050),
        11: _est(11, 50.0010, 30.0010),  # far from GT after the outlier
    }

    # Act
    report = evaluate_event(outlier, reported, truth)

    # Assert
    assert report.drift_m is not None and report.drift_m > DRIFT_BUDGET_M
    assert report.passes_drift is False
    assert report.passes is False
|
||||
|
||||
|
||||
def test_evaluate_event_missing_neighbour_drift_none():
    """Without neighbouring frames the drift is None and the drift gate fails."""
    # Arrange — estimate and GT exist for the outlier frame only.
    outlier = OutlierEvent(frame_idx=10, geodesic_offset_m=400.0, src_jpeg_path="x.jpg")
    truth = {10: _gt(10, 50.0001, 30.0001)}
    reported = {10: _est(10, 50.0050, 30.0050)}

    # Act
    report = evaluate_event(outlier, reported, truth)

    # Assert
    assert report.drift_m is None
    assert report.passes_drift is False
|
||||
|
||||
|
||||
# evaluate_event — AC-3 covariance monotonic
|
||||
|
||||
|
||||
def test_evaluate_event_cov_monotonic_passes():
    """A strictly rising covariance across frames 9-11 passes the covariance gate."""
    # Arrange — positions are pinned to GT so only covariance varies.
    cov_by_frame = {9: 4.0, 10: 5.0, 11: 5.5}
    reported = {
        frame: _est(frame, 50.0, 30.0, cov=cov)
        for frame, cov in cov_by_frame.items()
    }
    truth = {frame: _gt(frame, 50.0, 30.0) for frame in (9, 10, 11)}
    outlier = OutlierEvent(frame_idx=10, geodesic_offset_m=400.0, src_jpeg_path="x.jpg")

    # Act
    report = evaluate_event(outlier, reported, truth)

    # Assert
    assert report.cov_non_decreasing is True
    assert report.passes_covariance is True
|
||||
|
||||
|
||||
def test_evaluate_event_cov_decreasing_fails():
    """A covariance dip on the outlier frame fails the covariance gate."""
    # Arrange — frame 10 reports 4.0 after frame 9 reported 5.0, which
    # violates AC-3; positions are pinned to GT.
    cov_by_frame = {9: 5.0, 10: 4.0, 11: 5.0}
    reported = {
        frame: _est(frame, 50.0, 30.0, cov=cov)
        for frame, cov in cov_by_frame.items()
    }
    truth = {frame: _gt(frame, 50.0, 30.0) for frame in (9, 10, 11)}
    outlier = OutlierEvent(frame_idx=10, geodesic_offset_m=400.0, src_jpeg_path="x.jpg")

    # Act
    report = evaluate_event(outlier, reported, truth)

    # Assert
    assert report.cov_non_decreasing is False
    assert report.passes_covariance is False
|
||||
|
||||
|
||||
def test_evaluate_event_cov_flat_window_passes():
    """Equal covariances across the window count as non-decreasing."""
    # Arrange — every frame reports cov=5.0 at the GT position.
    window_frames = (9, 10, 11)
    reported = {frame: _est(frame, 50.0, 30.0, cov=5.0) for frame in window_frames}
    truth = {frame: _gt(frame, 50.0, 30.0) for frame in window_frames}
    outlier = OutlierEvent(frame_idx=10, geodesic_offset_m=400.0, src_jpeg_path="x.jpg")

    # Act
    report = evaluate_event(outlier, reported, truth)

    # Assert
    assert report.cov_non_decreasing is True
|
||||
|
||||
|
||||
# Aggregate evaluate — AC-1 minimum count
|
||||
|
||||
|
||||
def test_evaluate_count_below_minimum_fails():
    """Five outliers are below the AC-1 minimum, so the count gate and the run fail."""
    # Arrange — events at frames 10, 20, ..., 50, each with a before/at/after
    # estimate sitting exactly on GT.
    events = [
        OutlierEvent(frame_idx=n * 10, geodesic_offset_m=400.0, src_jpeg_path=f"x{n}.jpg")
        for n in range(1, 6)
    ]
    frames = [ev.frame_idx + offset for ev in events for offset in (-1, 0, 1)]
    estimates: list[OutboundEstimate] = [_est(f, 50.0, 30.0, cov=5.0) for f in frames]
    gt: list[GtPose] = [_gt(f, 50.0, 30.0) for f in frames]

    # Act
    report = evaluate(events, estimates, gt)

    # Assert
    assert report.total_outliers == 5
    assert report.passes_count is False
    assert report.passes is False
|
||||
|
||||
|
||||
def test_evaluate_count_at_minimum_passes_count_gate():
    """Exactly ten clean outliers pass the count gate and the whole run."""
    # Arrange — events at frames 10, 20, ..., 100; every estimate sits on GT
    # with a flat covariance, so no drift or covariance violation exists.
    events = [
        OutlierEvent(frame_idx=n * 10, geodesic_offset_m=400.0, src_jpeg_path=f"x{n}.jpg")
        for n in range(1, 11)
    ]
    frames = [ev.frame_idx + offset for ev in events for offset in (-1, 0, 1)]
    estimates: list[OutboundEstimate] = [_est(f, 50.0, 30.0, cov=5.0) for f in frames]
    gt: list[GtPose] = [_gt(f, 50.0, 30.0) for f in frames]

    # Act
    report = evaluate(events, estimates, gt)

    # Assert
    assert report.total_outliers == 10
    assert report.passes_count is True
    assert report.failed_event_count == 0
    assert report.passes is True
|
||||
|
||||
|
||||
def test_evaluate_mixed_pass_fail_aggregates_correctly():
    """One drifting event among ten is counted once and fails the whole run."""
    # Arrange — ten events at frames 10, 20, ..., 100 with estimates on GT,
    # except frame 31, the "after" frame of the event at frame_idx=30.
    events = [
        OutlierEvent(frame_idx=n * 10, geodesic_offset_m=400.0, src_jpeg_path=f"x{n}.jpg")
        for n in range(1, 11)
    ]
    frames = [ev.frame_idx + offset for ev in events for offset in (-1, 0, 1)]
    gt: list[GtPose] = [_gt(f, 50.0, 30.0) for f in frames]
    estimates: list[OutboundEstimate] = [
        _est(f, 50.0, 30.0, cov=5.0) for f in frames if f != 31
    ]
    # Frame 31 is shifted 0.0018° in latitude (≈200 m), pushing the drift
    # for the event at frame 30 above the 50 m budget.
    estimates.append(_est(31, 50.0018, 30.0, cov=5.0))

    # Act
    report = evaluate(events, estimates, gt)

    # Assert
    assert report.total_outliers == 10
    assert report.failed_event_count == 1
    assert report.passes is False
|
||||
|
||||
|
||||
# CSV evidence writer
|
||||
|
||||
|
||||
def test_write_csv_evidence_round_trips(tmp_path: Path):
    """Evidence for a two-event report reads back as two passing CSV rows."""
    # Arrange — two events whose surrounding estimates sit exactly on GT.
    events = [
        OutlierEvent(frame_idx=10, geodesic_offset_m=412.5, src_jpeg_path="AD000011.jpg"),
        OutlierEvent(frame_idx=20, geodesic_offset_m=381.0, src_jpeg_path="AD000021.jpg"),
    ]
    frames = [ev.frame_idx + offset for ev in events for offset in (-1, 0, 1)]
    estimates: list[OutboundEstimate] = [_est(f, 50.0, 30.0, cov=5.0) for f in frames]
    gt: list[GtPose] = [_gt(f, 50.0, 30.0) for f in frames]
    report = evaluate(events, estimates, gt)

    # Act
    csv_path = write_csv_evidence(tmp_path / "ft_n_01_evidence.csv", report)

    # Assert — one row per event, booleans serialised as "true".
    assert csv_path.exists()
    with csv_path.open() as handle:
        rows = list(csv.DictReader(handle))
    frame_column = [int(row["frame_idx"]) for row in rows]
    assert frame_column == [10, 20]
    assert all(row["passes"] == "true" for row in rows)
    assert all(row["cov_non_decreasing"] == "true" for row in rows)
|
||||
@@ -52,6 +52,9 @@ E2E_ROOT = Path(__file__).resolve().parents[1]
|
||||
"runner/helpers/msp_frame_observer.py",
|
||||
"runner/helpers/ap_contract_evaluator.py",
|
||||
"runner/helpers/cold_start_evaluator.py",
|
||||
"runner/helpers/outlier_tolerance_evaluator.py",
|
||||
"runner/helpers/outage_request_evaluator.py",
|
||||
"runner/helpers/blackout_spoof_evaluator.py",
|
||||
"fixtures/mock-suite-sat/Dockerfile",
|
||||
"fixtures/mock-suite-sat/app.py",
|
||||
"fixtures/mock-suite-sat/requirements.txt",
|
||||
@@ -96,7 +99,10 @@ E2E_ROOT = Path(__file__).resolve().parents[1]
|
||||
"tests/positive/test_ft_p_09_inav.py",
|
||||
"tests/positive/test_ft_p_10_smoothing_lookback.py",
|
||||
"tests/positive/test_ft_p_11_cold_start_init.py",
|
||||
"tests/negative/test_ft_n_01_outlier_tolerance.py",
|
||||
"tests/negative/test_ft_n_02_sharp_turn_failure.py",
|
||||
"tests/negative/test_ft_n_03_outage_reloc.py",
|
||||
"tests/negative/test_ft_n_04_blackout_spoof.py",
|
||||
],
|
||||
)
|
||||
def test_required_path_exists(relative_path: str) -> None:
|
||||
|
||||
Reference in New Issue
Block a user