[AZ-436] [AZ-437] [AZ-438] [AZ-439] Add NFT-SEC-01..05 security scenarios

Batch 87: 6 NFT-SEC blackbox scenarios + 5 helper evaluators + 75 unit
tests + cumulative review batches 85-87.

* AZ-436 NFT-SEC-01: cache-poisoning safety budget (AC-NEW-9); aggregate
  false_trust_count ≤ N×1e-6; zero-tolerance default. Canonical-only by
  default; E2E_NFT_SEC_01_RELEASE_GATE=1 unlocks full matrix.
* AZ-437 NFT-SEC-02 + NFT-SEC-05: shared egress-observation evaluator
  (AC-NEW-10); SEC-02 = 0 packets to non-e2e-net over 5min replay;
  SEC-05 = DNS-blackhole sidecar healthy + lookup fails + UDP-53 silent.
* AZ-438 NFT-SEC-03: AP-only signing rejection (AC-NEW-11); 3 sub-cases
  (unsigned/wrong-key/replayed) each reject ≤500ms + no position drift.
* AZ-439 NFT-SEC-04: probe (always-run) = no-crash + deterministic
  decode outcome; ASan-fuzz (release-gate) = 0 findings ≥4h; AC-3
  corpus floor informational only per spec.

Verdict per-batch: PASS_WITH_WARNINGS (5 Low). Cumulative review for
batches 85-87 (K=3 window) also PASS_WITH_WARNINGS with 5 cross-batch
findings — recommends hygiene PBIs for write_csv_evidence duplication
(13 helpers) and _resolve_fixture_path duplication (13 scenarios), plus
new tickets for AZ-595 fixture builder + DNS-blackhole sidecar service.

Also adds _docs/LESSONS.md documenting the Jira transition-ID lesson
(always call getTransitionsForJiraIssue first, never memorize numeric
IDs across sessions).

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-17 17:33:22 +03:00
parent de19e716d8
commit c56d4584e6
21 changed files with 3510 additions and 0 deletions
@@ -0,0 +1,176 @@
"""Unit tests for ``runner.helpers.asan_fuzz_evaluator`` (NFT-SEC-04 / AZ-439 fuzz)."""
from __future__ import annotations
import csv
from pathlib import Path
import pytest
from runner.helpers import asan_fuzz_evaluator as afe
# Each entry pairs one raw log line with the category the classifier is
# expected to report (``None`` when the line is not an ASan finding).
_ASAN_LINE_CASES = [
    ("=================================================================\n", None),
    (
        "==1234==ERROR: AddressSanitizer: heap-buffer-overflow on address ...",
        afe.AsanFindingCategory.HEAP_BUFFER_OVERFLOW,
    ),
    (
        "==1234==ERROR: AddressSanitizer: heap-use-after-free on address ...",
        afe.AsanFindingCategory.HEAP_USE_AFTER_FREE,
    ),
    (
        "==1234==ERROR: AddressSanitizer: stack-buffer-overflow on address ...",
        afe.AsanFindingCategory.STACK_BUFFER_OVERFLOW,
    ),
    (
        "==1234==ERROR: AddressSanitizer: stack-use-after-return on ...",
        afe.AsanFindingCategory.STACK_USE_AFTER_RETURN,
    ),
    (
        "==1234==ERROR: AddressSanitizer: global-buffer-overflow on ...",
        afe.AsanFindingCategory.GLOBAL_BUFFER_OVERFLOW,
    ),
    (
        "==1234==ERROR: AddressSanitizer: use-after-free on ...",
        afe.AsanFindingCategory.USE_AFTER_FREE,
    ),
    (
        "==1234==ERROR: AddressSanitizer: double-free on ...",
        afe.AsanFindingCategory.DOUBLE_FREE,
    ),
    # An ASan category that is not catalogued yet must still count as a
    # finding, so it is expected to land in OTHER_FINDING.
    (
        "==1234==ERROR: AddressSanitizer: mysterious-future-category on ...",
        afe.AsanFindingCategory.OTHER_FINDING,
    ),
    ("just a normal log line, harmless", None),
    ("ERROR but no AddressSanitizer prefix", None),
]


@pytest.mark.parametrize("line,expected", _ASAN_LINE_CASES)
def test_classify_asan_line(
    line: str, expected: afe.AsanFindingCategory | None
) -> None:
    """``classify_asan_line`` returns the expected category for each sample line."""
    actual = afe.classify_asan_line(line)
    assert actual == expected
def test_zero_findings_and_full_duration_passes() -> None:
    """A finding-free log at the minimum duration passes every verdict flag."""
    clean_log = ["info line", "another info line"]
    verdict = afe.evaluate(
        clean_log,
        duration_seconds=afe.MIN_FUZZ_DURATION_SECONDS,
        corpus_size=afe.MIN_CORPUS_COVERAGE,
    )
    assert verdict.passes_findings
    assert verdict.passes_duration
    assert verdict.passes
def test_short_duration_fails_even_with_zero_findings() -> None:
    """A one-minute run fails on duration even though no finding was logged."""
    one_minute = 60
    verdict = afe.evaluate(
        [],
        duration_seconds=one_minute,
        corpus_size=afe.MIN_CORPUS_COVERAGE,
    )
    assert verdict.passes_findings
    assert not verdict.passes_duration
    assert not verdict.passes
def test_one_finding_fails_full_run() -> None:
    """A single heap-buffer-overflow line fails an otherwise full-length run."""
    asan_log = ["==1==ERROR: AddressSanitizer: heap-buffer-overflow on ..."]
    verdict = afe.evaluate(
        asan_log,
        duration_seconds=afe.MIN_FUZZ_DURATION_SECONDS,
        corpus_size=afe.MIN_CORPUS_COVERAGE,
    )
    assert len(verdict.findings) == 1
    only_finding = verdict.findings[0]
    assert only_finding.category is afe.AsanFindingCategory.HEAP_BUFFER_OVERFLOW
    assert not verdict.passes_findings
    assert not verdict.passes
def test_unknown_asan_finding_still_fails() -> None:
    """An uncatalogued ASan category is recorded as OTHER_FINDING and fails."""
    asan_log = ["==1==ERROR: AddressSanitizer: brand-new-category"]
    verdict = afe.evaluate(
        asan_log,
        duration_seconds=afe.MIN_FUZZ_DURATION_SECONDS,
        corpus_size=afe.MIN_CORPUS_COVERAGE,
    )
    first = verdict.findings[0]
    assert first.category is afe.AsanFindingCategory.OTHER_FINDING
    assert not verdict.passes
def test_corpus_floor_is_informational_only() -> None:
    """Missing the corpus floor is reported but does not fail the run."""
    # No findings and full duration; only the corpus size is too small.
    tiny_corpus = 10
    verdict = afe.evaluate(
        [],
        duration_seconds=afe.MIN_FUZZ_DURATION_SECONDS,
        corpus_size=tiny_corpus,
    )
    assert not verdict.reached_corpus_floor
    assert verdict.passes
def test_snippet_is_truncated_for_evidence() -> None:
    """The stored snippet of an over-long finding line is at most 200 chars."""
    padding = "x" * 500
    oversized_line = "==1==ERROR: AddressSanitizer: heap-buffer-overflow " + padding
    verdict = afe.evaluate(
        [oversized_line],
        duration_seconds=afe.MIN_FUZZ_DURATION_SECONDS,
        corpus_size=afe.MIN_CORPUS_COVERAGE,
    )
    snippet = verdict.findings[0].snippet
    assert len(snippet) <= 200
def test_multiple_findings_classified_and_counted() -> None:
    """Findings interleaved with ordinary lines are each classified and counted."""
    verdict = afe.evaluate(
        [
            "info",
            "==1==ERROR: AddressSanitizer: heap-buffer-overflow",
            "info",
            "==2==ERROR: AddressSanitizer: heap-buffer-overflow",
            "==3==ERROR: AddressSanitizer: use-after-free",
            "trailing log",
        ],
        duration_seconds=afe.MIN_FUZZ_DURATION_SECONDS,
        corpus_size=afe.MIN_CORPUS_COVERAGE,
    )
    assert len(verdict.findings) == 3
    seen = [finding.category for finding in verdict.findings]
    heap_overflows = seen.count(afe.AsanFindingCategory.HEAP_BUFFER_OVERFLOW)
    use_after_frees = seen.count(afe.AsanFindingCategory.USE_AFTER_FREE)
    assert heap_overflows == 2
    assert use_after_frees == 1
    assert not verdict.passes
def test_csv_evidence_round_trip_no_findings(tmp_path: Path) -> None:
    """A finding-free report is written as exactly a header row plus one data row."""
    report = afe.evaluate(
        [],
        duration_seconds=afe.MIN_FUZZ_DURATION_SECONDS,
        corpus_size=afe.MIN_CORPUS_COVERAGE,
    )
    out = tmp_path / "out.csv"
    afe.write_csv_evidence(out, report)

    # The csv module asks for files to be opened with newline="" so that the
    # reader, not the text layer, interprets line endings.
    with out.open(newline="") as fh:
        rows = list(csv.reader(fh))

    assert rows[0][0] == "duration_seconds"
    assert rows[1][5] == "true"  # passes_findings
    assert rows[1][6] == "true"  # passes
    # No per-finding section because no findings were recorded.
    assert len(rows) == 2
def test_csv_evidence_round_trip_with_findings(tmp_path: Path) -> None:
    """A report with two findings gains a blank separator and a findings section."""
    report = afe.evaluate(
        [
            "==1==ERROR: AddressSanitizer: heap-buffer-overflow",
            "==2==ERROR: AddressSanitizer: use-after-free",
        ],
        duration_seconds=afe.MIN_FUZZ_DURATION_SECONDS,
        corpus_size=afe.MIN_CORPUS_COVERAGE,
    )
    out = tmp_path / "out.csv"
    afe.write_csv_evidence(out, report)

    # The csv module asks for files to be opened with newline="" so that the
    # reader, not the text layer, interprets line endings.
    with out.open(newline="") as fh:
        rows = list(csv.reader(fh))

    assert rows[0][0] == "duration_seconds"
    assert rows[1][6] == "false"  # passes
    # Header row + aggregate row + blank + sub-header + 2 finding rows
    # = 6 rows total.
    assert rows[2] == []
    assert rows[3] == ["finding_index", "category", "snippet"]
    assert len(rows) == 6
@@ -0,0 +1,245 @@
"""Unit tests for ``runner.helpers.cache_poisoning_evaluator`` (NFT-SEC-01 / AZ-436)."""
from __future__ import annotations
import csv
from pathlib import Path
import pytest
from runner.helpers import cache_poisoning_evaluator as cpe
def _spec(tile_id: str, layer: str) -> cpe.PoisonedTileSpec:
    """Build a ``PoisonedTileSpec`` for *tile_id* attributed to defense *layer*."""
    tile_spec = cpe.PoisonedTileSpec(tile_id=tile_id, defense_layer=layer)
    return tile_spec
def _flight(
    flight_id: str,
    *,
    total: int = 1000,
    poisoned: tuple[cpe.PoisonedTileSpec, ...] | None = None,
    false_trust: tuple[cpe.FalseTrustEvent, ...] | None = None,
    rejection_reasons: dict[str, int] | None = None,
) -> cpe.FlightOutcome:
    """Build a ``FlightOutcome`` fixture with convenient defaults.

    When *poisoned* is ``None`` the flight gets three poisoned tiles, one
    each for the signing, freshness and voting defense layers. Falsy
    *false_trust* / *rejection_reasons* become ``()`` / ``{}`` respectively.
    """
    if poisoned is not None:
        tiles = poisoned
    else:
        tiles = (
            _spec("t1", cpe.DEFENSE_LAYER_SIGNING),
            _spec("t2", cpe.DEFENSE_LAYER_FRESHNESS),
            _spec("t3", cpe.DEFENSE_LAYER_VOTING),
        )
    events = false_trust if false_trust else ()
    reasons = rejection_reasons if rejection_reasons else {}
    return cpe.FlightOutcome(
        flight_id=flight_id,
        total_tile_count=total,
        poisoned_tiles=tiles,
        false_trust_events=events,
        rejection_reasons=reasons,
    )
def test_poison_ratio_within_band_passes_ratio_check() -> None:
    """3 poisoned tiles out of 100 (3 %) passes the ratio check."""
    # 3 % sits inside the [1 %, 5 %] band.
    in_band = _flight("f1", total=100)
    verdict = cpe.evaluate([in_band])
    assert verdict.passes_ratio
    assert in_band.poison_ratio == pytest.approx(0.03)
def test_poison_ratio_below_min_fails_ratio_check() -> None:
    """3 poisoned tiles out of 1000 (0.3 %) is flagged as a bad poison ratio."""
    # 0.3 % is under the 1 % floor.
    too_sparse = _flight("f-low", total=1000)
    verdict = cpe.evaluate([too_sparse])
    assert not verdict.passes_ratio
    assert "f-low" in verdict.flights_with_bad_poison_ratio
def test_poison_ratio_above_max_fails_ratio_check() -> None:
    """3 poisoned tiles out of 50 (6 %) is flagged as a bad poison ratio."""
    # 6 % is over the 5 % ceiling.
    too_dense = _flight("f-high", total=50)
    verdict = cpe.evaluate([too_dense])
    assert not verdict.passes_ratio
    assert "f-high" in verdict.flights_with_bad_poison_ratio
def test_zero_total_tile_count_gives_zero_ratio() -> None:
    """A flight with no tiles reports ratio 0.0 and is flagged as out of band."""
    no_tiles = _flight("f-empty", total=0, poisoned=())
    verdict = cpe.evaluate([no_tiles])
    assert no_tiles.poison_ratio == 0.0
    assert "f-empty" in verdict.flights_with_bad_poison_ratio
def test_missing_defense_layer_fails_layer_coverage() -> None:
    """A flight lacking a voting-layer tile is reported with that layer missing."""
    # Signing and freshness are covered; voting is deliberately left out.
    two_layers = (
        _spec("t1", cpe.DEFENSE_LAYER_SIGNING),
        _spec("t2", cpe.DEFENSE_LAYER_FRESHNESS),
    )
    partial = _flight("f-missing", total=100, poisoned=two_layers)
    verdict = cpe.evaluate([partial])
    assert not verdict.passes_layer_coverage
    # Exactly one (flight_id, missing_layers) entry is expected.
    (entry,) = verdict.flights_missing_defense_layers
    flight_id, missing = entry
    assert flight_id == "f-missing"
    assert missing == [cpe.DEFENSE_LAYER_VOTING]
def test_all_three_defense_layers_pass_layer_coverage() -> None:
    """The default fixture (one tile per layer) passes layer coverage."""
    verdict = cpe.evaluate([_flight("f-complete", total=100)])
    assert verdict.passes_layer_coverage
def test_zero_false_trust_events_passes_budget() -> None:
    """With no false-trust events the total is 0 and the budget check passes."""
    verdict = cpe.evaluate([_flight("f", total=100)])
    assert verdict.total_false_trust == 0
    assert verdict.passes_budget
def test_single_false_trust_event_fails_budget() -> None:
    """One false-trust event in a one-flight run fails the budget check."""
    flight = _flight(
        "f",
        total=100,
        false_trust=(
            cpe.FalseTrustEvent(
                flight_id="f",
                tile_id="t1",
                monotonic_ms=12345,
                defense_layer=cpe.DEFENSE_LAYER_SIGNING,
            ),
        ),
    )
    report = cpe.evaluate([flight])
    assert report.total_false_trust == 1
    # Zero-tolerance in practice: the budget scales as flight_count × 1e-6,
    # so for this one-flight run a single whole event already exceeds it
    # and the helper must reject it.
    assert not report.passes_budget
def test_unknown_rejection_reason_fails_vocabulary_check() -> None:
    """An unrecognised rejection-reason key fails the vocabulary check."""
    bogus_reasons = {"made_up_reason": 7}
    odd_flight = _flight("f-vocab", total=100, rejection_reasons=bogus_reasons)
    verdict = cpe.evaluate([odd_flight])
    assert not verdict.passes_rejection_reason_vocabulary
    assert "f-vocab" in verdict.flights_with_unknown_rejection_reasons
def test_known_rejection_reasons_pass_vocabulary_check() -> None:
    """Signing, voting and freshness-downgrade reasons pass the vocabulary check."""
    known_reasons = {
        cpe.DEFENSE_LAYER_SIGNING: 12,
        cpe.DEFENSE_LAYER_VOTING: 8,
        "freshness_gate_downgrade": 3,
    }
    ok_flight = _flight("f-ok", total=100, rejection_reasons=known_reasons)
    verdict = cpe.evaluate([ok_flight])
    assert verdict.passes_rejection_reason_vocabulary
def test_aggregate_budget_scales_with_flight_count() -> None:
    """Fifty flights yield flight_count 50 and a budget of 50 × 1e-6."""
    fleet = []
    for idx in range(50):
        fleet.append(_flight(f"f{idx}", total=100))
    verdict = cpe.evaluate(fleet)
    assert verdict.flight_count == 50
    assert verdict.budget == pytest.approx(50 * 1e-6)
def test_aggregate_counts_false_trust_across_flights() -> None:
    """False-trust events are summed over all flights in the run."""
    fleet = []
    for idx in range(4):
        # Only even-numbered flights carry a single false-trust event.
        if idx % 2 == 0:
            events = (
                cpe.FalseTrustEvent(
                    flight_id=f"f{idx}",
                    tile_id="tx",
                    monotonic_ms=100 * idx,
                    defense_layer=cpe.DEFENSE_LAYER_SIGNING,
                ),
            )
        else:
            events = ()
        fleet.append(_flight(f"f{idx}", total=100, false_trust=events))
    verdict = cpe.evaluate(fleet)
    # f0 and f2 contribute one event each.
    assert verdict.total_false_trust == 2
def test_overall_pass_requires_all_subchecks() -> None:
    """A healthy flight passes overall and on each individual sub-check."""
    verdict = cpe.evaluate([_flight("f", total=100)])
    assert verdict.passes
    assert verdict.passes_budget
    assert verdict.passes_ratio
    assert verdict.passes_layer_coverage
    assert verdict.passes_rejection_reason_vocabulary
def test_overall_pass_fails_if_any_subcheck_fails() -> None:
    """A flight with an out-of-band poison ratio fails the overall verdict."""
    # 3 poisoned tiles in 1000 is 0.3 %, an out-of-band ratio.
    broken = _flight("f-broken", total=1000)
    verdict = cpe.evaluate([broken])
    assert not verdict.passes
def test_empty_flight_list_gives_trivial_pass() -> None:
    """Evaluating zero flights is a no-op that passes."""
    # An empty run is not treated as an implicit failure; asserting
    # N >= 1 is left to the scenario test.
    verdict = cpe.evaluate([])
    assert verdict.flight_count == 0
    assert verdict.total_false_trust == 0
    assert verdict.passes
def test_csv_evidence_has_header_per_flight_and_aggregate_rows(tmp_path: Path) -> None:
    """The CSV holds a header, one row per flight, a blank row and an AGGREGATE row."""
    flights = [_flight(f"f{i}", total=100) for i in range(3)]
    report = cpe.evaluate(flights)
    out = tmp_path / "out.csv"
    cpe.write_csv_evidence(out, report)

    # The csv module asks for files to be opened with newline="" so that the
    # reader, not the text layer, interprets line endings.
    with out.open(newline="") as fh:
        rows = list(csv.reader(fh))

    assert rows[0][0] == "flight_id"
    # Per-flight rows are compared as a set, so their order is not pinned.
    assert {rows[1][0], rows[2][0], rows[3][0]} == {"f0", "f1", "f2"}
    assert rows[4] == []
    assert rows[5][0] == "AGGREGATE"
    assert any("flight_count=3" in cell for cell in rows[5])
@@ -0,0 +1,120 @@
"""Unit tests for ``runner.helpers.cve_probe_evaluator`` (NFT-SEC-04 / AZ-439 probe)."""
from __future__ import annotations
import csv
from pathlib import Path
from runner.helpers import cve_probe_evaluator as cpe
def _rec(ms: int, kind: str) -> cpe.FdrSurvivalRecord:
    """Build an ``FdrSurvivalRecord`` of the given *kind* stamped at *ms*."""
    record = cpe.FdrSurvivalRecord(monotonic_ms=ms, kind=kind)
    return record
def test_decode_success_in_window_is_classified_as_decode_success() -> None:
    """A decode-success record 5 ms after the probe yields DECODE_SUCCESS."""
    records = [_rec(10_005, "frame-decode-success")]
    result = cpe.classify_probe_outcome(records, probe_injected_at_ms=10_000)
    assert result is cpe.ProbeFrameOutcome.DECODE_SUCCESS
def test_decode_error_in_window_is_classified_as_frame_decode_error() -> None:
    """A decode-error record 10 ms after the probe yields FRAME_DECODE_ERROR."""
    records = [_rec(10_010, "frame-decode-error")]
    result = cpe.classify_probe_outcome(records, probe_injected_at_ms=10_000)
    assert result is cpe.ProbeFrameOutcome.FRAME_DECODE_ERROR
def test_no_record_in_window_is_missing() -> None:
    """A decode record that predates the probe does not count; outcome is MISSING."""
    stale_records = [_rec(9_900, "frame-decode-success")]
    result = cpe.classify_probe_outcome(stale_records, probe_injected_at_ms=10_000)
    assert result is cpe.ProbeFrameOutcome.MISSING
def test_record_outside_tolerance_is_missing() -> None:
    """A record 100 ms after the probe is MISSING under a 50 ms tolerance."""
    late_records = [_rec(10_100, "frame-decode-success")]
    result = cpe.classify_probe_outcome(
        late_records,
        probe_injected_at_ms=10_000,
        tolerance_ms=50,
    )
    assert result is cpe.ProbeFrameOutcome.MISSING
def test_first_match_in_window_wins() -> None:
    """When success precedes error in the window, the success record decides."""
    success_then_error = [
        _rec(10_005, "frame-decode-success"),
        _rec(10_010, "frame-decode-error"),
    ]
    result = cpe.classify_probe_outcome(
        success_then_error, probe_injected_at_ms=10_000
    )
    assert result is cpe.ProbeFrameOutcome.DECODE_SUCCESS
def test_passes_when_no_crash_and_decode_success() -> None:
    """Decode success plus a later FDR record passes both sub-checks."""
    records = [_rec(10_005, "frame-decode-success"), _rec(11_000, "imu-tick")]
    verdict = cpe.evaluate(records, probe_injected_at_ms=10_000)
    assert verdict.passes_no_crash
    assert verdict.passes_graceful_outcome
    assert verdict.passes
def test_passes_when_no_crash_and_graceful_error() -> None:
    """A decode-error record followed by a later FDR record still passes."""
    records = [_rec(10_005, "frame-decode-error"), _rec(11_000, "imu-tick")]
    verdict = cpe.evaluate(records, probe_injected_at_ms=10_000)
    assert verdict.passes
def test_fails_when_no_post_probe_fdr_record() -> None:
    """With every FDR record before the probe, the no-crash check (AC-1a) fails."""
    # Nothing was logged after injection, which is read as the SUT
    # having crashed at probe time.
    pre_probe_only = [_rec(9_500, "imu-tick"), _rec(9_900, "frame-decode-success")]
    verdict = cpe.evaluate(pre_probe_only, probe_injected_at_ms=10_000)
    assert not verdict.passes_no_crash
    assert not verdict.passes
def test_fails_when_silent_drop() -> None:
    """Post-probe records without any decode record fail the graceful check (AC-1b)."""
    # The SUT is alive (records exist after the probe) but the probe
    # frame produced no decode record at all, i.e. it was silently dropped.
    ticks_only = [_rec(11_000, "imu-tick"), _rec(12_000, "imu-tick")]
    verdict = cpe.evaluate(ticks_only, probe_injected_at_ms=10_000)
    assert verdict.passes_no_crash
    assert not verdict.passes_graceful_outcome
    assert not verdict.passes
def test_empty_fdr_archive_fails_both_subchecks() -> None:
    """An empty FDR archive fails no-crash, graceful-outcome and overall."""
    no_records: list = []
    verdict = cpe.evaluate(no_records, probe_injected_at_ms=10_000)
    assert not verdict.passes_no_crash
    assert not verdict.passes_graceful_outcome
    assert not verdict.passes
def test_csv_evidence_round_trip(tmp_path: Path) -> None:
    """The probe report's CSV has the expected header, outcome cell and verdict."""
    report = cpe.evaluate(
        [_rec(10_005, "frame-decode-success")],
        probe_injected_at_ms=10_000,
    )
    out = tmp_path / "out.csv"
    cpe.write_csv_evidence(out, report)

    # The csv module asks for files to be opened with newline="" so that the
    # reader, not the text layer, interprets line endings.
    with out.open(newline="") as fh:
        rows = list(csv.reader(fh))

    assert rows[0][0] == "probe_injected_at_ms"
    assert rows[1][2] == "decode-success"
    # The last column of the data row is expected to read "true".
    assert rows[1][-1] == "true"
@@ -0,0 +1,168 @@
"""Unit tests for ``runner.helpers.egress_observer`` (NFT-SEC-02 + NFT-SEC-05 / AZ-437)."""
from __future__ import annotations
import csv
from pathlib import Path
import pytest
from runner.helpers import egress_observer as eo
def _snap(other: int = 0, internal: int = 0, udp53: int = 0) -> eo.EgressCounterSnapshot:
    """Build an ``EgressCounterSnapshot``; every counter defaults to zero."""
    counters = {
        "egress_packets_to_internal_net": internal,
        "egress_packets_to_other_destinations": other,
        "udp53_egress_packets": udp53,
    }
    return eo.EgressCounterSnapshot(**counters)
def test_egress_counter_rejects_negative_values() -> None:
    """Constructing a snapshot with a negative counter raises ``ValueError``."""
    expected_error = pytest.raises(ValueError, match="cannot be negative")
    with expected_error:
        eo.EgressCounterSnapshot(
            egress_packets_to_internal_net=-1,
            egress_packets_to_other_destinations=0,
            udp53_egress_packets=0,
        )
def test_no_egress_zero_delta_passes() -> None:
    """An unchanged other-destinations counter passes despite internal growth."""
    start = _snap(other=10, internal=5)
    # Only the internal counter moves, which is acceptable.
    end = _snap(other=10, internal=42)
    verdict = eo.evaluate_no_egress(start, end, window_label="5min")
    assert verdict.delta_other_destinations == 0
    assert verdict.passes
def test_no_egress_nonzero_delta_fails() -> None:
    """A single extra packet to another destination fails the no-egress check."""
    start = _snap(other=10)
    end = _snap(other=11)
    verdict = eo.evaluate_no_egress(start, end, window_label="5min")
    assert verdict.delta_other_destinations == 1
    assert not verdict.passes
def test_no_egress_records_internal_delta_for_evidence() -> None:
    """The internal-net delta is reported without affecting the verdict."""
    start = _snap(internal=100)
    end = _snap(internal=200)
    verdict = eo.evaluate_no_egress(start, end, window_label="5min-derkachi")
    # Informational value only; the run still passes.
    assert verdict.delta_internal == 100
    assert verdict.passes
def test_dns_blackhole_passes_on_full_silence_and_failed_lookup() -> None:
    """NXDOMAIN, a flat UDP-53 counter and a healthy sidecar pass together."""
    start = _snap(udp53=7)
    end = _snap(udp53=7)
    verdict = eo.evaluate_dns_blackhole(
        start,
        end,
        lookup_outcome=eo.DnsLookupOutcome.NXDOMAIN,
        sidecar_healthy=True,
    )
    assert verdict.passes
def test_dns_blackhole_fails_on_successful_lookup() -> None:
    """A lookup that succeeds fails the lookup sub-check and the overall verdict."""
    start = _snap(udp53=7)
    end = _snap(udp53=7)
    verdict = eo.evaluate_dns_blackhole(
        start,
        end,
        lookup_outcome=eo.DnsLookupOutcome.SUCCESS,
        sidecar_healthy=True,
    )
    assert not verdict.passes_lookup
    assert not verdict.passes
def test_dns_blackhole_fails_when_udp53_packets_escaped() -> None:
    """One additional UDP-53 packet fails the silence sub-check and the verdict."""
    start = _snap(udp53=7)
    end = _snap(udp53=8)
    verdict = eo.evaluate_dns_blackhole(
        start,
        end,
        lookup_outcome=eo.DnsLookupOutcome.NXDOMAIN,
        sidecar_healthy=True,
    )
    assert not verdict.passes_udp_silence
    assert not verdict.passes
def test_dns_blackhole_fails_when_sidecar_unhealthy() -> None:
    """An unhealthy sidecar fails the verdict even with NXDOMAIN and no packets."""
    start = _snap()
    end = _snap()
    verdict = eo.evaluate_dns_blackhole(
        start,
        end,
        lookup_outcome=eo.DnsLookupOutcome.NXDOMAIN,
        sidecar_healthy=False,
    )
    assert not verdict.passes
# Lookup outcomes that the test below expects to satisfy the lookup sub-check.
_FAILED_LOOKUP_OUTCOMES = [
    eo.DnsLookupOutcome.NXDOMAIN,
    eo.DnsLookupOutcome.TIMEOUT,
    eo.DnsLookupOutcome.NO_SERVERS,
    eo.DnsLookupOutcome.OTHER_FAILURE,
]


@pytest.mark.parametrize("outcome", _FAILED_LOOKUP_OUTCOMES)
def test_all_failure_outcomes_pass_lookup_check(outcome: eo.DnsLookupOutcome) -> None:
    """Each failing lookup outcome passes the lookup sub-check."""
    start = _snap()
    end = _snap()
    verdict = eo.evaluate_dns_blackhole(
        start,
        end,
        lookup_outcome=outcome,
        sidecar_healthy=True,
    )
    assert verdict.passes_lookup
def test_no_egress_csv_evidence_round_trip(tmp_path: Path) -> None:
    """The no-egress CSV has the expected header, window label and verdict."""
    before = _snap(other=0, internal=5)
    after = _snap(other=0, internal=42)
    report = eo.evaluate_no_egress(before, after, window_label="5min")
    out = tmp_path / "out.csv"
    eo.write_no_egress_csv_evidence(out, report)

    # The csv module asks for files to be opened with newline="" so that the
    # reader, not the text layer, interprets line endings.
    with out.open(newline="") as fh:
        rows = list(csv.reader(fh))

    assert rows[0][0] == "window_label"
    assert rows[1][0] == "5min"
    # The last column of the data row is expected to read "true".
    assert rows[1][-1] == "true"
def test_dns_blackhole_csv_evidence_round_trip(tmp_path: Path) -> None:
    """The DNS-blackhole CSV has the expected header, outcome cell and verdict."""
    report = eo.evaluate_dns_blackhole(
        _snap(udp53=7),
        _snap(udp53=7),
        lookup_outcome=eo.DnsLookupOutcome.NXDOMAIN,
        sidecar_healthy=True,
    )
    out = tmp_path / "out.csv"
    eo.write_dns_blackhole_csv_evidence(out, report)

    # The csv module asks for files to be opened with newline="" so that the
    # reader, not the text layer, interprets line endings.
    with out.open(newline="") as fh:
        rows = list(csv.reader(fh))

    assert rows[0][0] == "sidecar_healthy"
    assert rows[1][1] == "nxdomain"
    # The last column of the data row is expected to read "true".
    assert rows[1][-1] == "true"
@@ -0,0 +1,196 @@
"""Unit tests for ``runner.helpers.mavlink_signing_evaluator`` (NFT-SEC-03 / AZ-438)."""
from __future__ import annotations
import csv
from pathlib import Path
import pytest
from runner.helpers import mavlink_signing_evaluator as mse
def _pos(ms: int, lat_e7: int = 0, lon_e7: int = 0) -> mse.PositionSample:
    """Build a ``PositionSample`` at *ms*; latitude and longitude default to 0."""
    sample = mse.PositionSample(monotonic_ms=ms, lat_e7=lat_e7, lon_e7=lon_e7)
    return sample
def _st(ms: int, text: str) -> mse.StatustextSample:
    """Build a ``StatustextSample`` carrying *text* at *ms*."""
    sample = mse.StatustextSample(monotonic_ms=ms, text=text)
    return sample
# (statustext, whether it should be recognised as a bad-signature report).
_STATUSTEXT_CASES = [
    ("MAVLink: BAD_SIGNATURE", True),
    ("BAD SIGNATURE", True),
    ("Bad signature received from sysid=255", True),
    ("Signature rejected on link 0", True),
    ("PreArm: GPS Glitch", False),
    ("OK", False),
    ("", False),
]


@pytest.mark.parametrize("text,expected", _STATUSTEXT_CASES)
def test_is_bad_signature_statustext_matches_documented_variants(
    text: str, expected: bool
) -> None:
    """``is_bad_signature_statustext`` returns exactly the expected bool."""
    verdict = mse.is_bad_signature_statustext(text)
    assert verdict is expected
def test_subcase_passes_when_rejection_arrives_within_budget() -> None:
    """A bad-signature statustext 300 ms after injection passes the sub-case."""
    event = mse.InjectionEvent(
        sub_case=mse.SubCase.UNSIGNED, injected_at_ms=10_000
    )
    texts = [_st(10_300, "MAVLink: BAD_SIGNATURE")]
    fixes = [_pos(9_900), _pos(10_100)]
    outcome = mse.evaluate_subcase(event, texts, fixes)
    assert outcome.rejection_latency_ms == 300
    assert outcome.passes_rejection
    assert outcome.passes
def test_subcase_fails_when_no_rejection_seen() -> None:
    """Without any bad-signature statustext the sub-case fails."""
    event = mse.InjectionEvent(
        sub_case=mse.SubCase.WRONG_KEY, injected_at_ms=10_000
    )
    texts = [_st(10_300, "ok normal text")]
    fixes = [_pos(9_900), _pos(10_100)]
    outcome = mse.evaluate_subcase(event, texts, fixes)
    assert outcome.rejection_at_ms is None
    assert not outcome.passes_rejection
    assert not outcome.passes
def test_subcase_fails_when_rejection_too_slow() -> None:
    """A rejection arriving 1000 ms after injection fails the sub-case."""
    event = mse.InjectionEvent(
        sub_case=mse.SubCase.REPLAYED, injected_at_ms=10_000
    )
    texts = [_st(11_000, "MAVLink: BAD_SIGNATURE")]
    fixes = [_pos(9_900), _pos(10_100)]
    outcome = mse.evaluate_subcase(event, texts, fixes)
    assert outcome.rejection_latency_ms == 1000
    assert not outcome.passes_rejection
    assert not outcome.passes
def test_subcase_fails_when_position_drifts() -> None:
    """A latitude jump across the injection fails the no-position-update check."""
    event = mse.InjectionEvent(
        sub_case=mse.SubCase.UNSIGNED, injected_at_ms=10_000
    )
    texts = [_st(10_200, "MAVLink: BAD_SIGNATURE")]
    # 1_000 e7 units = 0.0001 deg, roughly 11 m of latitude: far beyond
    # the 1 m tolerance, as if the injected frame had moved AP's GPS state.
    fixes = [_pos(9_900, lat_e7=0), _pos(10_100, lat_e7=1_000)]
    outcome = mse.evaluate_subcase(event, texts, fixes)
    assert outcome.position_drift_m > mse.POSITION_DRIFT_TOLERANCE_M
    assert not outcome.passes_no_position_update
    assert not outcome.passes
def test_subcase_passes_with_tiny_jitter_within_tolerance() -> None:
    """A sub-tolerance latitude wobble does not fail the sub-case."""
    event = mse.InjectionEvent(
        sub_case=mse.SubCase.UNSIGNED, injected_at_ms=10_000
    )
    texts = [_st(10_100, "MAVLink: BAD_SIGNATURE")]
    # One e7-lat unit is about 1.1 cm, so this 10-unit offset is about
    # 11 cm, comfortably under the 1 m tolerance.
    fixes = [_pos(9_900, lat_e7=0), _pos(10_100, lat_e7=10)]
    outcome = mse.evaluate_subcase(event, texts, fixes)
    assert outcome.position_drift_m < mse.POSITION_DRIFT_TOLERANCE_M
    assert outcome.passes
def test_position_drift_returns_zero_when_no_pre_injection_sample() -> None:
    """With no sample before the injection the drift helper returns 0.0."""
    # There is no baseline to compare against, so the helper reports 0
    # (the AC assertion still passes; a test author is expected to fail
    # earlier when position samples are missing).
    post_only = [_pos(10_100)]
    measured = mse.position_drift_m(post_only, around_ms=10_000)
    assert measured == 0.0
def test_position_drift_returns_zero_when_no_post_injection_sample() -> None:
    """With no sample after the injection the drift helper returns 0.0."""
    pre_only = [_pos(9_900)]
    measured = mse.position_drift_m(pre_only, around_ms=10_000)
    assert measured == 0.0
def test_rejection_before_injection_is_ignored() -> None:
    """A bad-signature statustext that predates the injection is not credited."""
    event = mse.InjectionEvent(
        sub_case=mse.SubCase.WRONG_KEY, injected_at_ms=10_000
    )
    # The only bad-signature text is 500 ms *before* the injection.
    early_rejection = _st(9_500, "MAVLink: BAD_SIGNATURE")
    later_noise = _st(10_400, "ok")
    fixes = [_pos(9_900), _pos(10_100)]
    outcome = mse.evaluate_subcase(event, [early_rejection, later_noise], fixes)
    assert outcome.rejection_at_ms is None
    assert not outcome.passes
def test_aggregate_passes_only_if_all_subcases_pass() -> None:
    """Three injections, each with a timely rejection, give an aggregate pass."""
    unsigned = mse.InjectionEvent(mse.SubCase.UNSIGNED, injected_at_ms=10_000)
    wrong_key = mse.InjectionEvent(mse.SubCase.WRONG_KEY, injected_at_ms=20_000)
    replayed = mse.InjectionEvent(mse.SubCase.REPLAYED, injected_at_ms=30_000)
    texts = [
        _st(10_100, "MAVLink: BAD_SIGNATURE"),
        _st(20_200, "Signature rejected"),
        _st(30_300, "Bad signature received"),
    ]
    fixes = [_pos(9_900), _pos(40_100)]
    summary = mse.evaluate(
        [unsigned, wrong_key, replayed],
        statustexts=texts,
        positions=fixes,
    )
    assert summary.passes
def test_aggregate_fails_when_one_subcase_fails() -> None:
    """One sub-case without a rejection fails the aggregate but not its sibling."""
    events = [
        mse.InjectionEvent(mse.SubCase.UNSIGNED, injected_at_ms=10_000),
        # No statustext follows this injection, so it gets no rejection.
        mse.InjectionEvent(mse.SubCase.WRONG_KEY, injected_at_ms=20_000),
    ]
    texts = [_st(10_100, "MAVLink: BAD_SIGNATURE")]
    fixes = [_pos(9_900), _pos(40_100)]
    summary = mse.evaluate(events, statustexts=texts, positions=fixes)
    assert not summary.passes
    # Exactly two per-sub-case reports are expected.
    unsigned, wrong_key = summary.sub_cases
    assert unsigned.passes
    assert not wrong_key.passes
def test_csv_evidence_round_trip(tmp_path: Path) -> None:
    """The signing report's CSV has the expected header, sub-case, latency and verdict."""
    injection = mse.InjectionEvent(mse.SubCase.UNSIGNED, injected_at_ms=10_000)
    statustexts = [_st(10_200, "MAVLink: BAD_SIGNATURE")]
    positions = [_pos(9_900), _pos(10_100, lat_e7=10)]
    report = mse.evaluate(
        [injection], statustexts=statustexts, positions=positions
    )
    out = tmp_path / "out.csv"
    mse.write_csv_evidence(out, report)

    # The csv module asks for files to be opened with newline="" so that the
    # reader, not the text layer, interprets line endings.
    with out.open(newline="") as fh:
        rows = list(csv.reader(fh))

    assert rows[0][0] == "sub_case"
    assert rows[1][0] == "unsigned"
    assert rows[1][2] == "200"  # latency
    # The last column of the data row is expected to read "true".
    assert rows[1][-1] == "true"
+11
View File
@@ -71,6 +71,11 @@ E2E_ROOT = Path(__file__).resolve().parents[1]
"runner/helpers/companion_reboot_evaluator.py",
"runner/helpers/monte_carlo_envelope_evaluator.py",
"runner/helpers/escalation_ladder_evaluator.py",
"runner/helpers/cache_poisoning_evaluator.py",
"runner/helpers/egress_observer.py",
"runner/helpers/mavlink_signing_evaluator.py",
"runner/helpers/cve_probe_evaluator.py",
"runner/helpers/asan_fuzz_evaluator.py",
"fixtures/sitl_replay_builder/__init__.py",
"fixtures/sitl_replay_builder/builder.py",
"fixtures/sitl_replay_builder/build_p01_fixtures.py",
@@ -141,6 +146,12 @@ E2E_ROOT = Path(__file__).resolve().parents[1]
"tests/resilience/test_nft_res_02_companion_reboot.py",
"tests/resilience/test_nft_res_03_monte_carlo.py",
"tests/resilience/test_nft_res_04_blackout_escalation.py",
"tests/security/test_nft_sec_01_cache_poisoning.py",
"tests/security/test_nft_sec_02_no_egress.py",
"tests/security/test_nft_sec_03_mavlink_signing.py",
"tests/security/test_nft_sec_04_opencv_cve.py",
"tests/security/test_nft_sec_04_asan_fuzz.py",
"tests/security/test_nft_sec_05_dns_blackhole.py",
],
)
def test_required_path_exists(relative_path: str) -> None: