mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 19:51:13 +00:00
[AZ-436] [AZ-437] [AZ-438] [AZ-439] Add NFT-SEC-01..05 security scenarios
Batch 87: 6 NFT-SEC blackbox scenarios + 5 helper evaluators + 75 unit tests + cumulative review batches 85-87. * AZ-436 NFT-SEC-01: cache-poisoning safety budget (AC-NEW-9); aggregate false_trust_count ≤ N×1e-6; zero-tolerance default. Canonical-only by default; E2E_NFT_SEC_01_RELEASE_GATE=1 unlocks full matrix. * AZ-437 NFT-SEC-02 + NFT-SEC-05: shared egress-observation evaluator (AC-NEW-10); SEC-02 = 0 packets to non-e2e-net over 5min replay; SEC-05 = DNS-blackhole sidecar healthy + lookup fails + UDP-53 silent. * AZ-438 NFT-SEC-03: AP-only signing rejection (AC-NEW-11); 3 sub-cases (unsigned/wrong-key/replayed) each reject ≤500ms + no position drift. * AZ-439 NFT-SEC-04: probe (always-run) = no-crash + deterministic decode outcome; ASan-fuzz (release-gate) = 0 findings ≥4h; AC-3 corpus floor informational only per spec. Verdict per-batch: PASS_WITH_WARNINGS (5 Low). Cumulative review for batches 85-87 (K=3 window) also PASS_WITH_WARNINGS with 5 cross-batch findings — recommends hygiene PBIs for write_csv_evidence duplication (13 helpers) and _resolve_fixture_path duplication (13 scenarios), plus new tickets for AZ-595 fixture builder + DNS-blackhole sidecar service. Also adds _docs/LESSONS.md documenting the Jira transition-ID lesson (always call getTransitionsForJiraIssue first, never memorize numeric IDs across sessions). Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,176 @@
|
||||
"""Unit tests for ``runner.helpers.asan_fuzz_evaluator`` (NFT-SEC-04 / AZ-439 fuzz)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from runner.helpers import asan_fuzz_evaluator as afe
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("line", "expected"),
    [
        # A bare row of '=' characters carries no finding.
        ("=================================================================\n", None),
        (
            "==1234==ERROR: AddressSanitizer: heap-buffer-overflow on address ...",
            afe.AsanFindingCategory.HEAP_BUFFER_OVERFLOW,
        ),
        (
            "==1234==ERROR: AddressSanitizer: heap-use-after-free on address ...",
            afe.AsanFindingCategory.HEAP_USE_AFTER_FREE,
        ),
        (
            "==1234==ERROR: AddressSanitizer: stack-buffer-overflow on address ...",
            afe.AsanFindingCategory.STACK_BUFFER_OVERFLOW,
        ),
        (
            "==1234==ERROR: AddressSanitizer: stack-use-after-return on ...",
            afe.AsanFindingCategory.STACK_USE_AFTER_RETURN,
        ),
        (
            "==1234==ERROR: AddressSanitizer: global-buffer-overflow on ...",
            afe.AsanFindingCategory.GLOBAL_BUFFER_OVERFLOW,
        ),
        (
            "==1234==ERROR: AddressSanitizer: use-after-free on ...",
            afe.AsanFindingCategory.USE_AFTER_FREE,
        ),
        (
            "==1234==ERROR: AddressSanitizer: double-free on ...",
            afe.AsanFindingCategory.DOUBLE_FREE,
        ),
        # An ASan category that is not catalogued yet must still count as a
        # finding, so it is expected to classify as OTHER_FINDING.
        (
            "==1234==ERROR: AddressSanitizer: mysterious-future-category on ...",
            afe.AsanFindingCategory.OTHER_FINDING,
        ),
        ("just a normal log line, harmless", None),
        ("ERROR but no AddressSanitizer prefix", None),
    ],
)
def test_classify_asan_line(
    line: str,
    expected: afe.AsanFindingCategory | None,
) -> None:
    """``classify_asan_line`` returns the expected category, or ``None`` for benign lines."""
    actual = afe.classify_asan_line(line)
    assert actual == expected
|
||||
|
||||
|
||||
def test_zero_findings_and_full_duration_passes() -> None:
    """A finding-free log at the minimum fuzz duration passes findings, duration and overall."""
    benign_log = ["info line", "another info line"]

    report = afe.evaluate(
        benign_log,
        duration_seconds=afe.MIN_FUZZ_DURATION_SECONDS,
        corpus_size=afe.MIN_CORPUS_COVERAGE,
    )

    assert report.passes_findings
    assert report.passes_duration
    assert report.passes
|
||||
|
||||
|
||||
def test_short_duration_fails_even_with_zero_findings() -> None:
    """A one-minute run fails the duration check, and therefore overall, despite zero findings."""
    one_minute_s = 60

    report = afe.evaluate(
        [],
        duration_seconds=one_minute_s,
        corpus_size=afe.MIN_CORPUS_COVERAGE,
    )

    assert report.passes_findings
    assert not report.passes_duration
    assert not report.passes
|
||||
|
||||
|
||||
def test_one_finding_fails_full_run() -> None:
    """A single heap-buffer-overflow line is recorded and fails a full-duration run."""
    overflow_line = "==1==ERROR: AddressSanitizer: heap-buffer-overflow on ..."

    report = afe.evaluate(
        [overflow_line],
        duration_seconds=afe.MIN_FUZZ_DURATION_SECONDS,
        corpus_size=afe.MIN_CORPUS_COVERAGE,
    )

    assert len(report.findings) == 1
    first_finding = report.findings[0]
    assert first_finding.category is afe.AsanFindingCategory.HEAP_BUFFER_OVERFLOW
    assert not report.passes_findings
    assert not report.passes
|
||||
|
||||
|
||||
def test_unknown_asan_finding_still_fails() -> None:
    """An uncatalogued ASan category is recorded as OTHER_FINDING and fails the run."""
    unknown_line = "==1==ERROR: AddressSanitizer: brand-new-category"

    report = afe.evaluate(
        [unknown_line],
        duration_seconds=afe.MIN_FUZZ_DURATION_SECONDS,
        corpus_size=afe.MIN_CORPUS_COVERAGE,
    )

    first_finding = report.findings[0]
    assert first_finding.category is afe.AsanFindingCategory.OTHER_FINDING
    assert not report.passes
|
||||
|
||||
|
||||
def test_corpus_floor_is_informational_only() -> None:
    """Missing the corpus floor is reported but does not fail an otherwise clean run."""
    # Zero findings and the full duration, with a corpus size of only 10.
    tiny_corpus = 10

    report = afe.evaluate(
        [],
        duration_seconds=afe.MIN_FUZZ_DURATION_SECONDS,
        corpus_size=tiny_corpus,
    )

    assert not report.reached_corpus_floor
    assert report.passes
|
||||
|
||||
|
||||
def test_snippet_is_truncated_for_evidence() -> None:
    """The snippet stored for a very long finding line is at most 200 characters."""
    padding = "x" * 500
    oversized_line = f"==1==ERROR: AddressSanitizer: heap-buffer-overflow {padding}"

    report = afe.evaluate(
        [oversized_line],
        duration_seconds=afe.MIN_FUZZ_DURATION_SECONDS,
        corpus_size=afe.MIN_CORPUS_COVERAGE,
    )

    snippet = report.findings[0].snippet
    assert len(snippet) <= 200
|
||||
|
||||
|
||||
def test_multiple_findings_classified_and_counted() -> None:
    """Findings interleaved with benign lines are each recorded with their own category."""
    report = afe.evaluate(
        [
            "info",
            "==1==ERROR: AddressSanitizer: heap-buffer-overflow",
            "info",
            "==2==ERROR: AddressSanitizer: heap-buffer-overflow",
            "==3==ERROR: AddressSanitizer: use-after-free",
            "trailing log",
        ],
        duration_seconds=afe.MIN_FUZZ_DURATION_SECONDS,
        corpus_size=afe.MIN_CORPUS_COVERAGE,
    )

    assert len(report.findings) == 3

    seen = [finding.category for finding in report.findings]
    heap_overflows = sum(
        category == afe.AsanFindingCategory.HEAP_BUFFER_OVERFLOW for category in seen
    )
    use_after_frees = sum(
        category == afe.AsanFindingCategory.USE_AFTER_FREE for category in seen
    )
    assert heap_overflows == 2
    assert use_after_frees == 1
    assert not report.passes
|
||||
|
||||
|
||||
def test_csv_evidence_round_trip_no_findings(tmp_path: Path) -> None:
    """A clean report is written as exactly a header row plus one aggregate row."""
    clean_report = afe.evaluate(
        [],
        duration_seconds=afe.MIN_FUZZ_DURATION_SECONDS,
        corpus_size=afe.MIN_CORPUS_COVERAGE,
    )
    evidence_path = tmp_path / "out.csv"

    afe.write_csv_evidence(evidence_path, clean_report)

    with evidence_path.open() as handle:
        rows = list(csv.reader(handle))

    header, aggregate = rows[0], rows[1]
    assert header[0] == "duration_seconds"
    assert aggregate[5] == "true"  # passes_findings
    assert aggregate[6] == "true"  # passes
    # With no findings recorded there is no per-finding section.
    assert len(rows) == 2
|
||||
|
||||
|
||||
def test_csv_evidence_round_trip_with_findings(tmp_path: Path) -> None:
    """A report with two findings is written with a per-finding section after the aggregate row."""
    report = afe.evaluate(
        [
            "==1==ERROR: AddressSanitizer: heap-buffer-overflow",
            "==2==ERROR: AddressSanitizer: use-after-free",
        ],
        duration_seconds=afe.MIN_FUZZ_DURATION_SECONDS,
        corpus_size=afe.MIN_CORPUS_COVERAGE,
    )

    out = tmp_path / "out.csv"
    afe.write_csv_evidence(out, report)

    with out.open() as fh:
        rows = list(csv.reader(fh))

    assert rows[0][0] == "duration_seconds"
    assert rows[1][6] == "false"  # passes
    # Header + aggregate row + blank separator + sub-header + 2 finding rows
    # = 6 rows total.
    assert rows[2] == []
    assert rows[3] == ["finding_index", "category", "snippet"]
    assert len(rows) == 6
|
||||
@@ -0,0 +1,245 @@
|
||||
"""Unit tests for ``runner.helpers.cache_poisoning_evaluator`` (NFT-SEC-01 / AZ-436)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from runner.helpers import cache_poisoning_evaluator as cpe
|
||||
|
||||
|
||||
def _spec(tile_id: str, layer: str) -> cpe.PoisonedTileSpec:
    """Build a ``PoisonedTileSpec`` for ``tile_id`` attributed to defense layer ``layer``."""
    spec = cpe.PoisonedTileSpec(tile_id=tile_id, defense_layer=layer)
    return spec
|
||||
|
||||
|
||||
def _flight(
    flight_id: str,
    *,
    total: int = 1000,
    poisoned: tuple[cpe.PoisonedTileSpec, ...] | None = None,
    false_trust: tuple[cpe.FalseTrustEvent, ...] | None = None,
    rejection_reasons: dict[str, int] | None = None,
) -> cpe.FlightOutcome:
    """Build a ``FlightOutcome`` fixture.

    When ``poisoned`` is ``None`` the flight gets three poisoned tiles, one per
    defense layer (signing, freshness, voting). Falsy ``false_trust`` and
    ``rejection_reasons`` become an empty tuple and an empty dict.
    """
    if poisoned is None:
        default_layers = (
            ("t1", cpe.DEFENSE_LAYER_SIGNING),
            ("t2", cpe.DEFENSE_LAYER_FRESHNESS),
            ("t3", cpe.DEFENSE_LAYER_VOTING),
        )
        poisoned = tuple(_spec(tile_id, layer) for tile_id, layer in default_layers)

    events = false_trust if false_trust else ()
    reasons = rejection_reasons if rejection_reasons else {}

    return cpe.FlightOutcome(
        flight_id=flight_id,
        total_tile_count=total,
        poisoned_tiles=poisoned,
        false_trust_events=events,
        rejection_reasons=reasons,
    )
|
||||
|
||||
|
||||
def test_poison_ratio_within_band_passes_ratio_check() -> None:
    """A 3 % poison ratio (3 of 100 tiles) lies inside the [1 %, 5 %] band and passes."""
    in_band_flight = _flight("f1", total=100)

    report = cpe.evaluate([in_band_flight])

    assert report.passes_ratio
    assert in_band_flight.poison_ratio == pytest.approx(0.03)
|
||||
|
||||
|
||||
def test_poison_ratio_below_min_fails_ratio_check() -> None:
    """A 0.3 % poison ratio (3 of 1000 tiles) is under the 1 % floor and is flagged."""
    sparse_flight = _flight("f-low", total=1000)

    report = cpe.evaluate([sparse_flight])

    assert not report.passes_ratio
    assert "f-low" in report.flights_with_bad_poison_ratio
|
||||
|
||||
|
||||
def test_poison_ratio_above_max_fails_ratio_check() -> None:
    """A 6 % poison ratio (3 of 50 tiles) is over the 5 % ceiling and is flagged."""
    dense_flight = _flight("f-high", total=50)

    report = cpe.evaluate([dense_flight])

    assert not report.passes_ratio
    assert "f-high" in report.flights_with_bad_poison_ratio
|
||||
|
||||
|
||||
def test_zero_total_tile_count_gives_zero_ratio() -> None:
    """A flight with zero tiles reports a 0.0 poison ratio and is flagged as out of band."""
    empty_flight = _flight("f-empty", total=0, poisoned=())

    report = cpe.evaluate([empty_flight])

    assert empty_flight.poison_ratio == 0.0
    assert "f-empty" in report.flights_with_bad_poison_ratio
|
||||
|
||||
|
||||
def test_missing_defense_layer_fails_layer_coverage() -> None:
    """A flight whose poisoned tiles skip the voting layer fails layer coverage."""
    # Signing and freshness are exercised; voting is deliberately left out.
    two_layer_tiles = (
        _spec("t1", cpe.DEFENSE_LAYER_SIGNING),
        _spec("t2", cpe.DEFENSE_LAYER_FRESHNESS),
    )
    flight = _flight("f-missing", total=100, poisoned=two_layer_tiles)

    report = cpe.evaluate([flight])

    assert not report.passes_layer_coverage
    [(reported_id, absent_layers)] = report.flights_missing_defense_layers
    assert reported_id == "f-missing"
    assert absent_layers == [cpe.DEFENSE_LAYER_VOTING]
|
||||
|
||||
|
||||
def test_all_three_defense_layers_pass_layer_coverage() -> None:
    """The default fixture, covering all three defense layers, passes layer coverage."""
    report = cpe.evaluate([_flight("f-complete", total=100)])

    assert report.passes_layer_coverage
|
||||
|
||||
|
||||
def test_zero_false_trust_events_passes_budget() -> None:
    """A flight with no false-trust events reports a zero total and passes the budget."""
    clean_flight = _flight("f", total=100)

    report = cpe.evaluate([clean_flight])

    assert report.total_false_trust == 0
    assert report.passes_budget
|
||||
|
||||
|
||||
def test_single_false_trust_event_fails_budget() -> None:
    """One false-trust event is counted and fails the budget check."""
    flight = _flight(
        "f",
        total=100,
        false_trust=(
            cpe.FalseTrustEvent(
                flight_id="f",
                tile_id="t1",
                monotonic_ms=12345,
                defense_layer=cpe.DEFENSE_LAYER_SIGNING,
            ),
        ),
    )

    report = cpe.evaluate([flight])

    # Zero-tolerance default: the aggregate budget is N x 1e-6, which stays
    # far below one event (0.01 even at N=10000), so a single false-trust
    # event must be rejected.
    assert report.total_false_trust == 1
    assert not report.passes_budget
|
||||
|
||||
|
||||
def test_unknown_rejection_reason_fails_vocabulary_check() -> None:
    """A rejection reason outside the known vocabulary fails the check and flags the flight."""
    bogus_reasons = {"made_up_reason": 7}
    flight = _flight("f-vocab", total=100, rejection_reasons=bogus_reasons)

    report = cpe.evaluate([flight])

    assert not report.passes_rejection_reason_vocabulary
    assert "f-vocab" in report.flights_with_unknown_rejection_reasons
|
||||
|
||||
|
||||
def test_known_rejection_reasons_pass_vocabulary_check() -> None:
    """Signing, voting and ``freshness_gate_downgrade`` reasons are all accepted."""
    accepted_reasons = {
        cpe.DEFENSE_LAYER_SIGNING: 12,
        cpe.DEFENSE_LAYER_VOTING: 8,
        "freshness_gate_downgrade": 3,
    }
    flight = _flight("f-ok", total=100, rejection_reasons=accepted_reasons)

    report = cpe.evaluate([flight])

    assert report.passes_rejection_reason_vocabulary
|
||||
|
||||
|
||||
def test_aggregate_budget_scales_with_flight_count() -> None:
    """Fifty flights yield ``flight_count == 50`` and a budget of 50 x 1e-6."""
    fleet = []
    for index in range(50):
        fleet.append(_flight(f"f{index}", total=100))

    report = cpe.evaluate(fleet)

    assert report.flight_count == 50
    assert report.budget == pytest.approx(50 * 1e-6)
|
||||
|
||||
|
||||
def test_aggregate_counts_false_trust_across_flights() -> None:
    """False-trust events are summed over every flight in the run."""
    flights = []
    for index in range(4):
        # Only even-numbered flights carry a single false-trust event.
        if index % 2 == 0:
            events = (
                cpe.FalseTrustEvent(
                    flight_id=f"f{index}",
                    tile_id="tx",
                    monotonic_ms=100 * index,
                    defense_layer=cpe.DEFENSE_LAYER_SIGNING,
                ),
            )
        else:
            events = ()
        flights.append(_flight(f"f{index}", total=100, false_trust=events))

    report = cpe.evaluate(flights)

    # f0 and f2 contributed one event each.
    assert report.total_false_trust == 2
|
||||
|
||||
|
||||
def test_overall_pass_requires_all_subchecks() -> None:
    """A healthy flight passes overall and on each of the four sub-checks."""
    healthy_flight = _flight("f", total=100)

    report = cpe.evaluate([healthy_flight])

    assert report.passes
    assert report.passes_budget
    assert report.passes_ratio
    assert report.passes_layer_coverage
    assert report.passes_rejection_reason_vocabulary
|
||||
|
||||
|
||||
def test_overall_pass_fails_if_any_subcheck_fails() -> None:
    """A flight with an out-of-band poison ratio makes the overall verdict fail."""
    # 3 poisoned tiles out of 1000 is 0.3 %, i.e. a bad ratio.
    bad_ratio_flight = _flight("f-broken", total=1000)

    report = cpe.evaluate([bad_ratio_flight])

    assert not report.passes
|
||||
|
||||
|
||||
def test_empty_flight_list_gives_trivial_pass() -> None:
    """Evaluating no flights yields zero counts and a passing report."""
    # An empty run is a no-op, not an implicit failure; asserting N >= 1 is
    # the job of the scenario test.
    no_flights: list = []

    report = cpe.evaluate(no_flights)

    assert report.flight_count == 0
    assert report.total_false_trust == 0
    assert report.passes
|
||||
|
||||
|
||||
def test_csv_evidence_has_header_per_flight_and_aggregate_rows(tmp_path: Path) -> None:
    """The CSV holds a header, one row per flight, a blank row, then an AGGREGATE row."""
    report = cpe.evaluate([_flight(f"f{index}", total=100) for index in range(3)])
    evidence_path = tmp_path / "out.csv"

    cpe.write_csv_evidence(evidence_path, report)

    with evidence_path.open() as handle:
        rows = list(csv.reader(handle))

    assert rows[0][0] == "flight_id"
    flight_ids = {row[0] for row in (rows[1], rows[2], rows[3])}
    assert flight_ids == {"f0", "f1", "f2"}
    assert rows[4] == []
    aggregate = rows[5]
    assert aggregate[0] == "AGGREGATE"
    assert any("flight_count=3" in cell for cell in aggregate)
|
||||
@@ -0,0 +1,120 @@
|
||||
"""Unit tests for ``runner.helpers.cve_probe_evaluator`` (NFT-SEC-04 / AZ-439 probe)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
from pathlib import Path
|
||||
|
||||
from runner.helpers import cve_probe_evaluator as cpe
|
||||
|
||||
|
||||
def _rec(ms: int, kind: str) -> cpe.FdrSurvivalRecord:
    """Build an ``FdrSurvivalRecord`` of the given ``kind`` timestamped at ``ms``."""
    record = cpe.FdrSurvivalRecord(monotonic_ms=ms, kind=kind)
    return record
|
||||
|
||||
|
||||
def test_decode_success_in_window_is_classified_as_decode_success() -> None:
    """A decode-success record 5 ms after injection classifies as DECODE_SUCCESS."""
    records = [_rec(10_005, "frame-decode-success")]

    outcome = cpe.classify_probe_outcome(records, probe_injected_at_ms=10_000)

    assert outcome is cpe.ProbeFrameOutcome.DECODE_SUCCESS
|
||||
|
||||
|
||||
def test_decode_error_in_window_is_classified_as_frame_decode_error() -> None:
    """A decode-error record 10 ms after injection classifies as FRAME_DECODE_ERROR."""
    records = [_rec(10_010, "frame-decode-error")]

    outcome = cpe.classify_probe_outcome(records, probe_injected_at_ms=10_000)

    assert outcome is cpe.ProbeFrameOutcome.FRAME_DECODE_ERROR
|
||||
|
||||
|
||||
def test_no_record_in_window_is_missing() -> None:
    """A decode record timestamped before the injection classifies as MISSING."""
    pre_probe_only = [_rec(9_900, "frame-decode-success")]

    outcome = cpe.classify_probe_outcome(pre_probe_only, probe_injected_at_ms=10_000)

    assert outcome is cpe.ProbeFrameOutcome.MISSING
|
||||
|
||||
|
||||
def test_record_outside_tolerance_is_missing() -> None:
    """A decode record 100 ms after injection is MISSING under a 50 ms tolerance."""
    late_record = _rec(10_100, "frame-decode-success")

    outcome = cpe.classify_probe_outcome(
        [late_record],
        probe_injected_at_ms=10_000,
        tolerance_ms=50,
    )

    assert outcome is cpe.ProbeFrameOutcome.MISSING
|
||||
|
||||
|
||||
def test_first_match_in_window_wins() -> None:
    """With a success followed by an error in the window, the earlier success wins."""
    earlier_success = _rec(10_005, "frame-decode-success")
    later_error = _rec(10_010, "frame-decode-error")

    outcome = cpe.classify_probe_outcome(
        [earlier_success, later_error],
        probe_injected_at_ms=10_000,
    )

    assert outcome is cpe.ProbeFrameOutcome.DECODE_SUCCESS
|
||||
|
||||
|
||||
def test_passes_when_no_crash_and_decode_success() -> None:
    """A decode success plus a later FDR record passes no-crash, graceful-outcome and overall."""
    fdr_records = [
        _rec(10_005, "frame-decode-success"),
        _rec(11_000, "imu-tick"),
    ]

    report = cpe.evaluate(fdr_records, probe_injected_at_ms=10_000)

    assert report.passes_no_crash
    assert report.passes_graceful_outcome
    assert report.passes
|
||||
|
||||
|
||||
def test_passes_when_no_crash_and_graceful_error() -> None:
    """A decode error followed by a later FDR record still passes overall."""
    fdr_records = [
        _rec(10_005, "frame-decode-error"),
        _rec(11_000, "imu-tick"),
    ]

    report = cpe.evaluate(fdr_records, probe_injected_at_ms=10_000)

    assert report.passes
|
||||
|
||||
|
||||
def test_fails_when_no_post_probe_fdr_record() -> None:
    """With no FDR record after the probe, the no-crash check and the overall verdict fail."""
    # Every record predates the probe, which models the SUT crashing at
    # probe time. AC-1a fails.
    pre_probe_records = [
        _rec(9_500, "imu-tick"),
        _rec(9_900, "frame-decode-success"),
    ]

    report = cpe.evaluate(pre_probe_records, probe_injected_at_ms=10_000)

    assert not report.passes_no_crash
    assert not report.passes
|
||||
|
||||
|
||||
def test_fails_when_silent_drop() -> None:
    """Post-probe records without any decode record fail the graceful-outcome check."""
    # The SUT is alive (records exist after the probe) but the probe frame
    # produced no decode record at all: it was silently swallowed. AC-1b fails.
    heartbeat_only = [_rec(11_000, "imu-tick"), _rec(12_000, "imu-tick")]

    report = cpe.evaluate(heartbeat_only, probe_injected_at_ms=10_000)

    assert report.passes_no_crash
    assert not report.passes_graceful_outcome
    assert not report.passes
|
||||
|
||||
|
||||
def test_empty_fdr_archive_fails_both_subchecks() -> None:
    """An empty FDR archive fails no-crash, graceful-outcome and the overall verdict."""
    no_records: list = []

    report = cpe.evaluate(no_records, probe_injected_at_ms=10_000)

    assert not report.passes_no_crash
    assert not report.passes_graceful_outcome
    assert not report.passes
|
||||
|
||||
|
||||
def test_csv_evidence_round_trip(tmp_path: Path) -> None:
    """The probe report is written as a header row plus a data row that can be read back."""
    report = cpe.evaluate(
        [_rec(10_005, "frame-decode-success")],
        probe_injected_at_ms=10_000,
    )
    evidence_path = tmp_path / "out.csv"

    cpe.write_csv_evidence(evidence_path, report)

    with evidence_path.open() as handle:
        rows = list(csv.reader(handle))

    header, data = rows[0], rows[1]
    assert header[0] == "probe_injected_at_ms"
    assert data[2] == "decode-success"
    assert data[-1] == "true"
|
||||
@@ -0,0 +1,168 @@
|
||||
"""Unit tests for ``runner.helpers.egress_observer`` (NFT-SEC-02 + NFT-SEC-05 / AZ-437)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from runner.helpers import egress_observer as eo
|
||||
|
||||
|
||||
def _snap(other: int = 0, internal: int = 0, udp53: int = 0) -> eo.EgressCounterSnapshot:
    """Build an ``EgressCounterSnapshot`` from the three packet counters (all default to 0)."""
    counters = {
        "egress_packets_to_internal_net": internal,
        "egress_packets_to_other_destinations": other,
        "udp53_egress_packets": udp53,
    }
    return eo.EgressCounterSnapshot(**counters)
|
||||
|
||||
|
||||
def test_egress_counter_rejects_negative_values() -> None:
    """Constructing a snapshot with a negative counter raises ``ValueError``."""
    negative_internal = -1

    with pytest.raises(ValueError, match="cannot be negative"):
        eo.EgressCounterSnapshot(
            egress_packets_to_internal_net=negative_internal,
            egress_packets_to_other_destinations=0,
            udp53_egress_packets=0,
        )
|
||||
|
||||
|
||||
def test_no_egress_zero_delta_passes() -> None:
    """An unchanged other-destinations counter passes even though internal traffic grew."""
    baseline = _snap(other=10, internal=5)
    # Internal traffic went from 5 to 42; that growth is acceptable.
    final = _snap(other=10, internal=42)

    report = eo.evaluate_no_egress(baseline, final, window_label="5min")

    assert report.delta_other_destinations == 0
    assert report.passes
|
||||
|
||||
|
||||
def test_no_egress_nonzero_delta_fails() -> None:
    """One extra packet to another destination gives a delta of 1 and fails."""
    baseline = _snap(other=10)
    final = _snap(other=11)

    report = eo.evaluate_no_egress(baseline, final, window_label="5min")

    assert report.delta_other_destinations == 1
    assert not report.passes
|
||||
|
||||
|
||||
def test_no_egress_records_internal_delta_for_evidence() -> None:
    """The internal-net delta is reported while leaving the verdict passing."""
    baseline = _snap(internal=100)
    final = _snap(internal=200)

    report = eo.evaluate_no_egress(baseline, final, window_label="5min-derkachi")

    # Informational only: the internal delta does not affect the verdict.
    assert report.delta_internal == 100
    assert report.passes
|
||||
|
||||
|
||||
def test_dns_blackhole_passes_on_full_silence_and_failed_lookup() -> None:
    """Healthy sidecar, NXDOMAIN lookup and an unchanged UDP-53 counter pass together."""
    baseline = _snap(udp53=7)
    final = _snap(udp53=7)

    report = eo.evaluate_dns_blackhole(
        baseline,
        final,
        lookup_outcome=eo.DnsLookupOutcome.NXDOMAIN,
        sidecar_healthy=True,
    )

    assert report.passes
|
||||
|
||||
|
||||
def test_dns_blackhole_fails_on_successful_lookup() -> None:
    """A lookup that succeeds fails the lookup check and the overall verdict."""
    baseline = _snap(udp53=7)
    final = _snap(udp53=7)

    report = eo.evaluate_dns_blackhole(
        baseline,
        final,
        lookup_outcome=eo.DnsLookupOutcome.SUCCESS,
        sidecar_healthy=True,
    )

    assert not report.passes_lookup
    assert not report.passes
|
||||
|
||||
|
||||
def test_dns_blackhole_fails_when_udp53_packets_escaped() -> None:
    """A UDP-53 counter that grows by one fails the silence check and the overall verdict."""
    baseline = _snap(udp53=7)
    final = _snap(udp53=8)

    report = eo.evaluate_dns_blackhole(
        baseline,
        final,
        lookup_outcome=eo.DnsLookupOutcome.NXDOMAIN,
        sidecar_healthy=True,
    )

    assert not report.passes_udp_silence
    assert not report.passes
|
||||
|
||||
|
||||
def test_dns_blackhole_fails_when_sidecar_unhealthy() -> None:
    """An unhealthy sidecar fails the verdict even with a failed lookup and silent UDP-53."""
    baseline = _snap()
    final = _snap()

    report = eo.evaluate_dns_blackhole(
        baseline,
        final,
        lookup_outcome=eo.DnsLookupOutcome.NXDOMAIN,
        sidecar_healthy=False,
    )

    assert not report.passes
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "outcome",
    [
        eo.DnsLookupOutcome.NXDOMAIN,
        eo.DnsLookupOutcome.TIMEOUT,
        eo.DnsLookupOutcome.NO_SERVERS,
        eo.DnsLookupOutcome.OTHER_FAILURE,
    ],
)
def test_all_failure_outcomes_pass_lookup_check(outcome: eo.DnsLookupOutcome) -> None:
    """Each listed lookup-failure outcome satisfies the lookup check."""
    baseline = _snap()
    final = _snap()

    report = eo.evaluate_dns_blackhole(
        baseline,
        final,
        lookup_outcome=outcome,
        sidecar_healthy=True,
    )

    assert report.passes_lookup
|
||||
|
||||
|
||||
def test_no_egress_csv_evidence_round_trip(tmp_path: Path) -> None:
    """The no-egress report is written as a header row plus a data row that can be read back."""
    report = eo.evaluate_no_egress(
        _snap(other=0, internal=5),
        _snap(other=0, internal=42),
        window_label="5min",
    )
    evidence_path = tmp_path / "out.csv"

    eo.write_no_egress_csv_evidence(evidence_path, report)

    with evidence_path.open() as handle:
        rows = list(csv.reader(handle))

    header, data = rows[0], rows[1]
    assert header[0] == "window_label"
    assert data[0] == "5min"
    assert data[-1] == "true"
|
||||
|
||||
|
||||
def test_dns_blackhole_csv_evidence_round_trip(tmp_path: Path) -> None:
    """The DNS-blackhole report is written as a header row plus a data row that can be read back."""
    baseline = _snap(udp53=7)
    final = _snap(udp53=7)
    report = eo.evaluate_dns_blackhole(
        baseline,
        final,
        lookup_outcome=eo.DnsLookupOutcome.NXDOMAIN,
        sidecar_healthy=True,
    )
    evidence_path = tmp_path / "out.csv"

    eo.write_dns_blackhole_csv_evidence(evidence_path, report)

    with evidence_path.open() as handle:
        rows = list(csv.reader(handle))

    header, data = rows[0], rows[1]
    assert header[0] == "sidecar_healthy"
    assert data[1] == "nxdomain"
    assert data[-1] == "true"
|
||||
@@ -0,0 +1,196 @@
|
||||
"""Unit tests for ``runner.helpers.mavlink_signing_evaluator`` (NFT-SEC-03 / AZ-438)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from runner.helpers import mavlink_signing_evaluator as mse
|
||||
|
||||
|
||||
def _pos(ms: int, lat_e7: int = 0, lon_e7: int = 0) -> mse.PositionSample:
    """Build a ``PositionSample`` at ``ms``; latitude and longitude default to 0."""
    sample = mse.PositionSample(monotonic_ms=ms, lat_e7=lat_e7, lon_e7=lon_e7)
    return sample
|
||||
|
||||
|
||||
def _st(ms: int, text: str) -> mse.StatustextSample:
    """Build a ``StatustextSample`` carrying ``text`` timestamped at ``ms``."""
    sample = mse.StatustextSample(monotonic_ms=ms, text=text)
    return sample
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("text", "expected"),
    [
        # Variants that must be recognised as a signature rejection.
        ("MAVLink: BAD_SIGNATURE", True),
        ("BAD SIGNATURE", True),
        ("Bad signature received from sysid=255", True),
        ("Signature rejected on link 0", True),
        # Unrelated or empty status texts.
        ("PreArm: GPS Glitch", False),
        ("OK", False),
        ("", False),
    ],
)
def test_is_bad_signature_statustext_matches_documented_variants(
    text: str,
    expected: bool,
) -> None:
    """``is_bad_signature_statustext`` returns exactly ``True`` or ``False`` per variant."""
    verdict = mse.is_bad_signature_statustext(text)
    assert verdict is expected
|
||||
|
||||
|
||||
def test_subcase_passes_when_rejection_arrives_within_budget() -> None:
    """A rejection 300 ms after injection with no position change passes the sub-case."""
    unsigned_injection = mse.InjectionEvent(
        sub_case=mse.SubCase.UNSIGNED,
        injected_at_ms=10_000,
    )
    rejections = [_st(10_300, "MAVLink: BAD_SIGNATURE")]
    track = [_pos(9_900), _pos(10_100)]

    report = mse.evaluate_subcase(unsigned_injection, rejections, track)

    assert report.rejection_latency_ms == 300
    assert report.passes_rejection
    assert report.passes
|
||||
|
||||
|
||||
def test_subcase_fails_when_no_rejection_seen() -> None:
    """With only an unrelated status text, no rejection is recorded and the sub-case fails."""
    wrong_key_injection = mse.InjectionEvent(
        sub_case=mse.SubCase.WRONG_KEY,
        injected_at_ms=10_000,
    )
    unrelated_texts = [_st(10_300, "ok normal text")]
    track = [_pos(9_900), _pos(10_100)]

    report = mse.evaluate_subcase(wrong_key_injection, unrelated_texts, track)

    assert report.rejection_at_ms is None
    assert not report.passes_rejection
    assert not report.passes
|
||||
|
||||
|
||||
def test_subcase_fails_when_rejection_too_slow() -> None:
    """A rejection arriving 1000 ms after injection is measured but fails the sub-case."""
    replayed_injection = mse.InjectionEvent(
        sub_case=mse.SubCase.REPLAYED,
        injected_at_ms=10_000,
    )
    late_rejection = [_st(11_000, "MAVLink: BAD_SIGNATURE")]
    track = [_pos(9_900), _pos(10_100)]

    report = mse.evaluate_subcase(replayed_injection, late_rejection, track)

    assert report.rejection_latency_ms == 1000
    assert not report.passes_rejection
    assert not report.passes
|
||||
|
||||
|
||||
def test_subcase_fails_when_position_drifts() -> None:
    """Position drift beyond the tolerance fails the sub-case despite a timely rejection."""
    unsigned_injection = mse.InjectionEvent(
        sub_case=mse.SubCase.UNSIGNED,
        injected_at_ms=10_000,
    )
    rejections = [_st(10_200, "MAVLink: BAD_SIGNATURE")]
    # A latitude shift of 0.0001 deg is roughly 11 m, clearly beyond the 1 m
    # tolerance; it simulates a successful poison of AP's GPS state.
    before_injection = _pos(9_900, lat_e7=0)
    after_injection = _pos(10_100, lat_e7=1_000)

    report = mse.evaluate_subcase(
        unsigned_injection,
        rejections,
        [before_injection, after_injection],
    )

    assert report.position_drift_m > mse.POSITION_DRIFT_TOLERANCE_M
    assert not report.passes_no_position_update
    assert not report.passes
|
||||
|
||||
|
||||
def test_subcase_passes_with_tiny_jitter_within_tolerance() -> None:
    """Position jitter below the drift tolerance does not fail an otherwise passing sub-case."""
    injection = mse.InjectionEvent(
        sub_case=mse.SubCase.UNSIGNED, injected_at_ms=10_000
    )
    statustexts = [_st(10_100, "MAVLink: BAD_SIGNATURE")]
    # 10 e7-lat units ≈ 11 cm (1 unit ≈ 1.1 cm) — well below the 1 m tolerance.
    positions = [_pos(9_900, lat_e7=0), _pos(10_100, lat_e7=10)]

    report = mse.evaluate_subcase(injection, statustexts, positions)

    assert report.position_drift_m < mse.POSITION_DRIFT_TOLERANCE_M
    assert report.passes
|
||||
|
||||
|
||||
def test_position_drift_returns_zero_when_no_pre_injection_sample() -> None:
    """With only a post-injection sample, ``position_drift_m`` returns 0.0."""
    # There is no baseline before the injection, so the helper reports 0 and
    # the AC assertion still passes; the test author is expected to fail the
    # test earlier when positions are missing.
    post_only = [_pos(10_100)]

    drift = mse.position_drift_m(post_only, around_ms=10_000)

    assert drift == 0.0
|
||||
|
||||
|
||||
def test_position_drift_returns_zero_when_no_post_injection_sample() -> None:
    """With only a pre-injection sample, ``position_drift_m`` returns 0.0."""
    pre_only = [_pos(9_900)]

    drift = mse.position_drift_m(pre_only, around_ms=10_000)

    assert drift == 0.0
|
||||
|
||||
|
||||
def test_rejection_before_injection_is_ignored() -> None:
    """A bad-signature text that predates the injection does not count as its rejection."""
    wrong_key_injection = mse.InjectionEvent(
        sub_case=mse.SubCase.WRONG_KEY,
        injected_at_ms=10_000,
    )
    stale_rejection = _st(9_500, "MAVLink: BAD_SIGNATURE")  # before injection: ignored
    unrelated_text = _st(10_400, "ok")
    track = [_pos(9_900), _pos(10_100)]

    report = mse.evaluate_subcase(
        wrong_key_injection,
        [stale_rejection, unrelated_text],
        track,
    )

    assert report.rejection_at_ms is None
    assert not report.passes
|
||||
|
||||
|
||||
def test_aggregate_passes_only_if_all_subcases_pass() -> None:
    """Three injections, each followed by its own rejection text, pass in aggregate."""
    unsigned = mse.InjectionEvent(mse.SubCase.UNSIGNED, injected_at_ms=10_000)
    wrong_key = mse.InjectionEvent(mse.SubCase.WRONG_KEY, injected_at_ms=20_000)
    replayed = mse.InjectionEvent(mse.SubCase.REPLAYED, injected_at_ms=30_000)
    rejections = [
        _st(10_100, "MAVLink: BAD_SIGNATURE"),
        _st(20_200, "Signature rejected"),
        _st(30_300, "Bad signature received"),
    ]
    track = [_pos(9_900), _pos(40_100)]

    report = mse.evaluate(
        [unsigned, wrong_key, replayed],
        statustexts=rejections,
        positions=track,
    )

    assert report.passes
|
||||
|
||||
|
||||
def test_aggregate_fails_when_one_subcase_fails() -> None:
    """One sub-case without a rejection fails the aggregate while the other still passes."""
    unsigned = mse.InjectionEvent(mse.SubCase.UNSIGNED, injected_at_ms=10_000)
    # No status text follows this injection, so it is never rejected.
    wrong_key = mse.InjectionEvent(mse.SubCase.WRONG_KEY, injected_at_ms=20_000)
    rejections = [_st(10_100, "MAVLink: BAD_SIGNATURE")]
    track = [_pos(9_900), _pos(40_100)]

    report = mse.evaluate(
        [unsigned, wrong_key],
        statustexts=rejections,
        positions=track,
    )

    assert not report.passes
    [unsigned_report, wrong_key_report] = report.sub_cases
    assert unsigned_report.passes
    assert not wrong_key_report.passes
|
||||
|
||||
|
||||
def test_csv_evidence_round_trip(tmp_path: Path) -> None:
    """The signing report is written as a header row plus one row per sub-case."""
    unsigned = mse.InjectionEvent(mse.SubCase.UNSIGNED, injected_at_ms=10_000)
    rejections = [_st(10_200, "MAVLink: BAD_SIGNATURE")]
    track = [_pos(9_900), _pos(10_100, lat_e7=10)]
    report = mse.evaluate([unsigned], statustexts=rejections, positions=track)
    evidence_path = tmp_path / "out.csv"

    mse.write_csv_evidence(evidence_path, report)

    with evidence_path.open() as handle:
        rows = list(csv.reader(handle))

    header, data = rows[0], rows[1]
    assert header[0] == "sub_case"
    assert data[0] == "unsigned"
    assert data[2] == "200"  # latency
    assert data[-1] == "true"
|
||||
Reference in New Issue
Block a user