mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-21 16:31:14 +00:00
73cd632e95
Batch 85 — 4 Performance NFT scenarios + pure-logic evaluators. - NFT-PERF-01 (AZ-428, Tier-2): two-config e2e latency p95 ≤ 400 ms (K=3@25°C, K=2 hybrid@50°C) + frame-drop ≤10% + informational per-stage partition recording (D-CROSS-LATENCY-1). - NFT-PERF-02 (AZ-429): inter-emit p95 ≤ 350 ms + no ≥3 missed-emit windows. fc-adapter-aware SITL timestamp extraction (tlog vs MSP). - NFT-PERF-03 (AZ-430, Tier-2): cold-start TTFF p95 ≤ 30 s AND max ≤ 45 s over N≥10 iterations. - NFT-PERF-04 (AZ-431): spoof-promotion latency p95 ≤ 600 ms over N≥20 randomized-start blackout+spoof events. All scenarios consume external fixtures (AZ-595 dependency surfaced) and fail loudly when fixtures are missing or empty. Public-boundary discipline preserved — evaluators do NOT import src/gps_denied_onboard. Tests: 60 new unit tests pass; 24 scenarios collect (4 tests × 2 fc × 3 vio). Code review: PASS_WITH_WARNINGS — 1 Medium (fixed in batch), 3 Low (production-dependency surfacings + future hygiene). Co-authored-by: Cursor <cursoragent@cursor.com>
331 lines
9.5 KiB
Python
331 lines
9.5 KiB
Python
"""Unit tests for ``runner.helpers.streaming_evaluator`` (AZ-429 / NFT-PERF-02)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from runner.helpers import streaming_evaluator as se
|
|
|
|
|
|
# ───────────────────────── percentile ─────────────────────────
|
|
|
|
|
|
def test_percentile_q_must_be_in_range() -> None:
    """A percentile rank of -1 or 101 must raise ``ValueError``."""
    # Arrange / Act / Assert — one rank below and one above the accepted range
    for out_of_range_q in (-1.0, 101.0):
        with pytest.raises(ValueError):
            se._percentile([100.0], out_of_range_q)
|
|
|
|
|
|
def test_percentile_empty_returns_none() -> None:
    """The percentile of an empty list is ``None``."""
    # Act
    outcome = se._percentile([], 50.0)

    # Assert
    assert outcome is None
|
|
|
|
|
|
def test_percentile_single_value_returns_that_value() -> None:
    """With a single sample, the 0th, 50th and 100th percentiles all equal it."""
    # Arrange
    lone_sample = 42.0

    # Act / Assert — a fresh one-element list is passed for every rank
    for rank in (0.0, 50.0, 100.0):
        assert se._percentile([lone_sample], rank) == lone_sample
|
|
|
|
|
|
def test_percentile_known_distribution_linear_interpolation() -> None:
    """Check the minimum, maximum and interpolated median of 100..1000."""
    # Arrange — ten evenly spaced values: 100.0, 200.0, ..., 1000.0
    ladder = list(map(float, range(100, 1001, 100)))

    # Act / Assert — the extremes are returned exactly
    lowest = se._percentile(ladder, 0.0)
    assert lowest == 100.0

    highest = se._percentile(ladder, 100.0)
    assert highest == 1000.0

    # p50 of an even-length sorted list is the mean of the two middle values
    median = se._percentile(ladder, 50.0)
    assert median == pytest.approx(550.0)
|
|
|
|
|
|
def test_percentile_unsorted_input_is_sorted() -> None:
    """The median of an out-of-order list is its middle value by magnitude."""
    # Arrange — deliberately not in ascending order
    shuffled = [1000.0, 100.0, 500.0]

    # Act
    median = se._percentile(shuffled, 50.0)

    # Assert
    assert median == 500.0
|
|
|
|
|
|
# ─────────────────── evaluate_inter_emit (AC-1) ───────────────────
|
|
|
|
|
|
def test_inter_emit_perfect_cadence_passes() -> None:
    """Emits spaced exactly one target interval apart satisfy the p95 check."""
    # Arrange — 20 timestamps at multiples of ``TARGET_INTER_FRAME_MS``
    target_ms = se.TARGET_INTER_FRAME_MS
    emit_times = [tick * target_ms for tick in range(20)]

    # Act
    result = se.evaluate_inter_emit(emit_times)

    # Assert — 20 samples yield 19 intervals, each equal to the target
    assert result.sample_count == 20
    assert result.interval_count == 19
    expected_interval = pytest.approx(target_ms)
    assert result.p50_ms == expected_interval
    assert result.p95_ms == expected_interval
    assert result.passes_p95
|
|
|
|
|
|
def test_inter_emit_p95_at_budget_passes() -> None:
    """A p95 of exactly 350 ms is still a pass."""
    # Arrange — ten timestamps, every interval exactly 350 ms
    step_ms = 350.0
    emit_times = [tick * step_ms for tick in range(10)]

    # Act
    result = se.evaluate_inter_emit(emit_times)

    # Assert
    assert result.p95_ms == pytest.approx(350.0)
    assert result.passes_p95
|
|
|
|
|
|
def test_inter_emit_p95_above_budget_fails() -> None:
    """One 500 ms interval among ten pushes p95 past 350 ms and fails."""
    # Arrange — 0, 333, ..., 2997 (nine 333 ms intervals), then a 500 ms gap;
    # with 10 intervals, p95 sits on the tail
    nominal = [333.0 * tick for tick in range(10)]
    emit_times = nominal + [333.0 * 9 + 500.0]

    # Act
    result = se.evaluate_inter_emit(emit_times)

    # Assert
    p95 = result.p95_ms
    assert p95 is not None
    assert p95 > 350.0
    assert not result.passes_p95
|
|
|
|
|
|
def test_inter_emit_empty_returns_none_percentiles_and_fails() -> None:
    """No samples: zero counts, ``None`` percentiles and a failing verdict."""
    # Act
    result = se.evaluate_inter_emit([])

    # Assert — nothing was counted
    assert result.sample_count == 0
    assert result.interval_count == 0

    # Assert — no percentile is reported and the check does not pass
    for percentile in (result.p50_ms, result.p95_ms):
        assert percentile is None
    assert not result.passes_p95
|
|
|
|
|
|
def test_inter_emit_single_sample_no_intervals() -> None:
    """A single timestamp produces no interval and therefore no pass."""
    # Arrange
    only_emit = [1000.0]

    # Act
    result = se.evaluate_inter_emit(only_emit)

    # Assert
    assert result.interval_count == 0
    assert not result.passes_p95
|
|
|
|
|
|
def test_inter_emit_custom_budget_overrides_default() -> None:
    """An explicit ``budget_ms`` is recorded on the report and used for the verdict."""
    # Arrange — 600 ms cadence checked against a custom 700 ms budget
    cadence_ms = 600.0
    emit_times = [tick * cadence_ms for tick in range(5)]

    # Act
    result = se.evaluate_inter_emit(emit_times, budget_ms=700.0)

    # Assert
    assert result.budget_ms == 700.0
    assert result.passes_p95
|
|
|
|
|
|
def test_inter_emit_unsorted_input_is_sorted() -> None:
    """Out-of-order timestamps give the p95 expected for their sorted order."""
    # Arrange — sorted this is [0, 333, 666, 1000] → intervals [333, 333, 334]
    shuffled_times = [0.0, 1000.0, 333.0, 666.0]

    # Act
    result = se.evaluate_inter_emit(shuffled_times)

    # Assert — p95 of [333, 333, 334] = 333 + 0.9 = 333.9
    expected_p95 = pytest.approx(333.9, abs=0.5)
    assert result.p95_ms == expected_p95
|
|
|
|
|
|
# ─────────────────── evaluate_missed_emits (AC-2) ───────────────────
|
|
|
|
|
|
def test_missed_emits_no_misses_returns_zero() -> None:
    """A steady 333 ms cadence reports no missed-emit windows and passes."""
    # Arrange
    cadence_ms = 333.0
    emit_times = [tick * cadence_ms for tick in range(20)]

    # Act
    result = se.evaluate_missed_emits(emit_times)

    # Assert
    assert result.longest_run == 0
    assert result.windows == ()
    assert result.passes
|
|
|
|
|
|
def test_missed_emits_single_missed_interval_does_not_trip() -> None:
    """One isolated long gap is recorded as a run of length 1 and still passes."""
    # Arrange — one isolated > 666.67 ms gap (666 → 1700)
    emit_times = [0.0, 333.0, 666.0, 1700.0, 2033.0, 2366.0]

    # Act
    result = se.evaluate_missed_emits(emit_times)

    # Assert — one run of length 1, limit is 3
    assert result.longest_run == 1
    assert len(result.windows) == 1
    sole_window = result.windows[0]
    assert sole_window.length == 1
    assert result.passes
|
|
|
|
|
|
def test_missed_emits_two_consecutive_misses_does_not_trip_default_limit() -> None:
    """Two back-to-back long gaps stay under the default limit."""
    # Arrange — two consecutive >666 ms intervals (333 → 1700 → 3100)
    emit_times = [0.0, 333.0, 1700.0, 3100.0, 3433.0]

    # Act
    result = se.evaluate_missed_emits(emit_times)

    # Assert — limit is 3, so a run of 2 is allowed
    assert result.longest_run == 2
    assert result.passes
|
|
|
|
|
|
def test_missed_emits_three_consecutive_misses_fails_default_limit() -> None:
    """Three back-to-back long gaps form one window of length 3 and fail."""
    # Arrange — three consecutive >666 ms intervals (the failure mode AC-2 forbids)
    emit_times = [0.0, 333.0, 1700.0, 3100.0, 4500.0, 4833.0]

    # Act
    result = se.evaluate_missed_emits(emit_times)

    # Assert
    assert result.longest_run == 3
    assert len(result.windows) == 1
    sole_window = result.windows[0]
    assert sole_window.length == 3
    assert not result.passes
|
|
|
|
|
|
def test_missed_emits_multiple_disjoint_runs_tracked_independently() -> None:
    """Two separated runs of two long gaps are reported as two windows."""
    # Arrange — two separate runs, each length 2
    on_time_start = [0.0, 333.0]  # OK
    first_stall = [1700.0, 3100.0]  # two missed
    on_time_middle = [3433.0, 3766.0]  # OK
    second_stall = [5200.0, 6600.0]  # two more missed
    emit_times = [*on_time_start, *first_stall, *on_time_middle, *second_stall]

    # Act
    result = se.evaluate_missed_emits(emit_times)

    # Assert
    assert result.longest_run == 2
    assert len(result.windows) == 2
    assert all(window.length == 2 for window in result.windows)
    assert result.passes
|
|
|
|
|
|
def test_missed_emits_trailing_run_closes_correctly() -> None:
    """A run of long gaps that reaches the final sample is still reported."""
    # Arrange — last 3 intervals all missed (the run extends to the end of the list)
    emit_times = [0.0, 333.0, 666.0, 2000.0, 3334.0, 4668.0]

    # Act
    result = se.evaluate_missed_emits(emit_times)

    # Assert — a single window of length 3 ending on the last timestamp
    assert result.longest_run == 3
    assert len(result.windows) == 1
    trailing_window = result.windows[0]
    assert trailing_window.length == 3
    assert trailing_window.end_ms == 4668.0
    assert not result.passes
|
|
|
|
|
|
def test_missed_emits_threshold_at_target_ratio() -> None:
    """A custom ``missed_ratio`` scales the reported missed-emit threshold."""
    # Arrange — custom missed_ratio = 1.5; the single gap is 1 ms past it
    scaled_threshold_ms = 1.5 * se.TARGET_INTER_FRAME_MS
    emit_times = [0.0, scaled_threshold_ms + 1.0]

    # Act
    result = se.evaluate_missed_emits(emit_times, missed_ratio=1.5)

    # Assert
    assert result.missed_emit_threshold_ms == pytest.approx(scaled_threshold_ms)
    assert result.longest_run == 1
|
|
|
|
|
|
def test_missed_emits_invalid_ratio_raises() -> None:
    """``missed_ratio`` values of 1.0 and 0.5 are rejected with ``ValueError``."""
    # Act / Assert — a fresh sample list is passed for each rejected ratio
    for rejected_ratio in (1.0, 0.5):
        with pytest.raises(ValueError):
            se.evaluate_missed_emits([0.0, 1000.0], missed_ratio=rejected_ratio)
|
|
|
|
|
|
def test_missed_emits_invalid_limit_raises() -> None:
    """A ``limit`` of 0 is rejected with ``ValueError``."""
    # Arrange
    emit_times = [0.0, 1000.0]

    # Act / Assert
    with pytest.raises(ValueError):
        se.evaluate_missed_emits(emit_times, limit=0)
|
|
|
|
|
|
# ─────────────────── evaluate (aggregate) ───────────────────
|
|
|
|
|
|
def test_evaluate_clean_run_passes_both_acs() -> None:
    """A steady 333 ms cadence passes overall and on each sub-report."""
    # Arrange
    cadence_ms = 333.0
    emit_times = [tick * cadence_ms for tick in range(30)]

    # Act
    verdict = se.evaluate(emit_times)

    # Assert — the aggregate, then the inter-emit and missed-emit parts
    assert verdict.passes
    assert verdict.inter_emit.passes_p95
    assert verdict.missed_emits.passes
|
|
|
|
|
|
def test_evaluate_p95_breach_with_no_missed_run_still_fails() -> None:
    """A p95 breach fails the aggregate verdict even when the missed-emit check passes."""
    # Arrange — eleven emits 400 ms apart: every interval is over the 350 ms
    # budget, and there is no run of consecutive missed emits
    samples = [tick * 400.0 for tick in range(11)]

    # Act
    report = se.evaluate(samples)

    # Assert — first pin the precondition named in the test: the missed-emit
    # check passes, so the aggregate failure below is caused by the p95 breach
    # NOTE(review): assumes 400 ms is below the default missed-emit threshold
    # (comments in this file put it at ~666.67 ms) — confirm
    assert report.missed_emits.passes
    assert not report.inter_emit.passes_p95
    assert not report.passes
|
|
|
|
|
|
# ─────────────────── csv emit ───────────────────
|
|
|
|
|
|
def test_write_csv_evidence_emits_header_and_row(tmp_path: Path) -> None:
    """The evidence CSV contains exactly one header line and one data line."""
    # Arrange
    samples = [i * 333.0 for i in range(10)]
    report = se.evaluate(samples)
    out_path = tmp_path / "nft-perf-02.csv"

    # Act
    se.write_csv_evidence(out_path, report)

    # Assert — decode explicitly so the check does not depend on the locale
    # NOTE(review): assumes the writer emits ASCII/UTF-8 text — confirm
    lines = out_path.read_text(encoding="utf-8").splitlines()
    assert len(lines) == 2
    header = lines[0].split(",")
    assert header[0] == "sample_count"
    assert "ac1_passes" in header
    assert "ac2_passes" in header
|
|
|
|
|
|
def test_write_intervals_csv_one_row_per_interval(tmp_path: Path) -> None:
    """The intervals CSV has the expected header followed by five data rows."""
    # Arrange — 5 timestamps; despite the test name, the row count asserted
    # below is one data row per timestamp (5), not one per interval (4)
    samples = [0.0, 100.0, 200.0, 300.0, 400.0]
    out_path = tmp_path / "intervals.csv"

    # Act
    se.write_intervals_csv(out_path, samples)

    # Assert — decode explicitly so the check does not depend on the locale
    # NOTE(review): assumes the writer emits ASCII/UTF-8 text — confirm
    lines = out_path.read_text(encoding="utf-8").splitlines()
    assert lines[0] == "index,t_emit_ms,inter_emit_ms"
    assert len(lines) == 1 + 5  # header + 5 sample rows
|
|
|
|
|
|
def test_write_intervals_csv_first_row_has_empty_interval(tmp_path: Path) -> None:
    """The first data row has an empty interval column; the second carries 100.000."""
    # Arrange
    out_path = tmp_path / "intervals.csv"

    # Act
    se.write_intervals_csv(out_path, [0.0, 100.0])

    # Assert — decode explicitly so the check does not depend on the locale
    # NOTE(review): assumes the writer emits ASCII/UTF-8 text — confirm
    rows = out_path.read_text(encoding="utf-8").splitlines()
    assert rows[1].endswith(",")  # empty interval column on first sample row
    assert rows[2].endswith(",100.000")
|