Files
gps-denied-onboard/e2e/_unit_tests/helpers/test_streaming_evaluator.py
T
Oleksandr Bezdieniezhnykh 73cd632e95 [AZ-428] [AZ-429] [AZ-430] [AZ-431] Add NFT-PERF-01..04 perf scenarios
Batch 85 — 4 Performance NFT scenarios + pure-logic evaluators.

- NFT-PERF-01 (AZ-428, Tier-2): two-config e2e latency p95 ≤ 400 ms
  (K=3@25°C, K=2 hybrid@50°C) + frame-drop ≤10% + informational per-stage
  partition recording (D-CROSS-LATENCY-1).
- NFT-PERF-02 (AZ-429): inter-emit p95 ≤ 350 ms + no ≥3 missed-emit
  windows. fc-adapter-aware SITL timestamp extraction (tlog vs MSP).
- NFT-PERF-03 (AZ-430, Tier-2): cold-start TTFF p95 ≤ 30 s AND max ≤ 45 s
  over N≥10 iterations.
- NFT-PERF-04 (AZ-431): spoof-promotion latency p95 ≤ 600 ms over N≥20
  randomized-start blackout+spoof events.

All scenarios consume external fixtures (AZ-595 dependency surfaced) and
fail loudly when fixtures are missing or empty. Public-boundary
discipline preserved — evaluators do NOT import src/gps_denied_onboard.

Tests: 60 new unit tests pass; 24 scenarios collect (4 tests × 2 fc × 3
vio). Code review: PASS_WITH_WARNINGS — 1 Medium (fixed in batch),
3 Low (production-dependency surfacings + future hygiene).

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-17 16:46:49 +03:00

331 lines
9.5 KiB
Python

"""Unit tests for ``runner.helpers.streaming_evaluator`` (AZ-429 / NFT-PERF-02)."""
from __future__ import annotations
from pathlib import Path
import pytest
from runner.helpers import streaming_evaluator as se
# ───────────────────────── percentile ─────────────────────────
def test_percentile_q_must_be_in_range() -> None:
    """``_percentile`` must raise ``ValueError`` for q = -1 and for q = 101."""
    # Arrange / Act / Assert — one q below the range, one above it
    for out_of_range_q in (-1.0, 101.0):
        with pytest.raises(ValueError):
            se._percentile([100.0], out_of_range_q)
def test_percentile_empty_returns_none() -> None:
    """An empty value list must yield ``None`` instead of a number."""
    # Act
    result = se._percentile([], 50.0)
    # Assert
    assert result is None
def test_percentile_single_value_returns_that_value() -> None:
    """With one value in the list, p0, p50 and p100 must all equal that value."""
    # Arrange
    lone_value = [42.0]
    # Act / Assert — the requested percentile must not matter
    for q in (0.0, 50.0, 100.0):
        assert se._percentile(lone_value, q) == 42.0
def test_percentile_known_distribution_linear_interpolation() -> None:
    """Check p0, p100 and the interpolated p50 of the series 100, 200, ..., 1000."""
    # Arrange — ten evenly spaced values, 100..1000 step 100
    series = [float(v) for v in range(100, 1001, 100)]
    # Act
    lowest = se._percentile(series, 0.0)
    highest = se._percentile(series, 100.0)
    median = se._percentile(series, 50.0)
    # Assert — the extremes are exact
    assert lowest == 100.0
    assert highest == 1000.0
    # Even-length list: p50 is the mean of the two middle values (500 and 600)
    assert median == pytest.approx(550.0)
def test_percentile_unsorted_input_is_sorted() -> None:
    """The median of an out-of-order list must be its middle value by rank."""
    # Arrange — deliberately not in ascending order
    shuffled = [1000.0, 100.0, 500.0]
    # Act
    median = se._percentile(shuffled, 50.0)
    # Assert
    assert median == 500.0
# ─────────────────── evaluate_inter_emit (AC-1) ───────────────────
def test_inter_emit_perfect_cadence_passes() -> None:
    """Twenty emits spaced exactly one target period apart must pass the p95 check."""
    # Arrange — timestamps at exact multiples of the target inter-frame period
    period_ms = se.TARGET_INTER_FRAME_MS
    emit_times = [k * period_ms for k in range(20)]
    # Act
    result = se.evaluate_inter_emit(emit_times)
    # Assert — 20 samples give 19 intervals, all equal to the target period
    assert result.sample_count == 20
    assert result.interval_count == 19
    assert result.p50_ms == pytest.approx(period_ms)
    assert result.p95_ms == pytest.approx(period_ms)
    assert result.passes_p95
def test_inter_emit_p95_at_budget_passes() -> None:
    """A p95 of exactly 350 ms must still count as a pass."""
    # Arrange — ten timestamps, every interval exactly 350 ms
    emit_times = [k * 350.0 for k in range(10)]
    # Act
    result = se.evaluate_inter_emit(emit_times)
    # Assert
    assert result.p95_ms == pytest.approx(350.0)
    assert result.passes_p95
def test_inter_emit_p95_above_budget_fails() -> None:
    """One 500 ms interval at the tail of ten must push p95 over 350 ms."""
    # Arrange — nine 333 ms intervals followed by a single 500 ms interval
    steady = [333.0 * k for k in range(10)]
    emit_times = steady + [steady[-1] + 500.0]
    # Act
    result = se.evaluate_inter_emit(emit_times)
    # Assert
    assert result.p95_ms is not None and result.p95_ms > 350.0
    assert not result.passes_p95
def test_inter_emit_empty_returns_none_percentiles_and_fails() -> None:
    """No timestamps: zero counts, ``None`` percentiles, and the p95 check fails."""
    # Act
    result = se.evaluate_inter_emit([])
    # Assert — nothing was counted
    assert result.sample_count == 0
    assert result.interval_count == 0
    # Assert — no percentile can be reported
    assert result.p50_ms is None
    assert result.p95_ms is None
    # Assert — missing data must not be reported as a pass
    assert not result.passes_p95
def test_inter_emit_single_sample_no_intervals() -> None:
    """A lone timestamp produces zero intervals and must not pass the p95 check."""
    # Arrange
    lone_emit = [1000.0]
    # Act
    result = se.evaluate_inter_emit(lone_emit)
    # Assert
    assert result.interval_count == 0
    assert not result.passes_p95
def test_inter_emit_custom_budget_overrides_default() -> None:
    """A caller-supplied ``budget_ms`` is reported back and used for the verdict."""
    # Arrange — 600 ms cadence, judged against a custom 700 ms budget
    emit_times = [k * 600.0 for k in range(5)]
    # Act
    result = se.evaluate_inter_emit(emit_times, budget_ms=700.0)
    # Assert
    assert result.budget_ms == 700.0
    assert result.passes_p95
def test_inter_emit_unsorted_input_is_sorted() -> None:
    """Out-of-order timestamps must give the p95 of the sorted sequence."""
    # Arrange — sorted order is [0, 333, 666, 1000], intervals [333, 333, 334]
    shuffled_times = [0.0, 1000.0, 333.0, 666.0]
    # Act
    result = se.evaluate_inter_emit(shuffled_times)
    # Assert — p95 of [333, 333, 334] = 333 + 0.9 = 333.9
    expected_p95 = pytest.approx(333.9, abs=0.5)
    assert result.p95_ms == expected_p95
# ─────────────────── evaluate_missed_emits (AC-2) ───────────────────
def test_missed_emits_no_misses_returns_zero() -> None:
    """A steady 333 ms cadence yields no missed-emit windows and passes."""
    # Arrange
    emit_times = [k * 333.0 for k in range(20)]
    # Act
    result = se.evaluate_missed_emits(emit_times)
    # Assert
    assert result.longest_run == 0
    assert result.windows == ()
    assert result.passes
def test_missed_emits_single_missed_interval_does_not_trip() -> None:
    """A single isolated long gap is recorded as a run of 1 and still passes."""
    # Arrange — the 666 → 1700 gap (1034 ms) is the only one above 666.67 ms
    emit_times = [0.0, 333.0, 666.0, 1700.0, 2033.0, 2366.0]
    # Act
    result = se.evaluate_missed_emits(emit_times)
    # Assert — one run of length 1, limit is 3
    assert result.longest_run == 1
    assert len(result.windows) == 1
    only_window = result.windows[0]
    assert only_window.length == 1
    assert result.passes
def test_missed_emits_two_consecutive_misses_does_not_trip_default_limit() -> None:
    """Two back-to-back long gaps stay under the default limit of three."""
    # Arrange — intervals 333, 1367, 1400, 333: two consecutive gaps > 666 ms
    emit_times = [0.0, 333.0, 1700.0, 3100.0, 3433.0]
    # Act
    result = se.evaluate_missed_emits(emit_times)
    # Assert — limit is 3, so a run of 2 is allowed
    assert result.longest_run == 2
    assert result.passes
def test_missed_emits_three_consecutive_misses_fails_default_limit() -> None:
    """Three back-to-back long gaps (the failure mode AC-2 forbids) must fail."""
    # Arrange — intervals 333, 1367, 1400, 1400, 333: three consecutive > 666 ms
    emit_times = [0.0, 333.0, 1700.0, 3100.0, 4500.0, 4833.0]
    # Act
    result = se.evaluate_missed_emits(emit_times)
    # Assert — exactly one window, and it spans all three missed intervals
    assert result.longest_run == 3
    assert len(result.windows) == 1
    only_window = result.windows[0]
    assert only_window.length == 3
    assert not result.passes
def test_missed_emits_multiple_disjoint_runs_tracked_independently() -> None:
    """Two separate runs of two misses are reported as two windows, not merged."""
    # Arrange — two runs of length 2, separated by on-time emits
    on_time_start = [0.0, 333.0]
    first_missed_pair = [1700.0, 3100.0]
    on_time_middle = [3433.0, 3766.0]
    second_missed_pair = [5200.0, 6600.0]
    emit_times = (
        on_time_start + first_missed_pair + on_time_middle + second_missed_pair
    )
    # Act
    result = se.evaluate_missed_emits(emit_times)
    # Assert — neither run reaches the limit, so the check passes
    assert result.longest_run == 2
    assert len(result.windows) == 2
    run_lengths = [window.length for window in result.windows]
    assert run_lengths == [2, 2]
    assert result.passes
def test_missed_emits_trailing_run_closes_correctly() -> None:
    """A missed run that lasts until the final sample is still closed and reported."""
    # Arrange — the last three intervals are 1334 ms each, so the run hits the end
    emit_times = [0.0, 333.0, 666.0, 2000.0, 3334.0, 4668.0]
    # Act
    result = se.evaluate_missed_emits(emit_times)
    # Assert — a single window of length 3 ending on the last timestamp
    assert result.longest_run == 3
    assert len(result.windows) == 1
    trailing_window = result.windows[0]
    assert trailing_window.length == 3
    assert trailing_window.end_ms == 4668.0
    assert not result.passes
def test_missed_emits_threshold_at_target_ratio() -> None:
    """The reported threshold must equal ``missed_ratio`` times the target period."""
    # Arrange — custom missed_ratio = 1.5; the one interval is 1 ms over threshold
    ratio = 1.5
    expected_threshold_ms = ratio * se.TARGET_INTER_FRAME_MS
    emit_times = [0.0, expected_threshold_ms + 1.0]
    # Act
    result = se.evaluate_missed_emits(emit_times, missed_ratio=ratio)
    # Assert
    assert result.missed_emit_threshold_ms == pytest.approx(expected_threshold_ms)
    assert result.longest_run == 1
def test_missed_emits_invalid_ratio_raises() -> None:
    """``missed_ratio`` values of 1.0 and 0.5 must be rejected with ``ValueError``."""
    # Arrange
    emit_times = [0.0, 1000.0]
    # Act / Assert
    for bad_ratio in (1.0, 0.5):
        with pytest.raises(ValueError):
            se.evaluate_missed_emits(emit_times, missed_ratio=bad_ratio)
def test_missed_emits_invalid_limit_raises() -> None:
    """A ``limit`` of 0 must be rejected with ``ValueError``."""
    # Arrange
    emit_times = [0.0, 1000.0]
    # Act / Assert
    with pytest.raises(ValueError):
        se.evaluate_missed_emits(emit_times, limit=0)
# ─────────────────── evaluate (aggregate) ───────────────────
def test_evaluate_clean_run_passes_both_acs() -> None:
    """A steady 333 ms cadence passes the aggregate verdict and both sub-checks."""
    # Arrange
    emit_times = [k * 333.0 for k in range(30)]
    # Act
    result = se.evaluate(emit_times)
    # Assert — overall verdict first, then each acceptance criterion
    assert result.passes
    assert result.inter_emit.passes_p95
    assert result.missed_emits.passes
def test_evaluate_p95_breach_with_no_missed_run_still_fails() -> None:
    """A p95 breach alone must fail the aggregate even when AC-2 is satisfied.

    Every interval is 400 ms: over the 350 ms p95 budget, but short of the
    longer gaps (> 666 ms) that the missed-emit tests in this module use.
    The test asserts that the missed-emit check passes, so the aggregate
    failure can only be attributed to the p95 breach.
    """
    # Arrange — eleven timestamps, ten 400 ms intervals (over the 350 ms budget)
    emit_times = [400.0 * k for k in range(11)]
    # Act
    result = se.evaluate(emit_times)
    # Assert — precondition from the test name: there is no failing missed run
    assert result.missed_emits.passes
    # Assert — the p95 breach on its own is enough to fail the aggregate
    assert not result.inter_emit.passes_p95
    assert not result.passes
# ─────────────────── csv emit ───────────────────
def test_write_csv_evidence_emits_header_and_row(tmp_path: Path) -> None:
    """The evidence CSV has exactly two lines, with the expected header columns."""
    # Arrange
    report = se.evaluate([k * 333.0 for k in range(10)])
    csv_path = tmp_path / "nft-perf-02.csv"
    # Act
    se.write_csv_evidence(csv_path, report)
    # Assert — two lines in total
    lines = csv_path.read_text().splitlines()
    assert len(lines) == 2
    # Assert — header starts with sample_count and carries both AC verdict columns
    columns = lines[0].split(",")
    assert columns[0] == "sample_count"
    assert "ac1_passes" in columns
    assert "ac2_passes" in columns
def test_write_intervals_csv_one_row_per_interval(tmp_path: Path) -> None:
    """The intervals CSV has the fixed header plus one row per input timestamp."""
    # Arrange — 5 timestamps, so 6 lines are expected: header + 5 sample rows
    emit_times = [0.0, 100.0, 200.0, 300.0, 400.0]
    csv_path = tmp_path / "intervals.csv"
    # Act
    se.write_intervals_csv(csv_path, emit_times)
    # Assert
    lines = csv_path.read_text().splitlines()
    header_line = lines[0]
    assert header_line == "index,t_emit_ms,inter_emit_ms"
    assert len(lines) == 1 + 5  # header + 5 sample rows
def test_write_intervals_csv_first_row_has_empty_interval(tmp_path: Path) -> None:
    """The first sample row has an empty interval column; the second shows the gap."""
    # Arrange
    csv_path = tmp_path / "intervals.csv"
    emit_times = [0.0, 100.0]
    # Act
    se.write_intervals_csv(csv_path, emit_times)
    # Assert — lines[0] is the header, so the sample rows start at index 1
    lines = csv_path.read_text().splitlines()
    first_sample_row = lines[1]
    second_sample_row = lines[2]
    assert first_sample_row.endswith(",")  # empty interval column on first sample
    assert second_sample_row.endswith(",100.000")