Files
Oleksandr Bezdieniezhnykh 73cd632e95 [AZ-428] [AZ-429] [AZ-430] [AZ-431] Add NFT-PERF-01..04 perf scenarios
Batch 85 — 4 Performance NFT scenarios + pure-logic evaluators.

- NFT-PERF-01 (AZ-428, Tier-2): two-config e2e latency p95 ≤ 400 ms
  (K=3@25°C, K=2 hybrid@50°C) + frame-drop ≤10% + informational per-stage
  partition recording (D-CROSS-LATENCY-1).
- NFT-PERF-02 (AZ-429): inter-emit p95 ≤ 350 ms + no ≥3 missed-emit
  windows. fc-adapter-aware SITL timestamp extraction (tlog vs MSP).
- NFT-PERF-03 (AZ-430, Tier-2): cold-start TTFF p95 ≤ 30 s AND max ≤ 45 s
  over N≥10 iterations.
- NFT-PERF-04 (AZ-431): spoof-promotion latency p95 ≤ 600 ms over N≥20
  randomized-start blackout+spoof events.

All scenarios consume external fixtures (AZ-595 dependency surfaced) and
fail loudly when fixtures are missing or empty. Public-boundary
discipline preserved — evaluators do NOT import src/gps_denied_onboard.

Tests: 60 new unit tests pass; 24 scenarios collect (4 tests × 2 fc × 3
vio). Code review: PASS_WITH_WARNINGS — 1 Medium (fixed in batch),
3 Low (production-dependency surfacings + future hygiene).

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-17 16:46:49 +03:00

208 lines
5.6 KiB
Python

"""Unit tests for ``runner.helpers.ttff_evaluator`` (AZ-430 / NFT-PERF-03)."""
from __future__ import annotations
from pathlib import Path
import pytest
from runner.helpers import ttff_evaluator as te
def _iter(iter_id: str, ttff_s: float | None) -> te.ColdStartIteration:
    """Build one iteration sample whose first frame arrives at t = 0 ms.

    Args:
        iter_id: Identifier forwarded to ``te.measure_iteration``.
        ttff_s: Desired TTFF in seconds, or ``None`` for an iteration that
            has no first emission.

    Returns:
        Whatever ``te.measure_iteration`` returns for the implied timestamps.
    """
    # round() rather than int(): int() truncates toward zero, so a float
    # product such as 1.001 * 1000 == 1000.9999999999999 would come out
    # 1 ms short of the intended timestamp.
    first_emission_ms = None if ttff_s is None else round(ttff_s * 1000)
    return te.measure_iteration(
        iter_id,
        first_frame_arrival_ms=0,
        first_emission_ms=first_emission_ms,
    )
# ───────────────────────── measure_iteration ─────────────────────────
def test_measure_iteration_happy_path() -> None:
    """Arrival at 1 000 ms and emission at 24 000 ms give a 23 s TTFF."""
    # Arrange
    arrival_ms, emission_ms = 1_000, 24_000
    # Act
    sample = te.measure_iteration(
        "it1",
        first_frame_arrival_ms=arrival_ms,
        first_emission_ms=emission_ms,
    )
    # Assert
    assert sample.ttff_s == pytest.approx(23.0)
    assert sample.emitted
def test_measure_iteration_missing_emission_returns_none() -> None:
    """With no first emission the sample has no TTFF and is not emitted."""
    # Act
    sample = te.measure_iteration(
        "it1",
        first_frame_arrival_ms=1_000,
        first_emission_ms=None,
    )
    # Assert
    assert sample.ttff_s is None
    assert not sample.emitted
def test_measure_iteration_negative_ttff_raises() -> None:
    """An emission timestamped before the first frame raises ValueError."""
    # Arrange — emission precedes arrival by 1 000 ms
    timestamps = {"first_frame_arrival_ms": 10_000, "first_emission_ms": 9_000}
    # Act / Assert
    with pytest.raises(ValueError):
        te.measure_iteration("it1", **timestamps)
def test_measure_iteration_zero_ttff_allowed() -> None:
    """Identical arrival and emission timestamps yield a TTFF of zero."""
    # Arrange
    same_instant_ms = 10_000
    # Act
    sample = te.measure_iteration(
        "it1",
        first_frame_arrival_ms=same_instant_ms,
        first_emission_ms=same_instant_ms,
    )
    # Assert
    assert sample.ttff_s == 0.0
# ───────────────────────── evaluate ─────────────────────────
def test_evaluate_clean_run_passes_all_acs() -> None:
    """Ten emitted iterations spanning 15..24 s satisfy every check."""
    # Arrange
    ttffs_s = [15.0 + offset for offset in range(10)]
    samples = [_iter(f"it{idx}", ttff) for idx, ttff in enumerate(ttffs_s)]
    # Act
    outcome = te.evaluate(samples)
    # Assert
    assert outcome.iteration_count == 10
    assert outcome.passes_iteration_count
    assert outcome.missed_starts == 0
    assert outcome.passes_p95
    assert outcome.passes_max
    assert outcome.passes
def test_evaluate_below_min_iterations_fails_ac1() -> None:
    """Nine iterations fail the iteration-count check and the overall run."""
    # Arrange — nine samples, all at 15 s
    too_few = 9
    samples = [_iter(f"it{idx}", 15.0) for idx in range(too_few)]
    # Act
    outcome = te.evaluate(samples)
    # Assert
    assert not outcome.passes_iteration_count
    assert not outcome.passes
def test_evaluate_p95_at_budget_passes() -> None:
    """Ten iterations all at 30 s give a p95 of 30 s, which still passes."""
    # Arrange
    at_budget_s = 30.0
    samples = [_iter(f"it{idx}", at_budget_s) for idx in range(10)]
    # Act
    outcome = te.evaluate(samples)
    # Assert
    assert outcome.p95_s == pytest.approx(30.0)
    assert outcome.passes_p95
def test_evaluate_p95_above_budget_fails() -> None:
    """Two 35 s tail samples out of ten push p95 past 30 s and fail the run."""
    # Arrange — eight samples at 15 s followed by two at 35 s
    ttffs_s = [15.0] * 8 + [35.0] * 2
    samples = [_iter(f"it{idx}", ttff) for idx, ttff in enumerate(ttffs_s)]
    # Act
    outcome = te.evaluate(samples)
    # Assert
    assert outcome.p95_s is not None and outcome.p95_s > 30.0
    assert not outcome.passes_p95
    assert not outcome.passes
def test_evaluate_max_exceeds_budget_fails_even_when_p95_passes() -> None:
    """A lone 46 s outlier among twenty samples fails max while p95 passes."""
    # Arrange — nineteen samples at 15 s plus one 46 s outlier
    ttffs_s = [15.0] * 19 + [46.0]
    samples = [_iter(f"it{idx}", ttff) for idx, ttff in enumerate(ttffs_s)]
    # Act
    outcome = te.evaluate(samples)
    # Assert
    assert outcome.passes_p95  # a single outlier in 20 samples leaves p95 in budget
    assert not outcome.passes_max
    assert not outcome.passes
def test_evaluate_one_missed_start_fails() -> None:
    """One iteration with no emission fails the p95, max and overall checks."""
    # Arrange — nine emitted samples at 15 s and one that never emitted
    ttffs_s = [15.0] * 9 + [None]
    samples = [_iter(f"it{idx}", ttff) for idx, ttff in enumerate(ttffs_s)]
    # Act
    outcome = te.evaluate(samples)
    # Assert
    assert outcome.missed_starts == 1
    assert not outcome.passes_p95
    assert not outcome.passes_max
    assert not outcome.passes
def test_evaluate_empty_input_fails_iteration_count() -> None:
    """An empty iteration list reports a count of zero and does not pass."""
    # Arrange
    no_samples = []
    # Act
    outcome = te.evaluate(no_samples)
    # Assert
    assert outcome.iteration_count == 0
    assert not outcome.passes_iteration_count
    assert not outcome.passes
def test_evaluate_custom_budgets_apply() -> None:
    """Ten 40 s iterations pass when the budgets are 45 s (p95) and 60 s (max)."""
    # Arrange
    samples = [_iter(f"it{idx}", 40.0) for idx in range(10)]
    relaxed_budgets = {"p95_budget_s": 45.0, "max_budget_s": 60.0}
    # Act
    outcome = te.evaluate(samples, **relaxed_budgets)
    # Assert
    assert outcome.passes
# ───────────────────────── csv emit ─────────────────────────
def test_write_csv_evidence_emits_summary(tmp_path: Path) -> None:
    """The summary CSV has two lines and a header naming the checked columns."""
    # Arrange
    samples = [_iter(f"it{idx}", 15.0 + idx) for idx in range(10)]
    outcome = te.evaluate(samples)
    csv_file = tmp_path / "nft-perf-03.csv"
    # Act
    te.write_csv_evidence(csv_file, outcome)
    # Assert
    lines = csv_file.read_text().splitlines()
    assert len(lines) == 2
    header = lines[0]
    assert header.startswith("iteration_count")
    assert "ac3_p95_passes" in header
    assert "ac4_max_passes" in header
def test_write_per_iteration_csv_one_row_per_iter(tmp_path: Path) -> None:
    """Three iterations produce four lines, the first being the column header."""
    # Arrange
    samples = [_iter(f"it{idx}", 15.0 + idx) for idx in range(3)]
    outcome = te.evaluate(samples, min_iteration_count=3)
    csv_file = tmp_path / "per-iter.csv"
    expected_header = "iteration_id,first_frame_arrival_ms,first_emission_ms,ttff_s"
    # Act
    te.write_per_iteration_csv(csv_file, outcome)
    # Assert
    lines = csv_file.read_text().splitlines()
    assert lines[0] == expected_header
    assert len(lines) == 4