[AZ-428] [AZ-429] [AZ-430] [AZ-431] Add NFT-PERF-01..04 perf scenarios

Batch 85 — 4 Performance NFT scenarios + pure-logic evaluators.

- NFT-PERF-01 (AZ-428, Tier-2): two-config e2e latency p95 ≤ 400 ms
  (K=3@25°C, K=2 hybrid@50°C) + frame-drop ≤10% + informational per-stage
  partition recording (D-CROSS-LATENCY-1).
- NFT-PERF-02 (AZ-429): inter-emit p95 ≤ 350 ms + no run of ≥3 consecutive
  missed emits. FC-adapter-aware SITL timestamp extraction (tlog vs MSP).
- NFT-PERF-03 (AZ-430, Tier-2): cold-start TTFF p95 ≤ 30 s AND max ≤ 45 s
  over N≥10 iterations.
- NFT-PERF-04 (AZ-431): spoof-promotion latency p95 ≤ 600 ms over N≥20
  randomized-start blackout+spoof events.

All scenarios consume external fixtures (AZ-595 dependency surfaced) and
fail loudly when fixtures are missing or empty. Public-boundary
discipline preserved — evaluators do NOT import src/gps_denied_onboard.

Tests: 60 new unit tests pass; 24 scenarios collect (4 tests × 2 fc × 3
vio). Code review: PASS_WITH_WARNINGS — 1 Medium (fixed in batch),
3 Low (production-dependency surfacings + future hygiene).

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-17 16:46:49 +03:00
parent f25cae4a82
commit 73cd632e95
21 changed files with 3063 additions and 6 deletions
@@ -0,0 +1,214 @@
"""Unit tests for ``runner.helpers.e2e_latency_evaluator`` (AZ-428 / NFT-PERF-01)."""
from __future__ import annotations
from pathlib import Path
import pytest
from runner.helpers import e2e_latency_evaluator as ee
def _frame(idx: int, latency_ms: float) -> ee.FrameLatencySample:
    """Build the sample for frame ``idx`` emitted ``latency_ms`` after capture.

    Capture timestamps are spaced 333 ms apart; the latency is rounded to a
    whole number of milliseconds before it is added to the capture time.
    """
    captured_at = idx * 333
    emitted_at = captured_at + int(round(latency_ms))
    return ee.measure_frame(
        f"f{idx:04d}",
        t_capture_ms=captured_at,
        t_emit_at_sitl_ms=emitted_at,
    )
# ───────────────────────── measure_frame ─────────────────────────
def test_measure_frame_negative_latency_raises() -> None:
    """An emit timestamp earlier than the capture timestamp is rejected."""
    # Arrange — emit precedes capture by one second
    capture_ms, emit_ms = 2_000, 1_000
    # Assert
    with pytest.raises(ValueError):
        ee.measure_frame("bad", t_capture_ms=capture_ms, t_emit_at_sitl_ms=emit_ms)
def test_measure_frame_zero_latency_ok() -> None:
    """Identical capture and emit timestamps yield a latency of exactly zero."""
    # Act
    sample = ee.measure_frame("z", t_capture_ms=2_000, t_emit_at_sitl_ms=2_000)
    # Assert
    assert sample.latency_ms == 0.0
# ───────────────────────── evaluate ─────────────────────────
def test_evaluate_clean_run_passes_all_acs() -> None:
    """900 frames at a flat 200 ms latency pass the p95 and frame-drop checks."""
    # Arrange — full frame set, uniform latency
    frames = [_frame(n, 200.0) for n in range(900)]
    # Act
    result = ee.evaluate("k3-25c", frames)
    # Assert
    assert result.sample_count == 900
    assert result.frame_drop_ratio == 0.0
    assert result.p95_ms == pytest.approx(200.0)
    assert result.passes_p95
    assert result.passes_frame_drop
    assert result.passes
def test_evaluate_p95_at_budget_passes() -> None:
    """A p95 sitting exactly on 400 ms still counts as a pass."""
    # Arrange — every one of the 900 frames takes 400 ms
    frames = [_frame(n, 400.0) for n in range(900)]
    # Act
    result = ee.evaluate("k3-25c", frames)
    # Assert
    assert result.p95_ms == pytest.approx(400.0)
    assert result.passes_p95
def test_evaluate_p95_above_budget_fails() -> None:
    """A 500 ms tail covering the last 100 of 900 frames breaks the p95 check."""
    # Arrange — 800 frames at 200 ms followed by 100 frames at 500 ms
    steady = [_frame(n, 200.0) for n in range(800)]
    spiked = [_frame(800 + k, 500.0) for k in range(100)]
    # Act
    result = ee.evaluate("k3-25c", steady + spiked)
    # Assert
    assert result.p95_ms is not None and result.p95_ms > 400.0
    assert not result.passes_p95
    assert not result.passes
def test_evaluate_frame_drops_within_budget() -> None:
    """A frame-drop ratio of exactly 0.1 is still accepted."""
    # Arrange — 810 frames delivered; the test expects this to be a 10 % drop
    frames = [_frame(n, 200.0) for n in range(810)]
    # Act
    result = ee.evaluate("k3-25c", frames)
    # Assert
    assert result.frame_drop_ratio == pytest.approx(0.1)
    assert result.passes_frame_drop
    assert result.passes
def test_evaluate_frame_drops_above_budget_fails() -> None:
    """One frame fewer than the 10 % boundary case fails the frame-drop check."""
    # Arrange — 809 frames delivered (one less than the passing 810-frame case)
    frames = [_frame(n, 200.0) for n in range(809)]
    # Act
    result = ee.evaluate("k3-25c", frames)
    # Assert
    assert not result.passes_frame_drop
    assert not result.passes
def test_evaluate_zero_samples_full_drop_fails() -> None:
    """With no samples the drop ratio is 1.0, p95 is undefined, and the run fails."""
    # Act
    result = ee.evaluate("k3-25c", [])
    # Assert
    assert result.frame_drop_ratio == pytest.approx(1.0)
    assert result.p95_ms is None
    assert not result.passes
def test_evaluate_zero_expected_frame_count_rejected() -> None:
    """Passing ``expected_frame_count=0`` raises ``ValueError``."""
    no_frames: list = []
    # Assert
    with pytest.raises(ValueError):
        ee.evaluate("k3-25c", no_frames, expected_frame_count=0)
def test_evaluate_custom_expected_frame_count_applies() -> None:
    """The drop ratio is computed against a caller-supplied expected frame count."""
    # Arrange — short window: 27 of the 30 expected frames arrive
    frames = [_frame(n, 200.0) for n in range(27)]
    # Act
    result = ee.evaluate("k3-25c", frames, expected_frame_count=30)
    # Assert
    assert result.frame_drop_ratio == pytest.approx(0.1)
    assert result.passes
def test_evaluate_partitions_recorded_but_no_threshold() -> None:
    """Per-stage partitions are reported in input order and the run still passes."""
    # Arrange — two stages with constant per-frame timings
    frames = [_frame(n, 200.0) for n in range(900)]
    per_stage = {
        "c1_okvis2": [150.0] * 900,
        "c2_ultravpr": [50.0] * 900,
    }
    # Act
    result = ee.evaluate("k3-25c", frames, per_stage)
    # Assert
    recorded = [partition.stage_name for partition in result.stage_partitions]
    assert recorded == ["c1_okvis2", "c2_ultravpr"]
    assert result.stage_partitions[0].p95_ms == pytest.approx(150.0)
    assert result.passes
def test_evaluate_chamber_unavailable_flag_propagates() -> None:
    """``chamber_unavailable=True`` is echoed on the report without failing it."""
    # Arrange
    frames = [_frame(n, 200.0) for n in range(900)]
    # Act
    result = ee.evaluate("k2-hybrid-50c", frames, chamber_unavailable=True)
    # Assert
    assert result.chamber_unavailable
    assert result.passes
# ───────────────────────── csv emit ─────────────────────────
def test_write_csv_evidence_one_row_per_config(tmp_path: Path) -> None:
    """The summary CSV holds a header line plus one line per evaluated config."""
    # Arrange — two configs, each with its own report
    cool_frames = [_frame(n, 200.0) for n in range(900)]
    hot_frames = [_frame(n, 350.0) for n in range(900)]
    reports = [
        ee.evaluate("k3-25c", cool_frames),
        ee.evaluate("k2-hybrid-50c", hot_frames),
    ]
    target = tmp_path / "nft-perf-01.csv"
    # Act
    ee.write_csv_evidence(target, reports)
    # Assert
    lines = target.read_text().splitlines()
    assert len(lines) == 3
    assert lines[0].startswith("config_id,sample_count")
def test_write_per_frame_csv_flat_table(tmp_path: Path) -> None:
    """The per-frame CSV has the fixed header and one line per frame sample."""
    # Arrange — a single config with three frames
    frames = [_frame(n, 200.0) for n in range(3)]
    reports = [ee.evaluate("k3-25c", frames, expected_frame_count=3)]
    target = tmp_path / "per-frame.csv"
    # Act
    ee.write_per_frame_csv(target, reports)
    # Assert
    lines = target.read_text().splitlines()
    assert lines[0] == "config_id,frame_id,t_capture_ms,t_emit_at_sitl_ms,latency_ms"
    assert len(lines) == 4
def test_write_partition_csv_per_stage_per_config(tmp_path: Path) -> None:
    """The partition CSV has the fixed header and one line per (config, stage)."""
    # Arrange — one config, two stages
    frames = [_frame(n, 200.0) for n in range(10)]
    per_stage = {"c1_okvis2": [150.0] * 10, "c2_ultravpr": [50.0] * 10}
    reports = [ee.evaluate("k3-25c", frames, per_stage, expected_frame_count=10)]
    target = tmp_path / "partition.csv"
    # Act
    ee.write_partition_csv(target, reports)
    # Assert
    lines = target.read_text().splitlines()
    assert lines[0] == "config_id,stage_name,sample_count,p50_ms,p95_ms,p99_ms"
    assert len(lines) == 3
@@ -0,0 +1,275 @@
"""Unit tests for ``runner.helpers.spoof_promotion_evaluator`` (AZ-431 / NFT-PERF-04)."""
from __future__ import annotations
from pathlib import Path
import pytest
from runner.helpers import spoof_promotion_evaluator as spe
def _evt(
    event_id: str,
    onset_ms: int,
    samples: list[tuple[int, str]],
) -> spe.SpoofEvent:
    """Wrap ``(timestamp_ms, label)`` pairs into a ``SpoofEvent``.

    Each pair becomes an ``OutboundLabelSample``; the pairs keep their given
    order and are stored as a tuple on the event.
    """
    labelled = tuple(
        spe.OutboundLabelSample(monotonic_ms=stamp, source_label=label)
        for stamp, label in samples
    )
    return spe.SpoofEvent(
        event_id=event_id,
        blackout_onset_ms=onset_ms,
        samples=labelled,
    )
def _clean_event(event_id: str, onset_ms: int, latency_ms: int) -> spe.SpoofEvent:
    """Build an event whose first ``dead_reckoned`` label is ``latency_ms`` after onset.

    The timeline has two ``satellite_anchored`` samples (100 ms before onset
    and at onset) followed by two ``dead_reckoned`` samples (at the promotion
    instant and 100 ms later).
    """
    promoted_at = onset_ms + latency_ms
    timeline = [
        (onset_ms - 100, "satellite_anchored"),
        (onset_ms, "satellite_anchored"),
        (promoted_at, "dead_reckoned"),
        (promoted_at + 100, "dead_reckoned"),
    ]
    return _evt(event_id, onset_ms, timeline)
# ───────────────────────── measure_event_latency ─────────────────────────
def test_measure_event_latency_first_dr_after_onset() -> None:
    """Latency is measured from onset to the first ``dead_reckoned`` sample."""
    # Arrange — promotion 250 ms after a 10 s onset
    spoof = _clean_event("e1", 10_000, 250)
    # Act
    measured = spe.measure_event_latency(spoof)
    # Assert
    assert measured.first_dead_reckoned_ms == 10_250
    assert measured.latency_ms == 250
    assert measured.has_promotion
def test_measure_event_latency_pre_onset_dr_is_ignored() -> None:
    """A ``dead_reckoned`` sample that predates the onset does not count."""
    # Arrange — one label before the onset, one after it
    timeline = [
        (9_500, "dead_reckoned"),
        (10_300, "dead_reckoned"),
    ]
    spoof = _evt("e1", 10_000, timeline)
    # Act
    measured = spe.measure_event_latency(spoof)
    # Assert
    assert measured.first_dead_reckoned_ms == 10_300
    assert measured.latency_ms == 300
def test_measure_event_latency_no_dr_returns_none() -> None:
    """An event with no ``dead_reckoned`` sample reports no promotion."""
    # Arrange — only satellite_anchored labels after onset
    timeline = [(10_100, "satellite_anchored"), (10_500, "satellite_anchored")]
    spoof = _evt("e1", 10_000, timeline)
    # Act
    measured = spe.measure_event_latency(spoof)
    # Assert
    assert measured.first_dead_reckoned_ms is None
    assert measured.latency_ms is None
    assert not measured.has_promotion
def test_measure_event_latency_unsorted_samples_sorted() -> None:
    """Sample order does not matter: the earliest post-onset promotion wins."""
    # Arrange — timestamps deliberately given in descending order
    timeline = [
        (10_500, "dead_reckoned"),
        (10_200, "dead_reckoned"),
        (10_100, "satellite_anchored"),
    ]
    spoof = _evt("e1", 10_000, timeline)
    # Act
    measured = spe.measure_event_latency(spoof)
    # Assert — the 10_200 sample, not the first-listed 10_500 one
    assert measured.latency_ms == 200
def test_measure_event_latency_dr_at_onset_is_zero() -> None:
    """A ``dead_reckoned`` sample stamped exactly at onset gives zero latency."""
    # Arrange
    onset = 10_000
    spoof = _evt("e1", onset, [(onset, "dead_reckoned")])
    # Act
    measured = spe.measure_event_latency(spoof)
    # Assert
    assert measured.latency_ms == 0
# ───────────────────────── evaluate (aggregate) ─────────────────────────
def _budget_passing_events(n: int) -> list[spe.SpoofEvent]:
    """Return ``n`` clean events with latencies 100, 110, ... ms.

    Event ``i`` is named ``e{i}``, has its onset at ``10_000 + 1_000 * i`` ms
    and a promotion latency of ``100 + 10 * i`` ms.
    """
    events: list[spe.SpoofEvent] = []
    for i in range(n):
        events.append(
            _clean_event(f"e{i}", onset_ms=10_000 + 1_000 * i, latency_ms=100 + i * 10)
        )
    return events
def test_evaluate_min_event_count_default_passes_with_20() -> None:
    """Twenty passing events satisfy the default minimum event count."""
    # Arrange
    spoofs = _budget_passing_events(20)
    # Act
    summary = spe.evaluate(spoofs)
    # Assert
    assert summary.event_count == 20
    assert summary.passes_event_count
    assert summary.missing_promotions == 0
    assert summary.passes_p95
def test_evaluate_min_event_count_fails_with_19() -> None:
    """Nineteen events fall short of the default minimum and fail the run."""
    # Arrange
    spoofs = _budget_passing_events(19)
    # Act
    summary = spe.evaluate(spoofs)
    # Assert
    assert not summary.passes_event_count
    assert not summary.passes
def test_evaluate_custom_min_event_count() -> None:
    """A caller-supplied ``min_event_count`` replaces the default threshold."""
    # Arrange
    spoofs = _budget_passing_events(5)
    # Act
    summary = spe.evaluate(spoofs, min_event_count=5)
    # Assert
    assert summary.passes_event_count
def test_evaluate_p95_at_budget_passes() -> None:
    """A p95 sitting exactly on 600 ms still counts as a pass."""
    # Arrange — twenty events, each promoted 600 ms after onset
    spoofs = [
        _clean_event(f"e{k}", 10_000 + k * 1_000, 600)
        for k in range(20)
    ]
    # Act
    summary = spe.evaluate(spoofs)
    # Assert
    assert summary.p95_ms == pytest.approx(600.0)
    assert summary.passes_p95
def test_evaluate_p95_above_budget_fails() -> None:
    """Two 800 ms outliers among twenty events push p95 over the budget."""
    # Arrange — 18 passing events followed by two slow ones
    slow_tail = [
        _clean_event("e18", 30_000, 800),
        _clean_event("e19", 31_000, 800),
    ]
    spoofs = _budget_passing_events(18) + slow_tail
    # Act
    summary = spe.evaluate(spoofs)
    # Assert
    assert summary.p95_ms is not None and summary.p95_ms > 600.0
    assert not summary.passes_p95
    assert not summary.passes
def test_evaluate_one_missing_promotion_fails_p95_even_if_others_pass() -> None:
    """A single event that never promotes fails the p95 check and the run."""
    # Arrange — 19 passing events plus one that stays satellite_anchored
    never_promoted = _evt(
        "e19",
        30_000,
        [(30_500, "satellite_anchored"), (31_000, "satellite_anchored")],
    )
    spoofs = _budget_passing_events(19) + [never_promoted]
    # Act
    summary = spe.evaluate(spoofs)
    # Assert
    assert summary.missing_promotions == 1
    assert not summary.passes_p95
    assert not summary.passes
def test_evaluate_empty_input_fails() -> None:
    """Evaluating no events gives a failing report with an undefined p95."""
    # Act
    summary = spe.evaluate([])
    # Assert
    assert summary.event_count == 0
    assert not summary.passes
    assert summary.p95_ms is None
def test_evaluate_percentiles_are_set_when_events_present() -> None:
    """p50, p95 and max are populated for latencies 100, 200, ..., 1000 ms."""
    # Arrange — ten events, latency growing by 100 ms each
    spoofs = []
    for k in range(10):
        spoofs.append(_clean_event(f"e{k}", 10_000 + 1_000 * k, latency_ms=100 + 100 * k))
    # Act
    summary = spe.evaluate(spoofs, min_event_count=10)
    # Assert
    assert summary.p50_ms == pytest.approx(550.0)
    assert summary.p95_ms == pytest.approx(955.0)
    assert summary.max_ms == 1000
# ───────────────────────── csv emit ─────────────────────────
def test_write_csv_evidence_emits_summary(tmp_path: Path) -> None:
    """The summary CSV is a header line plus exactly one data line."""
    # Arrange
    summary = spe.evaluate(_budget_passing_events(20))
    target = tmp_path / "nft-perf-04.csv"
    # Act
    spe.write_csv_evidence(target, summary)
    # Assert
    lines = target.read_text().splitlines()
    assert len(lines) == 2
    header = lines[0]
    assert header.startswith("event_count")
    assert "ac2_passes" in header
def test_write_per_event_csv_one_row_per_event(tmp_path: Path) -> None:
    """The per-event CSV has the fixed header and one line per event."""
    # Arrange — three events, minimum lowered to match
    summary = spe.evaluate(_budget_passing_events(3), min_event_count=3)
    target = tmp_path / "per-event.csv"
    # Act
    spe.write_per_event_csv(target, summary)
    # Assert
    lines = target.read_text().splitlines()
    assert lines[0] == "event_id,blackout_onset_ms,first_dead_reckoned_ms,latency_ms"
    assert len(lines) == 4  # header line + three event lines
@@ -0,0 +1,330 @@
"""Unit tests for ``runner.helpers.streaming_evaluator`` (AZ-429 / NFT-PERF-02)."""
from __future__ import annotations
from pathlib import Path
import pytest
from runner.helpers import streaming_evaluator as se
# ───────────────────────── percentile ─────────────────────────
def test_percentile_q_must_be_in_range() -> None:
    """``q`` values of -1 and 101 are both rejected with ``ValueError``."""
    data = [100.0]
    # Below the valid range
    with pytest.raises(ValueError):
        se._percentile(data, -1.0)
    # Above the valid range
    with pytest.raises(ValueError):
        se._percentile(data, 101.0)
def test_percentile_empty_returns_none() -> None:
    """An empty input has no percentile; ``None`` is returned."""
    # Act
    outcome = se._percentile([], 50.0)
    # Assert
    assert outcome is None
def test_percentile_single_value_returns_that_value() -> None:
    """With one value, q=0, q=50 and q=100 all return that value."""
    lone = [42.0]
    # Assert
    assert se._percentile(lone, 0.0) == 42.0
    assert se._percentile(lone, 50.0) == 42.0
    assert se._percentile(lone, 100.0) == 42.0
def test_percentile_known_distribution_linear_interpolation() -> None:
    """On 100, 200, ..., 1000 the extremes are exact and p50 is the midpoint."""
    # Arrange — ten evenly spaced values
    ladder = list(map(float, range(100, 1001, 100)))
    # Assert
    assert se._percentile(ladder, 0.0) == 100.0
    assert se._percentile(ladder, 100.0) == 1000.0
    # Even-length input: the median is the mean of the two middle values
    assert se._percentile(ladder, 50.0) == pytest.approx(550.0)
def test_percentile_unsorted_input_is_sorted() -> None:
    """The median of an out-of-order triple is its middle value."""
    shuffled = [1000.0, 100.0, 500.0]
    # Assert
    assert se._percentile(shuffled, 50.0) == 500.0
# ─────────────────── evaluate_inter_emit (AC-1) ───────────────────
def test_inter_emit_perfect_cadence_passes() -> None:
    """Emits spaced exactly one target interval apart pass the p95 check."""
    # Arrange — twenty emits on the target cadence
    period = se.TARGET_INTER_FRAME_MS
    emits = [k * period for k in range(20)]
    # Act
    result = se.evaluate_inter_emit(emits)
    # Assert
    assert result.sample_count == 20
    assert result.interval_count == 19
    assert result.p50_ms == pytest.approx(se.TARGET_INTER_FRAME_MS)
    assert result.p95_ms == pytest.approx(se.TARGET_INTER_FRAME_MS)
    assert result.passes_p95
def test_inter_emit_p95_at_budget_passes() -> None:
    """A p95 sitting exactly on 350 ms still counts as a pass."""
    # Arrange — ten emits, 350 ms apart
    emits = [k * 350.0 for k in range(10)]
    # Act
    result = se.evaluate_inter_emit(emits)
    # Assert
    assert result.p95_ms == pytest.approx(350.0)
    assert result.passes_p95
def test_inter_emit_p95_above_budget_fails() -> None:
    """One 500 ms gap among ten intervals lifts p95 above 350 ms."""
    # Arrange — nine 333 ms intervals, then a final 500 ms interval
    emits = [333.0 * k for k in range(10)]
    emits.append(333.0 * 9 + 500.0)
    # Act
    result = se.evaluate_inter_emit(emits)
    # Assert
    assert result.p95_ms is not None and result.p95_ms > 350.0
    assert not result.passes_p95
def test_inter_emit_empty_returns_none_percentiles_and_fails() -> None:
    """No samples means zero counts, undefined percentiles and a failed check."""
    # Act
    result = se.evaluate_inter_emit([])
    # Assert
    assert result.sample_count == 0
    assert result.interval_count == 0
    assert result.p50_ms is None
    assert result.p95_ms is None
    assert not result.passes_p95
def test_inter_emit_single_sample_no_intervals() -> None:
    """A lone emit produces no intervals, so the p95 check cannot pass."""
    # Act
    result = se.evaluate_inter_emit([1000.0])
    # Assert
    assert result.interval_count == 0
    assert not result.passes_p95
def test_inter_emit_custom_budget_overrides_default() -> None:
    """A 600 ms cadence passes once the budget is raised to 700 ms."""
    # Arrange — five emits, 600 ms apart
    emits = [k * 600.0 for k in range(5)]
    # Act
    result = se.evaluate_inter_emit(emits, budget_ms=700.0)
    # Assert
    assert result.budget_ms == 700.0
    assert result.passes_p95
def test_inter_emit_unsorted_input_is_sorted() -> None:
    """Timestamps given out of order are evaluated as if sorted."""
    # Arrange — in order these are 0, 333, 666, 1000 → intervals 333, 333, 334
    emits = [0.0, 1000.0, 333.0, 666.0]
    # Act
    result = se.evaluate_inter_emit(emits)
    # Assert — interpolated p95 of [333, 333, 334] is 333.9
    assert result.p95_ms == pytest.approx(333.9, abs=0.5)
# ─────────────────── evaluate_missed_emits (AC-2) ───────────────────
def test_missed_emits_no_misses_returns_zero() -> None:
    """A steady 333 ms cadence has no missed-emit windows."""
    # Arrange
    emits = [k * 333.0 for k in range(20)]
    # Act
    result = se.evaluate_missed_emits(emits)
    # Assert
    assert result.longest_run == 0
    assert result.windows == ()
    assert result.passes
def test_missed_emits_single_missed_interval_does_not_trip() -> None:
    """One isolated oversized gap is recorded as a length-1 window and passes."""
    # Arrange — the 666 → 1700 gap is the only oversized interval
    emits = [0.0, 333.0, 666.0, 1700.0, 2033.0, 2366.0]
    # Act
    result = se.evaluate_missed_emits(emits)
    # Assert — a single run of length 1 stays under the limit
    assert result.longest_run == 1
    assert len(result.windows) == 1
    assert result.windows[0].length == 1
    assert result.passes
def test_missed_emits_two_consecutive_misses_does_not_trip_default_limit() -> None:
    """Two back-to-back oversized gaps stay below the default limit."""
    # Arrange — gaps 333 → 1700 and 1700 → 3100 are both oversized
    emits = [0.0, 333.0, 1700.0, 3100.0, 3433.0]
    # Act
    result = se.evaluate_missed_emits(emits)
    # Assert
    assert result.longest_run == 2
    assert result.passes  # a run of 2 is tolerated; a run of 3 is not
def test_missed_emits_three_consecutive_misses_fails_default_limit() -> None:
    """Three back-to-back oversized gaps form one window and fail AC-2."""
    # Arrange — three oversized gaps in a row, then a normal one
    emits = [0.0, 333.0, 1700.0, 3100.0, 4500.0, 4833.0]
    # Act
    result = se.evaluate_missed_emits(emits)
    # Assert
    assert result.longest_run == 3
    assert len(result.windows) == 1
    assert result.windows[0].length == 3
    assert not result.passes
def test_missed_emits_multiple_disjoint_runs_tracked_independently() -> None:
    """Separate runs of misses are reported as separate windows, not merged."""
    # Arrange — two length-2 runs split by on-cadence emits
    emits = [
        0.0,
        333.0,  # on cadence
        1700.0,
        3100.0,  # first pair of oversized gaps
        3433.0,
        3766.0,  # on cadence again
        5200.0,
        6600.0,  # second pair of oversized gaps
    ]
    # Act
    result = se.evaluate_missed_emits(emits)
    # Assert
    assert result.longest_run == 2
    assert len(result.windows) == 2
    assert all(window.length == 2 for window in result.windows)
    assert result.passes
def test_missed_emits_trailing_run_closes_correctly() -> None:
    """A run of misses that reaches the last sample is still closed and reported."""
    # Arrange — the final three intervals are all oversized
    emits = [0.0, 333.0, 666.0, 2000.0, 3334.0, 4668.0]
    # Act
    result = se.evaluate_missed_emits(emits)
    # Assert — the window ends on the last timestamp
    assert result.longest_run == 3
    assert len(result.windows) == 1
    trailing = result.windows[0]
    assert trailing.length == 3
    assert trailing.end_ms == 4668.0
    assert not result.passes
def test_missed_emits_threshold_at_target_ratio() -> None:
    """The miss threshold equals ``missed_ratio`` times the target interval."""
    # Arrange — a single gap 1 ms beyond 1.5x the target interval
    ratio = 1.5
    emits = [0.0, 1.5 * se.TARGET_INTER_FRAME_MS + 1.0]
    # Act
    result = se.evaluate_missed_emits(emits, missed_ratio=ratio)
    # Assert
    expected_threshold = 1.5 * se.TARGET_INTER_FRAME_MS
    assert result.missed_emit_threshold_ms == pytest.approx(expected_threshold)
    assert result.longest_run == 1
def test_missed_emits_invalid_ratio_raises() -> None:
    """``missed_ratio`` values of 1.0 and 0.5 are rejected with ``ValueError``."""
    emits = [0.0, 1000.0]
    # A ratio of exactly 1.0
    with pytest.raises(ValueError):
        se.evaluate_missed_emits(emits, missed_ratio=1.0)
    # A ratio below 1.0
    with pytest.raises(ValueError):
        se.evaluate_missed_emits(emits, missed_ratio=0.5)
def test_missed_emits_invalid_limit_raises() -> None:
    """A ``limit`` of zero is rejected with ``ValueError``."""
    emits = [0.0, 1000.0]
    # Assert
    with pytest.raises(ValueError):
        se.evaluate_missed_emits(emits, limit=0)
# ─────────────────── evaluate (aggregate) ───────────────────
def test_evaluate_clean_run_passes_both_acs() -> None:
    """A steady 333 ms cadence passes the aggregate, AC-1 and AC-2 checks."""
    # Arrange
    emits = [k * 333.0 for k in range(30)]
    # Act
    combined = se.evaluate(emits)
    # Assert
    assert combined.passes
    assert combined.inter_emit.passes_p95
    assert combined.missed_emits.passes
def test_evaluate_p95_breach_with_no_missed_run_still_fails() -> None:
    """A uniform 400 ms cadence fails the p95 check and therefore the aggregate."""
    # Arrange — eleven emits, each 400 ms after the previous (budget is 350 ms)
    emits = [0.0]
    while len(emits) < 11:
        emits.append(emits[-1] + 400.0)
    # Act
    combined = se.evaluate(emits)
    # Assert
    assert not combined.inter_emit.passes_p95
    assert not combined.passes
# ─────────────────── csv emit ───────────────────
def test_write_csv_evidence_emits_header_and_row(tmp_path: Path) -> None:
    """The summary CSV is a header line plus one data line, with AC columns."""
    # Arrange
    combined = se.evaluate([k * 333.0 for k in range(10)])
    target = tmp_path / "nft-perf-02.csv"
    # Act
    se.write_csv_evidence(target, combined)
    # Assert
    lines = target.read_text().splitlines()
    assert len(lines) == 2
    columns = lines[0].split(",")
    assert columns[0] == "sample_count"
    assert "ac1_passes" in columns
    assert "ac2_passes" in columns
def test_write_intervals_csv_one_row_per_interval(tmp_path: Path) -> None:
    """The intervals CSV has a header line plus one line per timestamp."""
    # Arrange — five timestamps, so five data lines are expected
    emits = [0.0, 100.0, 200.0, 300.0, 400.0]
    target = tmp_path / "intervals.csv"
    # Act
    se.write_intervals_csv(target, emits)
    # Assert
    lines = target.read_text().splitlines()
    assert lines[0] == "index,t_emit_ms,inter_emit_ms"
    assert len(lines) == 1 + 5  # header line + five sample lines
def test_write_intervals_csv_first_row_has_empty_interval(tmp_path: Path) -> None:
    """The first sample has no predecessor, so its interval column is blank."""
    # Arrange
    target = tmp_path / "intervals.csv"
    # Act
    se.write_intervals_csv(target, [0.0, 100.0])
    # Assert
    lines = target.read_text().splitlines()
    first_sample, second_sample = lines[1], lines[2]
    assert first_sample.endswith(",")  # blank interval column
    assert second_sample.endswith(",100.000")
@@ -0,0 +1,207 @@
"""Unit tests for ``runner.helpers.ttff_evaluator`` (AZ-430 / NFT-PERF-03)."""
from __future__ import annotations
from pathlib import Path
import pytest
from runner.helpers import ttff_evaluator as te
def _iter(iter_id: str, ttff_s: float | None) -> te.ColdStartIteration:
    """Build a cold-start iteration whose first frame arrives at t = 0 ms.

    ``ttff_s`` is converted to a whole-millisecond first-emission timestamp;
    ``None`` is passed through to model an iteration that never emitted.
    """
    emission_ms = None if ttff_s is None else int(ttff_s * 1000)
    return te.measure_iteration(
        iter_id,
        first_frame_arrival_ms=0,
        first_emission_ms=emission_ms,
    )
# ───────────────────────── measure_iteration ─────────────────────────
def test_measure_iteration_happy_path() -> None:
    """TTFF is the emission time minus the arrival time, expressed in seconds."""
    # Act — 1 s arrival, 24 s emission
    sample = te.measure_iteration(
        "it1",
        first_frame_arrival_ms=1_000,
        first_emission_ms=24_000,
    )
    # Assert
    assert sample.ttff_s == pytest.approx(23.0)
    assert sample.emitted
def test_measure_iteration_missing_emission_returns_none() -> None:
    """Without a first emission the TTFF is ``None`` and ``emitted`` is false."""
    # Act
    sample = te.measure_iteration(
        "it1",
        first_frame_arrival_ms=1_000,
        first_emission_ms=None,
    )
    # Assert
    assert sample.ttff_s is None
    assert not sample.emitted
def test_measure_iteration_negative_ttff_raises() -> None:
    """An emission stamped before the first frame arrival is rejected."""
    arrival_ms, emission_ms = 10_000, 9_000
    # Assert
    with pytest.raises(ValueError):
        te.measure_iteration(
            "it1",
            first_frame_arrival_ms=arrival_ms,
            first_emission_ms=emission_ms,
        )
def test_measure_iteration_zero_ttff_allowed() -> None:
    """Emission at the same instant as frame arrival gives a TTFF of zero."""
    # Act
    sample = te.measure_iteration(
        "it1",
        first_frame_arrival_ms=10_000,
        first_emission_ms=10_000,
    )
    # Assert
    assert sample.ttff_s == 0.0
# ───────────────────────── evaluate ─────────────────────────
def test_evaluate_clean_run_passes_all_acs() -> None:
    """Ten iterations with TTFF 15, 16, ..., 24 s pass every check."""
    # Arrange
    runs = [_iter(f"it{k}", 15.0 + k) for k in range(10)]
    # Act
    summary = te.evaluate(runs)
    # Assert
    assert summary.iteration_count == 10
    assert summary.passes_iteration_count
    assert summary.missed_starts == 0
    assert summary.passes_p95
    assert summary.passes_max
    assert summary.passes
def test_evaluate_below_min_iterations_fails_ac1() -> None:
    """Nine iterations fall short of the default minimum and fail the run."""
    # Arrange
    runs = [_iter(f"it{k}", 15.0) for k in range(9)]
    # Act
    summary = te.evaluate(runs)
    # Assert
    assert not summary.passes_iteration_count
    assert not summary.passes
def test_evaluate_p95_at_budget_passes() -> None:
    """A p95 sitting exactly on 30 s still counts as a pass."""
    # Arrange — ten iterations, each with a 30 s TTFF
    runs = [_iter(f"it{k}", 30.0) for k in range(10)]
    # Act
    summary = te.evaluate(runs)
    # Assert
    assert summary.p95_s == pytest.approx(30.0)
    assert summary.passes_p95
def test_evaluate_p95_above_budget_fails() -> None:
    """Two 35 s iterations out of ten push p95 past the 30 s budget."""
    # Arrange — eight iterations at 15 s, then two at 35 s
    quick = [_iter(f"it{k}", 15.0) for k in range(8)]
    slow = [_iter("it8", 35.0), _iter("it9", 35.0)]
    # Act
    summary = te.evaluate(quick + slow)
    # Assert
    assert summary.p95_s is not None and summary.p95_s > 30.0
    assert not summary.passes_p95
    assert not summary.passes
def test_evaluate_max_exceeds_budget_fails_even_when_p95_passes() -> None:
    """One 46 s outlier fails the max check although the p95 check passes."""
    # Arrange — nineteen iterations at 15 s and a single 46 s outlier
    runs = [_iter(f"it{k}", 15.0) for k in range(19)]
    runs.append(_iter("it19", 46.0))
    # Act
    summary = te.evaluate(runs)
    # Assert
    assert summary.passes_p95  # one outlier in twenty leaves p95 under budget
    assert not summary.passes_max
    assert not summary.passes
def test_evaluate_one_missed_start_fails() -> None:
    """One iteration that never emits fails the p95, max and overall checks."""
    # Arrange — nine good iterations plus one with no emission
    runs = [_iter(f"it{k}", 15.0) for k in range(9)]
    runs.append(_iter("it9", None))
    # Act
    summary = te.evaluate(runs)
    # Assert
    assert summary.missed_starts == 1
    assert not summary.passes_p95
    assert not summary.passes_max
    assert not summary.passes
def test_evaluate_empty_input_fails_iteration_count() -> None:
    """Evaluating no iterations fails the iteration-count check and the run."""
    # Act
    summary = te.evaluate([])
    # Assert
    assert summary.iteration_count == 0
    assert not summary.passes_iteration_count
    assert not summary.passes
def test_evaluate_custom_budgets_apply() -> None:
    """40 s iterations pass once both budgets are raised above 40 s."""
    # Arrange
    runs = [_iter(f"it{k}", 40.0) for k in range(10)]
    # Act
    summary = te.evaluate(runs, p95_budget_s=45.0, max_budget_s=60.0)
    # Assert
    assert summary.passes
# ───────────────────────── csv emit ─────────────────────────
def test_write_csv_evidence_emits_summary(tmp_path: Path) -> None:
    """The summary CSV is a header line plus one data line, with AC columns."""
    # Arrange
    runs = [_iter(f"it{k}", 15.0 + k) for k in range(10)]
    summary = te.evaluate(runs)
    target = tmp_path / "nft-perf-03.csv"
    # Act
    te.write_csv_evidence(target, summary)
    # Assert
    lines = target.read_text().splitlines()
    assert len(lines) == 2
    header = lines[0]
    assert header.startswith("iteration_count")
    assert "ac3_p95_passes" in header
    assert "ac4_max_passes" in header
def test_write_per_iteration_csv_one_row_per_iter(tmp_path: Path) -> None:
    """The per-iteration CSV has the fixed header and one line per iteration."""
    # Arrange — three iterations, minimum lowered to match
    runs = [_iter(f"it{k}", 15.0 + k) for k in range(3)]
    summary = te.evaluate(runs, min_iteration_count=3)
    target = tmp_path / "per-iter.csv"
    # Act
    te.write_per_iteration_csv(target, summary)
    # Assert
    lines = target.read_text().splitlines()
    assert lines[0] == "iteration_id,first_frame_arrival_ms,first_emission_ms,ttff_s"
    assert len(lines) == 4
+8
View File
@@ -63,6 +63,10 @@ E2E_ROOT = Path(__file__).resolve().parents[1]
"runner/helpers/blackout_spoof_evaluator.py",
"runner/helpers/fc_proxy_runtime.py",
"runner/helpers/replay_mode.py",
"runner/helpers/streaming_evaluator.py",
"runner/helpers/spoof_promotion_evaluator.py",
"runner/helpers/ttff_evaluator.py",
"runner/helpers/e2e_latency_evaluator.py",
"fixtures/sitl_replay_builder/__init__.py",
"fixtures/sitl_replay_builder/builder.py",
"fixtures/sitl_replay_builder/build_p01_fixtures.py",
@@ -125,6 +129,10 @@ E2E_ROOT = Path(__file__).resolve().parents[1]
"tests/negative/test_ft_n_04_blackout_spoof.py",
"tests/negative/test_ft_n_05_stale_tile_rejection.py",
"tests/negative/test_ft_n_06_mid_flight_freshness.py",
"tests/performance/test_nft_perf_01_e2e_latency.py",
"tests/performance/test_nft_perf_02_streaming.py",
"tests/performance/test_nft_perf_03_ttff.py",
"tests/performance/test_nft_perf_04_spoof_promotion.py",
],
)
def test_required_path_exists(relative_path: str) -> None: