mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 17:11:14 +00:00
[AZ-428] [AZ-429] [AZ-430] [AZ-431] Add NFT-PERF-01..04 perf scenarios
Batch 85 — 4 Performance NFT scenarios + pure-logic evaluators.

- NFT-PERF-01 (AZ-428, Tier-2): two-config e2e latency p95 ≤ 400 ms (K=3@25°C, K=2 hybrid@50°C) + frame-drop ≤ 10% + informational per-stage partition recording (D-CROSS-LATENCY-1).
- NFT-PERF-02 (AZ-429): inter-emit p95 ≤ 350 ms + no ≥3 missed-emit windows. fc-adapter-aware SITL timestamp extraction (tlog vs MSP).
- NFT-PERF-03 (AZ-430, Tier-2): cold-start TTFF p95 ≤ 30 s AND max ≤ 45 s over N≥10 iterations.
- NFT-PERF-04 (AZ-431): spoof-promotion latency p95 ≤ 600 ms over N≥20 randomized-start blackout+spoof events.

All scenarios consume external fixtures (AZ-595 dependency surfaced) and fail loudly when fixtures are missing or empty. Public-boundary discipline preserved — evaluators do NOT import src/gps_denied_onboard.

Tests: 60 new unit tests pass; 24 scenarios collect (4 tests × 2 fc × 3 vio).

Code review: PASS_WITH_WARNINGS — 1 Medium (fixed in batch), 3 Low (production-dependency surfacings + future hygiene).

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,214 @@
|
||||
"""Unit tests for ``runner.helpers.e2e_latency_evaluator`` (AZ-428 / NFT-PERF-01)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from runner.helpers import e2e_latency_evaluator as ee
|
||||
|
||||
|
||||
def _frame(idx: int, latency_ms: float) -> ee.FrameLatencySample:
    """Build the sample for frame ``idx`` emitted ``latency_ms`` after capture.

    Capture timestamps are spaced 333 ms apart; the latency is rounded to a
    whole number of milliseconds before it is added to the capture time.
    """
    capture_ts = idx * 333
    emit_ts = capture_ts + int(round(latency_ms))
    return ee.measure_frame(
        f"f{idx:04d}",
        t_capture_ms=capture_ts,
        t_emit_at_sitl_ms=emit_ts,
    )
|
||||
|
||||
|
||||
# ───────────────────────── measure_frame ─────────────────────────
|
||||
|
||||
|
||||
def test_measure_frame_negative_latency_raises() -> None:
    """An emit timestamp earlier than the capture timestamp must raise."""
    capture_ms, emit_ms = 2_000, 1_000

    # Assert — emit precedes capture by one second
    with pytest.raises(ValueError):
        ee.measure_frame("bad", t_capture_ms=capture_ms, t_emit_at_sitl_ms=emit_ms)
|
||||
|
||||
|
||||
def test_measure_frame_zero_latency_ok() -> None:
    """Equal capture and emit timestamps give a latency of exactly 0.0 ms."""
    instant_ms = 2_000

    # Act
    sample = ee.measure_frame(
        "z", t_capture_ms=instant_ms, t_emit_at_sitl_ms=instant_ms
    )

    # Assert
    assert sample.latency_ms == 0.0
|
||||
|
||||
|
||||
# ───────────────────────── evaluate ─────────────────────────
|
||||
|
||||
|
||||
def test_evaluate_clean_run_passes_all_acs() -> None:
    """900 frames at a uniform 200 ms latency satisfy every pass flag."""
    # Arrange — constant latency, nothing dropped
    frames = [_frame(frame_idx, 200.0) for frame_idx in range(900)]

    # Act
    result = ee.evaluate("k3-25c", frames)

    # Assert — raw numbers first
    assert result.sample_count == 900
    assert result.frame_drop_ratio == 0.0
    assert result.p95_ms == pytest.approx(200.0)
    # ...then the verdict flags
    assert result.passes_p95
    assert result.passes_frame_drop
    assert result.passes
|
||||
|
||||
|
||||
def test_evaluate_p95_at_budget_passes() -> None:
    """A p95 of 400 ms (every frame at 400 ms) still passes the p95 check."""
    # Arrange
    frames = [_frame(frame_idx, 400.0) for frame_idx in range(900)]

    # Act
    result = ee.evaluate("k3-25c", frames)

    # Assert
    assert result.p95_ms == pytest.approx(400.0)
    assert result.passes_p95
|
||||
|
||||
|
||||
def test_evaluate_p95_above_budget_fails() -> None:
    """A 500 ms tail on the last 100 of 900 frames fails the p95 check."""
    # Arrange — frames 0..799 at 200 ms, frames 800..899 at 500 ms
    frames = [
        _frame(frame_idx, 200.0 if frame_idx < 800 else 500.0)
        for frame_idx in range(900)
    ]

    # Act
    result = ee.evaluate("k3-25c", frames)

    # Assert
    assert result.p95_ms is not None
    assert result.p95_ms > 400.0
    assert not result.passes_p95
    assert not result.passes
|
||||
|
||||
|
||||
def test_evaluate_frame_drops_within_budget() -> None:
    """810 received frames give a drop ratio of 0.1, which still passes."""
    # Arrange — 810 frames received
    received = [_frame(frame_idx, 200.0) for frame_idx in range(810)]

    # Act
    result = ee.evaluate("k3-25c", received)

    # Assert
    assert result.frame_drop_ratio == pytest.approx(0.1)
    assert result.passes_frame_drop
    assert result.passes
|
||||
|
||||
|
||||
def test_evaluate_frame_drops_above_budget_fails() -> None:
    """809 received frames fail the frame-drop check and the overall verdict."""
    # Arrange — one frame fewer than the 810 used in the within-budget test
    received = [_frame(frame_idx, 200.0) for frame_idx in range(809)]

    # Act
    result = ee.evaluate("k3-25c", received)

    # Assert
    assert not result.passes_frame_drop
    assert not result.passes
|
||||
|
||||
|
||||
def test_evaluate_zero_samples_full_drop_fails() -> None:
    """No samples: drop ratio is 1.0, p95 is None, and the report fails."""
    # Act
    result = ee.evaluate("k3-25c", [])

    # Assert
    assert result.frame_drop_ratio == pytest.approx(1.0)
    assert result.p95_ms is None
    assert not result.passes
|
||||
|
||||
|
||||
def test_evaluate_zero_expected_frame_count_rejected() -> None:
    """``expected_frame_count=0`` must raise ``ValueError``."""
    no_frames: list = []

    # Assert
    with pytest.raises(ValueError):
        ee.evaluate("k3-25c", no_frames, expected_frame_count=0)
|
||||
|
||||
|
||||
def test_evaluate_custom_expected_frame_count_applies() -> None:
    """With 30 frames expected and 27 received the drop ratio is 0.1."""
    # Arrange — short window
    received = [_frame(frame_idx, 200.0) for frame_idx in range(27)]

    # Act
    result = ee.evaluate("k3-25c", received, expected_frame_count=30)

    # Assert
    assert result.frame_drop_ratio == pytest.approx(0.1)
    assert result.passes
|
||||
|
||||
|
||||
def test_evaluate_partitions_recorded_but_no_threshold() -> None:
    """Per-stage partitions appear in the report, in order, and it passes."""
    # Arrange — two stages with constant per-frame timings
    frames = [_frame(frame_idx, 200.0) for frame_idx in range(900)]
    stage_timings = {
        "c1_okvis2": [150.0] * 900,
        "c2_ultravpr": [50.0] * 900,
    }

    # Act
    result = ee.evaluate("k3-25c", frames, stage_timings)

    # Assert
    recorded_names = [partition.stage_name for partition in result.stage_partitions]
    assert recorded_names == ["c1_okvis2", "c2_ultravpr"]
    first_partition = result.stage_partitions[0]
    assert first_partition.p95_ms == pytest.approx(150.0)
    assert result.passes
|
||||
|
||||
|
||||
def test_evaluate_chamber_unavailable_flag_propagates() -> None:
    """``chamber_unavailable=True`` shows up on the report, which still passes."""
    # Arrange
    frames = [_frame(frame_idx, 200.0) for frame_idx in range(900)]

    # Act
    result = ee.evaluate("k2-hybrid-50c", frames, chamber_unavailable=True)

    # Assert
    assert result.chamber_unavailable
    assert result.passes
|
||||
|
||||
|
||||
# ───────────────────────── csv emit ─────────────────────────
|
||||
|
||||
|
||||
def test_write_csv_evidence_one_row_per_config(tmp_path: Path) -> None:
    """The summary CSV has a header line plus one line per config report."""
    # Arrange — one report per configuration
    frames_k3 = [_frame(frame_idx, 200.0) for frame_idx in range(900)]
    frames_k2 = [_frame(frame_idx, 350.0) for frame_idx in range(900)]
    report_k3 = ee.evaluate("k3-25c", frames_k3)
    report_k2 = ee.evaluate("k2-hybrid-50c", frames_k2)
    csv_file = tmp_path / "nft-perf-01.csv"

    # Act
    ee.write_csv_evidence(csv_file, [report_k3, report_k2])

    # Assert
    lines = csv_file.read_text().splitlines()
    assert len(lines) == 3
    assert lines[0].startswith("config_id,sample_count")
|
||||
|
||||
|
||||
def test_write_per_frame_csv_flat_table(tmp_path: Path) -> None:
    """The per-frame CSV has the expected header and one line per frame."""
    # Arrange — three frames, three expected
    frames = [_frame(frame_idx, 200.0) for frame_idx in range(3)]
    config_reports = [ee.evaluate("k3-25c", frames, expected_frame_count=3)]
    csv_file = tmp_path / "per-frame.csv"

    # Act
    ee.write_per_frame_csv(csv_file, config_reports)

    # Assert
    lines = csv_file.read_text().splitlines()
    header = lines[0]
    assert header == "config_id,frame_id,t_capture_ms,t_emit_at_sitl_ms,latency_ms"
    assert len(lines) == 4
|
||||
|
||||
|
||||
def test_write_partition_csv_per_stage_per_config(tmp_path: Path) -> None:
    """The partition CSV has the expected header and one line per stage."""
    # Arrange — one config, two stages
    frames = [_frame(frame_idx, 200.0) for frame_idx in range(10)]
    stage_timings = {"c1_okvis2": [150.0] * 10, "c2_ultravpr": [50.0] * 10}
    config_reports = [
        ee.evaluate("k3-25c", frames, stage_timings, expected_frame_count=10)
    ]
    csv_file = tmp_path / "partition.csv"

    # Act
    ee.write_partition_csv(csv_file, config_reports)

    # Assert
    lines = csv_file.read_text().splitlines()
    header = lines[0]
    assert header == "config_id,stage_name,sample_count,p50_ms,p95_ms,p99_ms"
    assert len(lines) == 3
|
||||
@@ -0,0 +1,275 @@
|
||||
"""Unit tests for ``runner.helpers.spoof_promotion_evaluator`` (AZ-431 / NFT-PERF-04)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from runner.helpers import spoof_promotion_evaluator as spe
|
||||
|
||||
|
||||
def _evt(
    event_id: str,
    onset_ms: int,
    samples: list[tuple[int, str]],
) -> spe.SpoofEvent:
    """Wrap ``(monotonic_ms, source_label)`` pairs into a ``SpoofEvent``."""
    labelled = tuple(
        spe.OutboundLabelSample(monotonic_ms=stamp_ms, source_label=label)
        for stamp_ms, label in samples
    )
    return spe.SpoofEvent(
        event_id=event_id,
        blackout_onset_ms=onset_ms,
        samples=labelled,
    )
|
||||
|
||||
|
||||
def _clean_event(event_id: str, onset_ms: int, latency_ms: int) -> spe.SpoofEvent:
    """Build an event whose first dead_reckoned label is ``latency_ms`` after onset.

    Two satellite_anchored samples (100 ms before onset and at onset) precede
    two dead_reckoned samples (at the promotion time and 100 ms later).
    """
    promotion_ms = onset_ms + latency_ms
    timeline = [
        (onset_ms - 100, "satellite_anchored"),
        (onset_ms, "satellite_anchored"),
        (promotion_ms, "dead_reckoned"),
        (promotion_ms + 100, "dead_reckoned"),
    ]
    return _evt(event_id, onset_ms, timeline)
|
||||
|
||||
|
||||
# ───────────────────────── measure_event_latency ─────────────────────────
|
||||
|
||||
|
||||
def test_measure_event_latency_first_dr_after_onset() -> None:
    """A clean event with a 250 ms promotion reports that latency."""
    # Arrange
    spoof_event = _clean_event("e1", 10_000, 250)

    # Act
    result = spe.measure_event_latency(spoof_event)

    # Assert
    assert result.first_dead_reckoned_ms == 10_250
    assert result.latency_ms == 250
    assert result.has_promotion
|
||||
|
||||
|
||||
def test_measure_event_latency_pre_onset_dr_is_ignored() -> None:
    """A dead_reckoned label stamped before onset does not count as promotion."""
    # Arrange — one dead_reckoned before onset, one after
    timeline = [
        (9_500, "dead_reckoned"),
        (10_300, "dead_reckoned"),
    ]
    spoof_event = _evt("e1", 10_000, timeline)

    # Act
    result = spe.measure_event_latency(spoof_event)

    # Assert
    assert result.first_dead_reckoned_ms == 10_300
    assert result.latency_ms == 300
|
||||
|
||||
|
||||
def test_measure_event_latency_no_dr_returns_none() -> None:
    """With no dead_reckoned label, the latency fields are None."""
    # Arrange — only satellite_anchored samples
    timeline = [(10_100, "satellite_anchored"), (10_500, "satellite_anchored")]
    spoof_event = _evt("e1", 10_000, timeline)

    # Act
    result = spe.measure_event_latency(spoof_event)

    # Assert
    assert result.first_dead_reckoned_ms is None
    assert result.latency_ms is None
    assert not result.has_promotion
|
||||
|
||||
|
||||
def test_measure_event_latency_unsorted_samples_sorted() -> None:
    """Samples given out of time order still yield the earliest promotion."""
    # Arrange — timestamps deliberately descending
    timeline = [
        (10_500, "dead_reckoned"),
        (10_200, "dead_reckoned"),
        (10_100, "satellite_anchored"),
    ]
    spoof_event = _evt("e1", 10_000, timeline)

    # Act
    result = spe.measure_event_latency(spoof_event)

    # Assert — the 10_200 sample is the earliest dead_reckoned after onset
    assert result.latency_ms == 200
|
||||
|
||||
|
||||
def test_measure_event_latency_dr_at_onset_is_zero() -> None:
    """A dead_reckoned label stamped exactly at onset gives latency 0."""
    # Arrange
    onset = 10_000
    spoof_event = _evt("e1", onset, [(onset, "dead_reckoned")])

    # Act
    result = spe.measure_event_latency(spoof_event)

    # Assert
    assert result.latency_ms == 0
|
||||
|
||||
|
||||
# ───────────────────────── evaluate (aggregate) ─────────────────────────
|
||||
|
||||
|
||||
def _budget_passing_events(n: int) -> list[spe.SpoofEvent]:
    """Return ``n`` clean events with latencies 100, 110, ... ms.

    Event ``i`` has its onset at ``10_000 + 1_000 * i`` ms and a latency of
    ``100 + 10 * i`` ms.
    """
    built: list[spe.SpoofEvent] = []
    for seq in range(n):
        built.append(
            _clean_event(
                f"e{seq}",
                onset_ms=10_000 + 1_000 * seq,
                latency_ms=100 + seq * 10,
            )
        )
    return built
|
||||
|
||||
|
||||
def test_evaluate_min_event_count_default_passes_with_20() -> None:
    """Twenty in-budget events pass the default event-count and p95 checks."""
    # Arrange
    spoof_events = _budget_passing_events(20)

    # Act
    result = spe.evaluate(spoof_events)

    # Assert
    assert result.event_count == 20
    assert result.passes_event_count
    assert result.missing_promotions == 0
    assert result.passes_p95
|
||||
|
||||
|
||||
def test_evaluate_min_event_count_fails_with_19() -> None:
    """Nineteen events fail the default event-count check and the verdict."""
    # Arrange
    spoof_events = _budget_passing_events(19)

    # Act
    result = spe.evaluate(spoof_events)

    # Assert
    assert not result.passes_event_count
    assert not result.passes
|
||||
|
||||
|
||||
def test_evaluate_custom_min_event_count() -> None:
    """Five events pass the count check when ``min_event_count=5``."""
    # Arrange
    required = 5
    spoof_events = _budget_passing_events(required)

    # Act
    result = spe.evaluate(spoof_events, min_event_count=required)

    # Assert
    assert result.passes_event_count
|
||||
|
||||
|
||||
def test_evaluate_p95_at_budget_passes() -> None:
    """A p95 of 600 ms (every event at 600 ms) still passes the p95 check."""
    # Arrange — 20 events on the budget edge
    spoof_events = [
        _clean_event(f"e{seq}", 10_000 + seq * 1_000, 600) for seq in range(20)
    ]

    # Act
    result = spe.evaluate(spoof_events)

    # Assert
    assert result.p95_ms == pytest.approx(600.0)
    assert result.passes_p95
|
||||
|
||||
|
||||
def test_evaluate_p95_above_budget_fails() -> None:
    """Two 800 ms events among 20 push p95 over 600 ms and fail the report."""
    # Arrange — 18 in-budget events followed by two 800 ms spikes
    spoof_events = [
        *_budget_passing_events(18),
        _clean_event("e18", 30_000, 800),
        _clean_event("e19", 31_000, 800),
    ]

    # Act
    result = spe.evaluate(spoof_events)

    # Assert
    assert result.p95_ms is not None
    assert result.p95_ms > 600.0
    assert not result.passes_p95
    assert not result.passes
|
||||
|
||||
|
||||
def test_evaluate_one_missing_promotion_fails_p95_even_if_others_pass() -> None:
    """One event with no dead_reckoned label fails p95 and the verdict."""
    # Arrange — 19 good events, then one that never promotes
    never_promoted = _evt(
        "e19",
        30_000,
        [(30_500, "satellite_anchored"), (31_000, "satellite_anchored")],
    )
    spoof_events = [*_budget_passing_events(19), never_promoted]

    # Act
    result = spe.evaluate(spoof_events)

    # Assert
    assert result.missing_promotions == 1
    assert not result.passes_p95
    assert not result.passes
|
||||
|
||||
|
||||
def test_evaluate_empty_input_fails() -> None:
    """No events: count is 0, p95 is None, and the report fails."""
    # Act
    result = spe.evaluate([])

    # Assert
    assert result.event_count == 0
    assert not result.passes
    assert result.p95_ms is None
|
||||
|
||||
|
||||
def test_evaluate_percentiles_are_set_when_events_present() -> None:
    """Latencies 100..1000 ms (step 100) give p50=550, p95=955, max=1000."""
    # Arrange — 10 events, one per latency step
    spoof_events = []
    for seq in range(10):
        spoof_events.append(
            _clean_event(f"e{seq}", 10_000 + 1_000 * seq, latency_ms=100 + 100 * seq)
        )

    # Act
    result = spe.evaluate(spoof_events, min_event_count=10)

    # Assert
    assert result.p50_ms == pytest.approx(550.0)
    assert result.p95_ms == pytest.approx(955.0)
    assert result.max_ms == 1000
|
||||
|
||||
|
||||
# ───────────────────────── csv emit ─────────────────────────
|
||||
|
||||
|
||||
def test_write_csv_evidence_emits_summary(tmp_path: Path) -> None:
    """The summary CSV is a header line plus a single data line."""
    # Arrange
    summary = spe.evaluate(_budget_passing_events(20))
    csv_file = tmp_path / "nft-perf-04.csv"

    # Act
    spe.write_csv_evidence(csv_file, summary)

    # Assert
    lines = csv_file.read_text().splitlines()
    assert len(lines) == 2
    header = lines[0]
    assert header.startswith("event_count")
    assert "ac2_passes" in header
|
||||
|
||||
|
||||
def test_write_per_event_csv_one_row_per_event(tmp_path: Path) -> None:
    """The per-event CSV has the expected header and one line per event."""
    # Arrange
    summary = spe.evaluate(_budget_passing_events(3), min_event_count=3)
    csv_file = tmp_path / "per-event.csv"

    # Act
    spe.write_per_event_csv(csv_file, summary)

    # Assert
    lines = csv_file.read_text().splitlines()
    header = lines[0]
    assert header == "event_id,blackout_onset_ms,first_dead_reckoned_ms,latency_ms"
    assert len(lines) == 4  # header line + 3 event lines
|
||||
@@ -0,0 +1,330 @@
|
||||
"""Unit tests for ``runner.helpers.streaming_evaluator`` (AZ-429 / NFT-PERF-02)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from runner.helpers import streaming_evaluator as se
|
||||
|
||||
|
||||
# ───────────────────────── percentile ─────────────────────────
|
||||
|
||||
|
||||
def test_percentile_q_must_be_in_range() -> None:
    """``q`` of -1.0 and of 101.0 both raise ``ValueError``."""
    # Arrange / Act / Assert — below the range first, then above it
    for out_of_range_q in (-1.0, 101.0):
        with pytest.raises(ValueError):
            se._percentile([100.0], out_of_range_q)
|
||||
|
||||
|
||||
def test_percentile_empty_returns_none() -> None:
    """An empty value list yields ``None``."""
    # Act
    median = se._percentile([], 50.0)

    # Assert
    assert median is None
|
||||
|
||||
|
||||
def test_percentile_single_value_returns_that_value() -> None:
    """A one-element list returns that element at q = 0, 50 and 100."""
    # Assert
    for q in (0.0, 50.0, 100.0):
        assert se._percentile([42.0], q) == 42.0
|
||||
|
||||
|
||||
def test_percentile_known_distribution_linear_interpolation() -> None:
    """On 100..1000 (step 100): q0 is 100, q100 is 1000, q50 is 550."""
    # Arrange
    ladder = list(map(float, range(100, 1001, 100)))

    # Assert — the two endpoints
    assert se._percentile(ladder, 0.0) == 100.0
    assert se._percentile(ladder, 100.0) == 1000.0
    # Even-length list: the median is the mean of the two middle values
    assert se._percentile(ladder, 50.0) == pytest.approx(550.0)
|
||||
|
||||
|
||||
def test_percentile_unsorted_input_is_sorted() -> None:
    """The median of an unsorted three-element list is its middle value."""
    shuffled = [1000.0, 100.0, 500.0]

    # Assert
    assert se._percentile(shuffled, 50.0) == 500.0
|
||||
|
||||
|
||||
# ─────────────────── evaluate_inter_emit (AC-1) ───────────────────
|
||||
|
||||
|
||||
def test_inter_emit_perfect_cadence_passes() -> None:
    """Timestamps exactly one target interval apart pass the p95 check."""
    # Arrange — 20 emits spaced by TARGET_INTER_FRAME_MS
    target = se.TARGET_INTER_FRAME_MS
    emit_times = [tick * target for tick in range(20)]

    # Act
    result = se.evaluate_inter_emit(emit_times)

    # Assert
    assert result.sample_count == 20
    assert result.interval_count == 19
    assert result.p50_ms == pytest.approx(se.TARGET_INTER_FRAME_MS)
    assert result.p95_ms == pytest.approx(se.TARGET_INTER_FRAME_MS)
    assert result.passes_p95
|
||||
|
||||
|
||||
def test_inter_emit_p95_at_budget_passes() -> None:
    """A p95 of 350 ms (every interval at 350 ms) still passes."""
    # Arrange
    emit_times = [tick * 350.0 for tick in range(10)]

    # Act
    result = se.evaluate_inter_emit(emit_times)

    # Assert
    assert result.p95_ms == pytest.approx(350.0)
    assert result.passes_p95
|
||||
|
||||
|
||||
def test_inter_emit_p95_above_budget_fails() -> None:
    """One 500 ms interval among ten pushes p95 above 350 ms."""
    # Arrange — nine 333 ms intervals, then a final 500 ms interval
    emit_times = [333.0 * tick for tick in range(10)]
    emit_times.append(333.0 * 9 + 500.0)

    # Act
    result = se.evaluate_inter_emit(emit_times)

    # Assert
    assert result.p95_ms is not None
    assert result.p95_ms > 350.0
    assert not result.passes_p95
|
||||
|
||||
|
||||
def test_inter_emit_empty_returns_none_percentiles_and_fails() -> None:
    """No timestamps: zero counts, None percentiles, and a failing p95 flag."""
    # Act
    result = se.evaluate_inter_emit([])

    # Assert — counts
    assert result.sample_count == 0
    assert result.interval_count == 0
    # ...percentiles and verdict
    assert result.p50_ms is None
    assert result.p95_ms is None
    assert not result.passes_p95
|
||||
|
||||
|
||||
def test_inter_emit_single_sample_no_intervals() -> None:
    """One timestamp yields zero intervals and a failing p95 flag."""
    # Act
    lone_emit = [1000.0]
    result = se.evaluate_inter_emit(lone_emit)

    # Assert
    assert result.interval_count == 0
    assert not result.passes_p95
|
||||
|
||||
|
||||
def test_inter_emit_custom_budget_overrides_default() -> None:
    """A 600 ms cadence passes when ``budget_ms`` is set to 700."""
    # Arrange
    emit_times = [tick * 600.0 for tick in range(5)]

    # Act
    result = se.evaluate_inter_emit(emit_times, budget_ms=700.0)

    # Assert
    assert result.budget_ms == 700.0
    assert result.passes_p95
|
||||
|
||||
|
||||
def test_inter_emit_unsorted_input_is_sorted() -> None:
    """Out-of-order timestamps produce the p95 of the time-ordered intervals."""
    # Arrange — in time order these are [0, 333, 666, 1000],
    # i.e. intervals [333, 333, 334]
    shuffled_emits = [0.0, 1000.0, 333.0, 666.0]

    # Act
    result = se.evaluate_inter_emit(shuffled_emits)

    # Assert — p95 of [333, 333, 334] is 333.9
    assert result.p95_ms == pytest.approx(333.9, abs=0.5)
|
||||
|
||||
|
||||
# ─────────────────── evaluate_missed_emits (AC-2) ───────────────────
|
||||
|
||||
|
||||
def test_missed_emits_no_misses_returns_zero() -> None:
    """A steady 333 ms cadence reports no missed-emit windows and passes."""
    # Arrange
    emit_times = [tick * 333.0 for tick in range(20)]

    # Act
    result = se.evaluate_missed_emits(emit_times)

    # Assert
    assert result.longest_run == 0
    assert result.windows == ()
    assert result.passes
|
||||
|
||||
|
||||
def test_missed_emits_single_missed_interval_does_not_trip() -> None:
    """A single isolated long gap is recorded as a run of 1 and still passes."""
    # Arrange — only the 666 -> 1700 gap exceeds the missed-emit threshold
    emit_times = [0.0, 333.0, 666.0, 1700.0, 2033.0, 2366.0]

    # Act
    result = se.evaluate_missed_emits(emit_times)

    # Assert — a single run of length 1, below the limit of 3
    assert result.longest_run == 1
    assert len(result.windows) == 1
    only_window = result.windows[0]
    assert only_window.length == 1
    assert result.passes
|
||||
|
||||
|
||||
def test_missed_emits_two_consecutive_misses_does_not_trip_default_limit() -> None:
    """Two back-to-back long gaps give a run of 2, which still passes."""
    # Arrange — gaps 333 -> 1700 and 1700 -> 3100 are both over 666 ms
    emit_times = [0.0, 333.0, 1700.0, 3100.0, 3433.0]

    # Act
    result = se.evaluate_missed_emits(emit_times)

    # Assert
    assert result.longest_run == 2
    assert result.passes  # a run of 2 is under the limit of 3
|
||||
|
||||
|
||||
def test_missed_emits_three_consecutive_misses_fails_default_limit() -> None:
    """Three back-to-back long gaps form one run of 3 and fail the check."""
    # Arrange — three consecutive gaps over 666 ms (what AC-2 forbids)
    emit_times = [0.0, 333.0, 1700.0, 3100.0, 4500.0, 4833.0]

    # Act
    result = se.evaluate_missed_emits(emit_times)

    # Assert
    assert result.longest_run == 3
    assert len(result.windows) == 1
    only_window = result.windows[0]
    assert only_window.length == 3
    assert not result.passes
|
||||
|
||||
|
||||
def test_missed_emits_multiple_disjoint_runs_tracked_independently() -> None:
    """Two separated runs of 2 are reported as two windows and still pass."""
    # Arrange — each pair of long gaps is separated by on-time emits
    on_time_start = [0.0, 333.0]
    first_missed_pair = [1700.0, 3100.0]
    on_time_middle = [3433.0, 3766.0]
    second_missed_pair = [5200.0, 6600.0]
    emit_times = (
        on_time_start + first_missed_pair + on_time_middle + second_missed_pair
    )

    # Act
    result = se.evaluate_missed_emits(emit_times)

    # Assert
    assert result.longest_run == 2
    assert len(result.windows) == 2
    assert all(window.length == 2 for window in result.windows)
    assert result.passes
|
||||
|
||||
|
||||
def test_missed_emits_trailing_run_closes_correctly() -> None:
    """A run of long gaps reaching the last sample is closed at that sample."""
    # Arrange — the final three intervals are all long
    emit_times = [0.0, 333.0, 666.0, 2000.0, 3334.0, 4668.0]

    # Act
    result = se.evaluate_missed_emits(emit_times)

    # Assert
    assert result.longest_run == 3
    assert len(result.windows) == 1
    trailing_window = result.windows[0]
    assert trailing_window.length == 3
    assert trailing_window.end_ms == 4668.0
    assert not result.passes
|
||||
|
||||
|
||||
def test_missed_emits_threshold_at_target_ratio() -> None:
    """``missed_ratio=1.5`` sets the threshold to 1.5x the target interval."""
    # Arrange — a single gap 1 ms over the custom threshold
    emit_times = [0.0, 1.5 * se.TARGET_INTER_FRAME_MS + 1.0]

    # Act
    result = se.evaluate_missed_emits(emit_times, missed_ratio=1.5)

    # Assert
    expected_threshold = pytest.approx(1.5 * se.TARGET_INTER_FRAME_MS)
    assert result.missed_emit_threshold_ms == expected_threshold
    assert result.longest_run == 1
|
||||
|
||||
|
||||
def test_missed_emits_invalid_ratio_raises() -> None:
    """``missed_ratio`` of 1.0 and of 0.5 both raise ``ValueError``."""
    # Assert
    for bad_ratio in (1.0, 0.5):
        with pytest.raises(ValueError):
            se.evaluate_missed_emits([0.0, 1000.0], missed_ratio=bad_ratio)
|
||||
|
||||
|
||||
def test_missed_emits_invalid_limit_raises() -> None:
    """``limit=0`` raises ``ValueError``."""
    emit_times = [0.0, 1000.0]

    # Assert
    with pytest.raises(ValueError):
        se.evaluate_missed_emits(emit_times, limit=0)
|
||||
|
||||
|
||||
# ─────────────────── evaluate (aggregate) ───────────────────
|
||||
|
||||
|
||||
def test_evaluate_clean_run_passes_both_acs() -> None:
    """A steady 333 ms cadence passes overall and on each sub-report."""
    # Arrange
    emit_times = [tick * 333.0 for tick in range(30)]

    # Act
    result = se.evaluate(emit_times)

    # Assert
    assert result.passes
    assert result.inter_emit.passes_p95
    assert result.missed_emits.passes
|
||||
|
||||
|
||||
def test_evaluate_p95_breach_with_no_missed_run_still_fails() -> None:
    """A p95 breach alone fails the aggregate, even when AC-2 passes.

    The test also asserts that the missed-emit check passes, so the overall
    failure can only come from the inter-emit p95 check.
    """
    # Arrange — ten 400 ms intervals: over the 350 ms p95 budget, yet each is
    # far too short to count as a missed emit
    samples = [i * 400.0 for i in range(11)]

    # Act
    report = se.evaluate(samples)

    # Assert — the premise first: no missed-emit run exists
    assert report.missed_emits.passes
    # ...and the p95 breach alone fails the aggregate verdict
    assert not report.inter_emit.passes_p95
    assert not report.passes
|
||||
|
||||
|
||||
# ─────────────────── csv emit ───────────────────
|
||||
|
||||
|
||||
def test_write_csv_evidence_emits_header_and_row(tmp_path: Path) -> None:
    """The summary CSV is a header line plus one data line."""
    # Arrange
    emit_times = [tick * 333.0 for tick in range(10)]
    summary = se.evaluate(emit_times)
    csv_file = tmp_path / "nft-perf-02.csv"

    # Act
    se.write_csv_evidence(csv_file, summary)

    # Assert
    lines = csv_file.read_text().splitlines()
    assert len(lines) == 2
    columns = lines[0].split(",")
    assert columns[0] == "sample_count"
    assert "ac1_passes" in columns
    assert "ac2_passes" in columns
|
||||
|
||||
|
||||
def test_write_intervals_csv_one_row_per_interval(tmp_path: Path) -> None:
    """Five timestamps produce a header line plus five sample lines."""
    # Arrange — 5 timestamps: a leading sample plus 4 inter-emit intervals
    emit_times = [0.0, 100.0, 200.0, 300.0, 400.0]
    csv_file = tmp_path / "intervals.csv"

    # Act
    se.write_intervals_csv(csv_file, emit_times)

    # Assert
    lines = csv_file.read_text().splitlines()
    assert lines[0] == "index,t_emit_ms,inter_emit_ms"
    assert len(lines) == 1 + 5  # header + one line per sample
|
||||
|
||||
|
||||
def test_write_intervals_csv_first_row_has_empty_interval(tmp_path: Path) -> None:
    """The first sample line has an empty interval; the second shows 100.000."""
    # Arrange
    csv_file = tmp_path / "intervals.csv"
    emit_times = [0.0, 100.0]

    # Act
    se.write_intervals_csv(csv_file, emit_times)

    # Assert
    lines = csv_file.read_text().splitlines()
    first_sample, second_sample = lines[1], lines[2]
    assert first_sample.endswith(",")  # no previous emit, so the column is empty
    assert second_sample.endswith(",100.000")
|
||||
@@ -0,0 +1,207 @@
|
||||
"""Unit tests for ``runner.helpers.ttff_evaluator`` (AZ-430 / NFT-PERF-03)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from runner.helpers import ttff_evaluator as te
|
||||
|
||||
|
||||
def _iter(iter_id: str, ttff_s: float | None) -> te.ColdStartIteration:
    """Build one cold-start iteration whose first frame arrives at t = 0 ms.

    Args:
        iter_id: Identifier passed through to ``te.measure_iteration``.
        ttff_s: Time to first fix in seconds, or ``None`` for an iteration
            with no emission.

    Returns:
        The value returned by ``te.measure_iteration``.
    """
    if ttff_s is None:
        first_emission_ms = None
    else:
        # Round rather than truncate: a product such as 1.001 * 1000 is
        # 1000.9999999999999 in floating point, and int() alone would drop
        # a millisecond. This matches the rounding used by ``_frame``.
        first_emission_ms = int(round(ttff_s * 1000))
    return te.measure_iteration(
        iter_id,
        first_frame_arrival_ms=0,
        first_emission_ms=first_emission_ms,
    )
|
||||
|
||||
|
||||
# ───────────────────────── measure_iteration ─────────────────────────
|
||||
|
||||
|
||||
def test_measure_iteration_happy_path() -> None:
    """Arrival at 1 000 ms and emission at 24 000 ms give a TTFF of 23 s."""
    # Act
    sample = te.measure_iteration(
        "it1",
        first_frame_arrival_ms=1_000,
        first_emission_ms=24_000,
    )

    # Assert
    assert sample.ttff_s == pytest.approx(23.0)
    assert sample.emitted
|
||||
|
||||
|
||||
def test_measure_iteration_missing_emission_returns_none() -> None:
    """With no emission timestamp, TTFF is None and ``emitted`` is falsy."""
    # Act
    sample = te.measure_iteration(
        "it1",
        first_frame_arrival_ms=1_000,
        first_emission_ms=None,
    )

    # Assert
    assert sample.ttff_s is None
    assert not sample.emitted
|
||||
|
||||
|
||||
def test_measure_iteration_negative_ttff_raises() -> None:
    """An emission stamped before the first frame arrival must raise."""
    arrival_ms, emission_ms = 10_000, 9_000

    # Assert
    with pytest.raises(ValueError):
        te.measure_iteration(
            "it1",
            first_frame_arrival_ms=arrival_ms,
            first_emission_ms=emission_ms,
        )
|
||||
|
||||
|
||||
def test_measure_iteration_zero_ttff_allowed() -> None:
    """Emission at the arrival timestamp is accepted with a TTFF of 0.0 s."""
    instant_ms = 10_000

    # Act
    sample = te.measure_iteration(
        "it1",
        first_frame_arrival_ms=instant_ms,
        first_emission_ms=instant_ms,
    )

    # Assert
    assert sample.ttff_s == 0.0
|
||||
|
||||
|
||||
# ───────────────────────── evaluate ─────────────────────────
|
||||
|
||||
|
||||
def test_evaluate_clean_run_passes_all_acs() -> None:
    """Ten iterations with TTFF 15..24 s satisfy every pass flag."""
    # Arrange
    runs = [_iter(f"it{seq}", 15.0 + seq) for seq in range(10)]

    # Act
    result = te.evaluate(runs)

    # Assert — counts first
    assert result.iteration_count == 10
    assert result.passes_iteration_count
    assert result.missed_starts == 0
    # ...then the budget checks and the verdict
    assert result.passes_p95
    assert result.passes_max
    assert result.passes
|
||||
|
||||
|
||||
def test_evaluate_below_min_iterations_fails_ac1() -> None:
    """Nine iterations fail the default iteration-count check and the verdict."""
    # Arrange
    runs = [_iter(f"it{seq}", 15.0) for seq in range(9)]

    # Act
    result = te.evaluate(runs)

    # Assert
    assert not result.passes_iteration_count
    assert not result.passes
|
||||
|
||||
|
||||
def test_evaluate_p95_at_budget_passes() -> None:
    """A p95 of 30 s (every iteration at 30 s) still passes the p95 check."""
    # Arrange
    runs = [_iter(f"it{seq}", 30.0) for seq in range(10)]

    # Act
    result = te.evaluate(runs)

    # Assert
    assert result.p95_s == pytest.approx(30.0)
    assert result.passes_p95
|
||||
|
||||
|
||||
def test_evaluate_p95_above_budget_fails() -> None:
    """Two 35 s iterations among ten push p95 over 30 s and fail the report."""
    # Arrange — it0..it7 at 15 s, it8 and it9 at 35 s
    runs = [_iter(f"it{seq}", 15.0 if seq < 8 else 35.0) for seq in range(10)]

    # Act
    result = te.evaluate(runs)

    # Assert
    assert result.p95_s is not None
    assert result.p95_s > 30.0
    assert not result.passes_p95
    assert not result.passes
|
||||
|
||||
|
||||
def test_evaluate_max_exceeds_budget_fails_even_when_p95_passes() -> None:
    """One 46 s outlier among 20 iterations fails max while p95 still passes."""
    # Arrange — it0..it18 at 15 s, it19 at 46 s; with N=20 the outlier
    # barely moves the p95
    runs = [_iter(f"it{seq}", 15.0 if seq < 19 else 46.0) for seq in range(20)]

    # Act
    result = te.evaluate(runs)

    # Assert
    assert result.passes_p95  # p95 stays within its budget
    assert not result.passes_max
    assert not result.passes
|
||||
|
||||
|
||||
def test_evaluate_one_missed_start_fails() -> None:
    """One iteration with no emission fails p95, max and the verdict."""
    # Arrange — it0..it8 at 15 s, it9 never emits
    runs = [_iter(f"it{seq}", 15.0 if seq < 9 else None) for seq in range(10)]

    # Act
    result = te.evaluate(runs)

    # Assert
    assert result.missed_starts == 1
    assert not result.passes_p95
    assert not result.passes_max
    assert not result.passes
|
||||
|
||||
|
||||
def test_evaluate_empty_input_fails_iteration_count() -> None:
    """No iterations: count is 0 and both the count check and verdict fail."""
    # Act
    result = te.evaluate([])

    # Assert
    assert result.iteration_count == 0
    assert not result.passes_iteration_count
    assert not result.passes
|
||||
|
||||
|
||||
def test_evaluate_custom_budgets_apply() -> None:
    """40 s iterations pass with a 45 s p95 budget and a 60 s max budget."""
    # Arrange
    runs = [_iter(f"it{seq}", 40.0) for seq in range(10)]

    # Act
    result = te.evaluate(runs, p95_budget_s=45.0, max_budget_s=60.0)

    # Assert
    assert result.passes
|
||||
|
||||
|
||||
# ───────────────────────── csv emit ─────────────────────────
|
||||
|
||||
|
||||
def test_write_csv_evidence_emits_summary(tmp_path: Path) -> None:
    """The summary CSV is a header line plus a single data line."""
    # Arrange
    runs = [_iter(f"it{seq}", 15.0 + seq) for seq in range(10)]
    summary = te.evaluate(runs)
    csv_file = tmp_path / "nft-perf-03.csv"

    # Act
    te.write_csv_evidence(csv_file, summary)

    # Assert
    lines = csv_file.read_text().splitlines()
    assert len(lines) == 2
    header = lines[0]
    assert header.startswith("iteration_count")
    assert "ac3_p95_passes" in header
    assert "ac4_max_passes" in header
|
||||
|
||||
|
||||
def test_write_per_iteration_csv_one_row_per_iter(tmp_path: Path) -> None:
    """The per-iteration CSV has the expected header and one line per iteration."""
    # Arrange
    runs = [_iter(f"it{seq}", 15.0 + seq) for seq in range(3)]
    summary = te.evaluate(runs, min_iteration_count=3)
    csv_file = tmp_path / "per-iter.csv"

    # Act
    te.write_per_iteration_csv(csv_file, summary)

    # Assert
    lines = csv_file.read_text().splitlines()
    header = lines[0]
    assert header == "iteration_id,first_frame_arrival_ms,first_emission_ms,ttff_s"
    assert len(lines) == 4
|
||||
@@ -63,6 +63,10 @@ E2E_ROOT = Path(__file__).resolve().parents[1]
|
||||
"runner/helpers/blackout_spoof_evaluator.py",
|
||||
"runner/helpers/fc_proxy_runtime.py",
|
||||
"runner/helpers/replay_mode.py",
|
||||
"runner/helpers/streaming_evaluator.py",
|
||||
"runner/helpers/spoof_promotion_evaluator.py",
|
||||
"runner/helpers/ttff_evaluator.py",
|
||||
"runner/helpers/e2e_latency_evaluator.py",
|
||||
"fixtures/sitl_replay_builder/__init__.py",
|
||||
"fixtures/sitl_replay_builder/builder.py",
|
||||
"fixtures/sitl_replay_builder/build_p01_fixtures.py",
|
||||
@@ -125,6 +129,10 @@ E2E_ROOT = Path(__file__).resolve().parents[1]
|
||||
"tests/negative/test_ft_n_04_blackout_spoof.py",
|
||||
"tests/negative/test_ft_n_05_stale_tile_rejection.py",
|
||||
"tests/negative/test_ft_n_06_mid_flight_freshness.py",
|
||||
"tests/performance/test_nft_perf_01_e2e_latency.py",
|
||||
"tests/performance/test_nft_perf_02_streaming.py",
|
||||
"tests/performance/test_nft_perf_03_ttff.py",
|
||||
"tests/performance/test_nft_perf_04_spoof_promotion.py",
|
||||
],
|
||||
)
|
||||
def test_required_path_exists(relative_path: str) -> None:
|
||||
|
||||
Reference in New Issue
Block a user