[AZ-428] [AZ-429] [AZ-430] [AZ-431] Add NFT-PERF-01..04 perf scenarios

Batch 85 — 4 Performance NFT scenarios + pure-logic evaluators.

- NFT-PERF-01 (AZ-428, Tier-2): two-config e2e latency p95 ≤ 400 ms
  (K=3@25°C, K=2 hybrid@50°C) + frame-drop ≤10% + informational per-stage
  partition recording (D-CROSS-LATENCY-1).
- NFT-PERF-02 (AZ-429): inter-emit p95 ≤ 350 ms + no ≥3 missed-emit
  windows. fc-adapter-aware SITL timestamp extraction (tlog vs MSP).
- NFT-PERF-03 (AZ-430, Tier-2): cold-start TTFF p95 ≤ 30 s AND max ≤ 45 s
  over N≥10 iterations.
- NFT-PERF-04 (AZ-431): spoof-promotion latency p95 ≤ 600 ms over N≥20
  randomized-start blackout+spoof events.

All scenarios consume external fixtures (AZ-595 dependency surfaced) and
fail loudly when fixtures are missing or empty. Public-boundary
discipline preserved — evaluators do NOT import src/gps_denied_onboard.

Tests: 60 new unit tests pass; 24 scenarios collect (4 tests × 2 fc × 3
vio). Code review: PASS_WITH_WARNINGS — 1 Medium (fixed in batch),
3 Low (production-dependency surfacings + future hygiene).

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-17 16:46:49 +03:00
parent f25cae4a82
commit 73cd632e95
21 changed files with 3063 additions and 6 deletions
@@ -0,0 +1,226 @@
"""NFT-PERF-01 — End-to-end latency p95 (AZ-428 / AC-4.1 / D-CROSS-LATENCY-1).
Tier-2 ONLY. Two configurations measured per
``(fc_adapter, vio_strategy)`` parameterization:
* (a) ``k3-25c``: K=3 baseline at +25 °C ambient.
* (b) ``k2-hybrid-50c``: K=2 + Jacobian-cov hybrid auto-degrade at +50 °C.
Each config exercises the same hard gate: ``p95(t_emit_at_sitl -
t_capture) ≤ 400 ms`` (AC-2 / AC-3) AND ``frame_drop_ratio ≤ 10 %``
(AC-4). Per-stage partition (AC-5) is recorded for trend but is NOT
pass/fail.
Pure-logic AC-2/3/4 covered by
``e2e/_unit_tests/helpers/test_e2e_latency_evaluator.py``.
Production dependency surfaced to AZ-595 / AZ-444 (Tier-2 runner):
``E2E_NFT_PERF_01_LATENCY_FIXTURE`` names a JSON file (absolute path or
relative to ``E2E_SITL_REPLAY_DIR``) shaped:
{
"expected_frame_count": 900,
"configs": [
{
"config_id": "k3-25c",
"chamber_unavailable": false,
"frames": [
{"frame_id": "f0001", "t_capture_ms": 0, "t_emit_at_sitl_ms": 220},
...
],
"stage_samples": {
"c1_okvis2": [150.0, 152.0, ...],
"c2_ultravpr": [50.0, ...],
...
}
},
...
]
}
``chamber_unavailable`` defaults to false. For the ``k2-hybrid-50c``
config it should be true when run on the workstation without a
chamber — surfaces as a flag in the evidence row.
"""
from __future__ import annotations
import json
import os
from pathlib import Path
import pytest
from runner.helpers import e2e_latency_evaluator as ee
# Env var naming the latency fixture JSON; a relative value is resolved
# against the SITL replay directory when one is configured.
LATENCY_FIXTURE_ENV_VAR = "E2E_NFT_PERF_01_LATENCY_FIXTURE"
# File name looked up under the replay directory when the env var is unset.
DEFAULT_FIXTURE_NAME = "nft_perf_01_latency.json"
# Every id listed here must appear in the fixture or the scenario fails.
REQUIRED_CONFIG_IDS = ("k3-25c", "k2-hybrid-50c")
@pytest.mark.tier2_only
@pytest.mark.scenario_id("nft-perf-01")
@pytest.mark.traces_to("AC-4.1,AC-1,AC-2,AC-3,AC-4,AC-5,AC-6")
def test_nft_perf_01_e2e_latency(
    fc_adapter: str,
    vio_strategy: str,
    evidence_dir,  # type: ignore[no-untyped-def]
    run_id: str,
    nfr_recorder,  # type: ignore[no-untyped-def]
    sitl_replay_ready: bool,
) -> None:
    """Gate latency p95 and frame-drop ratio for every config in the fixture.

    Flow: skip when the SITL replay is not prepared, fail when the fixture
    file or a required config id is missing, build one latency report per
    config, write three CSV evidence files, record NFR metrics, and finally
    assert that no report breached its p95 or frame-drop budget. The
    per-stage partition is written to evidence only; it is never asserted.
    """
    if not sitl_replay_ready:
        pytest.skip(
            "NFT-PERF-01 requires `E2E_SITL_REPLAY_DIR` to point at a "
            "prepared SITL replay fixture (AZ-595) with N≥900 captured "
            "frames per config across both K=3@25°C and K=2@50°C. "
            "Pure-logic AC-2/3/4 covered by "
            "e2e/_unit_tests/helpers/test_e2e_latency_evaluator.py."
        )

    fixture_path = _resolve_latency_fixture_path()
    if not fixture_path.is_file():
        pytest.fail(
            f"NFT-PERF-01: latency fixture not found at {fixture_path}. "
            f"`{LATENCY_FIXTURE_ENV_VAR}` env var must point at a JSON file "
            "carrying per-config frame samples (see scenario docstring). "
            "Production dependency: AZ-595 + AZ-444."
        )

    expected_frames, configs = _load_latency_fixture(fixture_path)
    present_ids = tuple(entry["config_id"] for entry in configs)
    missing = [cid for cid in REQUIRED_CONFIG_IDS if cid not in present_ids]
    if missing:
        pytest.fail(
            f"NFT-PERF-01: latency fixture {fixture_path} is missing required "
            f"config_id(s) {missing}; both {REQUIRED_CONFIG_IDS} are required "
            "for AC-4.1 + D-CROSS-LATENCY-1 coverage."
        )

    # One report per fixture config, in fixture order.
    reports: list[ee.LatencyReport] = []
    for cfg in configs:
        samples = []
        for idx, frame in enumerate(cfg["frames"]):
            # Frames without an id get a positional one (f0000, f0001, ...).
            frame_id = str(frame.get("frame_id") or f"f{idx:04d}")
            samples.append(
                ee.measure_frame(
                    frame_id,
                    t_capture_ms=int(frame["t_capture_ms"]),
                    t_emit_at_sitl_ms=int(frame["t_emit_at_sitl_ms"]),
                )
            )

        stage_samples = {}
        for stage, values in (cfg.get("stage_samples") or {}).items():
            stage_name = str(stage)
            stage_samples[stage_name] = [float(value) for value in values]

        report = ee.evaluate(
            config_id=cfg["config_id"],
            samples=samples,
            stage_samples=stage_samples,
            expected_frame_count=expected_frames,
            chamber_unavailable=bool(cfg.get("chamber_unavailable", False)),
        )
        reports.append(report)

    # Evidence: summary, per-frame and per-stage partition CSVs.
    base = Path(evidence_dir) / "nft-perf-01" / f"{fc_adapter}-{vio_strategy}"
    per_frame_csv = base.with_name(base.name + "-per-frame").with_suffix(".csv")
    partition_csv = base.with_name(base.name + "-partition").with_suffix(".csv")
    ee.write_csv_evidence(base.with_suffix(".csv"), reports)
    ee.write_per_frame_csv(per_frame_csv, reports)
    ee.write_partition_csv(partition_csv, reports)

    # NFR metrics; percentiles are only recorded when the report has them.
    for report in reports:
        prefix = f"nft_perf_01.{report.config_id}"
        nfr_recorder.record_metric(
            f"{prefix}.frame_drop_ratio",
            float(report.frame_drop_ratio),
            ac_id="AC-4",
        )
        if report.p50_ms is not None:
            nfr_recorder.record_metric(
                f"{prefix}.latency_ms_p50", float(report.p50_ms)
            )
        if report.p95_ms is not None:
            # The hybrid 50 °C config is gated by AC-3, every other one by AC-2.
            if report.config_id == "k2-hybrid-50c":
                p95_ac = "AC-3"
            else:
                p95_ac = "AC-2"
            nfr_recorder.record_metric(
                f"{prefix}.latency_ms_p95",
                float(report.p95_ms),
                ac_id=p95_ac,
            )
        if report.p99_ms is not None:
            nfr_recorder.record_metric(
                f"{prefix}.latency_ms_p99", float(report.p99_ms)
            )

    # Collect every breach first so one failure lists all offending configs.
    breaches = []
    for report in reports:
        if report.config_id == "k2-hybrid-50c":
            p95_ac = "AC-3"
        else:
            p95_ac = "AC-2"
        if not report.passes_p95:
            breaches.append(
                f"{p95_ac} ({report.config_id}): p95 = {report.p95_ms} ms "
                f"> budget {report.p95_budget_ms} ms"
            )
        if not report.passes_frame_drop:
            breaches.append(
                f"AC-4 ({report.config_id}): frame_drop_ratio "
                f"= {report.frame_drop_ratio:.4f} > budget "
                f"{report.frame_drop_budget:.4f}"
            )
    assert not breaches, "\n".join(breaches)
def _resolve_latency_fixture_path() -> Path:
    """Return the path at which the NFT-PERF-01 latency fixture is expected.

    Resolution order:

    * env var set: use it as-is when absolute or when no replay directory
      is configured, otherwise join it onto the replay directory;
    * env var unset: use the default file name under the replay directory;
    * neither available: return a placeholder path that names the unset
      env var, so the caller's ``is_file()`` check reports it.
    """
    from runner.helpers import sitl_observer

    replay_root = sitl_observer.replay_dir()
    configured = os.environ.get(LATENCY_FIXTURE_ENV_VAR, "").strip()

    if configured:
        candidate = Path(configured)
        if candidate.is_absolute() or replay_root is None:
            return candidate
        return replay_root / candidate

    if replay_root is not None:
        return replay_root / DEFAULT_FIXTURE_NAME
    return Path(f"<{LATENCY_FIXTURE_ENV_VAR}-unset>")
def _load_latency_fixture(fixture_path: Path) -> tuple[int, list[dict]]:
payload = json.loads(fixture_path.read_text())
if not isinstance(payload, dict):
pytest.fail(
f"NFT-PERF-01: latency fixture {fixture_path} must be a JSON "
f"object; got top-level type={type(payload).__name__}"
)
expected_raw = payload.get("expected_frame_count", ee.DEFAULT_EXPECTED_FRAMES)
try:
expected = int(expected_raw)
except (TypeError, ValueError) as exc:
pytest.fail(
f"NFT-PERF-01: expected_frame_count in {fixture_path} must be "
f"an int: {exc}"
)
configs = payload.get("configs")
if not isinstance(configs, list) or not configs:
pytest.fail(
f"NFT-PERF-01: latency fixture {fixture_path} must contain a "
f'non-empty "configs" list.'
)
for idx, cfg in enumerate(configs):
if not isinstance(cfg, dict):
pytest.fail(
f"NFT-PERF-01: configs[{idx}] in {fixture_path} must be an "
f"object; got {type(cfg).__name__}"
)
if "config_id" not in cfg:
pytest.fail(
f"NFT-PERF-01: configs[{idx}] in {fixture_path} missing "
f"required key `config_id`."
)
frames = cfg.get("frames")
if not isinstance(frames, list):
pytest.fail(
f"NFT-PERF-01: configs[{idx}].frames in {fixture_path} "
f"must be a list of frame records."
)
return expected, configs
@@ -0,0 +1,160 @@
"""NFT-PERF-02 — frame-by-frame streaming, no batching (AZ-429 / AC-4.4).
Replays the 5-minute Derkachi flight at the 3 Hz target cadence; reads
SITL-side receipt timestamps for accepted GPS_INPUT (ArduPilot
mavproxy tlog) / MSP2_SENSOR_GPS (iNav SITL MSP capture) messages;
asserts:
* AC-1: ``p95(inter_emit_interval) ≤ 350 ms`` (inter-frame × 1.05).
* AC-2: no window contains ≥3 consecutive missed emits.
Tier-1 OR Tier-2; both parametrizations run. The pure-logic AC-1/AC-2
evaluators are covered by
``e2e/_unit_tests/helpers/test_streaming_evaluator.py``.
"""
from __future__ import annotations
from pathlib import Path
import pytest
from runner.helpers import streaming_evaluator as ste
# Directory holding the Derkachi flight input data, located relative to the
# directory three levels above the one containing this file.
DERKACHI_DIR = (
Path(__file__).resolve().parents[3]
/ "_docs"
/ "00_problem"
/ "input_data"
/ "flight_derkachi"
)
# Video handed to the frame-source replayer by the scenario.
DERKACHI_MP4 = DERKACHI_DIR / "flight_derkachi.mp4"
# 5 min Derkachi replay at 3 Hz target. The window length feeds into the
# iNav MSP collector; the ArduPilot path reads the tlog regardless of
# `window_s` (the tlog encodes its own duration).
REPLAY_WINDOW_S = 300.0
# Port passed to the iNav MSP frame collector.
INAV_MSP_PORT = 5760
# `msg_type` value used to select accepted-GPS messages from the tlog.
ARDUPILOT_GPS_MSG_KIND = "GPS_INPUT"
@pytest.mark.scenario_id("nft-perf-02")
@pytest.mark.traces_to("AC-4.4,AC-1,AC-2,AC-3")
def test_nft_perf_02_streaming_inter_emit(
    fc_adapter: str,
    vio_strategy: str,
    evidence_dir,  # type: ignore[no-untyped-def]
    run_id: str,
    nfr_recorder,  # type: ignore[no-untyped-def]
    sitl_replay_ready: bool,
) -> None:
    """Check inter-emit p95 (AC-1) and missed-emit runs (AC-2) for one FC/VIO pair.

    Replays the Derkachi video, reads the SITL-side timestamps of accepted
    GPS frames for the given flight controller, evaluates them, writes the
    summary and per-interval CSV evidence, records NFR metrics, and asserts
    both gates. Skips when the SITL replay is not prepared; fails when the
    replay produced no accepted GPS frames at all.
    """
    if not sitl_replay_ready:
        pytest.skip(
            "NFT-PERF-02 requires `E2E_SITL_REPLAY_DIR` to point at a prepared "
            "SITL replay fixture (AZ-595) carrying the 5 min Derkachi @ 3 Hz "
            "replay. AC-1/AC-2 pure-logic covered by "
            "e2e/_unit_tests/helpers/test_streaming_evaluator.py."
        )

    from runner.helpers import mavproxy_tlog_reader, msp_frame_observer, sitl_observer
    from runner.helpers.frame_source_replay import FrameSourceReplayer
    from runner.helpers.replay_mode import NullFrameSink

    # Step 1: drive the replay through a sink that discards frames.
    replayer = FrameSourceReplayer(NullFrameSink())
    replayer.replay_video(DERKACHI_MP4)

    # Step 2: collect receipt timestamps from the FC-specific SITL host.
    host = f"{fc_adapter}-sitl"
    emit_times_ms = _read_emit_times_ms(
        fc_adapter,
        host,
        sitl_observer=sitl_observer,
        mavproxy_tlog_reader=mavproxy_tlog_reader,
    )
    if not emit_times_ms:
        pytest.fail(
            f"NFT-PERF-02: SITL ({host}) reported zero accepted GPS frames "
            "during the 5 min Derkachi replay. The replay fixture exists but "
            "the SUT emitted nothing — fail-loud rather than skip."
        )

    # Step 3: evaluate both acceptance criteria in one pass.
    report = ste.evaluate(emit_times_ms)

    # Step 4: summary CSV plus the raw interval CSV.
    base = Path(evidence_dir) / "nft-perf-02" / f"{fc_adapter}-{vio_strategy}"
    intervals_csv = base.with_name(base.name + "-intervals").with_suffix(".csv")
    ste.write_csv_evidence(base.with_suffix(".csv"), report)
    ste.write_intervals_csv(intervals_csv, emit_times_ms)

    # Step 5: NFR metrics; a statistic the report lacks is not recorded.
    inter_emit = report.inter_emit
    optional_metrics = (
        ("nft_perf_02.inter_emit_ms_p50", inter_emit.p50_ms, {}),
        ("nft_perf_02.inter_emit_ms_p95", inter_emit.p95_ms, {"ac_id": "AC-1"}),
        ("nft_perf_02.inter_emit_ms_max", inter_emit.max_ms, {}),
    )
    for metric_name, metric_value, extra in optional_metrics:
        if metric_value is not None:
            nfr_recorder.record_metric(metric_name, metric_value, **extra)

    missed = report.missed_emits
    nfr_recorder.record_metric(
        "nft_perf_02.longest_missed_run",
        float(missed.longest_run),
        ac_id="AC-2",
    )

    # Step 6: the two gates.
    assert inter_emit.passes_p95, (
        f"AC-1: p95(inter_emit) > {ste.STREAMING_P95_BUDGET_MS} ms "
        f"(got {inter_emit.p95_ms} ms over "
        f"{inter_emit.interval_count} intervals; "
        f"max={inter_emit.max_ms} ms)"
    )
    assert missed.passes, (
        f"AC-2: longest missed-emit run = {missed.longest_run} "
        f">= limit {missed.limit}; "
        f"first window @ "
        f"{missed.windows[0].start_ms if missed.windows else 'n/a'} ms"
    )
def _read_emit_times_ms(
fc_adapter: str,
host: str,
*,
sitl_observer, # type: ignore[no-untyped-def]
mavproxy_tlog_reader, # type: ignore[no-untyped-def]
) -> list[float]:
"""Project SITL-side accepted-GPS receipt timestamps into a ms list.
* ArduPilot: filter mavproxy tlog for ``GPS_INPUT`` and project
``timestamp_us / 1000``.
* iNav: ``collect_inav_msp_frames`` then filter for
``MSP2_SENSOR_GPS`` (function id ``0x1F03``) and project
``monotonic_ms`` directly.
"""
if fc_adapter == "ardupilot":
tlog_path = sitl_observer.capture_ap_tlog(host=host, duration_s=REPLAY_WINDOW_S)
return [
float(msg.timestamp_us) / 1000.0
for msg in mavproxy_tlog_reader.iter_messages(tlog_path)
if msg.msg_type == ARDUPILOT_GPS_MSG_KIND
]
if fc_adapter == "inav":
capture = sitl_observer.collect_inav_msp_frames(
host=host, port=INAV_MSP_PORT, window_s=REPLAY_WINDOW_S
)
return [
float(f.monotonic_ms)
for f in capture.frames
if f.function_id == msp_frame_observer.MSP2_SENSOR_GPS_FUNCTION_ID
]
raise ValueError(f"unknown fc_adapter {fc_adapter!r}")
@@ -0,0 +1,189 @@
"""NFT-PERF-03 — Cold-start Time-To-First-Fix (AZ-430 / AC-NEW-1).
Tier-2 ONLY. N≥10 cold-start iterations; each measures
``t_first_emission - t_first_frame_arrival``; asserts:
* AC-3: ``p95(TTFF) ≤ 30 s``.
* AC-4: ``max(TTFF) ≤ 45 s``.
Per-iteration cleanup (fdr-output volume wipe + SITL cold-boot reload
+ SUT lifecycle restart) is owned by the Tier-2 Jetson harness
(AZ-444). The runner-side scenario here only consumes a fixture that
encodes the N captured ``(first_frame_arrival_ms, first_emission_ms)``
pairs.
Production dependency surfaced to AZ-595 / AZ-444: the
``E2E_NFT_PERF_03_TTFF_FIXTURE`` env var names a JSON file (absolute
path or relative to ``E2E_SITL_REPLAY_DIR``) with shape:
{
"iterations": [
{
"iteration_id": "iter-01",
"first_frame_arrival_ms": 1234,
"first_emission_ms": 16789
},
...
]
}
``first_emission_ms`` may be ``null`` for a timed-out iteration —
counted as ``missed_starts`` and treated as a budget breach.
"""
from __future__ import annotations
import json
import os
from pathlib import Path
import pytest
from runner.helpers import ttff_evaluator as te
# Env var naming the TTFF fixture JSON; a relative value is resolved against
# the SITL replay directory when one is configured.
TTFF_FIXTURE_ENV_VAR = "E2E_NFT_PERF_03_TTFF_FIXTURE"
# File name looked up under the replay directory when the env var is unset.
TTFF_DEFAULT_FIXTURE_NAME = "nft_perf_03_ttff.json"
@pytest.mark.tier2_only
@pytest.mark.scenario_id("nft-perf-03")
@pytest.mark.traces_to("AC-NEW-1,AC-1,AC-2,AC-3,AC-4,AC-5")
def test_nft_perf_03_cold_start_ttff(
    fc_adapter: str,
    vio_strategy: str,
    evidence_dir,  # type: ignore[no-untyped-def]
    run_id: str,
    nfr_recorder,  # type: ignore[no-untyped-def]
    sitl_replay_ready: bool,
) -> None:
    """AC-3 + AC-4 + iteration-count gate across ``(fc_adapter, vio_strategy)``.

    Skips when the SITL replay is not prepared; fails when the TTFF fixture
    is missing or holds no iterations. Otherwise evaluates the iterations,
    writes the summary and per-iteration CSV evidence, records NFR metrics,
    and asserts the iteration-count, p95 and max gates in that order.
    """
    if not sitl_replay_ready:
        pytest.skip(
            "NFT-PERF-03 requires `E2E_SITL_REPLAY_DIR` to point at a "
            "prepared SITL replay fixture (AZ-595) containing N≥10 cold-start "
            "iterations. Pure-logic AC-3/AC-4 covered by "
            "e2e/_unit_tests/helpers/test_ttff_evaluator.py."
        )

    fixture_path = _resolve_ttff_fixture_path()
    if not fixture_path.is_file():
        pytest.fail(
            f"NFT-PERF-03: TTFF fixture not found at {fixture_path}. "
            f"`{TTFF_FIXTURE_ENV_VAR}` env var must point at a JSON file "
            "carrying N≥10 cold-start iteration records (see scenario "
            "docstring). Production dependency: AZ-595 + AZ-444."
        )

    iterations = _load_iterations(fixture_path)
    if not iterations:
        pytest.fail(
            f"NFT-PERF-03: TTFF fixture {fixture_path} contains zero "
            "iterations. AZ-430 requires N≥10."
        )

    report = te.evaluate(iterations)

    # Evidence: summary CSV plus one row per iteration.
    base = Path(evidence_dir) / "nft-perf-03" / f"{fc_adapter}-{vio_strategy}"
    te.write_csv_evidence(base.with_suffix(".csv"), report)
    te.write_per_iteration_csv(
        base.with_name(base.name + "-per-iter").with_suffix(".csv"),
        report,
    )

    # Tagged AC-1 to agree with the iteration-count assertion below, which
    # reports this same gate as AC-1 (it was previously tagged AC-3, the
    # id of the p95 gate).
    nfr_recorder.record_metric(
        "nft_perf_03.iteration_count", float(report.iteration_count), ac_id="AC-1"
    )
    nfr_recorder.record_metric(
        "nft_perf_03.missed_starts", float(report.missed_starts)
    )
    # Statistics are only recorded when the report carries them.
    if report.p50_s is not None:
        nfr_recorder.record_metric("nft_perf_03.ttff_s_p50", float(report.p50_s))
    if report.p95_s is not None:
        nfr_recorder.record_metric(
            "nft_perf_03.ttff_s_p95", float(report.p95_s), ac_id="AC-3"
        )
    if report.max_s is not None:
        nfr_recorder.record_metric(
            "nft_perf_03.ttff_s_max", float(report.max_s), ac_id="AC-4"
        )

    assert report.passes_iteration_count, (
        f"AC-1 (iteration count): collected only {report.iteration_count} "
        f"iterations; require N ≥ {report.min_iteration_count}"
    )
    assert report.passes_p95, (
        f"AC-3: p95(TTFF) = {report.p95_s} s > budget "
        f"{report.p95_budget_s} s "
        f"(missed_starts={report.missed_starts})"
    )
    assert report.passes_max, (
        f"AC-4: max(TTFF) = {report.max_s} s > budget "
        f"{report.max_budget_s} s "
        f"(missed_starts={report.missed_starts})"
    )
def _resolve_ttff_fixture_path() -> Path:
    """Return the path at which the NFT-PERF-03 TTFF fixture is expected.

    An env-var value wins: it is used as-is when absolute or when no replay
    directory is configured, and joined onto the replay directory
    otherwise. With the env var unset, the default file name under the
    replay directory is used; with no replay directory either, a
    placeholder path naming the unset env var is returned.
    """
    from runner.helpers import sitl_observer

    configured = os.environ.get(TTFF_FIXTURE_ENV_VAR, "").strip()
    replay_root = sitl_observer.replay_dir()

    if configured:
        candidate = Path(configured)
        if candidate.is_absolute() or replay_root is None:
            return candidate
        return replay_root / candidate

    if replay_root is not None:
        return replay_root / TTFF_DEFAULT_FIXTURE_NAME
    return Path(f"<{TTFF_FIXTURE_ENV_VAR}-unset>")
def _load_iterations(fixture_path: Path) -> list[te.ColdStartIteration]:
payload = json.loads(fixture_path.read_text())
raw = payload.get("iterations") if isinstance(payload, dict) else None
if not isinstance(raw, list):
pytest.fail(
f"NFT-PERF-03: TTFF fixture {fixture_path} must be a JSON object "
f'with key "iterations" → list; got top-level '
f"type={type(payload).__name__}"
)
parsed: list[te.ColdStartIteration] = []
for idx, entry in enumerate(raw):
if not isinstance(entry, dict):
pytest.fail(
f"NFT-PERF-03: iterations[{idx}] in {fixture_path} must be "
f"an object; got {type(entry).__name__}"
)
iter_id = str(entry.get("iteration_id") or f"iter-{idx:02d}")
try:
arrival = int(entry["first_frame_arrival_ms"])
except (KeyError, TypeError, ValueError) as exc:
pytest.fail(
f"NFT-PERF-03: iterations[{idx}].first_frame_arrival_ms "
f"in {fixture_path} must be an int ms timestamp: {exc}"
)
first_emission_raw = entry.get("first_emission_ms")
first_emission: int | None
if first_emission_raw is None:
first_emission = None
else:
try:
first_emission = int(first_emission_raw)
except (TypeError, ValueError) as exc:
pytest.fail(
f"NFT-PERF-03: iterations[{idx}].first_emission_ms "
f"in {fixture_path} must be int or null: {exc}"
)
try:
parsed.append(
te.measure_iteration(
iter_id,
first_frame_arrival_ms=arrival,
first_emission_ms=first_emission,
)
)
except ValueError as exc:
pytest.fail(
f"NFT-PERF-03: iterations[{idx}] in {fixture_path} rejected: {exc}"
)
return parsed
@@ -0,0 +1,193 @@
"""NFT-PERF-04 — Spoofing-promotion latency (AZ-431 / AC-NEW-2).
Replays N≥20 blackout+spoof events at randomized window starts; per
event measures ``t_label_switch_to_dead_reckoned - t_blackout_onset``;
asserts ``p95(latency) ≤ 600 ms``.
Tier-1 OR Tier-2. The pure-logic AC-1/AC-2 evaluators are covered by
``e2e/_unit_tests/helpers/test_spoof_promotion_evaluator.py``.
Production dependency surfaced to AZ-595 (fixture builder): the
``E2E_NFT_PERF_04_EVENTS_FIXTURE`` env var names a JSON file under
``E2E_SITL_REPLAY_DIR`` carrying the N≥20 sampled events. Each entry
encodes the injector-emitted ``blackout_onset_ms`` AND the per-event
sequence of outbound ``(monotonic_ms, source_label)`` samples observed
from SITL. Shape (validated at parse time):
{
"events": [
{
"event_id": "evt-01",
"blackout_onset_ms": 45123,
"samples": [
{"monotonic_ms": 45050, "source_label": "satellite_anchored"},
{"monotonic_ms": 45380, "source_label": "dead_reckoned"},
...
]
},
...
]
}
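When the env var is unset the scenario falls back to
``nft_perf_04_events.json`` under ``E2E_SITL_REPLAY_DIR``. If the resolved
file is missing or contains no events, the scenario fails (it does not
skip) with a message naming the fixture path.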
"""
from __future__ import annotations
import json
import os
from pathlib import Path
import pytest
from runner.helpers import spoof_promotion_evaluator as spe
# Env var naming the events fixture JSON; a relative value is resolved
# against the SITL replay directory when one is configured.
EVENTS_FIXTURE_ENV_VAR = "E2E_NFT_PERF_04_EVENTS_FIXTURE"
@pytest.mark.scenario_id("nft-perf-04")
@pytest.mark.traces_to("AC-NEW-2,AC-1,AC-2,AC-3")
def test_nft_perf_04_spoof_promotion_latency(
    fc_adapter: str,
    vio_strategy: str,
    evidence_dir,  # type: ignore[no-untyped-def]
    run_id: str,
    nfr_recorder,  # type: ignore[no-untyped-def]
    sitl_replay_ready: bool,
) -> None:
    """Gate the sampled event count (AC-1) and promotion-latency p95 (AC-2).

    Skips when the SITL replay is not prepared; fails when the events
    fixture is missing or empty. Otherwise evaluates the events, writes the
    summary and per-event CSV evidence, records NFR metrics, and asserts
    the two gates.
    """
    if not sitl_replay_ready:
        pytest.skip(
            "NFT-PERF-04 requires `E2E_SITL_REPLAY_DIR` to point at a "
            "prepared SITL replay fixture (AZ-595) containing N≥20 "
            "randomized-start blackout+spoof events. Pure-logic AC-1/AC-2 "
            "covered by e2e/_unit_tests/helpers/test_spoof_promotion_evaluator.py."
        )

    fixture_path = _resolve_events_fixture_path()
    if not fixture_path.is_file():
        pytest.fail(
            f"NFT-PERF-04: events fixture not found at {fixture_path}. "
            f"`{EVENTS_FIXTURE_ENV_VAR}` env var must point at a JSON file "
            "(absolute path, or relative to `E2E_SITL_REPLAY_DIR`) carrying "
            "the N≥20 sampled blackout+spoof events (see scenario docstring "
            "for shape). Production dependency: AZ-595 fixture builder."
        )

    events = _load_events(fixture_path)
    if not events:
        pytest.fail(
            f"NFT-PERF-04: events fixture {fixture_path} contains zero events. "
            "Fail-loud per the tests-as-gates discipline; AZ-431 requires N≥20."
        )

    report = spe.evaluate(events)

    # Evidence: summary CSV plus one row per event.
    base = Path(evidence_dir) / "nft-perf-04" / f"{fc_adapter}-{vio_strategy}"
    per_event_csv = base.with_name(base.name + "-per-event").with_suffix(".csv")
    spe.write_csv_evidence(base.with_suffix(".csv"), report)
    spe.write_per_event_csv(per_event_csv, report)

    # Counters are always recorded.
    nfr_recorder.record_metric(
        "nft_perf_04.event_count", float(report.event_count), ac_id="AC-1"
    )
    nfr_recorder.record_metric(
        "nft_perf_04.missing_promotions", float(report.missing_promotions)
    )
    # Latency statistics are recorded only when the report carries them;
    # only p95 is tied to an acceptance criterion.
    latency_metrics = (
        ("nft_perf_04.latency_ms_p50", report.p50_ms, {}),
        ("nft_perf_04.latency_ms_p95", report.p95_ms, {"ac_id": "AC-2"}),
        ("nft_perf_04.latency_ms_p99", report.p99_ms, {}),
        ("nft_perf_04.latency_ms_max", report.max_ms, {}),
    )
    for metric_name, metric_value, extra in latency_metrics:
        if metric_value is not None:
            nfr_recorder.record_metric(metric_name, float(metric_value), **extra)

    assert report.passes_event_count, (
        f"AC-1: only {report.event_count} events sampled; "
        f"AC-NEW-2 requires N ≥ {report.min_event_count}"
    )
    assert report.passes_p95, (
        f"AC-2: p95(latency_ms) = {report.p95_ms} > budget "
        f"{report.budget_ms} ms (missing_promotions={report.missing_promotions})"
    )
def _resolve_events_fixture_path() -> Path:
    """Return the path at which the NFT-PERF-04 events fixture is expected.

    An env-var value wins: it is used as-is when absolute or when no replay
    directory is configured, and joined onto the replay directory
    otherwise. With the env var unset, ``nft_perf_04_events.json`` under
    the replay directory is used; with no replay directory either, a
    placeholder path naming the unset env var is returned.
    """
    from runner.helpers import sitl_observer

    replay_root = sitl_observer.replay_dir()
    configured = os.environ.get(EVENTS_FIXTURE_ENV_VAR, "").strip()

    if configured:
        candidate = Path(configured)
        if candidate.is_absolute() or replay_root is None:
            return candidate
        return replay_root / candidate

    if replay_root is not None:
        return replay_root / "nft_perf_04_events.json"
    return Path(f"<{EVENTS_FIXTURE_ENV_VAR}-unset>")
def _load_events(fixture_path: Path) -> list[spe.SpoofEvent]:
"""Parse the fixture into ``SpoofEvent`` list (fail-loud on malformed shape)."""
payload = json.loads(fixture_path.read_text())
raw_events = payload.get("events") if isinstance(payload, dict) else None
if not isinstance(raw_events, list):
pytest.fail(
f"NFT-PERF-04: events fixture {fixture_path} must be a JSON object "
f'with key "events" → list; got top-level type={type(payload).__name__}'
)
parsed: list[spe.SpoofEvent] = []
for idx, entry in enumerate(raw_events):
if not isinstance(entry, dict):
pytest.fail(
f"NFT-PERF-04: events[{idx}] in {fixture_path} must be an "
f"object; got {type(entry).__name__}"
)
event_id = entry.get("event_id") or f"evt-{idx:02d}"
try:
onset = int(entry["blackout_onset_ms"])
except (KeyError, TypeError, ValueError) as exc:
pytest.fail(
f"NFT-PERF-04: events[{idx}].blackout_onset_ms in "
f"{fixture_path} must be an integer ms timestamp: {exc}"
)
samples_raw = entry.get("samples")
if not isinstance(samples_raw, list):
pytest.fail(
f"NFT-PERF-04: events[{idx}].samples in {fixture_path} must "
f"be a list of {{monotonic_ms, source_label}} objects"
)
samples: list[spe.OutboundLabelSample] = []
for j, s in enumerate(samples_raw):
try:
samples.append(
spe.OutboundLabelSample(
monotonic_ms=int(s["monotonic_ms"]),
source_label=str(s["source_label"]),
)
)
except (KeyError, TypeError, ValueError) as exc:
pytest.fail(
f"NFT-PERF-04: events[{idx}].samples[{j}] in "
f"{fixture_path} malformed: {exc}"
)
parsed.append(
spe.SpoofEvent(
event_id=str(event_id),
blackout_onset_ms=onset,
samples=tuple(samples),
)
)
return parsed