Files
gps-denied-onboard/e2e/runner/helpers/ttff_evaluator.py
T
Oleksandr Bezdieniezhnykh 73cd632e95 [AZ-428] [AZ-429] [AZ-430] [AZ-431] Add NFT-PERF-01..04 perf scenarios
Batch 85 — 4 Performance NFT scenarios + pure-logic evaluators.

- NFT-PERF-01 (AZ-428, Tier-2): two-config e2e latency p95 ≤ 400 ms
  (K=3@25°C, K=2 hybrid@50°C) + frame-drop ≤10% + informational per-stage
  partition recording (D-CROSS-LATENCY-1).
- NFT-PERF-02 (AZ-429): inter-emit p95 ≤ 350 ms + no ≥3 missed-emit
  windows. fc-adapter-aware SITL timestamp extraction (tlog vs MSP).
- NFT-PERF-03 (AZ-430, Tier-2): cold-start TTFF p95 ≤ 30 s AND max ≤ 45 s
  over N≥10 iterations.
- NFT-PERF-04 (AZ-431): spoof-promotion latency p95 ≤ 600 ms over N≥20
  randomized-start blackout+spoof events.

All scenarios consume external fixtures (AZ-595 dependency surfaced) and
fail loudly when fixtures are missing or empty. Public-boundary
discipline preserved — evaluators do NOT import src/gps_denied_onboard.

Tests: 60 new unit tests pass; 24 scenarios collect (4 tests × 2 fc × 3
vio). Code review: PASS_WITH_WARNINGS — 1 Medium (fixed in batch),
3 Low (production-dependency surfacings + future hygiene).

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-17 16:46:49 +03:00

218 lines
6.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Cold-start TTFF evaluator for NFT-PERF-03 (AZ-430 / AC-NEW-1).
The SUT promises a Time-To-First-Fix budget of 30 s p95 (and a relaxed
max ceiling of 45 s for tail-latency outlier detection) when started
from cold on Tier-2 (Jetson Orin Nano Super) hardware. AZ-430 collects
N≥``MIN_ITERATION_COUNT`` cold-start TTFF samples; this module owns the
pure-logic side: distribution stats + budget gates + evidence CSV.
Per AZ-430:
* AC-3: ``p95(TTFF) ≤ TTFF_P95_BUDGET_S`` (=30 s).
* AC-4: ``max(TTFF) ≤ TTFF_MAX_BUDGET_S`` (=45 s).
Public-boundary discipline: does NOT import any
``src/gps_denied_onboard`` symbol. Re-uses
``streaming_evaluator._percentile`` for the linear-interpolation p95.
"""
from __future__ import annotations
import csv
from dataclasses import dataclass
from pathlib import Path
from typing import Sequence
from .streaming_evaluator import _percentile
# AC-3 gate: p95 of the cold-start TTFF samples must not exceed this (seconds).
TTFF_P95_BUDGET_S = 30.0
# AC-4 gate: the slowest cold-start TTFF sample must not exceed this (seconds).
TTFF_MAX_BUDGET_S = 45.0
# Default minimum number of cold-start iterations for the iteration-count gate.
MIN_ITERATION_COUNT = 10
@dataclass(frozen=True)
class ColdStartIteration:
    """One cold-start iteration outcome.

    ``ttff_s`` is the measured
    ``t_first_emission - t_first_frame_arrival`` in seconds. ``None`` means
    the iteration timed out before producing its first emission — a
    categorical miss (treated as budget breach for the aggregate verdict).
    """

    # Caller-chosen label identifying the iteration in evidence files.
    iteration_id: str
    # Timestamp of the first frame arrival, in milliseconds.
    first_frame_arrival_ms: int
    # Timestamp of the first emission, in milliseconds; ``None`` on timeout.
    first_emission_ms: int | None
    # Time-to-first-fix in seconds; ``None`` when nothing was emitted.
    ttff_s: float | None

    @property
    def emitted(self) -> bool:
        """Return ``True`` when a first-emission timestamp was recorded."""
        return self.first_emission_ms is not None
@dataclass(frozen=True)
class TtffReport:
    """Aggregate NFT-PERF-03 result over N iterations.

    Holds the distribution statistics together with the budgets they are
    judged against, and exposes one boolean property per gate plus the
    combined ``passes`` verdict.
    """

    iterations: tuple[ColdStartIteration, ...]
    p50_s: float | None
    p95_s: float | None
    p99_s: float | None
    max_s: float | None
    missed_starts: int  # iterations where ``ttff_s is None``
    min_iteration_count: int
    p95_budget_s: float
    max_budget_s: float

    @property
    def iteration_count(self) -> int:
        """Number of iterations recorded in this report."""
        return len(self.iterations)

    @property
    def passes_iteration_count(self) -> bool:
        """Whether at least ``min_iteration_count`` iterations were recorded."""
        return self.iteration_count >= self.min_iteration_count

    @property
    def passes_p95(self) -> bool:
        """Whether p95 is within ``p95_budget_s``.

        Any missed start, or a missing p95 value, fails the gate outright.
        """
        return (
            self.missed_starts == 0
            and self.p95_s is not None
            and self.p95_s <= self.p95_budget_s
        )

    @property
    def passes_max(self) -> bool:
        """Whether the maximum is within ``max_budget_s``.

        Any missed start, or a missing maximum, fails the gate outright.
        """
        return (
            self.missed_starts == 0
            and self.max_s is not None
            and self.max_s <= self.max_budget_s
        )

    @property
    def passes(self) -> bool:
        """Overall verdict: iteration-count, p95 and max gates all pass."""
        return self.passes_iteration_count and self.passes_p95 and self.passes_max
def measure_iteration(
    iteration_id: str,
    *,
    first_frame_arrival_ms: int,
    first_emission_ms: int | None,
) -> ColdStartIteration:
    """Project a captured iteration into a typed sample.

    Args:
        iteration_id: Label carried through to the resulting sample.
        first_frame_arrival_ms: Timestamp of the first frame arrival, in
            milliseconds. Coerced with ``int()``.
        first_emission_ms: Timestamp of the first emission, in milliseconds,
            or ``None`` when the iteration produced no emission.

    Returns:
        A ``ColdStartIteration``. When ``first_emission_ms`` is ``None`` both
        ``first_emission_ms`` and ``ttff_s`` are ``None``; otherwise
        ``ttff_s`` is the millisecond delta divided by 1000.

    Raises:
        ValueError: Negative TTFF (emission before first frame) is a
            fixture-shape error, raised so the breach surfaces immediately
            instead of producing a non-sensible report.
    """
    arrival_ms = int(first_frame_arrival_ms)

    # No emission recorded: categorical miss, nothing to measure.
    if first_emission_ms is None:
        return ColdStartIteration(
            iteration_id=iteration_id,
            first_frame_arrival_ms=arrival_ms,
            first_emission_ms=None,
            ttff_s=None,
        )

    emission_ms = int(first_emission_ms)
    delta_ms = emission_ms - arrival_ms
    if delta_ms < 0:
        raise ValueError(
            f"ttff iteration {iteration_id}: first_emission_ms "
            f"({first_emission_ms}) precedes first_frame_arrival_ms "
            f"({first_frame_arrival_ms}); fixture shape invalid"
        )

    return ColdStartIteration(
        iteration_id=iteration_id,
        first_frame_arrival_ms=arrival_ms,
        first_emission_ms=emission_ms,
        ttff_s=delta_ms / 1000.0,
    )
def evaluate(
    iterations: Sequence[ColdStartIteration],
    *,
    p95_budget_s: float = TTFF_P95_BUDGET_S,
    max_budget_s: float = TTFF_MAX_BUDGET_S,
    min_iteration_count: int = MIN_ITERATION_COUNT,
) -> TtffReport:
    """Aggregate iterations into AC-3 + AC-4 verdicts.

    Args:
        iterations: Cold-start samples to aggregate.
        p95_budget_s: Budget stored on the report for the p95 gate.
        max_budget_s: Budget stored on the report for the max gate.
        min_iteration_count: Minimum sample count stored on the report.

    Returns:
        A ``TtffReport`` whose percentiles and maximum are computed over the
        samples that carry a ``ttff_s`` value; every other sample is counted
        in ``missed_starts``.
    """
    # Materialise once so a one-shot iterable is not exhausted by the first
    # pass and the report always stores an immutable tuple.
    samples = tuple(iterations)
    valid = [it.ttff_s for it in samples if it.ttff_s is not None]

    # NOTE(review): with no valid samples this relies on ``_percentile``
    # accepting an empty list — presumably it returns ``None``; confirm
    # against streaming_evaluator.
    return TtffReport(
        iterations=samples,
        p50_s=_percentile(valid, 50.0),
        p95_s=_percentile(valid, 95.0),
        p99_s=_percentile(valid, 99.0),
        max_s=max(valid) if valid else None,
        # A sample either contributes to ``valid`` or is a miss, so the two
        # counts always add up to the number of iterations.
        missed_starts=len(samples) - len(valid),
        min_iteration_count=min_iteration_count,
        p95_budget_s=p95_budget_s,
        max_budget_s=max_budget_s,
    )
def write_csv_evidence(out_path: Path, report: TtffReport) -> Path:
    """Aggregate-summary CSV (one row per run).

    Writes a header row followed by a single data row. Optional statistics
    are written as an empty cell when ``None`` and with three decimals
    otherwise; gate verdicts are written as ``true`` / ``false``.

    Args:
        out_path: Destination file; missing parent directories are created.
        report: Aggregated report to serialise.

    Returns:
        The destination path as a ``Path``.
    """

    def fmt_optional(value: float | None) -> str:
        # Empty cell for a statistic that could not be computed.
        return "" if value is None else f"{value:.3f}"

    def fmt_flag(ok: bool) -> str:
        return "true" if ok else "false"

    out_path = Path(out_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)

    header = [
        "iteration_count",
        "min_iteration_count",
        "missed_starts",
        "p50_s",
        "p95_s",
        "p99_s",
        "max_s",
        "p95_budget_s",
        "max_budget_s",
        "ac1_iteration_count_passes",
        "ac3_p95_passes",
        "ac4_max_passes",
        "passes",
    ]
    summary = [
        report.iteration_count,
        report.min_iteration_count,
        report.missed_starts,
        fmt_optional(report.p50_s),
        fmt_optional(report.p95_s),
        fmt_optional(report.p99_s),
        fmt_optional(report.max_s),
        f"{report.p95_budget_s:.3f}",
        f"{report.max_budget_s:.3f}",
        fmt_flag(report.passes_iteration_count),
        fmt_flag(report.passes_p95),
        fmt_flag(report.passes_max),
        fmt_flag(report.passes),
    ]

    # ``newline=""`` lets the csv module control line endings; the explicit
    # encoding keeps the evidence file identical across platforms.
    with out_path.open("w", newline="", encoding="utf-8") as fh:
        writer = csv.writer(fh)
        writer.writerow(header)
        writer.writerow(summary)
    return out_path
def write_per_iteration_csv(out_path: Path, report: TtffReport) -> Path:
    """One row per iteration — detail used during AC-4 outlier investigation.

    Writes a header row followed by one row per entry in
    ``report.iterations``. A missing emission timestamp or TTFF is written
    as an empty cell; TTFF is written with three decimals.

    Args:
        out_path: Destination file; missing parent directories are created.
        report: Report whose iterations are serialised.

    Returns:
        The destination path as a ``Path``.
    """
    out_path = Path(out_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # ``iteration_id`` is free text, so pin the encoding rather than rely on
    # the platform default, which can reject non-ASCII identifiers.
    with out_path.open("w", newline="", encoding="utf-8") as fh:
        writer = csv.writer(fh)
        writer.writerow(
            [
                "iteration_id",
                "first_frame_arrival_ms",
                "first_emission_ms",
                "ttff_s",
            ]
        )
        writer.writerows(
            [
                it.iteration_id,
                it.first_frame_arrival_ms,
                "" if it.first_emission_ms is None else it.first_emission_ms,
                "" if it.ttff_s is None else f"{it.ttff_s:.3f}",
            ]
            for it in report.iterations
        )
    return out_path