mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 17:31:13 +00:00
[AZ-428] [AZ-429] [AZ-430] [AZ-431] Add NFT-PERF-01..04 perf scenarios
Batch 85 — 4 Performance NFT scenarios + pure-logic evaluators. - NFT-PERF-01 (AZ-428, Tier-2): two-config e2e latency p95 ≤ 400 ms (K=3@25°C, K=2 hybrid@50°C) + frame-drop ≤10% + informational per-stage partition recording (D-CROSS-LATENCY-1). - NFT-PERF-02 (AZ-429): inter-emit p95 ≤ 350 ms + no ≥3 missed-emit windows. fc-adapter-aware SITL timestamp extraction (tlog vs MSP). - NFT-PERF-03 (AZ-430, Tier-2): cold-start TTFF p95 ≤ 30 s AND max ≤ 45 s over N≥10 iterations. - NFT-PERF-04 (AZ-431): spoof-promotion latency p95 ≤ 600 ms over N≥20 randomized-start blackout+spoof events. All scenarios consume external fixtures (AZ-595 dependency surfaced) and fail loudly when fixtures are missing or empty. Public-boundary discipline preserved — evaluators do NOT import src/gps_denied_onboard. Tests: 60 new unit tests pass; 24 scenarios collect (4 tests × 2 fc × 3 vio). Code review: PASS_WITH_WARNINGS — 1 Medium (fixed in batch), 3 Low (production-dependency surfacings + future hygiene). Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,251 @@
|
||||
"""End-to-end latency evaluator for NFT-PERF-01 (AZ-428 / AC-4.1).
|
||||
|
||||
D-CROSS-LATENCY-1 fixes a hard p95 budget of 400 ms across two
|
||||
configurations:
|
||||
|
||||
* (a) K=3 baseline at +25 °C ambient.
|
||||
* (b) K=2 + Jacobian-cov hybrid auto-degrade at +50 °C ambient.
|
||||
|
||||
This module owns the pure-logic side: distribution stats, frame-drop
|
||||
accounting (AC-4), and informational per-stage partition recording
|
||||
(AC-5). It does NOT import anything from ``src/gps_denied_onboard``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Sequence
|
||||
|
||||
from .streaming_evaluator import _percentile
|
||||
|
||||
# Hard p95 end-to-end latency budget (ms); default for ``p95_budget_ms``.
LATENCY_P95_BUDGET_MS = 400.0
# Largest tolerated share of expected frames that may be missing.
FRAME_DROP_RATIO_BUDGET = 0.1
# Default expected frame count for one run: 3 Hz × 300 s.
DEFAULT_EXPECTED_FRAMES = 3 * 300
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class FrameLatencySample:
    """Capture/emit timestamp pair for a single frame.

    The end-to-end latency is derived on demand as
    ``t_emit_at_sitl_ms - t_capture_ms``.
    """

    frame_id: str
    t_capture_ms: int
    t_emit_at_sitl_ms: int

    @property
    def latency_ms(self) -> float:
        """Emit-minus-capture delta in milliseconds, as a float."""
        elapsed = self.t_emit_at_sitl_ms - self.t_capture_ms
        return float(elapsed)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class StagePartition:
    """Latency percentiles recorded for one pipeline stage.

    Informational only (AC-5): nothing in this record carries a pass/fail
    threshold.
    """

    stage_name: str
    # Percentiles are ``None`` when the stage had no readings.
    p50_ms: float | None
    p95_ms: float | None
    p99_ms: float | None
    sample_count: int
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class LatencyReport:
    """Latency and frame-drop verdict for a single configuration.

    Bundles the raw samples, their distribution statistics, the budgets
    they are judged against, and the informational stage partitions.
    """

    config_id: str  # "k3-25c" / "k2-hybrid-50c"
    samples: tuple[FrameLatencySample, ...]
    expected_frame_count: int
    # Distribution statistics; ``None`` when there were no samples.
    p50_ms: float | None
    p95_ms: float | None
    p99_ms: float | None
    max_ms: float | None
    frame_drop_ratio: float
    stage_partitions: tuple[StagePartition, ...]
    p95_budget_ms: float
    frame_drop_budget: float
    # Recorded as-is; not consulted by any of the ``passes*`` properties.
    chamber_unavailable: bool

    @property
    def sample_count(self) -> int:
        """Number of frame samples carried by this report."""
        return len(self.samples)

    @property
    def passes_p95(self) -> bool:
        """True when a p95 exists and does not exceed ``p95_budget_ms``."""
        if self.p95_ms is None:
            return False
        return self.p95_ms <= self.p95_budget_ms

    @property
    def passes_frame_drop(self) -> bool:
        """True when the drop ratio is within ``frame_drop_budget``."""
        return self.frame_drop_ratio <= self.frame_drop_budget

    @property
    def passes(self) -> bool:
        """Overall verdict: both the p95 gate and the frame-drop gate hold."""
        if not self.passes_p95:
            return False
        return self.passes_frame_drop
|
||||
|
||||
|
||||
def measure_frame(
    frame_id: str, *, t_capture_ms: int, t_emit_at_sitl_ms: int
) -> FrameLatencySample:
    """Build a typed latency sample from one frame's two timestamps.

    Raises ``ValueError`` when the emit timestamp is earlier than the
    capture timestamp: a negative latency means the fixture is malformed,
    so it is rejected instead of being folded into the statistics.
    Both timestamps are coerced with ``int()`` in the returned sample.
    """
    emitted_before_capture = t_emit_at_sitl_ms < t_capture_ms
    if emitted_before_capture:
        message = (
            f"latency frame {frame_id}: t_emit_at_sitl_ms "
            f"({t_emit_at_sitl_ms}) precedes t_capture_ms "
            f"({t_capture_ms}); fixture shape invalid"
        )
        raise ValueError(message)
    capture = int(t_capture_ms)
    emit = int(t_emit_at_sitl_ms)
    return FrameLatencySample(
        frame_id=frame_id, t_capture_ms=capture, t_emit_at_sitl_ms=emit
    )
|
||||
|
||||
|
||||
def evaluate(
    config_id: str,
    samples: Sequence[FrameLatencySample],
    stage_samples: dict[str, Sequence[float]] | None = None,
    *,
    expected_frame_count: int = DEFAULT_EXPECTED_FRAMES,
    p95_budget_ms: float = LATENCY_P95_BUDGET_MS,
    frame_drop_budget: float = FRAME_DROP_RATIO_BUDGET,
    chamber_unavailable: bool = False,
) -> LatencyReport:
    """Fold one configuration's frame samples into a ``LatencyReport``.

    ``stage_samples`` maps a stage name to its per-frame stage-latency
    readings (ms); each stage is summarised into percentiles that are
    recorded but never gated (AC-5 is informational).

    The frame-drop ratio is ``(expected - received) / expected`` where
    ``received`` is the sample count clamped to ``expected_frame_count``,
    so surplus samples never produce a negative ratio.

    Raises ``ValueError`` when ``expected_frame_count`` is not positive.
    """
    per_frame_ms = [sample.latency_ms for sample in samples]
    if expected_frame_count <= 0:
        raise ValueError(
            f"expected_frame_count must be >0, got {expected_frame_count}"
        )
    # Clamp so that more samples than expected count as "no drops".
    received = min(len(samples), expected_frame_count)
    missing = expected_frame_count - received
    stage_table = _partition_stage_samples({} if not stage_samples else stage_samples)
    median = _percentile(per_frame_ms, 50.0)
    p95 = _percentile(per_frame_ms, 95.0)
    p99 = _percentile(per_frame_ms, 99.0)
    peak = max(per_frame_ms, default=None)
    return LatencyReport(
        config_id=config_id,
        samples=tuple(samples),
        expected_frame_count=expected_frame_count,
        p50_ms=median,
        p95_ms=p95,
        p99_ms=p99,
        max_ms=peak,
        frame_drop_ratio=missing / expected_frame_count,
        stage_partitions=tuple(stage_table),
        p95_budget_ms=p95_budget_ms,
        frame_drop_budget=frame_drop_budget,
        chamber_unavailable=chamber_unavailable,
    )
|
||||
|
||||
|
||||
def _partition_stage_samples(
    stage_samples: dict[str, Sequence[float]],
) -> list[StagePartition]:
    """Summarise each stage's readings into a ``StagePartition``.

    Stages are emitted in sorted-name order so the output is stable
    regardless of dict insertion order.
    """

    def _summarise(name: str) -> StagePartition:
        # Copy to a list so any sequence type is accepted.
        readings = list(stage_samples[name])
        return StagePartition(
            stage_name=name,
            p50_ms=_percentile(readings, 50.0),
            p95_ms=_percentile(readings, 95.0),
            p99_ms=_percentile(readings, 99.0),
            sample_count=len(readings),
        )

    return [_summarise(name) for name in sorted(stage_samples)]
|
||||
|
||||
|
||||
def write_csv_evidence(out_path: Path, reports: Sequence[LatencyReport]) -> Path:
    """Write the per-configuration summary CSV (header plus one row each).

    Creates the parent directory if needed, overwrites ``out_path`` and
    returns it. Missing statistics are written as empty cells and the
    verdict columns as ``true`` / ``false``.
    """

    def _ms(value: float | None) -> str:
        return "" if value is None else f"{value:.3f}"

    def _flag(state: bool) -> str:
        return "true" if state else "false"

    header = (
        "config_id",
        "sample_count",
        "expected_frame_count",
        "frame_drop_ratio",
        "p50_ms",
        "p95_ms",
        "p99_ms",
        "max_ms",
        "p95_budget_ms",
        "frame_drop_budget",
        "chamber_unavailable",
        "ac2_or_ac3_p95_passes",
        "ac4_frame_drop_passes",
        "passes",
    )
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", newline="") as fh:
        writer = csv.writer(fh)
        writer.writerow(header)
        for rep in reports:
            writer.writerow(
                (
                    rep.config_id,
                    rep.sample_count,
                    rep.expected_frame_count,
                    f"{rep.frame_drop_ratio:.4f}",
                    _ms(rep.p50_ms),
                    _ms(rep.p95_ms),
                    _ms(rep.p99_ms),
                    _ms(rep.max_ms),
                    f"{rep.p95_budget_ms:.3f}",
                    f"{rep.frame_drop_budget:.4f}",
                    _flag(rep.chamber_unavailable),
                    _flag(rep.passes_p95),
                    _flag(rep.passes_frame_drop),
                    _flag(rep.passes),
                )
            )
    return out_path
|
||||
|
||||
|
||||
def write_per_frame_csv(out_path: Path, reports: Sequence[LatencyReport]) -> Path:
    """Write the detail CSV: one row for every frame of every report.

    Intended for outlier investigation. Creates the parent directory if
    needed, overwrites ``out_path`` and returns it.
    """
    columns = ("config_id", "frame_id", "t_capture_ms", "t_emit_at_sitl_ms", "latency_ms")
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", newline="") as fh:
        writer = csv.writer(fh)
        writer.writerow(columns)
        writer.writerows(
            (
                rep.config_id,
                frame.frame_id,
                frame.t_capture_ms,
                frame.t_emit_at_sitl_ms,
                f"{frame.latency_ms:.3f}",
            )
            for rep in reports
            for frame in rep.samples
        )
    return out_path
|
||||
|
||||
|
||||
def write_partition_csv(out_path: Path, reports: Sequence[LatencyReport]) -> Path:
    """Write the per-stage partition table (AC-5 informational evidence).

    One row per stage partition per report; percentiles that are ``None``
    become empty cells. Creates the parent directory if needed,
    overwrites ``out_path`` and returns it.
    """

    def _ms(value: float | None) -> str:
        return "" if value is None else f"{value:.3f}"

    columns = ("config_id", "stage_name", "sample_count", "p50_ms", "p95_ms", "p99_ms")
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", newline="") as fh:
        writer = csv.writer(fh)
        writer.writerow(columns)
        for rep in reports:
            for stage in rep.stage_partitions:
                writer.writerow(
                    (
                        rep.config_id,
                        stage.stage_name,
                        stage.sample_count,
                        _ms(stage.p50_ms),
                        _ms(stage.p95_ms),
                        _ms(stage.p99_ms),
                    )
                )
    return out_path
|
||||
@@ -0,0 +1,222 @@
|
||||
"""Spoofing-promotion latency evaluator for NFT-PERF-04 (AZ-431 / AC-NEW-2).
|
||||
|
||||
Per AC-NEW-2 the time from a blackout+spoof event to the SUT correctly
|
||||
labeling its emission ``dead_reckoned`` must satisfy
|
||||
``p95(latency) ≤ SPOOF_PROMOTION_BUDGET_MS`` (=600 ms).
|
||||
|
||||
The scenario test gathers N≥``MIN_EVENT_COUNT`` events at randomized
|
||||
window starts (the random sampling is owned by the fixture builder —
|
||||
AZ-431 is statistical, FT-N-04 / AZ-426 is functional), measures the
|
||||
per-event ``t_label_switch_to_dead_reckoned − t_blackout_onset``, and
|
||||
runs the aggregate p95 check via ``evaluate``.
|
||||
|
||||
Public-boundary discipline: does NOT import any
|
||||
``src/gps_denied_onboard`` symbol. Reuses
|
||||
``runner.helpers.streaming_evaluator._percentile`` for the linear-
|
||||
interpolation p95 — both NFT-PERF tests measure latencies as the same
|
||||
shape of distribution.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Sequence
|
||||
|
||||
from .streaming_evaluator import _percentile
|
||||
|
||||
# AC-NEW-2: p95 promotion-latency budget in milliseconds (Tier-1 or Tier-2).
SPOOF_PROMOTION_BUDGET_MS = 600.0
# Minimum number of events for the statistic to count (AZ-431 default N=20).
MIN_EVENT_COUNT = 20
# Source label whose first appearance after onset marks the promotion.
DEAD_RECKONED_LABEL = "dead_reckoned"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class OutboundLabelSample:
    """A single outbound SUT emission, reduced to what AC-NEW-2 needs."""

    monotonic_ms: int  # emission timestamp
    source_label: str  # label attached to the emission
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SpoofEvent:
    """A blackout+spoof event together with the emissions seen around it.

    ``samples`` is expected to span from ``blackout_onset_ms`` to beyond
    the first anticipated ``dead_reckoned`` emission; otherwise the event
    will be reported as having no promotion.
    """

    event_id: str
    blackout_onset_ms: int
    samples: Sequence[OutboundLabelSample]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class EventLatencyReport:
    """Promotion-latency outcome for one event.

    When no ``dead_reckoned`` emission followed ``blackout_onset_ms`` both
    ``first_dead_reckoned_ms`` and ``latency_ms`` are ``None``; the
    aggregate verdict treats that as a categorical miss.
    """

    event_id: str
    blackout_onset_ms: int
    first_dead_reckoned_ms: int | None
    latency_ms: int | None

    @property
    def has_promotion(self) -> bool:
        """True when a first ``dead_reckoned`` timestamp was recorded."""
        promoted_at = self.first_dead_reckoned_ms
        return promoted_at is not None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SpoofPromotionReport:
    """NFT-PERF-04 aggregate over all measured events."""

    events: tuple[EventLatencyReport, ...]
    # Distribution statistics; ``None`` when no event produced a latency.
    p50_ms: float | None
    p95_ms: float | None
    p99_ms: float | None
    max_ms: float | None
    missing_promotions: int
    min_event_count: int
    budget_ms: float

    @property
    def event_count(self) -> int:
        """Number of events in the report, promoted or not."""
        return len(self.events)

    @property
    def passes_event_count(self) -> bool:
        """True when at least ``min_event_count`` events were sampled."""
        return self.event_count >= self.min_event_count

    @property
    def passes_p95(self) -> bool:
        """True when no promotion is missing and p95 is within budget."""
        if self.missing_promotions != 0:
            return False
        if self.p95_ms is None:
            return False
        return self.p95_ms <= self.budget_ms

    @property
    def passes(self) -> bool:
        """Overall verdict: event-count gate and p95 gate both hold."""
        if not self.passes_event_count:
            return False
        return self.passes_p95
|
||||
|
||||
|
||||
def measure_event_latency(event: SpoofEvent) -> EventLatencyReport:
    """Measure how long one event took to be labelled ``dead_reckoned``.

    The samples are ordered by ``monotonic_ms``; the first one that is not
    earlier than ``blackout_onset_ms`` and carries the dead-reckoned label
    fixes ``first_dead_reckoned_ms``, and the latency is its distance from
    the onset. If no sample qualifies, both fields are ``None``.
    """
    onset = event.blackout_onset_ms
    chronological = sorted(event.samples, key=lambda sample: sample.monotonic_ms)
    promoted_at = next(
        (
            sample.monotonic_ms
            for sample in chronological
            # Emissions before the onset cannot be a reaction to it.
            if not sample.monotonic_ms < onset
            and sample.source_label == DEAD_RECKONED_LABEL
        ),
        None,
    )
    if promoted_at is None:
        return EventLatencyReport(
            event_id=event.event_id,
            blackout_onset_ms=onset,
            first_dead_reckoned_ms=None,
            latency_ms=None,
        )
    return EventLatencyReport(
        event_id=event.event_id,
        blackout_onset_ms=onset,
        first_dead_reckoned_ms=int(promoted_at),
        latency_ms=int(promoted_at - onset),
    )
|
||||
|
||||
|
||||
def evaluate(
    events: Sequence[SpoofEvent],
    *,
    budget_ms: float = SPOOF_PROMOTION_BUDGET_MS,
    min_event_count: int = MIN_EVENT_COUNT,
) -> SpoofPromotionReport:
    """Aggregate events into the AC-1 (N sampled) + AC-2 (p95 ≤ budget) report.

    Each event is measured with ``measure_event_latency``. Events that
    yield a latency feed the percentile and max statistics; events that do
    not are counted in ``missing_promotions``, which forces the p95 gate
    to fail.
    """
    per_event = tuple(measure_event_latency(e) for e in events)
    # Coerce to float so ``max_ms`` matches its ``float | None`` annotation.
    valid = [float(r.latency_ms) for r in per_event if r.latency_ms is not None]
    # A record is a miss when it contributes no latency; counting on the
    # same field as the ``valid`` filter guarantees that no record can be
    # excluded from the statistics without also being reported as missing.
    missing = sum(
        1 for r in per_event if r.latency_ms is None or not r.has_promotion
    )
    return SpoofPromotionReport(
        events=per_event,
        p50_ms=_percentile(valid, 50.0),
        p95_ms=_percentile(valid, 95.0),
        p99_ms=_percentile(valid, 99.0),
        max_ms=max(valid) if valid else None,
        missing_promotions=missing,
        min_event_count=min_event_count,
        budget_ms=budget_ms,
    )
|
||||
|
||||
|
||||
def write_csv_evidence(out_path: Path, report: SpoofPromotionReport) -> Path:
    """Write the aggregate summary CSV: a header and a single data row.

    Creates the parent directory if needed, overwrites ``out_path`` and
    returns it. ``None`` statistics become empty cells; verdicts are
    written as ``true`` / ``false``.
    """

    def _ms(value: float | None) -> str:
        return "" if value is None else f"{value:.3f}"

    def _flag(state: bool) -> str:
        return "true" if state else "false"

    header = (
        "event_count",
        "min_event_count",
        "missing_promotions",
        "p50_ms",
        "p95_ms",
        "p99_ms",
        "max_ms",
        "budget_ms",
        "ac1_passes",
        "ac2_passes",
        "passes",
    )
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", newline="") as fh:
        writer = csv.writer(fh)
        writer.writerow(header)
        summary = (
            report.event_count,
            report.min_event_count,
            report.missing_promotions,
            _ms(report.p50_ms),
            _ms(report.p95_ms),
            _ms(report.p99_ms),
            _ms(report.max_ms),
            f"{report.budget_ms:.3f}",
            _flag(report.passes_event_count),
            _flag(report.passes_p95),
            _flag(report.passes),
        )
        writer.writerow(summary)
    return out_path
|
||||
|
||||
|
||||
def write_per_event_csv(out_path: Path, report: SpoofPromotionReport) -> Path:
    """Write the detail CSV: onset, first dead-reckoned time and latency.

    One row per event; ``None`` values become empty cells. Creates the
    parent directory if needed, overwrites ``out_path`` and returns it.
    """

    def _cell(value: int | None) -> int | str:
        return "" if value is None else value

    columns = ("event_id", "blackout_onset_ms", "first_dead_reckoned_ms", "latency_ms")
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", newline="") as fh:
        writer = csv.writer(fh)
        writer.writerow(columns)
        writer.writerows(
            (
                outcome.event_id,
                outcome.blackout_onset_ms,
                _cell(outcome.first_dead_reckoned_ms),
                _cell(outcome.latency_ms),
            )
            for outcome in report.events
        )
    return out_path
|
||||
@@ -0,0 +1,314 @@
|
||||
"""Inter-emit interval evaluator for NFT-PERF-02 (AZ-429 / AC-4.4).
|
||||
|
||||
The SUT promises that estimates are streamed frame-by-frame, NOT batched.
|
||||
The contract is observable at the SITL boundary: the receipt timestamps of
|
||||
consecutive accepted ``GPS_INPUT`` (ArduPilot) / ``MSP2_SENSOR_GPS``
|
||||
(iNav) messages should track the configured target cadence with little
|
||||
jitter and never miss ≥3 consecutive emits.
|
||||
|
||||
This module owns the pure-logic side. The scenario test
|
||||
(``e2e/tests/performance/test_nft_perf_02_streaming.py``) is a thin
|
||||
adapter that reads timestamps from ``sitl_observer`` and asks the
|
||||
helpers below for the per-AC verdict.
|
||||
|
||||
ACs evaluated (per AZ-429):
|
||||
|
||||
* AC-1: ``p95(inter_emit_interval) ≤ STREAMING_P95_BUDGET_MS`` (=350 ms
|
||||
at the 3 Hz target = inter-frame × 1.05).
|
||||
* AC-2: no window contains ≥``MISSED_EMIT_WINDOW_LIMIT`` (=3) consecutive
|
||||
missed emits, where a "missed emit" is an interval >
|
||||
``MISSED_EMIT_RATIO`` (=2.0) × target inter-frame.
|
||||
|
||||
Public-boundary discipline: does NOT import any
|
||||
``src/gps_denied_onboard`` symbol; reads only float lists of SITL-side
|
||||
ms timestamps that the scenario adapter projects out of the boundary
|
||||
observers.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
from dataclasses import dataclass
|
||||
from math import floor
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Sequence
|
||||
|
||||
# Target emit cadence and the inter-frame period derived from it
# (1000 / 3 ≈ 333.333 ms).
TARGET_FRAME_RATE_HZ = 3.0
TARGET_INTER_FRAME_MS = 1000.0 / TARGET_FRAME_RATE_HZ
# AC-1 — p95 budget: inter-frame period × 1.05 at the 3 Hz target.
STREAMING_P95_BUDGET_MS = 350.0
# AC-2 — an interval longer than MISSED_EMIT_RATIO × the target period
# (≈ 666.667 ms at 3 Hz) counts as a missed emit; a run of
# MISSED_EMIT_WINDOW_LIMIT or more consecutive misses fails the check.
MISSED_EMIT_RATIO = 2.0
MISSED_EMIT_WINDOW_LIMIT = 3
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class InterEmitReport:
    """AC-1 outcome for one run: inter-emit interval statistics and budget."""

    sample_count: int
    interval_count: int  # = sample_count - 1
    # Interval statistics; ``None`` when there were no intervals.
    p50_ms: float | None
    p95_ms: float | None
    p99_ms: float | None
    max_ms: float | None
    target_inter_frame_ms: float  # recorded for evidence; not part of the gate
    budget_ms: float

    @property
    def passes_p95(self) -> bool:
        """True when a p95 exists and does not exceed ``budget_ms``."""
        if self.p95_ms is None:
            return False
        return self.p95_ms <= self.budget_ms
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class MissedEmitWindow:
    """A maximal run of back-to-back missed-emit intervals."""

    start_index: int  # index into the SORTED timestamp list (0-based)
    length: int  # number of consecutive missed intervals in the run
    start_ms: float  # timestamp of the sample that opens the run
    end_ms: float  # timestamp of the sample that closes the run
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class MissedEmitReport:
    """AC-2 outcome: every missed-emit window found, plus the verdict inputs."""

    missed_emit_threshold_ms: float
    longest_run: int
    windows: tuple[MissedEmitWindow, ...]
    limit: int

    @property
    def passes(self) -> bool:
        """True while the longest run stays strictly below ``limit``."""
        return self.limit > self.longest_run
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class StreamingReport:
    """Combined NFT-PERF-02 outcome (AC-1 + AC-2) for one parameterized run."""

    inter_emit: InterEmitReport
    missed_emits: MissedEmitReport

    @property
    def passes(self) -> bool:
        """True when the inter-emit p95 gate and the missed-emit gate hold."""
        if not self.inter_emit.passes_p95:
            return False
        return self.missed_emits.passes
|
||||
|
||||
|
||||
def _sorted_intervals_ms(emit_times_ms: Sequence[float]) -> list[float]:
    """Return the gaps between consecutive timestamps after sorting them.

    The timestamps are sorted first, so every gap is non-negative even if
    the caller passes an unsorted list (equal timestamps give a zero gap).
    Fewer than two timestamps produce an empty list.
    """
    if len(emit_times_ms) < 2:
        return []
    stamps = sorted(float(t) for t in emit_times_ms)
    # Pair each timestamp with its successor.
    return [later - earlier for earlier, later in zip(stamps, stamps[1:])]
|
||||
|
||||
|
||||
def _percentile(values: Sequence[float], q: float) -> float | None:
    """Linear-interpolation percentile (``numpy.percentile``-equivalent).

    Returns ``None`` when ``values`` is empty so callers can distinguish
    a no-data run from a zero-latency run. Accepts any real ``q`` in
    [0, 100]; anything outside that range (including NaN) is a programmer
    error and raises ``ValueError``.

    Inputs are coerced to ``float`` before sorting, so the result is a
    ``float`` even for integer inputs and for a single-element input.
    """
    if not 0.0 <= q <= 100.0:
        raise ValueError(f"percentile q must be in [0, 100], got {q!r}")
    # Materialise before the emptiness check: this keeps the return type
    # uniform and also handles an empty one-shot iterable correctly.
    ordered = sorted(float(v) for v in values)
    if not ordered:
        return None
    if len(ordered) == 1:
        return ordered[0]
    # Fractional rank into the sorted list; interpolate between neighbours.
    rank = (q / 100.0) * (len(ordered) - 1)
    lo = floor(rank)
    hi = min(lo + 1, len(ordered) - 1)  # clamp so q == 100 stays in range
    frac = rank - lo
    return ordered[lo] + (ordered[hi] - ordered[lo]) * frac
|
||||
|
||||
|
||||
def evaluate_inter_emit(
    emit_times_ms: Sequence[float],
    *,
    target_inter_frame_ms: float = TARGET_INTER_FRAME_MS,
    budget_ms: float = STREAMING_P95_BUDGET_MS,
) -> InterEmitReport:
    """AC-1: summarise inter-emit intervals against the p95 ``budget_ms``.

    ``emit_times_ms`` holds SITL-side receipt timestamps in milliseconds;
    only their differences are used, so the epoch is irrelevant.
    ``target_inter_frame_ms`` is stored in the report for the evidence
    file and plays no part in the verdict.
    """
    gaps = _sorted_intervals_ms(emit_times_ms)
    median = _percentile(gaps, 50.0)
    p95 = _percentile(gaps, 95.0)
    p99 = _percentile(gaps, 99.0)
    widest = max(gaps, default=None)
    return InterEmitReport(
        sample_count=len(emit_times_ms),
        interval_count=len(gaps),
        p50_ms=median,
        p95_ms=p95,
        p99_ms=p99,
        max_ms=widest,
        target_inter_frame_ms=target_inter_frame_ms,
        budget_ms=budget_ms,
    )
|
||||
|
||||
|
||||
def evaluate_missed_emits(
    emit_times_ms: Sequence[float],
    *,
    target_inter_frame_ms: float = TARGET_INTER_FRAME_MS,
    missed_ratio: float = MISSED_EMIT_RATIO,
    limit: int = MISSED_EMIT_WINDOW_LIMIT,
) -> MissedEmitReport:
    """AC-2: find runs of consecutive missed emits and the longest one.

    An interval is "missed" when it is strictly greater than
    ``missed_ratio × target_inter_frame_ms``. Timestamps are sorted, then
    every maximal run of adjacent missed intervals becomes one
    ``MissedEmitWindow``. The report passes while the longest run is
    below ``limit``.

    Raises ``ValueError`` if ``missed_ratio <= 1.0`` or ``limit < 1``.
    """
    if missed_ratio <= 1.0:
        raise ValueError(
            f"missed_ratio must be > 1.0 (was {missed_ratio!r}) — equal or "
            "below the target stride would flag every interval as missed"
        )
    if limit < 1:
        raise ValueError(f"limit must be >= 1 (was {limit!r})")
    threshold = missed_ratio * target_inter_frame_ms
    stamps = sorted(float(t) for t in emit_times_ms)
    found: list[MissedEmitWindow] = []
    # Index of the sample that opens the run currently in progress, if any.
    open_at: int | None = None
    for idx in range(1, len(stamps)):
        is_missed = stamps[idx] - stamps[idx - 1] > threshold
        if is_missed:
            if open_at is None:
                open_at = idx - 1
            continue
        if open_at is None:
            continue
        # A normal interval closes the run at the previous sample.
        found.append(
            MissedEmitWindow(
                start_index=open_at,
                length=(idx - 1) - open_at,
                start_ms=stamps[open_at],
                end_ms=stamps[idx - 1],
            )
        )
        open_at = None
    if open_at is not None:
        # The run was still open when the timestamps ended.
        found.append(
            MissedEmitWindow(
                start_index=open_at,
                length=(len(stamps) - 1) - open_at,
                start_ms=stamps[open_at],
                end_ms=stamps[-1],
            )
        )
    longest = max((window.length for window in found), default=0)
    return MissedEmitReport(
        missed_emit_threshold_ms=threshold,
        longest_run=longest,
        windows=tuple(found),
        limit=limit,
    )
|
||||
|
||||
|
||||
def evaluate(
    emit_times_ms: Sequence[float],
    *,
    target_inter_frame_ms: float = TARGET_INTER_FRAME_MS,
    budget_ms: float = STREAMING_P95_BUDGET_MS,
    missed_ratio: float = MISSED_EMIT_RATIO,
    limit: int = MISSED_EMIT_WINDOW_LIMIT,
) -> StreamingReport:
    """Evaluate AC-1 and AC-2 on the same boundary-observed emit times.

    Delegates to ``evaluate_inter_emit`` and ``evaluate_missed_emits`` and
    bundles both results; argument validation is left to those helpers.
    """
    interval_part = evaluate_inter_emit(
        emit_times_ms,
        target_inter_frame_ms=target_inter_frame_ms,
        budget_ms=budget_ms,
    )
    missed_part = evaluate_missed_emits(
        emit_times_ms,
        target_inter_frame_ms=target_inter_frame_ms,
        missed_ratio=missed_ratio,
        limit=limit,
    )
    return StreamingReport(inter_emit=interval_part, missed_emits=missed_part)
|
||||
|
||||
|
||||
def write_csv_evidence(out_path: Path, report: StreamingReport) -> Path:
    """Write the one-row evidence CSV with percentiles and AC-1/AC-2 verdicts.

    Creates the parent directory if needed, overwrites ``out_path`` and
    returns it. ``None`` statistics become empty cells; verdicts are
    written as ``true`` / ``false``.
    """

    def _ms(value: float | None) -> str:
        return "" if value is None else f"{value:.3f}"

    def _flag(state: bool) -> str:
        return "true" if state else "false"

    out_path.parent.mkdir(parents=True, exist_ok=True)
    header = (
        "sample_count",
        "interval_count",
        "p50_ms",
        "p95_ms",
        "p99_ms",
        "max_ms",
        "target_inter_frame_ms",
        "p95_budget_ms",
        "ac1_passes",
        "missed_emit_threshold_ms",
        "longest_missed_run",
        "ac2_passes",
        "passes",
    )
    with out_path.open("w", newline="") as fh:
        writer = csv.writer(fh)
        writer.writerow(header)
        intervals = report.inter_emit
        misses = report.missed_emits
        writer.writerow(
            (
                intervals.sample_count,
                intervals.interval_count,
                _ms(intervals.p50_ms),
                _ms(intervals.p95_ms),
                _ms(intervals.p99_ms),
                _ms(intervals.max_ms),
                f"{intervals.target_inter_frame_ms:.3f}",
                f"{intervals.budget_ms:.3f}",
                _flag(intervals.passes_p95),
                f"{misses.missed_emit_threshold_ms:.3f}",
                misses.longest_run,
                _flag(misses.passes),
                _flag(report.passes),
            )
        )
    return out_path
|
||||
|
||||
|
||||
def write_intervals_csv(out_path: Path, emit_times_ms: Iterable[float]) -> Path:
    """Write the per-timestamp detail CSV with the gap to the previous emit.

    Timestamps are sorted first. Each row carries the sorted index, the
    timestamp and the interval since the preceding timestamp; the first
    row has no predecessor, so its interval cell is empty. This is the
    file to read when the aggregate verdict reports a breach.

    Creates the parent directory if needed, overwrites ``out_path`` and
    returns it.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)
    stamps = sorted(float(t) for t in emit_times_ms)
    # Interval column: blank for the first timestamp, then one per pair.
    gaps = [""] + [f"{cur - prev:.3f}" for prev, cur in zip(stamps, stamps[1:])]
    with out_path.open("w", newline="") as fh:
        writer = csv.writer(fh)
        writer.writerow(["index", "t_emit_ms", "inter_emit_ms"])
        writer.writerows(
            [position, f"{stamp:.3f}", gap]
            for position, (stamp, gap) in enumerate(zip(stamps, gaps))
        )
    return out_path
|
||||
@@ -0,0 +1,217 @@
|
||||
"""Cold-start TTFF evaluator for NFT-PERF-03 (AZ-430 / AC-NEW-1).
|
||||
|
||||
The SUT promises a Time-To-First-Fix budget of 30 s p95 (and a relaxed
|
||||
max ceiling of 45 s for tail-latency outlier detection) when started
|
||||
from cold on Tier-2 (Jetson Orin Nano Super) hardware. AZ-430 collects
|
||||
N≥``MIN_ITERATION_COUNT`` cold-start TTFF samples; this module owns the
|
||||
pure-logic side: distribution stats + budget gates + evidence CSV.
|
||||
|
||||
Per AZ-430:
|
||||
|
||||
* AC-3: ``p95(TTFF) ≤ TTFF_P95_BUDGET_S`` (=30 s).
|
||||
* AC-4: ``max(TTFF) ≤ TTFF_MAX_BUDGET_S`` (=45 s).
|
||||
|
||||
Public-boundary discipline: does NOT import any
|
||||
``src/gps_denied_onboard`` symbol. Re-uses
|
||||
``streaming_evaluator._percentile`` for the linear-interpolation p95.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Sequence
|
||||
|
||||
from .streaming_evaluator import _percentile
|
||||
|
||||
# AC-3: p95 time-to-first-fix budget, in seconds.
TTFF_P95_BUDGET_S = 30.0
# AC-4: ceiling for the single worst TTFF, in seconds.
TTFF_MAX_BUDGET_S = 45.0
# Minimum number of cold-start iterations for the run to count.
MIN_ITERATION_COUNT = 10
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ColdStartIteration:
    """Outcome of a single cold-start run.

    ``ttff_s`` holds ``t_first_emission − t_first_frame_arrival`` in
    seconds. It is ``None`` when the iteration timed out without a first
    emission, which the aggregate verdict treats as a budget breach.
    """

    iteration_id: str
    first_frame_arrival_ms: int
    first_emission_ms: int | None
    ttff_s: float | None

    @property
    def emitted(self) -> bool:
        """True when a first-emission timestamp was recorded."""
        first_seen = self.first_emission_ms
        return first_seen is not None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class TtffReport:
    """NFT-PERF-03 aggregate over all cold-start iterations."""

    iterations: tuple[ColdStartIteration, ...]
    # Distribution statistics in seconds; ``None`` when nothing emitted.
    p50_s: float | None
    p95_s: float | None
    p99_s: float | None
    max_s: float | None
    missed_starts: int  # iterations where ``ttff_s is None``
    min_iteration_count: int
    p95_budget_s: float
    max_budget_s: float

    @property
    def iteration_count(self) -> int:
        """Number of iterations in the report, emitted or not."""
        return len(self.iterations)

    @property
    def passes_iteration_count(self) -> bool:
        """True when at least ``min_iteration_count`` iterations ran."""
        return self.iteration_count >= self.min_iteration_count

    @property
    def passes_p95(self) -> bool:
        """True when no start was missed and p95 is within ``p95_budget_s``."""
        if self.missed_starts != 0:
            return False
        if self.p95_s is None:
            return False
        return self.p95_s <= self.p95_budget_s

    @property
    def passes_max(self) -> bool:
        """True when no start was missed and max is within ``max_budget_s``."""
        if self.missed_starts != 0:
            return False
        if self.max_s is None:
            return False
        return self.max_s <= self.max_budget_s

    @property
    def passes(self) -> bool:
        """Overall verdict: iteration-count, p95 and max gates all hold."""
        if not self.passes_iteration_count:
            return False
        if not self.passes_p95:
            return False
        return self.passes_max
|
||||
|
||||
|
||||
def measure_iteration(
    iteration_id: str,
    *,
    first_frame_arrival_ms: int,
    first_emission_ms: int | None,
) -> ColdStartIteration:
    """Build a typed cold-start record from one iteration's timestamps.

    A ``None`` emission timestamp yields a record with ``ttff_s=None``
    (timed out). Otherwise TTFF is the millisecond difference converted
    to seconds.

    Raises ``ValueError`` when the emission precedes the first frame: a
    negative TTFF means the fixture is malformed, so it is rejected
    immediately rather than producing a nonsensical report.
    """
    timed_out = first_emission_ms is None
    if timed_out:
        return ColdStartIteration(
            iteration_id=iteration_id,
            first_frame_arrival_ms=int(first_frame_arrival_ms),
            first_emission_ms=None,
            ttff_s=None,
        )
    emission = int(first_emission_ms)
    arrival = int(first_frame_arrival_ms)
    elapsed_ms = emission - arrival
    if elapsed_ms < 0:
        # The message reports the raw inputs, not the int-coerced values.
        raise ValueError(
            f"ttff iteration {iteration_id}: first_emission_ms "
            f"({first_emission_ms}) precedes first_frame_arrival_ms "
            f"({first_frame_arrival_ms}); fixture shape invalid"
        )
    return ColdStartIteration(
        iteration_id=iteration_id,
        first_frame_arrival_ms=arrival,
        first_emission_ms=emission,
        ttff_s=elapsed_ms / 1000.0,
    )
|
||||
|
||||
|
||||
def evaluate(
    iterations: Sequence[ColdStartIteration],
    *,
    p95_budget_s: float = TTFF_P95_BUDGET_S,
    max_budget_s: float = TTFF_MAX_BUDGET_S,
    min_iteration_count: int = MIN_ITERATION_COUNT,
) -> TtffReport:
    """Aggregate iterations into the AC-3 (p95) + AC-4 (max) verdicts.

    Iterations with a ``ttff_s`` feed the percentile and max statistics;
    iterations without one are counted in ``missed_starts``, which forces
    both the p95 and the max gate to fail.
    """
    # Materialise once: the records are traversed several times below, and
    # a one-shot iterable would otherwise be exhausted after the first pass.
    records = tuple(iterations)
    valid = [it.ttff_s for it in records if it.ttff_s is not None]
    # Count a miss whenever an iteration contributes no TTFF. Keying on
    # the same field as the ``valid`` filter guarantees that no iteration
    # can drop out of the statistics without being reported as missed.
    missed = sum(1 for it in records if it.ttff_s is None or not it.emitted)
    return TtffReport(
        iterations=records,
        p50_s=_percentile(valid, 50.0),
        p95_s=_percentile(valid, 95.0),
        p99_s=_percentile(valid, 99.0),
        max_s=max(valid) if valid else None,
        missed_starts=missed,
        min_iteration_count=min_iteration_count,
        p95_budget_s=p95_budget_s,
        max_budget_s=max_budget_s,
    )
|
||||
|
||||
|
||||
def write_csv_evidence(out_path: Path, report: TtffReport) -> Path:
    """Write the aggregate summary CSV: a header and a single data row.

    Creates the parent directory if needed, overwrites ``out_path`` and
    returns it. ``None`` statistics become empty cells; verdicts are
    written as ``true`` / ``false``.
    """

    def _seconds(value: float | None) -> str:
        return "" if value is None else f"{value:.3f}"

    def _flag(state: bool) -> str:
        return "true" if state else "false"

    header = (
        "iteration_count",
        "min_iteration_count",
        "missed_starts",
        "p50_s",
        "p95_s",
        "p99_s",
        "max_s",
        "p95_budget_s",
        "max_budget_s",
        "ac1_iteration_count_passes",
        "ac3_p95_passes",
        "ac4_max_passes",
        "passes",
    )
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", newline="") as fh:
        writer = csv.writer(fh)
        writer.writerow(header)
        summary = (
            report.iteration_count,
            report.min_iteration_count,
            report.missed_starts,
            _seconds(report.p50_s),
            _seconds(report.p95_s),
            _seconds(report.p99_s),
            _seconds(report.max_s),
            f"{report.p95_budget_s:.3f}",
            f"{report.max_budget_s:.3f}",
            _flag(report.passes_iteration_count),
            _flag(report.passes_p95),
            _flag(report.passes_max),
            _flag(report.passes),
        )
        writer.writerow(summary)
    return out_path
|
||||
|
||||
|
||||
def write_per_iteration_csv(out_path: Path, report: TtffReport) -> Path:
    """Write the detail CSV with one row per cold-start iteration.

    Used when investigating AC-4 outliers. ``None`` values become empty
    cells. Creates the parent directory if needed, overwrites
    ``out_path`` and returns it.
    """
    columns = ("iteration_id", "first_frame_arrival_ms", "first_emission_ms", "ttff_s")
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", newline="") as fh:
        writer = csv.writer(fh)
        writer.writerow(columns)
        for run in report.iterations:
            emission_cell = "" if run.first_emission_ms is None else run.first_emission_ms
            ttff_cell = "" if run.ttff_s is None else f"{run.ttff_s:.3f}"
            writer.writerow(
                (run.iteration_id, run.first_frame_arrival_ms, emission_cell, ttff_cell)
            )
    return out_path
|
||||
Reference in New Issue
Block a user