mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 17:21:13 +00:00
[AZ-446] CSV reporter: band + ci95 annotations + report.csv emitter
Batch 89 — adds optional `band`, `ci95_low`, `ci95_high` kw-only parameters to `_NfrRecorder.record_metric` and emits a new per-metric report.csv artifact (one row per scenario × metric, columns: scenario_id, metric_name, value, value_band, ci95_low, ci95_high, ac_id, outcome). Backwards compatible — existing 4-arg callers unchanged; unbalanced ci95 pair raises ValueError. report.csv is written once per pytest session from `pytest_sessionfinish` so the annotation pass runs once per CI invocation regardless of (fc_adapter, vio_strategy) (AC-3). `regression-baseline.json` intentionally kept flat to preserve the diff contract used by regression-detection tooling. NFT-RES-03 + NFT-PERF-01 scenarios updated to pass real bands and compute empirical 2.5/97.5-percentile ci95 from their own sample streams (per-iteration envelope ratios for Monte Carlo, per-frame latency samples for N-sample latency). Tests: 1229 e2e/_unit_tests pass (+6 vs. batch 88 for AZ-446 band/CI behavior, value-error on unbalanced ci95, report.csv columns, explicit-path override, and end-to-end emission via the pytest plugin). Code review: PASS_WITH_WARNINGS — 1 Low (empirical-CI semantics, documented inline), 1 Medium carried over from batch 88's cumulative-review backlog (write_csv_evidence + _resolve_fixture_path duplication is outside AZ-446 reporting scope). This commit closes Step 10 Implement Tests for cycle 1 (41 of 41 blackbox-test tasks done, AZ-406..AZ-446). Greenfield auto-chains to Step 11 Run Tests next. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -136,10 +136,19 @@ def test_nft_perf_01_e2e_latency(
|
||||
f"nft_perf_01.{r.config_id}.frame_drop_ratio",
|
||||
float(r.frame_drop_ratio),
|
||||
ac_id="AC-4",
|
||||
band=f"≤{r.frame_drop_budget:.2f}",
|
||||
)
|
||||
# AZ-446 AC-2 — CI95 columns derive from the empirical 2.5 / 97.5
|
||||
# percentile of the underlying per-frame latency samples (N≥900).
|
||||
latencies = [s.latency_ms for s in r.samples]
|
||||
ci_low, ci_high = _percentile_pair(latencies, 2.5, 97.5)
|
||||
if r.p50_ms is not None:
|
||||
nfr_recorder.record_metric(
|
||||
f"nft_perf_01.{r.config_id}.latency_ms_p50", float(r.p50_ms)
|
||||
f"nft_perf_01.{r.config_id}.latency_ms_p50",
|
||||
float(r.p50_ms),
|
||||
band="(median, no budget)",
|
||||
ci95_low=ci_low,
|
||||
ci95_high=ci_high,
|
||||
)
|
||||
if r.p95_ms is not None:
|
||||
ac_id = "AC-3" if r.config_id == "k2-hybrid-50c" else "AC-2"
|
||||
@@ -147,10 +156,17 @@ def test_nft_perf_01_e2e_latency(
|
||||
f"nft_perf_01.{r.config_id}.latency_ms_p95",
|
||||
float(r.p95_ms),
|
||||
ac_id=ac_id,
|
||||
band=f"≤{r.p95_budget_ms:.0f} ms",
|
||||
ci95_low=ci_low,
|
||||
ci95_high=ci_high,
|
||||
)
|
||||
if r.p99_ms is not None:
|
||||
nfr_recorder.record_metric(
|
||||
f"nft_perf_01.{r.config_id}.latency_ms_p99", float(r.p99_ms)
|
||||
f"nft_perf_01.{r.config_id}.latency_ms_p99",
|
||||
float(r.p99_ms),
|
||||
band="(p99, no budget)",
|
||||
ci95_low=ci_low,
|
||||
ci95_high=ci_high,
|
||||
)
|
||||
|
||||
breaches = []
|
||||
@@ -170,6 +186,31 @@ def test_nft_perf_01_e2e_latency(
|
||||
assert not breaches, "\n".join(breaches)
|
||||
|
||||
|
||||
def _percentile_pair(
|
||||
values: list[float], q_low: float, q_high: float
|
||||
) -> tuple[float | None, float | None]:
|
||||
"""Linear-interpolation percentile pair (AZ-446 CI95 helper).
|
||||
|
||||
Returns ``(None, None)`` for empty input. Used to project the
|
||||
empirical 95% interval (2.5th / 97.5th percentile) of the
|
||||
underlying latency samples onto the recorded percentile metrics.
|
||||
"""
|
||||
if not values:
|
||||
return None, None
|
||||
ordered = sorted(values)
|
||||
if len(ordered) == 1:
|
||||
return float(ordered[0]), float(ordered[0])
|
||||
|
||||
def _at(q: float) -> float:
|
||||
rank = (q / 100.0) * (len(ordered) - 1)
|
||||
lo = int(rank)
|
||||
hi = min(lo + 1, len(ordered) - 1)
|
||||
frac = rank - lo
|
||||
return float(ordered[lo] + (ordered[hi] - ordered[lo]) * frac)
|
||||
|
||||
return _at(q_low), _at(q_high)
|
||||
|
||||
|
||||
def _resolve_latency_fixture_path() -> Path:
|
||||
from runner.helpers import sitl_observer
|
||||
|
||||
|
||||
@@ -115,14 +115,26 @@ def test_nft_res_03_monte_carlo(
|
||||
)
|
||||
|
||||
nfr_recorder.record_metric(
|
||||
"nft_res_03.iteration_count", float(report1.iteration_count), ac_id="AC-1"
|
||||
"nft_res_03.iteration_count",
|
||||
float(report1.iteration_count),
|
||||
ac_id="AC-1",
|
||||
band=f"≥{report1.min_iteration_count} iterations",
|
||||
)
|
||||
nfr_recorder.record_metric(
|
||||
"nft_res_03.total_samples", float(report1.total_samples)
|
||||
)
|
||||
if report1.envelope_ratio is not None:
|
||||
# AZ-446 AC-2 — per-iteration envelope ratios provide the empirical
|
||||
# 95% interval (2.5th / 97.5th percentile across 100 iterations).
|
||||
per_iter_ratios = _per_iteration_envelope_ratios(report1)
|
||||
ci_low, ci_high = _percentile_pair(per_iter_ratios, 2.5, 97.5)
|
||||
nfr_recorder.record_metric(
|
||||
"nft_res_03.envelope_ratio", float(report1.envelope_ratio), ac_id="AC-3"
|
||||
"nft_res_03.envelope_ratio",
|
||||
float(report1.envelope_ratio),
|
||||
ac_id="AC-3",
|
||||
band=f"≥{report1.envelope_ratio_budget:.2f}",
|
||||
ci95_low=ci_low,
|
||||
ci95_high=ci_high,
|
||||
)
|
||||
nfr_recorder.record_metric(
|
||||
"nft_res_03.master_seed", float(report1.master_seed)
|
||||
@@ -147,6 +159,41 @@ def _full_matrix_enabled() -> bool:
|
||||
return os.environ.get(NFT_RES_03_FULL_MATRIX_ENV_VAR, "").strip() in {"1", "true", "yes"}
|
||||
|
||||
|
||||
def _per_iteration_envelope_ratios(report: mce.MonteCarloReport) -> list[float]:
    """Collect one ``covered / frames`` ratio per iteration (AZ-446 CI95 input).

    A sample counts as covered when its ``error_m`` does not exceed
    ``mce.ENVELOPE_MULTIPLIER`` times its ``cov_semi_major_m``.
    Iterations without any samples contribute no ratio, so the result
    may be shorter than ``report.iterations``.
    """

    def _coverage(samples) -> float:
        # Fraction of this iteration's samples that fall inside the envelope.
        hits = 0
        for sample in samples:
            if sample.error_m <= mce.ENVELOPE_MULTIPLIER * sample.cov_semi_major_m:
                hits += 1
        return hits / len(samples)

    return [
        _coverage(iteration.samples)
        for iteration in report.iterations
        if iteration.samples
    ]
|
||||
|
||||
|
||||
def _percentile_pair(
|
||||
values: list[float], q_low: float, q_high: float
|
||||
) -> tuple[float | None, float | None]:
|
||||
"""Linear-interpolation percentiles. Returns ``(None, None)`` if empty."""
|
||||
if not values:
|
||||
return None, None
|
||||
ordered = sorted(values)
|
||||
if len(ordered) == 1:
|
||||
return float(ordered[0]), float(ordered[0])
|
||||
|
||||
def _at(q: float) -> float:
|
||||
rank = (q / 100.0) * (len(ordered) - 1)
|
||||
lo = int(rank)
|
||||
hi = min(lo + 1, len(ordered) - 1)
|
||||
frac = rank - lo
|
||||
return float(ordered[lo] + (ordered[hi] - ordered[lo]) * frac)
|
||||
|
||||
return _at(q_low), _at(q_high)
|
||||
|
||||
|
||||
def _resolve_fixture_path() -> Path:
|
||||
raw = os.environ.get(NFT_RES_03_FIXTURE_ENV_VAR, "").strip()
|
||||
from runner.helpers import sitl_observer
|
||||
|
||||
Reference in New Issue
Block a user