mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 14:41:15 +00:00
[AZ-446] CSV reporter: band + ci95 annotations + report.csv emitter
Batch 89 — adds optional `band`, `ci95_low`, `ci95_high` kw-only parameters to `_NfrRecorder.record_metric` and emits a new per-metric report.csv artifact (one row per scenario × metric, columns: scenario_id, metric_name, value, value_band, ci95_low, ci95_high, ac_id, outcome). Backwards compatible — existing 4-arg callers unchanged; unbalanced ci95 pair raises ValueError. report.csv is written once per pytest session from `pytest_sessionfinish` so the annotation pass runs once per CI invocation regardless of (fc_adapter, vio_strategy) (AC-3). `regression-baseline.json` intentionally kept flat to preserve the diff contract used by regression-detection tooling. NFT-RES-03 + NFT-PERF-01 scenarios updated to pass real bands and compute empirical 2.5/97.5-percentile ci95 from their own sample streams (per-iteration envelope ratios for Monte Carlo, per-frame latency samples for N-sample latency). Tests: 1229 e2e/_unit_tests pass (+6 vs. batch 88 for AZ-446 band/CI behavior, value-error on unbalanced ci95, report.csv columns, explicit-path override, and end-to-end emission via the pytest plugin). Code review: PASS_WITH_WARNINGS — 1 Low (empirical-CI semantics, documented inline), 1 Medium carried over from batch 88's cumulative-review backlog (write_csv_evidence + _resolve_fixture_path duplication is outside AZ-446 reporting scope). This commit closes Step 10 Implement Tests for cycle 1 (41 of 41 blackbox-test tasks done, AZ-406..AZ-446). Greenfield auto-chains to Step 11 Run Tests next. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -303,3 +303,234 @@ def test_nfr_recorder_fixture_emits_artifacts_in_run(tmp_path: Path) -> None:
|
||||
assert status["AC-4.2"]["status"] == "NOT COVERED"
|
||||
baseline = json.loads((evidence_out / "regression-baseline.json").read_text())
|
||||
assert baseline["scenarios"]["NFT-PERF-01"]["metrics"] == {"latency_p95_ms": 380.4}
|
||||
|
||||
|
||||
# ───────────────────── AZ-446 — band + CI95 annotations ─────────────────────
|
||||
|
||||
|
||||
def test_record_metric_band_kwarg_stored_in_internal_record(tmp_path: Path) -> None:
|
||||
"""AZ-446 AC-1 — band annotation persists into the metric entry."""
|
||||
|
||||
# Arrange
|
||||
agg = _aggregator(tmp_path, ["AC-4.1"])
|
||||
agg.ensure_record("NFT-PERF-01", "test_a", ("AC-4.1",))
|
||||
|
||||
# Act
|
||||
agg.record_metric(
|
||||
scenario_id="NFT-PERF-01",
|
||||
name="latency_p95_ms",
|
||||
value=380.4,
|
||||
ac_id="AC-4.1",
|
||||
nodeid="test_a",
|
||||
band="≤400 ms",
|
||||
)
|
||||
|
||||
# Assert
|
||||
[rec] = agg.records()
|
||||
assert rec.metrics["latency_p95_ms"] == {
|
||||
"value": 380.4,
|
||||
"ac_id": "AC-4.1",
|
||||
"band": "≤400 ms",
|
||||
}
|
||||
|
||||
|
||||
def test_record_metric_ci95_pair_stored_in_internal_record(tmp_path: Path) -> None:
|
||||
"""AZ-446 AC-2 — ci95_low / ci95_high persist into the metric entry."""
|
||||
|
||||
# Arrange
|
||||
agg = _aggregator(tmp_path, ["AC-3"])
|
||||
agg.ensure_record("NFT-RES-03", "test_a", ("AC-3",))
|
||||
|
||||
# Act
|
||||
agg.record_metric(
|
||||
scenario_id="NFT-RES-03",
|
||||
name="envelope_ratio",
|
||||
value=0.957,
|
||||
ac_id="AC-3",
|
||||
nodeid="test_a",
|
||||
band="≥0.95",
|
||||
ci95_low=0.92,
|
||||
ci95_high=0.99,
|
||||
)
|
||||
|
||||
# Assert
|
||||
[rec] = agg.records()
|
||||
assert rec.metrics["envelope_ratio"] == {
|
||||
"value": 0.957,
|
||||
"ac_id": "AC-3",
|
||||
"band": "≥0.95",
|
||||
"ci95_low": 0.92,
|
||||
"ci95_high": 0.99,
|
||||
}
|
||||
|
||||
|
||||
def test_record_metric_ci95_unbalanced_rejected_via_fixture_wrapper() -> None:
|
||||
"""AZ-446 — passing only one of ci95_low / ci95_high is a hard error."""
|
||||
|
||||
# Arrange
|
||||
from runner.reporting.nfr_recorder import _NfrRecorder, _RunAggregator
|
||||
|
||||
agg = _RunAggregator(Path("."), [])
|
||||
agg.ensure_record("S", "n", ())
|
||||
rec = _NfrRecorder(scenario_id="S", nodeid="n", traces_to=(), run=agg)
|
||||
|
||||
# Act + Assert
|
||||
with pytest.raises(ValueError, match="ci95_low and ci95_high"):
|
||||
rec.record_metric("m", 1.0, ci95_low=0.5)
|
||||
with pytest.raises(ValueError, match="ci95_low and ci95_high"):
|
||||
rec.record_metric("m", 1.0, ci95_high=0.5)
|
||||
|
||||
|
||||
def test_emit_per_metric_report_writes_csv_with_band_and_ci(tmp_path: Path) -> None:
|
||||
"""AZ-446 AC-1 + AC-2 — report.csv carries band + ci95 columns."""
|
||||
|
||||
# Arrange
|
||||
agg = _aggregator(tmp_path, ["AC-4.1", "AC-3"])
|
||||
agg.ensure_record("NFT-PERF-01", "test_a", ("AC-4.1",))
|
||||
agg.ensure_record("NFT-RES-03", "test_b", ("AC-3",))
|
||||
agg.record_metric(
|
||||
scenario_id="NFT-PERF-01",
|
||||
name="latency_p95_ms",
|
||||
value=380.4,
|
||||
ac_id="AC-4.1",
|
||||
nodeid="test_a",
|
||||
band="≤400 ms",
|
||||
)
|
||||
agg.record_metric(
|
||||
scenario_id="NFT-PERF-01",
|
||||
name="latency_p99_ms",
|
||||
value=420.0,
|
||||
ac_id="AC-4.1",
|
||||
nodeid="test_a",
|
||||
)
|
||||
agg.record_metric(
|
||||
scenario_id="NFT-RES-03",
|
||||
name="envelope_ratio",
|
||||
value=0.957,
|
||||
ac_id="AC-3",
|
||||
nodeid="test_b",
|
||||
band="≥0.95",
|
||||
ci95_low=0.92,
|
||||
ci95_high=0.99,
|
||||
)
|
||||
agg.set_outcome("test_a", "PASS")
|
||||
agg.set_outcome("test_b", "PASS")
|
||||
|
||||
# Act
|
||||
path = agg.emit_per_metric_report()
|
||||
|
||||
# Assert
|
||||
assert path == tmp_path / "report.csv"
|
||||
lines = path.read_text().splitlines()
|
||||
assert lines[0] == (
|
||||
"scenario_id,metric_name,value,value_band,ci95_low,ci95_high,ac_id,outcome"
|
||||
)
|
||||
rows = sorted(lines[1:])
|
||||
assert rows == sorted(
|
||||
[
|
||||
"NFT-PERF-01,latency_p95_ms,380.4,≤400 ms,,,AC-4.1,PASS",
|
||||
"NFT-PERF-01,latency_p99_ms,420,,,,AC-4.1,PASS",
|
||||
"NFT-RES-03,envelope_ratio,0.957,≥0.95,0.92,0.99,AC-3,PASS",
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def test_emit_per_metric_report_accepts_explicit_path(tmp_path: Path) -> None:
|
||||
"""AZ-446 — explicit ``path=`` overrides the default ``<evidence>/report.csv``."""
|
||||
|
||||
# Arrange
|
||||
agg = _aggregator(tmp_path, [])
|
||||
agg.ensure_record("NFT-PERF-01", "n", ())
|
||||
agg.record_metric(
|
||||
scenario_id="NFT-PERF-01",
|
||||
name="m",
|
||||
value=1.0,
|
||||
ac_id=None,
|
||||
nodeid="n",
|
||||
)
|
||||
agg.set_outcome("n", "PASS")
|
||||
|
||||
# Act
|
||||
target = tmp_path / "subdir" / "alt.csv"
|
||||
out = agg.emit_per_metric_report(target)
|
||||
|
||||
# Assert
|
||||
assert out == target
|
||||
assert target.is_file()
|
||||
assert "NFT-PERF-01,m,1," in target.read_text()
|
||||
|
||||
|
||||
def test_per_metric_report_emitted_in_pytest_run(tmp_path: Path) -> None:
|
||||
"""AZ-446 AC-3 — report.csv emitted exactly once per CI invocation."""
|
||||
|
||||
# Arrange
|
||||
matrix = tmp_path / "matrix.md"
|
||||
matrix.write_text(
|
||||
"## Acceptance Criteria Coverage\n\n"
|
||||
"| AC ID | Desc | Source | Status |\n"
|
||||
"|-------|------|--------|--------|\n"
|
||||
"| AC-4.1 | foo | NFT-PERF-01 | Covered |\n"
|
||||
)
|
||||
evidence_out = tmp_path / "evidence"
|
||||
evidence_out.mkdir()
|
||||
|
||||
# Unique basename — otherwise pytest's import cache collides with the
|
||||
# `test_inner.py` already created by
|
||||
# ``test_nfr_recorder_fixture_emits_artifacts_in_run``.
|
||||
inner = tmp_path / "test_inner_az446.py"
|
||||
inner.write_text(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
import pytest
|
||||
|
||||
@pytest.mark.scenario_id("NFT-PERF-01")
|
||||
@pytest.mark.traces_to(("AC-4.1",))
|
||||
def test_inner_perf(nfr_recorder):
|
||||
nfr_recorder.record_metric(
|
||||
"latency_p95_ms",
|
||||
380.4,
|
||||
ac_id="AC-4.1",
|
||||
band="≤400 ms",
|
||||
ci95_low=350.0,
|
||||
ci95_high=395.0,
|
||||
)
|
||||
"""
|
||||
)
|
||||
)
|
||||
(tmp_path / "conftest.py").write_text(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
def pytest_addoption(parser):
|
||||
parser.addoption(
|
||||
"--evidence-out",
|
||||
action="store",
|
||||
default=".",
|
||||
)
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
# Act
|
||||
rc = pytest.main(
|
||||
[
|
||||
"-p",
|
||||
"runner.reporting.csv_reporter",
|
||||
"-p",
|
||||
"runner.reporting.nfr_recorder",
|
||||
str(inner),
|
||||
f"--evidence-out={evidence_out}",
|
||||
f"--traceability-matrix={matrix}",
|
||||
"--no-header",
|
||||
"-q",
|
||||
]
|
||||
)
|
||||
|
||||
# Assert
|
||||
assert rc == 0, f"inner pytest run failed with rc={rc}"
|
||||
report = evidence_out / "report.csv"
|
||||
assert report.is_file()
|
||||
lines = report.read_text().splitlines()
|
||||
assert lines[0] == (
|
||||
"scenario_id,metric_name,value,value_band,ci95_low,ci95_high,ac_id,outcome"
|
||||
)
|
||||
assert "NFT-PERF-01,latency_p95_ms,380.4,≤400 ms,350,395,AC-4.1,PASS" in lines
|
||||
|
||||
Reference in New Issue
Block a user