"""NFR metrics recorder + run-end aggregator (AZ-445).

Extends the AZ-406 reporting subsystem with three additional artifacts:

* ``per-nfr/<scenario_id>.json`` — canonical metric blob per NFT scenario.
* ``traceability-status.json`` — per-AC coverage roll-up across the run.
* ``regression-baseline.json`` — flat dump of every numeric metric the run
  captured (diffable across runs).

AZ-446 adds a fourth artifact, ``report.csv`` (one row per scenario × metric).

Public API (used by NFT scenario tests):

    def test_nft_perf_01_partition_latency_p95(nfr_recorder):
        nfr_recorder.record_metric("latency_ms_p95", 380.4, ac_id="AC-4.1")
        nfr_recorder.partial(
            "AC-4.1",
            "p95 exceeds 400 ms when chamber is enabled (deferred to NFT-PERF-01b)",
        )

The recorder also exposes ``recorder.scenario_id`` for tests that need to
name their evidence files consistently with the per-NFR JSON.

PARTIAL propagation: ``recorder.partial(ac_id, reason)`` marks the current
test row as PARTIAL in the CSV reporter and the corresponding AC as PARTIAL
in the traceability roll-up. Tests that PASS without calling ``partial`` are
recorded as Covered.
"""

from __future__ import annotations

import csv
import json
import logging
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

import pytest

from .csv_reporter import reporter_for

logger = logging.getLogger(__name__)


def _stringify(value: Any) -> str:
    """Project a metric field onto a CSV cell.

    ``None`` becomes an empty cell, floats are rendered with six significant
    digits (``.6g``), and everything else goes through ``str()``.
    """
    if value is None:
        return ""
    if isinstance(value, float):
        return f"{value:.6g}"
    return str(value)


# Characters that cannot appear in a single path component on common
# filesystems (path separators, Windows-reserved punctuation, control chars).
_FILENAME_UNSAFE_RE = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _safe_filename(scenario_id: str) -> str:
    """Map a scenario id onto a single, filesystem-safe path component.

    Scenario ids fall back to the pytest nodeid (``pkg/test_x.py::test_y``)
    when no marker is present; written verbatim, that would point into a
    directory that does not exist. Ids that are already safe are returned
    unchanged.
    """
    cleaned = _FILENAME_UNSAFE_RE.sub("_", scenario_id)
    # "", "." and ".." are not usable file stems either.
    if cleaned in {"", ".", ".."}:
        return "_"
    return cleaned


# ───────────────────────── data model ─────────────────────────


@dataclass
class _ScenarioRecord:
    """Everything captured for one test item during the session."""

    scenario_id: str
    nodeid: str
    traces_to: tuple[str, ...]
    metrics: dict[str, Any] = field(default_factory=dict)
    partial_acs: dict[str, str] = field(default_factory=dict)  # ac_id → reason
    outcome: str | None = None  # filled in at logreport time


# ───────────────────── traceability matrix parser ─────────────────────

_AC_ROW_RE = re.compile(r"^\|\s*(AC-[A-Za-z0-9\.-]+)\s*\|", re.MULTILINE)
_RESTRICT_ROW_RE = re.compile(
    r"^\|\s*(RESTRICT-[A-Za-z0-9\.-]+)\s*\|", re.MULTILINE
)


def parse_traceability_matrix(matrix_path: Path) -> list[str]:
    """Extract every AC / RESTRICT ID declared in the matrix file.

    Only IDs that sit in the first cell of a table row (``| AC-… |``) are
    picked up. Returns a sorted, deduplicated list. Public so unit tests can
    call it independently of pytest.

    Raises:
        FileNotFoundError: ``matrix_path`` is not an existing file.
    """
    if not matrix_path.is_file():
        raise FileNotFoundError(f"traceability matrix not found at {matrix_path}")
    # Decode explicitly: the platform default encoding is locale-dependent.
    text = matrix_path.read_text(encoding="utf-8")
    ids: set[str] = set()
    for pattern in (_AC_ROW_RE, _RESTRICT_ROW_RE):
        ids.update(match.group(1) for match in pattern.finditer(text))
    return sorted(ids)


# ───────────────────── recorder fixture ─────────────────────


class _NfrRecorder:
    """Per-test handle exposed via the ``nfr_recorder`` pytest fixture."""

    def __init__(
        self,
        scenario_id: str,
        nodeid: str,
        traces_to: tuple[str, ...],
        run: "_RunAggregator",
    ) -> None:
        self.scenario_id = scenario_id
        self.nodeid = nodeid
        self.traces_to = traces_to
        self._run = run

    def record_metric(
        self,
        name: str,
        value: Any,
        ac_id: str | None = None,
        *,
        band: str | None = None,
        ci95_low: float | None = None,
        ci95_high: float | None = None,
    ) -> None:
        """Capture a numeric / structured metric for this scenario.

        Recording the same ``name`` twice replaces the earlier entry.

        Optional kwargs (AZ-446):

        * ``band`` — short human-readable AC threshold text (e.g.
          ``"≤400 ms"``). Surfaces as the ``value_band`` column in
          report.csv and as ``"band"`` in the per-NFR JSON metric entry.
        * ``ci95_low`` / ``ci95_high`` — 95% interval bounds for the
          metric, used by Monte Carlo (NFT-RES-03) and N-sample
          (NFT-PERF-01) scenarios. Both must be passed together or both
          omitted; passing only one raises ``ValueError``.

        Raises:
            ValueError: ``name`` is not a non-empty string, or only one of
                the two interval bounds was supplied.
        """
        if not isinstance(name, str) or not name:
            raise ValueError(f"metric name must be a non-empty str, got {name!r}")
        if (ci95_low is None) != (ci95_high is None):
            raise ValueError(
                f"ci95_low and ci95_high must be provided together "
                f"(got low={ci95_low!r}, high={ci95_high!r})"
            )
        self._run.record_metric(
            scenario_id=self.scenario_id,
            name=name,
            value=value,
            ac_id=ac_id,
            nodeid=self.nodeid,
            band=band,
            ci95_low=ci95_low,
            ci95_high=ci95_high,
        )

    def partial(self, ac_id: str, reason: str) -> None:
        """Mark `ac_id` PARTIAL for this scenario and propagate to CSV row.

        Raises:
            ValueError: ``ac_id`` or ``reason`` is empty.
        """
        if not ac_id or not reason:
            raise ValueError("partial() requires both ac_id and reason")
        self._run.mark_partial(
            scenario_id=self.scenario_id,
            ac_id=ac_id,
            reason=reason,
            nodeid=self.nodeid,
        )


# ───────────────────── run aggregator ─────────────────────


class _RunAggregator:
    """Plugin-scoped state for the whole pytest session.

    Records are keyed by pytest nodeid; the artifacts are keyed by
    ``scenario_id``.
    """

    def __init__(
        self,
        evidence_dir: Path,
        matrix_ids: list[str],
    ) -> None:
        self.evidence_dir = evidence_dir
        self.matrix_ids = matrix_ids
        self._records: dict[str, _ScenarioRecord] = {}

    # --- mutation API used by _NfrRecorder ---

    def ensure_record(
        self, scenario_id: str, nodeid: str, traces_to: tuple[str, ...]
    ) -> _ScenarioRecord:
        """Return the record for ``nodeid``, creating it on first use."""
        rec = self._records.get(nodeid)
        if rec is None:
            rec = _ScenarioRecord(
                scenario_id=scenario_id,
                nodeid=nodeid,
                traces_to=traces_to,
            )
            self._records[nodeid] = rec
        return rec

    def record_metric(
        self,
        *,
        scenario_id: str,
        name: str,
        value: Any,
        ac_id: str | None,
        nodeid: str,
        band: str | None = None,
        ci95_low: float | None = None,
        ci95_high: float | None = None,
    ) -> None:
        """Store (or overwrite) metric ``name`` on the record for ``nodeid``.

        Optional annotations are only written into the entry when supplied,
        so absent keys mean "not provided". Raises ``KeyError`` when no
        record was created for ``nodeid`` via :meth:`ensure_record`.
        """
        rec = self._records[nodeid]
        entry: dict[str, Any] = {"value": value, "ac_id": ac_id}
        optional = {"band": band, "ci95_low": ci95_low, "ci95_high": ci95_high}
        entry.update({key: val for key, val in optional.items() if val is not None})
        rec.metrics[name] = entry

    def mark_partial(
        self,
        *,
        scenario_id: str,
        ac_id: str,
        reason: str,
        nodeid: str,
    ) -> None:
        """Remember that ``ac_id`` is only partially covered by ``nodeid``."""
        rec = self._records[nodeid]
        rec.partial_acs[ac_id] = reason

    def set_outcome(self, nodeid: str, outcome: str) -> None:
        """Called by the plugin's logreport hook.

        Tests that never requested ``nfr_recorder`` have no record and are
        silently ignored.
        """
        rec = self._records.get(nodeid)
        if rec is not None:
            rec.outcome = outcome

    # --- read-only accessors used by tests + emission ---

    def records(self) -> list[_ScenarioRecord]:
        """Return a snapshot list of all records (insertion order)."""
        return list(self._records.values())

    # --- emission (called at session end) ---

    def emit_per_nfr_json(self) -> list[Path]:
        """One file per scenario under ``<evidence_dir>/per-nfr/``.

        The file stem is the scenario id with filesystem-unsafe characters
        replaced by ``_`` (relevant when the id fell back to a nodeid).
        Returns the written paths.
        """
        out_dir = self.evidence_dir / "per-nfr"
        out_dir.mkdir(parents=True, exist_ok=True)
        emitted: list[Path] = []
        for rec in self._records.values():
            path = out_dir / f"{_safe_filename(rec.scenario_id)}.json"
            blob = {
                "scenario_id": rec.scenario_id,
                "nodeid": rec.nodeid,
                "traces_to": list(rec.traces_to),
                "outcome": rec.outcome or "UNKNOWN",
                "metrics": rec.metrics,
                "partial_acs": rec.partial_acs,
            }
            path.write_text(
                json.dumps(blob, sort_keys=True, indent=2) + "\n",
                encoding="utf-8",
            )
            emitted.append(path)
        return emitted

    def compute_traceability_status(self) -> dict[str, dict[str, Any]]:
        """Aggregate per-AC status across all recorded scenarios.

        Algorithm:

        * NOT COVERED — no scenario traces to this AC.
        * PARTIAL     — at least one scenario marks the AC PARTIAL OR
          has outcome ∈ {FAIL, SKIP}.
        * Covered     — every tracing scenario has outcome ∈
          {PASS, XFAIL} and none marked PARTIAL.

        An AC is attributed to a scenario when it appears in the scenario's
        ``traces_to`` marker or was passed to ``partial()``. ACs that are not
        listed in the matrix are still reported.
        """
        by_ac: dict[str, dict[str, Any]] = {
            ac: {"status": "NOT COVERED", "sources": []} for ac in self.matrix_ids
        }
        for rec in self._records.values():
            outcome = (rec.outcome or "").upper()
            # partial() may name an AC the traces_to marker forgot; the
            # documented rule ("at least one scenario marks the AC PARTIAL")
            # must still apply to it.
            for ac in dict.fromkeys((*rec.traces_to, *rec.partial_acs)):
                entry = by_ac.setdefault(ac, {"status": "NOT COVERED", "sources": []})
                entry["sources"].append(rec.scenario_id)
                if ac in rec.partial_acs or outcome in {"FAIL", "SKIP"}:
                    # FAIL/SKIP is worse than partial — still surfaced as
                    # PARTIAL per AZ-445 AC-2 (status enum is
                    # {Covered, PARTIAL, NOT COVERED}).
                    entry["status"] = "PARTIAL"
                elif outcome in {"PASS", "XFAIL"} and entry["status"] == "NOT COVERED":
                    # Promote NOT COVERED → Covered; keep PARTIAL pinned.
                    entry["status"] = "Covered"
                # Unknown / missing outcomes leave the status untouched —
                # we don't downgrade a PARTIAL by an unknown.
        # Make output deterministic: sort sources within each AC entry.
        for entry in by_ac.values():
            entry["sources"] = sorted(set(entry["sources"]))
        return by_ac

    def emit_traceability_status(self) -> Path:
        """Write ``<evidence_dir>/traceability-status.json`` and return its path."""
        path = self.evidence_dir / "traceability-status.json"
        path.write_text(
            json.dumps(self.compute_traceability_status(), sort_keys=True, indent=2)
            + "\n",
            encoding="utf-8",
        )
        return path

    def emit_regression_baseline(self) -> Path:
        """Flat dump of every numeric metric for diff tooling.

        Kept intentionally flat (``{metric_name: numeric_value}``) so
        regression-detection scripts can diff two baselines via a simple
        dict-walk. The AZ-446 ``band`` / ``ci95_low`` / ``ci95_high``
        annotations live in ``report.csv`` and the per-NFR JSON instead —
        they're documentation about the metric, not independently
        diffable measurements.

        Non-numeric values (including ``bool``, which is an ``int`` subclass
        but not a measurement) are left out.
        """
        path = self.evidence_dir / "regression-baseline.json"
        blob = {
            "scenarios": {
                rec.scenario_id: {
                    "metrics": {
                        name: entry["value"]
                        for name, entry in rec.metrics.items()
                        if isinstance(entry["value"], (int, float))
                        and not isinstance(entry["value"], bool)
                    },
                    "outcome": rec.outcome or "UNKNOWN",
                }
                for rec in self._records.values()
            }
        }
        path.write_text(
            json.dumps(blob, sort_keys=True, indent=2) + "\n", encoding="utf-8"
        )
        return path

    def emit_per_metric_report(self, path: Path | None = None) -> Path:
        """AZ-446 — flat per-metric report (one row per scenario × metric).

        Default path: ``<evidence_dir>/report.csv``. Columns:

            scenario_id, metric_name, value, value_band,
            ci95_low, ci95_high, ac_id, outcome

        Non-numeric metric values are still emitted (cast to ``str``) so
        the file captures every captured signal; downstream tooling
        filters by ``value_band`` / ``ci95_low`` to decide what to treat
        as numeric.

        Rows are sorted by ``(scenario_id, metric_name)`` for deterministic
        diffing across runs. The file is written as UTF-8 because band text
        such as ``"≤400 ms"`` is not ASCII.
        """
        target = path if path is not None else self.evidence_dir / "report.csv"
        target.parent.mkdir(parents=True, exist_ok=True)
        rows: list[tuple[str, str, str, str, str, str, str, str]] = [
            (
                rec.scenario_id,
                name,
                _stringify(entry.get("value")),
                _stringify(entry.get("band")),
                _stringify(entry.get("ci95_low")),
                _stringify(entry.get("ci95_high")),
                _stringify(entry.get("ac_id")),
                rec.outcome or "UNKNOWN",
            )
            for rec in self._records.values()
            for name, entry in rec.metrics.items()
        ]
        rows.sort(key=lambda r: (r[0], r[1]))
        with target.open("w", newline="", encoding="utf-8") as fh:
            writer = csv.writer(fh)
            writer.writerow(
                [
                    "scenario_id",
                    "metric_name",
                    "value",
                    "value_band",
                    "ci95_low",
                    "ci95_high",
                    "ac_id",
                    "outcome",
                ]
            )
            writer.writerows(rows)
        return target


# ───────────────────── pytest plugin glue ─────────────────────

_AGGREGATOR_KEY = pytest.StashKey["_RunAggregator | None"]()

# pytest report outcome → label used in the emitted artifacts.
_OUTCOME_LABELS = {
    "passed": "PASS",
    "failed": "FAIL",
    "skipped": "SKIP",
}


def _outcome_label(report: pytest.TestReport) -> str:
    """Translate a pytest report into PASS / FAIL / SKIP / XFAIL / UNKNOWN.

    pytest reports an expected failure as ``skipped`` and attaches a
    ``wasxfail`` attribute; that case is surfaced as ``XFAIL`` so the
    traceability roll-up can treat it as covered rather than skipped.
    """
    if report.outcome == "skipped" and hasattr(report, "wasxfail"):
        return "XFAIL"
    return _OUTCOME_LABELS.get(report.outcome, "UNKNOWN")


def pytest_addoption(parser: pytest.Parser) -> None:
    """Register ``--traceability-matrix`` in the ``e2e-runner`` option group."""
    group = parser.getgroup("e2e-runner")
    group.addoption(
        "--traceability-matrix",
        action="store",
        default=None,
        help=(
            "Path to traceability-matrix.md (default: "
            "_docs/02_document/tests/traceability-matrix.md relative to repo root). "
            "Used to seed the NOT COVERED rows in traceability-status.json."
        ),
    )


def _resolve_matrix_path(config: pytest.Config) -> Path:
    """Return the matrix path from the CLI option, else the in-repo default.

    The default is resolved three directory levels above this file's folder.
    """
    opt = config.getoption("--traceability-matrix")
    if opt:
        return Path(opt)
    return (
        Path(__file__).resolve().parents[3]
        / "_docs"
        / "02_document"
        / "tests"
        / "traceability-matrix.md"
    )


def pytest_configure(config: pytest.Config) -> None:
    """Parse the traceability matrix and create the aggregator.

    `--evidence-out` is owned by the runner's conftest.py; by the time
    this hook fires, that option is registered and available. The
    aggregator's emission directory is therefore known up front.

    A missing matrix file is not fatal: a warning is logged and the run
    proceeds with no pre-seeded NOT COVERED rows.
    """
    config.stash[_AGGREGATOR_KEY] = None
    matrix_path = _resolve_matrix_path(config)
    try:
        matrix_ids = parse_traceability_matrix(matrix_path)
    except FileNotFoundError:
        logger.warning(
            "traceability matrix not found at %s — NOT COVERED rows will be empty",
            matrix_path,
        )
        matrix_ids = []
    try:
        evidence_out = config.getoption("--evidence-out")
    except ValueError:
        # `--evidence-out` is registered by the runner's conftest. In
        # unit-test contexts where that conftest isn't loaded, default
        # to the cwd so the aggregator still emits something — the unit
        # test redirects this via direct construction of `_RunAggregator`.
        evidence_out = "."
    aggregator = _RunAggregator(Path(evidence_out), matrix_ids)
    config.stash[_AGGREGATOR_KEY] = aggregator
    config.pluginmanager.register(_PluginHooks(aggregator), name="e2e-nfr-recorder")
    config.addinivalue_line(
        "markers", "scenario_id(name): explicit NFT scenario id for the per-NFR JSON"
    )


class _PluginHooks:
    """Tiny plugin instance that owns the logreport+sessionfinish hooks."""

    def __init__(self, aggregator: _RunAggregator) -> None:
        self._agg = aggregator

    def pytest_runtest_logreport(self, report: pytest.TestReport) -> None:
        """Record the test outcome on the matching scenario record.

        The ``call`` phase is always recorded. A ``setup`` phase is recorded
        only when it did not pass (error or skip raised after the recorder
        was created) — in that case no ``call`` report follows and the
        outcome would otherwise stay unknown. Teardown is ignored.
        """
        is_call = report.when == "call"
        is_broken_setup = report.when == "setup" and report.outcome != "passed"
        if not (is_call or is_broken_setup):
            return
        self._agg.set_outcome(report.nodeid, _outcome_label(report))

    def pytest_sessionfinish(self, session: pytest.Session, exitstatus: int) -> None:  # noqa: ARG002
        """Write all four artifacts once the session is over."""
        self._agg.emit_per_nfr_json()
        self._agg.emit_traceability_status()
        self._agg.emit_regression_baseline()
        self._agg.emit_per_metric_report()


def _scenario_id_for(item: pytest.Item) -> str:
    """Pick the scenario id: ``scenario_id`` marker, ``test_id`` marker, nodeid."""
    marker = item.get_closest_marker("scenario_id")
    if marker and marker.args:
        return str(marker.args[0])
    # Fall back to the test_id marker (compat with csv_reporter) or
    # finally the nodeid.
    test_id = item.get_closest_marker("test_id")
    if test_id and test_id.args:
        return str(test_id.args[0])
    return item.nodeid


def _traces_to_for(item: pytest.Item) -> tuple[str, ...]:
    """Read the ``traces_to`` marker as a tuple of AC ids.

    The ids come from the first positional argument, or the ``ids`` keyword
    when there is none. A string is split on commas (blank pieces dropped);
    any other iterable is converted to a tuple as-is.
    """
    marker = item.get_closest_marker("traces_to")
    if marker is None:
        return ()
    ids = marker.args[0] if marker.args else marker.kwargs.get("ids", ())
    if isinstance(ids, str):
        return tuple(s.strip() for s in ids.split(",") if s.strip())
    return tuple(ids)


@pytest.fixture
def nfr_recorder(request: pytest.FixtureRequest) -> _NfrRecorder:
    """Fixture handle for NFT scenarios to record metrics + partials."""
    aggregator = request.config.stash.get(_AGGREGATOR_KEY, None)
    if aggregator is None:
        pytest.skip(
            "nfr_recorder requires --evidence-out (the bundler's option) "
            "to be set; the harness configures it at runtime."
        )
    scenario_id = _scenario_id_for(request.node)
    traces_to = _traces_to_for(request.node)
    rec = aggregator.ensure_record(scenario_id, request.node.nodeid, traces_to)
    return _NfrRecorder(
        scenario_id=rec.scenario_id,
        nodeid=rec.nodeid,
        traces_to=rec.traces_to,
        run=aggregator,
    )


# ───────────────────── public accessors for cross-plugin use ─────────────────────


def aggregator_for(config: pytest.Config) -> _RunAggregator | None:
    """Used by csv_reporter to propagate PARTIAL into the row's result column."""
    return config.stash.get(_AGGREGATOR_KEY, None)