mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-21 19:31:12 +00:00
6599d828d2
Three blackbox-harness tasks landed together — all depend only on
AZ-406 and unblock the FT-* / NFT-* scenario tasks scheduled for
batches 69+.
AZ-407 — Static fixture builders (3pt):
* tile-cache-builder/{builder.py, Dockerfile, build.sh} produces a
deterministic tile-cache-fixture Docker volume from
_docs/00_problem/input_data/. Reproducibility primitives: sorted
iteration, frozen PIL JPEG settings, FAISS HNSW32 built single-
threaded with seeded stub descriptors.
* age-injector/{age_injector.py, inject.sh} clones the volume and
shifts capture_date by N×30.44 days; tile JPEG bytes preserved
bit-identical. Emits synth-age-7mo + synth-age-13mo volumes.
* cold-boot/cold_boot_fixture.json: frozen FC pose snapshot at
Derkachi sector centre, schema v1.
* secrets/mavlink-test-passkey.txt: 64-hex with required
`# TEST ONLY` header line per AC-5. Passkey-equality test now
compares the secret line after stripping the header.
* security/cve-2025-53644.jpg: synthetic 158-byte malformed JPEG
(truncated SOS marker). OpenCV 4.11.x rejects gracefully with
imdecode → None. AZ-439 will sharpen for ASan instrumentation.
* Top-level Makefile with `make fixtures` / `make fixtures-*` /
`make e2e-tier1*` / `make unit-tests` targets.
AZ-444 — Tier-2 Jetson harness wrapper (5pt):
* run-tier2.sh rewritten as orchestrator. Detects local
(aarch64 + TIER2_HOST=localhost) vs remote (ssh into TIER2_HOST).
New flags: -k/--selector, --build-kind production|asan,
--reflash (gated behind TIER2_REFLASH_ACK=1 two-key gate),
--dry-run.
* tier2-on-jetson.sh (new) — on-device delegate. Verifies
gps-denied-onboard{,-asan}.service health; restarts with 5s
tolerance; spawns tegrastats + jtop parallel samplers; tails
ASan unit's journal in asan mode; drives docker compose with
TIER=tier2-jetson; forwards SELECTOR to pytest -k.
* docker/run-tier1.sh (new) — selector-parity sibling.
* AC-1 (selector parity) and AC-6 (reflash gating) unit-tested via
--dry-run output assertions. AC-2/AC-3/AC-4/AC-5 are hardware-
loop ACs verified by the Tier-2 runtime smoke (no Jetson in the
unit-test layer).
AZ-445 — CSV reporter + evidence bundler refinements (2pt):
* reporting/nfr_recorder.py (new) — pytest plugin. Provides the
`nfr_recorder` fixture with record_metric(name, value, ac_id)
and partial(ac_id, reason). At session end emits:
- per-nfr/<scenario_id>.json (AC-1)
- traceability-status.json with every AC ID parsed from
traceability-matrix.md, classified Covered/PARTIAL/NOT
COVERED with source scenario IDs (AC-2)
- regression-baseline.json with all numeric metrics (AC-3)
* csv_reporter.py extended — `_outcome_to_result` consults the
aggregator; rows flip PASS → PARTIAL when an AC was marked
PARTIAL by nfr_recorder (AC-4). Graceful fallback when
aggregator isn't registered (unit-test contexts).
* conftest.py registers nfr_recorder in pytest_plugins.
* New --traceability-matrix CLI flag seeds the NOT COVERED rows.
Build / config:
* pyproject.toml dev extras: added Pillow>=10.4,<13.0 for the
tile-cache-builder unit test (broad enough to keep torchvision's
Pillow 12 pin happy; the production builder runs inside its own
Docker image with its own pin).
* Updated test_directory_layout.py to cover 10 new files + replaced
the byte-equal passkey assertion with the header-stripping
variant.
Test results:
* 157 focused tests pass (was 97 in batch 67; +60 new across this
batch). No regressions.
Module-layout / spec drift:
* AZ-407 spec text says `tests/fixtures/...`; module-layout
blackbox_tests entry (commit d7a17a8) authoritatively places the
harness under `e2e/`. Implementation followed the layout entry.
* AZ-444 spec mentions `e2e/tier2/run-tier2.sh`; AZ-406 placed it
at `e2e/jetson/run-tier2.sh`. Kept at `e2e/jetson/` for
consistency.
* Cold-boot README ownership: corrected from AZ-419 to AZ-407 per
AZ-419's own Dependencies field.
Specs archived to _docs/02_tasks/done/. Jira tickets transitioned to
In Testing on commit.
Co-authored-by: Cursor <cursoragent@cursor.com>
409 lines
14 KiB
Python
409 lines
14 KiB
Python
"""NFR metrics recorder + run-end aggregator (AZ-445).
|
|
|
|
Extends the AZ-406 reporting subsystem with three additional artifacts:
|
|
|
|
* ``per-nfr/<scenario_id>.json`` — canonical metric blob per NFT scenario.
|
|
* ``traceability-status.json`` — per-AC coverage roll-up across the run.
|
|
* ``regression-baseline.json`` — flat dump of every numeric metric the
|
|
run captured (diffable across runs).
|
|
|
|
Public API (used by NFT scenario tests):
|
|
|
|
def test_nft_perf_01_partition_latency_p95(nfr_recorder):
|
|
nfr_recorder.record_metric("latency_ms_p95", 380.4, ac_id="AC-4.1")
|
|
nfr_recorder.partial(
|
|
"AC-4.1",
|
|
"p95 exceeds 400 ms when chamber is enabled (deferred to NFT-PERF-01b)",
|
|
)
|
|
|
|
The recorder also exposes ``recorder.scenario_id`` for tests that need
|
|
to name their evidence files consistently with the per-NFR JSON.
|
|
|
|
PARTIAL propagation: ``recorder.partial(ac_id, reason)`` marks the
|
|
current test row as PARTIAL in the CSV reporter and the corresponding
|
|
AC as PARTIAL in the traceability roll-up. Tests that PASS without
|
|
calling ``partial`` are recorded as Covered.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import re
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import pytest
|
|
|
|
from .csv_reporter import reporter_for
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# ───────────────────────── data model ─────────────────────────
|
|
|
|
|
|
@dataclass
class _ScenarioRecord:
    """Everything the aggregator has collected for one recorded scenario."""

    # Identifier that names the per-NFR JSON file and appears as a roll-up source.
    scenario_id: str
    # pytest node id of the test that requested the recorder.
    nodeid: str
    # AC / RESTRICT ids the scenario declares it traces to.
    traces_to: tuple[str, ...]
    # metric name → {"value": ..., "ac_id": ...}
    metrics: dict[str, Any] = field(default_factory=dict)
    # ac_id → reason the AC was marked PARTIAL
    partial_acs: dict[str, str] = field(default_factory=dict)
    # Filled in at logreport time; stays None until then.
    outcome: str | None = None
|
|
|
|
|
|
# ───────────────────── traceability matrix parser ─────────────────────
|
|
|
|
|
|
# A matrix row is a markdown table line whose first cell is the requirement id.
_AC_ROW_RE = re.compile(r"^\|\s*(AC-[A-Za-z0-9\.-]+)\s*\|", re.MULTILINE)
_RESTRICT_ROW_RE = re.compile(
    r"^\|\s*(RESTRICT-[A-Za-z0-9\.-]+)\s*\|", re.MULTILINE
)


def parse_traceability_matrix(matrix_path: Path) -> list[str]:
    """Extract every AC / RESTRICT ID declared in the matrix file.

    Only lines that start with ``|`` and carry an ``AC-…`` or
    ``RESTRICT-…`` id in their first table cell are considered.

    Public so unit tests can call it independently of pytest.

    Args:
        matrix_path: Location of the markdown traceability matrix.

    Returns:
        A sorted, deduplicated list of ids.

    Raises:
        FileNotFoundError: if ``matrix_path`` is not an existing file.
    """
    if not matrix_path.is_file():
        raise FileNotFoundError(f"traceability matrix not found at {matrix_path}")
    # Decode explicitly as UTF-8: the locale default differs between hosts
    # and the matrix may contain non-ASCII punctuation.
    text = matrix_path.read_text(encoding="utf-8")
    ids = {
        match.group(1)
        for pattern in (_AC_ROW_RE, _RESTRICT_ROW_RE)
        for match in pattern.finditer(text)
    }
    return sorted(ids)
|
|
|
|
|
|
# ───────────────────── recorder fixture ─────────────────────
|
|
|
|
|
|
class _NfrRecorder:
    """Per-test handle exposed via the ``nfr_recorder`` pytest fixture.

    The handle validates its arguments and forwards each call to the
    session aggregator, tagged with this test's scenario id and node id.
    """

    def __init__(
        self,
        scenario_id: str,
        nodeid: str,
        traces_to: tuple[str, ...],
        run: "_RunAggregator",
    ) -> None:
        self._run = run
        self.traces_to = traces_to
        self.nodeid = nodeid
        self.scenario_id = scenario_id

    def record_metric(self, name: str, value: Any, ac_id: str | None = None) -> None:
        """Capture a numeric / structured metric for this scenario.

        Raises:
            ValueError: if ``name`` is not a non-empty string.
        """
        name_is_valid = isinstance(name, str) and bool(name)
        if not name_is_valid:
            raise ValueError(f"metric name must be a non-empty str, got {name!r}")
        payload = {
            "scenario_id": self.scenario_id,
            "name": name,
            "value": value,
            "ac_id": ac_id,
            "nodeid": self.nodeid,
        }
        self._run.record_metric(**payload)

    def partial(self, ac_id: str, reason: str) -> None:
        """Mark `ac_id` PARTIAL for this scenario and propagate to CSV row.

        Raises:
            ValueError: if either ``ac_id`` or ``reason`` is empty.
        """
        if not (ac_id and reason):
            raise ValueError("partial() requires both ac_id and reason")
        payload = {
            "scenario_id": self.scenario_id,
            "ac_id": ac_id,
            "reason": reason,
            "nodeid": self.nodeid,
        }
        self._run.mark_partial(**payload)
|
|
|
|
|
|
# ───────────────────── run aggregator ─────────────────────
|
|
|
|
|
|
class _RunAggregator:
    """Plugin-scoped state for the whole pytest session.

    Records are keyed by pytest node id; each record carries the scenario
    id used to name its artifacts. The ``emit_*`` methods write the JSON
    artifacts under ``evidence_dir``.
    """

    # A scenario id can fall back to a raw pytest nodeid ("pkg/test_x.py::
    # test_y[param]"), so anything outside this conservative set is replaced
    # before the id is used as a file name.
    _UNSAFE_FILENAME_CHARS = re.compile(r"[^A-Za-z0-9._-]+")

    def __init__(
        self,
        evidence_dir: Path,
        matrix_ids: list[str],
    ) -> None:
        self.evidence_dir = evidence_dir
        self.matrix_ids = matrix_ids
        self._records: dict[str, _ScenarioRecord] = {}

    # --- mutation API used by _NfrRecorder ---

    def ensure_record(
        self, scenario_id: str, nodeid: str, traces_to: tuple[str, ...]
    ) -> _ScenarioRecord:
        """Return the record for ``nodeid``, creating it on first use.

        When a record already exists it is returned unchanged; the
        ``scenario_id`` and ``traces_to`` arguments are then ignored.
        """
        rec = self._records.get(nodeid)
        if rec is None:
            rec = _ScenarioRecord(
                scenario_id=scenario_id,
                nodeid=nodeid,
                traces_to=traces_to,
            )
            self._records[nodeid] = rec
        return rec

    def record_metric(
        self,
        *,
        scenario_id: str,
        name: str,
        value: Any,
        ac_id: str | None,
        nodeid: str,
    ) -> None:
        """Store ``value`` under ``name`` for the scenario at ``nodeid``.

        A later call with the same ``name`` overwrites the earlier entry.
        """
        # Create the record on demand (with no traced ACs) instead of
        # failing with a bare KeyError when the caller skipped ensure_record.
        rec = self.ensure_record(scenario_id, nodeid, ())
        rec.metrics[name] = {"value": value, "ac_id": ac_id}

    def mark_partial(
        self,
        *,
        scenario_id: str,
        ac_id: str,
        reason: str,
        nodeid: str,
    ) -> None:
        """Flag ``ac_id`` as PARTIAL, with ``reason``, for the scenario at ``nodeid``."""
        rec = self.ensure_record(scenario_id, nodeid, ())
        rec.partial_acs[ac_id] = reason

    def set_outcome(self, nodeid: str, outcome: str) -> None:
        """Called by the plugin's logreport hook.

        Node ids without a record (tests that never requested the
        recorder) are ignored.
        """
        rec = self._records.get(nodeid)
        if rec is not None:
            rec.outcome = outcome

    # --- read-only accessors used by tests + emission ---

    def records(self) -> list[_ScenarioRecord]:
        """Return a snapshot list of all records in insertion order."""
        return list(self._records.values())

    # --- emission (called at session end) ---

    @classmethod
    def _file_stem(cls, scenario_id: str) -> str:
        """Map a scenario id to a file-name stem with no path separators."""
        stem = cls._UNSAFE_FILENAME_CHARS.sub("_", scenario_id)
        return stem or "_"

    def emit_per_nfr_json(self) -> list[Path]:
        """One file per scenario under ``<evidence_dir>/per-nfr/``.

        The file name is the scenario id with file-system-unsafe characters
        replaced by ``_``; the JSON body keeps the original id. Records
        that share a scenario id write to the same file, so the last one
        wins.

        Returns:
            The paths written, in record order.
        """
        out_dir = self.evidence_dir / "per-nfr"
        out_dir.mkdir(parents=True, exist_ok=True)
        emitted: list[Path] = []
        for rec in self._records.values():
            path = out_dir / f"{self._file_stem(rec.scenario_id)}.json"
            blob = {
                "scenario_id": rec.scenario_id,
                "nodeid": rec.nodeid,
                "traces_to": list(rec.traces_to),
                "outcome": rec.outcome or "UNKNOWN",
                "metrics": rec.metrics,
                "partial_acs": rec.partial_acs,
            }
            path.write_text(json.dumps(blob, sort_keys=True, indent=2) + "\n")
            emitted.append(path)
        return emitted

    def compute_traceability_status(self) -> dict:
        """Aggregate per-AC status across all recorded scenarios.

        A scenario contributes to every AC in its ``traces_to`` and to
        every AC it flagged through ``partial()``, even when the marker
        does not list that AC.

        Algorithm:
          * NOT COVERED — no scenario traces to this AC.
          * PARTIAL     — at least one scenario marks the AC PARTIAL
                          OR has outcome ∈ {FAIL, SKIP}.
          * Covered     — every tracing scenario has outcome ∈
                          {PASS, XFAIL} and none marked PARTIAL.

        Returns:
            Mapping ``ac_id → {"status": ..., "sources": [scenario ids]}``
            seeded with every id from ``matrix_ids``.
        """
        by_ac: dict[str, dict] = {
            ac: {"status": "NOT COVERED", "sources": []} for ac in self.matrix_ids
        }
        for rec in self._records.values():
            outcome = (rec.outcome or "").upper()
            # ACs flagged only via partial() must still reach the roll-up.
            flagged_only = [ac for ac in rec.partial_acs if ac not in rec.traces_to]
            for ac in (*rec.traces_to, *flagged_only):
                entry = by_ac.setdefault(ac, {"status": "NOT COVERED", "sources": []})
                entry["sources"].append(rec.scenario_id)
                if ac in rec.partial_acs or outcome in {"FAIL", "SKIP"}:
                    # FAIL/SKIP is worse than partial, but the status enum
                    # is {Covered, PARTIAL, NOT COVERED} per AZ-445 AC-2.
                    entry["status"] = "PARTIAL"
                elif outcome in {"PASS", "XFAIL"} and entry["status"] == "NOT COVERED":
                    # Promote NOT COVERED → Covered; keep PARTIAL pinned.
                    entry["status"] = "Covered"
                # Unknown / missing outcomes leave the status untouched.

        # Make output deterministic: sort sources within each AC entry.
        for entry in by_ac.values():
            entry["sources"] = sorted(set(entry["sources"]))
        return by_ac

    def emit_traceability_status(self) -> Path:
        """Write ``traceability-status.json`` and return its path."""
        # Do not rely on emit_per_nfr_json having created the directory.
        self.evidence_dir.mkdir(parents=True, exist_ok=True)
        path = self.evidence_dir / "traceability-status.json"
        path.write_text(
            json.dumps(self.compute_traceability_status(), sort_keys=True, indent=2)
            + "\n"
        )
        return path

    def emit_regression_baseline(self) -> Path:
        """Flat dump of every numeric metric for diff tooling.

        Only metrics whose value is an ``int`` or ``float`` are included.
        Scenarios are keyed by scenario id.
        """
        self.evidence_dir.mkdir(parents=True, exist_ok=True)
        path = self.evidence_dir / "regression-baseline.json"
        blob = {
            "scenarios": {
                rec.scenario_id: {
                    "metrics": {
                        name: entry["value"]
                        for name, entry in rec.metrics.items()
                        if isinstance(entry["value"], (int, float))
                    },
                    "outcome": rec.outcome or "UNKNOWN",
                }
                for rec in self._records.values()
            }
        }
        path.write_text(json.dumps(blob, sort_keys=True, indent=2) + "\n")
        return path
|
|
|
|
|
|
# ───────────────────── pytest plugin glue ─────────────────────
|
|
|
|
|
|
# Stash slot for the session aggregator. pytest_configure first stores None
# here and then the live _RunAggregator once it has been built.
_AGGREGATOR_KEY = pytest.StashKey["_RunAggregator | None"]()
|
|
|
|
|
|
def pytest_addoption(parser: pytest.Parser) -> None:
    """Register the ``--traceability-matrix`` option in the e2e-runner group."""
    help_text = (
        "Path to traceability-matrix.md (default: "
        "_docs/02_document/tests/traceability-matrix.md relative to repo root). "
        "Used to seed the NOT COVERED rows in traceability-status.json."
    )
    runner_group = parser.getgroup("e2e-runner")
    runner_group.addoption(
        "--traceability-matrix",
        action="store",
        default=None,
        help=help_text,
    )
|
|
|
|
|
|
def _resolve_matrix_path(config: pytest.Config) -> Path:
    """Return the matrix path from ``--traceability-matrix`` or the default.

    The default is built from the directory three levels above this
    file's own directory (described as the repo root in the option help).
    """
    override = config.getoption("--traceability-matrix")
    if not override:
        base_dir = Path(__file__).resolve().parents[3]
        return base_dir / "_docs" / "02_document" / "tests" / "traceability-matrix.md"
    return Path(override)
|
|
|
|
|
|
def pytest_configure(config: pytest.Config) -> None:
    """Parse the traceability matrix and create the aggregator.

    `--evidence-out` is owned by the runner's conftest.py; by the time
    this hook fires, that option is registered and available. The
    aggregator's emission directory is therefore known up front.

    Steps:
      1. Store ``None`` in the stash so the slot exists even if a later
         step raises.
      2. Read the matrix ids; a missing matrix is logged and treated as
         an empty list.
      3. Build the aggregator, stash it, and register the hook object
         under the plugin name ``e2e-nfr-recorder``.
      4. Declare the ``scenario_id`` marker.
    """
    config.stash[_AGGREGATOR_KEY] = None
    matrix_path = _resolve_matrix_path(config)
    try:
        matrix_ids = parse_traceability_matrix(matrix_path)
    except FileNotFoundError:
        logger.warning(
            "traceability matrix not found at %s — NOT COVERED rows will be empty",
            matrix_path,
        )
        matrix_ids = []

    try:
        evidence_out = config.getoption("--evidence-out")
    except ValueError:
        # `--evidence-out` is registered by the runner's conftest. In
        # unit-test contexts where that conftest isn't loaded the lookup
        # raises; handled together with the "unset" case below.
        evidence_out = None
    if not evidence_out:
        # Unregistered or registered-but-unset: default to the cwd so the
        # aggregator still emits something and Path() never receives None.
        # Unit tests redirect this via direct construction of
        # `_RunAggregator`.
        evidence_out = "."

    aggregator = _RunAggregator(Path(evidence_out), matrix_ids)
    config.stash[_AGGREGATOR_KEY] = aggregator
    config.pluginmanager.register(_PluginHooks(aggregator), name="e2e-nfr-recorder")

    config.addinivalue_line(
        "markers", "scenario_id(name): explicit NFT scenario id for the per-NFR JSON"
    )
|
|
|
|
|
|
class _PluginHooks:
    """Tiny plugin instance that owns the logreport+sessionfinish hooks."""

    # pytest report outcome → label stored on the scenario record.
    _OUTCOME_LABELS = {
        "passed": "PASS",
        "failed": "FAIL",
        "skipped": "SKIP",
    }

    def __init__(self, aggregator: _RunAggregator) -> None:
        self._agg = aggregator

    def pytest_runtest_logreport(self, report: pytest.TestReport) -> None:
        """Record the outcome of a test phase on the aggregator.

        * ``call`` reports are always recorded.
        * ``setup`` reports are recorded only when they did not pass, so a
          test whose body never ran is not left without an outcome.
        * ``teardown`` reports are ignored.

        A ``skipped`` report that carries ``wasxfail`` is an expected
        failure and is recorded as ``XFAIL`` rather than ``SKIP``.
        """
        if report.when == "setup":
            if report.outcome == "passed":
                return
        elif report.when != "call":
            return
        if report.outcome == "skipped" and hasattr(report, "wasxfail"):
            label = "XFAIL"
        else:
            label = self._OUTCOME_LABELS.get(report.outcome, "UNKNOWN")
        self._agg.set_outcome(report.nodeid, label)

    def pytest_sessionfinish(self, session: pytest.Session, exitstatus: int) -> None:  # noqa: ARG002
        """Write the three run-end artifacts once the session is over."""
        self._agg.emit_per_nfr_json()
        self._agg.emit_traceability_status()
        self._agg.emit_regression_baseline()
|
|
|
|
|
|
def _scenario_id_for(item: pytest.Item) -> str:
    """Pick the scenario id for ``item``.

    Preference order: first argument of the ``scenario_id`` marker, then
    first argument of the ``test_id`` marker (compat with csv_reporter),
    and finally the item's nodeid.
    """
    for marker_name in ("scenario_id", "test_id"):
        found = item.get_closest_marker(marker_name)
        if found and found.args:
            return str(found.args[0])
    return item.nodeid
|
|
|
|
|
|
def _traces_to_for(item: pytest.Item) -> tuple[str, ...]:
    """Read the ``traces_to`` marker of ``item`` as a tuple of ids.

    The ids come from the marker's first positional argument or, when no
    positional argument is given, from its ``ids`` keyword. A string is
    split on commas with blanks dropped; any other iterable is converted
    to a tuple as-is. An unmarked item yields an empty tuple.
    """
    mark = item.get_closest_marker("traces_to")
    if mark is None:
        return ()
    if mark.args:
        raw = mark.args[0]
    else:
        raw = mark.kwargs.get("ids", ())
    if not isinstance(raw, str):
        return tuple(raw)
    trimmed = (chunk.strip() for chunk in raw.split(","))
    return tuple(chunk for chunk in trimmed if chunk)
|
|
|
|
|
|
@pytest.fixture
def nfr_recorder(request: pytest.FixtureRequest) -> _NfrRecorder:
    """Fixture handle for NFT scenarios to record metrics + partials.

    Skips the requesting test when no aggregator is stashed on the
    config. Otherwise it makes sure a record exists for the test's
    nodeid and returns a recorder bound to that record's identity.
    """
    run = request.config.stash.get(_AGGREGATOR_KEY, None)
    if run is None:
        pytest.skip(
            "nfr_recorder requires --evidence-out (the bundler's option) "
            "to be set; the harness configures it at runtime."
        )
    node = request.node
    record = run.ensure_record(
        _scenario_id_for(node),
        node.nodeid,
        _traces_to_for(node),
    )
    return _NfrRecorder(record.scenario_id, record.nodeid, record.traces_to, run)
|
|
|
|
|
|
# ───────────────────── public accessors for cross-plugin use ─────────────────────
|
|
|
|
|
|
def aggregator_for(config: pytest.Config) -> _RunAggregator | None:
    """Used by csv_reporter to propagate PARTIAL into the row's result column.

    Returns the aggregator stashed on ``config``, or ``None`` when the
    stash has no entry for it.
    """
    stash = config.stash
    return stash.get(_AGGREGATOR_KEY, None)
|