mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 06:11:14 +00:00
[AZ-407] [AZ-444] [AZ-445] Batch 68: fixtures, Tier-2 harness, NFR reporter
Three blackbox-harness tasks landed together — all depend only on
AZ-406 and unblock the FT-* / NFT-* scenario tasks scheduled for
batches 69+.
AZ-407 — Static fixture builders (3pt):
* tile-cache-builder/{builder.py, Dockerfile, build.sh} produces a
deterministic tile-cache-fixture Docker volume from
_docs/00_problem/input_data/. Reproducibility primitives: sorted
iteration, frozen PIL JPEG settings, FAISS HNSW32 built single-
threaded with seeded stub descriptors.
* age-injector/{age_injector.py, inject.sh} clones the volume and
shifts capture_date by N×30.44 days; tile JPEG bytes preserved
bit-identical. Emits synth-age-7mo + synth-age-13mo volumes.
* cold-boot/cold_boot_fixture.json: frozen FC pose snapshot at
Derkachi sector centre, schema v1.
* secrets/mavlink-test-passkey.txt: 64-hex with required
`# TEST ONLY` header line per AC-5. Passkey-equality test now
compares the secret line after stripping the header.
* security/cve-2025-53644.jpg: synthetic 158-byte malformed JPEG
(truncated SOS marker). OpenCV 4.11.x rejects gracefully with
imdecode → None. AZ-439 will sharpen for ASan instrumentation.
* Top-level Makefile with `make fixtures` / `make fixtures-*` /
`make e2e-tier1*` / `make unit-tests` targets.
AZ-444 — Tier-2 Jetson harness wrapper (5pt):
* run-tier2.sh rewritten as orchestrator. Detects local
(aarch64 + TIER2_HOST=localhost) vs remote (ssh into TIER2_HOST).
New flags: -k/--selector, --build-kind production|asan,
--reflash (gated behind TIER2_REFLASH_ACK=1 two-key gate),
--dry-run.
* tier2-on-jetson.sh (new) — on-device delegate. Verifies
gps-denied-onboard{,-asan}.service health; restarts with 5s
tolerance; spawns tegrastats + jtop parallel samplers; tails
ASan unit's journal in asan mode; drives docker compose with
TIER=tier2-jetson; forwards SELECTOR to pytest -k.
* docker/run-tier1.sh (new) — selector-parity sibling.
* AC-1 (selector parity) and AC-6 (reflash gating) unit-tested via
--dry-run output assertions. AC-2/AC-3/AC-4/AC-5 are hardware-
loop ACs verified by the Tier-2 runtime smoke (no Jetson in the
unit-test layer).
AZ-445 — CSV reporter + evidence bundler refinements (2pt):
* reporting/nfr_recorder.py (new) — pytest plugin. Provides the
`nfr_recorder` fixture with record_metric(name, value, ac_id)
and partial(ac_id, reason). At session end emits:
- per-nfr/<scenario_id>.json (AC-1)
- traceability-status.json with every AC ID parsed from
traceability-matrix.md, classified Covered/PARTIAL/NOT
COVERED with source scenario IDs (AC-2)
- regression-baseline.json with all numeric metrics (AC-3)
* csv_reporter.py extended — `_outcome_to_result` consults the
aggregator; rows flip PASS → PARTIAL when an AC was marked
PARTIAL by nfr_recorder (AC-4). Graceful fallback when
aggregator isn't registered (unit-test contexts).
* conftest.py registers nfr_recorder in pytest_plugins.
* New --traceability-matrix CLI flag seeds the NOT COVERED rows.
Build / config:
* pyproject.toml dev extras: added Pillow>=10.4,<13.0 for the
tile-cache-builder unit test (broad enough to keep torchvision's
Pillow 12 pin happy; the production builder runs inside its own
Docker image with its own pin).
* Updated test_directory_layout.py to cover 10 new files + replaced
the byte-equal passkey assertion with the header-stripping
variant.
Test results:
* 157 focused tests pass (was 97 in batch 67; +60 new across this
batch). No regressions.
Module-layout / spec drift:
* AZ-407 spec text says `tests/fixtures/...`; module-layout
blackbox_tests entry (commit d7a17a8) authoritatively places the
harness under `e2e/`. Implementation followed the layout entry.
* AZ-444 spec mentions `e2e/tier2/run-tier2.sh`; AZ-406 placed it
at `e2e/jetson/run-tier2.sh`. Kept at `e2e/jetson/` for
consistency.
* Cold-boot README ownership: corrected from AZ-419 to AZ-407 per
AZ-419's own Dependencies field.
Specs archived to _docs/02_tasks/done/. Jira tickets transitioned to
In Testing on commit.
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -148,6 +148,22 @@ def test_build_row_records_evidence_paths() -> None:
|
||||
assert row["evidence_paths"] == "evidence/a.tlog,evidence/b.csv"
|
||||
|
||||
|
||||
def test_build_row_pass_when_no_session_attribute() -> None:
    """The PARTIAL propagation path swallows AttributeError on a fake item.

    AZ-445: when nfr_recorder is loaded the result column may flip to
    PARTIAL; when it isn't (or when item.session is missing — unit-test
    fake context), the row stays PASS.
    """
    # Arrange — fake item without .session
    item = _FakeItem()
    report = _report("passed")

    # Act
    row = build_row(item, report, "2026-05-16T10:00:00+00:00", 1)

    # Assert
    assert row["result"] == "PASS", "no aggregator available → result must be PASS"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# In-process plugin integration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -0,0 +1,305 @@
|
||||
"""Tests for the AZ-445 NFR recorder + run-end aggregator."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import textwrap
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from runner.reporting import nfr_recorder
|
||||
from runner.reporting.nfr_recorder import (
|
||||
_RunAggregator,
|
||||
parse_traceability_matrix,
|
||||
)
|
||||
|
||||
|
||||
# ───────────────────── traceability matrix parser ─────────────────────
|
||||
|
||||
|
||||
def test_parse_traceability_matrix_extracts_ac_ids(tmp_path: Path) -> None:
    """Every row prefixed by an `AC-…` or `RESTRICT-…` token is captured."""

    # Arrange
    matrix = tmp_path / "matrix.md"
    # The fixture contains a non-ASCII em dash, so the encoding is pinned
    # instead of relying on the platform's locale default.
    matrix.write_text(
        textwrap.dedent(
            """
            ## Acceptance Criteria Coverage

            | AC ID | Description | Source | Status |
            |-------|-------------|--------|--------|
            | AC-1.1 | something | FT-P-01 | Covered |
            | AC-7.1 | nope | — | NOT COVERED |
            | RESTRICT-CAM-2 | restriction | NFT-SEC-01 | Covered |

            text in between (no row).

            | AC-NEW-3 | another | NFT-LIM-02 | Covered |
            """
        ).strip(),
        encoding="utf-8",
    )

    # Act
    ids = parse_traceability_matrix(matrix)

    # Assert — the expected list is sorted, so the parser's result must be too.
    assert ids == sorted(["AC-1.1", "AC-7.1", "RESTRICT-CAM-2", "AC-NEW-3"])
|
||||
|
||||
|
||||
def test_parse_traceability_matrix_missing_file(tmp_path: Path) -> None:
    """Missing matrix file surfaces as a clear FileNotFoundError."""
    # Act + Assert
    with pytest.raises(FileNotFoundError):
        parse_traceability_matrix(tmp_path / "does-not-exist.md")
|
||||
|
||||
|
||||
# ───────────────────── aggregator: per-scenario state ─────────────────────
|
||||
|
||||
|
||||
def _aggregator(tmp_path: Path, matrix_ids: list[str]) -> _RunAggregator:
    """Build a `_RunAggregator` from `tmp_path` and the given matrix AC IDs."""
    return _RunAggregator(tmp_path, matrix_ids)
|
||||
|
||||
|
||||
def test_aggregator_records_metric_and_partial(tmp_path: Path) -> None:
    """ensure_record → record_metric → mark_partial round-trips into _records."""

    # Arrange
    agg = _aggregator(tmp_path, ["AC-1.1", "AC-4.1"])
    rec = agg.ensure_record(
        scenario_id="NFT-PERF-01", nodeid="test_x", traces_to=("AC-4.1",)
    )

    # Act
    agg.record_metric(
        scenario_id=rec.scenario_id,
        name="latency_p95_ms",
        value=380.4,
        ac_id="AC-4.1",
        nodeid="test_x",
    )
    agg.mark_partial(
        scenario_id=rec.scenario_id,
        ac_id="AC-4.1",
        reason="exceeds 400ms in chamber",
        nodeid="test_x",
    )
    agg.set_outcome("test_x", "PASS")

    # Assert — the single-element unpack also checks exactly one record exists.
    [stored] = agg.records()
    assert stored.metrics["latency_p95_ms"] == {"value": 380.4, "ac_id": "AC-4.1"}
    assert stored.partial_acs == {"AC-4.1": "exceeds 400ms in chamber"}
    assert stored.outcome == "PASS"
|
||||
|
||||
|
||||
# ───────────────────── aggregator: emission ─────────────────────
|
||||
|
||||
|
||||
def test_emit_per_nfr_json_writes_one_file_per_scenario(tmp_path: Path) -> None:
    """AC-1: per-NFR JSON emitted for each recorded scenario."""

    # Arrange — two scenarios; only the first one records a metric.
    agg = _aggregator(tmp_path, ["AC-4.1"])
    agg.ensure_record("NFT-PERF-01", "test_a", ("AC-4.1",))
    agg.ensure_record("NFT-PERF-02", "test_b", ("AC-4.4",))
    agg.record_metric(
        scenario_id="NFT-PERF-01",
        name="latency_p95_ms",
        value=380.4,
        ac_id="AC-4.1",
        nodeid="test_a",
    )
    agg.set_outcome("test_a", "PASS")
    agg.set_outcome("test_b", "PASS")

    # Act
    paths = agg.emit_per_nfr_json()

    # Assert
    assert len(paths) == 2
    assert {p.name for p in paths} == {"NFT-PERF-01.json", "NFT-PERF-02.json"}
    blob_a = json.loads((tmp_path / "per-nfr" / "NFT-PERF-01.json").read_text())
    assert blob_a["scenario_id"] == "NFT-PERF-01"
    assert blob_a["outcome"] == "PASS"
    assert blob_a["traces_to"] == ["AC-4.1"]
    assert blob_a["metrics"]["latency_p95_ms"]["value"] == 380.4
|
||||
|
||||
|
||||
def test_emit_traceability_status_classifies_acs(tmp_path: Path) -> None:
    """AC-2: every matrix AC ID appears with status + sources."""

    # Arrange — matrix has 3 ACs. Two scenarios (FT-P-01, FT-P-01-dup)
    # trace to AC-1.1 and pass. A third (NFT-PERF-01) traces to AC-4.1
    # and marks it PARTIAL. AC-NEW-3 has no tracing scenario.
    agg = _aggregator(tmp_path, ["AC-1.1", "AC-4.1", "AC-NEW-3"])
    agg.ensure_record("FT-P-01", "test_p01", ("AC-1.1",))
    agg.ensure_record("FT-P-01-dup", "test_p01b", ("AC-1.1",))
    agg.ensure_record("NFT-PERF-01", "test_perf01", ("AC-4.1",))
    agg.mark_partial(
        scenario_id="NFT-PERF-01",
        ac_id="AC-4.1",
        reason="exceeds threshold under chamber",
        nodeid="test_perf01",
    )
    agg.set_outcome("test_p01", "PASS")
    agg.set_outcome("test_p01b", "PASS")
    agg.set_outcome("test_perf01", "PASS")

    # Act
    status = agg.compute_traceability_status()
    emitted_path = agg.emit_traceability_status()

    # Assert
    assert status["AC-1.1"]["status"] == "Covered"
    assert sorted(status["AC-1.1"]["sources"]) == ["FT-P-01", "FT-P-01-dup"]
    assert status["AC-4.1"]["status"] == "PARTIAL"
    assert status["AC-4.1"]["sources"] == ["NFT-PERF-01"]
    assert status["AC-NEW-3"]["status"] == "NOT COVERED"
    assert status["AC-NEW-3"]["sources"] == []
    # The file written to disk must match the in-memory computation.
    persisted = json.loads(emitted_path.read_text())
    assert persisted == status
|
||||
|
||||
|
||||
def test_emit_traceability_status_downgrades_on_fail(tmp_path: Path) -> None:
    """A FAILing test tracing to an AC keeps the AC out of Covered."""

    # Arrange
    agg = _aggregator(tmp_path, ["AC-1.1"])
    agg.ensure_record("FT-P-01", "test_p01", ("AC-1.1",))
    agg.set_outcome("test_p01", "FAIL")

    # Act
    status = agg.compute_traceability_status()

    # Assert
    # Per AZ-445 AC-2 the status enum is {Covered, PARTIAL, NOT COVERED}.
    # A FAIL is downgraded to PARTIAL (it's covered by a scenario but
    # the scenario didn't pass).
    assert status["AC-1.1"]["status"] == "PARTIAL"
|
||||
|
||||
|
||||
def test_emit_regression_baseline_dumps_numeric_metrics(tmp_path: Path) -> None:
    """AC-3: regression-baseline.json contains every numeric metric per scenario."""

    # Arrange — two numeric metrics plus one dict-valued metric.
    agg = _aggregator(tmp_path, ["AC-4.1"])
    agg.ensure_record("NFT-PERF-01", "test_a", ("AC-4.1",))
    agg.record_metric(
        scenario_id="NFT-PERF-01",
        name="latency_p95_ms",
        value=380.4,
        ac_id="AC-4.1",
        nodeid="test_a",
    )
    agg.record_metric(
        scenario_id="NFT-PERF-01",
        name="latency_p99_ms",
        value=420.7,
        ac_id="AC-4.1",
        nodeid="test_a",
    )
    agg.record_metric(
        scenario_id="NFT-PERF-01",
        name="extra_meta",
        value={"k": "v"},  # non-numeric — dropped from baseline
        ac_id="AC-4.1",
        nodeid="test_a",
    )
    agg.set_outcome("test_a", "PASS")

    # Act
    path = agg.emit_regression_baseline()

    # Assert
    blob = json.loads(path.read_text())
    assert blob["scenarios"]["NFT-PERF-01"]["metrics"] == {
        "latency_p95_ms": 380.4,
        "latency_p99_ms": 420.7,
    }
    assert blob["scenarios"]["NFT-PERF-01"]["outcome"] == "PASS"
    assert "extra_meta" not in blob["scenarios"]["NFT-PERF-01"]["metrics"]
|
||||
|
||||
|
||||
# ───────────────────── integration with pytest plugin ─────────────────────
|
||||
|
||||
|
||||
def test_nfr_recorder_fixture_emits_artifacts_in_run(tmp_path: Path) -> None:
    """End-to-end: invoke an in-process pytest run, assert artifacts exist.

    The inner test calls `nfr_recorder.record_metric` + `partial` and
    passes (it makes no assertions of its own). The outer test (this one)
    checks that the run emitted per-nfr/<id>.json,
    traceability-status.json, and regression-baseline.json into the
    evidence dir.
    """

    # Arrange
    matrix = tmp_path / "matrix.md"
    matrix.write_text(
        "## Acceptance Criteria Coverage\n\n"
        "| AC ID | Desc | Source | Status |\n"
        "|-------|------|--------|--------|\n"
        "| AC-4.1 | foo | NFT-PERF-01 | Covered |\n"
        "| AC-4.2 | bar | NFT-PERF-02 | Covered |\n",
        encoding="utf-8",
    )
    evidence_out = tmp_path / "evidence"
    evidence_out.mkdir()

    inner = tmp_path / "test_inner.py"
    inner.write_text(
        textwrap.dedent(
            """
            import pytest

            @pytest.mark.scenario_id("NFT-PERF-01")
            @pytest.mark.traces_to(("AC-4.1",))
            def test_inner_perf(nfr_recorder):
                nfr_recorder.record_metric("latency_p95_ms", 380.4, ac_id="AC-4.1")
                nfr_recorder.partial("AC-4.1", "exceeds threshold")
            """
        ),
        encoding="utf-8",
    )
    # Minimal conftest registering only `--evidence-out` so nfr_recorder
    # has a place to write. (The real harness's conftest is heavy; we
    # don't want to drag it in.)
    (tmp_path / "conftest.py").write_text(
        textwrap.dedent(
            """
            def pytest_addoption(parser):
                parser.addoption(
                    "--evidence-out",
                    action="store",
                    default=".",
                )
            """
        ),
        encoding="utf-8",
    )

    # Act — both reporting plugins are loaded explicitly via `-p`.
    rc = pytest.main(
        [
            "-p",
            "runner.reporting.csv_reporter",
            "-p",
            "runner.reporting.nfr_recorder",
            str(inner),
            f"--evidence-out={evidence_out}",
            f"--traceability-matrix={matrix}",
            "--no-header",
            "-q",
        ]
    )

    # Assert
    assert rc == 0, f"inner pytest run failed with rc={rc}"
    per_nfr = evidence_out / "per-nfr" / "NFT-PERF-01.json"
    assert per_nfr.exists()
    blob = json.loads(per_nfr.read_text())
    assert blob["scenario_id"] == "NFT-PERF-01"
    assert blob["partial_acs"] == {"AC-4.1": "exceeds threshold"}
    status = json.loads((evidence_out / "traceability-status.json").read_text())
    assert status["AC-4.1"]["status"] == "PARTIAL"
    # AC-4.2 is in the matrix but no inner test traces to it.
    assert status["AC-4.2"]["status"] == "NOT COVERED"
    baseline = json.loads((evidence_out / "regression-baseline.json").read_text())
    assert baseline["scenarios"]["NFT-PERF-01"]["metrics"] == {"latency_p95_ms": 380.4}
|
||||
Reference in New Issue
Block a user