mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 08:51:12 +00:00
[AZ-407] [AZ-444] [AZ-445] Batch 68: fixtures, Tier-2 harness, NFR reporter
Three blackbox-harness tasks landed together — all depend only on
AZ-406 and unblock the FT-* / NFT-* scenario tasks scheduled for
batches 69+.
AZ-407 — Static fixture builders (3pt):
* tile-cache-builder/{builder.py, Dockerfile, build.sh} produces a
deterministic tile-cache-fixture Docker volume from
_docs/00_problem/input_data/. Reproducibility primitives: sorted
iteration, frozen PIL JPEG settings, FAISS HNSW32 built single-
threaded with seeded stub descriptors.
* age-injector/{age_injector.py, inject.sh} clones the volume and
shifts capture_date by N×30.44 days; tile JPEG bytes preserved
bit-identical. Emits synth-age-7mo + synth-age-13mo volumes.
* cold-boot/cold_boot_fixture.json: frozen FC pose snapshot at
Derkachi sector centre, schema v1.
* secrets/mavlink-test-passkey.txt: 64-hex with required
`# TEST ONLY` header line per AC-5. Passkey-equality test now
compares the secret line after stripping the header.
* security/cve-2025-53644.jpg: synthetic 158-byte malformed JPEG
(truncated SOS marker). OpenCV 4.11.x rejects gracefully with
imdecode → None. AZ-439 will sharpen for ASan instrumentation.
* Top-level Makefile with `make fixtures` / `make fixtures-*` /
`make e2e-tier1*` / `make unit-tests` targets.
AZ-444 — Tier-2 Jetson harness wrapper (5pt):
* run-tier2.sh rewritten as orchestrator. Detects local
(aarch64 + TIER2_HOST=localhost) vs remote (ssh into TIER2_HOST).
New flags: -k/--selector, --build-kind production|asan,
--reflash (two-key gate: additionally requires TIER2_REFLASH_ACK=1),
--dry-run.
* tier2-on-jetson.sh (new) — on-device delegate. Verifies
gps-denied-onboard{,-asan}.service health; restarts with 5s
tolerance; spawns tegrastats + jtop parallel samplers; tails
ASan unit's journal in asan mode; drives docker compose with
TIER=tier2-jetson; forwards SELECTOR to pytest -k.
* docker/run-tier1.sh (new) — selector-parity sibling.
* AC-1 (selector parity) and AC-6 (reflash gating) unit-tested via
--dry-run output assertions. AC-2/AC-3/AC-4/AC-5 are hardware-
loop ACs verified by the Tier-2 runtime smoke (no Jetson in the
unit-test layer).
AZ-445 — CSV reporter + evidence bundler refinements (2pt):
* reporting/nfr_recorder.py (new) — pytest plugin. Provides the
`nfr_recorder` fixture with record_metric(name, value, ac_id)
and partial(ac_id, reason). At session end emits:
- per-nfr/<scenario_id>.json (AC-1)
- traceability-status.json with every AC ID parsed from
traceability-matrix.md, classified Covered/PARTIAL/NOT
COVERED with source scenario IDs (AC-2)
- regression-baseline.json with all numeric metrics (AC-3)
* csv_reporter.py extended — `_outcome_to_result` consults the
aggregator; rows flip PASS → PARTIAL when an AC was marked
PARTIAL by nfr_recorder (AC-4). Graceful fallback when
aggregator isn't registered (unit-test contexts).
* conftest.py registers nfr_recorder in pytest_plugins.
* New --traceability-matrix CLI flag seeds the NOT COVERED rows.
Build / config:
* pyproject.toml dev extras: added Pillow>=10.4,<13.0 for the
tile-cache-builder unit test (broad enough to keep torchvision's
Pillow 12 pin happy; the production builder runs inside its own
Docker image with its own pin).
* Updated test_directory_layout.py to cover 10 new files + replaced
the byte-equal passkey assertion with the header-stripping
variant.
Test results:
* 157 focused tests pass (was 97 in batch 67; +60 new across this
batch). No regressions.
Module-layout / spec drift:
* AZ-407 spec text says `tests/fixtures/...`; module-layout
blackbox_tests entry (commit d7a17a8) authoritatively places the
harness under `e2e/`. Implementation followed the layout entry.
* AZ-444 spec mentions `e2e/tier2/run-tier2.sh`; AZ-406 placed it
at `e2e/jetson/run-tier2.sh`. Kept at `e2e/jetson/` for
consistency.
* Cold-boot README ownership: corrected from AZ-419 to AZ-407 per
AZ-419's own Dependencies field.
Specs archived to _docs/02_tasks/done/. Jira tickets transitioned to
In Testing on commit.
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,202 @@
|
||||
"""Tests for the AZ-407 age-injector.
|
||||
|
||||
Covers AC-3 (capture_date shifted, pixels bit-identical) and AC-7
|
||||
(provenance docs present).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import datetime as _dt
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[3]
|
||||
INPUT_DIR = REPO_ROOT / "_docs" / "00_problem" / "input_data"
|
||||
BUILDER_PY = REPO_ROOT / "e2e" / "fixtures" / "tile-cache-builder" / "builder.py"
|
||||
INJECTOR_PY = REPO_ROOT / "e2e" / "fixtures" / "age-injector" / "age_injector.py"
|
||||
INJECTOR_DIR = REPO_ROOT / "e2e" / "fixtures" / "age-injector"
|
||||
|
||||
|
||||
def _run(cmd: list[str]) -> str:
    """Execute ``cmd`` as a child process and hand back its captured stdout.

    The child inherits the current environment with ``PYTHONHASHSEED``
    pinned to ``"0"``. Because ``check=True`` is used, a non-zero exit
    status raises ``subprocess.CalledProcessError``.
    """
    child_env = {**os.environ, "PYTHONHASHSEED": "0"}
    completed = subprocess.run(
        cmd,
        check=True,
        capture_output=True,
        text=True,
        env=child_env,
    )
    return completed.stdout
|
||||
|
||||
|
||||
def _build_source_cache(out_dir: Path) -> Path:
    """Populate ``out_dir`` by invoking the tile-cache builder; return ``out_dir``."""
    builder_cmd = [sys.executable, str(BUILDER_PY)]
    builder_cmd += ["--input-dir", str(INPUT_DIR)]
    builder_cmd += ["--output-dir", str(out_dir)]
    builder_cmd.append("--quiet")
    _run(builder_cmd)
    return out_dir
|
||||
|
||||
|
||||
def _file_hashes(root: Path, suffix: str) -> dict[str, str]:
    """Map every path under ``root`` ending in ``suffix`` to its SHA-256 hex digest.

    The search is recursive. Keys are POSIX-style paths relative to
    ``root`` and are inserted in sorted path order.
    """
    digests: dict[str, str] = {}
    for candidate in sorted(root.rglob("*" + suffix)):
        key = candidate.relative_to(root).as_posix()
        digests[key] = hashlib.sha256(candidate.read_bytes()).hexdigest()
    return digests
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def source_cache(tmp_path_factory: pytest.TempPathFactory) -> Path:
    """Build the tile cache once per module (~1s) and share the resulting dir."""
    cache_dir = tmp_path_factory.mktemp("source-cache")
    return _build_source_cache(cache_dir)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("age_months,threshold_days", [(7, 6 * 30), (13, 12 * 30)])
def test_age_injector_shifts_capture_date(
    tmp_path: Path,
    source_cache: Path,
    age_months: int,
    threshold_days: int,
) -> None:
    """AC-3: every manifest row's capture_date is now - age_months ±1 day."""
    # Arrange
    aged_dir = tmp_path / f"out-{age_months}mo"
    reference_day = _dt.datetime.now(tz=_dt.timezone.utc).date()
    # Same for every row, so computed once up front.
    target_days = int(round(age_months * 30.44))
    injector_cmd = [sys.executable, str(INJECTOR_PY)]
    injector_cmd += ["--source-dir", str(source_cache)]
    injector_cmd += ["--output-dir", str(aged_dir)]
    injector_cmd += ["--age-months", str(age_months)]

    # Act
    _run(injector_cmd)

    # Assert
    with (aged_dir / "manifest.csv").open() as manifest:
        records = list(csv.DictReader(manifest))
    assert records, "aged manifest is empty"
    for record in records:
        aged_day = _dt.date.fromisoformat(record["capture_date"])
        delta_days = (reference_day - aged_day).days
        assert abs(delta_days - target_days) <= 1, (
            f"row {record['tile_x']},{record['tile_y']}: capture_date offset is "
            f"{delta_days} days, expected {target_days} ±1"
        )
        assert delta_days > threshold_days, (
            f"aged capture_date {record['capture_date']} did not exceed the "
            f"{threshold_days}-day threshold"
        )
|
||||
|
||||
|
||||
def test_age_injector_preserves_tile_bytes(tmp_path: Path, source_cache: Path) -> None:
    """AC-3: tile JPEG bodies copy bit-identical.

    The source hash map is required to be non-empty first: two empty maps
    compare equal, which would let the test pass without checking a
    single tile.
    """
    # Arrange
    out = tmp_path / "out-7mo"

    # Act
    _run(
        [
            sys.executable,
            str(INJECTOR_PY),
            "--source-dir",
            str(source_cache),
            "--output-dir",
            str(out),
            "--age-months",
            "7",
        ]
    )

    # Assert
    src_hashes = _file_hashes(source_cache / "tiles", ".jpg")
    out_hashes = _file_hashes(out / "tiles", ".jpg")
    assert src_hashes, "source cache contains no tile JPEGs to compare"
    assert src_hashes == out_hashes, "tile JPEG bytes drifted across age injection"
|
||||
|
||||
|
||||
def test_age_injector_updates_sidecar_dates(tmp_path: Path, source_cache: Path) -> None:
    """AC-3: per-tile sidecar JSON also reflects the aged date.

    The sidecar list is asserted non-empty before iterating; otherwise an
    injector that emitted no sidecars at all would pass this test with
    zero loop iterations.
    """
    # Arrange
    out = tmp_path / "out-13mo"

    # Act
    _run(
        [
            sys.executable,
            str(INJECTOR_PY),
            "--source-dir",
            str(source_cache),
            "--output-dir",
            str(out),
            "--age-months",
            "13",
        ]
    )

    # Assert
    today = _dt.datetime.now(tz=_dt.timezone.utc).date()
    target_days = int(round(13 * 30.44))
    sidecars = sorted((out / "tiles").rglob("*.json"))
    assert sidecars, "aged cache contains no sidecar JSON files"
    for sidecar in sidecars:
        data = json.loads(sidecar.read_text())
        shifted = _dt.date.fromisoformat(data["capture_date"])
        delta = (today - shifted).days
        assert abs(delta - target_days) <= 1, (
            f"sidecar {sidecar}: capture_date offset {delta}d, expected {target_days}d ±1"
        )
|
||||
|
||||
|
||||
def test_age_injector_rejects_non_positive_months(tmp_path: Path, source_cache: Path) -> None:
    """Defensive: ``--age-months 0`` must make the injector exit with an error.

    The failure is expected to surface as ``CalledProcessError`` whose
    stderr mentions "must be positive".
    """
    # Arrange
    rejected_dir = tmp_path / "rejected"
    injector_cmd = [sys.executable, str(INJECTOR_PY)]
    injector_cmd += ["--source-dir", str(source_cache)]
    injector_cmd += ["--output-dir", str(rejected_dir)]
    injector_cmd += ["--age-months", "0"]

    # Act + Assert
    with pytest.raises(subprocess.CalledProcessError) as excinfo:
        _run(injector_cmd)
    captured_stderr = excinfo.value.stderr or ""
    assert "must be positive" in captured_stderr
|
||||
|
||||
|
||||
def test_age_injector_provenance_readme_exists() -> None:
    """AC-7: the injector ships a README with its provenance sections."""
    # Arrange / Act
    readme_path = INJECTOR_DIR / "README.md"

    # Assert
    assert readme_path.exists()
    text = readme_path.read_text()
    for heading in ("Provenance", "Reproducibility"):
        assert heading in text
|
||||
@@ -0,0 +1,84 @@
|
||||
"""Tests for the AZ-407 cold-boot fixture.
|
||||
|
||||
AC-4 (SITL loads pose within ±1 m) requires SITL which the unit-test
|
||||
layer cannot run; that path is covered by AZ-419's FT-P-11 inside the
|
||||
Docker-bound runner. AZ-407's unit-test obligation is to verify the
|
||||
JSON shape and bounds.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[3]
|
||||
FIXTURE_PATH = REPO_ROOT / "e2e" / "fixtures" / "cold-boot" / "cold_boot_fixture.json"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def cold_boot() -> dict:
    """Parse the cold-boot fixture JSON once per module and return it."""
    raw_json = FIXTURE_PATH.read_text()
    return json.loads(raw_json)
|
||||
|
||||
|
||||
def test_schema_version(cold_boot: dict) -> None:
|
||||
"""The schema field locks the file shape; AZ-419's loader keys off it."""
|
||||
# Assert
|
||||
assert cold_boot["_schema"] == "cold-boot-fixture/v1"
|
||||
|
||||
|
||||
def test_global_position_int_block(cold_boot: dict) -> None:
|
||||
"""GLOBAL_POSITION_INT fields use canonical MAVLink units."""
|
||||
|
||||
# Arrange
|
||||
gpi = cold_boot["global_position_int"]
|
||||
|
||||
# Assert
|
||||
required = {
|
||||
"time_boot_ms",
|
||||
"lat_e7",
|
||||
"lon_e7",
|
||||
"alt_mm",
|
||||
"relative_alt_mm",
|
||||
"vx_cm_s",
|
||||
"vy_cm_s",
|
||||
"vz_cm_s",
|
||||
"hdg_cdeg",
|
||||
}
|
||||
assert required <= set(gpi), f"missing fields: {required - set(gpi)}"
|
||||
assert -90 * 10**7 <= gpi["lat_e7"] <= 90 * 10**7
|
||||
assert -180 * 10**7 <= gpi["lon_e7"] <= 180 * 10**7
|
||||
assert -50_000_000 <= gpi["alt_mm"] <= 50_000_000
|
||||
|
||||
|
||||
def test_attitude_block(cold_boot: dict) -> None:
|
||||
"""Attitude angles fall inside [-pi, pi]."""
|
||||
|
||||
# Arrange
|
||||
att = cold_boot["attitude"]
|
||||
import math
|
||||
|
||||
# Assert
|
||||
for field in ("roll_rad", "pitch_rad", "yaw_rad"):
|
||||
assert -math.pi <= att[field] <= math.pi, f"{field} out of range: {att[field]}"
|
||||
|
||||
|
||||
def test_derkachi_lat_lon_inside_bbox(cold_boot: dict) -> None:
|
||||
"""The frozen pose must be inside the Derkachi route bbox used by C2."""
|
||||
|
||||
# Arrange
|
||||
lat = cold_boot["global_position_int"]["lat_e7"] / 10**7
|
||||
lon = cold_boot["global_position_int"]["lon_e7"] / 10**7
|
||||
|
||||
# Assert
|
||||
assert 50.05 <= lat <= 50.10, f"lat {lat} outside Derkachi bbox"
|
||||
assert 36.10 <= lon <= 36.20, f"lon {lon} outside Derkachi bbox"
|
||||
|
||||
|
||||
def test_provenance_block_present(cold_boot: dict) -> None:
|
||||
"""AC-7: license + provenance fields documented inside the JSON itself."""
|
||||
# Assert
|
||||
assert "_license" in cold_boot
|
||||
assert "_provenance" in cold_boot
|
||||
assert "AZ-419" in cold_boot["_authored_for"][1]
|
||||
@@ -0,0 +1,107 @@
|
||||
"""Tests for the AZ-407 CVE-2025-53644 fixture (AC-6, AC-7)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[3]
|
||||
GENERATOR = REPO_ROOT / "e2e" / "fixtures" / "security" / "generate_cve_jpeg.py"
|
||||
COMMITTED_FIXTURE = REPO_ROOT / "e2e" / "fixtures" / "security" / "cve-2025-53644.jpg"
|
||||
|
||||
# Pin the committed fixture's SHA-256 so any change to the generator's
|
||||
# byte layout fails the unit test explicitly.
|
||||
COMMITTED_SHA256 = "c281d2f2595916dbbaca8173d2ab37507b6e3c6511aa8e420c1f4e81c877002e"
|
||||
|
||||
|
||||
def _generator_run(out_path: Path) -> None:
    """Run the CVE JPEG generator script, passing ``out_path`` as its argument.

    ``PYTHONHASHSEED`` is pinned to ``"0"`` for the child; a non-zero exit
    raises ``subprocess.CalledProcessError`` because of ``check=True``.
    """
    child_env = {**os.environ, "PYTHONHASHSEED": "0"}
    generator_cmd = [sys.executable, str(GENERATOR), str(out_path)]
    subprocess.run(
        generator_cmd,
        check=True,
        capture_output=True,
        text=True,
        env=child_env,
    )
|
||||
|
||||
|
||||
def test_generator_is_idempotent(tmp_path: Path) -> None:
    """AC-6 / determinism: two generator runs write identical bytes."""
    # Arrange
    first, second = tmp_path / "a.jpg", tmp_path / "b.jpg"

    # Act
    for target in (first, second):
        _generator_run(target)

    # Assert
    assert first.read_bytes() == second.read_bytes()
|
||||
|
||||
|
||||
def test_committed_fixture_matches_generator(tmp_path: Path) -> None:
    """The checked-in JPEG equals fresh generator output and the pinned SHA-256."""
    # Arrange
    regenerated = tmp_path / "regen.jpg"

    # Act
    _generator_run(regenerated)

    # Assert
    assert COMMITTED_FIXTURE.exists(), "the AZ-407 deliverable JPEG must be checked in"
    committed_bytes = COMMITTED_FIXTURE.read_bytes()
    assert committed_bytes == regenerated.read_bytes(), (
        "committed cve-2025-53644.jpg drifted from generator output; "
        "re-run `make fixtures-cve` to regenerate"
    )
    committed_digest = hashlib.sha256(committed_bytes).hexdigest()
    assert committed_digest == COMMITTED_SHA256
|
||||
|
||||
|
||||
def test_jpeg_has_soi_and_truncated_sos() -> None:
    """Structural sanity: starts with SOI, contains SOS, does not end with EOI."""
    # Arrange
    payload = COMMITTED_FIXTURE.read_bytes()
    soi, sos, eoi = b"\xff\xd8", b"\xff\xda", b"\xff\xd9"

    # Assert
    assert payload[:2] == soi, "missing SOI marker"
    assert payload.find(sos) != -1, "missing SOS marker"
    assert payload[-2:] != eoi, "EOI present — CVE truncation is gone"
|
||||
|
||||
|
||||
def test_opencv_rejects_without_crash() -> None:
    """AC-6: ``cv2.imdecode`` returns ``None`` for the malformed JPEG.

    Skipped when OpenCV is not importable in the test environment.
    """
    # Arrange
    cv2 = pytest.importorskip("cv2", reason="opencv-python not in test venv")
    import numpy as np  # noqa: PLC0415

    raw_bytes = np.fromfile(str(COMMITTED_FIXTURE), dtype=np.uint8)

    # Act
    decoded = cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR)

    # Assert
    assert decoded is None, (
        "OpenCV decoded the malformed JPEG — the AZ-407 fixture no longer "
        "exercises the CVE-2025-53644 truncation path"
    )
|
||||
|
||||
|
||||
def test_provenance_readme_exists() -> None:
    """AC-7: the security README exists and names its required sections."""
    # Arrange
    readme_path = REPO_ROOT.joinpath("e2e", "fixtures", "security", "README.md")

    # Assert
    assert readme_path.exists()
    text = readme_path.read_text()
    for heading in ("Provenance", "Re-distribution", "License"):
        assert heading in text
|
||||
@@ -0,0 +1,47 @@
|
||||
"""Tests for the AZ-407 MAVLink test passkey fixture (AC-5)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[3]
|
||||
PASSKEY_PATH = REPO_ROOT / "e2e" / "fixtures" / "secrets" / "mavlink-test-passkey.txt"
|
||||
|
||||
|
||||
def _hex_lines(path: Path) -> list[str]:
    """Return the stripped lines of ``path`` that are neither blank nor ``#`` comments."""
    stripped = (raw.strip() for raw in path.read_text().splitlines())
    return [line for line in stripped if line and not line.startswith("#")]
|
||||
|
||||
|
||||
def test_passkey_has_comment_header() -> None:
    """AC-5: the file opens with the human-readable test-only header line."""
    # Arrange
    header = PASSKEY_PATH.read_text().splitlines()[0]
    # Assert
    assert header.startswith("# TEST ONLY")
    assert "not for production use" in header
|
||||
|
||||
|
||||
def test_passkey_is_64_hex_chars() -> None:
    """AC-5: the secret line is exactly 64 hex chars (32 bytes).

    Hex-ness is checked character by character. ``int(secret, 16)`` is not
    used because it also accepts a ``0x`` prefix, ``_`` digit separators,
    a leading sign and surrounding whitespace, so a 64-character string
    that is not pure hex could slip through.
    """
    # Arrange
    lines = _hex_lines(PASSKEY_PATH)
    hex_alphabet = set("0123456789abcdefABCDEF")
    # Assert
    assert len(lines) == 1, f"expected one hex line, got {len(lines)}"
    secret = lines[0]
    assert len(secret) == 64, f"passkey length {len(secret)}, expected 64"
    stray = sorted(set(secret) - hex_alphabet)
    assert not stray, f"passkey contains non-hex characters: {stray}"
|
||||
|
||||
|
||||
def test_passkey_is_lowercase() -> None:
    """The secret is stored lowercase so byte-equality comparisons stay stable."""
    # Arrange
    hex_rows = _hex_lines(PASSKEY_PATH)
    secret = hex_rows[0]
    # Assert
    assert secret.lower() == secret
|
||||
@@ -0,0 +1,216 @@
|
||||
"""Tests for the AZ-407 tile-cache-builder.
|
||||
|
||||
Covers AC-1 (deterministic), AC-2 (footprint coverage), AC-7 (provenance
|
||||
docs present). FAISS portion gated via importorskip — the production
|
||||
Docker image installs faiss-cpu, but the local venv runs the test fine
|
||||
without it (asserting only manifest + tile-filesystem determinism).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[3]
|
||||
INPUT_DIR = REPO_ROOT / "_docs" / "00_problem" / "input_data"
|
||||
BUILDER_DIR = REPO_ROOT / "e2e" / "fixtures" / "tile-cache-builder"
|
||||
BUILDER_PY = BUILDER_DIR / "builder.py"
|
||||
|
||||
|
||||
def _run_builder(output_dir: Path) -> dict:
    """Run builder.py on the project input_data and return its parsed stdout.

    The builder is invoked with ``--quiet`` and ``PYTHONHASHSEED=0``; its
    stdout is decoded as JSON. A non-zero exit raises
    ``subprocess.CalledProcessError``.
    """
    child_env = {**os.environ, "PYTHONHASHSEED": "0"}
    builder_cmd = [sys.executable, str(BUILDER_PY)]
    builder_cmd += ["--input-dir", str(INPUT_DIR)]
    builder_cmd += ["--output-dir", str(output_dir)]
    builder_cmd.append("--quiet")
    completed = subprocess.run(
        builder_cmd,
        check=True,
        capture_output=True,
        text=True,
        env=child_env,
    )
    return json.loads(completed.stdout)
|
||||
|
||||
|
||||
def _walk_file_hashes(root: Path) -> dict[str, str]:
    """Return ``{relative_posix_path: sha256_hex}`` for every regular file under ``root``.

    Directories are skipped; entries are inserted in sorted path order.
    """
    return {
        entry.relative_to(root).as_posix(): hashlib.sha256(entry.read_bytes()).hexdigest()
        for entry in sorted(root.rglob("*"))
        if entry.is_file()
    }
|
||||
|
||||
|
||||
def test_builder_is_deterministic(tmp_path: Path) -> None:
    """AC-1: two consecutive runs produce a bit-identical output tree.

    Compares the builder's reported manifest hash, the FAISS index hash
    (only when the first run reports one) and a per-file SHA-256 walk of
    both output trees. The tree-drift message lists files present on one
    side only AND files whose content differs, so a same-path byte drift
    is not reported as two empty sets.
    """
    # Arrange
    out_a = tmp_path / "run-a"
    out_b = tmp_path / "run-b"

    # Act
    summary_a = _run_builder(out_a)
    summary_b = _run_builder(out_b)

    # Assert
    assert summary_a["manifest_hash"] == summary_b["manifest_hash"], (
        f"manifest hash drift: {summary_a['manifest_hash']} vs "
        f"{summary_b['manifest_hash']} — AC-1 broken"
    )
    if summary_a["descriptors_index_hash"] is not None:
        assert summary_a["descriptors_index_hash"] == summary_b["descriptors_index_hash"], (
            "FAISS descriptors.index drift between runs — AC-1 broken"
        )
    hashes_a = _walk_file_hashes(out_a)
    hashes_b = _walk_file_hashes(out_b)
    only_a = sorted(set(hashes_a) - set(hashes_b))
    only_b = sorted(set(hashes_b) - set(hashes_a))
    content_drift = sorted(
        rel for rel in hashes_a.keys() & hashes_b.keys() if hashes_a[rel] != hashes_b[rel]
    )
    assert hashes_a == hashes_b, (
        "Tile filesystem byte-drift between runs — AC-1 broken. "
        f"diff(a-b)={only_a}, "
        f"diff(b-a)={only_b}, "
        f"content-drift={content_drift}"
    )
|
||||
|
||||
|
||||
def test_manifest_covers_60_stills_plus_bbox(tmp_path: Path) -> None:
    """AC-2: the manifest holds 60 still rows plus exactly one Derkachi bbox row.

    Every row must also report at least 0.5 m/px.
    """
    # Arrange
    run_dir = tmp_path / "run"

    # Act
    summary = _run_builder(run_dir)

    # Assert
    tile_count = summary["tile_count"]
    assert tile_count == 61, (
        f"expected 60 stills + 1 bbox = 61 rows, got {tile_count}"
    )
    manifest_path = run_dir / "manifest.csv"
    assert manifest_path.exists()
    with manifest_path.open() as manifest:
        records = list(csv.DictReader(manifest))
    assert len(records) == 61
    bbox_total = sum(
        1 for record in records if record["provenance"].startswith("STUB_BBOX:derkachi")
    )
    assert bbox_total == 1, "exactly one Derkachi bbox row required"
    for record in records:
        resolution = float(record["m_per_px"])
        assert resolution >= 0.5, (
            f"row {record['tile_x']},{record['tile_y']} below 0.5 m/px AC-8.1 floor"
        )
|
||||
|
||||
|
||||
def test_manifest_schema_matches_restrictions_md(tmp_path: Path) -> None:
    """AC-2 / data_model.md alignment: the manifest header row is the contract."""
    # Arrange
    run_dir = tmp_path / "run"
    _run_builder(run_dir)
    expected_columns = [
        "zoom_level",
        "tile_x",
        "tile_y",
        "capture_date",
        "source",
        "m_per_px",
        "jpeg_path",
        "content_hash",
        "provenance",
    ]

    # Act
    with (run_dir / "manifest.csv").open() as manifest:
        header = next(csv.reader(manifest))

    # Assert
    assert header == expected_columns
|
||||
|
||||
|
||||
def test_real_tile_count_matches_paired_gmaps(tmp_path: Path) -> None:
    """AC-2: every `_gmaps.png` reference becomes a `source=googlemaps` row."""
    # Arrange
    run_dir = tmp_path / "run"

    # Act
    summary = _run_builder(run_dir)

    # Assert
    paired_count = sum(1 for _ in INPUT_DIR.glob("AD*_gmaps.png"))
    real_count = summary["real_count"]
    assert real_count == paired_count, (
        f"paired _gmaps.png files: {paired_count}, real rows: {real_count}"
    )
    assert summary["paired_gmaps_count"] == paired_count
|
||||
|
||||
|
||||
def test_sidecar_json_per_tile(tmp_path: Path) -> None:
    """data_model.md § 2.1.2: each tile JPEG is paired with a same-named JSON sidecar.

    Each sidecar must carry at least the five identifying keys checked below.
    """
    # Arrange
    run_dir = tmp_path / "run"
    _run_builder(run_dir)
    tiles_dir = run_dir / "tiles"
    mandatory_keys = {"zoom_level", "tile_x", "tile_y", "capture_date", "source"}

    # Act
    tile_images = sorted(tiles_dir.rglob("*.jpg"))
    tile_sidecars = sorted(tiles_dir.rglob("*.json"))

    # Assert
    assert len(tile_images) == len(tile_sidecars) > 0
    for image, sidecar in zip(tile_images, tile_sidecars, strict=True):
        assert image.with_suffix(".json") == sidecar
        payload = json.loads(sidecar.read_text())
        assert mandatory_keys <= set(payload)
|
||||
|
||||
|
||||
def test_provenance_readme_lists_required_sections() -> None:
    """AC-7: README documents source URL/synthetic, license, redistribution.

    A missing README now fails the test instead of skipping it: the
    README is itself the AC-7 deliverable, and the sibling fixture tests
    assert existence the same way. The required markers are a
    ``## Provenance`` heading, the word "License" (either case) and
    "Reproducibility".
    """
    # Arrange
    readme_path = BUILDER_DIR / "README.md"
    assert readme_path.exists(), "builder README is the AC-7 provenance doc"
    readme = readme_path.read_text()

    # Assert
    # "## Provenance (AC-7)" also satisfies this check, as it contains the prefix.
    assert "## Provenance" in readme
    assert "License" in readme or "license" in readme
    assert "Reproducibility" in readme
|
||||
|
||||
|
||||
def test_faiss_index_emitted_when_faiss_available(tmp_path: Path) -> None:
    """AC-1 (FAISS gate): with faiss importable, a non-empty descriptors.index is written.

    Skipped when ``faiss`` cannot be imported in the test environment.
    """
    # Arrange
    pytest.importorskip("faiss", reason="faiss-cpu not in test venv")
    run_dir = tmp_path / "run"

    # Act
    summary = _run_builder(run_dir)

    # Assert
    reported_hash = summary["descriptors_index_hash"]
    assert reported_hash is not None, (
        "faiss-cpu IS importable but builder produced no descriptors.index"
    )
    index_file = run_dir / "descriptors.index"
    assert index_file.exists()
    assert index_file.stat().st_size > 0
|
||||
@@ -0,0 +1,356 @@
|
||||
"""Tests for the AZ-444 Tier-2 harness scripts.
|
||||
|
||||
The scripts themselves can only be END-TO-END validated on a real Jetson
|
||||
host; unit tests cover:
|
||||
|
||||
* CLI flag parsing (rejects bad combos, accepts valid combos)
|
||||
* --dry-run mode emits the expected ssh/docker command sequence
|
||||
* Selector parity: same `-k <expr>` flag produces a pytest invocation
|
||||
with the same `-k` argument on both Tier-1 and Tier-2
|
||||
* AC-6 reflash gating: --reflash without TIER2_REFLASH_ACK=1 refuses
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[3]
|
||||
TIER1_SH = REPO_ROOT / "e2e" / "docker" / "run-tier1.sh"
|
||||
TIER2_SH = REPO_ROOT / "e2e" / "jetson" / "run-tier2.sh"
|
||||
ON_JETSON_SH = REPO_ROOT / "e2e" / "jetson" / "tier2-on-jetson.sh"
|
||||
|
||||
# Skip all tests in this module when bash isn't available.
|
||||
pytestmark = pytest.mark.skipif(
|
||||
shutil.which("bash") is None,
|
||||
reason="bash not available in this environment",
|
||||
)
|
||||
|
||||
|
||||
def _run(args: list[str], env: dict[str, str] | None = None) -> subprocess.CompletedProcess:
|
||||
"""Invoke a script and return the completed process (no `check=True`)."""
|
||||
|
||||
full_env = dict(os.environ)
|
||||
if env:
|
||||
full_env.update(env)
|
||||
return subprocess.run(args, capture_output=True, text=True, env=full_env)
|
||||
|
||||
|
||||
# ───────── Existence + executable bit ─────────
|
||||
|
||||
|
||||
@pytest.mark.parametrize("script", [TIER1_SH, TIER2_SH, ON_JETSON_SH])
def test_script_exists_and_executable(script: Path) -> None:
    """Each harness script is present on disk and has its executable bit set."""
    # Assert
    is_present = script.exists()
    assert is_present, f"missing script: {script}"
    is_executable = os.access(script, os.X_OK)
    assert is_executable, f"script not executable: {script}"
|
||||
|
||||
|
||||
# ───────── CLI parsing — happy paths ─────────
|
||||
|
||||
|
||||
def test_tier1_dry_run_emits_compose_command() -> None:
    """Tier-1 --dry-run exits 0 and prints the docker-compose invocation."""
    # Arrange
    argv = [str(TIER1_SH)]
    argv += ["--fc-adapter", "ardupilot"]
    argv += ["--vio-strategy", "okvis2"]
    argv.append("--dry-run")

    # Act
    proc = _run(argv)

    # Assert
    assert proc.returncode == 0, proc.stderr
    for marker in (
        "docker compose",
        "docker-compose.test.yml",
        "TIER=tier1-workstation",
        "e2e-runner",
    ):
        assert marker in proc.stdout
|
||||
|
||||
|
||||
def test_tier2_dry_run_local_mode() -> None:
    """Tier-2 --dry-run with TIER2_HOST=localhost prints the local delegate command."""
    # Arrange
    argv = [str(TIER2_SH)]
    argv += ["--fc-adapter", "ardupilot"]
    argv += ["--vio-strategy", "okvis2"]
    argv.append("--dry-run")

    # Act
    proc = _run(argv, env={"TIER2_HOST": "localhost"})

    # Assert
    assert proc.returncode == 0, proc.stderr
    printed = proc.stdout
    assert "tier2-on-jetson.sh" in printed
    assert "(local)" in printed, "local mode marker missing"
|
||||
|
||||
|
||||
def test_tier2_dry_run_remote_mode(tmp_path: Path) -> None:
    """Tier-2 --dry-run with TIER2_HOST set ssh's via the delegate.

    The placeholder SSH key is written under pytest's per-test ``tmp_path``
    rather than inside the repository tree, so the test cannot leave a
    stray file in the checkout, works on read-only checkouts and does not
    collide with itself under parallel runs. pytest removes the directory,
    so no manual cleanup is needed.
    """
    # Arrange
    fake_key = tmp_path / "_fake_key.tmp"
    fake_key.write_text("fake")

    # Act
    proc = _run(
        [
            str(TIER2_SH),
            "--fc-adapter",
            "inav",
            "--vio-strategy",
            "klt_ransac",
            "--dry-run",
        ],
        env={
            "TIER2_HOST": "jetson-test-01.internal",
            "TIER2_USER": "azaion",
            "TIER2_KEY_PATH": str(fake_key),
        },
    )

    # Assert
    assert proc.returncode == 0, proc.stderr
    assert "ssh -o StrictHostKeyChecking=accept-new" in proc.stdout
    assert "azaion@jetson-test-01.internal" in proc.stdout
    assert "rsync" in proc.stdout
    assert "tier2-on-jetson.sh" in proc.stdout
|
||||
|
||||
|
||||
# ───────── CLI parsing — rejection paths ─────────
|
||||
|
||||
|
||||
def test_tier2_rejects_unknown_fc_adapter() -> None:
    """An unsupported --fc-adapter value exits 2 with an explanatory stderr line."""
    # Arrange
    argv = [str(TIER2_SH)]
    argv += ["--fc-adapter", "px4"]
    argv += ["--vio-strategy", "okvis2"]
    argv.append("--dry-run")

    # Act
    proc = _run(argv, env={"TIER2_HOST": "localhost"})

    # Assert
    assert proc.returncode == 2
    assert "--fc-adapter must be ardupilot or inav" in proc.stderr
|
||||
|
||||
|
||||
def test_tier2_rejects_unknown_vio_strategy() -> None:
    """An unsupported --vio-strategy value exits 2 with an explanatory stderr line."""
    # Arrange
    argv = [str(TIER2_SH)]
    argv += ["--fc-adapter", "ardupilot"]
    argv += ["--vio-strategy", "msckf"]
    argv.append("--dry-run")

    # Act
    proc = _run(argv, env={"TIER2_HOST": "localhost"})

    # Assert
    assert proc.returncode == 2
    assert "--vio-strategy must be" in proc.stderr
|
||||
|
||||
|
||||
def test_tier2_rejects_unknown_build_kind() -> None:
    """An unsupported --build-kind value exits 2 with an explanatory stderr line."""
    # Arrange
    argv = [str(TIER2_SH)]
    argv += ["--fc-adapter", "ardupilot"]
    argv += ["--vio-strategy", "okvis2"]
    argv += ["--build-kind", "debug"]
    argv.append("--dry-run")

    # Act
    proc = _run(argv, env={"TIER2_HOST": "localhost"})

    # Assert
    assert proc.returncode == 2
    assert "--build-kind must be production or asan" in proc.stderr
|
||||
|
||||
|
||||
def test_tier2_requires_tier2_host_on_non_arm() -> None:
    """With an empty TIER2_HOST the script errors unless the host is aarch64."""
    # Arrange
    argv = [str(TIER2_SH)]
    argv += ["--fc-adapter", "ardupilot"]
    argv += ["--vio-strategy", "okvis2"]
    argv.append("--dry-run")

    # Act
    proc = _run(argv, env={"TIER2_HOST": ""})

    # Assert — on aarch64 localhost gets auto-selected and the script
    # proceeds; everywhere else exit code 5 is expected.
    running_on_arm = os.uname().machine == "aarch64"
    if running_on_arm:
        assert proc.returncode == 0
    else:
        assert proc.returncode == 5
        assert "TIER2_HOST must be set" in proc.stderr
|
||||
|
||||
|
||||
# ───────── AC-6: reflash gating ─────────
|
||||
|
||||
|
||||
def test_reflash_refuses_without_ack() -> None:
    """`--reflash` without TIER2_REFLASH_ACK=1 in the env must exit 4."""

    # Arrange — the env deliberately carries no TIER2_REFLASH_ACK entry.
    flags = (
        "--fc-adapter",
        "ardupilot",
        "--vio-strategy",
        "okvis2",
        "--reflash",
        "--dry-run",
    )

    # Act
    result = _run([str(TIER2_SH), *flags], env={"TIER2_HOST": "localhost"})

    # Assert — stderr must name the acknowledgement variable.
    assert result.returncode == 4
    assert "TIER2_REFLASH_ACK=1" in result.stderr
|
||||
|
||||
|
||||
def test_reflash_dry_run_with_ack_shows_flash_command() -> None:
    """With the ack set, a `--reflash --dry-run` prints the sdkmanager flash command."""

    # Arrange
    flags = (
        "--fc-adapter",
        "ardupilot",
        "--vio-strategy",
        "okvis2",
        "--reflash",
        "--dry-run",
    )
    acked_env = {"TIER2_HOST": "localhost", "TIER2_REFLASH_ACK": "1"}

    # Act
    result = _run([str(TIER2_SH), *flags], env=acked_env)

    # Assert
    assert result.returncode == 0, result.stderr
    assert "nvidia-sdkmanager-cli flash" in result.stdout
|
||||
|
||||
|
||||
# ───────── AC-1: selector parity ─────────
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "selector,tier_args,expected_in_stdout",
    [
        ("not_tier2_only", "tier1", "TIER=tier1-workstation"),
        ("FT_P", "tier2", "JETSON_HOST=localhost"),
    ],
)
def test_selector_appears_in_dry_run(
    selector: str, tier_args: str, expected_in_stdout: str
) -> None:
    """A `-k` selector is echoed by the dry-run of whichever tier script runs."""

    # Arrange — "tier1" picks the Tier-1 script, anything else the Tier-2 one.
    if tier_args == "tier1":
        script = TIER1_SH
    else:
        script = TIER2_SH
    argv = [
        str(script),
        "--fc-adapter",
        "ardupilot",
        "--vio-strategy",
        "okvis2",
        "-k",
        selector,
        "--dry-run",
    ]

    # Act
    result = _run(argv, env={"TIER2_HOST": "localhost"})

    # Assert
    assert result.returncode == 0, result.stderr
    # Tier-1 prints the selector inside its pytest argument list, while
    # Tier-2 hands it to the delegate through a SELECTOR= env var. Either
    # way the raw selector text shows up in stdout.
    assert selector in result.stdout, (
        f"selector '{selector}' not present in {script.name} dry-run output"
    )
    assert expected_in_stdout in result.stdout
|
||||
|
||||
|
||||
def test_selector_parity_pytest_args_equivalent() -> None:
    """Tier-1 and Tier-2 dry-runs both surface the same `-k` selector.

    The same selector (including spaces) is passed to both scripts. The
    Tier-1 dry-run output must contain ``-k <selector>``; the Tier-2
    dry-run output must contain ``SELECTOR=<selector>``, the env-var form
    in which it forwards the selector.
    """

    # Arrange
    selector = "FT_P_09_AP and not asan"
    shared_flags = [
        "--fc-adapter",
        "ardupilot",
        "--vio-strategy",
        "okvis2",
        "-k",
        selector,
        "--dry-run",
    ]

    # Act
    p1 = _run([str(TIER1_SH), *shared_flags])
    p2 = _run([str(TIER2_SH), *shared_flags], env={"TIER2_HOST": "localhost"})

    # Assert — check each tier separately and attach its stderr so a
    # failure report says which script broke and why.
    assert p1.returncode == 0, p1.stderr
    assert p2.returncode == 0, p2.stderr
    # Tier-1 shows `-k <selector>` directly in the dry-run output.
    assert f"-k {selector}" in p1.stdout
    # Tier-2 forwards via SELECTOR=<selector> env var.
    assert f"SELECTOR={selector}" in p2.stdout
|
||||
@@ -148,6 +148,22 @@ def test_build_row_records_evidence_paths() -> None:
|
||||
assert row["evidence_paths"] == "evidence/a.tlog,evidence/b.csv"
|
||||
|
||||
|
||||
def test_build_row_pass_when_no_session_attribute() -> None:
    """A passed report on an item lacking `.session` still yields a PASS row.

    AZ-445: with nfr_recorder loaded the result column may become PARTIAL.
    When it is not loaded, or when the item has no ``session`` attribute
    (as with the fake item used in unit tests), the row must stay PASS.
    """
    # Arrange — the fake item carries no .session attribute.
    fake_item = _FakeItem()
    passed_report = _report("passed")
    started_at = "2026-05-16T10:00:00+00:00"

    # Act
    row = build_row(fake_item, passed_report, started_at, 1)

    # Assert
    assert row["result"] == "PASS", "no aggregator available → result must be PASS"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# In-process plugin integration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -0,0 +1,305 @@
|
||||
"""Tests for the AZ-445 NFR recorder + run-end aggregator."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import textwrap
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from runner.reporting import nfr_recorder
|
||||
from runner.reporting.nfr_recorder import (
|
||||
_RunAggregator,
|
||||
parse_traceability_matrix,
|
||||
)
|
||||
|
||||
|
||||
# ───────────────────── traceability matrix parser ─────────────────────
|
||||
|
||||
|
||||
def test_parse_traceability_matrix_extracts_ac_ids(tmp_path: Path) -> None:
    """Table rows starting with an `AC-…` or `RESTRICT-…` ID are all collected."""

    # Arrange — header and separator rows plus free text sit between the
    # ID rows and must not show up in the result.
    matrix_md = textwrap.dedent(
        """
        ## Acceptance Criteria Coverage

        | AC ID | Description | Source | Status |
        |-------|-------------|--------|--------|
        | AC-1.1 | something | FT-P-01 | Covered |
        | AC-7.1 | nope | — | NOT COVERED |
        | RESTRICT-CAM-2 | restriction | NFT-SEC-01 | Covered |

        text in between (no row).

        | AC-NEW-3 | another | NFT-LIM-02 | Covered |
        """
    ).strip()
    matrix = tmp_path / "matrix.md"
    matrix.write_text(matrix_md)
    expected_ids = sorted(["AC-1.1", "AC-7.1", "RESTRICT-CAM-2", "AC-NEW-3"])

    # Act
    ids = parse_traceability_matrix(matrix)

    # Assert
    assert ids == expected_ids
|
||||
|
||||
|
||||
def test_parse_traceability_matrix_missing_file(tmp_path: Path) -> None:
    """Parsing a path that does not exist raises FileNotFoundError."""
    # Arrange
    absent_matrix = tmp_path / "does-not-exist.md"

    # Act + Assert
    with pytest.raises(FileNotFoundError):
        parse_traceability_matrix(absent_matrix)
|
||||
|
||||
|
||||
# ───────────────────── aggregator: per-scenario state ─────────────────────
|
||||
|
||||
|
||||
def _aggregator(tmp_path: Path, matrix_ids: list[str]) -> _RunAggregator:
    """Build a `_RunAggregator` from `tmp_path` and the given matrix IDs."""
    aggregator = _RunAggregator(tmp_path, matrix_ids)
    return aggregator
|
||||
|
||||
|
||||
def test_aggregator_records_metric_and_partial(tmp_path: Path) -> None:
    """A metric, a partial mark and an outcome all land on the same record."""

    # Arrange
    agg = _aggregator(tmp_path, ["AC-1.1", "AC-4.1"])
    rec = agg.ensure_record("NFT-PERF-01", "test_x", ("AC-4.1",))

    # Act
    agg.record_metric(
        scenario_id=rec.scenario_id,
        name="latency_p95_ms",
        value=380.4,
        ac_id="AC-4.1",
        nodeid="test_x",
    )
    agg.mark_partial(
        scenario_id=rec.scenario_id,
        ac_id="AC-4.1",
        reason="exceeds 400ms in chamber",
        nodeid="test_x",
    )
    agg.set_outcome("test_x", "PASS")

    # Assert — exactly one record exists and it holds everything recorded.
    (stored,) = agg.records()
    expected_metric = {"value": 380.4, "ac_id": "AC-4.1"}
    assert stored.metrics["latency_p95_ms"] == expected_metric
    assert stored.partial_acs == {"AC-4.1": "exceeds 400ms in chamber"}
    assert stored.outcome == "PASS"
|
||||
|
||||
|
||||
# ───────────────────── aggregator: emission ─────────────────────
|
||||
|
||||
|
||||
def test_emit_per_nfr_json_writes_one_file_per_scenario(tmp_path: Path) -> None:
    """AC-1: each recorded scenario gets its own JSON file under per-nfr/."""

    # Arrange — two scenarios, only the first one carries a metric.
    agg = _aggregator(tmp_path, ["AC-4.1"])
    agg.ensure_record(
        scenario_id="NFT-PERF-01", nodeid="test_a", traces_to=("AC-4.1",)
    )
    agg.ensure_record(
        scenario_id="NFT-PERF-02", nodeid="test_b", traces_to=("AC-4.4",)
    )
    agg.record_metric(
        scenario_id="NFT-PERF-01",
        name="latency_p95_ms",
        value=380.4,
        ac_id="AC-4.1",
        nodeid="test_a",
    )
    for nodeid in ("test_a", "test_b"):
        agg.set_outcome(nodeid, "PASS")

    # Act
    paths = agg.emit_per_nfr_json()

    # Assert
    assert len(paths) == 2
    written_names = {path.name for path in paths}
    assert written_names == {"NFT-PERF-01.json", "NFT-PERF-02.json"}
    first_json = tmp_path / "per-nfr" / "NFT-PERF-01.json"
    blob_a = json.loads(first_json.read_text())
    assert blob_a["scenario_id"] == "NFT-PERF-01"
    assert blob_a["outcome"] == "PASS"
    assert blob_a["traces_to"] == ["AC-4.1"]
    assert blob_a["metrics"]["latency_p95_ms"]["value"] == 380.4
|
||||
|
||||
|
||||
def test_emit_traceability_status_classifies_acs(tmp_path: Path) -> None:
    """AC-2: each matrix AC ID is reported with a status and its sources."""

    # Arrange — three ACs in the matrix. Two passing scenarios trace to
    # AC-1.1, one scenario traces to AC-4.1 and marks it partial, and
    # nothing traces to AC-NEW-3.
    agg = _aggregator(tmp_path, ["AC-1.1", "AC-4.1", "AC-NEW-3"])
    agg.ensure_record(
        scenario_id="FT-P-01", nodeid="test_p01", traces_to=("AC-1.1",)
    )
    agg.ensure_record(
        scenario_id="FT-P-01-dup", nodeid="test_p01b", traces_to=("AC-1.1",)
    )
    agg.ensure_record(
        scenario_id="NFT-PERF-01", nodeid="test_perf01", traces_to=("AC-4.1",)
    )
    agg.mark_partial(
        scenario_id="NFT-PERF-01",
        ac_id="AC-4.1",
        reason="exceeds threshold under chamber",
        nodeid="test_perf01",
    )
    for nodeid in ("test_p01", "test_p01b", "test_perf01"):
        agg.set_outcome(nodeid, "PASS")

    # Act
    status = agg.compute_traceability_status()
    emitted_path = agg.emit_traceability_status()

    # Assert
    covered = status["AC-1.1"]
    assert covered["status"] == "Covered"
    assert sorted(covered["sources"]) == ["FT-P-01", "FT-P-01-dup"]

    partial = status["AC-4.1"]
    assert partial["status"] == "PARTIAL"
    assert partial["sources"] == ["NFT-PERF-01"]

    untraced = status["AC-NEW-3"]
    assert untraced["status"] == "NOT COVERED"
    assert untraced["sources"] == []

    # The file written to disk must match the computed mapping exactly.
    persisted = json.loads(emitted_path.read_text())
    assert persisted == status
|
||||
|
||||
|
||||
def test_emit_traceability_status_downgrades_on_fail(tmp_path: Path) -> None:
    """An AC traced only by a FAILed test is reported as PARTIAL, not Covered."""

    # Arrange
    agg = _aggregator(tmp_path, ["AC-1.1"])
    agg.ensure_record(
        scenario_id="FT-P-01", nodeid="test_p01", traces_to=("AC-1.1",)
    )
    agg.set_outcome("test_p01", "FAIL")

    # Act
    status = agg.compute_traceability_status()

    # Assert
    # AZ-445 AC-2 limits the status enum to {Covered, PARTIAL, NOT COVERED},
    # so a FAIL maps to PARTIAL: a scenario traces to the AC, but that
    # scenario did not pass.
    ac_status = status["AC-1.1"]["status"]
    assert ac_status == "PARTIAL"
|
||||
|
||||
|
||||
def test_emit_regression_baseline_dumps_numeric_metrics(tmp_path: Path) -> None:
    """AC-3: regression-baseline.json keeps a scenario's numeric metrics only."""

    # Arrange — two numeric metrics plus one dict-valued metric that the
    # baseline is expected to drop.
    agg = _aggregator(tmp_path, ["AC-4.1"])
    agg.ensure_record(
        scenario_id="NFT-PERF-01", nodeid="test_a", traces_to=("AC-4.1",)
    )
    recorded = (
        ("latency_p95_ms", 380.4),
        ("latency_p99_ms", 420.7),
        ("extra_meta", {"k": "v"}),  # non-numeric — dropped from baseline
    )
    for metric_name, metric_value in recorded:
        agg.record_metric(
            scenario_id="NFT-PERF-01",
            name=metric_name,
            value=metric_value,
            ac_id="AC-4.1",
            nodeid="test_a",
        )
    agg.set_outcome("test_a", "PASS")

    # Act
    path = agg.emit_regression_baseline()

    # Assert
    blob = json.loads(path.read_text())
    scenario_blob = blob["scenarios"]["NFT-PERF-01"]
    assert scenario_blob["metrics"] == {
        "latency_p95_ms": 380.4,
        "latency_p99_ms": 420.7,
    }
    assert scenario_blob["outcome"] == "PASS"
    assert "extra_meta" not in scenario_blob["metrics"]
|
||||
|
||||
|
||||
# ───────────────────── integration with pytest plugin ─────────────────────
|
||||
|
||||
|
||||
def test_nfr_recorder_fixture_emits_artifacts_in_run(tmp_path: Path) -> None:
    """Run a nested in-process pytest session and inspect what it emitted.

    A throwaway test module uses the ``nfr_recorder`` fixture to record one
    metric and mark one AC as partial. Once the nested session exits with
    rc 0, the evidence directory must contain per-nfr/NFT-PERF-01.json,
    traceability-status.json and regression-baseline.json holding the
    recorded values.
    """

    # Arrange — a two-row matrix; the inner test traces to AC-4.1 only.
    matrix_md = (
        "## Acceptance Criteria Coverage\n\n"
        "| AC ID | Desc | Source | Status |\n"
        "|-------|------|--------|--------|\n"
        "| AC-4.1 | foo | NFT-PERF-01 | Covered |\n"
        "| AC-4.2 | bar | NFT-PERF-02 | Covered |\n"
    )
    matrix = tmp_path / "matrix.md"
    matrix.write_text(matrix_md)

    evidence_out = tmp_path / "evidence"
    evidence_out.mkdir()

    inner_source = textwrap.dedent(
        """
        import pytest

        @pytest.mark.scenario_id("NFT-PERF-01")
        @pytest.mark.traces_to(("AC-4.1",))
        def test_inner_perf(nfr_recorder):
            nfr_recorder.record_metric("latency_p95_ms", 380.4, ac_id="AC-4.1")
            nfr_recorder.partial("AC-4.1", "exceeds threshold")
        """
    )
    inner = tmp_path / "test_inner.py"
    inner.write_text(inner_source)

    # The nested run gets a minimal conftest that only registers
    # `--evidence-out`, giving nfr_recorder somewhere to write without
    # pulling in the real harness's heavy conftest.
    conftest_source = textwrap.dedent(
        """
        def pytest_addoption(parser):
            parser.addoption(
                "--evidence-out",
                action="store",
                default=".",
            )
        """
    )
    (tmp_path / "conftest.py").write_text(conftest_source)

    plugin_flags = [
        "-p",
        "runner.reporting.csv_reporter",
        "-p",
        "runner.reporting.nfr_recorder",
    ]

    # Act
    rc = pytest.main(
        [
            *plugin_flags,
            str(inner),
            f"--evidence-out={evidence_out}",
            f"--traceability-matrix={matrix}",
            "--no-header",
            "-q",
        ]
    )

    # Assert
    assert rc == 0, f"inner pytest run failed with rc={rc}"

    per_nfr = evidence_out / "per-nfr" / "NFT-PERF-01.json"
    assert per_nfr.exists()
    blob = json.loads(per_nfr.read_text())
    assert blob["scenario_id"] == "NFT-PERF-01"
    assert blob["partial_acs"] == {"AC-4.1": "exceeds threshold"}

    status_file = evidence_out / "traceability-status.json"
    status = json.loads(status_file.read_text())
    assert status["AC-4.1"]["status"] == "PARTIAL"
    assert status["AC-4.2"]["status"] == "NOT COVERED"

    baseline_file = evidence_out / "regression-baseline.json"
    baseline = json.loads(baseline_file.read_text())
    assert baseline["scenarios"]["NFT-PERF-01"]["metrics"] == {"latency_p95_ms": 380.4}
|
||||
@@ -22,7 +22,9 @@ E2E_ROOT = Path(__file__).resolve().parents[1]
|
||||
"docker/docker-compose.test.yml",
|
||||
"docker/docker-compose.tier2-bridge.yml",
|
||||
"docker/secrets/mavlink_passkey",
|
||||
"docker/run-tier1.sh",
|
||||
"jetson/run-tier2.sh",
|
||||
"jetson/tier2-on-jetson.sh",
|
||||
"jetson/tier2.service",
|
||||
"jetson/tegrastats_parser.py",
|
||||
"jetson/jtop_parser.py",
|
||||
@@ -32,6 +34,7 @@ E2E_ROOT = Path(__file__).resolve().parents[1]
|
||||
"runner/conftest.py",
|
||||
"runner/reporting/csv_reporter.py",
|
||||
"runner/reporting/evidence_bundler.py",
|
||||
"runner/reporting/nfr_recorder.py",
|
||||
"runner/helpers/frame_source_replay.py",
|
||||
"runner/helpers/imu_replay.py",
|
||||
"runner/helpers/sitl_observer.py",
|
||||
@@ -42,14 +45,21 @@ E2E_ROOT = Path(__file__).resolve().parents[1]
|
||||
"fixtures/mock-suite-sat/app.py",
|
||||
"fixtures/mock-suite-sat/requirements.txt",
|
||||
"fixtures/tile-cache-builder/README.md",
|
||||
"fixtures/tile-cache-builder/builder.py",
|
||||
"fixtures/tile-cache-builder/Dockerfile",
|
||||
"fixtures/tile-cache-builder/build.sh",
|
||||
"fixtures/age-injector/README.md",
|
||||
"fixtures/age-injector/age_injector.py",
|
||||
"fixtures/age-injector/inject.sh",
|
||||
"fixtures/injectors/outlier.py",
|
||||
"fixtures/injectors/blackout_spoof.py",
|
||||
"fixtures/injectors/multi_segment.py",
|
||||
"fixtures/injectors/cold_boot.py",
|
||||
"fixtures/cold-boot/README.md",
|
||||
"fixtures/cold-boot/cold_boot_fixture.json",
|
||||
"fixtures/secrets/mavlink-test-passkey.txt",
|
||||
"fixtures/security/generate_cve_jpeg.py",
|
||||
"fixtures/security/cve-2025-53644.jpg",
|
||||
"fixtures/security/README.md",
|
||||
"tests/__init__.py",
|
||||
"tests/conftest.py",
|
||||
@@ -63,19 +73,35 @@ E2E_ROOT = Path(__file__).resolve().parents[1]
|
||||
],
|
||||
)
|
||||
def test_required_path_exists(relative_path: str) -> None:
|
||||
"""Each path AZ-406 commits to must exist on disk."""
|
||||
"""Each path AZ-406 + AZ-407 + AZ-444 + AZ-445 commit to must exist on disk."""
|
||||
assert (E2E_ROOT / relative_path).exists(), (
|
||||
f"AZ-406 layout invariant broken: e2e/{relative_path} is missing"
|
||||
f"layout invariant broken: e2e/{relative_path} is missing"
|
||||
)
|
||||
|
||||
|
||||
def test_passkey_files_match() -> None:
|
||||
"""Docker secret and runner-side passkey fixture must hold the same bytes."""
|
||||
"""Docker secret and runner-side passkey fixture must encode the same secret.
|
||||
|
||||
The docker-secret file is consumed by mavproxy as a raw 64-hex passkey
|
||||
(no comments allowed in its body). The runner-side fixture file is the
|
||||
AZ-407 AC-5 deliverable and ships with a ``# TEST ONLY...`` header
|
||||
line so it self-documents during code review.
|
||||
|
||||
We therefore compare the FIRST 64-hex line of each file rather than
|
||||
the raw bytes. The two files MUST encode the same 32-byte secret;
|
||||
drift between them would mean a mavproxy run uses a different key
|
||||
than the runner fixture states.
|
||||
"""
|
||||
|
||||
# Arrange
|
||||
docker_pk = (E2E_ROOT / "docker/secrets/mavlink_passkey").read_bytes()
|
||||
runner_pk = (E2E_ROOT / "fixtures/secrets/mavlink-test-passkey.txt").read_bytes()
|
||||
docker_pk = (E2E_ROOT / "docker/secrets/mavlink_passkey").read_text().strip().splitlines()
|
||||
runner_pk_lines = (E2E_ROOT / "fixtures/secrets/mavlink-test-passkey.txt").read_text().strip().splitlines()
|
||||
runner_pk = [line for line in runner_pk_lines if not line.lstrip().startswith("#")]
|
||||
|
||||
# Assert
|
||||
assert docker_pk == runner_pk, (
|
||||
"MAVLink test passkey bytes differ between docker secret and runner "
|
||||
"fixture. They MUST be kept in sync — see e2e/fixtures/secrets/README.md."
|
||||
assert docker_pk and runner_pk, "passkey files must contain at least one non-comment line"
|
||||
assert docker_pk[0] == runner_pk[0], (
|
||||
"MAVLink test passkey secrets differ between docker secret and runner "
|
||||
"fixture. They MUST encode the same 32-byte secret — see "
|
||||
"e2e/fixtures/secrets/README.md."
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user