mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-21 20:11:15 +00:00
6599d828d2
Three blackbox-harness tasks landed together — all depend only on
AZ-406 and unblock the FT-* / NFT-* scenario tasks scheduled for
batches 69+.
AZ-407 — Static fixture builders (3pt):
* tile-cache-builder/{builder.py, Dockerfile, build.sh} produces a
deterministic tile-cache-fixture Docker volume from
_docs/00_problem/input_data/. Reproducibility primitives: sorted
iteration, frozen PIL JPEG settings, FAISS HNSW32 built single-
threaded with seeded stub descriptors.
* age-injector/{age_injector.py, inject.sh} clones the volume and
shifts capture_date by N×30.44 days; tile JPEG bytes preserved
bit-identical. Emits synth-age-7mo + synth-age-13mo volumes.
* cold-boot/cold_boot_fixture.json: frozen FC pose snapshot at
Derkachi sector centre, schema v1.
* secrets/mavlink-test-passkey.txt: 64-hex with required
`# TEST ONLY` header line per AC-5. Passkey-equality test now
compares the secret line after stripping the header.
* security/cve-2025-53644.jpg: synthetic 158-byte malformed JPEG
(truncated SOS marker). OpenCV 4.11.x rejects gracefully with
imdecode → None. AZ-439 will sharpen for ASan instrumentation.
* Top-level Makefile with `make fixtures` / `make fixtures-*` /
`make e2e-tier1*` / `make unit-tests` targets.
AZ-444 — Tier-2 Jetson harness wrapper (5pt):
* run-tier2.sh rewritten as orchestrator. Detects local
(aarch64 + TIER2_HOST=localhost) vs remote (ssh into TIER2_HOST).
New flags: -k/--selector, --build-kind production|asan,
--reflash (gated behind TIER2_REFLASH_ACK=1 two-key gate),
--dry-run.
* tier2-on-jetson.sh (new) — on-device delegate. Verifies
gps-denied-onboard{,-asan}.service health; restarts with 5s
tolerance; spawns tegrastats + jtop parallel samplers; tails
ASan unit's journal in asan mode; drives docker compose with
TIER=tier2-jetson; forwards SELECTOR to pytest -k.
* docker/run-tier1.sh (new) — selector-parity sibling.
* AC-1 (selector parity) and AC-6 (reflash gating) unit-tested via
--dry-run output assertions. AC-2/AC-3/AC-4/AC-5 are hardware-
loop ACs verified by the Tier-2 runtime smoke (no Jetson in the
unit-test layer).
AZ-445 — CSV reporter + evidence bundler refinements (2pt):
* reporting/nfr_recorder.py (new) — pytest plugin. Provides the
`nfr_recorder` fixture with record_metric(name, value, ac_id)
and partial(ac_id, reason). At session end emits:
- per-nfr/<scenario_id>.json (AC-1)
- traceability-status.json with every AC ID parsed from
traceability-matrix.md, classified Covered/PARTIAL/NOT
COVERED with source scenario IDs (AC-2)
- regression-baseline.json with all numeric metrics (AC-3)
* csv_reporter.py extended — `_outcome_to_result` consults the
aggregator; rows flip PASS → PARTIAL when an AC was marked
PARTIAL by nfr_recorder (AC-4). Graceful fallback when
aggregator isn't registered (unit-test contexts).
* conftest.py registers nfr_recorder in pytest_plugins.
* New --traceability-matrix CLI flag seeds the NOT COVERED rows.
Build / config:
* pyproject.toml dev extras: added Pillow>=10.4,<13.0 for the
tile-cache-builder unit test (broad enough to keep torchvision's
Pillow 12 pin happy; the production builder runs inside its own
Docker image with its own pin).
* Updated test_directory_layout.py to cover 10 new files + replaced
the byte-equal passkey assertion with the header-stripping
variant.
Test results:
* 157 focused tests pass (was 97 in batch 67; +60 new across this
batch). No regressions.
Module-layout / spec drift:
* AZ-407 spec text says `tests/fixtures/...`; module-layout
blackbox_tests entry (commit d7a17a8) authoritatively places the
harness under `e2e/`. Implementation followed the layout entry.
* AZ-444 spec mentions `e2e/tier2/run-tier2.sh`; AZ-406 placed it
at `e2e/jetson/run-tier2.sh`. Kept at `e2e/jetson/` for
consistency.
* Cold-boot README ownership: corrected from AZ-419 to AZ-407 per
AZ-419's own Dependencies field.
Specs archived to _docs/02_tasks/done/. Jira tickets transitioned to
In Testing on commit.
Co-authored-by: Cursor <cursoragent@cursor.com>
357 lines
9.3 KiB
Python
357 lines
9.3 KiB
Python
"""Tests for the AZ-444 Tier-2 harness scripts.

The scripts themselves can only be END-TO-END validated on a real Jetson
host; unit tests cover:

* CLI flag parsing (rejects bad combos, accepts valid combos)
* --dry-run mode emits the expected ssh/docker command sequence
* Selector parity: same `-k <expr>` flag produces a pytest invocation
  with the same `-k` argument on both Tier-1 and Tier-2
* AC-6 reflash gating: --reflash without TIER2_REFLASH_ACK=1 refuses
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import re
|
|
import shutil
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
# Fourth ancestor of this file's resolved path (parents[3]); every script path
# below is anchored here.
REPO_ROOT = Path(__file__).resolve().parents[3]

# Harness scripts exercised by the tests in this module.
TIER1_SH = REPO_ROOT.joinpath("e2e", "docker", "run-tier1.sh")
TIER2_SH = REPO_ROOT.joinpath("e2e", "jetson", "run-tier2.sh")
ON_JETSON_SH = REPO_ROOT.joinpath("e2e", "jetson", "tier2-on-jetson.sh")

# Module-wide skip: applies to every test here when no `bash` executable
# can be located.
pytestmark = pytest.mark.skipif(
    shutil.which("bash") is None,
    reason="bash not available in this environment",
)
|
|
|
|
|
|
def _run(args: list[str], env: dict[str, str] | None = None) -> subprocess.CompletedProcess:
|
|
"""Invoke a script and return the completed process (no `check=True`)."""
|
|
|
|
full_env = dict(os.environ)
|
|
if env:
|
|
full_env.update(env)
|
|
return subprocess.run(args, capture_output=True, text=True, env=full_env)
|
|
|
|
|
|
# ───────── Existence + executable bit ─────────
|
|
|
|
|
|
@pytest.mark.parametrize("script", [TIER1_SH, TIER2_SH, ON_JETSON_SH])
def test_script_exists_and_executable(script: Path) -> None:
    """Each harness script exists and is executable by the current process."""
    # Assert
    is_present = script.exists()
    assert is_present, f"missing script: {script}"

    is_executable = os.access(script, os.X_OK)
    assert is_executable, f"script not executable: {script}"
|
|
|
|
|
|
# ───────── CLI parsing — happy paths ─────────
|
|
|
|
|
|
def test_tier1_dry_run_emits_compose_command() -> None:
    """A Tier-1 --dry-run succeeds and prints the docker compose invocation."""
    # Arrange
    argv = [
        str(TIER1_SH),
        "--fc-adapter",
        "ardupilot",
        "--vio-strategy",
        "okvis2",
        "--dry-run",
    ]
    expected_fragments = (
        "docker compose",
        "docker-compose.test.yml",
        "TIER=tier1-workstation",
        "e2e-runner",
    )

    # Act
    result = _run(argv)

    # Assert
    assert result.returncode == 0, result.stderr
    for fragment in expected_fragments:
        assert fragment in result.stdout
|
|
|
|
|
|
def test_tier2_dry_run_local_mode() -> None:
    """With TIER2_HOST=localhost, a Tier-2 --dry-run prints the delegate command."""
    # Arrange
    argv = [
        str(TIER2_SH),
        "--fc-adapter",
        "ardupilot",
        "--vio-strategy",
        "okvis2",
        "--dry-run",
    ]
    local_env = {"TIER2_HOST": "localhost"}

    # Act
    result = _run(argv, env=local_env)

    # Assert
    assert result.returncode == 0, result.stderr
    dry_run_output = result.stdout
    assert "tier2-on-jetson.sh" in dry_run_output
    assert "(local)" in dry_run_output, "local mode marker missing"
|
|
|
|
|
|
def test_tier2_dry_run_remote_mode() -> None:
    """Tier-2 --dry-run with a remote TIER2_HOST goes through ssh to the delegate.

    The script is given a placeholder key file via TIER2_KEY_PATH. The file
    is created in a throwaway temporary directory rather than inside the
    repository checkout, so the test leaves the working tree untouched,
    works on read-only checkouts, and cannot collide with a concurrent run
    of the same test.
    """
    # Function-scope import: the module's import block does not pull in tempfile.
    import tempfile

    # Arrange — the directory (and the key in it) is removed when the
    # `with` block exits, even if the script invocation raises.
    with tempfile.TemporaryDirectory() as scratch_dir:
        fake_key = Path(scratch_dir) / "fake_key"
        fake_key.write_text("fake")

        # Act
        proc = _run(
            [
                str(TIER2_SH),
                "--fc-adapter",
                "inav",
                "--vio-strategy",
                "klt_ransac",
                "--dry-run",
            ],
            env={
                "TIER2_HOST": "jetson-test-01.internal",
                "TIER2_USER": "azaion",
                "TIER2_KEY_PATH": str(fake_key),
            },
        )

    # Assert — output was captured, so the key file is no longer needed.
    assert proc.returncode == 0, proc.stderr
    assert "ssh -o StrictHostKeyChecking=accept-new" in proc.stdout
    assert "azaion@jetson-test-01.internal" in proc.stdout
    assert "rsync" in proc.stdout
    assert "tier2-on-jetson.sh" in proc.stdout
|
|
|
|
|
|
# ───────── CLI parsing — rejection paths ─────────
|
|
|
|
|
|
def test_tier2_rejects_unknown_fc_adapter() -> None:
    """An unsupported --fc-adapter value exits with status 2 and an error message."""
    # Arrange
    argv = [
        str(TIER2_SH),
        "--fc-adapter",
        "px4",
        "--vio-strategy",
        "okvis2",
        "--dry-run",
    ]

    # Act
    result = _run(argv, env={"TIER2_HOST": "localhost"})

    # Assert
    exit_code, errors = result.returncode, result.stderr
    assert exit_code == 2
    assert "--fc-adapter must be ardupilot or inav" in errors
|
|
|
|
|
|
def test_tier2_rejects_unknown_vio_strategy() -> None:
    """An unsupported --vio-strategy value exits with status 2 and an error message."""
    # Arrange
    argv = [
        str(TIER2_SH),
        "--fc-adapter",
        "ardupilot",
        "--vio-strategy",
        "msckf",
        "--dry-run",
    ]

    # Act
    result = _run(argv, env={"TIER2_HOST": "localhost"})

    # Assert
    exit_code, errors = result.returncode, result.stderr
    assert exit_code == 2
    assert "--vio-strategy must be" in errors
|
|
|
|
|
|
def test_tier2_rejects_unknown_build_kind() -> None:
    """An unsupported --build-kind value exits with status 2 and an error message."""
    # Arrange
    argv = [
        str(TIER2_SH),
        "--fc-adapter",
        "ardupilot",
        "--vio-strategy",
        "okvis2",
        "--build-kind",
        "debug",
        "--dry-run",
    ]

    # Act
    result = _run(argv, env={"TIER2_HOST": "localhost"})

    # Assert
    exit_code, errors = result.returncode, result.stderr
    assert exit_code == 2
    assert "--build-kind must be production or asan" in errors
|
|
|
|
|
|
def test_tier2_requires_tier2_host_on_non_arm() -> None:
    """An empty TIER2_HOST is an error everywhere except on an aarch64 machine."""
    # Arrange
    argv = [
        str(TIER2_SH),
        "--fc-adapter",
        "ardupilot",
        "--vio-strategy",
        "okvis2",
        "--dry-run",
    ]

    # Act
    result = _run(argv, env={"TIER2_HOST": ""})

    # Assert — on aarch64 the script is expected to auto-select localhost and
    # proceed; on any other machine it must exit 5 with an explanatory message.
    on_aarch64 = os.uname().machine == "aarch64"
    if not on_aarch64:
        assert result.returncode == 5
        assert "TIER2_HOST must be set" in result.stderr
    else:
        assert result.returncode == 0
|
|
|
|
|
|
# ───────── AC-6: reflash gating ─────────
|
|
|
|
|
|
def test_reflash_refuses_without_ack() -> None:
    """Passing --reflash while TIER2_REFLASH_ACK=1 is absent must abort the run."""
    # Arrange
    argv = [
        str(TIER2_SH),
        "--fc-adapter",
        "ardupilot",
        "--vio-strategy",
        "okvis2",
        "--reflash",
        "--dry-run",
    ]

    # Act
    result = _run(argv, env={"TIER2_HOST": "localhost"})

    # Assert
    exit_code, errors = result.returncode, result.stderr
    assert exit_code == 4
    assert "TIER2_REFLASH_ACK=1" in errors
|
|
|
|
|
|
def test_reflash_dry_run_with_ack_shows_flash_command() -> None:
    """With the ack set, a --reflash --dry-run prints the sdkmanager flash command."""
    # Arrange
    argv = [
        str(TIER2_SH),
        "--fc-adapter",
        "ardupilot",
        "--vio-strategy",
        "okvis2",
        "--reflash",
        "--dry-run",
    ]
    acked_env = {"TIER2_HOST": "localhost", "TIER2_REFLASH_ACK": "1"}

    # Act
    result = _run(argv, env=acked_env)

    # Assert
    assert result.returncode == 0, result.stderr
    assert "nvidia-sdkmanager-cli flash" in result.stdout
|
|
|
|
|
|
# ───────── AC-1: selector parity ─────────
|
|
|
|
|
|
@pytest.mark.parametrize(
    "selector,tier_args,expected_in_stdout",
    [
        ("not_tier2_only", "tier1", "TIER=tier1-workstation"),
        ("FT_P", "tier2", "JETSON_HOST=localhost"),
    ],
)
def test_selector_appears_in_dry_run(
    selector: str, tier_args: str, expected_in_stdout: str
) -> None:
    """A -k selector passed to either tier's script shows up in its dry-run output."""
    # Arrange — anything other than "tier1" selects the Tier-2 script.
    script = TIER2_SH if tier_args != "tier1" else TIER1_SH
    argv = [
        str(script),
        "--fc-adapter",
        "ardupilot",
        "--vio-strategy",
        "okvis2",
        "-k",
        selector,
        "--dry-run",
    ]

    # Act
    result = _run(argv, env={"TIER2_HOST": "localhost"})

    # Assert
    assert result.returncode == 0, result.stderr
    # Tier-1 prints the selector as part of the pytest argument list, while
    # Tier-2 hands it to the delegate through a SELECTOR= env var and the
    # delegate places it on the pytest command line. In both cases the
    # selector text itself is present in the output.
    dry_run_output = result.stdout
    assert selector in dry_run_output, (
        f"selector '{selector}' not present in {script.name} dry-run output"
    )
    assert expected_in_stdout in dry_run_output
|
|
|
|
|
|
def test_selector_parity_pytest_args_equivalent() -> None:
    """The same multi-word selector reaches both tiers' dry-run output intact.

    Both scripts are invoked with an identical ``-k`` expression that
    contains spaces. Tier-1 is expected to print it as ``-k <selector>``;
    Tier-2 is expected to print it as ``SELECTOR=<selector>``, the env var
    it forwards. The checks are substring matches on the dry-run output,
    so they confirm the full expression survives unmodified in each tier.
    """
    # Arrange
    selector = "FT_P_09_AP and not asan"
    common_flags = [
        "--fc-adapter",
        "ardupilot",
        "--vio-strategy",
        "okvis2",
        "-k",
        selector,
        "--dry-run",
    ]

    # Act
    p1 = _run([str(TIER1_SH), *common_flags])
    p2 = _run([str(TIER2_SH), *common_flags], env={"TIER2_HOST": "localhost"})

    # Assert — one assert per tier, each carrying stderr, so a failure names
    # the tier that broke and shows why.
    assert p1.returncode == 0, p1.stderr
    assert p2.returncode == 0, p2.stderr
    # Tier-1 shows `-k <selector>` directly in the dry-run output.
    assert f"-k {selector}" in p1.stdout
    # Tier-2 forwards via SELECTOR=<selector> env var.
    assert f"SELECTOR={selector}" in p2.stdout
|