mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-21 23:11:13 +00:00
6e4a575221
Batch 88 — adds four resource-limit blackbox scenarios + pure-logic helpers + unit tests: - NFT-LIM-01 Jetson memory (AC-NEW-13): tier2_only; Plan A/B budgets; AC-4 OOM-event scan; 30 s warm-up window; VmRSS + tegrastats streams. - NFT-LIM-02 FDR size (AC-7.3): 30 min → 8 h linear extrapolation against 50 GiB; ±60 s replay-window slack for AC-1. - NFT-LIM-03+05 storage (AC-7.4 + AC-NEW-12 + RESTRICT-STORAGE): aggregate ≤ 100 GiB across tile-cache + tile-cache-write + fdr-output; thumbnail-log < 1 GiB strict 8 h-extrapolated. - NFT-LIM-04 thermal (AC-NEW-5 PARTIAL): tier2_only; CPU/SoC p99 ≤ T_throttle − 5 °C; throttle-event scan; PARTIAL annotation written to traceability-status.json. Thresholds fixture lives at e2e/fixtures/jetson/thermal-thresholds.json (moved from the task spec's suggested tests/fixtures/ path so the file stays inside the blackbox_tests Owns: e2e/** envelope). All four helpers are public-boundary-only (no src/gps_denied_onboard imports). Scenarios skip cleanly in the Tier-1 docker harness pending AZ-595 (SITL replay builder) for the four shared fixture inputs and AZ-444 (Tier-2 Jetson runner) for the tier2_only scenarios. Code review: PASS_WITH_WARNINGS (0/0/2/1). Both Mediums are carried-over write_csv_evidence + _resolve_fixture_path duplication, deferred to AZ-446 (batch 89). Low is the self-resolved AZ-443 fixture ownership drift documented in the review. Tests: 1223 e2e/_unit_tests passing (+1 vs. batch 87 from the new directory-layout entry); 24 resource_limit scenarios collect and skip cleanly under runner/pytest.ini. Co-authored-by: Cursor <cursoragent@cursor.com>
219 lines
7.9 KiB
Python
219 lines
7.9 KiB
Python
"""NFT-LIM-04 — Jetson thermal envelope @ workstation ambient
(AZ-443 / AC-NEW-5 PARTIAL).

Tier-2 ONLY. 30 min Derkachi loop at workstation ambient; runner
samples ``tegrastats`` at 1 Hz (cpu_temp, soc_temp) and parses
``dmesg --since "<run_start>"`` for thermal-throttle entries. AC-2
asserts zero throttling events; AC-3 asserts both
``p99(cpu_temp) ≤ T_throttle_cpu − 5 °C`` and the same for SoC.
Threshold values are read from
``e2e/fixtures/jetson/thermal-thresholds.json`` so future hardware
revisions only require a fixture bump.

AC-4 emits the PARTIAL annotation for AC-NEW-5 in the evidence
``traceability-status.json``; the +50 °C chamber portion is the
deferred release-gate scenario, not in this CI scope.

Production dependency surfaced to AZ-595 + AZ-444:
``E2E_NFT_LIM_04_FIXTURE`` names a JSON file (absolute path or
relative to ``E2E_SITL_REPLAY_DIR``) shaped:

    {
      "samples": [
        {"monotonic_ms": <int>, "cpu_temp_c": <f>, "soc_temp_c": <f>},
        ...
      ],
      "throttle_events": [
        {"monotonic_ms": <int|null>, "snippet": "<dmesg line>"},
        ...
      ]
    }

Pure-logic AC-2/AC-3 covered by
``e2e/_unit_tests/helpers/test_thermal_envelope_evaluator.py``.
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
import os
|
||
from pathlib import Path
|
||
|
||
import pytest
|
||
|
||
from runner.helpers import thermal_envelope_evaluator as tee
|
||
|
||
# Name of the environment variable that selects the NFT-LIM-04 fixture JSON.
NFT_LIM_04_FIXTURE_ENV_VAR = "E2E_NFT_LIM_04_FIXTURE"

# File name looked up inside the SITL replay directory when the variable
# above is unset or blank.
NFT_LIM_04_DEFAULT_FIXTURE_NAME = "nft_lim_04_thermal.json"

# Thresholds fixture, relative to the e2e root. Owned by `blackbox_tests`;
# lives under `e2e/fixtures/jetson/`.
THRESHOLDS_FIXTURE_RELPATH = Path("fixtures/jetson/thermal-thresholds.json")
|
||
|
||
|
||
@pytest.mark.tier2_only
@pytest.mark.scenario_id("nft-lim-04")
@pytest.mark.traces_to("AC-NEW-5,AC-1,AC-2,AC-3,AC-4,AC-5")
def test_nft_lim_04_thermal(
    fc_adapter: str,
    vio_strategy: str,
    evidence_dir,  # type: ignore[no-untyped-def]
    run_id: str,
    nfr_recorder,  # type: ignore[no-untyped-def]
    sitl_replay_ready: bool,
) -> None:
    """AC-2 (no throttle) + AC-3 (5 °C headroom) + AC-4 (PARTIAL annotation).

    Flow:

    1. Skip when ``sitl_replay_ready`` is false.
    2. Fail when the scenario fixture or the thresholds fixture is missing,
       or when the scenario fixture cannot be read/decoded as JSON.
    3. Evaluate the parsed samples and throttle events against the loaded
       thresholds via ``tee.evaluate``.
    4. Write the CSV evidence files and the AC-NEW-5 PARTIAL annotation,
       then record the p99 / throttle-count metrics.
    5. Assert last, so evidence and metrics exist even for a failing run.

    ``run_id`` is requested as a fixture but is not read by this body.
    """
    if not sitl_replay_ready:
        pytest.skip(
            "NFT-LIM-04 requires `E2E_SITL_REPLAY_DIR` to point at a prepared "
            "SITL replay fixture (AZ-595) carrying per-second tegrastats "
            "temperature samples + dmesg throttle records for a 30 min "
            "Derkachi loop. Pure-logic AC-2/AC-3 covered by "
            "e2e/_unit_tests/helpers/test_thermal_envelope_evaluator.py."
        )

    fixture_path = _resolve_fixture_path()
    if not fixture_path.is_file():
        pytest.fail(
            f"NFT-LIM-04: fixture not found at {fixture_path}. "
            f"`{NFT_LIM_04_FIXTURE_ENV_VAR}` env var must point at a JSON "
            "file with the schema documented in the scenario docstring. "
            "Production dependency: AZ-595 + AZ-444."
        )

    thresholds_path = _resolve_thresholds_path()
    if not thresholds_path.is_file():
        pytest.fail(
            f"NFT-LIM-04: thermal thresholds fixture not found at "
            f"{thresholds_path}; AC-3 cannot evaluate without "
            f"hardware-documented T_throttle values."
        )
    thresholds = tee.ThermalThresholds.load_from_fixture(thresholds_path)

    # Read as UTF-8 explicitly instead of relying on the locale default, and
    # report unreadable/malformed fixtures the same way as every other
    # fixture problem in this scenario. json.JSONDecodeError and
    # UnicodeDecodeError are both ValueError subclasses.
    try:
        payload = json.loads(fixture_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        pytest.fail(
            f"NFT-LIM-04: fixture {fixture_path} could not be read as "
            f"UTF-8 JSON: {exc}"
        )
    samples, throttle_events = _parse_payload(payload, fixture_path)
    report = tee.evaluate(samples, throttle_events, thresholds)

    # Build the evidence file names by appending ".csv" to the full stem.
    # Path.with_suffix() would replace everything after the last dot, so a
    # dotted adapter/strategy id would be truncated and two ids could end up
    # sharing one file. Names are unchanged for dot-free ids.
    evidence_subdir = Path(evidence_dir) / "nft-lim-04"
    stem = f"{fc_adapter}-{vio_strategy}"
    tee.write_csv_evidence(evidence_subdir / f"{stem}.csv", report)
    tee.write_throttle_events_csv(
        evidence_subdir / f"{stem}-throttle.csv",
        report.throttle_events,
    )
    # AC-4 — PARTIAL annotation in the bundle-shared traceability-status.json.
    tee.write_traceability_partial_annotation(
        Path(evidence_dir) / "traceability-status.json"
    )

    # A p99 of None is not recorded as a metric.
    if report.cpu.p99_c is not None:
        nfr_recorder.record_metric(
            "nft_lim_04.cpu_temp_c_p99", float(report.cpu.p99_c), ac_id="AC-3"
        )
    if report.soc.p99_c is not None:
        nfr_recorder.record_metric(
            "nft_lim_04.soc_temp_c_p99", float(report.soc.p99_c), ac_id="AC-3"
        )
    nfr_recorder.record_metric(
        "nft_lim_04.throttle_event_count",
        float(len(report.throttle_events)),
        ac_id="AC-2",
    )

    # Collect every breach so one failure message covers both AC-2 and AC-3.
    breaches: list[str] = []
    if not report.passes_no_throttle:
        first = report.throttle_events[0]
        breaches.append(
            f"AC-2: {len(report.throttle_events)} thermal-throttle event(s) "
            f"since run_start; first: {first.snippet[:120]}"
        )
    if not report.passes_headroom:
        breaches.append(
            f"AC-3: headroom violated — CPU p99={report.cpu.p99_c}, "
            f"budget={thresholds.cpu_budget_c}; SoC p99={report.soc.p99_c}, "
            f"budget={thresholds.soc_budget_c}"
        )
    assert not breaches, "\n".join(breaches)
|
||
|
||
|
||
def _resolve_fixture_path() -> Path:
    """Return the path where the NFT-LIM-04 fixture JSON is expected.

    Resolution rules:

    * Env var set to an absolute path: that path.
    * Env var set to a relative path: joined onto the replay directory when
      ``sitl_observer.replay_dir()`` returns one, otherwise left as given.
    * Env var unset or blank: the default fixture name inside the replay
      directory, or a ``<...-unset>`` placeholder path when there is no
      replay directory.

    The function does not check that the returned path exists.
    """
    configured = os.environ.get(NFT_LIM_04_FIXTURE_ENV_VAR, "").strip()
    # Imported at call time rather than at module import time.
    from runner.helpers import sitl_observer

    replay_root = sitl_observer.replay_dir()

    if configured:
        candidate = Path(configured)
        if candidate.is_absolute() or replay_root is None:
            return candidate
        return replay_root / candidate

    if replay_root is not None:
        return replay_root / NFT_LIM_04_DEFAULT_FIXTURE_NAME
    return Path(f"<{NFT_LIM_04_FIXTURE_ENV_VAR}-unset>")
|
||
|
||
|
||
def _resolve_thresholds_path() -> Path:
    """Return the thresholds fixture path, resolved relative to the e2e root.

    The root is ``parents[2]`` of this module's resolved location.
    NOTE(review): this assumes the module sits two directories below the
    e2e root — confirm if the file is ever moved.
    """
    module_file = Path(__file__).resolve()
    return module_file.parents[2] / THRESHOLDS_FIXTURE_RELPATH
|
||
|
||
|
||
def _parse_payload(
    payload: object, fixture_path: Path
) -> tuple[list[tee.ThermalSample], list[tee.ThrottleEvent]]:
    """Validate the decoded fixture JSON and convert it to evaluator inputs.

    Args:
        payload: Result of decoding the fixture file; must be a JSON object.
        fixture_path: Fixture location, used only in failure messages.

    Returns:
        ``(samples, throttle_events)``. ``samples`` is non-empty;
        ``throttle_events`` may be empty and defaults to ``[]`` when the
        key is absent.

    Any shape violation ends the test through ``pytest.fail`` with a
    message naming the offending entry.
    """
    if not isinstance(payload, dict):
        pytest.fail(
            f"NFT-LIM-04: fixture {fixture_path} must be a JSON object; "
            f"got top-level type={type(payload).__name__}"
        )
    # Samples are validated completely before throttle events are looked at.
    samples = _parse_samples(payload.get("samples"), fixture_path)
    throttle_events = _parse_throttle_events(
        payload.get("throttle_events", []), fixture_path
    )
    return samples, throttle_events


def _parse_samples(
    samples_raw: object, fixture_path: Path
) -> list[tee.ThermalSample]:
    """Convert the fixture's ``samples`` value into ``tee.ThermalSample`` objects."""
    if not isinstance(samples_raw, list) or not samples_raw:
        pytest.fail(
            f"NFT-LIM-04: fixture {fixture_path} 'samples' must be a "
            f"non-empty list"
        )
    samples: list[tee.ThermalSample] = []
    for i, entry in enumerate(samples_raw):
        if not isinstance(entry, dict):
            pytest.fail(
                f"NFT-LIM-04: samples[{i}] in {fixture_path} must be an object"
            )
        try:
            samples.append(
                tee.ThermalSample(
                    monotonic_ms=int(entry["monotonic_ms"]),
                    cpu_temp_c=float(entry["cpu_temp_c"]),
                    soc_temp_c=float(entry["soc_temp_c"]),
                )
            )
        # OverflowError: int() of an infinite float (e.g. JSON 1e400) or
        # float() of an oversized JSON integer. It is not a ValueError
        # subclass, so it must be listed to get the same clean failure.
        except (KeyError, TypeError, ValueError, OverflowError) as exc:
            pytest.fail(
                f"NFT-LIM-04: samples[{i}] in {fixture_path} shape invalid: {exc}"
            )
    return samples


def _parse_throttle_events(
    throttle_raw: object, fixture_path: Path
) -> list[tee.ThrottleEvent]:
    """Convert the fixture's ``throttle_events`` value into ``tee.ThrottleEvent`` objects."""
    if not isinstance(throttle_raw, list):
        pytest.fail(
            f"NFT-LIM-04: fixture {fixture_path} 'throttle_events' must be a "
            f"list (may be empty); got {type(throttle_raw).__name__}"
        )
    throttle_events: list[tee.ThrottleEvent] = []
    for i, entry in enumerate(throttle_raw):
        if not isinstance(entry, dict):
            pytest.fail(
                f"NFT-LIM-04: throttle_events[{i}] in {fixture_path} must be "
                f"an object"
            )
        try:
            # A missing or null timestamp is kept as None.
            mono_raw = entry.get("monotonic_ms")
            mono = int(mono_raw) if mono_raw is not None else None
            # A JSON null snippet becomes "" rather than the text "None",
            # which would otherwise leak into evidence and failure messages.
            snippet_raw = entry.get("snippet")
            snippet = "" if snippet_raw is None else str(snippet_raw)
            throttle_events.append(
                tee.ThrottleEvent(
                    monotonic_ms=mono,
                    snippet=snippet,
                )
            )
        # OverflowError covers int() of an infinite float timestamp.
        except (TypeError, ValueError, OverflowError) as exc:
            pytest.fail(
                f"NFT-LIM-04: throttle_events[{i}] in {fixture_path} shape "
                f"invalid: {exc}"
            )
    return throttle_events
|