[AZ-440] [AZ-441] [AZ-442] [AZ-443] NFT-LIM-01/02/03+05/04 blackbox scenarios

Batch 88 — adds four resource-limit blackbox scenarios + pure-logic
helpers + unit tests:

- NFT-LIM-01 Jetson memory (AC-NEW-13): tier2_only; Plan A/B budgets;
  AC-4 OOM-event scan; 30 s warm-up window; VmRSS + tegrastats streams.
- NFT-LIM-02 FDR size (AC-7.3): 30 min → 8 h linear extrapolation
  against 50 GiB; ±60 s replay-window slack for AC-1.
- NFT-LIM-03+05 storage (AC-7.4 + AC-NEW-12 + RESTRICT-STORAGE):
  aggregate ≤ 100 GiB across tile-cache + tile-cache-write +
  fdr-output; thumbnail-log < 1 GiB strict 8 h-extrapolated.
- NFT-LIM-04 thermal (AC-NEW-5 PARTIAL): tier2_only; CPU/SoC p99
  ≤ T_throttle − 5 °C; throttle-event scan; PARTIAL annotation written
  to traceability-status.json. Thresholds fixture lives at
  e2e/fixtures/jetson/thermal-thresholds.json (moved from the
  task spec's suggested tests/fixtures/ path so the file stays
  inside the blackbox_tests Owns: e2e/** envelope).

All four helpers are public-boundary-only (no src/gps_denied_onboard
imports). Scenarios skip cleanly in the Tier-1 docker harness pending
AZ-595 (SITL replay builder) for the four shared fixture inputs and
AZ-444 (Tier-2 Jetson runner) for the tier2_only scenarios.

Code review: PASS_WITH_WARNINGS (0/0/2/1). Both Mediums are
carried-over write_csv_evidence + _resolve_fixture_path duplication,
deferred to AZ-446 (batch 89). Low is the self-resolved AZ-443 fixture
ownership drift documented in the review.

Tests: 1223 e2e/_unit_tests passing (+1 vs. batch 87 from the new
directory-layout entry); 24 resource_limit scenarios collect and skip
cleanly under runner/pytest.ini.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-17 18:01:55 +03:00
parent d1e30f818f
commit 6e4a575221
22 changed files with 2785 additions and 4 deletions
@@ -0,0 +1,218 @@
"""NFT-LIM-04 — Jetson thermal envelope @ workstation ambient
(AZ-443 / AC-NEW-5 PARTIAL).
Tier-2 ONLY. 30 min Derkachi loop at workstation ambient; runner
samples ``tegrastats`` at 1 Hz (cpu_temp, soc_temp) and parses
``dmesg --since "<run_start>"`` for thermal-throttle entries. AC-2
asserts zero throttling events; AC-3 asserts both
``p99(cpu_temp) ≤ T_throttle_cpu − 5 °C`` and the same for SoC.
Threshold values are read from
``e2e/fixtures/jetson/thermal-thresholds.json`` so future hardware
revisions only require a fixture bump.
AC-4 emits the PARTIAL annotation for AC-NEW-5 in the evidence
``traceability-status.json``; the +50 °C chamber portion is the
deferred release-gate scenario, not in this CI scope.
Production dependency surfaced to AZ-595 + AZ-444:
``E2E_NFT_LIM_04_FIXTURE`` names a JSON file (absolute path or
relative to ``E2E_SITL_REPLAY_DIR``) shaped:
{
"samples": [
{"monotonic_ms": <int>, "cpu_temp_c": <f>, "soc_temp_c": <f>},
...
],
"throttle_events": [
{"monotonic_ms": <int|null>, "snippet": "<dmesg line>"},
...
]
}
Pure-logic AC-2/AC-3 covered by
``e2e/_unit_tests/helpers/test_thermal_envelope_evaluator.py``.
"""
from __future__ import annotations
import json
import os
from pathlib import Path
import pytest
from runner.helpers import thermal_envelope_evaluator as tee
# Name of the environment variable that points at the NFT-LIM-04 replay
# fixture (a JSON file); read by `_resolve_fixture_path`.
NFT_LIM_04_FIXTURE_ENV_VAR = "E2E_NFT_LIM_04_FIXTURE"
# File name looked up under the SITL replay directory when the environment
# variable above is unset or blank.
NFT_LIM_04_DEFAULT_FIXTURE_NAME = "nft_lim_04_thermal.json"
# Owned by `blackbox_tests`; lives under `e2e/fixtures/jetson/`.
# Kept relative: `_resolve_thresholds_path` joins it onto the e2e root.
THRESHOLDS_FIXTURE_RELPATH = Path("fixtures/jetson/thermal-thresholds.json")
@pytest.mark.tier2_only
@pytest.mark.scenario_id("nft-lim-04")
@pytest.mark.traces_to("AC-NEW-5,AC-1,AC-2,AC-3,AC-4,AC-5")
def test_nft_lim_04_thermal(
    fc_adapter: str,
    vio_strategy: str,
    evidence_dir,  # type: ignore[no-untyped-def]
    run_id: str,
    nfr_recorder,  # type: ignore[no-untyped-def]
    sitl_replay_ready: bool,
) -> None:
    """AC-2 (no throttle) + AC-3 (5 °C headroom) + AC-4 (PARTIAL annotation).

    Flow:
        1. Skip when no SITL replay fixture is prepared.
        2. Fail fast when the replay fixture or the thresholds fixture is
           missing, or when the replay fixture is not decodable JSON.
        3. Evaluate the samples and throttle events against the thresholds.
        4. Write CSV evidence and the PARTIAL traceability annotation, and
           record the metrics, *before* asserting, so a failing run still
           leaves its evidence behind.
        5. Assert that neither AC-2 nor AC-3 was breached.

    Args:
        fc_adapter: Flight-controller adapter label; part of the evidence
            file name.
        vio_strategy: VIO strategy label; part of the evidence file name.
        evidence_dir: Root directory for this run's evidence bundle.
        run_id: Requested from the harness but not read in this body.
        nfr_recorder: Sink exposing ``record_metric(name, value, ac_id=...)``.
        sitl_replay_ready: ``False`` makes the scenario skip.
    """
    if not sitl_replay_ready:
        pytest.skip(
            "NFT-LIM-04 requires `E2E_SITL_REPLAY_DIR` to point at a prepared "
            "SITL replay fixture (AZ-595) carrying per-second tegrastats "
            "temperature samples + dmesg throttle records for a 30 min "
            "Derkachi loop. Pure-logic AC-2/AC-3 covered by "
            "e2e/_unit_tests/helpers/test_thermal_envelope_evaluator.py."
        )

    fixture_path = _resolve_fixture_path()
    if not fixture_path.is_file():
        pytest.fail(
            f"NFT-LIM-04: fixture not found at {fixture_path}. "
            f"`{NFT_LIM_04_FIXTURE_ENV_VAR}` env var must point at a JSON "
            "file with the schema documented in the scenario docstring. "
            "Production dependency: AZ-595 + AZ-444."
        )

    thresholds_path = _resolve_thresholds_path()
    if not thresholds_path.is_file():
        pytest.fail(
            f"NFT-LIM-04: thermal thresholds fixture not found at "
            f"{thresholds_path}; AC-3 cannot evaluate without "
            f"hardware-documented T_throttle values."
        )
    thresholds = tee.ThermalThresholds.load_from_fixture(thresholds_path)

    # Decode explicitly as UTF-8 (the JSON interchange encoding) instead of
    # the locale default, and turn a corrupt fixture into the same kind of
    # actionable failure as the other fixture problems above.
    try:
        payload = json.loads(fixture_path.read_text(encoding="utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError) as exc:
        pytest.fail(
            f"NFT-LIM-04: fixture {fixture_path} is not valid UTF-8 JSON: {exc}"
        )
    samples, throttle_events = _parse_payload(payload, fixture_path)
    report = tee.evaluate(samples, throttle_events, thresholds)

    # Build the evidence names by appending ".csv" to the full stem.
    # `Path.with_suffix` would instead replace whatever follows the last dot,
    # truncating a dotted adapter/strategy label (e.g. "px4-v1.14").
    scenario_dir = Path(evidence_dir) / "nft-lim-04"
    stem = f"{fc_adapter}-{vio_strategy}"
    tee.write_csv_evidence(scenario_dir / f"{stem}.csv", report)
    tee.write_throttle_events_csv(
        scenario_dir / f"{stem}-throttle.csv",
        report.throttle_events,
    )

    # AC-4 — PARTIAL annotation in the bundle-shared traceability-status.json.
    tee.write_traceability_partial_annotation(
        Path(evidence_dir) / "traceability-status.json"
    )

    # A p99 of None means the report carries no percentile for that sensor;
    # only record the metric when a value exists.
    if report.cpu.p99_c is not None:
        nfr_recorder.record_metric(
            "nft_lim_04.cpu_temp_c_p99", float(report.cpu.p99_c), ac_id="AC-3"
        )
    if report.soc.p99_c is not None:
        nfr_recorder.record_metric(
            "nft_lim_04.soc_temp_c_p99", float(report.soc.p99_c), ac_id="AC-3"
        )
    nfr_recorder.record_metric(
        "nft_lim_04.throttle_event_count",
        float(len(report.throttle_events)),
        ac_id="AC-2",
    )

    # Collect every breach so one failure message reports AC-2 and AC-3
    # together rather than stopping at the first.
    breaches: list[str] = []
    if not report.passes_no_throttle:
        first = report.throttle_events[0]
        breaches.append(
            f"AC-2: {len(report.throttle_events)} thermal-throttle event(s) "
            f"since run_start; first: {first.snippet[:120]}"
        )
    if not report.passes_headroom:
        breaches.append(
            f"AC-3: headroom violated — CPU p99={report.cpu.p99_c}, "
            f"budget={thresholds.cpu_budget_c}; SoC p99={report.soc.p99_c}, "
            f"budget={thresholds.soc_budget_c}"
        )
    assert not breaches, "\n".join(breaches)
def _resolve_fixture_path() -> Path:
    """Work out where the NFT-LIM-04 replay fixture should be read from.

    Resolution order:
        * env var set, absolute path            -> used verbatim;
        * env var set, relative path            -> joined onto the replay
          directory when one is configured, otherwise left relative;
        * env var unset/blank, replay dir known -> default fixture name
          inside the replay directory;
        * env var unset/blank, no replay dir    -> a placeholder path that
          cannot exist, so the caller's ``is_file()`` check fails with a
          message naming the unset variable.

    Returns:
        The candidate path; existence is not checked here.
    """
    # Imported lazily, as in the rest of this module's helpers, so merely
    # collecting the scenario does not pull in the observer.
    from runner.helpers import sitl_observer

    configured = os.environ.get(NFT_LIM_04_FIXTURE_ENV_VAR, "").strip()
    replay_root = sitl_observer.replay_dir()

    if configured:
        candidate = Path(configured)
        if candidate.is_absolute() or replay_root is None:
            return candidate
        return replay_root / candidate

    if replay_root is not None:
        return replay_root / NFT_LIM_04_DEFAULT_FIXTURE_NAME
    return Path(f"<{NFT_LIM_04_FIXTURE_ENV_VAR}-unset>")
def _resolve_thresholds_path() -> Path:
    """Locate the thermal-thresholds fixture relative to this file.

    Returns:
        ``THRESHOLDS_FIXTURE_RELPATH`` joined onto the directory two levels
        above this file's own directory; existence is not checked here.
    """
    this_file = Path(__file__).resolve()
    # parents[0] is this file's directory; parents[2] is expected to be the
    # e2e root in the repository layout — TODO confirm if the file moves.
    anchor = this_file.parents[2]
    return anchor.joinpath(THRESHOLDS_FIXTURE_RELPATH)
def _parse_payload(
    payload: object, fixture_path: Path
) -> tuple[list[tee.ThermalSample], list[tee.ThrottleEvent]]:
    """Validate the decoded fixture JSON and convert it to evaluator inputs.

    Args:
        payload: Result of ``json.loads`` on the fixture file. Must be a JSON
            object with a non-empty ``samples`` list and an optional
            ``throttle_events`` list.
        fixture_path: Fixture location; used only in failure messages.

    Returns:
        ``(samples, throttle_events)``, each in fixture order.

    Any shape violation ends the test through ``pytest.fail`` with a message
    naming the offending entry. Samples are validated before throttle events,
    so a fixture broken in both places reports the sample problem first.
    """
    if not isinstance(payload, dict):
        pytest.fail(
            f"NFT-LIM-04: fixture {fixture_path} must be a JSON object; "
            f"got top-level type={type(payload).__name__}"
        )
    samples = _parse_samples(payload.get("samples"), fixture_path)
    throttle_events = _parse_throttle_events(
        payload.get("throttle_events", []), fixture_path
    )
    return samples, throttle_events


def _parse_samples(
    samples_raw: object, fixture_path: Path
) -> list[tee.ThermalSample]:
    """Convert the fixture's ``samples`` list into ``ThermalSample`` objects."""
    import math

    if not isinstance(samples_raw, list) or not samples_raw:
        pytest.fail(
            f"NFT-LIM-04: fixture {fixture_path} 'samples' must be a "
            f"non-empty list"
        )
    samples: list[tee.ThermalSample] = []
    for i, entry in enumerate(samples_raw):
        if not isinstance(entry, dict):
            pytest.fail(
                f"NFT-LIM-04: samples[{i}] in {fixture_path} must be an object"
            )
        try:
            monotonic_ms = int(entry["monotonic_ms"])
            cpu_temp_c = float(entry["cpu_temp_c"])
            soc_temp_c = float(entry["soc_temp_c"])
            # Python's json module accepts the NaN/Infinity literals. A
            # non-finite reading would corrupt the p99 statistics and can
            # make a headroom comparison vacuous, so reject it here.
            if not (math.isfinite(cpu_temp_c) and math.isfinite(soc_temp_c)):
                raise ValueError(
                    f"temperatures must be finite; got "
                    f"cpu_temp_c={cpu_temp_c}, soc_temp_c={soc_temp_c}"
                )
            samples.append(
                tee.ThermalSample(
                    monotonic_ms=monotonic_ms,
                    cpu_temp_c=cpu_temp_c,
                    soc_temp_c=soc_temp_c,
                )
            )
        # OverflowError: int() of an Infinity timestamp.
        except (KeyError, TypeError, ValueError, OverflowError) as exc:
            pytest.fail(
                f"NFT-LIM-04: samples[{i}] in {fixture_path} shape invalid: {exc}"
            )
    return samples


def _parse_throttle_events(
    throttle_raw: object, fixture_path: Path
) -> list[tee.ThrottleEvent]:
    """Convert the fixture's ``throttle_events`` list into ``ThrottleEvent`` objects."""
    if not isinstance(throttle_raw, list):
        pytest.fail(
            f"NFT-LIM-04: fixture {fixture_path} 'throttle_events' must be a "
            f"list (may be empty); got {type(throttle_raw).__name__}"
        )
    throttle_events: list[tee.ThrottleEvent] = []
    for i, entry in enumerate(throttle_raw):
        if not isinstance(entry, dict):
            pytest.fail(
                f"NFT-LIM-04: throttle_events[{i}] in {fixture_path} must be "
                f"an object"
            )
        try:
            # The timestamp is nullable in the fixture schema; keep None as-is.
            mono_raw = entry.get("monotonic_ms")
            mono = int(mono_raw) if mono_raw is not None else None
            # Treat an explicit JSON null like an absent key, so the evidence
            # never carries the literal string "None" as a dmesg snippet.
            snippet_raw = entry.get("snippet")
            snippet = "" if snippet_raw is None else str(snippet_raw)
            throttle_events.append(
                tee.ThrottleEvent(monotonic_ms=mono, snippet=snippet)
            )
        # OverflowError: int() of an Infinity timestamp.
        except (TypeError, ValueError, OverflowError) as exc:
            pytest.fail(
                f"NFT-LIM-04: throttle_events[{i}] in {fixture_path} shape "
                f"invalid: {exc}"
            )
    return throttle_events