Files
Oleksandr Bezdieniezhnykh 6e4a575221 [AZ-440] [AZ-441] [AZ-442] [AZ-443] NFT-LIM-01/02/03+05/04 blackbox scenarios
Batch 88 — adds four resource-limit blackbox scenarios + pure-logic
helpers + unit tests:

- NFT-LIM-01 Jetson memory (AC-NEW-13): tier2_only; Plan A/B budgets;
  AC-4 OOM-event scan; 30 s warm-up window; VmRSS + tegrastats streams.
- NFT-LIM-02 FDR size (AC-7.3): 30 min → 8 h linear extrapolation
  against 50 GiB; ±60 s replay-window slack for AC-1.
- NFT-LIM-03+05 storage (AC-7.4 + AC-NEW-12 + RESTRICT-STORAGE):
  aggregate ≤ 100 GiB across tile-cache + tile-cache-write +
  fdr-output; thumbnail-log < 1 GiB strict 8 h-extrapolated.
- NFT-LIM-04 thermal (AC-NEW-5 PARTIAL): tier2_only; CPU/SoC p99
  ≤ T_throttle − 5 °C; throttle-event scan; PARTIAL annotation written
  to traceability-status.json. Thresholds fixture lives at
  e2e/fixtures/jetson/thermal-thresholds.json (moved from the
  task spec's suggested tests/fixtures/ path so the file stays
  inside the blackbox_tests Owns: e2e/** envelope).

All four helpers are public-boundary-only (no src/gps_denied_onboard
imports). Scenarios skip cleanly in the Tier-1 docker harness pending
AZ-595 (SITL replay builder) for the four shared fixture inputs and
AZ-444 (Tier-2 Jetson runner) for the tier2_only scenarios.

Code review: PASS_WITH_WARNINGS (0/0/2/1). Both Mediums are
carried-over write_csv_evidence + _resolve_fixture_path duplication,
deferred to AZ-446 (batch 89). Low is the self-resolved AZ-443 fixture
ownership drift documented in the review.

Tests: 1223 e2e/_unit_tests passing (+1 vs. batch 87 from the new
directory-layout entry); 24 resource_limit scenarios collect and skip
cleanly under runner/pytest.ini.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-17 18:01:55 +03:00

219 lines
7.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""NFT-LIM-04 — Jetson thermal envelope @ workstation ambient
(AZ-443 / AC-NEW-5 PARTIAL).
Tier-2 ONLY. 30 min Derkachi loop at workstation ambient; runner
samples ``tegrastats`` at 1 Hz (cpu_temp, soc_temp) and parses
``dmesg --since "<run_start>"`` for thermal-throttle entries. AC-2
asserts zero throttling events; AC-3 asserts both
``p99(cpu_temp) ≤ T_throttle_cpu − 5 °C`` and the same for SoC.
Threshold values are read from
``e2e/fixtures/jetson/thermal-thresholds.json`` so future hardware
revisions only require a fixture bump.
AC-4 emits the PARTIAL annotation for AC-NEW-5 in the evidence
``traceability-status.json``; the +50 °C chamber portion is the
deferred release-gate scenario, not in this CI scope.
Production dependency surfaced to AZ-595 + AZ-444:
``E2E_NFT_LIM_04_FIXTURE`` names a JSON file (absolute path or
relative to ``E2E_SITL_REPLAY_DIR``) shaped:
{
"samples": [
{"monotonic_ms": <int>, "cpu_temp_c": <f>, "soc_temp_c": <f>},
...
],
"throttle_events": [
{"monotonic_ms": <int|null>, "snippet": "<dmesg line>"},
...
]
}
Pure-logic AC-2/AC-3 covered by
``e2e/_unit_tests/helpers/test_thermal_envelope_evaluator.py``.
"""
from __future__ import annotations
import json
import os
from pathlib import Path
import pytest
from runner.helpers import thermal_envelope_evaluator as tee
# Environment variable naming the NFT-LIM-04 fixture JSON; read (and
# whitespace-stripped) by `_resolve_fixture_path()`.
NFT_LIM_04_FIXTURE_ENV_VAR = "E2E_NFT_LIM_04_FIXTURE"
# File name looked up under the SITL replay directory when the env var above
# is unset or blank.
NFT_LIM_04_DEFAULT_FIXTURE_NAME = "nft_lim_04_thermal.json"
# Owned by `blackbox_tests`; lives under `e2e/fixtures/jetson/`.
# Joined onto the e2e root by `_resolve_thresholds_path()`.
THRESHOLDS_FIXTURE_RELPATH = Path("fixtures/jetson/thermal-thresholds.json")
@pytest.mark.tier2_only
@pytest.mark.scenario_id("nft-lim-04")
@pytest.mark.traces_to("AC-NEW-5,AC-1,AC-2,AC-3,AC-4,AC-5")
def test_nft_lim_04_thermal(
    fc_adapter: str,
    vio_strategy: str,
    evidence_dir,  # type: ignore[no-untyped-def]
    run_id: str,
    nfr_recorder,  # type: ignore[no-untyped-def]
    sitl_replay_ready: bool,
) -> None:
    """AC-2 (no throttle) + AC-3 (5 °C headroom) + AC-4 (PARTIAL annotation).

    Flow:
      1. Skip when no SITL replay fixture directory is prepared.
      2. Fail fast when the sample fixture or the thresholds fixture is
         missing, or when the sample fixture is not readable UTF-8 JSON.
      3. Evaluate the samples against the thresholds, write the CSV
         evidence files and the traceability annotation, record metrics.
      4. Assert that neither AC-2 nor AC-3 was breached; all breaches are
         reported together in one assertion message.

    ``run_id`` is requested as a fixture but is not referenced in the body.
    """
    if not sitl_replay_ready:
        pytest.skip(
            "NFT-LIM-04 requires `E2E_SITL_REPLAY_DIR` to point at a prepared "
            "SITL replay fixture (AZ-595) carrying per-second tegrastats "
            "temperature samples + dmesg throttle records for a 30 min "
            "Derkachi loop. Pure-logic AC-2/AC-3 covered by "
            "e2e/_unit_tests/helpers/test_thermal_envelope_evaluator.py."
        )

    fixture_path = _resolve_fixture_path()
    if not fixture_path.is_file():
        pytest.fail(
            f"NFT-LIM-04: fixture not found at {fixture_path}. "
            f"`{NFT_LIM_04_FIXTURE_ENV_VAR}` env var must point at a JSON "
            "file with the schema documented in the scenario docstring. "
            "Production dependency: AZ-595 + AZ-444."
        )

    thresholds_path = _resolve_thresholds_path()
    if not thresholds_path.is_file():
        pytest.fail(
            f"NFT-LIM-04: thermal thresholds fixture not found at "
            f"{thresholds_path}; AC-3 cannot evaluate without "
            f"hardware-documented T_throttle values."
        )
    thresholds = tee.ThermalThresholds.load_from_fixture(thresholds_path)

    # JSON is UTF-8 by specification, so decode explicitly instead of relying
    # on the runner's locale. A corrupt or unreadable fixture is reported as a
    # fixture diagnostic (same style as the checks above) rather than a raw
    # traceback. JSONDecodeError and UnicodeDecodeError both derive from
    # ValueError.
    try:
        payload = json.loads(fixture_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        pytest.fail(
            f"NFT-LIM-04: fixture {fixture_path} is not readable UTF-8 JSON: "
            f"{exc}"
        )
    samples, throttle_events = _parse_payload(payload, fixture_path)
    report = tee.evaluate(samples, throttle_events, thresholds)

    # Append ".csv" rather than using `with_suffix`: `with_suffix` replaces
    # everything after the last dot, which would truncate (and potentially
    # collide) evidence names if an adapter/strategy id ever contains a dot.
    base = Path(evidence_dir) / "nft-lim-04" / f"{fc_adapter}-{vio_strategy}"
    tee.write_csv_evidence(base.with_name(f"{base.name}.csv"), report)
    tee.write_throttle_events_csv(
        base.with_name(f"{base.name}-throttle.csv"),
        report.throttle_events,
    )

    # AC-4 — PARTIAL annotation in the bundle-shared traceability-status.json.
    tee.write_traceability_partial_annotation(
        Path(evidence_dir) / "traceability-status.json"
    )

    # Percentiles may be None; only record the ones the evaluator produced.
    if report.cpu.p99_c is not None:
        nfr_recorder.record_metric(
            "nft_lim_04.cpu_temp_c_p99", float(report.cpu.p99_c), ac_id="AC-3"
        )
    if report.soc.p99_c is not None:
        nfr_recorder.record_metric(
            "nft_lim_04.soc_temp_c_p99", float(report.soc.p99_c), ac_id="AC-3"
        )
    nfr_recorder.record_metric(
        "nft_lim_04.throttle_event_count",
        float(len(report.throttle_events)),
        ac_id="AC-2",
    )

    # Collect every breach before asserting so one run reports both ACs.
    breaches: list[str] = []
    if not report.passes_no_throttle:
        first = report.throttle_events[0]
        breaches.append(
            f"AC-2: {len(report.throttle_events)} thermal-throttle event(s) "
            f"since run_start; first: {first.snippet[:120]}"
        )
    if not report.passes_headroom:
        breaches.append(
            f"AC-3: headroom violated — CPU p99={report.cpu.p99_c}, "
            f"budget={thresholds.cpu_budget_c}; SoC p99={report.soc.p99_c}, "
            f"budget={thresholds.soc_budget_c}"
        )
    assert not breaches, "\n".join(breaches)
def _resolve_fixture_path() -> Path:
    """Return the path at which the NFT-LIM-04 fixture JSON is expected.

    Resolution rules:
      * env var set, absolute path -> used as-is;
      * env var set, relative path -> joined onto the replay directory when
        one is available, otherwise returned unchanged;
      * env var unset/blank -> default fixture name under the replay
        directory, or a placeholder path when there is no replay directory.

    The returned path is not checked for existence here.
    """
    from runner.helpers import sitl_observer

    configured = os.environ.get(NFT_LIM_04_FIXTURE_ENV_VAR, "").strip()
    replay_root = sitl_observer.replay_dir()

    if configured:
        candidate = Path(configured)
        if candidate.is_absolute() or replay_root is None:
            return candidate
        return replay_root / candidate

    if replay_root is not None:
        return replay_root / NFT_LIM_04_DEFAULT_FIXTURE_NAME
    # Placeholder that cannot be a real file; it only shows up in the
    # caller's "fixture not found" message.
    return Path(f"<{NFT_LIM_04_FIXTURE_ENV_VAR}-unset>")
def _resolve_thresholds_path() -> Path:
    """Locate the thresholds fixture relative to the e2e root.

    The root is taken as the third ancestor (``parents[2]``) of this
    module's resolved file path.
    """
    this_module = Path(__file__).resolve()
    return this_module.parents[2].joinpath(THRESHOLDS_FIXTURE_RELPATH)
def _parse_payload(
    payload: object, fixture_path: Path
) -> tuple[list[tee.ThermalSample], list[tee.ThrottleEvent]]:
    """Validate the decoded fixture JSON and convert it to evaluator inputs.

    Args:
        payload: The object produced by decoding the fixture JSON. Must be a
            dict with a non-empty ``"samples"`` list and an optional
            ``"throttle_events"`` list.
        fixture_path: Path of the fixture; used only in failure messages.

    Returns:
        ``(samples, throttle_events)`` in fixture order.

    Raises:
        Any shape problem is reported through ``pytest.fail`` (which raises),
        naming the fixture and the offending entry index. This includes
        non-finite temperatures (NaN / ±Infinity): ``json.loads`` accepts
        those literals by default, and letting them through would poison the
        percentile and headroom comparison downstream.
    """
    # Function-scope import keeps this block self-contained.
    import math

    if not isinstance(payload, dict):
        pytest.fail(
            f"NFT-LIM-04: fixture {fixture_path} must be a JSON object; "
            f"got top-level type={type(payload).__name__}"
        )

    samples_raw = payload.get("samples")
    if not isinstance(samples_raw, list) or not samples_raw:
        pytest.fail(
            f"NFT-LIM-04: fixture {fixture_path} 'samples' must be a "
            f"non-empty list"
        )

    samples: list[tee.ThermalSample] = []
    for i, entry in enumerate(samples_raw):
        if not isinstance(entry, dict):
            pytest.fail(
                f"NFT-LIM-04: samples[{i}] in {fixture_path} must be an object"
            )
        try:
            # Keys are read in the same order as the documented schema so the
            # first missing/invalid key is the one reported.
            monotonic_ms = int(entry["monotonic_ms"])
            cpu_temp_c = float(entry["cpu_temp_c"])
            soc_temp_c = float(entry["soc_temp_c"])
            if not (math.isfinite(cpu_temp_c) and math.isfinite(soc_temp_c)):
                raise ValueError(
                    "temperatures must be finite; got "
                    f"cpu_temp_c={cpu_temp_c}, soc_temp_c={soc_temp_c}"
                )
            samples.append(
                tee.ThermalSample(
                    monotonic_ms=monotonic_ms,
                    cpu_temp_c=cpu_temp_c,
                    soc_temp_c=soc_temp_c,
                )
            )
        except (KeyError, TypeError, ValueError) as exc:
            pytest.fail(
                f"NFT-LIM-04: samples[{i}] in {fixture_path} shape invalid: {exc}"
            )

    # A missing "throttle_events" key is treated as "no events".
    throttle_raw = payload.get("throttle_events", [])
    if not isinstance(throttle_raw, list):
        pytest.fail(
            f"NFT-LIM-04: fixture {fixture_path} 'throttle_events' must be a "
            f"list (may be empty); got {type(throttle_raw).__name__}"
        )

    throttle_events: list[tee.ThrottleEvent] = []
    for i, entry in enumerate(throttle_raw):
        if not isinstance(entry, dict):
            pytest.fail(
                f"NFT-LIM-04: throttle_events[{i}] in {fixture_path} must be "
                f"an object"
            )
        try:
            # The timestamp is optional (schema allows null).
            mono_raw = entry.get("monotonic_ms")
            mono = int(mono_raw) if mono_raw is not None else None
            throttle_events.append(
                tee.ThrottleEvent(
                    monotonic_ms=mono,
                    snippet=str(entry.get("snippet", "")),
                )
            )
        except (TypeError, ValueError) as exc:
            pytest.fail(
                f"NFT-LIM-04: throttle_events[{i}] in {fixture_path} shape "
                f"invalid: {exc}"
            )

    return samples, throttle_events