[AZ-440] [AZ-441] [AZ-442] [AZ-443] NFT-LIM-01/02/03+05/04 blackbox scenarios

Batch 88 — adds four resource-limit blackbox scenarios + pure-logic
helpers + unit tests:

- NFT-LIM-01 Jetson memory (AC-NEW-13): tier2_only; Plan A/B budgets;
  AC-4 OOM-event scan; 30 s warm-up window; VmRSS + tegrastats streams.
- NFT-LIM-02 FDR size (AC-7.3): 30 min → 8 h linear extrapolation
  against 50 GiB; ±60 s replay-window slack for AC-1.
- NFT-LIM-03+05 storage (AC-7.4 + AC-NEW-12 + RESTRICT-STORAGE):
  aggregate ≤ 100 GiB across tile-cache + tile-cache-write +
  fdr-output; thumbnail-log < 1 GiB strict 8 h-extrapolated.
- NFT-LIM-04 thermal (AC-NEW-5 PARTIAL): tier2_only; CPU/SoC p99
  ≤ T_throttle − 5 °C; throttle-event scan; PARTIAL annotation written
  to traceability-status.json. Thresholds fixture lives at
  e2e/fixtures/jetson/thermal-thresholds.json (moved from the
  task spec's suggested tests/fixtures/ path so the file stays
  inside the blackbox_tests Owns: e2e/** envelope).

All four helpers are public-boundary-only (no src/gps_denied_onboard
imports). Scenarios skip cleanly in the Tier-1 docker harness pending
AZ-595 (SITL replay builder) for the four shared fixture inputs and
AZ-444 (Tier-2 Jetson runner) for the tier2_only scenarios.

Code review: PASS_WITH_WARNINGS (0/0/2/1). Both Mediums are
carried-over write_csv_evidence + _resolve_fixture_path duplication,
deferred to AZ-446 (batch 89). Low is the self-resolved AZ-443 fixture
ownership drift documented in the review.

Tests: 1223 e2e/_unit_tests passing (+1 vs. batch 87 from the new
directory-layout entry); 24 resource_limit scenarios collect and skip
cleanly under runner/pytest.ini.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-17 18:01:55 +03:00
parent d1e30f818f
commit 6e4a575221
22 changed files with 2785 additions and 4 deletions
@@ -0,0 +1,183 @@
"""Unit tests for ``runner.helpers.fdr_size_evaluator`` (AZ-441 / NFT-LIM-02)."""
from __future__ import annotations
import csv
from pathlib import Path
import pytest
from runner.helpers import fdr_size_evaluator as fse
# Length of the nominal replay window (30 minutes) expressed in milliseconds.
THIRTY_MIN_MS = 30 * 60_000
def _linear_samples(
    *, size_at_30min_bytes: int, sample_count: int = 31
) -> list[fse.FdrSizeSample]:
    """Build ``sample_count`` samples spaced 60 s apart whose size grows linearly.

    The first sample has size 0 and the last has ``size_at_30min_bytes``; with the
    default of 31 samples the last timestamp is 30 minutes.
    """
    last_index = sample_count - 1
    sweep = []
    for index in range(sample_count):
        sweep.append(
            fse.FdrSizeSample(
                monotonic_ms=index * 60_000,
                size_bytes=int(size_at_30min_bytes * index / last_index),
            )
        )
    return sweep
# ───────────────────────── evaluate ─────────────────────────
def test_evaluate_under_budget_passes() -> None:
    """A run far below the budget passes every check and extrapolates 16x."""
    # Arrange — 1 GiB at 30 min extrapolates to 16 GiB at 8 h (well under 50 GiB)
    one_gib = 1 * fse.GIB_BYTES
    sweep = _linear_samples(size_at_30min_bytes=one_gib)

    # Act
    report = fse.evaluate(sweep)

    # Assert
    assert report.passes_replay_window
    assert report.passes_extrapolation
    assert report.passes
    assert report.extrapolated_8h_bytes == 16 * fse.GIB_BYTES
def test_evaluate_at_budget_passes_ac_2() -> None:
    """An extrapolation landing exactly on the budget still satisfies AC-2."""
    # Arrange — 50 GiB / 16 = 3.125 GiB at 30 min, i.e. 50 GiB extrapolated
    at_budget_30min_bytes = int((50 * fse.GIB_BYTES) / 16)
    sweep = _linear_samples(size_at_30min_bytes=at_budget_30min_bytes)

    # Act
    report = fse.evaluate(sweep)

    # Assert — AC-2 is "≤", so exactly-at-budget passes
    assert report.passes_extrapolation
def test_evaluate_over_budget_fails_ac_2() -> None:
    """An extrapolation above the budget fails AC-2 and the overall verdict."""
    # Arrange — 4 GiB at 30 min extrapolates to 64 GiB (> 50 GiB budget)
    four_gib = 4 * fse.GIB_BYTES
    sweep = _linear_samples(size_at_30min_bytes=four_gib)

    # Act
    report = fse.evaluate(sweep)

    # Assert
    assert not report.passes_extrapolation
    assert not report.passes
    assert report.extrapolated_8h_bytes == 64 * fse.GIB_BYTES
def test_evaluate_short_window_fails_ac_1() -> None:
    """A replay covering only 5 minutes fails the AC-1 replay-window check."""
    # Arrange — six per-minute samples (0..5 min); AC-1 demands a 30 min replay
    ten_mib = 10 * 1024**2
    short_run = []
    for minute in range(6):
        short_run.append(
            fse.FdrSizeSample(monotonic_ms=minute * 60_000, size_bytes=minute * ten_mib)
        )

    # Act
    report = fse.evaluate(short_run)

    # Assert
    assert not report.passes_replay_window
    assert not report.passes
def test_evaluate_window_within_slack_passes_ac_1() -> None:
    """A final sample 30 s past the 30 min mark still satisfies AC-1."""
    # Arrange — default slack is 60 s, so a +30 s overshoot is inside it
    sweep = _linear_samples(size_at_30min_bytes=1 * fse.GIB_BYTES, sample_count=31)
    # Replace the final sample with one stamped at 30 min + 30 s (same size)
    original_last = sweep.pop()
    sweep.append(
        fse.FdrSizeSample(
            monotonic_ms=THIRTY_MIN_MS + 30_000,
            size_bytes=original_last.size_bytes,
        )
    )

    # Act
    report = fse.evaluate(sweep)

    # Assert
    assert report.passes_replay_window
def test_evaluate_unsorted_samples_still_correct() -> None:
    """Feeding the same samples in reverse order yields the same report values."""
    # Arrange — identical data, once in time order and once reversed
    in_order = _linear_samples(size_at_30min_bytes=1 * fse.GIB_BYTES)
    backwards = in_order[::-1]

    # Act
    report_a = fse.evaluate(in_order)
    report_b = fse.evaluate(backwards)

    # Assert
    for field in ("size_at_30min_bytes", "extrapolated_8h_bytes", "replay_window_ms"):
        assert getattr(report_a, field) == getattr(report_b, field)
def test_evaluate_empty_samples_returns_zero_report() -> None:
    """With no samples the report has a zero count, ``None`` sizes and failed checks."""
    # Act
    empty_report = fse.evaluate([])

    # Assert
    assert empty_report.sample_count == 0
    assert empty_report.size_at_30min_bytes is None
    assert empty_report.extrapolated_8h_bytes is None
    assert not empty_report.passes_replay_window
    assert not empty_report.passes_extrapolation
def test_evaluate_rejects_non_positive_budget() -> None:
    """A zero budget is rejected with ``ValueError``."""
    zero_budget = 0

    # Assert
    with pytest.raises(ValueError):
        fse.evaluate([], budget_bytes=zero_budget)
def test_evaluate_rejects_negative_slack() -> None:
    """A negative replay-window slack is rejected with ``ValueError``."""
    negative_slack_ms = -1

    # Assert
    with pytest.raises(ValueError):
        fse.evaluate([], replay_window_slack_ms=negative_slack_ms)
def test_evaluate_custom_budget_overrides_default() -> None:
    """A caller-supplied budget replaces the default and is echoed in the report."""
    # Arrange — 1 GiB at 30 min is 16 GiB at 8 h; an 8 GiB budget must fail
    tight_budget = 8 * fse.GIB_BYTES
    sweep = _linear_samples(size_at_30min_bytes=1 * fse.GIB_BYTES)

    # Act
    report = fse.evaluate(sweep, budget_bytes=tight_budget)

    # Assert
    assert not report.passes_extrapolation
    assert report.budget_bytes == 8 * fse.GIB_BYTES
# ───────────────────────── CSV evidence ─────────────────────────
def test_write_csv_evidence_one_row(tmp_path: Path) -> None:
    """The evidence CSV starts with ``sample_count`` and its data row ends in ``true``."""
    # Arrange
    report = fse.evaluate(_linear_samples(size_at_30min_bytes=1 * fse.GIB_BYTES))
    out = tmp_path / "report.csv"

    # Act
    fse.write_csv_evidence(out, report)

    # Assert
    with out.open() as fh:
        rows = list(csv.reader(fh))
    header, data_row = rows[0], rows[1]
    assert header[0] == "sample_count"
    assert data_row[-1] == "true"
def test_write_per_minute_csv_orders_by_timestamp(tmp_path: Path) -> None:
    """Samples passed in reverse time order are written in ascending timestamp order."""
    # Arrange
    sweep = _linear_samples(size_at_30min_bytes=1 * fse.GIB_BYTES)
    sweep.reverse()
    out = tmp_path / "per-min.csv"

    # Act
    fse.write_per_minute_csv(out, sweep)

    # Assert — second column of each data row is parsed as the timestamp
    with out.open() as fh:
        rows = list(csv.reader(fh))
    data_rows = rows[1:]
    timestamps = [int(row[1]) for row in data_rows]
    assert timestamps == sorted(timestamps)
@@ -0,0 +1,255 @@
"""Unit tests for ``runner.helpers.memory_budget_evaluator`` (AZ-440 / NFT-LIM-01)."""
from __future__ import annotations
import csv
from pathlib import Path
import pytest
from runner.helpers import memory_budget_evaluator as mbe
# ───────────────────────── PlanBudgets ─────────────────────────
def test_plan_a_budgets_match_ac_2_and_ac_3() -> None:
    """Plan A budgets are 4.5 GiB steady-state and 5.0 GiB peak."""
    plan_a = mbe.PlanBudgets.for_plan(mbe.Plan.PLAN_A)

    # Assert
    expected_steady = int(4.5 * mbe.GIB_BYTES)
    expected_peak = int(5.0 * mbe.GIB_BYTES)
    assert plan_a.steady_bytes == expected_steady
    assert plan_a.peak_bytes == expected_peak
def test_plan_b_budgets_match_ac_5() -> None:
    """Plan B budgets are 6.0 GiB steady-state and 6.5 GiB peak."""
    plan_b = mbe.PlanBudgets.for_plan(mbe.Plan.PLAN_B)

    # Assert
    expected_steady = int(6.0 * mbe.GIB_BYTES)
    expected_peak = int(6.5 * mbe.GIB_BYTES)
    assert plan_b.steady_bytes == expected_steady
    assert plan_b.peak_bytes == expected_peak
def test_plan_budgets_rejects_unknown_plan() -> None:
    """Passing an object that is not a ``Plan`` member raises ``ValueError``."""

    class _FakePlan:
        """Stand-in that is deliberately not a member of ``mbe.Plan``."""

    impostor = _FakePlan()

    # Assert
    with pytest.raises(ValueError):
        mbe.PlanBudgets.for_plan(impostor)  # type: ignore[arg-type]
# ───────────────────────── _percentile_int ─────────────────────
def test_percentile_int_q_must_be_in_range() -> None:
    """Percentiles below 0 or above 100 are rejected with ``ValueError``."""
    # Assert — one value under the range, one over it
    for out_of_range_q in (-1.0, 101.0):
        with pytest.raises(ValueError):
            mbe._percentile_int([1, 2, 3], out_of_range_q)
def test_percentile_int_empty_returns_none() -> None:
    """An empty value list has no percentile, so ``None`` comes back."""
    result = mbe._percentile_int([], 50.0)

    # Assert
    assert result is None
def test_percentile_int_single_value_returns_that_value() -> None:
    """With one value, the 0th, 50th and 100th percentiles all return it."""
    # Assert
    for q in (0.0, 50.0, 100.0):
        assert mbe._percentile_int([42], q) == 42
def test_percentile_int_linear_interpolation_then_rounded() -> None:
    """Percentiles interpolate between neighbours; the extremes return min and max."""
    # Arrange — ten values: 100, 200, ..., 1000
    values = list(range(100, 1001, 100))
    expected_by_q = (
        (50.0, 550),  # even-length midpoint, halfway between 500 and 600
        (100.0, 1000),
        (0.0, 100),
    )

    # Assert
    for q, expected in expected_by_q:
        assert mbe._percentile_int(values, q) == expected
# ───────────────────────── _post_warmup_window ─────────────────
def test_post_warmup_drops_samples_inside_warmup_window() -> None:
    """Samples before the warm-up boundary are dropped; the boundary sample is kept."""
    # Arrange — each sample reuses its timestamp as both memory readings
    timestamps_ms = (0, 10_000, 20_000, 30_000, 31_000)
    samples = []
    for stamp in timestamps_ms:
        samples.append(
            mbe.MemorySample(
                monotonic_ms=stamp, vmrss_bytes=stamp, tegrastats_used_bytes=stamp
            )
        )

    # Act
    kept = mbe._post_warmup_window(samples, warm_up_ms=30_000)

    # Assert
    kept_ms = [sample.monotonic_ms for sample in kept]
    assert kept_ms == [30_000, 31_000]
def test_post_warmup_rejects_negative_warmup() -> None:
    """A negative warm-up duration is rejected with ``ValueError``."""
    negative_warm_up_ms = -1

    # Assert
    with pytest.raises(ValueError):
        mbe._post_warmup_window([], warm_up_ms=negative_warm_up_ms)
def test_post_warmup_empty_samples_returns_empty() -> None:
    """Filtering an empty sample list returns an empty list."""
    kept = mbe._post_warmup_window([], warm_up_ms=30_000)

    # Assert
    assert kept == []
# ───────────────────────── evaluate ────────────────────────────
def _flat_samples(
    n: int, *, vmrss: int, tegrastats: int, start_ms: int = 0
) -> list[mbe.MemorySample]:
    """Build ``n`` samples one second apart, all with the same two memory readings.

    Timestamps start at ``start_ms`` and advance by 1000 ms per sample.
    """
    flat = []
    for offset in range(n):
        flat.append(
            mbe.MemorySample(
                monotonic_ms=start_ms + offset * 1000,
                vmrss_bytes=vmrss,
                tegrastats_used_bytes=tegrastats,
            )
        )
    return flat
def test_evaluate_plan_a_under_budget_passes() -> None:
    """A flat 4.0 GiB run with no OOM events passes every Plan A check."""
    # Arrange — both streams constant at 4.0 GiB; well under the 4.5/5.0 budgets
    four_gib = 4 * mbe.GIB_BYTES
    samples = _flat_samples(60, vmrss=four_gib, tegrastats=four_gib, start_ms=30_000)

    # Act
    report = mbe.evaluate(
        samples,
        oom_events=[],
        plan=mbe.Plan.PLAN_A,
        warm_up_ms=0,
    )

    # Assert
    assert report.passes_steady_state
    assert report.passes_peak
    assert report.passes_no_oom
    assert report.passes
def test_evaluate_plan_a_steady_breach_fails_ac_2() -> None:
    """A flat run above the Plan A steady budget fails AC-2 and the overall verdict."""
    # Arrange — steady 4.8 GiB exceeds the 4.5 GiB budget
    over_steady = int(4.8 * mbe.GIB_BYTES)
    samples = _flat_samples(20, vmrss=over_steady, tegrastats=over_steady)

    # Act
    report = mbe.evaluate(
        samples,
        oom_events=[],
        plan=mbe.Plan.PLAN_A,
        warm_up_ms=0,
    )

    # Assert
    assert not report.passes_steady_state
    assert not report.passes
def test_evaluate_plan_a_peak_breach_fails_ac_3() -> None:
    """One VmRSS spike above the Plan A peak budget fails AC-3."""
    # Arrange — every sample at 4.0 GiB except a single 5.5 GiB VmRSS spike
    baseline = int(4.0 * mbe.GIB_BYTES)
    spike = int(5.5 * mbe.GIB_BYTES)
    samples = _flat_samples(20, vmrss=baseline, tegrastats=baseline)
    spike_index = 10
    samples[spike_index] = mbe.MemorySample(
        monotonic_ms=samples[spike_index].monotonic_ms,
        vmrss_bytes=spike,
        tegrastats_used_bytes=baseline,
    )

    # Act
    report = mbe.evaluate(
        samples,
        oom_events=[],
        plan=mbe.Plan.PLAN_A,
        warm_up_ms=0,
    )

    # Assert — VmRSS max breaches the peak budget
    assert not report.passes_peak
    assert not report.passes
def test_evaluate_plan_b_relaxes_budgets() -> None:
    """The same 5.5 GiB steady run fails under Plan A but passes under Plan B."""
    # Arrange
    steady_55 = int(5.5 * mbe.GIB_BYTES)
    samples = _flat_samples(20, vmrss=steady_55, tegrastats=steady_55)

    # Act
    report_a = mbe.evaluate(
        samples, oom_events=[], plan=mbe.Plan.PLAN_A, warm_up_ms=0
    )
    report_b = mbe.evaluate(
        samples, oom_events=[], plan=mbe.Plan.PLAN_B, warm_up_ms=0
    )

    # Assert
    assert not report_a.passes_steady_state
    assert report_b.passes_steady_state
def test_evaluate_with_oom_event_fails_ac_4() -> None:
    """A single OOM event fails AC-4 even though memory stays at 3 GiB."""
    # Arrange
    three_gib = 3 * mbe.GIB_BYTES
    samples = _flat_samples(20, vmrss=three_gib, tegrastats=three_gib)
    kill_event = mbe.OomEvent(
        monotonic_ms=12_345,
        snippet="Out of memory: Killed process 4242",
    )

    # Act
    report = mbe.evaluate(
        samples,
        oom_events=[kill_event],
        plan=mbe.Plan.PLAN_A,
        warm_up_ms=0,
    )

    # Assert
    assert not report.passes_no_oom
    assert not report.passes
def test_evaluate_warmup_eliminates_spike_during_warmup() -> None:
    """A spike inside the warm-up window is ignored, so the run passes."""
    # Arrange — a 7 GiB spike at t=0, then a clean 3 GiB run from t=30 s.
    # AC-2/3 evaluate the POST-warm-up window only.
    three_gib = 3 * mbe.GIB_BYTES
    spike = int(7.0 * mbe.GIB_BYTES)
    warm_up_spike = mbe.MemorySample(
        monotonic_ms=0, vmrss_bytes=spike, tegrastats_used_bytes=spike
    )
    clean_run = _flat_samples(
        20, vmrss=three_gib, tegrastats=three_gib, start_ms=30_000
    )
    samples = [warm_up_spike] + clean_run

    # Act
    report = mbe.evaluate(
        samples,
        oom_events=[],
        plan=mbe.Plan.PLAN_A,
        warm_up_ms=30_000,
    )

    # Assert
    assert report.passes
    assert (report.vmrss.max_bytes or 0) == three_gib
def test_evaluate_empty_samples_returns_none_stats() -> None:
    """With no samples the p50 stats are ``None`` and steady/peak checks fail."""
    # Act
    empty_report = mbe.evaluate(
        [],
        oom_events=[],
        plan=mbe.Plan.PLAN_A,
        warm_up_ms=0,
    )

    # Assert
    assert empty_report.vmrss.p50_bytes is None
    assert empty_report.tegrastats.p50_bytes is None
    assert not empty_report.passes_steady_state
    assert not empty_report.passes_peak
# ───────────────────────── CSV evidence ─────────────────────────
def test_write_csv_evidence_one_row_with_verdict(tmp_path: Path) -> None:
    """The evidence CSV is written to a nested path, returned, and carries the verdict."""
    # Arrange — the "lim-01" parent directory is not created by the test
    three_gib = 3 * mbe.GIB_BYTES
    report = mbe.evaluate(
        _flat_samples(10, vmrss=three_gib, tegrastats=three_gib),
        oom_events=[],
        plan=mbe.Plan.PLAN_A,
        warm_up_ms=0,
    )
    out = tmp_path / "lim-01" / "report.csv"

    # Act
    returned = mbe.write_csv_evidence(out, report)

    # Assert
    assert returned == out
    with out.open() as fh:
        rows = list(csv.reader(fh))
    header, data_row = rows[0], rows[1]
    assert header[0] == "plan"
    assert data_row[0] == mbe.Plan.PLAN_A.value
    assert data_row[-1] == "true"
def test_write_oom_events_csv_truncates_long_snippet(tmp_path: Path) -> None:
    """A 500-character snippet is written as 200 characters in the CSV."""
    # Arrange
    oversized = mbe.OomEvent(monotonic_ms=1, snippet="X" * 500)
    out = tmp_path / "lim-01" / "oom.csv"

    # Act
    mbe.write_oom_events_csv(out, [oversized])

    # Assert — third column of the first data row holds the snippet
    with out.open() as fh:
        rows = list(csv.reader(fh))
    written_snippet = rows[1][2]
    assert len(written_snippet) == 200
@@ -0,0 +1,238 @@
"""Unit tests for ``runner.helpers.storage_budget_evaluator`` (AZ-442 / NFT-LIM-03+05)."""
from __future__ import annotations
import csv
from pathlib import Path
import pytest
from runner.helpers import storage_budget_evaluator as sbe
# Short alias for the evaluator's GiB constant; used to size the snapshots below.
GIB = sbe.GIB_BYTES
def _snapshot(
    monotonic_ms: int,
    *,
    tile_cache: int = 0,
    tile_cache_write: int = 0,
    fdr_output: int = 0,
    thumbnail_log: int = 0,
) -> sbe.VolumeSnapshot:
    """Build a ``VolumeSnapshot`` at ``monotonic_ms``; unspecified volumes are 0 bytes."""
    volume_bytes = {
        "tile_cache_bytes": tile_cache,
        "tile_cache_write_bytes": tile_cache_write,
        "fdr_output_bytes": fdr_output,
        "thumbnail_log_bytes": thumbnail_log,
    }
    return sbe.VolumeSnapshot(monotonic_ms=monotonic_ms, **volume_bytes)
# ───────────────────────── VolumeSnapshot.aggregate_bytes ─────────────────
def test_aggregate_excludes_thumbnail_log() -> None:
    """``aggregate_bytes`` sums the three storage volumes and skips the thumbnail log."""
    # Arrange — thumbnail_log is NOT in the aggregate per AC-1 scope
    snapshot = _snapshot(
        0, tile_cache=10, tile_cache_write=20, fdr_output=30, thumbnail_log=999
    )

    # Assert — 10 + 20 + 30
    assert snapshot.aggregate_bytes == 60
# ───────────────────────── evaluate ────────────────────────────
def test_evaluate_under_aggregate_and_thumbnail_budgets_passes() -> None:
    """A run under both the aggregate and thumbnail-log budgets passes overall."""
    # Arrange — end-of-run aggregate is 50 GiB; the thumbnail log is 0.05 GiB
    # at 30 min, which extrapolates to 0.8 GiB at 8 h
    start = _snapshot(0, tile_cache=0)
    end = _snapshot(
        30 * 60_000,
        tile_cache=20 * GIB,
        tile_cache_write=10 * GIB,
        fdr_output=20 * GIB,
        thumbnail_log=int(0.05 * GIB),
    )

    # Act
    report = sbe.evaluate([start, end])

    # Assert
    assert report.passes_aggregate
    assert report.passes_thumbnail_log
    assert report.passes
def test_evaluate_aggregate_breach_fails_ac_1() -> None:
    """An aggregate one GiB over the budget fails AC-1 and the overall verdict."""
    # Arrange — 40 + 30 + 31 = 101 GiB, above the 100 GiB budget
    over_budget = _snapshot(
        30 * 60_000,
        tile_cache=40 * GIB,
        tile_cache_write=30 * GIB,
        fdr_output=31 * GIB,
        thumbnail_log=0,
    )

    # Act
    report = sbe.evaluate([over_budget])

    # Assert
    assert not report.passes_aggregate
    assert not report.passes
def test_evaluate_aggregate_at_budget_passes_ac_1() -> None:
    """An aggregate exactly on the budget still satisfies AC-1."""
    # Arrange — 40 + 30 + 30 = 100 GiB exactly; "≤" means PASS
    at_budget = _snapshot(
        30 * 60_000,
        tile_cache=40 * GIB,
        tile_cache_write=30 * GIB,
        fdr_output=30 * GIB,
    )

    # Act
    report = sbe.evaluate([at_budget])

    # Assert
    assert report.passes_aggregate
def test_evaluate_thumbnail_log_strict_lt_fails_at_budget() -> None:
    """A thumbnail log that extrapolates to exactly the budget fails AC-2."""
    # Arrange — one sixteenth of the budget at 30 min extrapolates to the budget
    one_sixteenth = sbe.THUMBNAIL_LOG_BUDGET_BYTES // 16
    at_budget = _snapshot(30 * 60_000, thumbnail_log=one_sixteenth)

    # Act
    report = sbe.evaluate([at_budget])

    # Assert — extrapolated equals budget, and AC-2 is a strict "<"
    assert report.thumbnail_log_extrapolated_8h_bytes == sbe.THUMBNAIL_LOG_BUDGET_BYTES
    assert not report.passes_thumbnail_log
def test_evaluate_thumbnail_log_just_under_budget_passes() -> None:
    """A thumbnail log sized slightly below the at-budget value passes AC-2."""
    # Arrange — shave 1024 bytes off the budget before taking one sixteenth
    slightly_under = (sbe.THUMBNAIL_LOG_BUDGET_BYTES - 1024) // 16
    under_budget = _snapshot(30 * 60_000, thumbnail_log=slightly_under)

    # Act
    report = sbe.evaluate([under_budget])

    # Assert
    assert report.passes_thumbnail_log
def test_evaluate_uses_end_of_run_snapshot_not_max() -> None:
    """AC-1 is judged on the last snapshot, not on the largest one seen."""
    # Arrange — 200 GiB mid-run, shrinking to 30 GiB by the end
    mid_run_peak = _snapshot(
        10 * 60_000,
        tile_cache=200 * GIB,
        tile_cache_write=0,
        fdr_output=0,
    )
    end_of_run = _snapshot(
        30 * 60_000,
        tile_cache=10 * GIB,
        tile_cache_write=10 * GIB,
        fdr_output=10 * GIB,
    )

    # Act
    report = sbe.evaluate([mid_run_peak, end_of_run])

    # Assert — uses the last snapshot (sorted by monotonic_ms)
    assert report.aggregate_at_end_bytes == 30 * GIB
    assert report.passes_aggregate
def test_evaluate_unsorted_samples_are_sorted_before_end_pick() -> None:
    """The end snapshot is chosen by timestamp, not by position in the input."""
    # Arrange — the later (t=30 min) snapshot is listed first
    later = _snapshot(30 * 60_000, tile_cache=1 * GIB)
    earlier = _snapshot(0, tile_cache=200 * GIB)

    # Act
    report = sbe.evaluate([later, earlier])

    # Assert — end snapshot is the t=30 min one
    assert report.aggregate_at_end_bytes == 1 * GIB
def test_evaluate_empty_samples_returns_none_stats() -> None:
    """With no snapshots the report has a zero count, ``None`` stats and fails."""
    # Act
    empty_report = sbe.evaluate([])

    # Assert
    assert empty_report.sample_count == 0
    assert empty_report.aggregate_at_end_bytes is None
    assert empty_report.thumbnail_log_at_end_bytes is None
    assert empty_report.thumbnail_log_extrapolated_8h_bytes is None
    assert not empty_report.passes
def test_evaluate_rejects_non_positive_budgets() -> None:
    """A zero aggregate budget or zero thumbnail-log budget raises ``ValueError``."""
    zero_budget_overrides = (
        {"aggregate_budget_bytes": 0},
        {"thumbnail_log_budget_bytes": 0},
    )

    # Assert
    for override in zero_budget_overrides:
        with pytest.raises(ValueError):
            sbe.evaluate([], **override)
# ───────────────────────── CSV evidence ─────────────────────────
def test_write_csv_evidence_writes_aggregate_and_thumbnail(tmp_path: Path) -> None:
    """The evidence CSV starts with ``sample_count`` and its data row ends in ``true``."""
    # Arrange — a single passing end-of-run snapshot
    end_of_run = _snapshot(
        30 * 60_000,
        tile_cache=10 * GIB,
        tile_cache_write=10 * GIB,
        fdr_output=10 * GIB,
        thumbnail_log=int(0.05 * GIB),
    )
    report = sbe.evaluate([end_of_run])
    out = tmp_path / "report.csv"

    # Act
    sbe.write_csv_evidence(out, report)

    # Assert
    with out.open() as fh:
        rows = list(csv.reader(fh))
    header, data_row = rows[0], rows[1]
    assert header[0] == "sample_count"
    assert data_row[-1] == "true"
def test_write_per_minute_csv_orders_by_timestamp(tmp_path: Path) -> None:
    """Snapshots passed latest-first are written in ascending timestamp order."""
    # Arrange
    later = _snapshot(30 * 60_000, tile_cache=10 * GIB)
    earlier = _snapshot(0, tile_cache=0)
    out = tmp_path / "per-min.csv"

    # Act
    sbe.write_per_minute_csv(out, [later, earlier])

    # Assert — second column of each data row is parsed as the timestamp
    with out.open() as fh:
        rows = list(csv.reader(fh))
    data_rows = rows[1:]
    timestamps = [int(row[1]) for row in data_rows]
    assert timestamps == sorted(timestamps)
@@ -0,0 +1,260 @@
"""Unit tests for ``runner.helpers.thermal_envelope_evaluator`` (AZ-443 / NFT-LIM-04)."""
from __future__ import annotations
import csv
import json
from pathlib import Path
import pytest
from runner.helpers import thermal_envelope_evaluator as tee
# ───────────────────────── ThermalThresholds ─────────────────────────
def test_default_thresholds_match_orin_nano_super() -> None:
    """Default thresholds are 97 °C (CPU) and 95 °C (SoC), budgets sit one headroom below."""
    # Arrange / Act
    defaults = tee.ThermalThresholds()

    # Assert
    assert defaults.cpu_t_throttle_c == 97.0
    assert defaults.soc_t_throttle_c == 95.0
    assert defaults.cpu_budget_c == 97.0 - tee.HEADROOM_C
    assert defaults.soc_budget_c == 95.0 - tee.HEADROOM_C
def test_load_from_fixture_round_trip(tmp_path: Path) -> None:
    """Thresholds written to a JSON fixture are read back unchanged."""
    # Arrange
    fixture = tmp_path / "thermal.json"
    fixture.write_text(
        json.dumps({"cpu_t_throttle_c": 99.5, "soc_t_throttle_c": 92.0})
    )

    # Act
    loaded = tee.ThermalThresholds.load_from_fixture(fixture)

    # Assert
    assert loaded.cpu_t_throttle_c == 99.5
    assert loaded.soc_t_throttle_c == 92.0
def test_load_from_fixture_rejects_non_object(tmp_path: Path) -> None:
    """A fixture whose top-level JSON value is an array raises ``ValueError``."""
    # Arrange
    array_fixture = tmp_path / "thermal.json"
    array_fixture.write_text("[1, 2, 3]")

    # Assert
    with pytest.raises(ValueError):
        tee.ThermalThresholds.load_from_fixture(array_fixture)
def test_load_from_fixture_rejects_missing_key(tmp_path: Path) -> None:
    """A fixture without ``soc_t_throttle_c`` raises ``ValueError``."""
    # Arrange — only the CPU threshold is present
    cpu_only = {"cpu_t_throttle_c": 97.0}
    incomplete_fixture = tmp_path / "thermal.json"
    incomplete_fixture.write_text(json.dumps(cpu_only))

    # Assert
    with pytest.raises(ValueError):
        tee.ThermalThresholds.load_from_fixture(incomplete_fixture)
def test_load_from_fixture_rejects_non_numeric(tmp_path: Path) -> None:
    """A fixture with a string where a threshold number belongs raises ``ValueError``."""
    # Arrange — the CPU threshold is the string "hot"
    bad_payload = {"cpu_t_throttle_c": "hot", "soc_t_throttle_c": 95.0}
    bad_fixture = tmp_path / "thermal.json"
    bad_fixture.write_text(json.dumps(bad_payload))

    # Assert
    with pytest.raises(ValueError):
        tee.ThermalThresholds.load_from_fixture(bad_fixture)
# ───────────────────────── _percentile_float ─────────────────────
def test_percentile_float_q_must_be_in_range() -> None:
    """Percentiles just below 0 or just above 100 are rejected with ``ValueError``."""
    # Assert — one value under the range, one over it
    for out_of_range_q in (-0.1, 100.1):
        with pytest.raises(ValueError):
            tee._percentile_float([10.0], out_of_range_q)
def test_percentile_float_empty_returns_none() -> None:
    """An empty value list has no percentile, so ``None`` comes back."""
    result = tee._percentile_float([], 99.0)

    # Assert
    assert result is None
def test_percentile_float_single_value_returns_that_value() -> None:
    """With a single value, p99 is that value."""
    only_value = [55.0]

    # Assert
    assert tee._percentile_float(only_value, 99.0) == 55.0
def test_percentile_float_known_distribution() -> None:
    """p99 of 1.0..100.0 is 99.01 under linear interpolation."""
    # Arrange — 1.0, 2.0, ..., 100.0
    values = list(map(float, range(1, 101)))

    # Act
    p99 = tee._percentile_float(values, 99.0)

    # Assert — rank 98.01 falls between values[98] (99.0) and values[99] (100.0)
    assert p99 == pytest.approx(99.01)
# ───────────────────────── evaluate ────────────────────────────
def _cool_samples(n: int = 60, cpu_c: float = 60.0, soc_c: float = 55.0) -> list[tee.ThermalSample]:
    """Build ``n`` samples one second apart, all with the same CPU and SoC temperatures."""
    flat = []
    for second in range(n):
        flat.append(
            tee.ThermalSample(
                monotonic_ms=second * 1000,
                cpu_temp_c=cpu_c,
                soc_temp_c=soc_c,
            )
        )
    return flat
def test_evaluate_cool_run_passes() -> None:
    """A cool run with no throttle events passes every thermal check."""
    # Arrange — default helper temperatures against default thresholds
    default_thresholds = tee.ThermalThresholds()
    cool_run = _cool_samples()

    # Act
    report = tee.evaluate(
        cool_run,
        throttle_events=[],
        thresholds=default_thresholds,
    )

    # Assert
    assert report.passes_no_throttle
    assert report.passes_headroom
    assert report.passes
def test_evaluate_throttle_event_fails_ac_2() -> None:
    """A single throttle event fails AC-2 even though temperatures are cool."""
    # Arrange
    default_thresholds = tee.ThermalThresholds()
    cool_run = _cool_samples()
    throttle = tee.ThrottleEvent(
        monotonic_ms=42,
        snippet="thermal_throttle: zone CPU_thermal",
    )

    # Act
    report = tee.evaluate(
        cool_run,
        throttle_events=[throttle],
        thresholds=default_thresholds,
    )

    # Assert
    assert not report.passes_no_throttle
    assert not report.passes
def test_evaluate_cpu_above_budget_fails_ac_3() -> None:
    """A CPU held above its headroom budget fails AC-3 and the overall verdict."""
    # Arrange — CPU flat at 95 °C; budget = 97 - 5 = 92 °C
    default_thresholds = tee.ThermalThresholds()
    hot_cpu_run = _cool_samples(cpu_c=95.0, soc_c=55.0)

    # Act
    report = tee.evaluate(
        hot_cpu_run,
        throttle_events=[],
        thresholds=default_thresholds,
    )

    # Assert
    assert not report.passes_headroom
    assert not report.passes
def test_evaluate_soc_above_budget_fails_ac_3() -> None:
    """An SoC held above its headroom budget fails AC-3."""
    # Arrange — SoC flat at 92 °C; budget = 95 - 5 = 90 °C
    default_thresholds = tee.ThermalThresholds()
    hot_soc_run = _cool_samples(cpu_c=60.0, soc_c=92.0)

    # Act
    report = tee.evaluate(
        hot_soc_run,
        throttle_events=[],
        thresholds=default_thresholds,
    )

    # Assert
    assert not report.passes_headroom
def test_evaluate_cpu_p99_exactly_at_budget_passes() -> None:
    """Temperatures sitting exactly on both budgets still pass the headroom check."""
    # Arrange — flat run with CPU and SoC each at their own budget
    thresholds = tee.ThermalThresholds()
    at_budget_run = _cool_samples(
        cpu_c=thresholds.cpu_budget_c,
        soc_c=thresholds.soc_budget_c,
    )

    # Act
    report = tee.evaluate(
        at_budget_run,
        throttle_events=[],
        thresholds=thresholds,
    )

    # Assert — "≤" means at-budget passes
    assert report.passes_headroom
def test_evaluate_empty_samples_returns_none_p99_and_fails() -> None:
    """With no samples both p99 values are ``None`` and the headroom check fails."""
    # Act
    empty_report = tee.evaluate(
        [],
        throttle_events=[],
        thresholds=tee.ThermalThresholds(),
    )

    # Assert
    assert empty_report.cpu.p99_c is None
    assert empty_report.soc.p99_c is None
    assert not empty_report.passes_headroom
# ───────────────────────── PARTIAL annotation ────────────────────
def test_write_traceability_partial_creates_file(tmp_path: Path) -> None:
    """When the status file does not exist it is created with the AC-NEW-5 annotation."""
    # Arrange
    status_file = tmp_path / "traceability-status.json"

    # Act
    tee.write_traceability_partial_annotation(status_file)

    # Assert
    written = json.loads(status_file.read_text())
    assert written["AC-NEW-5"] == "PARTIAL — chamber required for full"
def test_write_traceability_partial_merges_existing(tmp_path: Path) -> None:
    """Existing entries in the status file survive when the annotation is added."""
    # Arrange — file already holds an unrelated entry
    status_file = tmp_path / "traceability-status.json"
    status_file.write_text(json.dumps({"AC-OTHER": "covered"}))

    # Act
    tee.write_traceability_partial_annotation(status_file)

    # Assert
    merged = json.loads(status_file.read_text())
    assert merged["AC-OTHER"] == "covered"
    assert merged["AC-NEW-5"] == "PARTIAL — chamber required for full"
def test_write_traceability_partial_rejects_non_object_existing(tmp_path: Path) -> None:
    """An existing status file holding a JSON array raises ``ValueError``."""
    # Arrange
    status_file = tmp_path / "traceability-status.json"
    status_file.write_text("[1, 2]")

    # Assert
    with pytest.raises(ValueError):
        tee.write_traceability_partial_annotation(status_file)
# ───────────────────────── CSV evidence ─────────────────────────
def test_write_csv_evidence_one_row(tmp_path: Path) -> None:
    """The evidence CSV starts with ``cpu_t_throttle_c`` and its data row ends in ``true``."""
    # Arrange — a passing cool run against default thresholds
    report = tee.evaluate(
        _cool_samples(),
        throttle_events=[],
        thresholds=tee.ThermalThresholds(),
    )
    out = tmp_path / "report.csv"

    # Act
    tee.write_csv_evidence(out, report)

    # Assert
    with out.open() as fh:
        rows = list(csv.reader(fh))
    header, data_row = rows[0], rows[1]
    assert header[0] == "cpu_t_throttle_c"
    assert data_row[-1] == "true"
def test_write_throttle_events_csv_truncates_long_snippet(tmp_path: Path) -> None:
    """A 500-character snippet is written as 200 characters in the CSV."""
    # Arrange
    oversized = tee.ThrottleEvent(monotonic_ms=1, snippet="T" * 500)
    out = tmp_path / "throttle.csv"

    # Act
    tee.write_throttle_events_csv(out, [oversized])

    # Assert — third column of the first data row holds the snippet
    with out.open() as fh:
        rows = list(csv.reader(fh))
    written_snippet = rows[1][2]
    assert len(written_snippet) == 200