[AZ-440] [AZ-441] [AZ-442] [AZ-443] NFT-LIM-01/02/03+05/04 blackbox scenarios

Batch 88 — adds four resource-limit blackbox scenarios + pure-logic helpers + unit tests:

- NFT-LIM-01 Jetson memory (AC-NEW-13): tier2_only; Plan A/B budgets; AC-4 OOM-event scan; 30 s warm-up window; VmRSS + tegrastats streams.
- NFT-LIM-02 FDR size (AC-7.3): 30 min → 8 h linear extrapolation against 50 GiB; ±60 s replay-window slack for AC-1.
- NFT-LIM-03+05 storage (AC-7.4 + AC-NEW-12 + RESTRICT-STORAGE): aggregate ≤ 100 GiB across tile-cache + tile-cache-write + fdr-output; thumbnail-log < 1 GiB strict 8 h-extrapolated.
- NFT-LIM-04 thermal (AC-NEW-5 PARTIAL): tier2_only; CPU/SoC p99 ≤ T_throttle − 5 °C; throttle-event scan; PARTIAL annotation written to traceability-status.json. Thresholds fixture lives at e2e/fixtures/jetson/thermal-thresholds.json (moved from the task spec's suggested tests/fixtures/ path so the file stays inside the blackbox_tests Owns: e2e/** envelope).

All four helpers are public-boundary-only (no src/gps_denied_onboard imports). Scenarios skip cleanly in the Tier-1 docker harness pending AZ-595 (SITL replay builder) for the four shared fixture inputs and AZ-444 (Tier-2 Jetson runner) for the tier2_only scenarios.

Code review: PASS_WITH_WARNINGS (0/0/2/1). Both Mediums are carried-over write_csv_evidence + _resolve_fixture_path duplication, deferred to AZ-446 (batch 89). Low is the self-resolved AZ-443 fixture ownership drift documented in the review.

Tests: 1223 e2e/_unit_tests passing (+1 vs. batch 87 from the new directory-layout entry); 24 resource_limit scenarios collect and skip cleanly under runner/pytest.ini.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-22 21:41:13 +00:00 · 2026-05-17 18:01:55 +03:00
parent d1e30f818f
commit 6e4a575221
22 changed files with 2785 additions and 4 deletions
@@ -0,0 +1,162 @@
+"""FDR size budget evaluator for NFT-LIM-02 (AZ-441 / AC-7.3).
+
+A 30 min Derkachi replay (4× the 8 min flight) is sampled per-minute
+via ``du -sh fdr-output``. The per-minute samples are projected into a
+typed ``(monotonic_ms, size_bytes)`` stream by the scenario; this
+module extrapolates the 30 min size linearly to 8 h:
+
+    extrapolated_bytes = size_at_30min_bytes / 30 × 480
+
+and asserts ``extrapolated_bytes ≤ 50 GiB`` (AC-2).
+
+AC-1 (the runner actually looped Derkachi for 30 min wall-clock) is
+verdict-checked here from the sample timestamps; the scenario test
+provides the canonical replay duration as input.
+
+Public-boundary discipline: does NOT import any
+``src/gps_denied_onboard`` symbol — inputs are pre-projected typed
+samples.
+"""
+
+from __future__ import annotations
+
+import csv
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Sequence
+
+# Bytes per GiB (2**30); unit used to express the AC-2 budget below.
+GIB_BYTES = 1024**3
+
+# Nominal replay length in minutes: the AC-1 target window and the
+# divisor of the linear 30 min → 8 h extrapolation.
+REPLAY_WINDOW_MINUTES = 30
+EXTRAPOLATION_WINDOW_MINUTES = 8 * 60  # AC-2 — 8 hours
+DEFAULT_BUDGET_BYTES = 50 * GIB_BYTES  # AC-2 — ≤ 50 GiB
+
+# AC-1 tolerance: the scenario claims a 30 min replay; in practice the
+# wall-clock window may drift by a few seconds due to loop overhead.
+# Accept ±60 s slack — anything beyond that is a real replay deviation.
+REPLAY_WINDOW_SLACK_MS = 60_000
+
+
+@dataclass(frozen=True)
+class FdrSizeSample:
+    """One ``du -sh fdr-output`` sample at a monotonic timestamp.
+
+    Instances are immutable (``frozen=True``). This module only orders
+    samples by ``monotonic_ms`` and subtracts first from last, so the
+    absolute origin of the timestamp does not matter here.
+    """
+
+    # Sample time in milliseconds; presumably taken from a monotonic
+    # clock — confirm against the scenario that builds the stream.
+    monotonic_ms: int
+    # Size of ``fdr-output`` at that instant, in bytes.
+    size_bytes: int
+
+
+@dataclass(frozen=True)
+class FdrSizeReport:
+    """Immutable NFT-LIM-02 result for a single run.
+
+    Carries the measured values (sample count, sampled window, final
+    size, 8 h projection) next to the limits they are judged against,
+    and derives the AC-1 / AC-2 verdicts on demand.
+    """
+
+    sample_count: int
+    replay_window_ms: int
+    size_at_30min_bytes: int | None
+    extrapolated_8h_bytes: int | None
+    budget_bytes: int
+    replay_window_slack_ms: int
+
+    @property
+    def passes_replay_window(self) -> bool:
+        """AC-1: the sampled window is within ±slack of the 30 min target."""
+        nominal_ms = REPLAY_WINDOW_MINUTES * 60_000
+        deviation_ms = abs(self.replay_window_ms - nominal_ms)
+        return deviation_ms <= self.replay_window_slack_ms
+
+    @property
+    def passes_extrapolation(self) -> bool:
+        """AC-2: an 8 h projection exists and does not exceed the budget."""
+        projected = self.extrapolated_8h_bytes
+        if projected is None:
+            # Without a projection there is nothing to certify.
+            return False
+        return projected <= self.budget_bytes
+
+    @property
+    def passes(self) -> bool:
+        """Overall verdict: AC-1 and AC-2 must both hold."""
+        if not self.passes_replay_window:
+            return False
+        return self.passes_extrapolation
+
+
+def evaluate(
+    samples: Sequence[FdrSizeSample],
+    *,
+    budget_bytes: int = DEFAULT_BUDGET_BYTES,
+    replay_window_slack_ms: int = REPLAY_WINDOW_SLACK_MS,
+) -> FdrSizeReport:
+    """Compute the AC-1 + AC-2 verdict from FDR size samples.
+
+    Samples may arrive in any order; they are sorted by ``monotonic_ms``
+    before use. The sampled window is ``last - first`` timestamp, and the
+    size of the last sample is scaled linearly from the nominal replay
+    length to the 8 h extrapolation window.
+
+    Args:
+        samples: Size samples. Any iterable is tolerated (including a
+            one-shot iterator), because emptiness is checked after
+            sorting rather than on the raw argument.
+        budget_bytes: AC-2 ceiling for the extrapolated size; must be > 0.
+        replay_window_slack_ms: AC-1 tolerance around the nominal replay
+            window; must be >= 0.
+
+    Returns:
+        The report for this run. With no samples, the size fields are
+        ``None`` and the window is 0, so the report does not pass.
+
+    Raises:
+        ValueError: if ``budget_bytes`` or ``replay_window_slack_ms`` is
+            out of range.
+    """
+    if budget_bytes <= 0:
+        raise ValueError(f"budget_bytes must be > 0 (was {budget_bytes!r})")
+    if replay_window_slack_ms < 0:
+        raise ValueError(
+            f"replay_window_slack_ms must be >= 0 (was {replay_window_slack_ms!r})"
+        )
+    # Sort first, then test emptiness on the materialised list: an
+    # exhausted iterator is truthy, so checking ``samples`` directly would
+    # fall through to ``ordered[-1]`` and raise IndexError.
+    ordered = sorted(samples, key=lambda s: s.monotonic_ms)
+    if not ordered:
+        return FdrSizeReport(
+            sample_count=0,
+            replay_window_ms=0,
+            size_at_30min_bytes=None,
+            extrapolated_8h_bytes=None,
+            budget_bytes=budget_bytes,
+            replay_window_slack_ms=replay_window_slack_ms,
+        )
+    window_ms = ordered[-1].monotonic_ms - ordered[0].monotonic_ms
+    size_at_end = ordered[-1].size_bytes
+    # Linear projection uses the nominal 30 min, not the measured window;
+    # AC-1 separately checks that the measured window is close to nominal.
+    extrapolated = int(
+        round((size_at_end / REPLAY_WINDOW_MINUTES) * EXTRAPOLATION_WINDOW_MINUTES)
+    )
+    return FdrSizeReport(
+        sample_count=len(ordered),
+        replay_window_ms=window_ms,
+        size_at_30min_bytes=size_at_end,
+        extrapolated_8h_bytes=extrapolated,
+        budget_bytes=budget_bytes,
+        replay_window_slack_ms=replay_window_slack_ms,
+    )
+
+
+def write_csv_evidence(out_path: Path, report: FdrSizeReport) -> Path:
+    """Write a one-row CSV naming the AC-1/AC-2 verdicts and sizes.
+
+    Parent directories are created as needed and an existing file is
+    overwritten. The file is written as UTF-8 so the evidence does not
+    depend on the locale of the machine that ran the scenario.
+
+    Args:
+        out_path: Destination CSV path.
+        report: Report whose fields and verdict properties are recorded.
+
+    Returns:
+        ``out_path``, unchanged.
+    """
+
+    def _flag(value: bool) -> str:
+        # Verdicts are recorded as lowercase literals, not Python reprs.
+        return "true" if value else "false"
+
+    def _optional(value: int | None) -> int | str:
+        # Missing measurements become an empty cell.
+        return "" if value is None else value
+
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    # Header and value are kept side by side so the columns cannot drift.
+    columns = [
+        ("sample_count", report.sample_count),
+        ("replay_window_ms", report.replay_window_ms),
+        ("size_at_30min_bytes", _optional(report.size_at_30min_bytes)),
+        ("extrapolated_8h_bytes", _optional(report.extrapolated_8h_bytes)),
+        ("budget_bytes", report.budget_bytes),
+        ("replay_window_slack_ms", report.replay_window_slack_ms),
+        ("ac1_replay_window_passes", _flag(report.passes_replay_window)),
+        ("ac2_extrapolation_passes", _flag(report.passes_extrapolation)),
+        ("passes", _flag(report.passes)),
+    ]
+    # newline="" lets the csv module control line endings itself.
+    with out_path.open("w", newline="", encoding="utf-8") as fh:
+        writer = csv.writer(fh)
+        writer.writerow([name for name, _ in columns])
+        writer.writerow([value for _, value in columns])
+    return out_path
+
+
+def write_per_minute_csv(
+    out_path: Path, samples: Sequence[FdrSizeSample]
+) -> Path:
+    """Write one CSV row per sample for evidence trend lines.
+
+    Samples are sorted by ``monotonic_ms`` and numbered from 0 in that
+    order. Parent directories are created as needed and an existing file
+    is overwritten. The file is written as UTF-8 so the evidence does not
+    depend on the locale of the machine that ran the scenario.
+
+    Args:
+        out_path: Destination CSV path.
+        samples: Size samples in any order; may be empty, in which case
+            only the header row is written.
+
+    Returns:
+        ``out_path``, unchanged.
+    """
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    ordered = sorted(samples, key=lambda s: s.monotonic_ms)
+    # newline="" lets the csv module control line endings itself.
+    with out_path.open("w", newline="", encoding="utf-8") as fh:
+        writer = csv.writer(fh)
+        writer.writerow(["index", "monotonic_ms", "size_bytes"])
+        writer.writerows(
+            [i, s.monotonic_ms, s.size_bytes] for i, s in enumerate(ordered)
+        )
+    return out_path