"""Tile-cache + storage compliance evaluators (AZ-421 / FT-P-15/16/18). Pure-logic evaluators sourced from: * **FDR archive** — the SUT's startup ``cache-self-check`` record carries the tile manifest entries the freshness/source/CRS contract has to hold over (FT-P-15 / AC-8.1, AC-NEW-2). * **Docker network + container inspect JSON** — verifies the SUT container is attached only to the ``e2e-net`` network and the network is configured with ``Internal: true`` (FT-P-16 / AC-8.3, RESTRICT-SAT-1). * **Filesystem walks** of ``${FDR_OUTPUT}`` and ``/var/azaion/tile-cache`` — verifies the SUT does NOT retain raw nav-camera / AI-camera frames (FT-P-18 / AC-8.5). The shared shape across all three sub-scenarios is the ``X...Report(passes: bool)`` dataclass — a scenario test that wants to assert all three pulls the report objects and asserts ``passes``. Public-boundary discipline: this module imports nothing from ``src/gps_denied_onboard``. Inputs are filesystem paths, parsed FDR records, and dicts decoded from ``docker network inspect`` / ``docker inspect`` JSON. """ from __future__ import annotations from dataclasses import dataclass from pathlib import Path from typing import Iterable, Sequence # ─────────────────────────── FT-P-15 / AC-8.1 ─────────────────────────── MANIFEST_M_PER_PX_FLOOR = 0.5 MANIFEST_REQUIRED_FIELDS: tuple[str, ...] = ( "crs", "tile_matrix", "dimension", "m_per_px", "capture_date", "source", "compression", ) CACHE_SELF_CHECK_FDR_KIND = "cache-self-check" TILE_LOAD_REJECTED_FDR_KIND = "tile-load-rejected" @dataclass(frozen=True) class ManifestEntryReport: """Per-entry result of the manifest schema + resolution-floor checks.""" entry_id: str missing_fields: tuple[str, ...] m_per_px: float | None @property def has_all_fields(self) -> bool: return not self.missing_fields @property def passes_floor(self) -> bool: return self.m_per_px is not None and self.m_per_px >= MANIFEST_M_PER_PX_FLOOR @property def passes(self) -> bool: return self.has_all_fields and self.passes_floor @dataclass(frozen=True) class ManifestSchemaReport: """AC-1 + AC-2 of FT-P-15: schema completeness + resolution floor.""" entries: tuple[ManifestEntryReport, ...] rejected_below_floor_ids: tuple[str, ...] m_per_px_floor: float = MANIFEST_M_PER_PX_FLOOR @property def total_entries(self) -> int: return len(self.entries) @property def entries_with_missing_fields(self) -> tuple[ManifestEntryReport, ...]: return tuple(e for e in self.entries if not e.has_all_fields) @property def entries_below_floor(self) -> tuple[ManifestEntryReport, ...]: return tuple(e for e in self.entries if e.m_per_px is not None and not e.passes_floor) @property def passes(self) -> bool: if not self.entries: return False if self.entries_with_missing_fields: return False for entry in self.entries: if entry.m_per_px is None: return False if entry.passes_floor: continue # below floor — must be rejected at load if entry.entry_id not in self.rejected_below_floor_ids: return False return True def evaluate_manifest_schema( manifest_entries: Sequence[dict], *, tile_load_rejected_ids: Sequence[str] = (), m_per_px_floor: float = MANIFEST_M_PER_PX_FLOOR, required_fields: Sequence[str] = MANIFEST_REQUIRED_FIELDS, ) -> ManifestSchemaReport: """Evaluate AC-1 + AC-2 of FT-P-15 against parsed manifest entries. Each ``manifest_entries`` element is the ``payload.entries[i]`` dict extracted from an FDR ``cache-self-check`` record. ``entry_id`` is looked up under ``"id"`` then ``"tile_id"`` then synthesised from the entry's index — scenarios should prefer ``"id"`` if their schema names it differently and adjust upstream. ``tile_load_rejected_ids`` is the set of tile IDs the SUT has rejected at load time via FDR ``tile-load-rejected`` events; an entry with ``m_per_px < floor`` only passes if its ID appears in this set. """ if m_per_px_floor <= 0: raise ValueError(f"m_per_px_floor must be > 0, got {m_per_px_floor}") rejected = tuple(tile_load_rejected_ids) entries: list[ManifestEntryReport] = [] for idx, entry in enumerate(manifest_entries): entry_id = _resolve_entry_id(entry, idx) missing = tuple(f for f in required_fields if f not in entry) raw_m_per_px = entry.get("m_per_px") m_per_px: float | None if isinstance(raw_m_per_px, (int, float)): m_per_px = float(raw_m_per_px) else: m_per_px = None entries.append( ManifestEntryReport( entry_id=entry_id, missing_fields=missing, m_per_px=m_per_px, ) ) return ManifestSchemaReport( entries=tuple(entries), rejected_below_floor_ids=rejected, m_per_px_floor=m_per_px_floor, ) def _resolve_entry_id(entry: dict, idx: int) -> str: for key in ("id", "tile_id", "tile_matrix"): if key in entry and isinstance(entry[key], str) and entry[key]: return entry[key] return f"entry_{idx}" # ─────────────────────────── FT-P-16 / AC-8.3 ─────────────────────────── E2E_NETWORK_NAME = "e2e-net" @dataclass(frozen=True) class OfflineModeReport: """AC-3 of FT-P-16: SUT container is on `e2e-net` only and the net is internal.""" network_name: str network_internal: bool | None container_networks: tuple[str, ...] expected_network: str = E2E_NETWORK_NAME @property def container_has_only_expected_network(self) -> bool: return self.container_networks == (self.expected_network,) @property def passes(self) -> bool: if self.network_internal is not True: return False return self.container_has_only_expected_network def evaluate_offline_mode( network_inspect: dict, container_inspect: dict, *, expected_network: str = E2E_NETWORK_NAME, ) -> OfflineModeReport: """Evaluate AC-3 of FT-P-16 from ``docker network inspect`` + ``docker inspect``. ``network_inspect`` is a single network object (the JSON shape ``docker network inspect `` returns inside a list — the scenario unwraps the list). Required key: ``Internal: bool``. ``container_inspect`` is a single container object. Required key path: ``NetworkSettings.Networks`` (a dict whose keys are network names the container is attached to). """ network_internal = network_inspect.get("Internal") if not isinstance(network_internal, bool): network_internal = None nets_map = ( container_inspect.get("NetworkSettings", {}).get("Networks", {}) if isinstance(container_inspect.get("NetworkSettings"), dict) else {} ) container_networks: tuple[str, ...] = ( tuple(sorted(nets_map.keys())) if isinstance(nets_map, dict) else () ) return OfflineModeReport( network_name=str(network_inspect.get("Name", "")), network_internal=network_internal, container_networks=container_networks, expected_network=expected_network, ) # ─────────────────────────── FT-P-18 / AC-8.5 ─────────────────────────── NAV_CAMERA_RAW_DIMENSIONS = (5472, 3648) NAV_CAMERA_DECODED_DIMENSIONS = (880, 720) RAW_FRAME_EXTENSIONS = (".jpg", ".jpeg", ".raw", ".dng", ".cr2", ".nef", ".arw", ".bin") THUMBNAIL_LOG_EXTENSIONS = (".log", ".jsonl", ".txt") THUMBNAIL_LOG_MAX_SIZE_GB_PER_8H = 1.0 THUMBNAIL_LOG_MAX_SIZE_BYTES_PER_8H = int(THUMBNAIL_LOG_MAX_SIZE_GB_PER_8H * 1024**3) @dataclass(frozen=True) class RawFrameCandidate: """One filesystem entry that matched the raw-frame heuristic.""" path: Path size_bytes: int dimensions: tuple[int, int] | None reason: str @dataclass(frozen=True) class RawFrameDetectionReport: """AC-4 of FT-P-18: zero raw-frame retention.""" candidates: tuple[RawFrameCandidate, ...] nav_camera_raw_dimensions: tuple[int, int] = NAV_CAMERA_RAW_DIMENSIONS nav_camera_decoded_dimensions: tuple[int, int] = NAV_CAMERA_DECODED_DIMENSIONS @property def candidate_count(self) -> int: return len(self.candidates) @property def passes(self) -> bool: return self.candidate_count == 0 def detect_raw_frames( file_specs: Iterable[tuple[Path, int, tuple[int, int] | None]], *, raw_dimensions: tuple[int, int] = NAV_CAMERA_RAW_DIMENSIONS, decoded_dimensions: tuple[int, int] = NAV_CAMERA_DECODED_DIMENSIONS, raw_extensions: Sequence[str] = RAW_FRAME_EXTENSIONS, ) -> RawFrameDetectionReport: """AC-4: detect any file whose extension + dimensions match raw nav frames. ``file_specs`` is an iterable of ``(path, size_bytes, dimensions)`` triples. The scenario test produces this by walking the filesystem and probing each image file's dimensions; this evaluator only decides *which* of those triples count as raw frames. A file matches when: 1. Extension is in ``raw_extensions``, AND 2. ``dimensions`` equals either the raw nav-cam dims (5472×3648, order-insensitive) OR the H.264-decoded dims (880×720, order-insensitive). A file with a raw extension but unknown dimensions does NOT match (the scenario is expected to fail dimension probe loudly, not be silently absorbed by the evaluator). """ targets = {tuple(sorted(raw_dimensions)), tuple(sorted(decoded_dimensions))} raw_ext_lower = tuple(ext.lower() for ext in raw_extensions) candidates: list[RawFrameCandidate] = [] for path, size_bytes, dims in file_specs: if path.suffix.lower() not in raw_ext_lower: continue if dims is None: continue if tuple(sorted(dims)) not in targets: continue candidates.append( RawFrameCandidate( path=path, size_bytes=size_bytes, dimensions=dims, reason=( f"extension {path.suffix} + dimensions {dims} match nav-camera raw pattern" ), ) ) return RawFrameDetectionReport( candidates=tuple(candidates), nav_camera_raw_dimensions=raw_dimensions, nav_camera_decoded_dimensions=decoded_dimensions, ) @dataclass(frozen=True) class ThumbnailLogBudgetReport: """AC-5 of FT-P-18: thumbnail log size budget under 1 GB / 8 h.""" observed_size_bytes: int observed_duration_h: float extrapolated_8h_size_bytes: int max_size_bytes_per_8h: int = THUMBNAIL_LOG_MAX_SIZE_BYTES_PER_8H @property def passes(self) -> bool: if self.observed_duration_h <= 0: return False return self.extrapolated_8h_size_bytes < self.max_size_bytes_per_8h def evaluate_thumbnail_budget( observed_size_bytes: int, observed_duration_h: float, *, max_size_bytes_per_8h: int = THUMBNAIL_LOG_MAX_SIZE_BYTES_PER_8H, ) -> ThumbnailLogBudgetReport: """AC-5: extrapolate observed thumbnail log size to an 8h flight. ``observed_size_bytes`` is the sum of every thumbnail-log file under the FDR + cache walk (extensions in ``THUMBNAIL_LOG_EXTENSIONS``). ``observed_duration_h`` is the wall-clock duration of the replay segment that produced them. Extrapolation is linear: ``size * (8 / duration_h)``. Returns a report whose ``passes`` flag holds when ``extrapolated_8h_size_bytes < max_size_bytes_per_8h``. """ if observed_size_bytes < 0: raise ValueError(f"observed_size_bytes must be ≥0, got {observed_size_bytes}") if max_size_bytes_per_8h <= 0: raise ValueError( f"max_size_bytes_per_8h must be >0, got {max_size_bytes_per_8h}" ) if observed_duration_h <= 0: extrapolated = -1 else: extrapolated = int(observed_size_bytes * (8.0 / observed_duration_h)) return ThumbnailLogBudgetReport( observed_size_bytes=observed_size_bytes, observed_duration_h=observed_duration_h, extrapolated_8h_size_bytes=extrapolated, max_size_bytes_per_8h=max_size_bytes_per_8h, ) # ─────────────────────── Filesystem walk helpers ─────────────────────── def walk_files(*roots: Path) -> Iterable[Path]: """Recursive file iterator over every existing root. Convenience for the FT-P-18 scenario: stitch together ``fdr_archive_root`` + ``tile_cache_root`` walks under one call. Non-existent roots are silently skipped (the FDR archive may be absent on a skip-gated local run — the scenario explicitly checks that elsewhere). """ for root in roots: if not root.exists(): continue for p in root.rglob("*"): if p.is_file(): yield p def probe_jpeg_dimensions(path: Path) -> tuple[int, int] | None: """Return ``(width, height)`` of a JPEG by parsing its SOF marker. Pure-stdlib JPEG SOF0/SOF1/SOF2 parser — avoids loading the full image (so a directory walk over hundreds of files is cheap) and avoids a runtime dep on Pillow/OpenCV here (both are available in the runner but adding them as a hard import would couple the evaluator to those packages for what is fundamentally a 32-byte header read). Returns ``None`` if the file is not a JPEG, the SOF marker is not present, or the file is truncated. """ try: with path.open("rb") as fh: head = fh.read(2) if head != b"\xff\xd8": return None while True: marker_prefix = fh.read(1) if not marker_prefix: return None if marker_prefix != b"\xff": return None marker = fh.read(1) if not marker: return None # SOF markers: 0xC0-0xCF except 0xC4 (DHT), 0xC8 (JPG), 0xCC (DAC) if marker[0] in (0xC0, 0xC1, 0xC2, 0xC3, 0xC5, 0xC6, 0xC7, 0xC9, 0xCA, 0xCB, 0xCD, 0xCE, 0xCF): fh.read(3) # segment length (2) + precision (1) h_bytes = fh.read(2) w_bytes = fh.read(2) if len(h_bytes) != 2 or len(w_bytes) != 2: return None height = int.from_bytes(h_bytes, "big") width = int.from_bytes(w_bytes, "big") return (width, height) seg_len_bytes = fh.read(2) if len(seg_len_bytes) != 2: return None seg_len = int.from_bytes(seg_len_bytes, "big") if seg_len < 2: return None fh.seek(seg_len - 2, 1) except OSError: return None