"""Post-run filesystem read of the FDR archive. The FDR archive is a line-delimited JSON record stream per AZ-272 / AZ-273. Each line is an FDR envelope on the wire schema ``{schema_version, ts, producer_id, kind, payload, extra?}``. This module parses the JSON bytes and validates the wire envelope structurally — the runner image NEVER imports the SUT's FdrRecord schema directly so a breaking SUT change surfaces as a parse failure here (visible drift) rather than silently following along. The runner-side `FdrRecord` dataclass renames `kind` → `record_type` and projects `ts` (ISO 8601 wall-clock) onto an integer `monotonic_ms` field for downstream evaluators that work in milliseconds. Within one flight, ISO 8601 ms-since-epoch is monotonic at the millisecond resolution the evaluators care about (NFR-RES NTP drift is excluded by AC-7 of the FDR contract: the on-board clock is monotonic over the lifetime of one flight session). """ from __future__ import annotations import json from dataclasses import dataclass from datetime import datetime from pathlib import Path from typing import Iterator _WIRE_REQUIRED_KEYS = ("schema_version", "ts", "producer_id", "kind", "payload") @dataclass(frozen=True) class FdrRecord: """Mirror of `gps_denied_onboard.fdr_client.records.FdrRecord` — public-boundary copy. The schema is duplicated intentionally; if the SUT's FDR schema evolves in a breaking way, this duplicate file fails to parse (visible drift) rather than silently following along. """ producer_id: str monotonic_ms: int record_type: str payload: dict[str, object] def _ts_to_monotonic_ms(ts: str) -> int: """Project ISO 8601 ``ts`` onto an int millisecond value. Accepts trailing ``Z`` (UTC) which ``datetime.fromisoformat`` did not accept until 3.11; we normalise to ``+00:00`` first. """ normalised = ts[:-1] + "+00:00" if ts.endswith("Z") else ts dt = datetime.fromisoformat(normalised) return int(dt.timestamp() * 1000) def _parse_envelope(line_bytes: bytes, source: Path, line_no: int) -> FdrRecord: """Decode one JSONL line into a typed envelope. Wire-side keys are validated structurally; downstream payload keys are NOT validated here (the consuming evaluator owns its own payload contract). """ decoded = json.loads(line_bytes) if not isinstance(decoded, dict): raise ValueError( f"FDR line is not a JSON object: {source}:{line_no}: type={type(decoded).__name__}" ) missing = [k for k in _WIRE_REQUIRED_KEYS if k not in decoded] if missing: raise ValueError( f"FDR wire envelope missing required keys {missing} at {source}:{line_no}" ) ts = decoded["ts"] if not isinstance(ts, str) or not ts: raise ValueError(f"FDR envelope `ts` must be a non-empty ISO 8601 string at {source}:{line_no}") producer_id = decoded["producer_id"] if not isinstance(producer_id, str) or not producer_id: raise ValueError( f"FDR envelope `producer_id` must be a non-empty string at {source}:{line_no}" ) kind = decoded["kind"] if not isinstance(kind, str) or not kind: raise ValueError(f"FDR envelope `kind` must be a non-empty string at {source}:{line_no}") payload = decoded["payload"] if not isinstance(payload, dict): raise ValueError(f"FDR envelope `payload` must be an object at {source}:{line_no}") return FdrRecord( producer_id=producer_id, monotonic_ms=_ts_to_monotonic_ms(ts), record_type=kind, payload=payload, ) def iter_records(fdr_archive_root: Path) -> Iterator[FdrRecord]: """Iterate every FDR record in the archive root (ordered by monotonic_ms). Walks every ``*.jsonl`` file under ``fdr_archive_root`` (recursive), parses each line as a wire envelope, and yields the runner-side ``FdrRecord`` projection. Records are emitted oldest-first across the union of all files. Raises ``FileNotFoundError`` if the archive root does not exist. Raises ``ValueError`` (with a file + line pointer) on malformed JSON, a wrong-shape envelope, or an unparseable ``ts``. """ if not fdr_archive_root.exists(): raise FileNotFoundError( f"FDR archive root not found: {fdr_archive_root}" ) records: list[FdrRecord] = [] for jsonl_path in sorted(fdr_archive_root.rglob("*.jsonl")): if not jsonl_path.is_file(): continue with jsonl_path.open("rb") as fh: for line_no, raw in enumerate(fh, start=1): stripped = raw.strip() if not stripped: continue records.append(_parse_envelope(stripped, jsonl_path, line_no)) records.sort(key=lambda r: r.monotonic_ms) yield from records def archive_size_bytes(fdr_archive_root: Path) -> int: """Sum the size of every file under ``fdr_archive_root``. Concrete implementation here — it's a thin os.walk + stat loop that NFT-LIM-02 needs as soon as a real archive lands. """ if not fdr_archive_root.exists(): return 0 total = 0 for p in fdr_archive_root.rglob("*"): if p.is_file(): total += p.stat().st_size return total