[AZ-294] [AZ-295] [AZ-296] Finish C13: tile snapshot + record-kind policy + takeoff abort

AZ-294: MidFlightTileSnapshotSink writes orthorectified tile JPEGs
atomically to flight_root/<flight_id>/tiles/<tile_id>.jpg, emits a
kind="mid_flight_tile_snapshot" pointer record, and evicts the oldest
tile when the per-flight 64 MiB cap is exceeded. Adds optional
frame_id to the snapshot payload (fdr_record_schema bump).

AZ-295: RecordKindPolicy with two paired gates:
- enforce_or_raise (producer-side) raises RawFrameWriteForbiddenError
  for raw_nav_frame / raw_ai_cam_frame at the call site, defending
  AC-8.5 / RESTRICT-UAV-4.
- gate_for_writer (writer-side) tumbling-window rate-caps
  failed_tile_thumbnail records at <= 0.1 Hz; over-cap drops are
  coalesced into kind="overrun" records with the originating
  producer slug.

AZ-296: take_off() composition-root sequence with strict ordering
(writer.__init__ -> start -> open_flight -> fc_adapter.__init__ ->
fc_adapter.open). On FdrOpenError, it logs an ERROR record, calls
writer.stop(), prints the documented FATAL line to stderr, and exits via
sys.exit(EXIT_FDR_OPEN_FAILURE=2). composition_root_protocol is bumped
to v1.1.0 with the new constants + takeoff-sequence section.

29 new tests; full suite 356 passed / 2 skipped / 0 failures.
No new dependencies (stdlib only).

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-11 03:52:07 +03:00
parent b5dd6031d2
commit e4ecdaf619
21 changed files with 1657 additions and 9 deletions
@@ -0,0 +1,213 @@
"""AZ-294 — MidFlightTileSnapshotSink unit tests."""
from __future__ import annotations
import struct
from datetime import datetime, timedelta, timezone
from pathlib import Path
from uuid import uuid4
import pytest
from gps_denied_onboard.components.c13_fdr import (
MidFlightTileSnapshotSink,
TileSnapshotInvalidIdError,
TileSnapshotTooLargeError,
)
from gps_denied_onboard.config import TileSnapshotConfig
from gps_denied_onboard.fdr_client.client import FdrClient
from gps_denied_onboard.fdr_client.records import OVERRUN_KIND, parse
_LENGTH_PREFIX = struct.Struct("<I")
_JPEG_MAGIC = b"\xff\xd8\xff\xe0"
def _jpeg_blob(size: int = 1024) -> bytes:
    """Return a fake JPEG payload: ``_JPEG_MAGIC`` zero-padded to ``size`` bytes.

    When ``size`` does not exceed the length of the magic prefix, the prefix
    is returned without any padding.
    """
    return _JPEG_MAGIC.ljust(size, b"\x00")
def _make_sink(
    tmp_path: Path,
    config: TileSnapshotConfig | None = None,
) -> tuple[MidFlightTileSnapshotSink, FdrClient]:
    """Build a sink rooted at ``tmp_path`` together with the client it reports to.

    A default ``TileSnapshotConfig()`` is substituted when ``config`` is falsy,
    and every call uses a freshly generated flight id.

    Returns:
        ``(sink, client)`` so tests can drain the records the sink enqueued.
    """
    effective_config = config or TileSnapshotConfig()
    fdr_client = FdrClient(
        producer_id="shared.tile_snapshot_sink",
        capacity=256,
        _emit_diag_log=False,
    )
    snapshot_sink = MidFlightTileSnapshotSink(
        flight_root=tmp_path,
        flight_id=uuid4(),
        fdr_client=fdr_client,
        config=effective_config,
    )
    return snapshot_sink, fdr_client
def _drain_kinds(client: FdrClient) -> list[str]:
    """Drain up to 1024 records from ``client`` and return their ``kind`` values in order."""
    drained = client.drain(max_records=1024)
    return [record.kind for record in drained]
def test_ac1_write_snapshot_creates_canonical_jpeg(tmp_path: Path) -> None:
    """AC-1: a snapshot lands at ``<tiles_dir>/<tile_id>.jpg`` holding the exact bytes given."""
    # Arrange
    sink, _ = _make_sink(tmp_path)
    payload = _jpeg_blob(2048)
    captured_at = datetime(2026, 5, 11, tzinfo=timezone.utc)

    # Act
    written = sink.write_snapshot(
        tile_id="tile_001",
        jpeg_bytes=payload,
        captured_at=captured_at,
    )

    # Assert
    assert written.exists()
    assert written.name == "tile_001.jpg"
    assert written.read_bytes() == payload
    assert written.parent == sink.tiles_dir
def test_ac2_write_snapshot_emits_pointer_record(tmp_path: Path) -> None:
    """AC-2: one write enqueues exactly one ``mid_flight_tile_snapshot`` pointer record.

    The payload carries the path relative to the flight directory and the
    capture time in ISO-8601 form.
    """
    # Arrange
    sink, client = _make_sink(tmp_path)
    captured = datetime(2026, 5, 11, 12, 0, 0, tzinfo=timezone.utc)

    # Act
    sink.write_snapshot("tile_a", _jpeg_blob(), captured)
    drained = client.drain(max_records=16)

    # Assert
    assert len(drained) == 1
    pointer = drained[0]
    assert pointer.kind == "mid_flight_tile_snapshot"
    expected_payload = {
        "snapshot_path": "tiles/tile_a.jpg",
        "captured_at": captured.isoformat(),
    }
    for field, expected in expected_payload.items():
        assert pointer.payload[field] == expected
def test_ac3_oversize_jpeg_rejected(tmp_path: Path) -> None:
    """AC-3: a blob one byte over ``jpeg_max_bytes`` raises and leaves nothing behind."""
    # Arrange
    sink, client = _make_sink(tmp_path, TileSnapshotConfig(jpeg_max_bytes=256))
    oversize_blob = b"\x00" * 257

    # Act + Assert
    with pytest.raises(TileSnapshotTooLargeError, match=r"jpeg_max_bytes"):
        sink.write_snapshot("tile_a", oversize_blob, datetime.now(tz=timezone.utc))

    # No file is written; no pointer record enqueued.
    tiles_dir = sink.tiles_dir
    if tiles_dir.exists():
        assert not any(tiles_dir.iterdir())
    assert _drain_kinds(client) == []
def test_ac4_invalid_tile_id_rejected(tmp_path: Path) -> None:
    """AC-4: malformed tile ids raise ``TileSnapshotInvalidIdError`` and enqueue nothing.

    Covers path-traversal strings, an id containing spaces, a 129-character
    id, and the empty string.
    """
    # Arrange
    sink, client = _make_sink(tmp_path)
    rejected_ids = (
        "../etc/passwd",
        "tile with space",
        "../../e",
        "a" * 129,
        "",
    )

    # Act + Assert
    for bad_id in rejected_ids:
        with pytest.raises(TileSnapshotInvalidIdError):
            sink.write_snapshot(bad_id, _jpeg_blob(), datetime.now(tz=timezone.utc))
    assert _drain_kinds(client) == []
def test_ac5_atomic_write_temp_file_cleaned(tmp_path: Path) -> None:
    """AC-5: after a successful write no ``*.tmp`` file remains in the tiles directory."""
    # Arrange
    sink, _ = _make_sink(tmp_path)

    # Act
    sink.write_snapshot("tile_b", _jpeg_blob(), datetime.now(tz=timezone.utc))

    # Assert — no leftover `.tmp` file in the tiles directory
    stray_temp_files = [
        entry for entry in sink.tiles_dir.iterdir() if entry.name.endswith(".tmp")
    ]
    assert stray_temp_files == []
def test_ac6_cap_drop_oldest_when_exceeded(tmp_path: Path) -> None:
    """AC-6: exceeding the per-flight byte cap evicts the oldest tile and logs one overrun.

    With a 4 KiB cap and 2 KiB tiles, the third write must push out the first.
    All three pointer records are still emitted.
    """
    # Arrange: cap = 4 KiB; each JPEG = 2 KiB → 3rd write must evict 1st.
    kib = 1024
    sink, client = _make_sink(
        tmp_path,
        TileSnapshotConfig(
            tile_snapshot_cap_bytes=4 * kib,
            jpeg_max_bytes=3 * kib,
        ),
    )
    blob = _jpeg_blob(2 * kib)
    t0 = datetime(2026, 5, 11, tzinfo=timezone.utc)

    # Act — three writes, one second apart
    for offset_s, tile_id in enumerate(("tile_1", "tile_2", "tile_3")):
        sink.write_snapshot(tile_id, blob, t0 + timedelta(seconds=offset_s))

    # Assert — tile_1 evicted; tile_2 + tile_3 survive
    surviving = {entry.name for entry in sink.tiles_dir.iterdir()}
    assert "tile_1.jpg" not in surviving
    assert "tile_2.jpg" in surviving
    assert "tile_3.jpg" in surviving
    emitted_kinds = [record.kind for record in client.drain(max_records=64)]
    assert emitted_kinds.count(OVERRUN_KIND) == 1
    assert emitted_kinds.count("mid_flight_tile_snapshot") == 3
def test_ac7_thread_safe_concurrent_writes(tmp_path: Path) -> None:
    """AC-7: eight threads writing distinct tiles all succeed and each emits a pointer record.

    Any exception raised inside a writer thread is collected and asserted on
    from the main thread, because an exception in a worker would otherwise not
    fail the test.
    """
    # Arrange
    import threading

    sink, client = _make_sink(tmp_path)
    errors: list[BaseException] = []

    def writer(idx: int) -> None:
        try:
            sink.write_snapshot(
                f"tile_{idx:03d}",
                _jpeg_blob(1024),
                datetime.now(tz=timezone.utc),
            )
        except BaseException as exc:  # collected here, asserted on below
            errors.append(exc)

    # Act
    # daemon=True: a deadlocked writer must not keep the interpreter alive
    # after the test has already failed.
    threads = [
        threading.Thread(target=writer, args=(i,), daemon=True) for i in range(8)
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join(timeout=2.0)

    # Assert — join() returns silently on timeout, so check liveness explicitly
    # to report a hung writer instead of a misleading count mismatch.
    stuck = [t.name for t in threads if t.is_alive()]
    assert stuck == [], f"writer threads still running after join timeout: {stuck}"

    # Assert — all 8 tiles written; 8 pointer records emitted
    assert errors == []
    assert sum(1 for _p in sink.tiles_dir.iterdir() if _p.suffix == ".jpg") == 8
    kinds = [r.kind for r in client.drain(max_records=64)]
    assert kinds.count("mid_flight_tile_snapshot") == 8
def test_ac8_frame_id_optional_in_payload(tmp_path: Path) -> None:
    """AC-8: ``frame_id`` appears in the pointer payload only when the caller supplies it."""
    # Arrange
    sink, client = _make_sink(tmp_path)

    # Act-1: frame_id supplied
    sink.write_snapshot(
        "tile_c", _jpeg_blob(), datetime.now(tz=timezone.utc), frame_id=42
    )
    with_frame_id = client.drain(max_records=16)

    # Assert-1
    assert len(with_frame_id) == 1
    assert with_frame_id[0].payload["frame_id"] == 42

    # Act-2: frame_id omitted
    sink.write_snapshot("tile_d", _jpeg_blob(), datetime.now(tz=timezone.utc))
    without_frame_id = client.drain(max_records=16)

    # Assert-2
    assert len(without_frame_id) == 1
    assert "frame_id" not in without_frame_id[0].payload
def test_ac9_roundtrip_through_parse(tmp_path: Path) -> None:
    """The pointer record is unchanged after ``serialise`` followed by ``parse`` (AZ-272 v1.1)."""
    from gps_denied_onboard.fdr_client.records import serialise

    # Arrange
    sink, client = _make_sink(tmp_path)
    captured = datetime(2026, 5, 11, 9, 0, 0, tzinfo=timezone.utc)

    # Act
    sink.write_snapshot("tile_r", _jpeg_blob(), captured, frame_id=7)
    drained = client.drain(max_records=16)
    assert len(drained) == 1
    restored = parse(serialise(drained[0]))

    # Assert
    assert restored.kind == "mid_flight_tile_snapshot"
    expected_payload = {
        "snapshot_path": "tiles/tile_r.jpg",
        "captured_at": captured.isoformat(),
        "frame_id": 7,
    }
    for field, expected in expected_payload.items():
        assert restored.payload[field] == expected
@@ -0,0 +1,212 @@
"""AZ-295 — RecordKindPolicy: forbidden-kind + thumbnail rate-cap gates."""
from __future__ import annotations
import time
from unittest import mock
import pytest
from gps_denied_onboard.components.c13_fdr import (
GateDecision,
RawFrameWriteForbiddenError,
make_record_kind_policy,
)
from gps_denied_onboard.config import (
DEFAULT_FORBIDDEN_RECORD_KINDS,
ConfigError,
RecordKindPolicyConfig,
)
from gps_denied_onboard.fdr_client.records import OVERRUN_KIND, FdrRecord
_TS = "2026-05-11T00:00:00.000000Z"
def _rec(kind: str, *, producer_id: str = "c1_vio", payload: dict | None = None) -> FdrRecord:
    """Build a schema-version-1 ``FdrRecord`` of ``kind`` stamped with the fixed ``_TS``.

    A falsy ``payload`` (``None`` or an empty dict) is replaced by a new empty dict.
    """
    body = payload or {}
    return FdrRecord(
        schema_version=1,
        ts=_TS,
        producer_id=producer_id,
        kind=kind,
        payload=body,
    )
def test_ac1_enforce_or_raise_rejects_raw_nav_frame() -> None:
    """AC-1: ``raw_nav_frame`` is refused and the error names both the kind and the producer."""
    # Arrange
    policy = make_record_kind_policy(RecordKindPolicyConfig())
    forbidden = _rec("raw_nav_frame", producer_id="c1_vio")

    # Act + Assert
    with pytest.raises(RawFrameWriteForbiddenError) as excinfo:
        policy.enforce_or_raise(forbidden)

    message = str(excinfo.value)
    for fragment in ("raw_nav_frame", "c1_vio"):
        assert fragment in message
def test_ac2_enforce_or_raise_rejects_raw_ai_cam_frame() -> None:
    """AC-2: ``raw_ai_cam_frame`` is refused by the default policy."""
    # Arrange
    default_policy = make_record_kind_policy(RecordKindPolicyConfig())
    forbidden = _rec("raw_ai_cam_frame")

    # Act + Assert
    with pytest.raises(RawFrameWriteForbiddenError):
        default_policy.enforce_or_raise(forbidden)
def test_ac3_enforce_or_raise_allows_failed_tile_thumbnail() -> None:
    """AC-3: ``failed_tile_thumbnail`` passes the producer-side gate without raising."""
    # Arrange
    policy = make_record_kind_policy(RecordKindPolicyConfig())
    thumbnail = _rec(
        "failed_tile_thumbnail",
        payload={"frame_id": 1, "tile_id": "x", "jpeg_bytes_b64": "AAAA"},
    )

    # Act — the test passes as long as no exception propagates
    policy.enforce_or_raise(thumbnail)
def test_ac4_gate_admits_first_thumbnail_in_fresh_window() -> None:
    """AC-4: the first thumbnail seen by a new policy is admitted."""
    # Arrange
    config = RecordKindPolicyConfig(failed_tile_thumbnail_max_hz=0.1)
    policy = make_record_kind_policy(config)

    # Act
    decision = policy.gate_for_writer(_rec("failed_tile_thumbnail"))

    # Assert
    assert decision is GateDecision.ENQUEUE
def test_ac5_gate_drops_overflow_then_emits_coalesced_overrun() -> None:
    """AC-5: over-cap thumbnails are dropped and summarised in a single overrun record.

    Five thumbnails arrive back-to-back at a 0.1 Hz cap: the first is admitted
    and the other four are dropped. Draining yields one overrun carrying the
    drop count and the producer slug; a second drain yields nothing.
    """
    # Arrange
    policy = make_record_kind_policy(RecordKindPolicyConfig(failed_tile_thumbnail_max_hz=0.1))

    # Act — 5 thumbnails in immediate succession (well within 10 s window)
    first, *rest = (
        policy.gate_for_writer(_rec("failed_tile_thumbnail", producer_id="c6_tile_cache"))
        for _ in range(5)
    )

    # Assert — first ENQUEUE, next 4 DROP
    assert first is GateDecision.ENQUEUE
    assert rest == [GateDecision.DROP] * 4

    coalesced = policy.drain_pending_overrun()
    assert coalesced is not None
    assert coalesced.kind == OVERRUN_KIND
    assert coalesced.payload["dropped_count"] == 4
    assert coalesced.payload["producer_id"] == "c6_tile_cache"

    # Second drain is empty (counter cleared after drain).
    assert policy.drain_pending_overrun() is None
def test_ac6_forbidden_set_rejects_removal_of_defaults() -> None:
    """AC-6: configuring an empty forbidden set raises ``ConfigError`` naming a default kind."""
    # Arrange
    no_forbidden_kinds: frozenset[str] = frozenset()

    # Act + Assert
    with pytest.raises(ConfigError, match=r"raw_nav_frame|raw_ai_cam_frame"):
        RecordKindPolicyConfig(forbidden_record_kinds=no_forbidden_kinds)
def test_ac7_forbidden_set_allows_additions() -> None:
    """AC-7: extra kinds added on top of the defaults are refused alongside the defaults."""
    # Arrange
    widened = DEFAULT_FORBIDDEN_RECORD_KINDS | {"raw_thermal_frame"}
    config = RecordKindPolicyConfig(forbidden_record_kinds=frozenset(widened))
    policy = make_record_kind_policy(config)

    # Act + Assert — every member of the widened set must be rejected
    for forbidden_kind in widened:
        with pytest.raises(RawFrameWriteForbiddenError):
            policy.enforce_or_raise(_rec(forbidden_kind))
def test_ac8_zero_hz_rejected_at_config_validation() -> None:
    """AC-8: a thumbnail rate cap of 0 Hz raises ``ConfigError`` naming the offending field."""
    # Arrange
    zero_hz = 0.0

    # Act + Assert
    with pytest.raises(ConfigError, match=r"failed_tile_thumbnail_max_hz"):
        RecordKindPolicyConfig(failed_tile_thumbnail_max_hz=zero_hz)
def test_ac9_sliding_window_resets_across_windows(monkeypatch: pytest.MonkeyPatch) -> None:
    """AC-9: thumbnails spaced 11 s apart at a 0.1 Hz cap are all admitted.

    ``time.monotonic`` as seen by the policy module is replaced with a
    controllable clock, so the test does not depend on wall-clock time.
    """
    # Arrange — drive time via mock so the test is deterministic.
    clock = {"now": 0.0}

    def fake_monotonic() -> float:
        return clock["now"]

    monkeypatch.setattr(
        "gps_denied_onboard.components.c13_fdr.record_kind_policy.time.monotonic",
        fake_monotonic,
    )
    policy = make_record_kind_policy(RecordKindPolicyConfig(failed_tile_thumbnail_max_hz=0.1))

    # Act — t=0, t=11, t=22
    decisions = []
    for instant in (0.0, 11.0, 22.0):
        clock["now"] = instant
        decisions.append(policy.gate_for_writer(_rec("failed_tile_thumbnail")))

    # Assert
    assert decisions == [GateDecision.ENQUEUE] * 3
    assert policy.drain_pending_overrun() is None
def test_ac10_producer_slug_propagates_to_overrun(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """AC-10: the overrun record names the producer whose record was dropped.

    The admitted and the dropped thumbnail come from different producers so
    the assertion can tell which slug ended up in the overrun. With identical
    producers the check would pass regardless of which record the slug was
    taken from.
    """
    # Arrange
    policy = make_record_kind_policy(RecordKindPolicyConfig(failed_tile_thumbnail_max_hz=0.1))

    # Act — first thumbnail (admitted) from one producer; second (dropped) from another
    admitted = policy.gate_for_writer(_rec("failed_tile_thumbnail", producer_id="c1_vio"))
    dropped = policy.gate_for_writer(_rec("failed_tile_thumbnail", producer_id="c6_tile_cache"))
    overrun = policy.drain_pending_overrun()

    # Assert — guard the premise first, then the slug of the dropped record
    assert admitted is GateDecision.ENQUEUE
    assert dropped is GateDecision.DROP
    assert overrun is not None
    assert overrun.payload["producer_id"] == "c6_tile_cache"
def test_nfr_perf_enforce_or_raise_microbench() -> None:
    """NFR-perf: ``enforce_or_raise`` on an allowed record averages under 5 µs per call.

    The loop is timed several times and the fastest run is used. Slower runs
    reflect interference from other processes, not the cost of the code under
    test, so a single-run measurement makes this check flaky on loaded CI hosts.
    """
    # Arrange
    policy = make_record_kind_policy(RecordKindPolicyConfig())
    rec = _rec("vio.tick")
    iterations = 10_000
    repeats = 5
    # Warm-up call so one-time setup cost is not attributed to the timed loop.
    policy.enforce_or_raise(rec)

    # Act — best-of-N timing
    best_s = float("inf")
    for _run in range(repeats):
        start = time.perf_counter()
        for _ in range(iterations):
            policy.enforce_or_raise(rec)
        best_s = min(best_s, time.perf_counter() - start)

    # Assert: p99 ≤ 1 µs implies average should be well under 5 µs.
    avg_us = (best_s / iterations) * 1e6
    assert avg_us < 5.0, f"enforce_or_raise avg {avg_us:.2f} µs too high"
def test_nfr_reliability_immutable_forbidden_kinds() -> None:
    """NFR-reliability: the policy's forbidden-kind set cannot be mutated via ``add``.

    The attribute is read outside the ``pytest.raises`` block so that a
    missing or renamed ``forbidden_kinds`` attribute fails the test instead of
    satisfying the expected ``AttributeError``.
    """
    # Arrange
    policy = make_record_kind_policy(RecordKindPolicyConfig())
    forbidden_kinds = policy.forbidden_kinds

    # Act + Assert — frozenset has no add/remove
    with pytest.raises(AttributeError):
        forbidden_kinds.add("foo")  # type: ignore[attr-defined]
def test_non_thumbnail_records_always_enqueue() -> None:
    """Kinds other than ``failed_tile_thumbnail`` pass the writer-side gate."""
    # Arrange
    policy = make_record_kind_policy(RecordKindPolicyConfig())
    ordinary_kinds = ("vio.tick", "state.tick", "tile_match", "log")

    # Act + Assert — the generator stops at the first non-ENQUEUE decision
    assert all(
        policy.gate_for_writer(_rec(kind)) is GateDecision.ENQUEUE
        for kind in ordinary_kinds
    )
def test_warn_log_rate_limited(monkeypatch: pytest.MonkeyPatch) -> None:
    """A burst of 20 thumbnails triggers at most one WARN call on the policy's logger.

    NOTE(review): the ``<= 1`` bound also holds when no warning is emitted at
    all — confirm whether exactly one warning should be required here.
    """
    # Arrange
    config = RecordKindPolicyConfig(failed_tile_thumbnail_max_hz=0.1)
    policy = make_record_kind_policy(config)
    burst_size = 20

    # Capture log warnings emitted by the policy.
    with mock.patch.object(policy._log, "warning") as warning_spy:
        # Act — many drops in quick succession
        for _ in range(burst_size):
            policy.gate_for_writer(_rec("failed_tile_thumbnail"))

    # Assert — at most 1 warning fires (≤ 1 WARN/sec rate cap; first drop fires it)
    assert warning_spy.call_count <= 1