Files
gps-denied-onboard/tests/unit/c13_fdr/test_az291_writer_thread.py
T
Oleksandr Bezdieniezhnykh b5dd6031d2 [AZ-291] [AZ-292] [AZ-293] C13 FDR writer chain (batch 6)
AZ-291 — FileFdrWriter: single writer thread draining every registered
FdrClient SPSC ring buffer to per-flight segment files; per-segment
size rotation; cross-process fcntl.flock filelock on flight_root;
ENOSPC degraded mode with rate-capped ERROR logs and one GCS alert.

AZ-292 — FlightHeader/FlightFooter dataclasses + open_flight /
close_flight lifecycle methods; four per-flight monotonic counters
(records_written, records_dropped_overrun, bytes_written,
rollover_count) reported by the footer; flight_id mismatch and
close-without-open are typed errors.

AZ-293 — CapacityCapPolicy (post-rotation hook): walks the flight
directory, drops the oldest CLOSED segment when total > cap (default
64 GiB), emits a kind="segment_rollover" record per drop. Never drops
the currently-open segment or segment 0 alone; cap_misconfigured path
logs ERROR + GCS alert. No config flag disables emission (C13-ST-01).

Schema: bumped fdr_record_schema flight_header / flight_footer payload
key sets to match the AZ-292 task spec (effective 1.0.0 -> 1.1.0; no
prior producer); KNOWN_PAYLOAD_KEYS updated. Added FdrWriterConfig
nested in FdrConfig (segment_size_bytes, batch_size, flight_cap_bytes,
debug_log_per_record).

Tests: 29 new unit tests (8 AC + 1 invariant per task); full suite
323 passed, 2 pre-existing skips, 0 regressions.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-11 03:38:58 +03:00

405 lines
11 KiB
Python

"""AZ-291 — FileFdrWriter writer thread + segment lifecycle.
Covers AC-1..AC-8 plus the double-start invariant; every test receives a fresh flight_id from the ``flight_id`` fixture.
"""
from __future__ import annotations
import errno
import os
import struct
import time
from collections.abc import Iterator
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from unittest import mock
from uuid import UUID, uuid4
import pytest
from gps_denied_onboard.components.c13_fdr import (
FdrConcurrentWriterError,
FileFdrWriter,
FlightHeader,
)
from gps_denied_onboard.config import FdrWriterConfig
from gps_denied_onboard.fdr_client.client import FdrClient
from gps_denied_onboard.fdr_client.records import FdrRecord, parse
# "<I" = little-endian unsigned 32-bit integer. _read_records unpacks one of
# these before every serialized record to learn that record's byte length.
_LENGTH_PREFIX = struct.Struct("<I")
def _make_header(flight_id: UUID) -> FlightHeader:
    """Build a ``FlightHeader`` for ``flight_id`` stamped with the current clocks.

    The wall-clock start is the current UTC time in ISO-8601 form and the
    monotonic start comes from ``time.monotonic_ns()``. The remaining fields
    are small fixed placeholders.
    """
    started_wall_iso = datetime.now(tz=timezone.utc).isoformat()
    started_monotonic_ns = time.monotonic_ns()
    header_fields: dict[str, Any] = {
        "flight_id": flight_id,
        "flight_started_at_iso": started_wall_iso,
        "flight_started_at_monotonic_ns": started_monotonic_ns,
        "config_snapshot": {"tier": 2},
        "signing_key_rotation_event": {},
        "manifest_content_hashes": {},
        "build_info": {"commit": "abc1234"},
    }
    return FlightHeader(**header_fields)
def _make_client(producer_id: str = "c1_vio", capacity: int = 256) -> FdrClient:
    """Create an ``FdrClient`` for tests.

    Args:
        producer_id: forwarded to ``FdrClient`` unchanged.
        capacity: forwarded to ``FdrClient`` unchanged.

    Returns:
        The new client, always constructed with ``_emit_diag_log=False``.
    """
    client = FdrClient(
        producer_id=producer_id,
        capacity=capacity,
        _emit_diag_log=False,
    )
    return client
def _payload(i: int) -> FdrRecord:
    """Return a ``vio.tick`` record whose payload ``frame_id`` is ``i``.

    The record is attributed to producer ``"c1_vio"`` and timestamped with
    the current UTC time. All payload fields other than ``frame_id`` are
    fixed: ``R`` is a 3x3 integer identity matrix, ``t`` is three zeros,
    ``P`` is empty and ``last_anchor_age_ms`` is 0.
    """
    identity_3x3 = [[int(row == col) for col in range(3)] for row in range(3)]
    body = {
        "frame_id": i,
        "R": identity_3x3,
        "t": [0] * 3,
        "P": [],
        "last_anchor_age_ms": 0,
    }
    stamp = datetime.now(tz=timezone.utc).isoformat()
    return FdrRecord(
        schema_version=1,
        ts=stamp,
        producer_id="c1_vio",
        kind="vio.tick",
        payload=body,
    )
def _read_records(path: Path) -> list[FdrRecord]:
    """Decode every length-prefixed record stored in the file at ``path``.

    The file is read as a back-to-back sequence of frames, each a
    ``_LENGTH_PREFIX`` header followed by that many payload bytes, which are
    handed to ``parse``.

    Args:
        path: segment file to read in full.

    Returns:
        The parsed records in file order; an empty list for an empty file.

    Raises:
        ValueError: if the file ends in the middle of a length prefix or in
            the middle of a payload. Without this check a short final slice
            would be passed to ``parse`` silently, hiding a half-written
            record from the tests that look for one.
    """
    data = path.read_bytes()
    total = len(data)
    prefix_size = _LENGTH_PREFIX.size
    records: list[FdrRecord] = []
    offset = 0
    while offset < total:
        if total - offset < prefix_size:
            raise ValueError(
                f"{path}: truncated length prefix at offset {offset} "
                f"({total - offset} of {prefix_size} bytes present)"
            )
        (length,) = _LENGTH_PREFIX.unpack_from(data, offset)
        offset += prefix_size
        end = offset + length
        if end > total:
            raise ValueError(
                f"{path}: truncated record at offset {offset} "
                f"(declared {length} bytes, {total - offset} present)"
            )
        records.append(parse(data[offset:end]))
        offset = end
    return records
def _collect_alerts() -> tuple[list[str], Any]:
msgs: list[str] = []
def alert(msg: str) -> None:
msgs.append(msg)
return msgs, alert
@pytest.fixture()
def flight_root(tmp_path: Path) -> Path:
    """Return the path ``<tmp_path>/fdr``; the directory is not created here."""
    return tmp_path.joinpath("fdr")
@pytest.fixture()
def flight_id() -> UUID:
    """Return a newly generated random UUID for the requesting test."""
    fresh_id = uuid4()
    return fresh_id
@pytest.fixture()
def base_config() -> FdrWriterConfig:
    """Return the writer configuration shared by most tests.

    Uses a 64 MiB segment size, a batch size of 64, a 64 GiB flight cap and
    ``debug_log_per_record`` switched off.
    """
    mib = 1024 * 1024
    gib = 1024**3
    settings = {
        "segment_size_bytes": 64 * mib,
        "batch_size": 64,
        "flight_cap_bytes": 64 * gib,
        "debug_log_per_record": False,
    }
    return FdrWriterConfig(**settings)
@pytest.fixture()
def writer(
    flight_root: Path, flight_id: UUID, base_config: FdrWriterConfig
) -> Iterator[FileFdrWriter]:
    """Yield an unstarted ``FileFdrWriter`` wired to a single default client.

    The writer is built but ``start()`` is left to the test. On teardown the
    writer is stopped unless its ``_closed`` flag is already set.
    """
    # Only the callback is needed; the recorded messages are not inspected.
    alert_fn = _collect_alerts()[1]
    producers = [_make_client()]
    instance = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=base_config,
        fdr_clients=producers,
        gcs_alert=alert_fn,
    )
    yield instance
    if instance._closed:
        return
    instance.stop()
def test_ac1_drain_all_registered_producers(
    flight_root: Path, flight_id: UUID, base_config: FdrWriterConfig
) -> None:
    """AC-1: one writer drains every registered client into the segment file.

    Three clients each enqueue 100 ``vio.tick`` records. After the flight is
    closed, the current segment must hold all 300 of them, framed by a
    ``flight_header`` first and a ``flight_footer`` last, and the footer must
    report 302 records written.
    """
    # Arrange
    clients = [_make_client(f"c{i}_test") for i in range(3)]
    _alerts, alert_fn = _collect_alerts()
    writer = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=base_config,
        fdr_clients=clients,
        gcs_alert=alert_fn,
    )
    writer.start()
    try:
        writer.open_flight(_make_header(flight_id))
        for client in clients:
            for i in range(100):
                client.enqueue(_payload(i))

        # Act — poll (up to 5 s) until every client buffer reports empty.
        deadline = time.monotonic() + 5.0
        while time.monotonic() < deadline:
            if all(c._buffer_size() == 0 for c in clients):
                break
            time.sleep(0.01)
        footer = writer.close_flight()

        # Assert
        records = _read_records(writer.current_segment_path())
        vio_count = sum(1 for r in records if r.kind == "vio.tick")
        assert vio_count == 300
        assert records[0].kind == "flight_header"
        assert records[-1].kind == "flight_footer"
        assert footer.records_written == 302  # 300 + header + footer
    finally:
        # Always stop the started writer, pass or fail, using the same guard
        # as the `writer` fixture teardown, so the test does not leave a
        # running writer behind.
        if not writer._closed:
            writer.stop()
def test_ac2_per_segment_rotation_at_size_cap(flight_root: Path, flight_id: UUID) -> None:
    """AC-2: the writer rotates segments once the per-segment size cap is hit.

    With a 2048-byte segment cap, 40 enqueued records must end up spread over
    at least two ``segment-*.fdr`` files, and reading the files in sorted
    name order must yield frame ids 0..39 in order.
    """
    # Arrange — small segment cap; the writer must rotate.
    config = FdrWriterConfig(segment_size_bytes=2048, batch_size=4, flight_cap_bytes=1024**3)
    _alerts, alert_fn = _collect_alerts()
    client = _make_client()
    writer = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=config,
        fdr_clients=[client],
        gcs_alert=alert_fn,
    )
    writer.start()
    try:
        writer.open_flight(_make_header(flight_id))
        for i in range(40):
            client.enqueue(_payload(i))

        # Act — poll (up to 5 s) until the client buffer reports empty.
        deadline = time.monotonic() + 5.0
        while time.monotonic() < deadline and client._buffer_size() > 0:
            time.sleep(0.01)
        writer.close_flight()

        # Assert — at least two segment files exist.
        segs = sorted(writer.flight_dir.glob("segment-*.fdr"))
        assert len(segs) >= 2, f"expected >=2 segments, got {[p.name for p in segs]}"
        all_records: list[FdrRecord] = []
        for seg in segs:
            all_records.extend(_read_records(seg))
        vio = [r for r in all_records if r.kind == "vio.tick"]
        frame_ids = [r.payload["frame_id"] for r in vio]
        assert frame_ids == list(range(40))
    finally:
        # Always stop the started writer, pass or fail, using the same guard
        # as the `writer` fixture teardown.
        if not writer._closed:
            writer.stop()
def test_ac3_atomic_rotation_no_half_segment(flight_root: Path, flight_id: UUID) -> None:
    """AC-3: after ``stop()`` without ``close_flight()``, every segment still parses.

    Twenty records are written against a 1024-byte segment cap, then the
    writer is stopped with the flight still open. Every ``segment-*.fdr``
    file must decode through ``_read_records`` and yield records with
    ``schema_version >= 1``.
    """
    # Arrange
    config = FdrWriterConfig(segment_size_bytes=1024, batch_size=4, flight_cap_bytes=1024**3)
    _alerts, alert_fn = _collect_alerts()
    client = _make_client()
    writer = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=config,
        fdr_clients=[client],
        gcs_alert=alert_fn,
    )
    writer.start()
    writer.open_flight(_make_header(flight_id))
    for i in range(20):
        client.enqueue(_payload(i))
    deadline = time.monotonic() + 5.0
    while time.monotonic() < deadline and client._buffer_size() > 0:
        time.sleep(0.01)

    # Act — abrupt stop (no close_flight).
    writer.stop()

    # Assert — every segment file parses cleanly.
    segs = sorted(writer.flight_dir.glob("segment-*.fdr"))
    # Guard against a vacuous pass: with no files the loop below checks nothing.
    assert segs, "expected at least one segment file after stop()"
    for seg in segs:
        for r in _read_records(seg):
            assert r.schema_version >= 1
def test_ac4_concurrent_writer_blocked_by_filelock(
    flight_root: Path, flight_id: UUID, base_config: FdrWriterConfig
) -> None:
    """AC-4: a second writer on the same ``flight_root`` cannot start.

    Writer A is started first. Writer B shares A's ``flight_root`` but uses a
    different flight id and client; its ``start()`` must raise
    ``FdrConcurrentWriterError``.
    """
    # Arrange
    _alerts, alert_fn = _collect_alerts()
    client_a = _make_client("c1_vio")
    writer_a = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=base_config,
        fdr_clients=[client_a],
        gcs_alert=alert_fn,
    )
    writer_a.start()
    try:
        client_b = _make_client("c2_vpr")
        writer_b = FileFdrWriter(
            flight_root=flight_root,
            flight_id=uuid4(),
            config=base_config,
            fdr_clients=[client_b],
            gcs_alert=alert_fn,
        )

        # Act, Assert
        with pytest.raises(FdrConcurrentWriterError):
            writer_b.start()
    finally:
        # Cleanup — runs even when the expectation above fails, so writer A
        # is never left running.
        writer_a.stop()
def test_ac5_enospc_degrades_and_alerts(
    flight_root: Path, flight_id: UUID, base_config: FdrWriterConfig
) -> None:
    """AC-5: an ENOSPC write failure degrades the writer and raises an alert.

    The first patched ``os.write`` call raises ``OSError(ENOSPC)`` and later
    calls fall through to the real ``os.write``. The writer must then report
    ``is_degraded()`` and the alert callback must have received a message
    containing ``"FDR write failure"``.
    """
    # Arrange
    alerts, alert_fn = _collect_alerts()
    client = _make_client()
    writer = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=base_config,
        fdr_clients=[client],
        gcs_alert=alert_fn,
    )
    writer.start()
    try:
        writer.open_flight(_make_header(flight_id))
        real_write = os.write
        state = {"first": True}

        def failing_write(fd: int, data: bytes) -> int:
            # Fail exactly once, then behave like the real os.write.
            if state["first"]:
                state["first"] = False
                raise OSError(errno.ENOSPC, "fake ENOSPC")
            return real_write(fd, data)

        # Act
        # NOTE(review): the target resolves through the writer module's `os`
        # attribute, presumably the shared `os` module, so the patch likely
        # affects every `os.write` caller while active — confirm.
        with mock.patch(
            "gps_denied_onboard.components.c13_fdr.writer.os.write", side_effect=failing_write
        ):
            client.enqueue(_payload(0))
            deadline = time.monotonic() + 2.0
            # Wait for BOTH observable effects. The alert is delivered from
            # the writer side, so polling on is_degraded() alone could let
            # the alert assertion race it.
            while time.monotonic() < deadline and not (writer.is_degraded() and alerts):
                time.sleep(0.01)

        # Assert
        assert writer.is_degraded()
        assert len(alerts) >= 1
        assert "FDR write failure" in alerts[0]
    finally:
        # Stop the writer even when an assertion above fails.
        writer.stop()
def test_ac6_stop_drains_and_releases_lock(
    flight_root: Path, flight_id: UUID, base_config: FdrWriterConfig
) -> None:
    """AC-6: once ``stop()`` returns, another writer can start on the same root.

    The first writer opens a flight, receives 50 records and is stopped. A
    second writer on the same ``flight_root`` (new flight id, new client)
    must then start without raising.
    """
    # Arrange
    alert_fn = _collect_alerts()[1]
    producer = _make_client()
    first = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=base_config,
        fdr_clients=[producer],
        gcs_alert=alert_fn,
    )
    first.start()
    first.open_flight(_make_header(flight_id))
    # map() is lazy, so each record is built right before it is enqueued.
    for record in map(_payload, range(50)):
        producer.enqueue(record)

    # Act
    first.stop()

    # Assert — a second writer can claim the filelock.
    second_kwargs = {
        "flight_root": flight_root,
        "flight_id": uuid4(),
        "config": base_config,
        "fdr_clients": [_make_client("c5_state")],
        "gcs_alert": alert_fn,
    }
    second = FileFdrWriter(**second_kwargs)
    second.start()  # would raise if lock still held
    second.stop()
def test_ac7_segment_layout(flight_root: Path, flight_id: UUID) -> None:
    """AC-7: every file in the flight directory is named ``segment-NNNN.fdr``.

    After writing 40 records against a 1024-byte segment cap and closing the
    flight, each regular file under ``<flight_root>/<flight_id>`` must start
    with ``segment-``, end with ``.fdr`` and carry exactly four digits in
    between.
    """
    # Arrange
    config = FdrWriterConfig(segment_size_bytes=1024, batch_size=4, flight_cap_bytes=1024**3)
    _alerts, alert_fn = _collect_alerts()
    client = _make_client()
    writer = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=config,
        fdr_clients=[client],
        gcs_alert=alert_fn,
    )
    writer.start()
    try:
        writer.open_flight(_make_header(flight_id))
        for i in range(40):
            client.enqueue(_payload(i))
        deadline = time.monotonic() + 5.0
        while time.monotonic() < deadline and client._buffer_size() > 0:
            time.sleep(0.01)
        writer.close_flight()

        # Assert
        flight_dir = flight_root / str(flight_id)
        names = sorted(p.name for p in flight_dir.iterdir() if p.is_file())
        # Guard against a vacuous pass: an empty listing would check nothing.
        assert names, f"no files found in {flight_dir}"
        for name in names:
            assert name.startswith("segment-") and name.endswith(".fdr"), name
            # 4-digit zero-padded.
            stem = name[len("segment-") : -len(".fdr")]
            assert len(stem) == 4 and stem.isdigit()
    finally:
        # Always stop the started writer, pass or fail, using the same guard
        # as the `writer` fixture teardown.
        if not writer._closed:
            writer.stop()
def test_ac8_steady_state_no_overrun(
    flight_root: Path, flight_id: UUID, base_config: FdrWriterConfig
) -> None:
    """AC-8: a paced stream of records never triggers the client's overrun hook.

    200 records are enqueued into a 2048-slot client with a 1 ms sleep after
    each one. The hook installed on ``client.on_overrun`` counts its
    invocations, and the count must still be zero once the buffer reports
    empty.
    """
    # Arrange — a small burst that the writer drains within a few seconds.
    _alerts, alert_fn = _collect_alerts()
    client = _make_client(capacity=2048)
    writer = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=base_config,
        fdr_clients=[client],
        gcs_alert=alert_fn,
    )
    overrun_seen = {"count": 0}

    def overrun_hook(record: FdrRecord) -> None:
        # The record itself is irrelevant; only the call count matters.
        overrun_seen["count"] += 1

    client.on_overrun = overrun_hook
    writer.start()
    try:
        writer.open_flight(_make_header(flight_id))

        # Act — emit 200 records with a 1 ms sleep after each. That caps the
        # rate at roughly 1 kHz; actual sleep granularity may make it slower.
        for i in range(200):
            client.enqueue(_payload(i))
            time.sleep(0.001)
        deadline = time.monotonic() + 5.0
        while time.monotonic() < deadline and client._buffer_size() > 0:
            time.sleep(0.01)

        # Assert
        assert overrun_seen["count"] == 0
        writer.close_flight()
    finally:
        # Always stop the started writer, even when the assertion fails
        # before close_flight(); same guard as the `writer` fixture teardown.
        if not writer._closed:
            writer.stop()
def test_double_start_raises(writer: FileFdrWriter, flight_id: UUID) -> None:
    """Invariant: a second ``start()`` on the same writer raises ``FdrWriterError``.

    Uses the ``writer`` fixture, which hands over an unstarted writer and
    stops it on teardown.
    """
    # Imported inside the test rather than with the module-level imports.
    from gps_denied_onboard.components.c13_fdr import FdrWriterError
    # Arrange
    writer.start()
    # Assert
    with pytest.raises(FdrWriterError):
        writer.start()
    # NOTE(review): the reviewed copy of this file had lost its indentation.
    # This call is assumed to sit AFTER the `with` block, i.e. it checks that
    # a flight can still be opened following the rejected second start().
    # Confirm against the repository.
    writer.open_flight(_make_header(flight_id))