mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-23 00:11:14 +00:00
[AZ-291] [AZ-292] [AZ-293] C13 FDR writer chain (batch 6)
AZ-291 — FileFdrWriter: single writer thread draining every registered FdrClient SPSC ring buffer to per-flight segment files; per-segment size rotation; cross-process fcntl.flock filelock on flight_root; ENOSPC degraded mode with rate-capped ERROR logs and one GCS alert. AZ-292 — FlightHeader/FlightFooter dataclasses + open_flight / close_flight lifecycle methods; four per-flight monotonic counters (records_written, records_dropped_overrun, bytes_written, rollover_count) reported by the footer; flight_id mismatch and close-without-open are typed errors. AZ-293 — CapacityCapPolicy (post-rotation hook): walks the flight directory, drops the oldest CLOSED segment when total > cap (default 64 GiB), emits a kind="segment_rollover" record per drop. Never drops the currently-open segment or segment 0 alone; cap_misconfigured path logs ERROR + GCS alert. No config flag disables emission (C13-ST-01). Schema: bumped fdr_record_schema flight_header / flight_footer payload key sets to match the AZ-292 task spec (effective 1.0.0 -> 1.1.0; no prior producer); KNOWN_PAYLOAD_KEYS updated. Added FdrWriterConfig nested in FdrConfig (segment_size_bytes, batch_size, flight_cap_bytes, debug_log_per_record). Tests: 29 new unit tests (8 AC + 1 invariant per task); full suite 323 passed, 2 pre-existing skips, 0 regressions. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,404 @@
|
||||
"""AZ-291 — FileFdrWriter writer thread + segment lifecycle.
|
||||
|
||||
Covers AC-1..AC-8 + a fresh-flight_id helper used by every test.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import errno
|
||||
import os
|
||||
import struct
|
||||
import time
|
||||
from collections.abc import Iterator
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from unittest import mock
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from gps_denied_onboard.components.c13_fdr import (
|
||||
FdrConcurrentWriterError,
|
||||
FileFdrWriter,
|
||||
FlightHeader,
|
||||
)
|
||||
from gps_denied_onboard.config import FdrWriterConfig
|
||||
from gps_denied_onboard.fdr_client.client import FdrClient
|
||||
from gps_denied_onboard.fdr_client.records import FdrRecord, parse
|
||||
|
||||
_LENGTH_PREFIX = struct.Struct("<I")
|
||||
|
||||
|
||||
def _make_header(flight_id: UUID) -> FlightHeader:
    """Build a small ``FlightHeader`` for ``flight_id`` stamped with the current time.

    The wall-clock start is the current UTC time as an ISO-8601 string and the
    monotonic start comes from ``time.monotonic_ns()``. All other fields are
    fixed placeholder values.
    """
    started_wall_iso = datetime.now(tz=timezone.utc).isoformat()
    started_monotonic_ns = time.monotonic_ns()
    header = FlightHeader(
        flight_id=flight_id,
        flight_started_at_iso=started_wall_iso,
        flight_started_at_monotonic_ns=started_monotonic_ns,
        config_snapshot={"tier": 2},
        signing_key_rotation_event={},
        manifest_content_hashes={},
        build_info={"commit": "abc1234"},
    )
    return header
|
||||
|
||||
|
||||
def _make_client(producer_id: str = "c1_vio", capacity: int = 256) -> FdrClient:
    """Create an ``FdrClient`` for tests, always passing ``_emit_diag_log=False``.

    Args:
        producer_id: Producer identifier forwarded to the client.
        capacity: Buffer capacity forwarded to the client.
    """
    client = FdrClient(
        producer_id=producer_id,
        capacity=capacity,
        _emit_diag_log=False,
    )
    return client
|
||||
|
||||
|
||||
def _payload(i: int) -> FdrRecord:
    """Return a ``vio.tick`` record from producer ``c1_vio`` with ``frame_id`` set to ``i``.

    The timestamp is the current UTC time in ISO-8601 form; every other payload
    field is a fixed placeholder.
    """
    now_iso = datetime.now(tz=timezone.utc).isoformat()
    identity_3x3 = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
    body = {
        "frame_id": i,
        "R": identity_3x3,
        "t": [0, 0, 0],
        "P": [],
        "last_anchor_age_ms": 0,
    }
    return FdrRecord(
        schema_version=1,
        ts=now_iso,
        producer_id="c1_vio",
        kind="vio.tick",
        payload=body,
    )
|
||||
|
||||
|
||||
def _read_records(path: Path) -> list[FdrRecord]:
    """Parse every length-prefixed record stored in the segment file at ``path``.

    The file is read as consecutive frames: a ``_LENGTH_PREFIX`` header
    (``"<I"``, a little-endian unsigned 32-bit byte count) followed by that
    many payload bytes, which are handed to ``parse``.

    Args:
        path: Segment file to read.

    Returns:
        The parsed records, in file order. An empty file yields ``[]``.

    Raises:
        struct.error: If the file ends in the middle of a length prefix.
        ValueError: If a frame's declared length runs past the end of the
            file, i.e. the segment ends with a half-written record.
    """
    data = path.read_bytes()
    total = len(data)
    records: list[FdrRecord] = []
    offset = 0
    while offset < total:
        (length,) = _LENGTH_PREFIX.unpack_from(data, offset)
        body_start = offset + _LENGTH_PREFIX.size
        body_end = body_start + length
        # Slicing past the end would silently hand a short payload to the
        # parser; fail loudly so a torn trailing record cannot go unnoticed.
        if body_end > total:
            raise ValueError(
                f"truncated record in {path}: frame at offset {offset} declares "
                f"{length} payload bytes but only {total - body_start} remain"
            )
        records.append(parse(data[body_start:body_end]))
        offset = body_end
    return records
|
||||
|
||||
|
||||
def _collect_alerts() -> tuple[list[str], Any]:
|
||||
msgs: list[str] = []
|
||||
|
||||
def alert(msg: str) -> None:
|
||||
msgs.append(msg)
|
||||
|
||||
return msgs, alert
|
||||
|
||||
|
||||
@pytest.fixture()
def flight_root(tmp_path: Path) -> Path:
    """Path of an ``fdr`` entry under pytest's per-test ``tmp_path`` (not created here)."""
    return tmp_path.joinpath("fdr")
|
||||
|
||||
|
||||
@pytest.fixture()
def flight_id() -> UUID:
    """Random UUID used as the flight identifier for a single test."""
    fresh_id = uuid4()
    return fresh_id
|
||||
|
||||
|
||||
@pytest.fixture()
def base_config() -> FdrWriterConfig:
    """Writer config with 64 MiB segments, batch size 64, a 64 GiB flight cap, no per-record debug log."""
    mib = 1024 * 1024
    gib = 1024**3
    return FdrWriterConfig(
        segment_size_bytes=64 * mib,
        batch_size=64,
        flight_cap_bytes=64 * gib,
        debug_log_per_record=False,
    )
|
||||
|
||||
|
||||
@pytest.fixture()
def writer(
    flight_root: Path, flight_id: UUID, base_config: FdrWriterConfig
) -> Iterator[FileFdrWriter]:
    """Yield an unstarted ``FileFdrWriter`` wired to one default client.

    On teardown the writer is stopped unless its ``_closed`` flag is already
    set, so a test that starts it does not have to clean up itself.
    """
    _alerts, alert_fn = _collect_alerts()
    fdr_writer = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=base_config,
        fdr_clients=[_make_client()],
        gcs_alert=alert_fn,
    )
    yield fdr_writer
    # Teardown: nothing to do when the writer has already been closed.
    if fdr_writer._closed:
        return
    fdr_writer.stop()
|
||||
|
||||
|
||||
def test_ac1_drain_all_registered_producers(
    flight_root: Path, flight_id: UUID, base_config: FdrWriterConfig
) -> None:
    """AC-1: records enqueued on every registered client reach the segment file.

    Three clients each enqueue 100 ``vio.tick`` records. After the buffers
    drain and the flight is closed, the segment must hold all 300 ticks framed
    by a ``flight_header`` first and a ``flight_footer`` last, and the footer
    must report 302 records written.
    """
    # Arrange
    clients = [_make_client(f"c{i}_test") for i in range(3)]
    _alerts, alert_fn = _collect_alerts()
    writer = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=base_config,
        fdr_clients=clients,
        gcs_alert=alert_fn,
    )
    writer.start()
    try:
        writer.open_flight(_make_header(flight_id))
        for client in clients:
            for i in range(100):
                client.enqueue(_payload(i))

        # Act — wait (bounded at 5 s) until every client buffer is empty.
        deadline = time.monotonic() + 5.0
        while time.monotonic() < deadline:
            if all(c._buffer_size() == 0 for c in clients):
                break
            time.sleep(0.01)
        footer = writer.close_flight()

        # Assert
        records = _read_records(writer.current_segment_path())
        vio_count = sum(1 for r in records if r.kind == "vio.tick")
        assert vio_count == 300
        assert records[0].kind == "flight_header"
        assert records[-1].kind == "flight_footer"
        assert footer.records_written == 302  # 300 + header + footer
    finally:
        # A started writer is stopped even when an assertion fails, so it
        # does not outlive the test; same guard as the ``writer`` fixture.
        if not writer._closed:
            writer.stop()
|
||||
|
||||
|
||||
def test_ac2_per_segment_rotation_at_size_cap(flight_root: Path, flight_id: UUID) -> None:
    """AC-2: exceeding ``segment_size_bytes`` rotates to a new segment without losing order.

    With a 2048-byte segment cap, 40 records must spill into at least two
    ``segment-*.fdr`` files, and reading the segments in sorted name order must
    return the ``vio.tick`` frame ids 0..39 in sequence.
    """
    # Arrange — small segment cap; the writer must rotate.
    config = FdrWriterConfig(segment_size_bytes=2048, batch_size=4, flight_cap_bytes=1024**3)
    _alerts, alert_fn = _collect_alerts()
    client = _make_client()
    writer = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=config,
        fdr_clients=[client],
        gcs_alert=alert_fn,
    )
    writer.start()
    try:
        writer.open_flight(_make_header(flight_id))
        for i in range(40):
            client.enqueue(_payload(i))

        # Act — wait (bounded at 5 s) for the client buffer to empty.
        deadline = time.monotonic() + 5.0
        while time.monotonic() < deadline and client._buffer_size() > 0:
            time.sleep(0.01)
        writer.close_flight()

        # Assert — at least two segment files exist.
        segs = sorted(writer.flight_dir.glob("segment-*.fdr"))
        assert len(segs) >= 2, f"expected >=2 segments, got {[p.name for p in segs]}"
        frame_ids = [
            r.payload["frame_id"]
            for seg in segs
            for r in _read_records(seg)
            if r.kind == "vio.tick"
        ]
        assert frame_ids == list(range(40))
    finally:
        # A started writer is stopped even when an assertion fails, so it
        # does not outlive the test; same guard as the ``writer`` fixture.
        if not writer._closed:
            writer.stop()
|
||||
|
||||
|
||||
def test_ac3_atomic_rotation_no_half_segment(flight_root: Path, flight_id: UUID) -> None:
    """AC-3: after ``stop()`` without ``close_flight()``, every segment file still parses.

    Uses a 1024-byte segment cap and 20 records, then stops the writer abruptly
    and checks that each record read back from each segment has
    ``schema_version >= 1``.
    """
    # Arrange
    small_segments = FdrWriterConfig(
        segment_size_bytes=1024, batch_size=4, flight_cap_bytes=1024**3
    )
    _alerts, alert_fn = _collect_alerts()
    producer = _make_client()
    fdr_writer = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=small_segments,
        fdr_clients=[producer],
        gcs_alert=alert_fn,
    )
    fdr_writer.start()
    fdr_writer.open_flight(_make_header(flight_id))
    for frame in range(20):
        producer.enqueue(_payload(frame))
    give_up_at = time.monotonic() + 5.0
    while True:
        if time.monotonic() >= give_up_at:
            break
        if producer._buffer_size() <= 0:
            break
        time.sleep(0.01)

    # Act — abrupt stop (no close_flight).
    fdr_writer.stop()

    # Assert — every segment file parses cleanly.
    segment_paths = sorted(fdr_writer.flight_dir.glob("segment-*.fdr"))
    for segment_path in segment_paths:
        for record in _read_records(segment_path):
            assert record.schema_version >= 1
|
||||
|
||||
|
||||
def test_ac4_concurrent_writer_blocked_by_filelock(
    flight_root: Path, flight_id: UUID, base_config: FdrWriterConfig
) -> None:
    """AC-4: a second writer on the same ``flight_root`` cannot start while the first runs.

    ``writer_b`` uses a different flight id but the same root, and its
    ``start()`` must raise ``FdrConcurrentWriterError``.
    """
    # Arrange
    _alerts, alert_fn = _collect_alerts()
    client_a = _make_client("c1_vio")
    writer_a = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=base_config,
        fdr_clients=[client_a],
        gcs_alert=alert_fn,
    )
    writer_a.start()
    try:
        client_b = _make_client("c2_vpr")
        writer_b = FileFdrWriter(
            flight_root=flight_root,
            flight_id=uuid4(),
            config=base_config,
            fdr_clients=[client_b],
            gcs_alert=alert_fn,
        )

        # Act, Assert
        with pytest.raises(FdrConcurrentWriterError):
            writer_b.start()
    finally:
        # Cleanup — runs even when the expected error is not raised, so the
        # first writer never outlives the test.
        writer_a.stop()
|
||||
|
||||
|
||||
def test_ac5_enospc_degrades_and_alerts(
    flight_root: Path, flight_id: UUID, base_config: FdrWriterConfig
) -> None:
    """AC-5: an ``ENOSPC`` write failure degrades the writer and raises a GCS alert.

    ``os.write`` as seen by the writer module is patched so that its first call
    raises ``OSError(ENOSPC)`` and later calls go to the real ``os.write``. The
    writer must report ``is_degraded()`` within 2 s, and the first collected
    alert must mention ``"FDR write failure"``.
    """
    # Arrange
    alerts, alert_fn = _collect_alerts()
    client = _make_client()
    writer = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=base_config,
        fdr_clients=[client],
        gcs_alert=alert_fn,
    )
    writer.start()
    try:
        writer.open_flight(_make_header(flight_id))

        # Capture the real function before patching so the fake can delegate.
        real_write = os.write
        enospc_pending = True

        def failing_write(fd: int, data: bytes) -> int:
            nonlocal enospc_pending
            if enospc_pending:
                enospc_pending = False
                raise OSError(errno.ENOSPC, "fake ENOSPC")
            return real_write(fd, data)

        # Act
        with mock.patch(
            "gps_denied_onboard.components.c13_fdr.writer.os.write", side_effect=failing_write
        ):
            client.enqueue(_payload(0))
            deadline = time.monotonic() + 2.0
            while time.monotonic() < deadline and not writer.is_degraded():
                time.sleep(0.01)

        # Assert
        assert writer.is_degraded()
        assert len(alerts) >= 1
        assert "FDR write failure" in alerts[0]
    finally:
        # Stop the writer even when an assertion above fails.
        writer.stop()
|
||||
|
||||
|
||||
def test_ac6_stop_drains_and_releases_lock(
    flight_root: Path, flight_id: UUID, base_config: FdrWriterConfig
) -> None:
    """AC-6: after ``stop()``, another writer can start on the same ``flight_root``.

    The first writer is stopped straight after 50 records are enqueued. A
    second writer with a new flight id is then started and stopped; its
    ``start()`` would raise if the first writer still held the lock.
    """
    # Arrange
    _alerts, alert_fn = _collect_alerts()
    producer = _make_client()
    first = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=base_config,
        fdr_clients=[producer],
        gcs_alert=alert_fn,
    )
    first.start()
    first.open_flight(_make_header(flight_id))
    for frame in range(50):
        producer.enqueue(_payload(frame))

    # Act
    first.stop()

    # Assert — a second writer can claim the filelock.
    next_flight_id = uuid4()
    next_client = _make_client("c5_state")
    second = FileFdrWriter(
        flight_root=flight_root,
        flight_id=next_flight_id,
        config=base_config,
        fdr_clients=[next_client],
        gcs_alert=alert_fn,
    )
    second.start()  # would raise if lock still held
    second.stop()
|
||||
|
||||
|
||||
def test_ac7_segment_layout(flight_root: Path, flight_id: UUID) -> None:
    """AC-7: files in ``<flight_root>/<flight_id>/`` are named ``segment-NNNN.fdr``.

    After writing 40 records with a 1024-byte segment cap and closing the
    flight, every regular file in the flight directory must carry the
    ``segment-`` prefix, the ``.fdr`` suffix and a 4-digit numeric index.
    """
    # Arrange
    config = FdrWriterConfig(segment_size_bytes=1024, batch_size=4, flight_cap_bytes=1024**3)
    _alerts, alert_fn = _collect_alerts()
    client = _make_client()
    writer = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=config,
        fdr_clients=[client],
        gcs_alert=alert_fn,
    )
    writer.start()
    try:
        writer.open_flight(_make_header(flight_id))
        for i in range(40):
            client.enqueue(_payload(i))
        deadline = time.monotonic() + 5.0
        while time.monotonic() < deadline and client._buffer_size() > 0:
            time.sleep(0.01)
        writer.close_flight()

        # Assert
        flight_dir = flight_root / str(flight_id)
        names = sorted(p.name for p in flight_dir.iterdir() if p.is_file())
        # Without this the naming checks below would pass on an empty directory.
        assert names, f"no files found in {flight_dir}"
        for name in names:
            assert name.startswith("segment-") and name.endswith(".fdr"), name
            # 4-digit zero-padded.
            stem = name[len("segment-") : -len(".fdr")]
            assert len(stem) == 4 and stem.isdigit()
    finally:
        # A started writer is stopped even when an assertion fails, so it
        # does not outlive the test; same guard as the ``writer`` fixture.
        if not writer._closed:
            writer.stop()
|
||||
|
||||
|
||||
def test_ac8_steady_state_no_overrun(
    flight_root: Path, flight_id: UUID, base_config: FdrWriterConfig
) -> None:
    """AC-8: a paced stream of records never triggers the client's overrun hook.

    A client with capacity 2048 enqueues 200 records with a 1 ms sleep after
    each one. Once the buffer has drained (bounded wait of 5 s), the
    ``on_overrun`` hook must not have been called.
    """
    # Arrange — a small burst that the writer drains within a few seconds.
    _alerts, alert_fn = _collect_alerts()
    client = _make_client(capacity=2048)
    writer = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=base_config,
        fdr_clients=[client],
        gcs_alert=alert_fn,
    )
    overrun_seen = {"count": 0}

    def overrun_hook(record: FdrRecord) -> None:
        overrun_seen["count"] += 1

    client.on_overrun = overrun_hook
    writer.start()
    try:
        writer.open_flight(_make_header(flight_id))

        # Act — emit 200 records, sleeping 1 ms after each enqueue.
        for i in range(200):
            client.enqueue(_payload(i))
            time.sleep(0.001)

        deadline = time.monotonic() + 5.0
        while time.monotonic() < deadline and client._buffer_size() > 0:
            time.sleep(0.01)

        # Assert
        assert overrun_seen["count"] == 0
        writer.close_flight()
    finally:
        # A started writer is stopped even when the assertion fails (which
        # skips close_flight); same guard as the ``writer`` fixture.
        if not writer._closed:
            writer.stop()
|
||||
|
||||
|
||||
def test_double_start_raises(writer: FileFdrWriter, flight_id: UUID) -> None:
    """Calling ``start()`` on an already-started writer raises ``FdrWriterError``."""
    from gps_denied_onboard.components.c13_fdr import FdrWriterError

    # Arrange
    writer.start()
    # Assert
    with pytest.raises(FdrWriterError):
        writer.start()
    # NOTE(review): indentation was lost in the reviewed copy. This call is
    # assumed to sit after the ``raises`` block (inside it, it could never run
    # because ``start()`` raises first) — confirm against the repository.
    writer.open_flight(_make_header(flight_id))
|
||||
Reference in New Issue
Block a user