mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-21 22:21:13 +00:00
b5dd6031d2
AZ-291 — FileFdrWriter: single writer thread draining every registered FdrClient SPSC ring buffer to per-flight segment files; per-segment size rotation; cross-process fcntl.flock filelock on flight_root; ENOSPC degraded mode with rate-capped ERROR logs and one GCS alert. AZ-292 — FlightHeader/FlightFooter dataclasses + open_flight / close_flight lifecycle methods; four per-flight monotonic counters (records_written, records_dropped_overrun, bytes_written, rollover_count) reported by the footer; flight_id mismatch and close-without-open are typed errors. AZ-293 — CapacityCapPolicy (post-rotation hook): walks the flight directory, drops the oldest CLOSED segment when total > cap (default 64 GiB), emits a kind="segment_rollover" record per drop. Never drops the currently-open segment or segment 0 alone; cap_misconfigured path logs ERROR + GCS alert. No config flag disables emission (C13-ST-01). Schema: bumped fdr_record_schema flight_header / flight_footer payload key sets to match the AZ-292 task spec (effective 1.0.0 -> 1.1.0; no prior producer); KNOWN_PAYLOAD_KEYS updated. Added FdrWriterConfig nested in FdrConfig (segment_size_bytes, batch_size, flight_cap_bytes, debug_log_per_record). Tests: 29 new unit tests (8 AC + 1 invariant per task); full suite 323 passed, 2 pre-existing skips, 0 regressions. Co-authored-by: Cursor <cursoragent@cursor.com>
405 lines
11 KiB
Python
405 lines
11 KiB
Python
"""AZ-291 — FileFdrWriter writer thread + segment lifecycle.
|
|
|
|
Covers AC-1..AC-8 + a fresh-flight_id helper used by every test.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import errno
|
|
import os
|
|
import struct
|
|
import time
|
|
from collections.abc import Iterator
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Any
|
|
from unittest import mock
|
|
from uuid import UUID, uuid4
|
|
|
|
import pytest
|
|
|
|
from gps_denied_onboard.components.c13_fdr import (
|
|
FdrConcurrentWriterError,
|
|
FileFdrWriter,
|
|
FlightHeader,
|
|
)
|
|
from gps_denied_onboard.config import FdrWriterConfig
|
|
from gps_denied_onboard.fdr_client.client import FdrClient
|
|
from gps_denied_onboard.fdr_client.records import FdrRecord, parse
|
|
|
|
# Framing used when reading segment files back: each serialized record is
# preceded by its byte length, packed as a little-endian unsigned 32-bit int.
_LENGTH_PREFIX = struct.Struct("<I")
|
|
|
|
|
|
def _make_header(flight_id: UUID) -> FlightHeader:
    """Build a ``FlightHeader`` for ``flight_id`` stamped with the current time.

    The wall-clock start is the current UTC time in ISO-8601 form and the
    monotonic start is read from ``time.monotonic_ns()``. The remaining
    fields carry small fixed placeholder values.
    """
    started_iso = datetime.now(tz=timezone.utc).isoformat()
    started_monotonic_ns = time.monotonic_ns()
    header = FlightHeader(
        flight_id=flight_id,
        flight_started_at_iso=started_iso,
        flight_started_at_monotonic_ns=started_monotonic_ns,
        config_snapshot={"tier": 2},
        signing_key_rotation_event={},
        manifest_content_hashes={},
        build_info={"commit": "abc1234"},
    )
    return header
|
|
|
|
|
|
def _make_client(producer_id: str = "c1_vio", capacity: int = 256) -> FdrClient:
    """Create an ``FdrClient`` for tests.

    Args:
        producer_id: Producer identifier handed to the client.
        capacity: Buffer capacity handed to the client.

    Returns:
        A client constructed with ``_emit_diag_log=False``.
    """
    client = FdrClient(
        producer_id=producer_id,
        capacity=capacity,
        _emit_diag_log=False,
    )
    return client
|
|
|
|
|
|
def _payload(i: int) -> FdrRecord:
    """Return a ``vio.tick`` record from producer ``c1_vio`` whose ``frame_id`` is ``i``.

    The timestamp is the current UTC time; every other payload field holds a
    fixed value (identity ``R``, zero ``t``, empty ``P``).
    """
    timestamp = datetime.now(tz=timezone.utc).isoformat()
    identity_rotation = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
    body = {
        "frame_id": i,
        "R": identity_rotation,
        "t": [0, 0, 0],
        "P": [],
        "last_anchor_age_ms": 0,
    }
    return FdrRecord(
        schema_version=1,
        ts=timestamp,
        producer_id="c1_vio",
        kind="vio.tick",
        payload=body,
    )
|
|
|
|
|
|
def _read_records(path: Path) -> list[FdrRecord]:
    """Decode every length-prefixed record stored in the file at ``path``.

    The file is read as a sequence of frames, each a ``_LENGTH_PREFIX``
    header followed by that many payload bytes, which are handed to
    ``parse``.

    Args:
        path: Segment file to read.

    Returns:
        The parsed records in file order; an empty list for an empty file.

    Raises:
        ValueError: If the file ends in the middle of a length prefix or in
            the middle of a payload (i.e. it contains a half-written record).
    """
    data = path.read_bytes()
    total = len(data)
    prefix_size = _LENGTH_PREFIX.size

    records: list[FdrRecord] = []
    offset = 0
    while offset < total:
        # Fail with a clear message instead of an opaque struct.error when the
        # trailing bytes are too few to hold a length prefix.
        if total - offset < prefix_size:
            raise ValueError(
                f"{path}: truncated length prefix at offset {offset} "
                f"({total - offset} of {prefix_size} bytes present)"
            )
        (length,) = _LENGTH_PREFIX.unpack_from(data, offset)
        offset += prefix_size

        # Slicing past the end would silently yield a short payload, so check
        # that the whole record is present before parsing it.
        end = offset + length
        if end > total:
            raise ValueError(
                f"{path}: truncated record payload at offset {offset} "
                f"(expected {length} bytes, {total - offset} present)"
            )
        records.append(parse(data[offset:end]))
        offset = end
    return records
|
|
|
|
|
|
def _collect_alerts() -> tuple[list[str], Any]:
|
|
msgs: list[str] = []
|
|
|
|
def alert(msg: str) -> None:
|
|
msgs.append(msg)
|
|
|
|
return msgs, alert
|
|
|
|
|
|
@pytest.fixture()
def flight_root(tmp_path: Path) -> Path:
    """Path of an ``fdr`` directory under pytest's per-test ``tmp_path`` (not created here)."""
    root = tmp_path.joinpath("fdr")
    return root
|
|
|
|
|
|
@pytest.fixture()
def flight_id() -> UUID:
    """Fresh random (version 4) flight identifier for each test."""
    fresh_id = uuid4()
    return fresh_id
|
|
|
|
|
|
@pytest.fixture()
def base_config() -> FdrWriterConfig:
    """Writer configuration with 64 MiB segments, batch size 64 and a 64 GiB flight cap."""
    mib = 1024**2
    gib = 1024**3
    config = FdrWriterConfig(
        segment_size_bytes=64 * mib,
        batch_size=64,
        flight_cap_bytes=64 * gib,
        debug_log_per_record=False,
    )
    return config
|
|
|
|
|
|
@pytest.fixture()
def writer(
    flight_root: Path, flight_id: UUID, base_config: FdrWriterConfig
) -> Iterator[FileFdrWriter]:
    """Yield an unstarted ``FileFdrWriter`` wired to a single default client.

    On teardown the writer is stopped unless its ``_closed`` flag is already
    set.
    """
    _, on_alert = _collect_alerts()
    fdr_writer = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=base_config,
        fdr_clients=[_make_client()],
        gcs_alert=on_alert,
    )
    yield fdr_writer

    # Teardown: nothing to do when the test already closed the writer.
    if fdr_writer._closed:
        return
    fdr_writer.stop()
|
|
|
|
|
|
def test_ac1_drain_all_registered_producers(
    flight_root: Path, flight_id: UUID, base_config: FdrWriterConfig
) -> None:
    """AC-1: records enqueued on every registered client end up in the segment file."""
    # Arrange — three clients on one writer, 100 records enqueued on each.
    producers = [_make_client(f"c{idx}_test") for idx in range(3)]
    _, on_alert = _collect_alerts()
    fdr_writer = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=base_config,
        fdr_clients=producers,
        gcs_alert=on_alert,
    )
    fdr_writer.start()
    fdr_writer.open_flight(_make_header(flight_id))
    for producer in producers:
        for frame in range(100):
            producer.enqueue(_payload(frame))

    # Act — poll for up to 5 s until every client's buffer reports empty,
    # then close the flight.
    give_up_at = time.monotonic() + 5.0
    while time.monotonic() < give_up_at and any(
        p._buffer_size() != 0 for p in producers
    ):
        time.sleep(0.01)
    footer = fdr_writer.close_flight()

    # Assert — header first, footer last, all 300 ticks in between.
    records = _read_records(fdr_writer.current_segment_path())
    tick_records = [rec for rec in records if rec.kind == "vio.tick"]
    assert len(tick_records) == 300
    assert records[0].kind == "flight_header"
    assert records[-1].kind == "flight_footer"
    assert footer.records_written == 302  # 300 + header + footer
|
|
|
|
|
|
def test_ac2_per_segment_rotation_at_size_cap(flight_root: Path, flight_id: UUID) -> None:
    """AC-2: with a 2048-byte segment cap, 40 records span several segments in order."""
    # Arrange — small segment cap; the writer must rotate.
    small_cfg = FdrWriterConfig(
        segment_size_bytes=2048, batch_size=4, flight_cap_bytes=1024**3
    )
    _, on_alert = _collect_alerts()
    producer = _make_client()
    fdr_writer = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=small_cfg,
        fdr_clients=[producer],
        gcs_alert=on_alert,
    )
    fdr_writer.start()
    fdr_writer.open_flight(_make_header(flight_id))
    for frame in range(40):
        producer.enqueue(_payload(frame))

    # Act — wait (at most 5 s) for the client buffer to empty, then close.
    give_up_at = time.monotonic() + 5.0
    while True:
        if time.monotonic() >= give_up_at or producer._buffer_size() <= 0:
            break
        time.sleep(0.01)
    fdr_writer.close_flight()

    # Assert — at least two segment files exist.
    segs = sorted(fdr_writer.flight_dir.glob("segment-*.fdr"))
    assert len(segs) >= 2, f"expected >=2 segments, got {[p.name for p in segs]}"

    # Reading the segments in name order must reproduce the enqueue order.
    frame_ids = [
        rec.payload["frame_id"]
        for seg in segs
        for rec in _read_records(seg)
        if rec.kind == "vio.tick"
    ]
    assert frame_ids == list(range(40))
|
|
|
|
|
|
def test_ac3_atomic_rotation_no_half_segment(flight_root: Path, flight_id: UUID) -> None:
    """AC-3: after an abrupt ``stop()`` every segment file on disk still parses.

    The writer is stopped without ``close_flight()``; each ``segment-*.fdr``
    file is then read back in full and every decoded record must carry a
    schema version of at least 1.
    """
    # Arrange
    config = FdrWriterConfig(segment_size_bytes=1024, batch_size=4, flight_cap_bytes=1024**3)
    _alerts, alert_fn = _collect_alerts()
    client = _make_client()
    writer = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=config,
        fdr_clients=[client],
        gcs_alert=alert_fn,
    )
    writer.start()
    writer.open_flight(_make_header(flight_id))
    for i in range(20):
        client.enqueue(_payload(i))
    deadline = time.monotonic() + 5.0
    while time.monotonic() < deadline and client._buffer_size() > 0:
        time.sleep(0.01)

    # Act — abrupt stop (no close_flight).
    writer.stop()

    # Assert — every segment file parses cleanly.
    segs = sorted(writer.flight_dir.glob("segment-*.fdr"))
    # Guard against a vacuous pass: with no segment files the loop below
    # would assert nothing at all.
    assert segs, "no segment files were written"
    for seg in segs:
        for record in _read_records(seg):
            assert record.schema_version >= 1
|
|
|
|
|
|
def test_ac4_concurrent_writer_blocked_by_filelock(
    flight_root: Path, flight_id: UUID, base_config: FdrWriterConfig
) -> None:
    """AC-4: a second writer on the same ``flight_root`` cannot start.

    While ``writer_a`` is started, calling ``start()`` on ``writer_b`` (same
    ``flight_root``, different flight id) must raise
    ``FdrConcurrentWriterError``.
    """
    # Arrange
    _alerts, alert_fn = _collect_alerts()
    client_a = _make_client("c1_vio")
    writer_a = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=base_config,
        fdr_clients=[client_a],
        gcs_alert=alert_fn,
    )
    writer_a.start()
    try:
        client_b = _make_client("c2_vpr")
        writer_b = FileFdrWriter(
            flight_root=flight_root,
            flight_id=uuid4(),
            config=base_config,
            fdr_clients=[client_b],
            gcs_alert=alert_fn,
        )

        # Act, Assert
        with pytest.raises(FdrConcurrentWriterError):
            writer_b.start()
    finally:
        # Cleanup — runs even when the expectation above fails, so the first
        # writer is always stopped.
        writer_a.stop()
|
|
|
|
|
|
def test_ac5_enospc_degrades_and_alerts(
    flight_root: Path, flight_id: UUID, base_config: FdrWriterConfig
) -> None:
    """AC-5: an ENOSPC write failure degrades the writer and raises an alert.

    ``os.write`` as seen by the writer module is patched so that its first
    call raises ``OSError(ENOSPC)`` and later calls delegate to the real
    function. After one record is enqueued the writer must report
    ``is_degraded()`` and the first alert must mention "FDR write failure".
    """
    # Arrange
    alerts, alert_fn = _collect_alerts()
    client = _make_client()
    writer = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=base_config,
        fdr_clients=[client],
        gcs_alert=alert_fn,
    )
    writer.start()
    try:
        writer.open_flight(_make_header(flight_id))

        # Capture the real function before patching so the stub can delegate.
        real_write = os.write
        state = {"first": True}

        def failing_write(fd: int, data: bytes) -> int:
            if state["first"]:
                state["first"] = False
                raise OSError(errno.ENOSPC, "fake ENOSPC")
            return real_write(fd, data)

        # Act — wait up to 2 s for the writer to flag itself as degraded.
        with mock.patch(
            "gps_denied_onboard.components.c13_fdr.writer.os.write", side_effect=failing_write
        ):
            client.enqueue(_payload(0))
            deadline = time.monotonic() + 2.0
            while time.monotonic() < deadline and not writer.is_degraded():
                time.sleep(0.01)

        # Assert
        assert writer.is_degraded()
        assert len(alerts) >= 1
        assert "FDR write failure" in alerts[0]
    finally:
        # Stop the writer even when an assertion above fails.
        writer.stop()
|
|
|
|
|
|
def test_ac6_stop_drains_and_releases_lock(
    flight_root: Path, flight_id: UUID, base_config: FdrWriterConfig
) -> None:
    """AC-6: once ``stop()`` returns, another writer can start on the same root."""
    # Arrange — a started writer with an open flight and 50 queued records.
    _, on_alert = _collect_alerts()
    producer = _make_client()
    first = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=base_config,
        fdr_clients=[producer],
        gcs_alert=on_alert,
    )
    first.start()
    first.open_flight(_make_header(flight_id))
    for frame in range(50):
        producer.enqueue(_payload(frame))

    # Act
    first.stop()

    # Assert — a second writer can claim the filelock.
    other_flight = uuid4()
    other_clients = [_make_client("c5_state")]
    second = FileFdrWriter(
        flight_root=flight_root,
        flight_id=other_flight,
        config=base_config,
        fdr_clients=other_clients,
        gcs_alert=on_alert,
    )
    second.start()  # would raise if lock still held
    second.stop()
|
|
|
|
|
|
def test_ac7_segment_layout(flight_root: Path, flight_id: UUID) -> None:
    """AC-7: files in the flight directory are named ``segment-NNNN.fdr``.

    After a flight of 40 records with a 1024-byte segment cap, every regular
    file directly under ``flight_root/<flight_id>`` must start with
    ``segment-``, end with ``.fdr`` and carry a four-digit numeric index.
    """
    # Arrange
    config = FdrWriterConfig(segment_size_bytes=1024, batch_size=4, flight_cap_bytes=1024**3)
    _alerts, alert_fn = _collect_alerts()
    client = _make_client()
    writer = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=config,
        fdr_clients=[client],
        gcs_alert=alert_fn,
    )
    writer.start()
    writer.open_flight(_make_header(flight_id))
    for i in range(40):
        client.enqueue(_payload(i))
    deadline = time.monotonic() + 5.0
    while time.monotonic() < deadline and client._buffer_size() > 0:
        time.sleep(0.01)
    writer.close_flight()

    # Assert
    flight_dir = flight_root / str(flight_id)
    names = sorted(p.name for p in flight_dir.iterdir() if p.is_file())
    # Guard against a vacuous pass: an empty directory would otherwise
    # satisfy the per-file checks below without testing anything.
    assert names, f"no files found in {flight_dir}"
    for name in names:
        assert name.startswith("segment-") and name.endswith(".fdr"), name
        # 4-digit zero-padded.
        stem = name[len("segment-") : -len(".fdr")]
        assert len(stem) == 4 and stem.isdigit()
|
|
|
|
|
|
def test_ac8_steady_state_no_overrun(
    flight_root: Path, flight_id: UUID, base_config: FdrWriterConfig
) -> None:
    """AC-8: a paced stream of 200 records never triggers the overrun hook.

    A counting callback is installed as ``client.on_overrun``; after the
    records are enqueued and the buffer has had up to 5 s to empty, the
    callback must not have been invoked.
    """
    # Arrange — a small burst that the writer drains within a few seconds.
    _alerts, alert_fn = _collect_alerts()
    client = _make_client(capacity=2048)
    writer = FileFdrWriter(
        flight_root=flight_root,
        flight_id=flight_id,
        config=base_config,
        fdr_clients=[client],
        gcs_alert=alert_fn,
    )
    overrun_seen = {"count": 0}

    def overrun_hook(record: FdrRecord) -> None:
        overrun_seen["count"] += 1

    client.on_overrun = overrun_hook
    writer.start()
    writer.open_flight(_make_header(flight_id))
    try:
        # Act — emit 200 records with a 1 ms sleep between enqueues.
        for i in range(200):
            client.enqueue(_payload(i))
            time.sleep(0.001)

        deadline = time.monotonic() + 5.0
        while time.monotonic() < deadline and client._buffer_size() > 0:
            time.sleep(0.01)

        # Assert
        assert overrun_seen["count"] == 0
    finally:
        # Close the flight even when the assertion above fails.
        writer.close_flight()
|
|
|
|
|
|
def test_double_start_raises(writer: FileFdrWriter, flight_id: UUID) -> None:
|
|
from gps_denied_onboard.components.c13_fdr import FdrWriterError
|
|
|
|
# Arrange
|
|
writer.start()
|
|
# Assert
|
|
with pytest.raises(FdrWriterError):
|
|
writer.start()
|
|
writer.open_flight(_make_header(flight_id))
|