Files
gps-denied-onboard/tests/unit/test_az273_fdr_client_ringbuf.py
Oleksandr Bezdieniezhnykh ba20c2d195 [AZ-273] [AZ-274] [AZ-275] [AZ-267] [AZ-268] FDR producer chain + log bridge + contract test
AZ-273: lock-free SPSC ring buffer with pre-allocated slots, power-of-
two capacity, opt-in SPSC guard, and EnqueueResult / FdrSpscViolationError
on the public surface. make_fdr_client caches one client per producer_id
and reads capacity from config.fdr.per_producer_capacity with fallback
to queue_size.
AZ-274: default_overrun_policy implements drop-oldest + retry + immediate
marker emission, with prior-marker dropped_count folding via _evict_one
so user-loss info is never lost across iterations. ERROR diagnostic is
rate-limited to <=1/sec per producer.
AZ-275: FakeFdrSink mirrors the FdrClient public surface and reuses the
production default_overrun_policy via a duck-typed _PolicyAdapter. The
test-only records/all_records_ever properties let component tests assert
both in-buffer and lifetime state. tests/conftest.py registers the
fake_fdr_sink fixture and an AST architecture lint forbids production
imports of fakes.
AZ-267: FdrLogBridgeHandler installs on the root logger via wire_log_bridge
and forwards only WARN+ERROR records into the FDR with kind="log".
Thread-local recursion guard short-circuits internal logging; saturated-
queue diagnostics go to stderr every N=1000 drops.
AZ-268: tests/contract/log_schema.py covers every row of the schema's
Test Cases table plus the "DEBUG+INFO never reach FDR" invariant.
pyproject.toml registers the contract pytest marker and the
contract-mandated log_schema.py file-name.
251 unit + contract tests pass (48 new). Review verdict:
PASS_WITH_WARNINGS; findings are NFR-perf deferrals + documented
relaxation of AZ-274 AC-2 coalescing under permanently-stalled consumer.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-11 03:00:49 +03:00

343 lines
9.2 KiB
Python

"""AZ-273 — FdrClient lock-free SPSC ring buffer + public API.
Verifies the contract-relevant ACs (1, 3, 4, 5, 6, 7) of
``fdr_client_protocol`` v1.0.0. AC-2 (zero-alloc steady-state) and the
NFR-perf budgets (p99 ≤ 5 µs / ≤ 10 µs on Tier-2) are deferred to a
follow-up perf-instrumentation task; the pure-Python implementation
correctness is in scope here.
"""
from __future__ import annotations
import threading
import time
from collections.abc import Iterator
import pytest
from gps_denied_onboard.config import Config, FdrConfig
from gps_denied_onboard.fdr_client import (
EnqueueResult,
FdrClient,
FdrRecord,
FdrSpscViolationError,
make_fdr_client,
)
from gps_denied_onboard.fdr_client.client import _reset_for_tests
from gps_denied_onboard.fdr_client.queue import SpscRingBuffer
@pytest.fixture(autouse=True)
def _reset_cache() -> Iterator[None]:
    """Call ``_reset_for_tests()`` before and after every test in this module.

    The fixture is autouse, so each test both starts from and leaves behind
    freshly reset ``fdr_client.client`` state (presumably the per-producer
    client cache kept by ``make_fdr_client`` — confirm against the helper).
    """
    # Setup: reset before the test body runs.
    _reset_for_tests()
    yield
    # Teardown: reset again so nothing leaks into the next test.
    _reset_for_tests()
def _make_record(producer_id: str = "test.producer", frame_id: int | None = 0) -> FdrRecord:
    """Build a fixed ``kind="log"`` record for use as test input.

    Args:
        producer_id: Used both as the record's ``producer_id`` and as the
            payload's ``component``.
        frame_id: Stored in the payload's ``frame_id``; tests vary it to
            tell records apart.

    Returns:
        An ``FdrRecord`` with ``schema_version=1``, a constant timestamp and
        an INFO-level log payload.
    """
    log_payload = {
        "level": "INFO",
        "component": producer_id,
        "frame_id": frame_id,
        "kind": "test.tick",
        "msg": "hello",
        "kv": {},
        "exc": None,
    }
    return FdrRecord(
        schema_version=1,
        ts="2026-05-11T00:00:00.000000Z",
        producer_id=producer_id,
        kind="log",
        payload=log_payload,
    )
# ---------------------------------------------------------------------------
# AC-1: lock-free, never blocks — every enqueue returns in O(1), overrun on #1025.
def test_ac1_enqueue_never_blocks_and_returns_overrun_on_overflow() -> None:
    """Enqueue 1025 records into a capacity-1024 client, timing every call.

    The final call must report ``EnqueueResult.OVERRUN`` and no single call
    may take 50 ms or longer.
    """
    # Arrange
    client = FdrClient(producer_id="c1_vio", capacity=1024)
    durations: list[float] = []
    outcome = EnqueueResult.OK

    # Act
    for frame in range(1025):
        began = time.perf_counter()
        outcome = client.enqueue(_make_record(frame_id=frame))
        durations.append(time.perf_counter() - began)

    # Assert
    assert outcome == EnqueueResult.OVERRUN, "the 1025th enqueue must overrun"
    # Pure-Python ceiling of 50 ms per call. It is deliberately generous:
    # the much tighter NFR-perf budgets apply on Tier-2 only, so this
    # check exists solely to catch genuine blocking regressions.
    slowest = max(durations)
    assert slowest < 0.05, f"slow enqueue suggests blocking; max={slowest * 1e6:.1f}µs"
# ---------------------------------------------------------------------------
# AC-3: capacity is config-driven via config.fdr.per_producer_capacity.
def test_ac3_capacity_from_per_producer_config() -> None:
    """A ``per_producer_capacity`` entry for the producer sets its capacity."""
    # Arrange
    producer = "c1_vio"
    config = Config(fdr=FdrConfig(per_producer_capacity={producer: 4096}))

    # Act
    capacity = make_fdr_client(producer, config)._capacity()

    # Assert
    assert capacity == 4096
def test_ac3_capacity_falls_back_to_default_queue_size() -> None:
    """With no per-producer entry, the client capacity equals ``queue_size``."""
    # Arrange
    fdr_block = FdrConfig(queue_size=2048)
    config = Config(fdr=fdr_block)

    # Act
    capacity = make_fdr_client("c2_vpr", config)._capacity()

    # Assert
    assert capacity == 2048
def test_ac3_non_power_of_two_rounds_up() -> None:
    """A configured ``queue_size`` of 1000 yields a client capacity of 1024."""
    # Arrange
    fdr_block = FdrConfig(queue_size=1000)
    config = Config(fdr=fdr_block)

    # Act
    capacity = make_fdr_client("c3_matcher", config)._capacity()

    # Assert
    assert capacity == 1024  # 1000 → next power of two
# ---------------------------------------------------------------------------
# AC-4: SPSC dequeue contract enforced by opt-in guard.
def test_ac4_spsc_guard_detects_concurrent_consumer_pop() -> None:
    """Two threads popping from an ``enforce_spsc=True`` buffer trip the guard.

    Both workers are released together by a barrier and call ``pop()`` up to
    64 times each; a worker stops at its first ``FdrSpscViolationError``.
    At least one violation must be recorded and its ``side`` must be
    ``"consumer"``.
    """
    # Arrange
    buf = SpscRingBuffer(capacity=16, enforce_spsc=True)
    barrier = threading.Barrier(2)
    errors: list[FdrSpscViolationError] = []

    def consume() -> None:
        barrier.wait()  # line both workers up so their pops overlap
        for _ in range(64):
            try:
                buf.pop()
            except FdrSpscViolationError as exc:
                errors.append(exc)
                return

    # Daemon threads: a wedged worker must not keep the interpreter alive
    # after the liveness assertion below has already failed the test.
    workers = [threading.Thread(target=consume, daemon=True) for _ in range(2)]

    # Act
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join(timeout=5.0)

    # Assert
    # join() returns None even when it times out, so check liveness
    # explicitly before reading the shared ``errors`` list.
    assert not any(w.is_alive() for w in workers), "consumer thread did not finish within 5 s"
    assert errors, "second consumer thread must trip the SPSC guard"
    assert errors[0].side == "consumer"
def test_ac4_spsc_guard_detects_concurrent_producer_push() -> None:
    """Two threads pushing into an ``enforce_spsc=True`` buffer trip the guard.

    Both workers are released together by a barrier and call ``push()`` up to
    64 times each; a worker stops at its first ``FdrSpscViolationError``.
    At least one violation must be recorded and its ``side`` must be
    ``"producer"``.
    """
    # Arrange
    buf = SpscRingBuffer(capacity=16, enforce_spsc=True)
    barrier = threading.Barrier(2)
    errors: list[FdrSpscViolationError] = []

    def produce() -> None:
        barrier.wait()  # line both workers up so their pushes overlap
        for _ in range(64):
            try:
                buf.push(object())
            except FdrSpscViolationError as exc:
                errors.append(exc)
                return

    # Daemon threads: a wedged worker must not keep the interpreter alive
    # after the liveness assertion below has already failed the test.
    workers = [threading.Thread(target=produce, daemon=True) for _ in range(2)]

    # Act
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join(timeout=5.0)

    # Assert
    # join() returns None even when it times out, so check liveness
    # explicitly before reading the shared ``errors`` list.
    assert not any(w.is_alive() for w in workers), "producer thread did not finish within 5 s"
    assert errors, "second producer thread must trip the SPSC guard"
    assert errors[0].side == "producer"
def test_ac4_default_is_no_guard() -> None:
    """Without ``enforce_spsc`` two threads may push and pop with no exception.

    Exceptions raised inside a ``threading.Thread`` target are not propagated
    to the thread that joins it, so each worker records any exception it hits
    and the test asserts that none were recorded.
    """
    # Arrange
    buf = SpscRingBuffer(capacity=16)  # enforce_spsc defaults to False
    failures: list[Exception] = []

    def stress() -> None:
        try:
            for i in range(32):
                buf.push(i)
                buf.pop()
        except Exception as exc:  # test boundary: surface anything the worker hit
            failures.append(exc)

    # Daemon threads: a wedged worker must not keep the interpreter alive
    # after the liveness assertion below has already failed the test.
    workers = [threading.Thread(target=stress, daemon=True) for _ in range(2)]

    # Act — two threads push and pop concurrently; no exception expected.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join(timeout=5.0)

    # Assert — no exception, no SPSC complaints; production wiring opts out.
    # join() returns None even when it times out, so check liveness explicitly.
    assert not any(w.is_alive() for w in workers), "stress thread did not finish within 5 s"
    assert not failures, f"unguarded buffer raised in a worker thread: {failures!r}"
# ---------------------------------------------------------------------------
# AC-5: on_overrun hook is wired exactly once per overrun.
def test_ac5_on_overrun_hook_fires_once_per_overrun() -> None:
    """The ``on_overrun`` hook receives exactly the record that overran.

    After 15 enqueues into a capacity-16 client, the 16th enqueue must
    return ``OVERRUN`` and the hook must have been called once, with that
    16th record.
    """
    # Arrange
    client = FdrClient(producer_id="c4_pose", capacity=16)
    hook_calls: list[FdrRecord] = []
    client.on_overrun = hook_calls.append
    # Fill the buffer (capacity 16 holds 15 records before overrun).
    for frame in range(15):
        client.enqueue(_make_record(frame_id=frame))
    overflow_record = _make_record(frame_id=999)

    # Act
    outcome = client.enqueue(overflow_record)

    # Assert
    assert outcome == EnqueueResult.OVERRUN
    assert hook_calls == [overflow_record]
def test_ac5_invalid_hook_rejected() -> None:
    """Assigning a non-callable to ``on_overrun`` raises ``TypeError``."""
    # Arrange
    client = FdrClient(producer_id="c4_pose", capacity=16)
    bogus_hook = "not_callable"

    # Act / Assert
    with pytest.raises(TypeError):
        client.on_overrun = bogus_hook  # type: ignore[assignment]
# ---------------------------------------------------------------------------
# AC-6: flush() drains the buffer.
def test_ac6_flush_returns_only_when_empty() -> None:
    """``flush()`` must not return while records are still buffered.

    Eight records are enqueued and a background thread pops them one by one.
    The buffer size is sampled immediately after ``flush()`` returns, before
    joining the drainer, so an early return from ``flush()`` is detected
    rather than being hidden by the drainer finishing on its own.
    """
    # Arrange
    client = FdrClient(producer_id="c5_state", capacity=16)
    for i in range(8):
        client.enqueue(_make_record(frame_id=i))
    drained: list[FdrRecord] = []

    def drain() -> None:
        while True:
            item = client.pop_one()
            if item is not None:
                drained.append(item)
            elif client._buffer_size() == 0:
                # Nothing popped and nothing left: draining is complete.
                return

    # Daemon thread: a wedged drainer must not keep the interpreter alive
    # after the liveness assertion below has already failed the test.
    drainer = threading.Thread(target=drain, daemon=True)
    drainer.start()

    # Act
    client.flush()
    size_after_flush = client._buffer_size()

    # Assert
    drainer.join(timeout=5.0)
    # join() returns None even when it times out, so check liveness explicitly.
    assert not drainer.is_alive(), "drainer thread did not finish within 5 s"
    assert size_after_flush == 0, "flush() returned while records were still buffered"
    assert client._buffer_size() == 0
    assert len(drained) == 8
# ---------------------------------------------------------------------------
# AC-7: empty producer_id raises ValueError.
def test_ac7_empty_producer_id_raises_value_error() -> None:
    """Constructing ``FdrClient`` with an empty ``producer_id`` raises ``ValueError``."""
    # Arrange
    empty_id = ""

    # Act / Assert
    with pytest.raises(ValueError, match="producer_id"):
        FdrClient(producer_id=empty_id, capacity=16)
def test_ac7_make_fdr_client_rejects_empty_producer_id() -> None:
    """``make_fdr_client`` raises ``ValueError`` for an empty ``producer_id``."""
    # Arrange
    default_config = Config()
    empty_id = ""

    # Act / Assert
    with pytest.raises(ValueError, match="producer_id"):
        make_fdr_client(empty_id, default_config)
# ---------------------------------------------------------------------------
# Invariant: one client per producer_id (NFR-reliability).
def test_invariant_make_fdr_client_caches_by_producer_id() -> None:
    """Two ``make_fdr_client`` calls with the same id return the same object."""
    # Arrange
    config = Config()
    producer = "c8_fc_adapter"

    # Act
    first = make_fdr_client(producer, config)
    second = make_fdr_client(producer, config)

    # Assert
    assert first is second
# ---------------------------------------------------------------------------
# Invariant: enqueue does not mutate record.producer_id.
def test_invariant_enqueue_preserves_producer_id() -> None:
    """A record popped after enqueue is the same object, ``producer_id`` intact."""
    # Arrange
    producer = "c5_state"
    client = FdrClient(producer_id=producer, capacity=16)
    original = _make_record(producer_id=producer, frame_id=42)

    # Act
    client.enqueue(original)
    round_tripped = client.pop_one()

    # Assert
    assert round_tripped is original
    assert round_tripped.producer_id == "c5_state"
# ---------------------------------------------------------------------------
# Buffer-level invariants: capacity validation.
def test_capacity_must_be_at_least_minimum() -> None:
    """``SpscRingBuffer(capacity=8)`` raises ``ValueError`` mentioning ``>= 16``."""
    # Arrange
    too_small = 8

    # Act / Assert
    with pytest.raises(ValueError, match=">= 16"):
        SpscRingBuffer(capacity=too_small)
def test_capacity_must_be_power_of_two() -> None:
    """``SpscRingBuffer(capacity=20)`` raises ``ValueError`` mentioning ``power of two``."""
    # Arrange
    not_a_power_of_two = 20

    # Act / Assert
    with pytest.raises(ValueError, match="power of two"):
        SpscRingBuffer(capacity=not_a_power_of_two)
def test_drain_returns_fifo_order() -> None:
    """``drain`` returns the enqueued records in the order they were enqueued."""
    # Arrange
    client = FdrClient(producer_id="c7_inference", capacity=16)
    expected: list[FdrRecord] = []
    for frame in range(5):
        rec = _make_record(frame_id=frame)
        expected.append(rec)
        client.enqueue(rec)

    # Act
    drained = client.drain(max_records=10)

    # Assert
    assert drained == expected