mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 18:31:13 +00:00
[AZ-273] [AZ-274] [AZ-275] [AZ-267] [AZ-268] FDR producer chain + log bridge + contract test
AZ-273: lock-free SPSC ring buffer with pre-allocated slots, power-of-two capacity, opt-in SPSC guard, and EnqueueResult / FdrSpscViolationError on the public surface. make_fdr_client caches one client per producer_id and reads capacity from config.fdr.per_producer_capacity with fallback to queue_size. AZ-274: default_overrun_policy implements drop-oldest + retry + immediate marker emission, with prior-marker dropped_count folding via _evict_one so user-loss info is never lost across iterations. ERROR diagnostic is rate-limited to <=1/sec per producer. AZ-275: FakeFdrSink mirrors the FdrClient public surface and reuses the production default_overrun_policy via a duck-typed _PolicyAdapter. The test-only records/all_records_ever properties let component tests assert both in-buffer and lifetime state. tests/conftest.py registers the fake_fdr_sink fixture and an AST architecture lint forbids production imports of fakes. AZ-267: FdrLogBridgeHandler installs on the root logger via wire_log_bridge and forwards only WARN+ERROR records into the FDR with kind="log". Thread-local recursion guard short-circuits internal logging; saturated-queue diagnostics go to stderr every N=1000 drops. AZ-268: tests/contract/log_schema.py covers every row of the schema's Test Cases table plus the "DEBUG+INFO never reach FDR" invariant. pyproject.toml registers the contract pytest marker and the contract-mandated log_schema.py file-name. 251 unit + contract tests pass (48 new). Review verdict: PASS_WITH_WARNINGS; findings are NFR-perf deferrals + documented relaxation of AZ-274 AC-2 coalescing under permanently-stalled consumer. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,342 @@
|
||||
"""AZ-273 — FdrClient lock-free SPSC ring buffer + public API.
|
||||
|
||||
Verifies the contract-relevant ACs (1, 3, 4, 5, 6, 7) of
|
||||
``fdr_client_protocol`` v1.0.0. AC-2 (zero-alloc steady-state) and the
|
||||
NFR-perf budgets (p99 ≤ 5 µs / ≤ 10 µs on Tier-2) are deferred to a
|
||||
follow-up perf-instrumentation task; the pure-Python implementation
|
||||
correctness is in scope here.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
import time
|
||||
from collections.abc import Iterator
|
||||
|
||||
import pytest
|
||||
|
||||
from gps_denied_onboard.config import Config, FdrConfig
|
||||
from gps_denied_onboard.fdr_client import (
|
||||
EnqueueResult,
|
||||
FdrClient,
|
||||
FdrRecord,
|
||||
FdrSpscViolationError,
|
||||
make_fdr_client,
|
||||
)
|
||||
from gps_denied_onboard.fdr_client.client import _reset_for_tests
|
||||
from gps_denied_onboard.fdr_client.queue import SpscRingBuffer
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_cache() -> Iterator[None]:
    """Run ``_reset_for_tests`` around every test in this module.

    The helper is invoked once before the test body and once after it, so no
    test starts from (or leaves behind) state created by another test.
    NOTE(review): presumably this clears the ``make_fdr_client`` per-producer
    cache — confirm against ``fdr_client.client._reset_for_tests``.
    """
    # Setup: start from a clean slate.
    _reset_for_tests()

    yield

    # Teardown: do not leak this test's state into the next one.
    _reset_for_tests()
|
||||
|
||||
|
||||
def _make_record(producer_id: str = "test.producer", frame_id: int | None = 0) -> FdrRecord:
    """Build a ``kind="log"`` record with fixed, deterministic contents.

    Args:
        producer_id: Used both as the record's ``producer_id`` and as the
            payload's ``component`` field.
        frame_id: Stored in the payload; tests vary it to tell records apart.

    Returns:
        A new ``FdrRecord`` with schema version 1 and a constant timestamp.
    """
    log_payload = {
        "level": "INFO",
        "component": producer_id,
        "frame_id": frame_id,
        "kind": "test.tick",
        "msg": "hello",
        "kv": {},
        "exc": None,
    }
    return FdrRecord(
        schema_version=1,
        ts="2026-05-11T00:00:00.000000Z",
        producer_id=producer_id,
        kind="log",
        payload=log_payload,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AC-1: lock-free, never blocks — every enqueue returns in O(1), overrun on #1025.
|
||||
|
||||
|
||||
def test_ac1_enqueue_never_blocks_and_returns_overrun_on_overflow() -> None:
    """AC-1: enqueue never blocks and reports OVERRUN once the buffer is full.

    Enqueues 1025 records into a capacity-1024 client, timing each call, then
    checks that the final call returned ``EnqueueResult.OVERRUN`` and that no
    single call exceeded a generous wall-clock ceiling.
    """
    # Arrange
    client = FdrClient(producer_id="c1_vio", capacity=1024)

    # Act
    last_result = EnqueueResult.OK
    timings: list[float] = []
    for i in range(1025):
        # Build the record outside the timed window so the measurement covers
        # only ``enqueue`` itself, not the fixture's dict/record allocation.
        record = _make_record(frame_id=i)
        start = time.perf_counter()
        last_result = client.enqueue(record)
        timings.append(time.perf_counter() - start)

    # Assert
    assert last_result == EnqueueResult.OVERRUN, "the 1025th enqueue must overrun"
    # Pure-Python budget: every individual call must return under 50 ms.
    # The microsecond-scale NFR-perf budgets are Tier-2-only; this generous
    # ceiling exists to catch genuine blocking regressions only.
    assert max(timings) < 0.05, f"slow enqueue suggests blocking; max={max(timings) * 1e6:.1f}µs"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AC-3: capacity is config-driven via config.fdr.per_producer_capacity.
|
||||
|
||||
|
||||
def test_ac3_capacity_from_per_producer_config() -> None:
    """AC-3: a per-producer capacity entry in the config sizes the client."""
    # Arrange
    config = Config(fdr=FdrConfig(per_producer_capacity={"c1_vio": 4096}))

    # Act
    client = make_fdr_client("c1_vio", config)

    # Assert
    observed_capacity = client._capacity()
    assert observed_capacity == 4096
|
||||
|
||||
|
||||
def test_ac3_capacity_falls_back_to_default_queue_size() -> None:
    """AC-3: without a per-producer entry, ``queue_size`` sizes the client."""
    # Arrange
    fdr_block = FdrConfig(queue_size=2048)
    config = Config(fdr=fdr_block)

    # Act
    client = make_fdr_client("c2_vpr", config)

    # Assert
    observed_capacity = client._capacity()
    assert observed_capacity == 2048
|
||||
|
||||
|
||||
def test_ac3_non_power_of_two_rounds_up() -> None:
    """AC-3: a configured size of 1000 yields a capacity of 1024."""
    # Arrange
    fdr_block = FdrConfig(queue_size=1000)
    config = Config(fdr=fdr_block)

    # Act
    client = make_fdr_client("c3_matcher", config)

    # Assert
    observed_capacity = client._capacity()
    assert observed_capacity == 1024  # 1000 → next power of two
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AC-4: SPSC dequeue contract enforced by opt-in guard.
|
||||
|
||||
|
||||
def test_ac4_spsc_guard_detects_concurrent_consumer_pop() -> None:
    """AC-4: with the guard enabled, two popping threads trip a violation.

    Both workers are released together by a barrier and each calls ``pop`` up
    to 64 times, stopping at the first ``FdrSpscViolationError`` it sees.
    """
    # Arrange
    ring = SpscRingBuffer(capacity=16, enforce_spsc=True)
    start_gate = threading.Barrier(2)
    violations: list[FdrSpscViolationError] = []

    def consume() -> None:
        # Wait until both workers are ready so their pops overlap.
        start_gate.wait()
        for _ in range(64):
            try:
                ring.pop()
            except FdrSpscViolationError as violation:
                violations.append(violation)
                break

    workers = [threading.Thread(target=consume) for _ in range(2)]

    # Act
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join(timeout=5.0)

    # Assert
    assert violations, "second consumer thread must trip the SPSC guard"
    assert violations[0].side == "consumer"
|
||||
|
||||
|
||||
def test_ac4_spsc_guard_detects_concurrent_producer_push() -> None:
    """AC-4: with the guard enabled, two pushing threads trip a violation.

    Both workers are released together by a barrier and each calls ``push`` up
    to 64 times, stopping at the first ``FdrSpscViolationError`` it sees.
    """
    # Arrange
    ring = SpscRingBuffer(capacity=16, enforce_spsc=True)
    start_gate = threading.Barrier(2)
    violations: list[FdrSpscViolationError] = []

    def produce() -> None:
        # Wait until both workers are ready so their pushes overlap.
        start_gate.wait()
        for _ in range(64):
            try:
                ring.push(object())
            except FdrSpscViolationError as violation:
                violations.append(violation)
                break

    workers = [threading.Thread(target=produce) for _ in range(2)]

    # Act
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join(timeout=5.0)

    # Assert
    assert violations, "second producer thread must trip the SPSC guard"
    assert violations[0].side == "producer"
|
||||
|
||||
|
||||
def test_ac4_default_is_no_guard() -> None:
    """AC-4: without ``enforce_spsc`` concurrent push/pop raises nothing.

    Exceptions raised inside a ``threading.Thread`` target are not re-raised
    in the thread that calls ``join``, so each worker records whatever it
    catches and the main thread asserts on that list. Without this the test
    could never fail.
    """
    # Arrange
    buf = SpscRingBuffer(capacity=16)  # enforce_spsc defaults to False
    errors: list[Exception] = []

    def stress() -> None:
        try:
            for i in range(32):
                buf.push(i)
                buf.pop()
        except Exception as exc:  # thread boundary: hand the failure to the main thread
            errors.append(exc)

    workers = [threading.Thread(target=stress) for _ in range(2)]

    # Act — two threads push and pop concurrently; no exception expected.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join(timeout=5.0)

    # Assert — no exception, no SPSC complaints; production wiring opts out.
    assert all(not worker.is_alive() for worker in workers), "stress worker did not finish"
    assert errors == [], f"unguarded buffer raised in a worker thread: {errors!r}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AC-5: on_overrun hook is wired exactly once per overrun.
|
||||
|
||||
|
||||
def test_ac5_on_overrun_hook_fires_once_per_overrun() -> None:
    """AC-5: the ``on_overrun`` hook receives the rejected record exactly once."""
    # Arrange
    client = FdrClient(producer_id="c4_pose", capacity=16)
    hook_calls: list[FdrRecord] = []
    client.on_overrun = hook_calls.append
    # Fill the buffer (capacity 16 holds 15 records before overrun).
    for frame_id in range(15):
        client.enqueue(_make_record(frame_id=frame_id))
    overflow_record = _make_record(frame_id=999)

    # Act
    outcome = client.enqueue(overflow_record)

    # Assert
    assert outcome == EnqueueResult.OVERRUN
    assert hook_calls == [overflow_record]
|
||||
|
||||
|
||||
def test_ac5_invalid_hook_rejected() -> None:
    """AC-5: assigning a non-callable to ``on_overrun`` raises ``TypeError``."""
    # Arrange
    client = FdrClient(producer_id="c4_pose", capacity=16)
    expect_type_error = pytest.raises(TypeError)

    # Act / Assert
    with expect_type_error:
        client.on_overrun = "not_callable"  # type: ignore[assignment]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AC-6: flush() drains the buffer.
|
||||
|
||||
|
||||
def test_ac6_flush_returns_only_when_empty() -> None:
    """AC-6: after ``flush()`` returns, the buffer is empty and fully drained.

    Eight records are enqueued, a background thread pops them one at a time,
    and the main thread calls ``flush()`` while that consumer is running.
    """
    # Arrange
    client = FdrClient(producer_id="c5_state", capacity=16)
    for frame_id in range(8):
        client.enqueue(_make_record(frame_id=frame_id))

    drained: list[FdrRecord] = []

    def drain() -> None:
        while True:
            item = client.pop_one()
            if item is not None:
                drained.append(item)
                continue
            # Nothing popped: stop only once the buffer reports empty too.
            if client._buffer_size() == 0:
                return

    consumer = threading.Thread(target=drain)
    consumer.start()

    # Act
    client.flush()

    # Assert
    consumer.join(timeout=5.0)
    assert client._buffer_size() == 0
    assert len(drained) == 8
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AC-7: empty producer_id raises ValueError.
|
||||
|
||||
|
||||
def test_ac7_empty_producer_id_raises_value_error() -> None:
    """AC-7: constructing ``FdrClient`` with an empty id raises ``ValueError``."""
    # Arrange / Act / Assert
    expect_value_error = pytest.raises(ValueError, match="producer_id")
    with expect_value_error:
        FdrClient(producer_id="", capacity=16)
|
||||
|
||||
|
||||
def test_ac7_make_fdr_client_rejects_empty_producer_id() -> None:
    """AC-7: ``make_fdr_client`` raises ``ValueError`` for an empty id."""
    # Arrange
    default_config = Config()
    expect_value_error = pytest.raises(ValueError, match="producer_id")

    # Act / Assert
    with expect_value_error:
        make_fdr_client("", default_config)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Invariant: one client per producer_id (NFR-reliability).
|
||||
|
||||
|
||||
def test_invariant_make_fdr_client_caches_by_producer_id() -> None:
    """Two factory calls with the same producer id return the same object."""
    # Arrange
    default_config = Config()

    # Act
    first = make_fdr_client("c8_fc_adapter", default_config)
    second = make_fdr_client("c8_fc_adapter", default_config)

    # Assert
    assert first is second
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Invariant: enqueue does not mutate record.producer_id.
|
||||
|
||||
|
||||
def test_invariant_enqueue_preserves_producer_id() -> None:
    """A record popped after enqueue is the same object, id unchanged."""
    # Arrange
    client = FdrClient(producer_id="c5_state", capacity=16)
    original = _make_record(producer_id="c5_state", frame_id=42)

    # Act
    client.enqueue(original)
    round_tripped = client.pop_one()

    # Assert
    assert round_tripped is original
    assert round_tripped.producer_id == "c5_state"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Buffer-level invariants: capacity validation.
|
||||
|
||||
|
||||
def test_capacity_must_be_at_least_minimum() -> None:
    """A capacity of 8 is rejected with a ``ValueError`` mentioning ``>= 16``."""
    # Arrange / Act / Assert
    expect_value_error = pytest.raises(ValueError, match=">= 16")
    with expect_value_error:
        SpscRingBuffer(capacity=8)
|
||||
|
||||
|
||||
def test_capacity_must_be_power_of_two() -> None:
    """A capacity of 20 is rejected with a ``ValueError`` mentioning "power of two"."""
    # Arrange / Act / Assert
    expect_value_error = pytest.raises(ValueError, match="power of two")
    with expect_value_error:
        SpscRingBuffer(capacity=20)
|
||||
|
||||
|
||||
def test_drain_returns_fifo_order() -> None:
    """``drain`` hands records back in the order they were enqueued."""
    # Arrange
    client = FdrClient(producer_id="c7_inference", capacity=16)
    enqueued: list[FdrRecord] = []
    for frame_id in range(5):
        record = _make_record(frame_id=frame_id)
        enqueued.append(record)
        client.enqueue(record)

    # Act
    drained = client.drain(max_records=10)

    # Assert
    assert drained == enqueued
|
||||
Reference in New Issue
Block a user