mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-21 22:11:12 +00:00
ba20c2d195
AZ-273: lock-free SPSC ring buffer with pre-allocated slots, power-of-two capacity, opt-in SPSC guard, and EnqueueResult / FdrSpscViolationError on the public surface. make_fdr_client caches one client per producer_id and reads capacity from config.fdr.per_producer_capacity with fallback to queue_size. AZ-274: default_overrun_policy implements drop-oldest + retry + immediate marker emission, with prior-marker dropped_count folding via _evict_one so user-loss info is never lost across iterations. ERROR diagnostic is rate-limited to <=1/sec per producer. AZ-275: FakeFdrSink mirrors the FdrClient public surface and reuses the production default_overrun_policy via a duck-typed _PolicyAdapter. The test-only records/all_records_ever properties let component tests assert both in-buffer and lifetime state. tests/conftest.py registers the fake_fdr_sink fixture and an AST architecture lint forbids production imports of fakes. AZ-267: FdrLogBridgeHandler installs on the root logger via wire_log_bridge and forwards only WARN+ERROR records into the FDR with kind="log". Thread-local recursion guard short-circuits internal logging; saturated-queue diagnostics go to stderr every N=1000 drops. AZ-268: tests/contract/log_schema.py covers every row of the schema's Test Cases table plus the "DEBUG+INFO never reach FDR" invariant. pyproject.toml registers the contract pytest marker and the contract-mandated log_schema.py file-name. 251 unit + contract tests pass (48 new). Review verdict: PASS_WITH_WARNINGS; findings are NFR-perf deferrals + documented relaxation of AZ-274 AC-2 coalescing under permanently-stalled consumer. Co-authored-by: Cursor <cursoragent@cursor.com>
343 lines
9.2 KiB
Python
343 lines
9.2 KiB
Python
"""AZ-273 — FdrClient lock-free SPSC ring buffer + public API.
|
|
|
|
Verifies the contract-relevant ACs (1, 3, 4, 5, 6, 7) of
|
|
``fdr_client_protocol`` v1.0.0. AC-2 (zero-alloc steady-state) and the
|
|
NFR-perf budgets (p99 ≤ 5 µs / ≤ 10 µs on Tier-2) are deferred to a
|
|
follow-up perf-instrumentation task; the pure-Python implementation
|
|
correctness is in scope here.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import threading
|
|
import time
|
|
from collections.abc import Iterator
|
|
|
|
import pytest
|
|
|
|
from gps_denied_onboard.config import Config, FdrConfig
|
|
from gps_denied_onboard.fdr_client import (
|
|
EnqueueResult,
|
|
FdrClient,
|
|
FdrRecord,
|
|
FdrSpscViolationError,
|
|
make_fdr_client,
|
|
)
|
|
from gps_denied_onboard.fdr_client.client import _reset_for_tests
|
|
from gps_denied_onboard.fdr_client.queue import SpscRingBuffer
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def _reset_cache() -> Iterator[None]:
    """Call ``_reset_for_tests()`` before and after every test in this module.

    NOTE(review): presumably this clears the ``make_fdr_client`` per-producer
    cache so tests cannot observe each other's clients — confirm in
    ``fdr_client.client``.
    """
    # Setup: start each test from a freshly reset state.
    _reset_for_tests()
    yield
    # Teardown: reset again so nothing created by the test outlives it.
    _reset_for_tests()
|
|
|
|
|
|
def _make_record(producer_id: str = "test.producer", frame_id: int | None = 0) -> FdrRecord:
    """Build a ``kind="log"`` :class:`FdrRecord` with a fixed timestamp for use in tests.

    Args:
        producer_id: Used both as the record's ``producer_id`` and as the
            payload's ``component``.
        frame_id: Stored in the payload's ``frame_id``; tests use it to tell
            records apart.

    Returns:
        A schema-version-1 record carrying an INFO-level log payload.
    """
    log_payload = {
        "level": "INFO",
        "component": producer_id,
        "frame_id": frame_id,
        "kind": "test.tick",
        "msg": "hello",
        "kv": {},
        "exc": None,
    }
    return FdrRecord(
        schema_version=1,
        ts="2026-05-11T00:00:00.000000Z",
        producer_id=producer_id,
        kind="log",
        payload=log_payload,
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# AC-1: lock-free, never blocks — every enqueue returns in O(1), overrun on #1025.
|
|
|
|
|
|
def test_ac1_enqueue_never_blocks_and_returns_overrun_on_overflow() -> None:
    """AC-1: enqueue returns promptly on every call and reports OVERRUN once the buffer is full."""
    # Arrange
    sut = FdrClient(producer_id="c1_vio", capacity=1024)
    elapsed: list[float] = []
    outcome = EnqueueResult.OK

    # Act — one enqueue more than the configured capacity; each call is timed
    # individually (the timed window includes building the record).
    for frame in range(1025):
        began = time.perf_counter()
        outcome = sut.enqueue(_make_record(frame_id=frame))
        finished = time.perf_counter()
        elapsed.append(finished - began)

    # Assert
    assert outcome == EnqueueResult.OVERRUN, "the 1025th enqueue must overrun"
    # Generous 50 ms per-call ceiling for the pure-Python implementation: the
    # tight NFR-perf budget is Tier-2-only, so this only catches genuine
    # blocking regressions rather than measuring performance.
    worst = max(elapsed)
    assert worst < 0.05, f"slow enqueue suggests blocking; max={worst * 1e6:.1f}µs"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# AC-3: capacity is config-driven via config.fdr.per_producer_capacity.
|
|
|
|
|
|
def test_ac3_capacity_from_per_producer_config() -> None:
    """AC-3: a ``per_producer_capacity`` entry for the producer sets the client's capacity."""
    # Arrange
    config = Config(fdr=FdrConfig(per_producer_capacity={"c1_vio": 4096}))

    # Act
    sut = make_fdr_client("c1_vio", config)

    # Assert
    assert sut._capacity() == 4096
|
|
|
|
|
|
def test_ac3_capacity_falls_back_to_default_queue_size() -> None:
    """AC-3: without a per-producer entry the capacity comes from ``queue_size``."""
    # Arrange — only the global queue_size is configured.
    fdr_block = FdrConfig(queue_size=2048)
    config = Config(fdr=fdr_block)

    # Act
    sut = make_fdr_client("c2_vpr", config)

    # Assert
    assert sut._capacity() == 2048
|
|
|
|
|
|
def test_ac3_non_power_of_two_rounds_up() -> None:
    """AC-3: a configured size that is not a power of two is rounded up to the next one."""
    # Arrange
    fdr_block = FdrConfig(queue_size=1000)
    config = Config(fdr=fdr_block)

    # Act
    sut = make_fdr_client("c3_matcher", config)

    # Assert — 1000 → next power of two
    assert sut._capacity() == 1024
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# AC-4: SPSC dequeue contract enforced by opt-in guard.
|
|
|
|
|
|
def test_ac4_spsc_guard_detects_concurrent_consumer_pop() -> None:
    """AC-4: with ``enforce_spsc=True`` a second popping thread trips the consumer guard."""
    # Arrange
    ring = SpscRingBuffer(capacity=16, enforce_spsc=True)
    start_gate = threading.Barrier(2)
    violations: list[FdrSpscViolationError] = []

    def consume() -> None:
        # Wait until both workers are ready, then pop until the guard fires
        # (or the attempts run out).
        start_gate.wait()
        for _ in range(64):
            try:
                ring.pop()
            except FdrSpscViolationError as violation:
                violations.append(violation)
                return

    workers = [threading.Thread(target=consume) for _ in range(2)]

    # Act
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join(timeout=5.0)

    # Assert
    assert violations, "second consumer thread must trip the SPSC guard"
    assert violations[0].side == "consumer"
|
|
|
|
|
|
def test_ac4_spsc_guard_detects_concurrent_producer_push() -> None:
    """AC-4: with ``enforce_spsc=True`` a second pushing thread trips the producer guard."""
    # Arrange
    ring = SpscRingBuffer(capacity=16, enforce_spsc=True)
    start_gate = threading.Barrier(2)
    violations: list[FdrSpscViolationError] = []

    def produce() -> None:
        # Wait until both workers are ready, then push until the guard fires
        # (or the attempts run out).
        start_gate.wait()
        for _ in range(64):
            try:
                ring.push(object())
            except FdrSpscViolationError as violation:
                violations.append(violation)
                return

    workers = [threading.Thread(target=produce) for _ in range(2)]

    # Act
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join(timeout=5.0)

    # Assert
    assert violations, "second producer thread must trip the SPSC guard"
    assert violations[0].side == "producer"
|
|
|
|
|
|
def test_ac4_default_is_no_guard() -> None:
    """AC-4: without ``enforce_spsc`` concurrent push/pop from two threads raises nothing.

    An exception raised inside a ``threading.Thread`` target is not re-raised
    by ``join()``, so each worker records its own failure and the main thread
    asserts on the collected list. Without this the test could never fail.
    """
    # Arrange
    buf = SpscRingBuffer(capacity=16)  # enforce_spsc defaults to False
    failures: list[Exception] = []

    def stress() -> None:
        # Catch broadly at the thread boundary: anything raised here would
        # otherwise be invisible to the test; it is asserted on below.
        try:
            for i in range(32):
                buf.push(i)
                buf.pop()
        except Exception as exc:
            failures.append(exc)

    workers = [threading.Thread(target=stress) for _ in range(2)]

    # Act — two threads push and pop concurrently; no exception expected.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join(timeout=5.0)

    # Assert — no exception, no SPSC complaints; production wiring opts out.
    assert not any(worker.is_alive() for worker in workers), (
        "stress threads did not finish within the join timeout"
    )
    assert not failures, f"unguarded buffer raised under concurrent use: {failures!r}"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# AC-5: on_overrun hook is wired exactly once per overrun.
|
|
|
|
|
|
def test_ac5_on_overrun_hook_fires_once_per_overrun() -> None:
    """AC-5: the ``on_overrun`` hook is called exactly once, with the record that overran."""
    # Arrange
    sut = FdrClient(producer_id="c4_pose", capacity=16)
    hook_calls: list[FdrRecord] = []
    sut.on_overrun = hook_calls.append
    # Fill the buffer (capacity 16 holds 15 records before overrun).
    for frame in range(15):
        sut.enqueue(_make_record(frame_id=frame))
    overflow_record = _make_record(frame_id=999)

    # Act
    outcome = sut.enqueue(overflow_record)

    # Assert
    assert outcome == EnqueueResult.OVERRUN
    assert hook_calls == [overflow_record]
|
|
|
|
|
|
def test_ac5_invalid_hook_rejected() -> None:
    """AC-5: assigning a non-callable to ``on_overrun`` raises ``TypeError``."""
    # Arrange
    sut = FdrClient(producer_id="c4_pose", capacity=16)

    # Act / Assert — a plain string is not a valid hook.
    with pytest.raises(TypeError):
        sut.on_overrun = "not_callable"  # type: ignore[assignment]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# AC-6: flush() drains the buffer.
|
|
|
|
|
|
def test_ac6_flush_returns_only_when_empty() -> None:
    """AC-6: after ``flush()`` returns, the buffer is empty and all 8 records were consumed."""
    # Arrange
    sut = FdrClient(producer_id="c5_state", capacity=16)
    for frame in range(8):
        sut.enqueue(_make_record(frame_id=frame))

    collected: list[FdrRecord] = []

    def drain() -> None:
        # Consumer side: keep popping; stop only once a pop comes back empty
        # AND the buffer reports size zero.
        while True:
            record = sut.pop_one()
            if record is not None:
                collected.append(record)
                continue
            if sut._buffer_size() == 0:
                return

    consumer = threading.Thread(target=drain)
    consumer.start()

    # Act
    sut.flush()

    # Assert
    consumer.join(timeout=5.0)
    assert sut._buffer_size() == 0
    assert len(collected) == 8
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# AC-7: empty producer_id raises ValueError.
|
|
|
|
|
|
def test_ac7_empty_producer_id_raises_value_error() -> None:
    """AC-7: constructing ``FdrClient`` with an empty ``producer_id`` raises ``ValueError``."""
    # Arrange / Act / Assert — the error message must mention the offending field.
    expectation = pytest.raises(ValueError, match="producer_id")
    with expectation:
        FdrClient(producer_id="", capacity=16)
|
|
|
|
|
|
def test_ac7_make_fdr_client_rejects_empty_producer_id() -> None:
    """AC-7: ``make_fdr_client`` also raises ``ValueError`` for an empty ``producer_id``."""
    # Arrange
    default_config = Config()

    # Act / Assert — the error message must mention the offending field.
    expectation = pytest.raises(ValueError, match="producer_id")
    with expectation:
        make_fdr_client("", default_config)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Invariant: one client per producer_id (NFR-reliability).
|
|
|
|
|
|
def test_invariant_make_fdr_client_caches_by_producer_id() -> None:
    """Invariant: two ``make_fdr_client`` calls with one producer_id return the same object."""
    # Arrange
    default_config = Config()

    # Act
    first = make_fdr_client("c8_fc_adapter", default_config)
    second = make_fdr_client("c8_fc_adapter", default_config)

    # Assert — identity, not mere equality.
    assert first is second
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Invariant: enqueue does not mutate record.producer_id.
|
|
|
|
|
|
def test_invariant_enqueue_preserves_producer_id() -> None:
    """Invariant: a record comes back as the same object with its ``producer_id`` unchanged."""
    # Arrange
    sut = FdrClient(producer_id="c5_state", capacity=16)
    original = _make_record(producer_id="c5_state", frame_id=42)

    # Act — round-trip the record through the buffer.
    sut.enqueue(original)
    round_tripped = sut.pop_one()

    # Assert
    assert round_tripped is original
    assert round_tripped.producer_id == "c5_state"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Buffer-level invariants: capacity validation.
|
|
|
|
|
|
def test_capacity_must_be_at_least_minimum() -> None:
    """A capacity of 8 is rejected with a ``ValueError`` whose message contains ``>= 16``."""
    # Arrange / Act / Assert
    expectation = pytest.raises(ValueError, match=">= 16")
    with expectation:
        SpscRingBuffer(capacity=8)
|
|
|
|
|
|
def test_capacity_must_be_power_of_two() -> None:
    """A capacity of 20 is rejected with a ``ValueError`` mentioning ``power of two``."""
    # Arrange / Act / Assert
    expectation = pytest.raises(ValueError, match="power of two")
    with expectation:
        SpscRingBuffer(capacity=20)
|
|
|
|
|
|
def test_drain_returns_fifo_order() -> None:
    """``drain`` hands back the enqueued records in the order they were enqueued."""
    # Arrange — five distinguishable records, enqueued in ascending frame order.
    sut = FdrClient(producer_id="c7_inference", capacity=16)
    expected = []
    for frame in range(5):
        expected.append(_make_record(frame_id=frame))
    for record in expected:
        sut.enqueue(record)

    # Act — ask for more records than are queued.
    drained = sut.drain(max_records=10)

    # Assert
    assert drained == expected
|