mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 20:51:14 +00:00
[AZ-273] [AZ-274] [AZ-275] [AZ-267] [AZ-268] FDR producer chain + log bridge + contract test
AZ-273: lock-free SPSC ring buffer with pre-allocated slots, power-of- two capacity, opt-in SPSC guard, and EnqueueResult / FdrSpscViolationError on the public surface. make_fdr_client caches one client per producer_id and reads capacity from config.fdr.per_producer_capacity with fallback to queue_size. AZ-274: default_overrun_policy implements drop-oldest + retry + immediate marker emission, with prior-marker dropped_count folding via _evict_one so user-loss info is never lost across iterations. ERROR diagnostic is rate-limited to <=1/sec per producer. AZ-275: FakeFdrSink mirrors the FdrClient public surface and reuses the production default_overrun_policy via a duck-typed _PolicyAdapter. The test-only records/all_records_ever properties let component tests assert both in-buffer and lifetime state. tests/conftest.py registers the fake_fdr_sink fixture and an AST architecture lint forbids production imports of fakes. AZ-267: FdrLogBridgeHandler installs on the root logger via wire_log_bridge and forwards only WARN+ERROR records into the FDR with kind="log". Thread-local recursion guard short-circuits internal logging; saturated- queue diagnostics go to stderr every N=1000 drops. AZ-268: tests/contract/log_schema.py covers every row of the schema's Test Cases table plus the "DEBUG+INFO never reach FDR" invariant. pyproject.toml registers the contract pytest marker and the contract-mandated log_schema.py file-name. 251 unit + contract tests pass (48 new). Review verdict: PASS_WITH_WARNINGS; findings are NFR-perf deferrals + documented relaxation of AZ-274 AC-2 coalescing under permanently-stalled consumer. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -1,16 +1,227 @@
|
||||
"""FDR producer-side client API — STUB.
|
||||
"""``FdrClient`` — producer-side FDR queue (AZ-273 / E-CC-FDR-CLIENT).
|
||||
|
||||
Concrete client is owned by AZ-273. Bootstrap exposes the class so every component
|
||||
can type `fdr: FdrClient` on its constructor.
|
||||
The single handle every onboard producer holds. Calls :meth:`enqueue`
|
||||
on its component-local frame; never blocks. The consumer side is the
|
||||
C13 writer thread (AZ-248) which drains via :meth:`pop_one` /
|
||||
:meth:`drain`.
|
||||
|
||||
Public surface frozen by
|
||||
``_docs/02_document/contracts/shared_fdr_client/fdr_client_protocol.md``
|
||||
v1.0.0.
|
||||
|
||||
Capacity sourcing
|
||||
-----------------
|
||||
``make_fdr_client(producer_id, config)`` resolves the per-producer
|
||||
capacity in this precedence order:
|
||||
|
||||
1. ``config.fdr.per_producer_capacity[producer_id]`` if present.
|
||||
2. ``config.fdr.queue_size`` (the documented cross-cutting default).
|
||||
|
||||
If the resolved value is not a positive power of two, it is rounded UP
|
||||
to the next power of two (and clipped to :data:`MIN_CAPACITY`).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable
|
||||
from typing import Final
|
||||
|
||||
from gps_denied_onboard.config import Config
|
||||
from gps_denied_onboard.fdr_client.queue import (
|
||||
MIN_CAPACITY,
|
||||
FdrSpscViolationError,
|
||||
SpscRingBuffer,
|
||||
)
|
||||
from gps_denied_onboard.fdr_client.records import FdrRecord
|
||||
from gps_denied_onboard.logging import get_logger
|
||||
|
||||
__all__ = [
|
||||
"EnqueueResult",
|
||||
"FdrClient",
|
||||
"FdrSpscViolationError",
|
||||
"make_fdr_client",
|
||||
]
|
||||
|
||||
|
||||
class EnqueueResult:
|
||||
"""Return value of :meth:`FdrClient.enqueue`.
|
||||
|
||||
String enum (not :class:`enum.Enum`) so the steady-state path
|
||||
returns an interned string instead of allocating a wrapper.
|
||||
"""
|
||||
|
||||
OK: Final[str] = "ok"
|
||||
OVERRUN: Final[str] = "overrun"
|
||||
|
||||
|
||||
_DIAG_LOGGER_NAME: Final[str] = "shared.fdr_client"
|
||||
|
||||
|
||||
def _next_power_of_two(value: int) -> int:
|
||||
"""Round ``value`` UP to the next power of two (clipped to ``MIN_CAPACITY``)."""
|
||||
if value <= MIN_CAPACITY:
|
||||
return MIN_CAPACITY
|
||||
if value & (value - 1) == 0:
|
||||
return value
|
||||
return 1 << value.bit_length()
|
||||
|
||||
|
||||
class FdrClient:
|
||||
"""Producer-side FDR API: enqueue records, drop-oldest on overrun."""
|
||||
"""Producer-side FDR queue handle for a single component."""
|
||||
|
||||
def emit(self, record: FdrRecord) -> None:
|
||||
raise NotImplementedError("FdrClient.emit concrete impl is AZ-273")
|
||||
def __init__(
|
||||
self,
|
||||
producer_id: str,
|
||||
capacity: int,
|
||||
*,
|
||||
enforce_spsc: bool = False,
|
||||
_emit_diag_log: bool = True,
|
||||
) -> None:
|
||||
if not isinstance(producer_id, str) or not producer_id:
|
||||
raise ValueError(
|
||||
f"FdrClient.producer_id must be a non-empty string; got {producer_id!r}"
|
||||
)
|
||||
normalised_capacity = _next_power_of_two(capacity)
|
||||
self._buffer: SpscRingBuffer = SpscRingBuffer(
|
||||
normalised_capacity, enforce_spsc=enforce_spsc
|
||||
)
|
||||
self._producer_id: str = producer_id
|
||||
self._on_overrun: Callable[[FdrRecord], None] | None = None
|
||||
if _emit_diag_log:
|
||||
# One-time INFO at construction (NOT on the steady-state path).
|
||||
get_logger(_DIAG_LOGGER_NAME).info(
|
||||
"FdrClient constructed",
|
||||
extra={
|
||||
"component": _DIAG_LOGGER_NAME,
|
||||
"kind": "fdr.client_constructed",
|
||||
"kv": {"producer_id": producer_id, "capacity": normalised_capacity},
|
||||
},
|
||||
)
|
||||
|
||||
@property
|
||||
def producer_id(self) -> str:
|
||||
return self._producer_id
|
||||
|
||||
@property
|
||||
def on_overrun(self) -> Callable[[FdrRecord], None] | None:
|
||||
return self._on_overrun
|
||||
|
||||
@on_overrun.setter
|
||||
def on_overrun(self, hook: Callable[[FdrRecord], None] | None) -> None:
|
||||
if hook is not None and not callable(hook):
|
||||
raise TypeError(f"on_overrun hook must be callable or None; got {hook!r}")
|
||||
self._on_overrun = hook
|
||||
|
||||
def enqueue(self, record: FdrRecord) -> str:
|
||||
"""Non-blocking single-producer enqueue.
|
||||
|
||||
Returns :attr:`EnqueueResult.OK` on success or
|
||||
:attr:`EnqueueResult.OVERRUN` when the buffer is full.
|
||||
On overrun, the ``on_overrun`` hook (if set) is invoked exactly
|
||||
once with the offending record before returning.
|
||||
"""
|
||||
ok = self._buffer.push(record)
|
||||
if ok:
|
||||
return EnqueueResult.OK
|
||||
hook = self._on_overrun
|
||||
if hook is not None:
|
||||
try:
|
||||
hook(record)
|
||||
except Exception:
|
||||
# Overrun-path closure errors are swallowed to keep the
|
||||
# producer's hot path free of exceptions. The policy's
|
||||
# own error logging (AZ-274 NFR-reliability) records
|
||||
# what went wrong.
|
||||
pass
|
||||
return EnqueueResult.OVERRUN
|
||||
|
||||
def pop_one(self) -> FdrRecord | None:
|
||||
"""Single-consumer dequeue. Returns ``None`` when the buffer is empty."""
|
||||
return self._buffer.pop()
|
||||
|
||||
def drain(self, max_records: int) -> list[FdrRecord]:
|
||||
"""Single-consumer drain up to ``max_records`` records in FIFO order."""
|
||||
return self._buffer.drain(max_records)
|
||||
|
||||
def flush(self) -> None:
|
||||
"""Test-only: spin until the buffer is empty.
|
||||
|
||||
Production code MUST NOT call this on the hot path.
|
||||
"""
|
||||
while not self._buffer.is_empty():
|
||||
pass
|
||||
|
||||
def drop_oldest_for_overrun(self) -> FdrRecord | None:
|
||||
"""Producer-side drop-oldest used by the AZ-274 overrun policy.
|
||||
|
||||
Public so the overrun closure can pop the head without
|
||||
violating the SPSC contract on the consumer side.
|
||||
"""
|
||||
return self._buffer.drop_oldest()
|
||||
|
||||
def _capacity(self) -> int:
|
||||
"""Test-only introspection of the underlying buffer's capacity (AZ-273 AC-3)."""
|
||||
return self._buffer.capacity
|
||||
|
||||
def _buffer_size(self) -> int:
|
||||
"""Test-only introspection of the buffer's current size."""
|
||||
return self._buffer.size()
|
||||
|
||||
|
||||
_CACHE: dict[str, FdrClient] = {}
|
||||
|
||||
|
||||
def _resolve_capacity(producer_id: str, config: Config) -> int:
|
||||
"""Pick the per-producer capacity from config; fall back to the default."""
|
||||
per_producer = getattr(config.fdr, "per_producer_capacity", {}) or {}
|
||||
override = per_producer.get(producer_id) if isinstance(per_producer, dict) else None
|
||||
if override is not None:
|
||||
if not isinstance(override, int) or isinstance(override, bool):
|
||||
raise ValueError(
|
||||
f"config.fdr.per_producer_capacity[{producer_id!r}] must be a "
|
||||
f"non-bool int; got {override!r}"
|
||||
)
|
||||
if override <= 0:
|
||||
raise ValueError(
|
||||
f"config.fdr.per_producer_capacity[{producer_id!r}] must be > 0; got {override}"
|
||||
)
|
||||
return override
|
||||
return config.fdr.queue_size
|
||||
|
||||
|
||||
def make_fdr_client(producer_id: str, config: Config) -> FdrClient:
|
||||
"""Construct (or return cached) FdrClient for ``producer_id``.
|
||||
|
||||
Cached: two calls with the same ``producer_id`` return the same
|
||||
instance for the lifetime of the process. ``_reset_for_tests()``
|
||||
clears the cache.
|
||||
"""
|
||||
if not isinstance(producer_id, str) or not producer_id:
|
||||
raise ValueError(
|
||||
f"make_fdr_client.producer_id must be a non-empty string; got {producer_id!r}"
|
||||
)
|
||||
cached = _CACHE.get(producer_id)
|
||||
if cached is not None:
|
||||
return cached
|
||||
|
||||
capacity = _resolve_capacity(producer_id, config)
|
||||
client = FdrClient(producer_id=producer_id, capacity=capacity)
|
||||
|
||||
# Wire the default drop-oldest overrun policy. Imported lazily so
|
||||
# importing ``FdrClient`` for typing purposes does not pull in the
|
||||
# policy module's logger handle.
|
||||
from gps_denied_onboard.fdr_client.overrun_policy import default_overrun_policy
|
||||
|
||||
client.on_overrun = default_overrun_policy(client)
|
||||
_CACHE[producer_id] = client
|
||||
return client
|
||||
|
||||
|
||||
def _reset_for_tests() -> None:
|
||||
"""Clear the cache. Documented test-only entry per contract Non-Goals."""
|
||||
_CACHE.clear()
|
||||
|
||||
|
||||
def _cached_clients() -> dict[str, FdrClient]:
|
||||
"""Test-only snapshot of the cache used by AZ-274 AC-4."""
|
||||
return dict(_CACHE)
|
||||
|
||||
Reference in New Issue
Block a user