mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 11:51:14 +00:00
ba20c2d195
AZ-273: lock-free SPSC ring buffer with pre-allocated slots, power-of- two capacity, opt-in SPSC guard, and EnqueueResult / FdrSpscViolationError on the public surface. make_fdr_client caches one client per producer_id and reads capacity from config.fdr.per_producer_capacity with fallback to queue_size. AZ-274: default_overrun_policy implements drop-oldest + retry + immediate marker emission, with prior-marker dropped_count folding via _evict_one so user-loss info is never lost across iterations. ERROR diagnostic is rate-limited to <=1/sec per producer. AZ-275: FakeFdrSink mirrors the FdrClient public surface and reuses the production default_overrun_policy via a duck-typed _PolicyAdapter. The test-only records/all_records_ever properties let component tests assert both in-buffer and lifetime state. tests/conftest.py registers the fake_fdr_sink fixture and an AST architecture lint forbids production imports of fakes. AZ-267: FdrLogBridgeHandler installs on the root logger via wire_log_bridge and forwards only WARN+ERROR records into the FDR with kind="log". Thread-local recursion guard short-circuits internal logging; saturated- queue diagnostics go to stderr every N=1000 drops. AZ-268: tests/contract/log_schema.py covers every row of the schema's Test Cases table plus the "DEBUG+INFO never reach FDR" invariant. pyproject.toml registers the contract pytest marker and the contract-mandated log_schema.py file-name. 251 unit + contract tests pass (48 new). Review verdict: PASS_WITH_WARNINGS; findings are NFR-perf deferrals + documented relaxation of AZ-274 AC-2 coalescing under permanently-stalled consumer. Co-authored-by: Cursor <cursoragent@cursor.com>
264 lines
9.1 KiB
Python
264 lines
9.1 KiB
Python
"""FDR log bridge — forwards WARN + ERROR records into the FDR (AZ-267).
|
|
|
|
A :class:`logging.Handler` subclass installed on the root onboard logger
|
|
(or each named logger). For every emitted record at level WARN or
|
|
ERROR, it builds a ``kind="log"`` :class:`FdrRecord` carrying the
|
|
schema-mandated 7 fields and enqueues into a producer-side
|
|
:class:`FdrClient`. INFO and DEBUG records are dropped at the handler's
|
|
level filter — they never reach the FDR (AC-3).
|
|
|
|
Public surface frozen by
|
|
``_docs/02_document/contracts/shared_logging/log_record_schema.md``
|
|
v1.0.0 (the wire shape of the resulting record's payload) and
|
|
``_docs/02_document/contracts/shared_fdr_client/fdr_client_protocol.md``
|
|
v1.0.0 (the queue we enqueue into).
|
|
|
|
Concurrency
|
|
-----------
|
|
|
|
* The handler may run on any logger thread.
|
|
* A thread-local recursion guard short-circuits any logging call that
|
|
originates from inside the handler itself — without this guard, a
|
|
failure path that emits a diagnostic WARN would recurse through the
|
|
same handler indefinitely.
|
|
* Saturated-queue diagnostics throttle to 1-per-``_DROP_LOG_EVERY_N``
|
|
occurrences via stdout (not via the bridge) to avoid the same loop.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import datetime as _dt
|
|
import logging
|
|
import sys
|
|
import threading
|
|
from collections.abc import Callable
|
|
from typing import Any, Final
|
|
|
|
from gps_denied_onboard.fdr_client.client import EnqueueResult
|
|
from gps_denied_onboard.fdr_client.records import (
|
|
CURRENT_SCHEMA_VERSION,
|
|
FdrRecord,
|
|
)
|
|
|
|
# The resolver's return value must expose ``enqueue(record) -> str``; both
|
|
# the production ``FdrClient`` and the test ``FakeFdrSink`` satisfy that.
|
|
# Typed as ``Any`` here to avoid pulling FakeFdrSink (a tests-only fake)
|
|
# into the production module's type contract.
|
|
_FdrLikeResolver = Callable[[str], Any]
|
|
|
|
__all__ = [
|
|
"FdrLogBridgeHandler",
|
|
"wire_log_bridge",
|
|
]
|
|
|
|
|
|
_BRIDGE_HANDLER_MARKER_ATTR: Final[str] = "_gps_denied_fdr_bridge_handler"
|
|
|
|
# Throttle the stdout "queue saturated" diagnostic to one in every N
|
|
# occurrences per NFR-reliability ("at least N>=1000").
|
|
_DROP_LOG_EVERY_N: Final[int] = 1000
|
|
|
|
|
|
# Level-name normalisation matching the schema contract (``WARNING`` ->
|
|
# ``WARN``). Mirrors the logic in ``logging.structured._normalise_level``;
|
|
# duplicated locally to keep this module self-contained and avoid a
|
|
# circular import.
|
|
def _normalise_level(stdlib_levelname: str) -> str:
|
|
if stdlib_levelname == "WARNING":
|
|
return "WARN"
|
|
return stdlib_levelname
|
|
|
|
|
|
def _iso_utc(created_epoch: float) -> str:
|
|
"""RFC 3339 UTC with microsecond precision + ``Z`` suffix."""
|
|
dt = _dt.datetime.fromtimestamp(created_epoch, tz=_dt.timezone.utc)
|
|
return dt.strftime("%Y-%m-%dT%H:%M:%S.") + f"{dt.microsecond:06d}Z"
|
|
|
|
|
|
# Reserved stdlib LogRecord attributes that must not leak into the FDR
|
|
# record's payload.kv. Mirrors the same list in
|
|
# ``logging.structured._RESERVED_LOG_RECORD_KEYS``.
|
|
_RESERVED_LOG_RECORD_KEYS: Final[frozenset[str]] = frozenset(
|
|
{
|
|
"args",
|
|
"asctime",
|
|
"created",
|
|
"exc_info",
|
|
"exc_text",
|
|
"filename",
|
|
"funcName",
|
|
"levelname",
|
|
"levelno",
|
|
"lineno",
|
|
"message",
|
|
"module",
|
|
"msecs",
|
|
"msg",
|
|
"name",
|
|
"pathname",
|
|
"process",
|
|
"processName",
|
|
"relativeCreated",
|
|
"stack_info",
|
|
"taskName",
|
|
"thread",
|
|
"threadName",
|
|
"frame_id",
|
|
"kind",
|
|
"kv",
|
|
"component",
|
|
}
|
|
)
|
|
|
|
|
|
def _coerce_jsonable(value: Any) -> Any:
|
|
"""Mirror of the formatter's coercer; raises on non-JSON-safe types."""
|
|
if isinstance(value, (str, int, float, bool)) or value is None:
|
|
return value
|
|
if isinstance(value, dict):
|
|
return {str(k): _coerce_jsonable(v) for k, v in value.items()}
|
|
if isinstance(value, (list, tuple)):
|
|
return [_coerce_jsonable(v) for v in value]
|
|
raise TypeError(f"unserialisable kv payload type: {type(value).__name__}")
|
|
|
|
|
|
class FdrLogBridgeHandler(logging.Handler):
|
|
"""Forwards WARN + ERROR :class:`logging.LogRecord` into the FDR.
|
|
|
|
Constructed with a callable that resolves to the per-record
|
|
:class:`FdrClient`. The callable lets the composition root inject
|
|
either a per-component sink (one FdrClient per component) or a
|
|
shared "log fan-out" client; either way the bridge stays decoupled
|
|
from the registry.
|
|
"""
|
|
|
|
_recursion_local = threading.local()
|
|
|
|
def __init__(
|
|
self,
|
|
fdr_client_resolver: _FdrLikeResolver,
|
|
*,
|
|
level: int = logging.WARNING,
|
|
) -> None:
|
|
super().__init__(level=level)
|
|
if not callable(fdr_client_resolver):
|
|
raise TypeError(f"fdr_client_resolver must be callable; got {fdr_client_resolver!r}")
|
|
self._resolver: _FdrLikeResolver = fdr_client_resolver
|
|
self._drop_counter: int = 0
|
|
self._drop_lock: threading.Lock = threading.Lock()
|
|
setattr(self, _BRIDGE_HANDLER_MARKER_ATTR, True)
|
|
|
|
def emit(self, record: logging.LogRecord) -> None:
|
|
if getattr(self._recursion_local, "in_bridge", False):
|
|
# Short-circuit: a log call originating from inside the
|
|
# bridge (saturated-queue diagnostic, etc.) must NOT loop.
|
|
return
|
|
self._recursion_local.in_bridge = True
|
|
try:
|
|
self._emit_unguarded(record)
|
|
finally:
|
|
self._recursion_local.in_bridge = False
|
|
|
|
def _emit_unguarded(self, record: logging.LogRecord) -> None:
|
|
try:
|
|
fdr_record = self._build_fdr_record(record)
|
|
except Exception as exc:
|
|
# Translation failed (e.g. non-serialisable kv that even the
|
|
# formatter's fallback didn't catch). Skip this record;
|
|
# never raise into the caller.
|
|
self._note_drop(reason=f"translate_error: {exc}")
|
|
return
|
|
|
|
component = fdr_record.payload.get("component") or record.name
|
|
try:
|
|
client = self._resolver(component if isinstance(component, str) else record.name)
|
|
except Exception as exc:
|
|
self._note_drop(reason=f"resolve_error: {exc}")
|
|
return
|
|
|
|
result = client.enqueue(fdr_record)
|
|
if result == EnqueueResult.OVERRUN:
|
|
self._note_drop(reason="queue_saturated")
|
|
|
|
def _build_fdr_record(self, record: logging.LogRecord) -> FdrRecord:
|
|
rec_dict = record.__dict__
|
|
component = rec_dict.get("component") or record.name
|
|
kind = rec_dict.get("kind") or "log.diag"
|
|
frame_id = rec_dict.get("frame_id")
|
|
|
|
explicit_kv = rec_dict.get("kv")
|
|
if explicit_kv is None:
|
|
kv_raw: dict[str, Any] = {
|
|
k: v
|
|
for k, v in rec_dict.items()
|
|
if k not in _RESERVED_LOG_RECORD_KEYS and not k.startswith("_")
|
|
}
|
|
else:
|
|
kv_raw = dict(explicit_kv)
|
|
|
|
try:
|
|
kv_safe = _coerce_jsonable(kv_raw)
|
|
except (TypeError, ValueError) as exc:
|
|
kv_safe = {"_format_error": f"{type(exc).__name__}: {exc}"}
|
|
|
|
exc_text: str | None = None
|
|
if record.exc_info:
|
|
exc_text = logging.Formatter().formatException(record.exc_info)
|
|
|
|
return FdrRecord(
|
|
schema_version=CURRENT_SCHEMA_VERSION,
|
|
ts=_iso_utc(record.created),
|
|
producer_id=component,
|
|
kind="log",
|
|
payload={
|
|
"level": _normalise_level(record.levelname),
|
|
"component": component,
|
|
"frame_id": frame_id,
|
|
"kind": kind,
|
|
"msg": record.getMessage().replace("\n", " "),
|
|
"kv": kv_safe,
|
|
"exc": exc_text,
|
|
},
|
|
)
|
|
|
|
def _note_drop(self, *, reason: str) -> None:
|
|
"""Throttled stdout diagnostic; intentionally bypasses the logger."""
|
|
with self._drop_lock:
|
|
self._drop_counter += 1
|
|
if self._drop_counter % _DROP_LOG_EVERY_N != 1:
|
|
return
|
|
current = self._drop_counter
|
|
# stderr (not stdout) so log capture in tests doesn't confuse
|
|
# the bridge's diagnostic with normal application output.
|
|
print(
|
|
f"FdrLogBridgeHandler: dropped record #{current} (reason={reason})",
|
|
file=sys.stderr,
|
|
)
|
|
|
|
|
|
def wire_log_bridge(
|
|
fdr_client_resolver: _FdrLikeResolver,
|
|
*,
|
|
target_loggers: tuple[str, ...] = ("",),
|
|
level: int = logging.WARNING,
|
|
) -> FdrLogBridgeHandler:
|
|
"""Install a single :class:`FdrLogBridgeHandler` on ``target_loggers``.
|
|
|
|
Idempotent: re-calling replaces any prior bridge handler on the
|
|
same logger(s) — exactly one bridge per logger (AC-5).
|
|
|
|
Returns the installed handler so the composition root can keep a
|
|
handle for teardown (e.g. test isolation).
|
|
"""
|
|
handler = FdrLogBridgeHandler(fdr_client_resolver, level=level)
|
|
for name in target_loggers:
|
|
target = logging.getLogger(name)
|
|
target.handlers = [
|
|
h for h in target.handlers if not getattr(h, _BRIDGE_HANDLER_MARKER_ATTR, False)
|
|
]
|
|
target.addHandler(handler)
|
|
# Lower the logger's effective level if it is currently above WARN.
|
|
if target.level == logging.NOTSET or target.level > level:
|
|
target.setLevel(level)
|
|
return handler
|