"""FDR log bridge — forwards WARN + ERROR records into the FDR (AZ-267). A :class:`logging.Handler` subclass installed on the root onboard logger (or each named logger). For every emitted record at level WARN or ERROR, it builds a ``kind="log"`` :class:`FdrRecord` carrying the schema-mandated 7 fields and enqueues into a producer-side :class:`FdrClient`. INFO and DEBUG records are dropped at the handler's level filter — they never reach the FDR (AC-3). Public surface frozen by ``_docs/02_document/contracts/shared_logging/log_record_schema.md`` v1.0.0 (the wire shape of the resulting record's payload) and ``_docs/02_document/contracts/shared_fdr_client/fdr_client_protocol.md`` v1.0.0 (the queue we enqueue into). Concurrency ----------- * The handler may run on any logger thread. * A thread-local recursion guard short-circuits any logging call that originates from inside the handler itself — without this guard, a failure path that emits a diagnostic WARN would recurse through the same handler indefinitely. * Saturated-queue diagnostics throttle to 1-per-``_DROP_LOG_EVERY_N`` occurrences via stdout (not via the bridge) to avoid the same loop. """ from __future__ import annotations import datetime as _dt import logging import sys import threading from collections.abc import Callable from typing import Any, Final from gps_denied_onboard.fdr_client.client import EnqueueResult from gps_denied_onboard.fdr_client.records import ( CURRENT_SCHEMA_VERSION, FdrRecord, ) # The resolver's return value must expose ``enqueue(record) -> str``; both # the production ``FdrClient`` and the test ``FakeFdrSink`` satisfy that. # Typed as ``Any`` here to avoid pulling FakeFdrSink (a tests-only fake) # into the production module's type contract. 
_FdrLikeResolver = Callable[[str], Any]

__all__ = [
    "FdrLogBridgeHandler",
    "wire_log_bridge",
]

# Attribute stamped on every bridge handler instance so that an already
# installed bridge can be recognised on a logger.
_BRIDGE_HANDLER_MARKER_ATTR: Final[str] = "_gps_denied_fdr_bridge_handler"

# Only one in every N dropped-record diagnostics is actually printed
# (NFR-reliability: "at least N>=1000").
_DROP_LOG_EVERY_N: Final[int] = 1000


# Level-name normalisation matching the schema contract (``WARNING`` ->
# ``WARN``). Mirrors the logic in ``logging.structured._normalise_level``;
# duplicated locally to keep this module self-contained and avoid a
# circular import.
def _normalise_level(stdlib_levelname: str) -> str:
    """Return the schema level name for a stdlib ``levelname``.

    ``"WARNING"`` becomes ``"WARN"``; every other name is returned as-is.
    """
    return "WARN" if stdlib_levelname == "WARNING" else stdlib_levelname


def _iso_utc(created_epoch: float) -> str:
    """Format an epoch timestamp as RFC 3339 UTC text.

    The result has microsecond precision and a literal ``Z`` suffix,
    e.g. ``1970-01-01T00:00:00.000000Z`` for ``0.0``.
    """
    moment = _dt.datetime.fromtimestamp(created_epoch, tz=_dt.timezone.utc)
    return f"{moment:%Y-%m-%dT%H:%M:%S}.{moment.microsecond:06d}Z"


# Reserved stdlib LogRecord attributes that must not leak into the FDR
# record's payload.kv. Mirrors the same list in
# ``logging.structured._RESERVED_LOG_RECORD_KEYS``.
_RESERVED_LOG_RECORD_KEYS: Final[frozenset[str]] = frozenset(
    {
        "args",
        "asctime",
        "created",
        "exc_info",
        "exc_text",
        "filename",
        "funcName",
        "levelname",
        "levelno",
        "lineno",
        "message",
        "module",
        "msecs",
        "msg",
        "name",
        "pathname",
        "process",
        "processName",
        "relativeCreated",
        "stack_info",
        "taskName",
        "thread",
        "threadName",
        "frame_id",
        "kind",
        "kv",
        "component",
    }
)


def _coerce_jsonable(value: Any) -> Any:
    """Return ``value`` rebuilt from JSON-safe primitives only.

    Scalars (``str``, ``int``, ``float``, ``bool``, ``None``) pass through
    unchanged, dict keys are stringified, and lists/tuples become lists;
    containers are processed recursively.

    Raises:
        TypeError: if ``value`` (or anything nested in it) is of any
            other type.
    """
    if isinstance(value, (str, int, float, bool)) or value is None:
        return value
    if isinstance(value, dict):
        return {str(k): _coerce_jsonable(v) for k, v in value.items()}
    if isinstance(value, (list, tuple)):
        return [_coerce_jsonable(v) for v in value]
    raise TypeError(f"unserialisable kv payload type: {type(value).__name__}")


class FdrLogBridgeHandler(logging.Handler):
    """Forwards WARN + ERROR :class:`logging.LogRecord` into the FDR.

    Constructed with a callable that resolves to the per-record
    :class:`FdrClient`. The callable lets the composition root inject
    either a per-component sink (one FdrClient per component) or a shared
    "log fan-out" client; either way the bridge stays decoupled from the
    registry.
    """

    # Class-level on purpose: the re-entrancy flag is shared by every
    # bridge instance, but is private to each thread.
    _recursion_local = threading.local()

    def __init__(
        self,
        fdr_client_resolver: _FdrLikeResolver,
        *,
        level: int = logging.WARNING,
    ) -> None:
        """Create a bridge handler.

        Args:
            fdr_client_resolver: Called with a component name; must return
                an object exposing ``enqueue(record)``.
            level: Minimum stdlib level the handler accepts.

        Raises:
            TypeError: if ``fdr_client_resolver`` is not callable.
        """
        super().__init__(level=level)
        if not callable(fdr_client_resolver):
            raise TypeError(f"fdr_client_resolver must be callable; got {fdr_client_resolver!r}")
        self._resolver: _FdrLikeResolver = fdr_client_resolver
        self._drop_counter: int = 0
        self._drop_lock: threading.Lock = threading.Lock()
        # Marker used by ``wire_log_bridge`` to recognise bridge handlers.
        setattr(self, _BRIDGE_HANDLER_MARKER_ATTR, True)

    def emit(self, record: logging.LogRecord) -> None:
        """Forward ``record`` to the FDR unless already inside the bridge."""
        if getattr(self._recursion_local, "in_bridge", False):
            # Short-circuit: a log call originating from inside the
            # bridge (saturated-queue diagnostic, etc.) must NOT loop.
            return
        self._recursion_local.in_bridge = True
        try:
            self._emit_unguarded(record)
        finally:
            self._recursion_local.in_bridge = False

    def _emit_unguarded(self, record: logging.LogRecord) -> None:
        """Translate, resolve and enqueue one record.

        Every failure along the way is counted as a drop; nothing is
        raised into the code that issued the logging call.
        """
        try:
            fdr_record = self._build_fdr_record(record)
        except Exception as exc:
            # Translation failed (e.g. a message whose %-args do not
            # match). Skip this record; never raise into the caller.
            self._note_drop(reason=f"translate_error: {exc}")
            return

        component = fdr_record.payload.get("component") or record.name
        try:
            client = self._resolver(component if isinstance(component, str) else record.name)
        except Exception as exc:
            self._note_drop(reason=f"resolve_error: {exc}")
            return

        try:
            result = client.enqueue(fdr_record)
        except Exception as exc:
            # A broken sink (or a resolver that returned something without
            # ``enqueue``) must not turn a logging call into a crash.
            self._note_drop(reason=f"enqueue_error: {exc}")
            return
        if result == EnqueueResult.OVERRUN:
            self._note_drop(reason="queue_saturated")

    def _build_fdr_record(self, record: logging.LogRecord) -> FdrRecord:
        """Build the ``kind="log"`` FDR record for ``record``.

        The payload carries ``level``, ``component``, ``frame_id``,
        ``kind``, ``msg``, ``kv`` and ``exc``. ``kv`` is the record's
        explicit ``kv`` extra when present; otherwise it is collected from
        every non-reserved, non-underscore attribute of the record. A
        ``kv`` that cannot be turned into JSON-safe data is replaced by a
        single ``_format_error`` entry so the record itself still reaches
        the FDR.
        """
        rec_dict = record.__dict__
        component = rec_dict.get("component") or record.name
        kind = rec_dict.get("kind") or "log.diag"
        frame_id = rec_dict.get("frame_id")

        explicit_kv = rec_dict.get("kv")
        try:
            if explicit_kv is None:
                kv_raw: dict[str, Any] = {
                    k: v
                    for k, v in rec_dict.items()
                    if k not in _RESERVED_LOG_RECORD_KEYS and not k.startswith("_")
                }
            else:
                # ``dict()`` raises for a non-mapping ``kv``; handle that
                # the same way as an unserialisable value.
                kv_raw = dict(explicit_kv)
            kv_safe = _coerce_jsonable(kv_raw)
        except (TypeError, ValueError) as exc:
            kv_safe = {"_format_error": f"{type(exc).__name__}: {exc}"}

        exc_text: str | None = None
        if record.exc_info:
            exc_text = logging.Formatter().formatException(record.exc_info)

        return FdrRecord(
            schema_version=CURRENT_SCHEMA_VERSION,
            ts=_iso_utc(record.created),
            producer_id=component,
            kind="log",
            payload={
                "level": _normalise_level(record.levelname),
                "component": component,
                "frame_id": frame_id,
                "kind": kind,
                # Newlines are flattened to spaces in the stored message.
                "msg": record.getMessage().replace("\n", " "),
                "kv": kv_safe,
                "exc": exc_text,
            },
        )

    def _note_drop(self, *, reason: str) -> None:
        """Count a dropped record and print a throttled stderr diagnostic.

        The first drop and every ``_DROP_LOG_EVERY_N``-th one after it are
        reported. The message intentionally bypasses the logger so that
        it cannot re-enter this handler.
        """
        with self._drop_lock:
            self._drop_counter += 1
            current = self._drop_counter
        # ``(n - 1) % N`` reports drops 1, N+1, 2N+1, ... and, unlike
        # ``n % N != 1``, still reports when N is 1.
        if (current - 1) % _DROP_LOG_EVERY_N != 0:
            return
        # stderr (not stdout) so log capture in tests doesn't confuse
        # the bridge's diagnostic with normal application output.
        print(
            f"FdrLogBridgeHandler: dropped record #{current} (reason={reason})",
            file=sys.stderr,
        )


def wire_log_bridge(
    fdr_client_resolver: _FdrLikeResolver,
    *,
    target_loggers: tuple[str, ...] = ("",),
    level: int = logging.WARNING,
) -> FdrLogBridgeHandler:
    """Install a single :class:`FdrLogBridgeHandler` on ``target_loggers``.

    Idempotent: re-calling replaces any prior bridge handler on the same
    logger(s) — exactly one bridge per logger (AC-5). Returns the
    installed handler so the composition root can keep a handle for
    teardown (e.g. test isolation).

    A logger's level is only ever lowered, and only when its *effective*
    level would otherwise filter out records at ``level``. A logger that
    inherits a more verbose level from its parent is left untouched, so
    wiring the bridge never silences INFO/DEBUG output for other handlers.
    """
    handler = FdrLogBridgeHandler(fdr_client_resolver, level=level)
    for name in target_loggers:
        target = logging.getLogger(name)
        # Iterate over a copy: removeHandler mutates ``target.handlers``.
        for existing in list(target.handlers):
            if getattr(existing, _BRIDGE_HANDLER_MARKER_ATTR, False):
                target.removeHandler(existing)
        target.addHandler(handler)
        if target.getEffectiveLevel() > level:
            target.setLevel(level)
    return handler