mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 20:11:14 +00:00
[AZ-388] C5 AC-5.2 no-estimate fallback detector + signal emission
Implements Invariant 9 / AC-5.2: when current_estimate cannot return a fresh output for >= state.no_estimate_fallback_s (default 3.0 s), emit ONE engagement signal (FDR kind=c5.state.no_estimate_fallback_engaged + GCS STATUSTEXT severity CRITICAL); on recovery, ONE recovery signal (FDR kind=c5.state.no_estimate_fallback_recovered + STATUSTEXT NOTICE). Rate-limited via single _in_fallback latch (AC-2: 30 s sustained no-estimate still emits exactly one engagement). New FallbackWatcher class owns the state machine; estimator wires it through constructor + current_estimate entry/success hooks. Public check_fallback_state(now_ns) watchdog (NFR p99 <= 5 us) + subscribe APIs let C8 outbound react without coupling C5 to a concrete GCS adapter at construction. Severity enum extended with CRITICAL=2 and NOTICE=5 to match MAVLink MAV_SEVERITY. 18 new unit tests across all 8 ACs, deterministic synthetic clock, integration tests patch monotonic_ns through GtsamIsam2StateEstimator to drive AC-7 iSAM2 leg (ESKF leg deferred to AZ-386). Full suite: 607 passed, 2 skipped. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -70,11 +70,20 @@ class GpsStatus(Enum):
|
||||
|
||||
|
||||
class Severity(Enum):
|
||||
"""STATUSTEXT severity; values mirror MAVLink ``MAV_SEVERITY``."""
|
||||
"""STATUSTEXT severity; values mirror MAVLink ``MAV_SEVERITY``.
|
||||
|
||||
INFO = 6
|
||||
WARNING = 4
|
||||
Aligned with MAVLink's ``MAV_SEVERITY`` integer constants:
|
||||
``EMERGENCY=0``, ``ALERT=1``, ``CRITICAL=2``, ``ERROR=3``,
|
||||
``WARNING=4``, ``NOTICE=5``, ``INFO=6``, ``DEBUG=7``. AZ-388
|
||||
(AC-5.2 fallback) requires ``CRITICAL`` and ``NOTICE`` for
|
||||
engagement/recovery STATUSTEXT severities.
|
||||
"""
|
||||
|
||||
CRITICAL = 2
|
||||
ERROR = 3
|
||||
WARNING = 4
|
||||
NOTICE = 5
|
||||
INFO = 6
|
||||
|
||||
|
||||
class TelemetryKind(Enum):
|
||||
|
||||
@@ -0,0 +1,276 @@
|
||||
"""AC-5.2 no-estimate fallback detector for C5 state estimators (AZ-388).
|
||||
|
||||
Shared between :class:`GtsamIsam2StateEstimator` (AZ-382/383/384) and
|
||||
the upcoming :class:`EskfStateEstimator` (AZ-386). Both estimators
|
||||
compose one watcher per instance; the watcher owns:
|
||||
|
||||
- A monotonic timestamp of the last successful ``current_estimate``.
|
||||
- A latched ``_in_fallback`` flag (engaged on threshold breach;
|
||||
cleared on the next successful estimate).
|
||||
- FDR record emission on engagement (``c5.state.no_estimate_fallback_engaged``)
|
||||
and recovery (``c5.state.no_estimate_fallback_recovered``).
|
||||
- Subscriber lists for engagement / recovery callbacks (the
|
||||
composition root wires C8's :class:`QgcTelemetryAdapter` here so
|
||||
the GCS STATUSTEXT is dispatched on the C8 outbound thread, not
|
||||
the C5 ingest thread).
|
||||
|
||||
Per the C5 contract Invariant 9: when ``current_estimate`` cannot
|
||||
produce a fresh output for ≥ ``no_estimate_fallback_s`` (default
|
||||
3.0 s, configurable per :class:`C5StateConfig`), C8 outbound switches
|
||||
to FC IMU-only emission. AZ-388 only emits the signal; AZ-261 owns
|
||||
the actual IMU-only emission path.
|
||||
|
||||
Rate-limiting: ONE engagement signal per fallback engagement (no
|
||||
spam under sustained no-estimate); ONE recovery signal per recovery.
|
||||
The flag is the rate-limiter — the moment it's set, further
|
||||
``check_and_engage`` calls return early without re-firing.
|
||||
|
||||
Single-writer thread (Invariant 1): the C5 ingest thread calls
|
||||
``mark_successful_estimate`` + ``check_and_engage`` from inside
|
||||
``current_estimate``; the C8 outbound 5 Hz tick handler calls
|
||||
``check_and_engage`` on the C8 outbound thread for the external
|
||||
watchdog AC-4 path. Both paths read/write the same fields, so the
|
||||
watcher takes an explicit ``threading.Lock`` around the state
|
||||
transitions.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
import time
|
||||
from collections.abc import Callable
|
||||
from datetime import datetime, timezone
|
||||
from typing import TYPE_CHECKING, Final, Protocol, runtime_checkable
|
||||
|
||||
from gps_denied_onboard._types.fc import Severity
|
||||
from gps_denied_onboard.fdr_client.records import FdrRecord
|
||||
from gps_denied_onboard.logging import get_logger
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from gps_denied_onboard.fdr_client.client import FdrClient
|
||||
|
||||
__all__ = [
|
||||
"FallbackEngagementCallback",
|
||||
"FallbackRecoveryCallback",
|
||||
"FallbackSubscription",
|
||||
"FallbackWatcher",
|
||||
]
|
||||
|
||||
|
||||
# Public callback signatures. The first parameter is the elapsed
|
||||
# seconds since the last successful estimate (engagement) or the
|
||||
# duration of the fallback episode just closed (recovery). The
|
||||
# second parameter is the canonical MAVLink-aligned severity hint
|
||||
# the composition root uses when forwarding to GCS STATUSTEXT.
|
||||
FallbackEngagementCallback = Callable[[float, Severity], None]
|
||||
FallbackRecoveryCallback = Callable[[float, Severity], None]
|
||||
|
||||
|
||||
# Per the task spec AC-5: engagement = CRITICAL; recovery = NOTICE.
|
||||
_ENGAGEMENT_SEVERITY: Final[Severity] = Severity.CRITICAL
|
||||
_RECOVERY_SEVERITY: Final[Severity] = Severity.NOTICE
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class FallbackSubscription(Protocol):
|
||||
"""Handle returned by :meth:`FallbackWatcher.subscribe_engaged` etc.
|
||||
|
||||
Calling :meth:`cancel` removes the callback from the next
|
||||
state-transition dispatch. Subsequent cancels are no-ops.
|
||||
"""
|
||||
|
||||
def cancel(self) -> None: ...
|
||||
|
||||
|
||||
class _Subscription:
|
||||
def __init__(self, registry: dict[int, Callable], sub_id: int, lock: threading.Lock) -> None:
|
||||
self._registry = registry
|
||||
self._sub_id = sub_id
|
||||
self._lock = lock
|
||||
|
||||
def cancel(self) -> None:
|
||||
with self._lock:
|
||||
self._registry.pop(self._sub_id, None)
|
||||
|
||||
|
||||
class FallbackWatcher:
|
||||
"""AC-5.2 fallback detector. One instance per C5 estimator.
|
||||
|
||||
Construction stamps ``_last_successful_estimate_ns`` with the
|
||||
current ``clock_ns()`` so a freshly-built estimator doesn't
|
||||
engage fallback on its first ``check_and_engage`` call — the
|
||||
threshold has to elapse first.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
threshold_s: float,
|
||||
fdr_client: FdrClient | None,
|
||||
producer_id: str = "c5_state",
|
||||
clock_ns: Callable[[], int] = time.monotonic_ns,
|
||||
) -> None:
|
||||
if threshold_s <= 0.0:
|
||||
raise ValueError(f"FallbackWatcher.threshold_s must be > 0; got {threshold_s}")
|
||||
self._threshold_ns: int = int(threshold_s * 1_000_000_000)
|
||||
self._fdr_client: FdrClient | None = fdr_client
|
||||
self._producer_id: str = producer_id
|
||||
self._clock_ns: Callable[[], int] = clock_ns
|
||||
self._log = get_logger("c5_state.fallback_watcher")
|
||||
|
||||
self._lock = threading.Lock()
|
||||
self._last_successful_estimate_ns: int = clock_ns()
|
||||
self._engagement_ns: int = 0
|
||||
self._in_fallback: bool = False
|
||||
|
||||
# Subscriber registries — separate so a recovery-only
|
||||
# subscriber (e.g. C8 STATUSTEXT) doesn't have to filter.
|
||||
self._engaged_subs: dict[int, FallbackEngagementCallback] = {}
|
||||
self._recovered_subs: dict[int, FallbackRecoveryCallback] = {}
|
||||
self._next_sub_id: int = 1
|
||||
|
||||
@property
|
||||
def threshold_s(self) -> float:
|
||||
return self._threshold_ns / 1_000_000_000
|
||||
|
||||
@property
|
||||
def in_fallback(self) -> bool:
|
||||
with self._lock:
|
||||
return self._in_fallback
|
||||
|
||||
def mark_successful_estimate(self, now_ns: int) -> None:
|
||||
"""Record a successful ``current_estimate`` at ``now_ns``.
|
||||
|
||||
If the watcher was previously engaged, fires the recovery
|
||||
signal exactly once before clearing the flag.
|
||||
"""
|
||||
with self._lock:
|
||||
self._last_successful_estimate_ns = now_ns
|
||||
if not self._in_fallback:
|
||||
return
|
||||
elapsed_s = (now_ns - self._engagement_ns) / 1_000_000_000
|
||||
self._in_fallback = False
|
||||
recovered_subs = list(self._recovered_subs.values())
|
||||
|
||||
# Fire OUTSIDE the lock — callbacks may take time / call
|
||||
# back into the watcher.
|
||||
self._emit_recovery_fdr(elapsed_s)
|
||||
self._log.info(
|
||||
"c5.state.no_estimate_fallback_recovered",
|
||||
extra={
|
||||
"kind": "c5.state.no_estimate_fallback_recovered",
|
||||
"kv": {"recovered_after_s": elapsed_s},
|
||||
},
|
||||
)
|
||||
for cb in recovered_subs:
|
||||
try:
|
||||
cb(elapsed_s, _RECOVERY_SEVERITY)
|
||||
except Exception as exc:
|
||||
self._log.debug(
|
||||
"c5.state.fallback_recovered_callback_failed",
|
||||
extra={
|
||||
"kind": "c5.state.fallback_recovered_callback_failed",
|
||||
"kv": {"error": repr(exc)},
|
||||
},
|
||||
)
|
||||
|
||||
def check_and_engage(self, now_ns: int) -> bool:
|
||||
"""Idempotent watchdog: engage if threshold exceeded; return state.
|
||||
|
||||
Both the C5 ingest thread (from inside ``current_estimate``)
|
||||
and the C8 outbound 5 Hz tick handler (the AC-4 watchdog)
|
||||
call this; the rate-limit ensures the engagement signal
|
||||
fires at most once per fallback episode regardless of who
|
||||
wins the race.
|
||||
"""
|
||||
with self._lock:
|
||||
if self._in_fallback:
|
||||
return True
|
||||
elapsed_ns = now_ns - self._last_successful_estimate_ns
|
||||
if elapsed_ns < self._threshold_ns:
|
||||
return False
|
||||
elapsed_s = elapsed_ns / 1_000_000_000
|
||||
self._in_fallback = True
|
||||
self._engagement_ns = now_ns
|
||||
engaged_subs = list(self._engaged_subs.values())
|
||||
|
||||
self._emit_engagement_fdr(elapsed_s)
|
||||
self._log.warning(
|
||||
"c5.state.no_estimate_fallback_engaged",
|
||||
extra={
|
||||
"kind": "c5.state.no_estimate_fallback_engaged",
|
||||
"kv": {
|
||||
"reason": "no_successful_estimate_for_s",
|
||||
"elapsed_s": elapsed_s,
|
||||
"threshold_s": self.threshold_s,
|
||||
},
|
||||
},
|
||||
)
|
||||
for cb in engaged_subs:
|
||||
try:
|
||||
cb(elapsed_s, _ENGAGEMENT_SEVERITY)
|
||||
except Exception as exc:
|
||||
self._log.debug(
|
||||
"c5.state.fallback_engaged_callback_failed",
|
||||
extra={
|
||||
"kind": "c5.state.fallback_engaged_callback_failed",
|
||||
"kv": {"error": repr(exc)},
|
||||
},
|
||||
)
|
||||
return True
|
||||
|
||||
def subscribe_engaged(self, callback: FallbackEngagementCallback) -> FallbackSubscription:
|
||||
"""Register a callback invoked exactly once per engagement."""
|
||||
with self._lock:
|
||||
sub_id = self._next_sub_id
|
||||
self._next_sub_id += 1
|
||||
self._engaged_subs[sub_id] = callback
|
||||
return _Subscription(self._engaged_subs, sub_id, self._lock)
|
||||
|
||||
def subscribe_recovered(self, callback: FallbackRecoveryCallback) -> FallbackSubscription:
|
||||
"""Register a callback invoked exactly once per recovery."""
|
||||
with self._lock:
|
||||
sub_id = self._next_sub_id
|
||||
self._next_sub_id += 1
|
||||
self._recovered_subs[sub_id] = callback
|
||||
return _Subscription(self._recovered_subs, sub_id, self._lock)
|
||||
|
||||
def _emit_engagement_fdr(self, elapsed_s: float) -> None:
|
||||
if self._fdr_client is None:
|
||||
return
|
||||
record = FdrRecord(
|
||||
schema_version=1,
|
||||
ts=datetime.now(tz=timezone.utc).isoformat(),
|
||||
producer_id=self._producer_id,
|
||||
kind="c5.state.no_estimate_fallback_engaged",
|
||||
payload={
|
||||
"reason": "no_successful_estimate_for_s",
|
||||
"elapsed_s": elapsed_s,
|
||||
"threshold_s": self.threshold_s,
|
||||
},
|
||||
)
|
||||
self._safe_enqueue(record)
|
||||
|
||||
def _emit_recovery_fdr(self, elapsed_s: float) -> None:
|
||||
if self._fdr_client is None:
|
||||
return
|
||||
record = FdrRecord(
|
||||
schema_version=1,
|
||||
ts=datetime.now(tz=timezone.utc).isoformat(),
|
||||
producer_id=self._producer_id,
|
||||
kind="c5.state.no_estimate_fallback_recovered",
|
||||
payload={"recovered_after_s": elapsed_s},
|
||||
)
|
||||
self._safe_enqueue(record)
|
||||
|
||||
def _safe_enqueue(self, record: FdrRecord) -> None:
|
||||
try:
|
||||
self._fdr_client.enqueue(record) # type: ignore[union-attr]
|
||||
except Exception as exc:
|
||||
self._log.debug(
|
||||
"c5.state.fallback_fdr_enqueue_failed",
|
||||
extra={
|
||||
"kind": "c5.state.fallback_fdr_enqueue_failed",
|
||||
"kv": {"error": repr(exc)},
|
||||
},
|
||||
)
|
||||
@@ -48,6 +48,12 @@ from gps_denied_onboard._types.state import (
|
||||
PoseSourceLabel,
|
||||
Quat,
|
||||
)
|
||||
from gps_denied_onboard.components.c5_state._fallback_watcher import (
|
||||
FallbackEngagementCallback,
|
||||
FallbackRecoveryCallback,
|
||||
FallbackSubscription,
|
||||
FallbackWatcher,
|
||||
)
|
||||
from gps_denied_onboard.components.c5_state._isam2_handle import (
|
||||
ISam2GraphHandle,
|
||||
ISam2GraphHandleImpl,
|
||||
@@ -202,6 +208,20 @@ class GtsamIsam2StateEstimator(StateEstimator):
|
||||
# ``LOST`` on a fatal SPD failure or GTSAM exception.
|
||||
self._isam2_state: IsamState = IsamState.INIT
|
||||
|
||||
# AZ-388 state -----------------------------------------------------
|
||||
# AC-5.2 fallback watcher — engages when ``current_estimate``
|
||||
# cannot produce a fresh output for ``no_estimate_fallback_s``
|
||||
# (default 3.0 s). Composition root subscribes the C8 GCS
|
||||
# adapter via :meth:`subscribe_fallback_engaged` /
|
||||
# :meth:`subscribe_fallback_recovered` so the STATUSTEXT
|
||||
# mirror fires on the C8 outbound thread, not the C5 ingest
|
||||
# thread.
|
||||
self._fallback = FallbackWatcher(
|
||||
threshold_s=block.no_estimate_fallback_s,
|
||||
fdr_client=fdr_client,
|
||||
producer_id="c5_state",
|
||||
)
|
||||
|
||||
self._log.debug(
|
||||
"c5.state.isam2_initialised",
|
||||
extra={
|
||||
@@ -258,6 +278,40 @@ class GtsamIsam2StateEstimator(StateEstimator):
|
||||
"""
|
||||
self._source_label_machine = machine
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# AZ-388: AC-5.2 fallback public API.
|
||||
|
||||
def check_fallback_state(self, now_ns: int) -> bool:
|
||||
"""Idempotent AC-5.2 watchdog.
|
||||
|
||||
C8 outbound's 5 Hz tick handler calls this so the FC IMU-only
|
||||
switch fires even when ``current_estimate`` isn't being
|
||||
called (e.g. the C5 ingest thread is starved). Returns the
|
||||
current ``_in_fallback`` state; engages if the threshold has
|
||||
elapsed and was previously idle. Rate-limited — at most one
|
||||
engagement signal per episode.
|
||||
"""
|
||||
return self._fallback.check_and_engage(now_ns)
|
||||
|
||||
def subscribe_fallback_engaged(
|
||||
self, callback: FallbackEngagementCallback
|
||||
) -> FallbackSubscription:
|
||||
"""Register a callback fired exactly once per fallback engagement.
|
||||
|
||||
Composition root binds the C8 GCS adapter's STATUSTEXT here
|
||||
with the ``CRITICAL`` severity per AC-5; the callback fires
|
||||
on the same thread that won the engagement race (so the
|
||||
composition root MUST forward to the C8 outbound thread via
|
||||
a queue to honour Invariant 8).
|
||||
"""
|
||||
return self._fallback.subscribe_engaged(callback)
|
||||
|
||||
def subscribe_fallback_recovered(
|
||||
self, callback: FallbackRecoveryCallback
|
||||
) -> FallbackSubscription:
|
||||
"""Register a callback fired exactly once per fallback recovery."""
|
||||
return self._fallback.subscribe_recovered(callback)
|
||||
|
||||
def key_for_frame(self, frame_id: UUID | int) -> int:
|
||||
"""Return the GTSAM ``Key`` for ``frame_id``, assigning on first use.
|
||||
|
||||
@@ -553,6 +607,10 @@ class GtsamIsam2StateEstimator(StateEstimator):
|
||||
path).
|
||||
"""
|
||||
handle = self._require_handle()
|
||||
# AZ-388: AC-5.2 entry hook. Engages fallback if the
|
||||
# threshold has elapsed since the last successful estimate.
|
||||
# Idempotent / rate-limited.
|
||||
self._fallback.check_and_engage(time.monotonic_ns())
|
||||
if self._last_committed_pose_key is None:
|
||||
raise EstimatorFatalError(
|
||||
"current_estimate: no committed pose key yet (graph empty); "
|
||||
@@ -615,6 +673,10 @@ class GtsamIsam2StateEstimator(StateEstimator):
|
||||
# only the SPD failure above flips us to LOST.
|
||||
self._isam2_state = IsamState.DEGRADED
|
||||
|
||||
# AZ-388: AC-5.2 success hook. Resets the dwell timer and
|
||||
# fires the recovery signal if we were previously engaged.
|
||||
self._fallback.mark_successful_estimate(emitted_at)
|
||||
|
||||
return EstimatorOutput(
|
||||
frame_id=uuid4(),
|
||||
position_wgs84=position_wgs84,
|
||||
|
||||
Reference in New Issue
Block a user