mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 14:41:15 +00:00
[AZ-388] C5 AC-5.2 no-estimate fallback detector + signal emission
Implements Invariant 9 / AC-5.2: when current_estimate cannot return a fresh output for >= state.no_estimate_fallback_s (default 3.0 s), emit ONE engagement signal (FDR kind=c5.state.no_estimate_fallback_engaged + GCS STATUSTEXT severity CRITICAL); on recovery, ONE recovery signal (FDR kind=c5.state.no_estimate_fallback_recovered + STATUSTEXT NOTICE). Rate-limited via single _in_fallback latch (AC-2: 30 s sustained no-estimate still emits exactly one engagement). New FallbackWatcher class owns the state machine; estimator wires it through constructor + current_estimate entry/success hooks. Public check_fallback_state(now_ns) watchdog (NFR p99 <= 5 us) + subscribe APIs let C8 outbound react without coupling C5 to a concrete GCS adapter at construction. Severity enum extended with CRITICAL=2 and NOTICE=5 to match MAVLink MAV_SEVERITY. 18 new unit tests across all 8 ACs, deterministic synthetic clock, integration tests patch monotonic_ns through GtsamIsam2StateEstimator to drive AC-7 iSAM2 leg (ESKF leg deferred to AZ-386). Full suite: 607 passed, 2 skipped. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,394 @@
|
||||
"""AZ-388 — AC-5.2 fallback watcher + GtsamIsam2StateEstimator hookup.

Eight ACs from ``_docs/02_tasks/done/AZ-388_c5_ac52_fallback.md``:

- AC-1 Engagement after ``threshold_s`` of no successful estimate.
- AC-2 Engagement is one-shot (rate-limited across the episode).
- AC-3 Recovery signal fires once after a successful estimate.
- AC-4 ``check_fallback_state`` watchdog engages from an external
  caller even without ``current_estimate`` being invoked.
- AC-5 Engagement callback carries :data:`Severity.CRITICAL`;
  recovery callback carries :data:`Severity.NOTICE`.
- AC-6 Configurable threshold (``no_estimate_fallback_s = 5.0``
  engages at 5 s, not 3 s).
- AC-7 iSAM2 estimator participates — entry hook engages,
  success hook recovers.
- AC-8 FDR record shapes — engagement carries
  ``{reason, elapsed_s, threshold_s}``; recovery carries
  ``{recovered_after_s}``.

The ``EskfStateEstimator`` half of AC-7 will be exercised once
AZ-386 lands; the watcher is shared between both estimators so the
AZ-386 wire-up cost is one constructor line + two hook calls.
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest import mock
|
||||
|
||||
import gtsam
|
||||
import pytest
|
||||
|
||||
from gps_denied_onboard._types.fc import Severity
|
||||
from gps_denied_onboard.components.c5_state._fallback_watcher import FallbackWatcher
|
||||
from gps_denied_onboard.components.c5_state.config import C5StateConfig
|
||||
from gps_denied_onboard.components.c5_state.gtsam_isam2_estimator import (
|
||||
GtsamIsam2StateEstimator,
|
||||
create,
|
||||
)
|
||||
from gps_denied_onboard.runtime_root.state_factory import clear_state_registry
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _registry_isolation():
    """Call ``clear_state_registry()`` before and after every test.

    Declared ``autouse`` so each test in this module starts from, and
    leaves behind, a cleared registry without requesting the fixture.
    """
    # Arrange
    clear_state_registry()
    yield
    # Teardown: runs after the test body, whether it passed or failed.
    clear_state_registry()
|
||||
|
||||
|
||||
class _Clock:
|
||||
"""Synthetic ``monotonic_ns()`` source for deterministic timelines."""
|
||||
|
||||
def __init__(self, t_ns: int = 0) -> None:
|
||||
self.t_ns = t_ns
|
||||
|
||||
def __call__(self) -> int:
|
||||
return self.t_ns
|
||||
|
||||
|
||||
def _make_watcher(
    *, threshold_s: float = 3.0, fdr_client: mock.MagicMock | None = None
) -> tuple[FallbackWatcher, _Clock, mock.MagicMock]:
    """Build a ``FallbackWatcher`` driven by a synthetic clock.

    Args:
        threshold_s: Value forwarded as the watcher's ``threshold_s``.
        fdr_client: FDR client double to inject; a fresh ``MagicMock``
            is created when ``None`` is passed.

    Returns:
        ``(watcher, clock, fdr)`` where ``clock`` is the ``_Clock``
        (starting at 0 ns) handed to the watcher as ``clock_ns`` and
        ``fdr`` is the client actually injected.
    """
    clock = _Clock(0)
    fdr = fdr_client if fdr_client is not None else mock.MagicMock()
    watcher = FallbackWatcher(
        threshold_s=threshold_s,
        fdr_client=fdr,
        producer_id="c5_state",
        clock_ns=clock,
    )
    return watcher, clock, fdr
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-1: engagement after threshold elapses
|
||||
|
||||
|
||||
def test_ac1_engagement_after_threshold_elapses() -> None:
    """AC-1: crossing the threshold engages fallback and notifies once.

    With a 3.0 s threshold and the synthetic clock set to 3.5 s,
    ``check_and_engage`` must return ``True`` and the engaged
    subscriber must be called exactly once with an elapsed time of
    about 3.5 s and ``Severity.CRITICAL``.
    """
    watcher, clock, _fdr = _make_watcher(threshold_s=3.0)
    engaged_seen: list[tuple[float, Severity]] = []
    watcher.subscribe_engaged(lambda elapsed, sev: engaged_seen.append((elapsed, sev)))

    clock.t_ns = int(3.5 * 1e9)
    in_fb = watcher.check_and_engage(clock.t_ns)

    assert in_fb is True
    assert len(engaged_seen) == 1
    elapsed_s, sev = engaged_seen[0]
    assert elapsed_s == pytest.approx(3.5, abs=1e-3)
    assert sev == Severity.CRITICAL
|
||||
|
||||
|
||||
def test_ac1_engagement_does_not_fire_before_threshold() -> None:
    """AC-1: just below the threshold nothing is engaged or emitted.

    At 2.99 s against a 3.0 s threshold, ``check_and_engage`` must
    return ``False`` and the engaged subscriber must not be called.
    """
    watcher, clock, _ = _make_watcher(threshold_s=3.0)
    engaged_seen: list[tuple[float, Severity]] = []
    watcher.subscribe_engaged(lambda elapsed, sev: engaged_seen.append((elapsed, sev)))

    clock.t_ns = int(2.99 * 1e9)
    in_fb = watcher.check_and_engage(clock.t_ns)

    assert in_fb is False
    assert engaged_seen == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-2: engagement is one-shot (rate-limited)
|
||||
|
||||
|
||||
def test_ac2_sustained_no_estimate_emits_one_engagement() -> None:
    """AC-2: repeated checks during one episode emit a single engagement.

    ``check_and_engage`` is polled at 3.5 s, 10 s, 20 s and 30 s with no
    successful estimate in between; the engaged subscriber must have
    been called exactly once overall.
    """
    watcher, clock, _ = _make_watcher(threshold_s=3.0)
    engaged_seen: list[float] = []
    watcher.subscribe_engaged(lambda elapsed, _sev: engaged_seen.append(elapsed))

    for seconds in (3.5, 10.0, 20.0, 30.0):
        clock.t_ns = int(seconds * 1e9)
        watcher.check_and_engage(clock.t_ns)

    assert len(engaged_seen) == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-3: recovery signal after engagement
|
||||
|
||||
|
||||
def test_ac3_recovery_after_engagement_fires_once() -> None:
    """AC-3: a successful estimate after engagement emits one recovery.

    Fallback is engaged at 3.5 s and ``mark_successful_estimate`` is
    called at 7.5 s; the recovered subscriber must be called exactly
    once with about 4.0 s (7.5 s - 3.5 s) and ``Severity.NOTICE``.
    """
    watcher, clock, _ = _make_watcher(threshold_s=3.0)
    recovered_seen: list[tuple[float, Severity]] = []
    watcher.subscribe_recovered(lambda elapsed, sev: recovered_seen.append((elapsed, sev)))

    clock.t_ns = int(3.5 * 1e9)
    watcher.check_and_engage(clock.t_ns)
    clock.t_ns = int(7.5 * 1e9)
    watcher.mark_successful_estimate(clock.t_ns)

    assert len(recovered_seen) == 1
    elapsed_s, sev = recovered_seen[0]
    assert elapsed_s == pytest.approx(4.0, abs=1e-3)
    assert sev == Severity.NOTICE
|
||||
|
||||
|
||||
def test_ac3_recovery_does_not_fire_without_engagement() -> None:
    """AC-3: a successful estimate with no prior engagement is silent.

    ``mark_successful_estimate`` is called at 1.0 s without any
    preceding engagement; the recovered subscriber must not be called.
    """
    watcher, clock, _ = _make_watcher(threshold_s=3.0)
    recovered_seen: list[float] = []
    watcher.subscribe_recovered(lambda elapsed, _sev: recovered_seen.append(elapsed))

    clock.t_ns = int(1.0 * 1e9)
    watcher.mark_successful_estimate(clock.t_ns)

    assert recovered_seen == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-4: external watchdog engages without current_estimate calls
|
||||
|
||||
|
||||
def test_ac4_watchdog_engages_without_mark_calls() -> None:
    """AC-4: polling alone is enough to engage fallback.

    Only ``check_and_engage`` is called (never
    ``mark_successful_estimate``); at 3.5 s against a 3.0 s threshold
    both its return value and ``watcher.in_fallback`` must be ``True``.
    """
    watcher, clock, _ = _make_watcher(threshold_s=3.0)

    clock.t_ns = int(3.5 * 1e9)
    in_fb = watcher.check_and_engage(clock.t_ns)

    assert in_fb is True
    assert watcher.in_fallback is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-5: severity hints carried in callbacks
|
||||
|
||||
|
||||
def test_ac5_engagement_severity_is_critical() -> None:
    """AC-5: the engaged callback receives ``Severity.CRITICAL``.

    After engaging at 3.5 s, the severities captured by the engaged
    subscriber must be exactly ``[Severity.CRITICAL]``.
    """
    watcher, clock, _ = _make_watcher(threshold_s=3.0)
    captured: list[Severity] = []
    watcher.subscribe_engaged(lambda _e, sev: captured.append(sev))

    clock.t_ns = int(3.5 * 1e9)
    watcher.check_and_engage(clock.t_ns)

    assert captured == [Severity.CRITICAL]
|
||||
|
||||
|
||||
def test_ac5_recovery_severity_is_notice() -> None:
    """AC-5: the recovered callback receives ``Severity.NOTICE``.

    Fallback is engaged at 3.5 s and a successful estimate is marked at
    7.0 s; the severities captured by the recovered subscriber must be
    exactly ``[Severity.NOTICE]``.
    """
    watcher, clock, _ = _make_watcher(threshold_s=3.0)
    captured: list[Severity] = []
    watcher.subscribe_recovered(lambda _e, sev: captured.append(sev))

    clock.t_ns = int(3.5 * 1e9)
    watcher.check_and_engage(clock.t_ns)
    clock.t_ns = int(7.0 * 1e9)
    watcher.mark_successful_estimate(clock.t_ns)

    assert captured == [Severity.NOTICE]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-6: configurable threshold
|
||||
|
||||
|
||||
def test_ac6_custom_threshold_5s_engages_at_5s() -> None:
    """AC-6: a 5.0 s threshold is honoured instead of the 3.0 s default.

    A check at 3.5 s must not engage; a later check at 5.5 s must emit
    exactly one engagement reporting about 5.5 s elapsed.
    """
    watcher, clock, _ = _make_watcher(threshold_s=5.0)
    engaged_seen: list[float] = []
    watcher.subscribe_engaged(lambda elapsed, _sev: engaged_seen.append(elapsed))

    # Past the default 3.0 s but below the configured 5.0 s: still quiet.
    clock.t_ns = int(3.5 * 1e9)
    watcher.check_and_engage(clock.t_ns)
    assert engaged_seen == []

    clock.t_ns = int(5.5 * 1e9)
    watcher.check_and_engage(clock.t_ns)

    assert len(engaged_seen) == 1
    assert engaged_seen[0] == pytest.approx(5.5, abs=1e-3)
|
||||
|
||||
|
||||
def test_ac6_zero_threshold_rejected() -> None:
    """AC-6: constructing a watcher with ``threshold_s=0.0`` raises.

    The constructor must raise ``ValueError`` whose message matches
    ``"threshold_s must be > 0"``.
    """
    with pytest.raises(ValueError, match="threshold_s must be > 0"):
        FallbackWatcher(threshold_s=0.0, fdr_client=None)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-8: FDR record payload shapes
|
||||
|
||||
|
||||
def test_ac8_engagement_fdr_record_shape() -> None:
    """AC-8: engagement enqueues one FDR record with the expected fields.

    After engaging at 3.5 s with a 3.0 s threshold, ``fdr.enqueue`` must
    have been called once with a record whose ``kind``, ``producer_id``
    and ``payload`` (``reason``, ``elapsed_s``, ``threshold_s``) match
    the values asserted below.
    """
    fdr = mock.MagicMock()
    watcher, clock, _ = _make_watcher(threshold_s=3.0, fdr_client=fdr)

    clock.t_ns = int(3.5 * 1e9)
    watcher.check_and_engage(clock.t_ns)

    fdr.enqueue.assert_called_once()
    # The record is the first positional argument of the enqueue call.
    record = fdr.enqueue.call_args.args[0]
    assert record.kind == "c5.state.no_estimate_fallback_engaged"
    assert record.producer_id == "c5_state"
    assert record.payload["reason"] == "no_successful_estimate_for_s"
    assert record.payload["elapsed_s"] == pytest.approx(3.5, abs=1e-3)
    assert record.payload["threshold_s"] == pytest.approx(3.0, abs=1e-3)
|
||||
|
||||
|
||||
def test_ac8_recovery_fdr_record_shape() -> None:
    """AC-8: recovery enqueues a second FDR record with the expected shape.

    Engaging at 3.5 s and recovering at 7.5 s must produce two
    ``fdr.enqueue`` calls in total; the most recent record must have the
    recovery ``kind`` and a payload of exactly
    ``{"recovered_after_s": ~4.0}``.
    """
    fdr = mock.MagicMock()
    watcher, clock, _ = _make_watcher(threshold_s=3.0, fdr_client=fdr)

    clock.t_ns = int(3.5 * 1e9)
    watcher.check_and_engage(clock.t_ns)
    clock.t_ns = int(7.5 * 1e9)
    watcher.mark_successful_estimate(clock.t_ns)

    assert fdr.enqueue.call_count == 2
    # ``call_args`` refers to the most recent call, i.e. the recovery one.
    recovery_record = fdr.enqueue.call_args.args[0]
    assert recovery_record.kind == "c5.state.no_estimate_fallback_recovered"
    assert recovery_record.payload == {"recovered_after_s": pytest.approx(4.0, abs=1e-3)}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Subscription cancellation
|
||||
|
||||
|
||||
def test_subscription_cancel_silences_callback() -> None:
    """A cancelled engaged subscription receives no callbacks.

    The handle returned by ``subscribe_engaged`` is cancelled before the
    watcher is driven past its threshold; the callback must never run.
    """
    watcher, clock, _ = _make_watcher(threshold_s=3.0)
    seen: list[float] = []
    handle = watcher.subscribe_engaged(lambda elapsed, _sev: seen.append(elapsed))
    handle.cancel()

    clock.t_ns = int(3.5 * 1e9)
    watcher.check_and_engage(clock.t_ns)

    assert seen == []
|
||||
|
||||
|
||||
def test_callback_exception_does_not_break_watcher() -> None:
    """A raising subscriber must not stop later subscribers from running.

    Two engaged subscribers are registered, the first of which raises
    ``RuntimeError``; after engaging at 3.5 s the second subscriber must
    still have been called exactly once.
    """
    watcher, clock, _ = _make_watcher(threshold_s=3.0)
    good_seen: list[float] = []

    def boom(_elapsed: float, _sev: Severity) -> None:
        # Both arguments are intentionally unused; the callback only raises.
        raise RuntimeError("synthetic")

    # Registration order matters: the failing callback is subscribed first.
    watcher.subscribe_engaged(boom)
    watcher.subscribe_engaged(lambda elapsed, _sev: good_seen.append(elapsed))

    clock.t_ns = int(3.5 * 1e9)
    watcher.check_and_engage(clock.t_ns)

    assert len(good_seen) == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Idempotence: no FDR records when fdr_client is None
|
||||
|
||||
|
||||
def test_watcher_without_fdr_client_does_not_crash() -> None:
    """A watcher built with ``fdr_client=None`` still notifies subscribers.

    The watcher is constructed directly (no FDR client, synthetic clock
    at 0) and checked at 3.5 s; the engaged subscriber must receive a
    single elapsed value of about 3.5 s and no exception may escape.
    """
    watcher = FallbackWatcher(threshold_s=3.0, fdr_client=None, clock_ns=_Clock(0))
    seen: list[float] = []
    watcher.subscribe_engaged(lambda elapsed, _sev: seen.append(elapsed))

    watcher.check_and_engage(int(3.5 * 1e9))

    assert seen == [pytest.approx(3.5, abs=1e-3)]
|
||||
|
||||
|
||||
# =====================================================================
|
||||
# AC-7 — iSAM2 estimator participates
|
||||
|
||||
|
||||
def _build_estimator() -> GtsamIsam2StateEstimator:
    """Create an iSAM2 estimator with mocked collaborators.

    The config double exposes a real ``C5StateConfig`` block under
    ``components["c5_state"]`` with ``no_estimate_fallback_s=3.0``; the
    IMU preintegrator, SE3 utils, WGS converter and FDR client are all
    ``MagicMock`` instances.

    Returns:
        The first element of the pair returned by ``create``; the second
        element is discarded.
    """
    block = C5StateConfig(
        strategy="gtsam_isam2", keyframe_window_size=15, no_estimate_fallback_s=3.0
    )
    cfg = mock.MagicMock()
    cfg.components = {"c5_state": block}
    fdr = mock.MagicMock()
    estimator, _ = create(
        config=cfg,
        imu_preintegrator=mock.MagicMock(),
        se3_utils=mock.MagicMock(),
        wgs_converter=mock.MagicMock(),
        fdr_client=fdr,
    )
    return estimator
|
||||
|
||||
|
||||
def _seed_prior(estimator: GtsamIsam2StateEstimator) -> int:
    """Insert one pose prior into the estimator and return its key.

    Builds a single ``PriorFactorPose3`` on a default-constructed
    ``gtsam.Pose3`` with isotropic sigma 0.1 over 6 dimensions, pushes
    it through ``estimator._isam2_handle.update`` with timestamp 0.0,
    and registers the key via ``_record_committed_pose_key``.

    Side effects:
        Increments ``estimator._next_key_counter`` by one.

    Returns:
        The ``x``-symbol key that was committed.
    """
    import gtsam_unstable

    pose = gtsam.Pose3()
    # Allocate the next pose key from the estimator's own counter.
    key = gtsam.symbol("x", estimator._next_key_counter)
    estimator._next_key_counter += 1
    noise = gtsam.noiseModel.Isotropic.Sigma(6, 0.1)
    graph = gtsam.NonlinearFactorGraph()
    graph.add(gtsam.PriorFactorPose3(key, pose, noise))
    values = gtsam.Values()
    values.insert(key, pose)
    # The update call below takes a key -> timestamp map alongside the graph.
    ts_map = gtsam_unstable.FixedLagSmootherKeyTimestampMap()
    ts_map.insert((key, 0.0))
    estimator._isam2_handle.update(graph, values, timestamps=ts_map)
    estimator._record_committed_pose_key(key)
    return key
|
||||
|
||||
|
||||
def test_ac7_isam2_check_fallback_state_engages_via_public_api() -> None:
    """AC-7: ``check_fallback_state`` on the estimator engages fallback.

    With the watcher's last-success timestamp forced to 0 and the public
    watchdog called at 4.0 s (threshold 3.0 s), the call must return
    ``True`` and the engaged subscriber must be called exactly once.
    """
    estimator = _build_estimator()
    engaged_seen: list[tuple[float, Severity]] = []
    estimator.subscribe_fallback_engaged(lambda elapsed, sev: engaged_seen.append((elapsed, sev)))

    # Synthesise a 4 s-old "last successful estimate" by reaching
    # into the watcher state — equivalent to a real timeline where
    # no successful estimate occurred for 4 s.
    estimator._fallback._last_successful_estimate_ns = 0
    in_fb = estimator.check_fallback_state(int(4.0 * 1e9))

    assert in_fb is True
    assert len(engaged_seen) == 1
|
||||
|
||||
|
||||
def test_ac7_isam2_successful_current_estimate_clears_fallback() -> None:
    """AC-7: a successful ``current_estimate`` recovers from fallback.

    A prior is seeded so ``current_estimate`` can succeed, fallback is
    engaged through the synthesised timeline, and the subsequent
    ``current_estimate`` call must clear ``in_fallback`` and fire the
    recovered subscriber exactly once.

    NOTE(review): the clock is not patched for the ``current_estimate``
    call here, so only the callback count is asserted, not the reported
    elapsed value.
    """
    estimator = _build_estimator()
    recovered_seen: list[float] = []
    estimator.subscribe_fallback_recovered(lambda elapsed, _sev: recovered_seen.append(elapsed))
    _seed_prior(estimator)

    # Engage first via the synthesised timeline.
    estimator._fallback._last_successful_estimate_ns = 0
    estimator.check_fallback_state(int(4.0 * 1e9))
    assert estimator._fallback.in_fallback is True

    # Now a successful current_estimate should fire the recovery.
    estimator.current_estimate()

    assert estimator._fallback.in_fallback is False
    assert len(recovered_seen) == 1
|
||||
|
||||
|
||||
def test_ac7_isam2_current_estimate_entry_engages_after_threshold() -> None:
    """AC-7: the ``current_estimate`` entry hook engages fallback.

    With no prior seeded, ``current_estimate`` is expected to raise
    ``EstimatorFatalError``; with ``monotonic_ns`` patched to 4.0 s and
    the last-success timestamp forced to 0, the engaged subscriber must
    nevertheless have been called exactly once.
    """
    estimator = _build_estimator()
    engaged_seen: list[float] = []
    estimator.subscribe_fallback_engaged(lambda elapsed, _sev: engaged_seen.append(elapsed))

    # Synthesise a stale watcher (no successful estimate for > threshold)
    # and call current_estimate WITHOUT a seeded prior so it raises
    # EstimatorFatalError after the entry hook engages fallback.
    estimator._fallback._last_successful_estimate_ns = 0
    # Patch monotonic_ns inside the estimator module so the entry
    # hook sees the synthesised "now".
    from gps_denied_onboard.components.c5_state.errors import EstimatorFatalError

    with (
        mock.patch(
            "gps_denied_onboard.components.c5_state.gtsam_isam2_estimator.time.monotonic_ns",
            return_value=int(4.0 * 1e9),
        ),
        pytest.raises(EstimatorFatalError),
    ):
        estimator.current_estimate()

    assert len(engaged_seen) == 1
|
||||
Reference in New Issue
Block a user