//! AZ-651 — lost-link failsafe ladder. //! //! Two distinct link concerns are tracked: //! //! 1. **Operator modem link** (Ground-Station ↔ autopilot). This is the //! link the ladder watches. Its state climbs: //! `LinkOk` → `LinkDegraded` (5–30 s) → `LinkLost` (>30 s) → //! (optionally) `LinkLostInFollow` when target-follow is active, with //! a configurable 30 s grace before promotion to `LinkLost`. //! //! 2. **MAVLink link** (autopilot ↔ ArduPilot). This one is owned by //! `mavlink_layer`'s heartbeat watchdog. When *it* fires `LinkLost`, //! the airframe runs its OWN built-in failsafe — autopilot must NOT //! issue `MAV_CMD_NAV_RETURN_TO_LAUNCH` itself. The ladder records the //! state (`MavlinkLost`) and surfaces it to health, but never emits //! an RTL trigger while the MAVLink link is down. //! //! The ladder is **pure logic**: `tick(now, input)` is deterministic. //! Wiring (subscribe to MAVLink `LinkEvent`s, drive ticks on a 100 ms //! schedule, call `MavlinkHandle::send_command`, set the executor's //! failsafe flag) lives in [`LostLinkDriver::run`]. use std::sync::Arc; use std::time::Duration; use async_trait::async_trait; use tokio::sync::{broadcast, watch, Mutex}; use tokio::task::JoinHandle; use tokio::time::Instant; use mavlink_layer::{CommandLong, LinkEvent, MavlinkHandle, SendCommandError}; use shared::error::AutopilotError; use crate::FailsafeKind; use crate::MissionExecutorHandle; /// MAVLink `MAV_CMD_NAV_RETURN_TO_LAUNCH` command id. pub const MAV_CMD_NAV_RETURN_TO_LAUNCH: u16 = 20; /// Default operator-link thresholds and tick cadence per AZ-651 §Outcome. #[derive(Debug, Clone, Copy)] pub struct LostLinkConfig { /// Time-since-last-operator-heartbeat after which the ladder moves /// from `LinkOk` to `LinkDegraded`. Default 5 s. pub degraded_after: Duration, /// Time-since-last-operator-heartbeat after which the ladder moves /// from `LinkDegraded` to `LinkLost` (or `LinkLostInFollow` if /// target-follow is active). Default 30 s. pub lost_after: Duration, /// Additional grace before `LinkLostInFollow` is promoted to /// `LinkLost` (and RTL fires). Default 30 s — operators commonly /// have brief connectivity drops mid-follow. pub follow_grace: Duration, /// Driver tick cadence. Default 100 ms (well under the AZ-651 NFR /// budget of ≤5 ms per tick — the cadence is what we wait on; the /// tick itself runs in microseconds). pub tick_interval: Duration, } impl Default for LostLinkConfig { fn default() -> Self { Self { degraded_after: Duration::from_secs(5), lost_after: Duration::from_secs(30), follow_grace: Duration::from_secs(30), tick_interval: Duration::from_millis(100), } } } /// Where the ladder currently sits. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[non_exhaustive] pub enum LadderState { /// Operator-link heartbeats are arriving within `degraded_after`. LinkOk, /// Operator-link heartbeats have been absent for `degraded_after` /// but less than `lost_after`. Health yellow; no command issued. LinkDegraded, /// Operator-link absent past `lost_after`, target-follow inactive. /// On entry, the driver issues RTL exactly once and flips the /// executor's failsafe flag. LinkLost, /// Operator-link absent past `lost_after` AND target-follow is /// active. Stay here for `follow_grace`, then promote to `LinkLost`. LinkLostInFollow, /// The MAVLink link to ArduPilot is down. Airframe handles its own /// failsafe; autopilot NEVER issues RTL itself in this state. The /// ladder still tracks operator-link state internally — once /// MAVLink recovers, the operator-link ladder picks up where it /// left off. MavlinkLost, } /// Per-tick input to the ladder. Externalising every signal keeps the /// logic pure and deterministic; tests construct these directly. #[derive(Debug, Clone, Copy)] pub struct LadderInput { pub now: Instant, pub op_link_up: bool, pub mavlink_link_up: bool, pub target_follow_active: bool, } /// Per-tick output. `rtl_should_fire` is the actionable bit — when /// `true`, the caller must issue exactly one `MAV_CMD_NAV_RETURN_TO_LAUNCH` /// and flip the executor's failsafe flag. `previous_state` is exposed /// (rather than reconstructed) so consumers don't have to track it. #[derive(Debug, Clone, Copy)] pub struct LadderOutput { pub previous_state: LadderState, pub state: LadderState, pub state_changed: bool, pub rtl_should_fire: bool, } /// Broadcast event emitted on state transitions and RTL trigger. Lets /// `operator_bridge` / `telemetry_stream` surface failsafe state to the /// operator UI without polling. #[derive(Debug, Clone, Copy)] #[non_exhaustive] pub enum LadderEvent { StateChanged { from: LadderState, to: LadderState }, RtlIssued { rtl_count: u64 }, RtlSendFailed { rtl_count: u64 }, } /// Pure ladder logic. Stateful only across ticks; one `LostLinkLadder` /// per autopilot instance. #[derive(Debug)] pub struct LostLinkLadder { config: LostLinkConfig, state: LadderState, /// `Some(t)` while operator link has been down since `t`. op_link_down_since: Option, /// `Some(t)` while we have been in `LinkLostInFollow` since `t`. follow_lost_since: Option, /// Count of RTL triggers since construction. Exposed for health. rtl_count: u64, /// `Some(t)` when the operator link last transitioned down. Public /// via [`LostLinkLadder::time_in_state`]. state_entered_at: Option, } impl LostLinkLadder { pub fn new(config: LostLinkConfig) -> Self { Self { config, state: LadderState::LinkOk, op_link_down_since: None, follow_lost_since: None, rtl_count: 0, state_entered_at: None, } } pub fn state(&self) -> LadderState { self.state } pub fn rtl_count(&self) -> u64 { self.rtl_count } /// How long has the ladder been in its current state? `None` if the /// ladder has never advanced past its initial `LinkOk`. pub fn time_in_state(&self, now: Instant) -> Option { self.state_entered_at .map(|t| now.saturating_duration_since(t)) } /// Advance the ladder by one tick. Returns the actionable outcome. /// Caller is responsible for honouring `rtl_should_fire`. pub fn tick(&mut self, input: LadderInput) -> LadderOutput { let prev = self.state; // MAVLink down dominates — airframe handles its own failsafe. // Track operator-link state internally so when MAVLink recovers // we resume the right rung of the ladder, but never fire RTL. if !input.mavlink_link_up { self.advance_op_link_tracking(input); self.set_state(LadderState::MavlinkLost, input.now, prev); return LadderOutput { previous_state: prev, state: LadderState::MavlinkLost, state_changed: prev != LadderState::MavlinkLost, rtl_should_fire: false, }; } // MAVLink is up. Pure operator-link ladder. let new_state = self.compute_op_link_state(input); let entering_lost = new_state == LadderState::LinkLost && prev != LadderState::LinkLost; let rtl_should_fire = entering_lost; if rtl_should_fire { self.rtl_count += 1; } self.set_state(new_state, input.now, prev); LadderOutput { previous_state: prev, state: new_state, state_changed: prev != new_state, rtl_should_fire, } } /// Update `op_link_down_since` / `follow_lost_since` from the /// current input WITHOUT promoting the visible state. Used while /// the ladder is masked by `MavlinkLost`. fn advance_op_link_tracking(&mut self, input: LadderInput) { if input.op_link_up { self.op_link_down_since = None; self.follow_lost_since = None; } else if self.op_link_down_since.is_none() { self.op_link_down_since = Some(input.now); } } fn compute_op_link_state(&mut self, input: LadderInput) -> LadderState { if input.op_link_up { self.op_link_down_since = None; self.follow_lost_since = None; return LadderState::LinkOk; } let down_since = *self.op_link_down_since.get_or_insert(input.now); let elapsed = input.now.saturating_duration_since(down_since); if elapsed < self.config.degraded_after { // Still within the initial OK window. Keep `down_since` // sticky so a short blip doesn't reset the clock. LadderState::LinkOk } else if elapsed < self.config.lost_after { self.follow_lost_since = None; LadderState::LinkDegraded } else if input.target_follow_active { let follow_since = *self.follow_lost_since.get_or_insert(input.now); if input.now.saturating_duration_since(follow_since) < self.config.follow_grace { LadderState::LinkLostInFollow } else { LadderState::LinkLost } } else { self.follow_lost_since = None; LadderState::LinkLost } } fn set_state(&mut self, new_state: LadderState, now: Instant, prev: LadderState) { if prev != new_state { self.state_entered_at = Some(now); } self.state = new_state; } } // ============================================================================ // Driver — owns the ladder and wires it to MAVLink + the executor. // ============================================================================ /// Pluggable command issuer. Production wires this to /// [`MavlinkCommandIssuer`] which calls /// `MavlinkHandle::send_command(MAV_CMD_NAV_RETURN_TO_LAUNCH)`. Tests /// supply a spy implementation so RTL invocations can be counted /// without spinning up a real MAVLink loopback. /// /// The trait deliberately stays narrow (`issue_rtl` only) — adding more /// commands here would couple every failsafe to one trait, and /// AZ-652 / AZ-650 each own their own command surface. #[async_trait] pub trait LostLinkCommandIssuer: Send + Sync { async fn issue_rtl(&self) -> Result<(), AutopilotError>; } /// Production `LostLinkCommandIssuer` backed by `mavlink_layer`. #[derive(Debug, Clone)] pub struct MavlinkCommandIssuer { pub handle: MavlinkHandle, pub target_system: u8, pub target_component: u8, /// Optional override for the `send_command` deadline (default uses /// `MavlinkLayerOptions::command_ack_deadline`). pub ack_deadline: Option, } impl MavlinkCommandIssuer { pub fn new(handle: MavlinkHandle, target_system: u8, target_component: u8) -> Self { Self { handle, target_system, target_component, ack_deadline: None, } } } #[async_trait] impl LostLinkCommandIssuer for MavlinkCommandIssuer { async fn issue_rtl(&self) -> Result<(), AutopilotError> { let cmd = CommandLong { param1: 0.0, param2: 0.0, param3: 0.0, param4: 0.0, param5: 0.0, param6: 0.0, param7: 0.0, command: MAV_CMD_NAV_RETURN_TO_LAUNCH, target_system: self.target_system, target_component: self.target_component, confirmation: 0, }; self.handle .send_command(cmd, self.ack_deadline) .await .map(|_ack| ()) .map_err(|e| match e { SendCommandError::Timeout(d) => { AutopilotError::Internal(format!("RTL command ack timeout after {d:?}")) } SendCommandError::Duplicate(id) => { AutopilotError::Internal(format!("RTL command duplicate in flight (id={id})")) } SendCommandError::ChannelClosed(reason) => { AutopilotError::Internal(format!("RTL command channel closed: {reason}")) } }) } } /// Public read-side handle for the lost-link ladder. Cloneable; safe /// to share across `operator_bridge` / `telemetry_stream` / health. #[derive(Debug, Clone)] pub struct LostLinkLadderHandle { inner: Arc>, events_tx: broadcast::Sender, } impl LostLinkLadderHandle { pub async fn state(&self) -> LadderState { self.inner.lock().await.state() } pub async fn rtl_count(&self) -> u64 { self.inner.lock().await.rtl_count() } pub fn subscribe(&self) -> broadcast::Receiver { self.events_tx.subscribe() } } /// Driver — owns the ladder and ticks it from external signals. /// /// Construct with [`LostLinkDriver::new`] then call /// [`LostLinkDriver::spawn`]. The returned [`LostLinkLadderHandle`] is /// read-only; events can be subscribed to via the handle. pub struct LostLinkDriver { config: LostLinkConfig, command_issuer: Arc, executor: MissionExecutorHandle, /// Operator-link state — `true` means heartbeats arriving. Updated /// externally by `operator_bridge` / `telemetry_stream` wiring. op_link_rx: watch::Receiver, /// Most-recent MAVLink link event. Used to flip the /// `mavlink_link_up` flag fed into the ladder. mavlink_events_rx: broadcast::Receiver, /// Optional override of "now" — for tests. Production passes /// `None`, which makes the driver use `tokio::time::Instant::now`. now_source: Option Instant + Send + Sync>>, /// Optional target-follow signal. `None` means follow-grace is /// never engaged (the case for current autopilot — AZ-684 will /// wire `scan_controller`'s target-follow state in later). target_follow_rx: Option>, /// Initial assumption for MAVLink link state. Production hands in /// `false` (link is initially down until the first inbound /// heartbeat arrives); the driver flips this to `true` on /// `LinkEvent::LinkUp`. initial_mavlink_up: bool, } impl LostLinkDriver { pub fn new( config: LostLinkConfig, command_issuer: Arc, executor: MissionExecutorHandle, op_link_rx: watch::Receiver, mavlink_events_rx: broadcast::Receiver, ) -> Self { Self { config, command_issuer, executor, op_link_rx, mavlink_events_rx, now_source: None, target_follow_rx: None, initial_mavlink_up: false, } } /// Provide a target-follow watch channel. When the watched value /// is `true`, the ladder engages the `LinkLostInFollow` grace. pub fn with_target_follow(mut self, rx: watch::Receiver) -> Self { self.target_follow_rx = Some(rx); self } /// Treat the MAVLink link as up from the start (skip waiting for /// the first `LinkUp` event). Useful in tests where the MAVLink /// peer is presumed healthy. pub fn with_initial_mavlink_up(mut self, up: bool) -> Self { self.initial_mavlink_up = up; self } /// Override the clock — only used in tests. Production omits this. pub fn with_now_source(mut self, f: Arc Instant + Send + Sync>) -> Self { self.now_source = Some(f); self } /// Spawn the driver task. Returns a read-side handle plus the /// background task's join handle. pub fn spawn( self, mut shutdown: watch::Receiver, ) -> (LostLinkLadderHandle, JoinHandle<()>) { let (events_tx, _events_rx) = broadcast::channel::(64); let ladder = Arc::new(Mutex::new(LostLinkLadder::new(self.config))); let handle = LostLinkLadderHandle { inner: ladder.clone(), events_tx: events_tx.clone(), }; let LostLinkDriver { config, command_issuer, executor, mut op_link_rx, mut mavlink_events_rx, now_source, target_follow_rx, initial_mavlink_up, } = self; let mut tf_rx = target_follow_rx; let mut mavlink_link_up = initial_mavlink_up; let join = tokio::spawn(async move { let mut ticker = tokio::time::interval(config.tick_interval); ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); loop { tokio::select! { biased; _ = shutdown.changed() => { tracing::info!("lost_link driver shutdown"); return; } Ok(ev) = mavlink_events_rx.recv() => { match ev { LinkEvent::LinkUp => mavlink_link_up = true, LinkEvent::LinkLost => mavlink_link_up = false, } } _ = ticker.tick() => { let now = match &now_source { Some(f) => (f)(), None => Instant::now(), }; let op_link_up = *op_link_rx.borrow_and_update(); let target_follow_active = tf_rx .as_mut() .map(|rx| *rx.borrow_and_update()) .unwrap_or(false); let output = { let mut guard = ladder.lock().await; guard.tick(LadderInput { now, op_link_up, mavlink_link_up, target_follow_active, }) }; if output.state_changed { let _ = events_tx.send(LadderEvent::StateChanged { from: output.previous_state, to: output.state, }); } if output.rtl_should_fire { let rtl_count_for_log = { let g = ladder.lock().await; g.rtl_count() }; tracing::warn!( rtl_count = rtl_count_for_log, "lost_link: operator link lost; issuing RTL" ); match command_issuer.issue_rtl().await { Ok(()) => { let count = ladder.lock().await.rtl_count(); let _ = events_tx .send(LadderEvent::RtlIssued { rtl_count: count }); } Err(e) => { let count = ladder.lock().await.rtl_count(); tracing::error!(error=%e, "lost_link RTL command failed"); let _ = events_tx .send(LadderEvent::RtlSendFailed { rtl_count: count }); } } if let Err(e) = executor.failsafe_trigger(FailsafeKind::LinkLost).await { tracing::error!(error=%e, "lost_link: executor failsafe_trigger failed"); } } } } } }); (handle, join) } } #[cfg(test)] mod tests { use super::*; fn make_config() -> LostLinkConfig { LostLinkConfig { degraded_after: Duration::from_millis(50), lost_after: Duration::from_millis(150), follow_grace: Duration::from_millis(100), tick_interval: Duration::from_millis(10), } } #[test] fn empty_state_starts_link_ok() { // Arrange let l = LostLinkLadder::new(make_config()); // Assert assert_eq!(l.state(), LadderState::LinkOk); assert_eq!(l.rtl_count(), 0); } #[test] fn mavlink_lost_short_circuits_rtl() { // Arrange — op-link is down for plenty long enough to trigger RTL let mut l = LostLinkLadder::new(make_config()); let t0 = Instant::now(); // Act — but MAVLink is down too. Should never fire RTL. for ms in (0..500).step_by(10) { let out = l.tick(LadderInput { now: t0 + Duration::from_millis(ms), op_link_up: false, mavlink_link_up: false, target_follow_active: false, }); assert!(!out.rtl_should_fire, "rtl fired at t={ms}"); } // Assert assert_eq!(l.state(), LadderState::MavlinkLost); assert_eq!(l.rtl_count(), 0); } }