Files
autopilot/crates/mission_executor/tests/lost_link_ladder.rs
T
Oleksandr Bezdieniezhnykh 2bcd4a8059 [AZ-651] [AZ-668] lost-link failsafe ladder + mapobjects persistence (batch 7)
AZ-651 (mission_executor lost-link ladder):
- LostLinkLadder pure-logic state machine (LinkOk -> Degraded -> Lost
  -> LinkLostInFollow + MavlinkLost branch). Configurable thresholds
  via LostLinkConfig.
- LostLinkCommandIssuer trait + MavlinkCommandIssuer production impl
  emitting MAV_CMD_NAV_RETURN_TO_LAUNCH via MavlinkHandle::send_command.
- LostLinkDriver task wires the ladder to operator-link watch, MAVLink
  LinkEvent broadcast, and optional target-follow signal. On RTL,
  driver calls the issuer THEN MissionExecutorHandle::failsafe_trigger.
- failsafe_trigger(LinkLost | LinkLostInFollow) short-circuits FlyMission
  -> Land via direct FSM state mutation + TransitionEvent emission;
  Paused state is intentionally NOT overridden.
- Tests: 4/4 ACs locally green (degraded-no-rtl; lost-fires-once;
  follow-grace; mavlink-loss-no-rtl) plus driver + FSM integration.

AZ-668 (mapobjects_store persistence):
- Snapshot serializable shape + Store::{to_snapshot,from_snapshot}
  round trip.
- MapObjectsPersistence async trait + JsonSnapshotEngine default impl
  (write to .tmp, sync_all, atomic rename, best-effort parent fsync).
- PersistenceError::{Corrupt, SchemaMismatch} surfaces explicit errors
  on bad blob; PersistenceMetrics tracks last_snapshot_ts,
  snapshot_size_bytes, snapshot_errors_total.
- MapObjectsStore::from_snapshot factory for crash recovery from the
  composition root.
- Tests: 4/4 ACs locally green (round-trip; atomic rename ignores
  partial .tmp; crash recovery preserves pending; corruption returns
  explicit error) plus schema-mismatch + metrics smoke checks.

Quality gates:
- cargo fmt: clean.
- cargo clippy -p mission_executor -p mapobjects_store --tests: 0 warns.
- cargo test --workspace: all green.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-19 18:59:28 +03:00

474 lines
16 KiB
Rust

//! AZ-651 acceptance criteria — lost-link failsafe ladder.
//!
//! AC-1, AC-3, AC-4 are exercised purely against the public
//! `LostLinkLadder` API (deterministic ticks driven by an explicit
//! `Instant`).
//!
//! AC-2 has two halves:
//! - **Pure ladder**: RTL fires exactly once when `LinkOk → LinkLost`
//! happens; subsequent ticks in `LinkLost` do not re-fire. Tested
//! against the ladder directly.
//! - **Integration**: the executor's FSM transitions from
//! `FlyMission` to `Land` when `failsafe_trigger(LinkLost)` is
//! called. Tested via a real `MissionExecutor` and a spy
//! `LostLinkCommandIssuer`.
use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
use std::sync::Arc;
use std::time::{Duration, Instant as StdInstant};
use async_trait::async_trait;
use mission_executor::{
DriverError, FailsafeKind, LadderInput, LadderState, LostLinkCommandIssuer, LostLinkConfig,
LostLinkDriver, LostLinkLadder, MissionDriver, MissionExecutor, MissionExecutorConfig,
MissionExecutorHandle, MissionState, Telemetry,
};
use shared::error::AutopilotError;
use shared::models::mission::MissionWaypoint;
use tokio::sync::{broadcast, watch};
use tokio::time::Instant;
// =============================================================================
// Pure ladder tests (AC-1, AC-2 fire-once half, AC-3, AC-4, MAVLink recovery)
// =============================================================================
/// Compact config so the tests don't have to wait real wall-clock time.
/// degraded_after = 50 ms, lost_after = 150 ms, follow_grace = 100 ms.
fn fast_config() -> LostLinkConfig {
LostLinkConfig {
degraded_after: Duration::from_millis(50),
lost_after: Duration::from_millis(150),
follow_grace: Duration::from_millis(100),
tick_interval: Duration::from_millis(10),
}
}
/// AC-1 — operator-link degraded then recovers; no RTL.
#[test]
fn ac1_degraded_then_recovers_no_rtl() {
// Arrange
let mut l = LostLinkLadder::new(fast_config());
let t0 = Instant::now();
let out = l.tick(LadderInput {
now: t0,
op_link_up: true,
mavlink_link_up: true,
target_follow_active: false,
});
assert_eq!(out.state, LadderState::LinkOk);
// Act — op-link drops; tick at +70 ms (past degraded_after=50 ms)
l.tick(LadderInput {
now: t0 + Duration::from_millis(10),
op_link_up: false,
mavlink_link_up: true,
target_follow_active: false,
});
let out = l.tick(LadderInput {
now: t0 + Duration::from_millis(70),
op_link_up: false,
mavlink_link_up: true,
target_follow_active: false,
});
assert_eq!(out.state, LadderState::LinkDegraded);
assert!(!out.rtl_should_fire);
// Act — op-link recovers before lost_after fires
let out = l.tick(LadderInput {
now: t0 + Duration::from_millis(100),
op_link_up: true,
mavlink_link_up: true,
target_follow_active: false,
});
// Assert
assert_eq!(out.state, LadderState::LinkOk);
assert!(out.state_changed);
assert!(!out.rtl_should_fire);
assert_eq!(l.rtl_count(), 0);
}
/// AC-2 (ladder half) — operator-link lost triggers RTL exactly once.
#[test]
fn ac2_operator_link_lost_triggers_rtl_exactly_once() {
// Arrange
let mut l = LostLinkLadder::new(fast_config());
let t0 = Instant::now();
l.tick(LadderInput {
now: t0,
op_link_up: true,
mavlink_link_up: true,
target_follow_active: false,
});
// Act — op-link drops at +10 ms; tick at +170 ms so the down
// duration (160 ms) exceeds lost_after (150 ms).
l.tick(LadderInput {
now: t0 + Duration::from_millis(10),
op_link_up: false,
mavlink_link_up: true,
target_follow_active: false,
});
let out = l.tick(LadderInput {
now: t0 + Duration::from_millis(170),
op_link_up: false,
mavlink_link_up: true,
target_follow_active: false,
});
// Assert — entered LinkLost; RTL fires
assert_eq!(out.state, LadderState::LinkLost);
assert!(out.state_changed);
assert!(out.rtl_should_fire);
assert_eq!(l.rtl_count(), 1);
// Act — keep ticking while still in LinkLost; RTL must NOT re-fire
for ms in [180, 200, 300, 500, 1000] {
let out = l.tick(LadderInput {
now: t0 + Duration::from_millis(ms),
op_link_up: false,
mavlink_link_up: true,
target_follow_active: false,
});
assert_eq!(out.state, LadderState::LinkLost);
assert!(!out.rtl_should_fire, "rtl re-fired at +{ms} ms");
}
assert_eq!(l.rtl_count(), 1);
}
/// AC-3 — `LinkLostInFollow` grace then RTL.
#[test]
fn ac3_lost_in_follow_grace_then_rtl() {
// Arrange — degraded=50, lost=150, follow_grace=100 → RTL fires at +250 ms total
let mut l = LostLinkLadder::new(fast_config());
let t0 = Instant::now();
l.tick(LadderInput {
now: t0,
op_link_up: true,
mavlink_link_up: true,
target_follow_active: true,
});
// Act — drop op-link at +10 ms; at +170 ms we'd be LinkLost without
// target-follow, but the follow grace engages instead.
l.tick(LadderInput {
now: t0 + Duration::from_millis(10),
op_link_up: false,
mavlink_link_up: true,
target_follow_active: true,
});
let out = l.tick(LadderInput {
now: t0 + Duration::from_millis(170),
op_link_up: false,
mavlink_link_up: true,
target_follow_active: true,
});
// Assert — engaged the follow grace
assert_eq!(out.state, LadderState::LinkLostInFollow);
assert!(!out.rtl_should_fire);
assert_eq!(l.rtl_count(), 0);
// Act — still inside grace
let out = l.tick(LadderInput {
now: t0 + Duration::from_millis(230),
op_link_up: false,
mavlink_link_up: true,
target_follow_active: true,
});
assert_eq!(out.state, LadderState::LinkLostInFollow);
assert!(!out.rtl_should_fire);
assert_eq!(l.rtl_count(), 0);
// Act — grace expires (grace started at +170 ms; +100 ms = +270 ms)
let out = l.tick(LadderInput {
now: t0 + Duration::from_millis(280),
op_link_up: false,
mavlink_link_up: true,
target_follow_active: true,
});
// Assert — promoted to LinkLost; RTL fires once now
assert_eq!(out.state, LadderState::LinkLost);
assert!(out.state_changed);
assert!(out.rtl_should_fire);
assert_eq!(l.rtl_count(), 1);
}
/// AC-4 — MAVLink loss does NOT trigger autopilot-side RTL.
#[test]
fn ac4_mavlink_loss_does_not_trigger_autopilot_rtl() {
// Arrange
let mut l = LostLinkLadder::new(fast_config());
let t0 = Instant::now();
// Act — op-link down AND mavlink down for far longer than lost_after
let mut last_state = LadderState::LinkOk;
for ms in (0..1000).step_by(10) {
let out = l.tick(LadderInput {
now: t0 + Duration::from_millis(ms),
op_link_up: false,
mavlink_link_up: false,
target_follow_active: false,
});
// Assert — never fire while mavlink is down
assert!(!out.rtl_should_fire, "rtl fired at +{ms} ms with mavlink down");
last_state = out.state;
}
// Assert
assert_eq!(last_state, LadderState::MavlinkLost);
assert_eq!(l.rtl_count(), 0);
}
/// Supplementary — MAVLink recovers while op-link is still down past
/// lost_after; the ladder resumes the op-link rung and fires RTL once.
#[test]
fn mavlink_recovery_resumes_operator_ladder() {
// Arrange
let mut l = LostLinkLadder::new(fast_config());
let t0 = Instant::now();
l.tick(LadderInput {
now: t0,
op_link_up: true,
mavlink_link_up: true,
target_follow_active: false,
});
// Act — both links go down at +10 ms; run long enough to exceed lost_after
for ms in (10..300).step_by(10) {
let out = l.tick(LadderInput {
now: t0 + Duration::from_millis(ms),
op_link_up: false,
mavlink_link_up: false,
target_follow_active: false,
});
assert!(!out.rtl_should_fire);
assert_eq!(out.state, LadderState::MavlinkLost);
}
// Act — mavlink recovers; op-link still down. The internal
// op_link_down_since clock has been ticking since +10 ms, so
// elapsed = 300 ms > lost_after (150 ms) → LinkLost on next tick.
let out = l.tick(LadderInput {
now: t0 + Duration::from_millis(310),
op_link_up: false,
mavlink_link_up: true,
target_follow_active: false,
});
// Assert
assert_eq!(out.previous_state, LadderState::MavlinkLost);
assert_eq!(out.state, LadderState::LinkLost);
assert!(out.rtl_should_fire);
assert_eq!(l.rtl_count(), 1);
}
// =============================================================================
// Integration — driver issues RTL once + FSM transitions FlyMission → Land
// =============================================================================
/// Spy `LostLinkCommandIssuer` that counts RTL invocations.
#[derive(Debug, Default)]
struct SpyCommandIssuer {
rtl_count: AtomicU64,
}
#[async_trait]
impl LostLinkCommandIssuer for SpyCommandIssuer {
async fn issue_rtl(&self) -> Result<(), AutopilotError> {
self.rtl_count.fetch_add(1, Ordering::SeqCst);
Ok(())
}
}
impl SpyCommandIssuer {
fn count(&self) -> u64 {
self.rtl_count.load(Ordering::SeqCst)
}
}
/// Auto-completing `MissionDriver` — every action returns `Ok(())` so
/// the FSM can race through Disconnected → FlyMission once telemetry
/// guards open.
struct AutoDriver {
arm_calls: AtomicU32,
takeoff_calls: AtomicU32,
upload_calls: AtomicU32,
set_auto_calls: AtomicU32,
post_flight_calls: AtomicU32,
}
impl AutoDriver {
fn new() -> Arc<Self> {
Arc::new(Self {
arm_calls: AtomicU32::new(0),
takeoff_calls: AtomicU32::new(0),
upload_calls: AtomicU32::new(0),
set_auto_calls: AtomicU32::new(0),
post_flight_calls: AtomicU32::new(0),
})
}
}
#[async_trait]
impl MissionDriver for AutoDriver {
async fn arm(&self) -> Result<(), DriverError> {
self.arm_calls.fetch_add(1, Ordering::SeqCst);
Ok(())
}
async fn takeoff(&self, _altitude_m: f32) -> Result<(), DriverError> {
self.takeoff_calls.fetch_add(1, Ordering::SeqCst);
Ok(())
}
async fn upload_mission(&self, _items: &[MissionWaypoint]) -> Result<(), DriverError> {
self.upload_calls.fetch_add(1, Ordering::SeqCst);
Ok(())
}
async fn set_auto_mode(&self) -> Result<(), DriverError> {
self.set_auto_calls.fetch_add(1, Ordering::SeqCst);
Ok(())
}
async fn post_flight_sync(&self) -> Result<(), DriverError> {
self.post_flight_calls.fetch_add(1, Ordering::SeqCst);
Ok(())
}
}
/// Drive the executor through telemetry until it reaches `FlyMission`.
/// Uses real time with a short tick interval so the test finishes in
/// well under a second.
async fn drive_to_fly_mission(
handle: &MissionExecutorHandle,
tel_tx: &watch::Sender<Telemetry>,
) {
// mission_reached_final stays false so the FSM idles in FlyMission.
let t = Telemetry {
link_up: true,
health_ok: true,
bit_ok: true,
armed: true,
takeoff_complete: true,
flight_mode_auto: true,
..Telemetry::default()
};
tel_tx.send(t).unwrap();
let deadline = StdInstant::now() + Duration::from_secs(2);
loop {
if matches!(handle.state().await, MissionState::FlyMission) {
return;
}
if StdInstant::now() >= deadline {
panic!(
"FSM never reached FlyMission within 2 s (current state: {:?})",
handle.state().await
);
}
tokio::time::sleep(Duration::from_millis(5)).await;
}
}
fn fast_executor_config() -> MissionExecutorConfig {
let mut cfg = MissionExecutorConfig::multirotor(10.0);
// 2 ms tick — keeps the test fast (~14 ms for 7 transitions).
cfg.tick_interval = Duration::from_millis(2);
cfg
}
/// AC-2 (integration half) — `failsafe_trigger(LinkLost)` while the
/// FSM is in `FlyMission` transitions it to `Land`.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn ac2_integration_failsafe_trigger_transitions_fly_to_land() {
// Arrange
let exec = MissionExecutor::new(fast_executor_config());
let (tel_tx, tel_rx) = watch::channel(Telemetry::default());
let (handle, fsm_join) = exec.run(AutoDriver::new(), vec![], tel_rx);
drive_to_fly_mission(&handle, &tel_tx).await;
assert_eq!(handle.state().await, MissionState::FlyMission);
// Act
handle
.failsafe_trigger(FailsafeKind::LinkLost)
.await
.expect("failsafe_trigger should succeed");
// Assert — transitioned to Land
assert_eq!(handle.state().await, MissionState::Land);
// Cleanup
fsm_join.abort();
}
/// AC-2 (driver half) — the lost-link driver wires the spy command
/// issuer + executor. Operator-link drop causes:
/// - `issue_rtl` called exactly once
/// - FSM transitions from `FlyMission` to `Land`
/// - subsequent ticks do not re-fire RTL
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn ac2_driver_issues_rtl_once_and_transitions_fsm() {
// Arrange — bring the FSM to FlyMission
let exec = MissionExecutor::new(fast_executor_config());
let (tel_tx, tel_rx) = watch::channel(Telemetry::default());
let (handle, fsm_join) = exec.run(AutoDriver::new(), vec![], tel_rx);
drive_to_fly_mission(&handle, &tel_tx).await;
assert_eq!(handle.state().await, MissionState::FlyMission);
// Arrange — spawn the lost-link driver with fast thresholds
let spy = Arc::new(SpyCommandIssuer::default());
let (op_tx, op_rx) = watch::channel(true);
let (mavlink_events_tx, mavlink_events_rx) =
broadcast::channel::<mavlink_layer::LinkEvent>(8);
let (shutdown_tx, shutdown_rx) = watch::channel(false);
let driver = LostLinkDriver::new(
fast_config(),
spy.clone(),
handle.clone(),
op_rx,
mavlink_events_rx,
)
.with_initial_mavlink_up(true);
let (ladder_handle, ladder_join) = driver.spawn(shutdown_rx);
// Act — drop operator link
op_tx.send(false).unwrap();
// Wait for RTL to fire (lost_after = 150 ms + tick interval slack)
let deadline = StdInstant::now() + Duration::from_secs(2);
loop {
if spy.count() >= 1 {
break;
}
if StdInstant::now() >= deadline {
panic!("RTL never fired within 2 s; ladder state={:?}", ladder_handle.state().await);
}
tokio::time::sleep(Duration::from_millis(5)).await;
}
// Assert — exactly one RTL issued; FSM in Land
assert_eq!(spy.count(), 1);
assert_eq!(ladder_handle.rtl_count().await, 1);
assert_eq!(ladder_handle.state().await, LadderState::LinkLost);
// The executor failsafe_trigger happens after the spy is called,
// so give the driver loop a moment to propagate to the FSM.
let deadline = StdInstant::now() + Duration::from_secs(1);
loop {
if matches!(handle.state().await, MissionState::Land) {
break;
}
if StdInstant::now() >= deadline {
panic!(
"FSM never transitioned to Land within 1 s (state: {:?})",
handle.state().await
);
}
tokio::time::sleep(Duration::from_millis(5)).await;
}
assert_eq!(handle.state().await, MissionState::Land);
// Continue ticking — RTL must NOT re-fire
tokio::time::sleep(Duration::from_millis(300)).await;
assert_eq!(spy.count(), 1);
// Cleanup
shutdown_tx.send(true).unwrap();
let _ = ladder_join.await;
fsm_join.abort();
// Keep the broadcast sender alive until shutdown so the driver
// doesn't see ChannelClosed and tear down early.
let _ = mavlink_events_tx;
}