mirror of
https://github.com/azaion/autopilot.git
synced 2026-06-22 17:31:09 +00:00
[AZ-651] [AZ-668] lost-link failsafe ladder + mapobjects persistence (batch 7)
AZ-651 (mission_executor lost-link ladder):
- LostLinkLadder pure-logic state machine (LinkOk -> Degraded -> Lost
-> LinkLostInFollow + MavlinkLost branch). Configurable thresholds
via LostLinkConfig.
- LostLinkCommandIssuer trait + MavlinkCommandIssuer production impl
emitting MAV_CMD_NAV_RETURN_TO_LAUNCH via MavlinkHandle::send_command.
- LostLinkDriver task wires the ladder to operator-link watch, MAVLink
LinkEvent broadcast, and optional target-follow signal. On RTL,
driver calls the issuer THEN MissionExecutorHandle::failsafe_trigger.
- failsafe_trigger(LinkLost | LinkLostInFollow) short-circuits FlyMission
-> Land via direct FSM state mutation + TransitionEvent emission;
Paused state is intentionally NOT overridden.
- Tests: 4/4 ACs locally green (degraded-no-rtl; lost-fires-once;
follow-grace; mavlink-loss-no-rtl) plus driver + FSM integration.
AZ-668 (mapobjects_store persistence):
- Snapshot serializable shape + Store::{to_snapshot,from_snapshot}
round trip.
- MapObjectsPersistence async trait + JsonSnapshotEngine default impl
(write to .tmp, sync_all, atomic rename, best-effort parent fsync).
- PersistenceError::{Corrupt, SchemaMismatch} surfaces explicit errors
on bad blob; PersistenceMetrics tracks last_snapshot_ts,
snapshot_size_bytes, snapshot_errors_total.
- MapObjectsStore::from_snapshot factory for crash recovery from the
composition root.
- Tests: 4/4 ACs locally green (round-trip; atomic rename ignores
partial .tmp; crash recovery preserves pending; corruption returns
explicit error) plus schema-mismatch + metrics smoke checks.
Quality gates:
- cargo fmt: clean.
- cargo clippy -p mission_executor -p mapobjects_store --tests: 0 warns.
- cargo test --workspace: all green.
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,473 @@
|
||||
//! AZ-651 acceptance criteria — lost-link failsafe ladder.
|
||||
//!
|
||||
//! AC-1, AC-3, AC-4 are exercised purely against the public
|
||||
//! `LostLinkLadder` API (deterministic ticks driven by an explicit
|
||||
//! `Instant`).
|
||||
//!
|
||||
//! AC-2 has two halves:
|
||||
//! - **Pure ladder**: RTL fires exactly once when `LinkOk → LinkLost`
|
||||
//! happens; subsequent ticks in `LinkLost` do not re-fire. Tested
|
||||
//! against the ladder directly.
|
||||
//! - **Integration**: the executor's FSM transitions from
|
||||
//! `FlyMission` to `Land` when `failsafe_trigger(LinkLost)` is
|
||||
//! called. Tested via a real `MissionExecutor` and a spy
|
||||
//! `LostLinkCommandIssuer`.
|
||||
|
||||
use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant as StdInstant};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use mission_executor::{
|
||||
DriverError, FailsafeKind, LadderInput, LadderState, LostLinkCommandIssuer, LostLinkConfig,
|
||||
LostLinkDriver, LostLinkLadder, MissionDriver, MissionExecutor, MissionExecutorConfig,
|
||||
MissionExecutorHandle, MissionState, Telemetry,
|
||||
};
|
||||
use shared::error::AutopilotError;
|
||||
use shared::models::mission::MissionWaypoint;
|
||||
use tokio::sync::{broadcast, watch};
|
||||
use tokio::time::Instant;
|
||||
|
||||
// =============================================================================
|
||||
// Pure ladder tests (AC-1, AC-2 fire-once half, AC-3, AC-4, MAVLink recovery)
|
||||
// =============================================================================
|
||||
|
||||
/// Compact config so the tests don't have to wait real wall-clock time.
|
||||
/// degraded_after = 50 ms, lost_after = 150 ms, follow_grace = 100 ms.
|
||||
fn fast_config() -> LostLinkConfig {
|
||||
LostLinkConfig {
|
||||
degraded_after: Duration::from_millis(50),
|
||||
lost_after: Duration::from_millis(150),
|
||||
follow_grace: Duration::from_millis(100),
|
||||
tick_interval: Duration::from_millis(10),
|
||||
}
|
||||
}
|
||||
|
||||
/// AC-1 — operator-link degraded then recovers; no RTL.
|
||||
#[test]
|
||||
fn ac1_degraded_then_recovers_no_rtl() {
|
||||
// Arrange
|
||||
let mut l = LostLinkLadder::new(fast_config());
|
||||
let t0 = Instant::now();
|
||||
let out = l.tick(LadderInput {
|
||||
now: t0,
|
||||
op_link_up: true,
|
||||
mavlink_link_up: true,
|
||||
target_follow_active: false,
|
||||
});
|
||||
assert_eq!(out.state, LadderState::LinkOk);
|
||||
|
||||
// Act — op-link drops; tick at +70 ms (past degraded_after=50 ms)
|
||||
l.tick(LadderInput {
|
||||
now: t0 + Duration::from_millis(10),
|
||||
op_link_up: false,
|
||||
mavlink_link_up: true,
|
||||
target_follow_active: false,
|
||||
});
|
||||
let out = l.tick(LadderInput {
|
||||
now: t0 + Duration::from_millis(70),
|
||||
op_link_up: false,
|
||||
mavlink_link_up: true,
|
||||
target_follow_active: false,
|
||||
});
|
||||
assert_eq!(out.state, LadderState::LinkDegraded);
|
||||
assert!(!out.rtl_should_fire);
|
||||
|
||||
// Act — op-link recovers before lost_after fires
|
||||
let out = l.tick(LadderInput {
|
||||
now: t0 + Duration::from_millis(100),
|
||||
op_link_up: true,
|
||||
mavlink_link_up: true,
|
||||
target_follow_active: false,
|
||||
});
|
||||
// Assert
|
||||
assert_eq!(out.state, LadderState::LinkOk);
|
||||
assert!(out.state_changed);
|
||||
assert!(!out.rtl_should_fire);
|
||||
assert_eq!(l.rtl_count(), 0);
|
||||
}
|
||||
|
||||
/// AC-2 (ladder half) — operator-link lost triggers RTL exactly once.
|
||||
#[test]
|
||||
fn ac2_operator_link_lost_triggers_rtl_exactly_once() {
|
||||
// Arrange
|
||||
let mut l = LostLinkLadder::new(fast_config());
|
||||
let t0 = Instant::now();
|
||||
l.tick(LadderInput {
|
||||
now: t0,
|
||||
op_link_up: true,
|
||||
mavlink_link_up: true,
|
||||
target_follow_active: false,
|
||||
});
|
||||
|
||||
// Act — op-link drops at +10 ms; tick at +170 ms so the down
|
||||
// duration (160 ms) exceeds lost_after (150 ms).
|
||||
l.tick(LadderInput {
|
||||
now: t0 + Duration::from_millis(10),
|
||||
op_link_up: false,
|
||||
mavlink_link_up: true,
|
||||
target_follow_active: false,
|
||||
});
|
||||
let out = l.tick(LadderInput {
|
||||
now: t0 + Duration::from_millis(170),
|
||||
op_link_up: false,
|
||||
mavlink_link_up: true,
|
||||
target_follow_active: false,
|
||||
});
|
||||
// Assert — entered LinkLost; RTL fires
|
||||
assert_eq!(out.state, LadderState::LinkLost);
|
||||
assert!(out.state_changed);
|
||||
assert!(out.rtl_should_fire);
|
||||
assert_eq!(l.rtl_count(), 1);
|
||||
|
||||
// Act — keep ticking while still in LinkLost; RTL must NOT re-fire
|
||||
for ms in [180, 200, 300, 500, 1000] {
|
||||
let out = l.tick(LadderInput {
|
||||
now: t0 + Duration::from_millis(ms),
|
||||
op_link_up: false,
|
||||
mavlink_link_up: true,
|
||||
target_follow_active: false,
|
||||
});
|
||||
assert_eq!(out.state, LadderState::LinkLost);
|
||||
assert!(!out.rtl_should_fire, "rtl re-fired at +{ms} ms");
|
||||
}
|
||||
assert_eq!(l.rtl_count(), 1);
|
||||
}
|
||||
|
||||
/// AC-3 — `LinkLostInFollow` grace then RTL.
|
||||
#[test]
|
||||
fn ac3_lost_in_follow_grace_then_rtl() {
|
||||
// Arrange — degraded=50, lost=150, follow_grace=100 → RTL fires at +250 ms total
|
||||
let mut l = LostLinkLadder::new(fast_config());
|
||||
let t0 = Instant::now();
|
||||
l.tick(LadderInput {
|
||||
now: t0,
|
||||
op_link_up: true,
|
||||
mavlink_link_up: true,
|
||||
target_follow_active: true,
|
||||
});
|
||||
|
||||
// Act — drop op-link at +10 ms; at +170 ms we'd be LinkLost without
|
||||
// target-follow, but the follow grace engages instead.
|
||||
l.tick(LadderInput {
|
||||
now: t0 + Duration::from_millis(10),
|
||||
op_link_up: false,
|
||||
mavlink_link_up: true,
|
||||
target_follow_active: true,
|
||||
});
|
||||
let out = l.tick(LadderInput {
|
||||
now: t0 + Duration::from_millis(170),
|
||||
op_link_up: false,
|
||||
mavlink_link_up: true,
|
||||
target_follow_active: true,
|
||||
});
|
||||
// Assert — engaged the follow grace
|
||||
assert_eq!(out.state, LadderState::LinkLostInFollow);
|
||||
assert!(!out.rtl_should_fire);
|
||||
assert_eq!(l.rtl_count(), 0);
|
||||
|
||||
// Act — still inside grace
|
||||
let out = l.tick(LadderInput {
|
||||
now: t0 + Duration::from_millis(230),
|
||||
op_link_up: false,
|
||||
mavlink_link_up: true,
|
||||
target_follow_active: true,
|
||||
});
|
||||
assert_eq!(out.state, LadderState::LinkLostInFollow);
|
||||
assert!(!out.rtl_should_fire);
|
||||
assert_eq!(l.rtl_count(), 0);
|
||||
|
||||
// Act — grace expires (grace started at +170 ms; +100 ms = +270 ms)
|
||||
let out = l.tick(LadderInput {
|
||||
now: t0 + Duration::from_millis(280),
|
||||
op_link_up: false,
|
||||
mavlink_link_up: true,
|
||||
target_follow_active: true,
|
||||
});
|
||||
// Assert — promoted to LinkLost; RTL fires once now
|
||||
assert_eq!(out.state, LadderState::LinkLost);
|
||||
assert!(out.state_changed);
|
||||
assert!(out.rtl_should_fire);
|
||||
assert_eq!(l.rtl_count(), 1);
|
||||
}
|
||||
|
||||
/// AC-4 — MAVLink loss does NOT trigger autopilot-side RTL.
|
||||
#[test]
|
||||
fn ac4_mavlink_loss_does_not_trigger_autopilot_rtl() {
|
||||
// Arrange
|
||||
let mut l = LostLinkLadder::new(fast_config());
|
||||
let t0 = Instant::now();
|
||||
|
||||
// Act — op-link down AND mavlink down for far longer than lost_after
|
||||
let mut last_state = LadderState::LinkOk;
|
||||
for ms in (0..1000).step_by(10) {
|
||||
let out = l.tick(LadderInput {
|
||||
now: t0 + Duration::from_millis(ms),
|
||||
op_link_up: false,
|
||||
mavlink_link_up: false,
|
||||
target_follow_active: false,
|
||||
});
|
||||
// Assert — never fire while mavlink is down
|
||||
assert!(!out.rtl_should_fire, "rtl fired at +{ms} ms with mavlink down");
|
||||
last_state = out.state;
|
||||
}
|
||||
// Assert
|
||||
assert_eq!(last_state, LadderState::MavlinkLost);
|
||||
assert_eq!(l.rtl_count(), 0);
|
||||
}
|
||||
|
||||
/// Supplementary — MAVLink recovers while op-link is still down past
|
||||
/// lost_after; the ladder resumes the op-link rung and fires RTL once.
|
||||
#[test]
|
||||
fn mavlink_recovery_resumes_operator_ladder() {
|
||||
// Arrange
|
||||
let mut l = LostLinkLadder::new(fast_config());
|
||||
let t0 = Instant::now();
|
||||
l.tick(LadderInput {
|
||||
now: t0,
|
||||
op_link_up: true,
|
||||
mavlink_link_up: true,
|
||||
target_follow_active: false,
|
||||
});
|
||||
|
||||
// Act — both links go down at +10 ms; run long enough to exceed lost_after
|
||||
for ms in (10..300).step_by(10) {
|
||||
let out = l.tick(LadderInput {
|
||||
now: t0 + Duration::from_millis(ms),
|
||||
op_link_up: false,
|
||||
mavlink_link_up: false,
|
||||
target_follow_active: false,
|
||||
});
|
||||
assert!(!out.rtl_should_fire);
|
||||
assert_eq!(out.state, LadderState::MavlinkLost);
|
||||
}
|
||||
|
||||
// Act — mavlink recovers; op-link still down. The internal
|
||||
// op_link_down_since clock has been ticking since +10 ms, so
|
||||
// elapsed = 300 ms > lost_after (150 ms) → LinkLost on next tick.
|
||||
let out = l.tick(LadderInput {
|
||||
now: t0 + Duration::from_millis(310),
|
||||
op_link_up: false,
|
||||
mavlink_link_up: true,
|
||||
target_follow_active: false,
|
||||
});
|
||||
// Assert
|
||||
assert_eq!(out.previous_state, LadderState::MavlinkLost);
|
||||
assert_eq!(out.state, LadderState::LinkLost);
|
||||
assert!(out.rtl_should_fire);
|
||||
assert_eq!(l.rtl_count(), 1);
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Integration — driver issues RTL once + FSM transitions FlyMission → Land
|
||||
// =============================================================================
|
||||
|
||||
/// Spy `LostLinkCommandIssuer` that counts RTL invocations.
|
||||
#[derive(Debug, Default)]
|
||||
struct SpyCommandIssuer {
|
||||
rtl_count: AtomicU64,
|
||||
}
|
||||
#[async_trait]
|
||||
impl LostLinkCommandIssuer for SpyCommandIssuer {
|
||||
async fn issue_rtl(&self) -> Result<(), AutopilotError> {
|
||||
self.rtl_count.fetch_add(1, Ordering::SeqCst);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
impl SpyCommandIssuer {
|
||||
fn count(&self) -> u64 {
|
||||
self.rtl_count.load(Ordering::SeqCst)
|
||||
}
|
||||
}
|
||||
|
||||
/// Auto-completing `MissionDriver` — every action returns `Ok(())` so
|
||||
/// the FSM can race through Disconnected → FlyMission once telemetry
|
||||
/// guards open.
|
||||
struct AutoDriver {
|
||||
arm_calls: AtomicU32,
|
||||
takeoff_calls: AtomicU32,
|
||||
upload_calls: AtomicU32,
|
||||
set_auto_calls: AtomicU32,
|
||||
post_flight_calls: AtomicU32,
|
||||
}
|
||||
impl AutoDriver {
|
||||
fn new() -> Arc<Self> {
|
||||
Arc::new(Self {
|
||||
arm_calls: AtomicU32::new(0),
|
||||
takeoff_calls: AtomicU32::new(0),
|
||||
upload_calls: AtomicU32::new(0),
|
||||
set_auto_calls: AtomicU32::new(0),
|
||||
post_flight_calls: AtomicU32::new(0),
|
||||
})
|
||||
}
|
||||
}
|
||||
#[async_trait]
|
||||
impl MissionDriver for AutoDriver {
|
||||
async fn arm(&self) -> Result<(), DriverError> {
|
||||
self.arm_calls.fetch_add(1, Ordering::SeqCst);
|
||||
Ok(())
|
||||
}
|
||||
async fn takeoff(&self, _altitude_m: f32) -> Result<(), DriverError> {
|
||||
self.takeoff_calls.fetch_add(1, Ordering::SeqCst);
|
||||
Ok(())
|
||||
}
|
||||
async fn upload_mission(&self, _items: &[MissionWaypoint]) -> Result<(), DriverError> {
|
||||
self.upload_calls.fetch_add(1, Ordering::SeqCst);
|
||||
Ok(())
|
||||
}
|
||||
async fn set_auto_mode(&self) -> Result<(), DriverError> {
|
||||
self.set_auto_calls.fetch_add(1, Ordering::SeqCst);
|
||||
Ok(())
|
||||
}
|
||||
async fn post_flight_sync(&self) -> Result<(), DriverError> {
|
||||
self.post_flight_calls.fetch_add(1, Ordering::SeqCst);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Drive the executor through telemetry until it reaches `FlyMission`.
|
||||
/// Uses real time with a short tick interval so the test finishes in
|
||||
/// well under a second.
|
||||
async fn drive_to_fly_mission(
|
||||
handle: &MissionExecutorHandle,
|
||||
tel_tx: &watch::Sender<Telemetry>,
|
||||
) {
|
||||
// mission_reached_final stays false so the FSM idles in FlyMission.
|
||||
let t = Telemetry {
|
||||
link_up: true,
|
||||
health_ok: true,
|
||||
bit_ok: true,
|
||||
armed: true,
|
||||
takeoff_complete: true,
|
||||
flight_mode_auto: true,
|
||||
..Telemetry::default()
|
||||
};
|
||||
tel_tx.send(t).unwrap();
|
||||
|
||||
let deadline = StdInstant::now() + Duration::from_secs(2);
|
||||
loop {
|
||||
if matches!(handle.state().await, MissionState::FlyMission) {
|
||||
return;
|
||||
}
|
||||
if StdInstant::now() >= deadline {
|
||||
panic!(
|
||||
"FSM never reached FlyMission within 2 s (current state: {:?})",
|
||||
handle.state().await
|
||||
);
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(5)).await;
|
||||
}
|
||||
}
|
||||
|
||||
fn fast_executor_config() -> MissionExecutorConfig {
|
||||
let mut cfg = MissionExecutorConfig::multirotor(10.0);
|
||||
// 2 ms tick — keeps the test fast (~14 ms for 7 transitions).
|
||||
cfg.tick_interval = Duration::from_millis(2);
|
||||
cfg
|
||||
}
|
||||
|
||||
/// AC-2 (integration half) — `failsafe_trigger(LinkLost)` while the
|
||||
/// FSM is in `FlyMission` transitions it to `Land`.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn ac2_integration_failsafe_trigger_transitions_fly_to_land() {
|
||||
// Arrange
|
||||
let exec = MissionExecutor::new(fast_executor_config());
|
||||
let (tel_tx, tel_rx) = watch::channel(Telemetry::default());
|
||||
let (handle, fsm_join) = exec.run(AutoDriver::new(), vec![], tel_rx);
|
||||
|
||||
drive_to_fly_mission(&handle, &tel_tx).await;
|
||||
assert_eq!(handle.state().await, MissionState::FlyMission);
|
||||
|
||||
// Act
|
||||
handle
|
||||
.failsafe_trigger(FailsafeKind::LinkLost)
|
||||
.await
|
||||
.expect("failsafe_trigger should succeed");
|
||||
|
||||
// Assert — transitioned to Land
|
||||
assert_eq!(handle.state().await, MissionState::Land);
|
||||
|
||||
// Cleanup
|
||||
fsm_join.abort();
|
||||
}
|
||||
|
||||
/// AC-2 (driver half) — the lost-link driver wires the spy command
|
||||
/// issuer + executor. Operator-link drop causes:
|
||||
/// - `issue_rtl` called exactly once
|
||||
/// - FSM transitions from `FlyMission` to `Land`
|
||||
/// - subsequent ticks do not re-fire RTL
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn ac2_driver_issues_rtl_once_and_transitions_fsm() {
|
||||
// Arrange — bring the FSM to FlyMission
|
||||
let exec = MissionExecutor::new(fast_executor_config());
|
||||
let (tel_tx, tel_rx) = watch::channel(Telemetry::default());
|
||||
let (handle, fsm_join) = exec.run(AutoDriver::new(), vec![], tel_rx);
|
||||
drive_to_fly_mission(&handle, &tel_tx).await;
|
||||
assert_eq!(handle.state().await, MissionState::FlyMission);
|
||||
|
||||
// Arrange — spawn the lost-link driver with fast thresholds
|
||||
let spy = Arc::new(SpyCommandIssuer::default());
|
||||
let (op_tx, op_rx) = watch::channel(true);
|
||||
let (mavlink_events_tx, mavlink_events_rx) =
|
||||
broadcast::channel::<mavlink_layer::LinkEvent>(8);
|
||||
let (shutdown_tx, shutdown_rx) = watch::channel(false);
|
||||
|
||||
let driver = LostLinkDriver::new(
|
||||
fast_config(),
|
||||
spy.clone(),
|
||||
handle.clone(),
|
||||
op_rx,
|
||||
mavlink_events_rx,
|
||||
)
|
||||
.with_initial_mavlink_up(true);
|
||||
let (ladder_handle, ladder_join) = driver.spawn(shutdown_rx);
|
||||
|
||||
// Act — drop operator link
|
||||
op_tx.send(false).unwrap();
|
||||
|
||||
// Wait for RTL to fire (lost_after = 150 ms + tick interval slack)
|
||||
let deadline = StdInstant::now() + Duration::from_secs(2);
|
||||
loop {
|
||||
if spy.count() >= 1 {
|
||||
break;
|
||||
}
|
||||
if StdInstant::now() >= deadline {
|
||||
panic!("RTL never fired within 2 s; ladder state={:?}", ladder_handle.state().await);
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(5)).await;
|
||||
}
|
||||
|
||||
// Assert — exactly one RTL issued; FSM in Land
|
||||
assert_eq!(spy.count(), 1);
|
||||
assert_eq!(ladder_handle.rtl_count().await, 1);
|
||||
assert_eq!(ladder_handle.state().await, LadderState::LinkLost);
|
||||
|
||||
// The executor failsafe_trigger happens after the spy is called,
|
||||
// so give the driver loop a moment to propagate to the FSM.
|
||||
let deadline = StdInstant::now() + Duration::from_secs(1);
|
||||
loop {
|
||||
if matches!(handle.state().await, MissionState::Land) {
|
||||
break;
|
||||
}
|
||||
if StdInstant::now() >= deadline {
|
||||
panic!(
|
||||
"FSM never transitioned to Land within 1 s (state: {:?})",
|
||||
handle.state().await
|
||||
);
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(5)).await;
|
||||
}
|
||||
assert_eq!(handle.state().await, MissionState::Land);
|
||||
|
||||
// Continue ticking — RTL must NOT re-fire
|
||||
tokio::time::sleep(Duration::from_millis(300)).await;
|
||||
assert_eq!(spy.count(), 1);
|
||||
|
||||
// Cleanup
|
||||
shutdown_tx.send(true).unwrap();
|
||||
let _ = ladder_join.await;
|
||||
fsm_join.abort();
|
||||
// Keep the broadcast sender alive until shutdown so the driver
|
||||
// doesn't see ChannelClosed and tear down early.
|
||||
let _ = mavlink_events_tx;
|
||||
}
|
||||
Reference in New Issue
Block a user