//! AZ-668 — persistence trait + default JSON snapshot engine. //! //! Default engine per Q3: in-memory + atomic JSON snapshot. The trait //! is kept narrow on purpose so a future SQLite+H3 / RocksDB engine //! can swap in without touching call sites. //! //! Crash-safety: writes go to `${state_dir}/mapobjects/.json.tmp`, //! are fsync'd, then atomically renamed onto the final path. The parent //! directory is fsync'd after the rename so the rename itself survives //! a power loss. Interrupted writes leave the `.tmp` file behind; the //! next `load_snapshot` ignores it. //! //! Corruption surfaces as [`PersistenceError::Corrupt`]: the caller MUST //! refuse to start with stale state and propagate the error to the //! operator (AZ-668 AC-4). The engine does NOT silently fall back to //! an empty store. use std::path::{Path, PathBuf}; use async_trait::async_trait; use thiserror::Error; use tokio::sync::Mutex as AsyncMutex; use tokio::{fs, io::AsyncWriteExt}; use super::snapshot::Snapshot; /// Errors surfaced by [`MapObjectsPersistence`]. #[derive(Debug, Error)] pub enum PersistenceError { #[error("persistence I/O error: {0}")] Io(#[from] std::io::Error), /// The snapshot file was present but unreadable. The caller MUST /// refuse to start with stale state and surface the error to the /// operator — never silently start empty (AZ-668 AC-4). #[error("snapshot corrupt at {path}: {reason}")] Corrupt { path: PathBuf, reason: String }, /// Schema version mismatch — the on-disk blob predates the running /// binary. Treated as corruption (operator must reconcile). #[error("snapshot schema mismatch at {path}: expected {expected}, found {found}")] SchemaMismatch { path: PathBuf, expected: u32, found: u32, }, } /// Engine-level metrics surfaced to the health aggregator. /// Per AZ-668 §Outcome: `last_snapshot_ts`, `snapshot_size_bytes`, /// `snapshot_errors_total`. #[derive(Debug, Clone, Default)] pub struct PersistenceMetrics { pub last_snapshot_ts: Option>, pub snapshot_size_bytes: Option, pub snapshot_errors_total: u64, } /// Pluggable persistence backend. The default impl is the JSON /// snapshot engine (below); future Q3 engines (SQLite+H3, RocksDB, …) /// implement this trait without breaking call sites. /// /// Methods are `async` because file I/O on the Jetson can stall while /// the SD card is busy with detection-evidence writes; blocking the /// runtime worker thread would starve `mavlink_layer`'s heartbeat /// task. Implementations that do nothing async can delegate to /// `tokio::task::spawn_blocking`. #[async_trait] pub trait MapObjectsPersistence: Send + Sync { /// Atomically persist `snapshot` keyed by its `mission_id`. /// Implementations MUST guarantee no partial writes are visible to /// `load_snapshot` — typically by writing to a `.tmp` sibling then /// renaming. async fn save_snapshot(&self, snapshot: &Snapshot) -> Result<(), PersistenceError>; /// Load the most recent snapshot for `mission_id`. Returns /// `Ok(None)` if no snapshot exists; `Err(Corrupt)` on a present /// but unreadable blob (the caller MUST refuse to start). async fn load_snapshot(&self, mission_id: &str) -> Result, PersistenceError>; /// Engine metrics for the health surface. fn metrics(&self) -> PersistenceMetrics; } /// Default Q3 engine: one JSON file per mission, atomic-renamed on /// each write. /// /// Path layout: `${state_dir}/mapobjects/.json`. The /// `mapobjects` subdirectory is created on first write. pub struct JsonSnapshotEngine { state_dir: PathBuf, metrics: AsyncMutex, } impl std::fmt::Debug for JsonSnapshotEngine { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("JsonSnapshotEngine") .field("state_dir", &self.state_dir) .finish_non_exhaustive() } } impl JsonSnapshotEngine { /// Construct an engine rooted at `state_dir`. The directory does /// not have to exist yet — it is created lazily on the first /// successful `save_snapshot`. pub fn new(state_dir: impl Into) -> Self { Self { state_dir: state_dir.into(), metrics: AsyncMutex::new(PersistenceMetrics::default()), } } /// Resolve the canonical snapshot path for `mission_id`. /// /// `mission_id` is treated as an opaque filename component. Callers /// supply trusted ids from the central API; no path traversal /// sanitisation is performed (the AZ-668 spec does not require it). /// If untrusted ids ever flow in, add validation here. pub fn snapshot_path(&self, mission_id: &str) -> PathBuf { self.state_dir .join("mapobjects") .join(format!("{mission_id}.json")) } fn tmp_path(&self, mission_id: &str) -> PathBuf { self.state_dir .join("mapobjects") .join(format!("{mission_id}.json.tmp")) } } #[async_trait] impl MapObjectsPersistence for JsonSnapshotEngine { async fn save_snapshot(&self, snapshot: &Snapshot) -> Result<(), PersistenceError> { let outcome = self.save_snapshot_inner(snapshot).await; if outcome.is_err() { let mut m = self.metrics.lock().await; m.snapshot_errors_total = m.snapshot_errors_total.saturating_add(1); } outcome } async fn load_snapshot(&self, mission_id: &str) -> Result, PersistenceError> { let path = self.snapshot_path(mission_id); let outcome = self.load_snapshot_inner(&path).await; if matches!( outcome, Err(PersistenceError::Corrupt { .. } | PersistenceError::SchemaMismatch { .. }) ) { let mut m = self.metrics.lock().await; m.snapshot_errors_total = m.snapshot_errors_total.saturating_add(1); } outcome } fn metrics(&self) -> PersistenceMetrics { // Cheap snapshot under a non-async borrow — `try_lock` keeps the // health surface non-blocking; if the lock is contended we // return zeros rather than parking the health caller. self.metrics .try_lock() .map(|m| m.clone()) .unwrap_or_default() } } impl JsonSnapshotEngine { async fn save_snapshot_inner(&self, snapshot: &Snapshot) -> Result<(), PersistenceError> { let path = self.snapshot_path(&snapshot.mission_id); let tmp = self.tmp_path(&snapshot.mission_id); let dir = path.parent().expect("snapshot path always has parent"); fs::create_dir_all(dir).await?; let bytes = serde_json::to_vec(snapshot).map_err(|e| PersistenceError::Corrupt { path: path.clone(), reason: format!("serialize: {e}"), })?; let size = bytes.len() as u64; { let mut f = fs::File::create(&tmp).await?; f.write_all(&bytes).await?; f.sync_all().await?; } fs::rename(&tmp, &path).await?; // Best-effort parent fsync so the rename survives a power // loss. POSIX guarantees this is the durability anchor for // directory operations; non-POSIX platforms ignore. if let Ok(dir_handle) = std::fs::File::open(dir) { let _ = dir_handle.sync_all(); } let mut m = self.metrics.lock().await; m.last_snapshot_ts = Some(chrono::Utc::now()); m.snapshot_size_bytes = Some(size); Ok(()) } async fn load_snapshot_inner(&self, path: &Path) -> Result, PersistenceError> { let bytes = match fs::read(path).await { Ok(b) => b, Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None), Err(e) => return Err(e.into()), }; let snapshot: Snapshot = serde_json::from_slice(&bytes).map_err(|e| PersistenceError::Corrupt { path: path.to_path_buf(), reason: format!("deserialize: {e}"), })?; if snapshot.schema_version != Snapshot::CURRENT_SCHEMA_VERSION { return Err(PersistenceError::SchemaMismatch { path: path.to_path_buf(), expected: Snapshot::CURRENT_SCHEMA_VERSION, found: snapshot.schema_version, }); } Ok(Some(snapshot)) } }