mirror of
https://github.com/azaion/autopilot.git
synced 2026-06-22 19:51:10 +00:00
[AZ-657] [AZ-682] frame_ingest RTSP lifecycle + scan_controller FSM (batch 12)
ci/woodpecker/push/build-arm Pipeline failed
ci/woodpecker/push/build-arm Pipeline failed
AZ-657 (frame_ingest): RTSP session lifecycle FSM with bounded exponential backoff (1 s → 30 s cap), AI-lock plumb through watch::Sender that stamps every emitted Frame, and SPS/PPS hard-fail via OpenError::UnsupportedProfile. The actual RTSP wire client is abstracted behind an RtspTransport trait so AZ-658 can pin retina/FFmpeg alongside the decoder; the lifecycle FSM itself is production code today. tokio::select! around every transport call so a hung open/read cannot wedge graceful shutdown. 10 unit + 5 integration tests cover happy path, bounded reconnect, stream-drop reopen, hard-fail no-retry, and AI-lock toggle. AZ-682 (scan_controller): typed ScanState (ZoomedOut / ZoomedIn / TargetFollow) with a complete pure transition catalogue, every (state, trigger) → next_state from description.md §1/§4/§5 covered; spec-disallowed combos return TransitionOutcome.accepted = false with RejectReason::UnsupportedTransition (loud, not silent). Frame-rate floor monitor with hysteresis suppresses ZoomedOut → ZoomedIn while sustained FPS < 10 fps per description.md §5/§6. Rolling 100-sample tick-latency window surfaces p99; health goes yellow above the 10 ms budget. 18 unit + 5 integration tests cover the catalogue, fps-floor activate/clear, and tick-latency budget. Cumulative review (batches 10-12): all OPEN findings carried forward without regressions. See _docs/03_implementation/batch_12_cycle1_report.md §6. Notes: pre-existing dead-code error in autopilot::Runtime::vlm_provider_name (origin batch 4) blocks workspace -D warnings clippy. Recorded in _docs/_process_leftovers/ — not in batch 12 scope. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
+337
-11
@@ -1,30 +1,267 @@
|
||||
//! `frame_ingest` — RTSP pull + decode + timestamp.
|
||||
//!
|
||||
//! Real implementation lands in:
//! - AZ-657 `frame_ingest_rtsp_session` — session lifecycle + bounded
//!   reconnect + AI-lock plumb (this crate, modules in `internal/`).
//! - AZ-658 `frame_ingest_decoder` — H.264/265 decode into raw
//!   pixel buffers + retina/FFmpeg/GStreamer transport binding.
//! - AZ-659 `frame_ingest_publisher` — bounded broadcast + per-consumer
//!   drop policy.
|
||||
//!
|
||||
//! ## AZ-657 surface
|
||||
//!
|
||||
//! - [`FrameIngest::new`] — construct in `Closed` state.
|
||||
//! - [`FrameIngest::run`] — spawn the lifecycle loop driving the given
|
||||
//! `RtspTransport` through `connect → stream → reconnect` cycles
|
||||
//! with bounded backoff. Returns a `JoinHandle`.
|
||||
//! - [`FrameIngestHandle::subscribe`] — broadcast frame stream (the
|
||||
//! AZ-657 lifecycle emits only synthetic header frames; real
|
||||
//! decoded frames come in AZ-658).
|
||||
//! - [`FrameIngestHandle::set_ai_lock`] — `bringCameraDown` /
|
||||
//! `bringCameraUp` signal. Stamps `Frame.ai_locked` on every
|
||||
//! subsequently emitted frame.
|
||||
//! - [`FrameIngestHandle::session_state`] — current FSM state.
|
||||
//! - [`FrameIngestHandle::health`] — `ComponentHealth` reflecting the
|
||||
//! FSM state + `last_packet_age` + `ai_locked`.
|
||||
|
||||
use tokio::sync::broadcast;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use shared::health::ComponentHealth;
|
||||
use tokio::sync::{broadcast, watch, Mutex};
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
use shared::clock::MonoClock;
|
||||
use shared::health::{ComponentHealth, HealthLevel};
|
||||
use shared::models::frame::Frame;
|
||||
|
||||
pub mod internal;
|
||||
|
||||
pub use internal::lifecycle::{BackoffPolicy, LifecycleStats, SessionState};
|
||||
pub use internal::rtsp_client::{
|
||||
OpenError, RtspPacket, RtspSessionConfig, RtspTransport, RtspTransportHint, StreamError,
|
||||
};
|
||||
|
||||
use internal::lifecycle::{transition, Trigger};
|
||||
|
||||
/// Component name passed to every `ComponentHealth` built by
/// `FrameIngestHandle::health`.
const NAME: &str = "frame_ingest";
|
||||
|
||||
/// Last-packet-age threshold past which `health()` reports `Red` while
/// the session is `Streaming` or `Failing`. Aligned with
/// `description.md §6` (red after `last_frame_age_ms` exceeds a
/// configured threshold).
|
||||
const RED_FRAME_AGE: Duration = Duration::from_secs(5);
|
||||
|
||||
pub struct FrameIngest {
|
||||
tx: broadcast::Sender<Frame>,
|
||||
ai_lock_tx: watch::Sender<bool>,
|
||||
state_tx: watch::Sender<SessionState>,
|
||||
shutdown_tx: watch::Sender<bool>,
|
||||
stats: Arc<LifecycleStats>,
|
||||
backoff: BackoffPolicy,
|
||||
clock: MonoClock,
|
||||
}
|
||||
|
||||
impl FrameIngest {
|
||||
pub fn new(channel_capacity: usize) -> Self {
|
||||
Self::with_backoff(
|
||||
channel_capacity,
|
||||
BackoffPolicy::new(Duration::from_secs(1), Duration::from_secs(30)),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn with_backoff(channel_capacity: usize, backoff: BackoffPolicy) -> Self {
|
||||
let (tx, _rx) = broadcast::channel(channel_capacity);
|
||||
Self { tx }
|
||||
let (ai_lock_tx, _) = watch::channel(false);
|
||||
let (state_tx, _) = watch::channel(SessionState::Closed);
|
||||
let (shutdown_tx, _) = watch::channel(false);
|
||||
Self {
|
||||
tx,
|
||||
ai_lock_tx,
|
||||
state_tx,
|
||||
shutdown_tx,
|
||||
stats: LifecycleStats::new(),
|
||||
backoff,
|
||||
clock: MonoClock::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn handle(&self) -> FrameIngestHandle {
|
||||
FrameIngestHandle {
|
||||
tx: self.tx.clone(),
|
||||
ai_lock_tx: self.ai_lock_tx.clone(),
|
||||
state_rx: self.state_tx.subscribe(),
|
||||
shutdown_tx: self.shutdown_tx.clone(),
|
||||
stats: Arc::clone(&self.stats),
|
||||
clock: self.clock,
|
||||
}
|
||||
}
|
||||
|
||||
/// Spawn the lifecycle loop. The returned handle resolves when
|
||||
/// the loop exits (shutdown signalled via
|
||||
/// [`FrameIngestHandle::shutdown`] or a hard-fail trapped the FSM).
|
||||
pub fn run<T>(&self, transport: T, config: RtspSessionConfig) -> JoinHandle<()>
|
||||
where
|
||||
T: RtspTransport + 'static,
|
||||
{
|
||||
let tx = self.tx.clone();
|
||||
let ai_lock = self.ai_lock_tx.subscribe();
|
||||
let state_tx = self.state_tx.clone();
|
||||
let shutdown_rx = self.shutdown_tx.subscribe();
|
||||
let stats = Arc::clone(&self.stats);
|
||||
let backoff = self.backoff;
|
||||
let clock = self.clock;
|
||||
let transport = Arc::new(Mutex::new(transport));
|
||||
|
||||
tokio::spawn(async move {
|
||||
lifecycle_loop(
|
||||
transport,
|
||||
config,
|
||||
tx,
|
||||
ai_lock,
|
||||
state_tx,
|
||||
shutdown_rx,
|
||||
stats,
|
||||
backoff,
|
||||
clock,
|
||||
)
|
||||
.await;
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn is_shutdown(rx: &watch::Receiver<bool>) -> bool {
|
||||
*rx.borrow()
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn lifecycle_loop<T>(
|
||||
transport: Arc<Mutex<T>>,
|
||||
config: RtspSessionConfig,
|
||||
tx: broadcast::Sender<Frame>,
|
||||
mut ai_lock: watch::Receiver<bool>,
|
||||
state_tx: watch::Sender<SessionState>,
|
||||
mut shutdown_rx: watch::Receiver<bool>,
|
||||
stats: Arc<LifecycleStats>,
|
||||
backoff: BackoffPolicy,
|
||||
clock: MonoClock,
|
||||
) where
|
||||
T: RtspTransport,
|
||||
{
|
||||
let mut state = SessionState::Closed;
|
||||
let mut seq: u64 = 0;
|
||||
|
||||
loop {
|
||||
if is_shutdown(&shutdown_rx) {
|
||||
let mut t = transport.lock().await;
|
||||
t.close().await;
|
||||
state_tx.send_replace(SessionState::Closed);
|
||||
return;
|
||||
}
|
||||
|
||||
state = transition(state, Trigger::OpenAttempted, &backoff).next;
|
||||
state_tx.send_replace(state);
|
||||
|
||||
// Race the open call against shutdown so a hung transport
|
||||
// (real RTSP can block on `DESCRIBE` for many seconds) cannot
|
||||
// wedge graceful exit.
|
||||
let open_result = tokio::select! {
|
||||
biased;
|
||||
res = async {
|
||||
let mut t = transport.lock().await;
|
||||
t.open(&config).await
|
||||
} => res,
|
||||
_ = shutdown_rx.changed() => {
|
||||
let mut t = transport.lock().await;
|
||||
t.close().await;
|
||||
state_tx.send_replace(SessionState::Closed);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
match open_result {
|
||||
Ok(()) => {
|
||||
state = transition(state, Trigger::OpenSucceeded, &backoff).next;
|
||||
state_tx.send_replace(state);
|
||||
stats.note_streaming();
|
||||
|
||||
loop {
|
||||
let packet = tokio::select! {
|
||||
biased;
|
||||
res = async {
|
||||
let mut t = transport.lock().await;
|
||||
t.next_packet().await
|
||||
} => Some(res),
|
||||
_ = shutdown_rx.changed() => None,
|
||||
};
|
||||
|
||||
let Some(packet) = packet else {
|
||||
let mut t = transport.lock().await;
|
||||
t.close().await;
|
||||
state_tx.send_replace(SessionState::Closed);
|
||||
return;
|
||||
};
|
||||
|
||||
match packet {
|
||||
Ok(pkt) => {
|
||||
let now_ns = clock.elapsed_ns();
|
||||
stats.note_packet(now_ns);
|
||||
let locked = *ai_lock.borrow_and_update();
|
||||
// AZ-657 emits a synthetic frame envelope
|
||||
// per inbound RTSP packet so the lifecycle
|
||||
// FSM can be exercised end-to-end without
|
||||
// the decoder (AZ-658 swaps this for the
|
||||
// actual decoded frame).
|
||||
let frame = Frame {
|
||||
seq,
|
||||
capture_ts_monotonic_ns: now_ns,
|
||||
decode_ts_monotonic_ns: now_ns,
|
||||
pixels: Arc::new(pkt.payload),
|
||||
width: 0,
|
||||
height: 0,
|
||||
pix_fmt: shared::models::frame::PixelFormat::Nv12,
|
||||
ai_locked: locked,
|
||||
};
|
||||
seq = seq.saturating_add(1);
|
||||
// A no-subscriber send is a no-op error in
|
||||
// the broadcast channel; the lifecycle
|
||||
// does not care.
|
||||
let _ = tx.send(frame);
|
||||
}
|
||||
Err(e) => {
|
||||
let trig = Trigger::from_stream_error(&e);
|
||||
let t = transition(state, trig, &backoff);
|
||||
state = t.next;
|
||||
state_tx.send_replace(state);
|
||||
stats.note_reopen();
|
||||
if let Some(wait) = t.wait_before_next {
|
||||
tokio::time::sleep(wait).await;
|
||||
}
|
||||
if !t.reopen {
|
||||
return;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
let trig = Trigger::from_open_error(&err);
|
||||
let t = transition(state, trig, &backoff);
|
||||
state = t.next;
|
||||
state_tx.send_replace(state);
|
||||
if let SessionState::Failing { attempt } = state {
|
||||
stats.note_open_failure(attempt);
|
||||
}
|
||||
if let Some(wait) = t.wait_before_next {
|
||||
tokio::time::sleep(wait).await;
|
||||
}
|
||||
if !t.reopen {
|
||||
// Hard-fail (e.g. UnsupportedProfile): leave the
|
||||
// FSM parked in Failing and exit. The supervisor
|
||||
// restarts the process; the operator decides.
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -32,18 +269,91 @@ impl FrameIngest {
|
||||
#[derive(Clone)]
|
||||
pub struct FrameIngestHandle {
|
||||
tx: broadcast::Sender<Frame>,
|
||||
ai_lock_tx: watch::Sender<bool>,
|
||||
state_rx: watch::Receiver<SessionState>,
|
||||
shutdown_tx: watch::Sender<bool>,
|
||||
stats: Arc<LifecycleStats>,
|
||||
clock: MonoClock,
|
||||
}
|
||||
|
||||
impl FrameIngestHandle {
|
||||
/// Subscribe to the frame stream. Consumers receive every frame after they
|
||||
/// subscribed; back-pressure is implemented via broadcast channel lag (see
|
||||
/// AZ-659 for the slow-consumer policy).
|
||||
/// Subscribe to the frame stream. Consumers receive every frame
|
||||
/// after they subscribed; back-pressure is implemented via
|
||||
/// broadcast channel lag (see AZ-659 for the slow-consumer
|
||||
/// policy).
|
||||
pub fn subscribe(&self) -> broadcast::Receiver<Frame> {
|
||||
self.tx.subscribe()
|
||||
}
|
||||
|
||||
/// `bringCameraDown`/`bringCameraUp` per `description.md §2`. When
|
||||
/// `locked == true`, every subsequently emitted frame has
|
||||
/// `Frame::ai_locked = true` and downstream AI consumers
|
||||
/// (detection_client, movement_detector) MUST skip detection.
|
||||
/// `telemetry_stream` continues consuming so the operator sees
|
||||
/// the raw stream.
|
||||
pub fn set_ai_lock(&self, locked: bool) {
|
||||
self.ai_lock_tx.send_replace(locked);
|
||||
}
|
||||
|
||||
pub fn ai_locked(&self) -> bool {
|
||||
*self.ai_lock_tx.borrow()
|
||||
}
|
||||
|
||||
pub fn session_state(&self) -> SessionState {
|
||||
*self.state_rx.borrow()
|
||||
}
|
||||
|
||||
/// Subscribe to FSM state transitions. Useful for operator UI and
|
||||
/// supervisor watchdogs (the latter restarts on prolonged
|
||||
/// `Failing`).
|
||||
pub fn session_state_stream(&self) -> watch::Receiver<SessionState> {
|
||||
self.state_rx.clone()
|
||||
}
|
||||
|
||||
pub fn reopens_total(&self) -> u64 {
|
||||
self.stats.reopens_total.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
/// Request the lifecycle loop to drain to `Closed` and exit. The
|
||||
/// loop races every transport call against this signal, so a
|
||||
/// hung transport cannot wedge graceful exit.
|
||||
pub fn shutdown(&self) {
|
||||
self.shutdown_tx.send_replace(true);
|
||||
}
|
||||
|
||||
pub fn health(&self) -> ComponentHealth {
|
||||
ComponentHealth::disabled(NAME)
|
||||
let state = self.session_state();
|
||||
let now_ns = self.clock.elapsed_ns();
|
||||
let last_pkt_ns = self.stats.last_packet_at_ns.load(Ordering::Relaxed);
|
||||
let age = now_ns.saturating_sub(last_pkt_ns);
|
||||
|
||||
match state {
|
||||
SessionState::Closed => ComponentHealth::disabled(NAME),
|
||||
SessionState::Streaming if last_pkt_ns == 0 => {
|
||||
ComponentHealth::yellow(NAME, "streaming, awaiting first packet")
|
||||
}
|
||||
SessionState::Streaming if age > RED_FRAME_AGE.as_nanos() as u64 => {
|
||||
ComponentHealth::red(NAME, format!("last packet age {} ms", age / 1_000_000))
|
||||
}
|
||||
SessionState::Streaming => {
|
||||
let mut h = ComponentHealth::green(NAME);
|
||||
if self.ai_locked() {
|
||||
h.level = HealthLevel::Yellow;
|
||||
h.detail = Some("ai_locked".to_string());
|
||||
}
|
||||
h
|
||||
}
|
||||
SessionState::Connecting { attempt } => {
|
||||
ComponentHealth::yellow(NAME, format!("connecting (attempt {attempt})"))
|
||||
}
|
||||
SessionState::Failing { attempt } => {
|
||||
if age > RED_FRAME_AGE.as_nanos() as u64 {
|
||||
ComponentHealth::red(NAME, format!("failing, attempt {attempt}"))
|
||||
} else {
|
||||
ComponentHealth::yellow(NAME, format!("failing, attempt {attempt}"))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -54,6 +364,22 @@ mod tests {
|
||||
/// A freshly constructed ingest reports the `Closed` FSM state and a
/// `Disabled` health level before the lifecycle loop is ever spawned.
#[test]
fn it_compiles() {
    let h = FrameIngest::new(8).handle();
    assert_eq!(h.session_state(), SessionState::Closed);
    assert_eq!(h.health().level, HealthLevel::Disabled);
}
|
||||
|
||||
/// Setting the AI lock through a handle is immediately visible through
/// `ai_locked()`, both when engaging and when releasing the lock.
#[test]
fn ai_lock_toggle_propagates() {
    // Arrange
    let ingest = FrameIngest::new(8);
    let handle = ingest.handle();

    // Act + Assert: engage the lock first, then release it again.
    for locked in [true, false] {
        handle.set_ai_lock(locked);
        assert_eq!(handle.ai_locked(), locked);
    }
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user