//! `detection_client` — bi-directional gRPC client to `../detections`. //! //! AZ-660 wires the real `tonic` bi-directional stream + reconnect //! state machine + drop-oldest frame budgeting. AZ-661 layers schema //! validation, `model_version` tracking, and a sliding-window //! latency degradation signal on top. //! //! ## Public surface //! //! - [`DetectionClient`] / [`DetectionClientConfig`] — configuration //! and entry-point. Build a config, hand it to //! [`DetectionClient::new`], then start the supervisor with //! [`DetectionClient::run`]. //! - [`DetectionClientHandle`] — the cheap-clone handle returned //! alongside the supervisor `JoinHandle`. Exposes the event stream, //! health surface, connection state, and shutdown. //! - [`DetectionEvent`] — the union type emitted on the event stream //! (a `tokio::sync::broadcast` channel so multiple consumers may //! observe). Covers normal detection batches plus AZ-661 schema //! mismatches, model-version changes, and Tier-1 latency //! degradation transitions. //! //! The supervisor task lives in [`internal::runtime`]. It is the //! only owner of the gRPC channel; reconnects are bounded and the //! frame-source side never blocks on a slow gRPC server (drop-oldest //! budgeting per AC-3 of AZ-660). use std::sync::Arc; use std::time::Duration; use tokio::sync::{broadcast, watch}; use tokio::task::JoinHandle; use shared::health::{ComponentHealth, HealthLevel}; use shared::models::detection::DetectionBatch; use shared::models::frame::Frame; pub mod internal; pub use internal::latency::DegradationTransition; pub use internal::stats::DetectionStats; const NAME: &str = "detection_client"; /// Configuration for [`DetectionClient`]. Defaults match the /// `description.md §3` baseline (`max_concurrent_in_flight = 2`, /// 100 ms p99 Tier-1 threshold, 1 s → 30 s reconnect backoff, /// `expected_schema_version = 1`). #[derive(Debug, Clone)] pub struct DetectionClientConfig { pub endpoint: String, /// In-flight gRPC request budget. New frames evict the oldest /// in-flight slot when this is reached (AC-3 of AZ-660). pub max_concurrent_in_flight: usize, pub connect_timeout: Duration, pub reconnect_initial: Duration, pub reconnect_cap: Duration, /// Schema version the client was built against. Any response /// with a different `schema_version` is a hard `SchemaMismatch` /// (AC-1 of AZ-661). pub expected_schema_version: u32, /// Capacity of the outbound mpsc channel that feeds the gRPC /// stream. Kept small so frames can't queue indefinitely on the /// client side. pub outbound_buffer: usize, /// Capacity of the `events_tx` broadcast channel. pub event_channel_capacity: usize, /// Capacity of the sliding-window latency ring buffer (AZ-661). pub latency_window_capacity: usize, /// Tier-1 latency threshold (AC-3 of AZ-661). A `Tier1Degraded` /// event is emitted when the sliding-window p99 crosses this /// value; a `Tier1Recovered` event is emitted on the reverse /// crossing. pub latency_p99_threshold: Duration, } impl DetectionClientConfig { pub fn new(endpoint: impl Into) -> Self { Self { endpoint: endpoint.into(), max_concurrent_in_flight: 2, connect_timeout: Duration::from_secs(5), reconnect_initial: Duration::from_secs(1), reconnect_cap: Duration::from_secs(30), expected_schema_version: 1, outbound_buffer: 8, event_channel_capacity: 64, latency_window_capacity: 1024, latency_p99_threshold: Duration::from_millis(100), } } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ConnectionState { Disconnected, Connecting, Connected, } #[derive(Debug, Clone)] pub enum DetectionEvent { /// Normal happy-path output. `capture_ts_monotonic_ns` is the /// frame's monotonic timestamp at the moment `frame_ingest` /// captured it (forwarded so downstream consumers can correlate /// detections back to the original frame without re-querying /// `frame_ingest`). `server_latency` is the server-reported /// per-frame processing time. Batch { batch: DetectionBatch, capture_ts_monotonic_ns: u64, server_latency: Duration, }, /// AZ-661 AC-1 — `schema_version` on a response did not match /// `DetectionClientConfig::expected_schema_version`. The /// response is REJECTED — no detections are forwarded for that /// frame. SchemaMismatch { detail: String, frame_seq: u64, }, /// AZ-661 AC-2 — server reported a `model_version` different /// from the last observed one. `previous` is `None` only on the /// very first response in the process lifetime. ModelVersionChanged { previous: Option, current: String, }, /// AZ-661 AC-3 — sliding-window p99 latency crossed the /// configured threshold UPWARDS. The next degraded → healthy /// crossing emits a paired [`DetectionEvent::Tier1Recovered`]. Tier1Degraded { reason: Tier1DegradationReason, }, Tier1Recovered, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Tier1DegradationReason { HighLatency, } /// Entry-point for the gRPC client. `new` is a builder; `run` /// consumes the client and spawns the supervisor task that owns the /// gRPC channel for the lifetime of the autopilot process. #[derive(Debug)] pub struct DetectionClient { config: DetectionClientConfig, } impl DetectionClient { pub fn new(config: DetectionClientConfig) -> Self { Self { config } } /// Spawn the supervisor task. Returns the supervisor's /// `JoinHandle<()>` and a cheap-clone [`DetectionClientHandle`] /// that exposes the event stream, health surface, and /// shutdown. /// /// The supervisor owns `frame_rx` for its full lifetime. /// `frame_rx` is a `tokio::sync::broadcast::Receiver` — /// the composition root is responsible for wiring it to /// `frame_ingest::FrameIngestHandle::subscribe()` (raw) or to /// a `FrameReceiver` forwarder if it wants per-consumer drop /// attribution on the publisher side. pub fn run( self, frame_rx: broadcast::Receiver, ) -> (JoinHandle<()>, DetectionClientHandle) { let (events_tx, _) = broadcast::channel(self.config.event_channel_capacity.max(1)); let (connection_tx, connection_rx) = watch::channel(ConnectionState::Disconnected); let (shutdown_tx, shutdown_rx) = watch::channel(false); let stats = DetectionStats::shared(); let latency = Arc::new(internal::latency::LatencyWindow::with_capacity( self.config.latency_p99_threshold, self.config.latency_window_capacity, )); let join = internal::runtime::spawn_supervisor( self.config.clone(), frame_rx, events_tx.clone(), Arc::clone(&stats), Arc::clone(&latency), connection_tx, shutdown_rx, ); let handle = DetectionClientHandle { stats, latency, connection_state_rx: connection_rx, events_tx, shutdown_tx, }; (join, handle) } } /// Cheap-clone handle for the `DetectionClient` supervisor. Exposes: /// - Event subscription via [`Self::subscribe_events`]. /// - Connection-state watch via [`Self::connection_state`] / /// [`Self::connection_state_stream`]. /// - Health surface (`description.md §3`) via [`Self::health`]. /// - Shutdown via [`Self::shutdown`] (idempotent). #[derive(Debug, Clone)] pub struct DetectionClientHandle { stats: Arc, latency: Arc, connection_state_rx: watch::Receiver, events_tx: broadcast::Sender, shutdown_tx: watch::Sender, } impl DetectionClientHandle { /// Subscribe to the [`DetectionEvent`] stream. The broadcast /// channel applies its own drop-oldest back-pressure to slow /// consumers; new subscribers see events emitted after they /// subscribed. pub fn subscribe_events(&self) -> broadcast::Receiver { self.events_tx.subscribe() } pub fn connection_state(&self) -> ConnectionState { *self.connection_state_rx.borrow() } pub fn connection_state_stream(&self) -> watch::Receiver { self.connection_state_rx.clone() } pub fn stats(&self) -> Arc { Arc::clone(&self.stats) } pub fn latency_p50(&self) -> Option { self.latency.p50() } pub fn latency_p99(&self) -> Option { self.latency.p99() } pub fn shutdown(&self) { self.shutdown_tx.send_replace(true); } pub fn health(&self) -> ComponentHealth { let state = self.connection_state(); match state { ConnectionState::Disconnected => ComponentHealth::red(NAME, "disconnected"), ConnectionState::Connecting => ComponentHealth::yellow(NAME, "connecting"), ConnectionState::Connected => { // `description.md §3` — p99 above threshold is the // operative health signal once we're connected. let mut h = ComponentHealth::green(NAME); if let Some(p99) = self.latency.p99() { if p99 > self.latency.threshold() { h.level = HealthLevel::Yellow; h.detail = Some(format!( "p99 {} ms > threshold {} ms", p99.as_millis(), self.latency.threshold().as_millis() )); } } h } } } } #[cfg(test)] mod tests { use super::*; #[test] fn config_defaults_match_description() { // Arrange let c = DetectionClientConfig::new("http://127.0.0.1:50051"); // Assert — the §3 baseline numbers. assert_eq!(c.max_concurrent_in_flight, 2); assert_eq!(c.reconnect_cap, Duration::from_secs(30)); assert_eq!(c.expected_schema_version, 1); assert_eq!(c.latency_p99_threshold, Duration::from_millis(100)); } }