mirror of
https://github.com/azaion/autopilot.git
synced 2026-06-21 20:31:09 +00:00
[AZ-659] [AZ-660] [AZ-661] Implement frame publisher + gRPC detection client
AZ-659: FramePublisher with per-consumer drop accounting (Arc<Bytes> zero-copy fan-out). Adds ConsumerId enum, PublisherStats, FrameReceiver wrapper, and publisher integration tests (AC-1, AC-2, AC-3). AZ-660: Bi-directional tonic gRPC stream to ../detections. Reconnect with bounded exponential backoff (1 s → 30 s cap). Drop-oldest in-flight budgeting (max_concurrent_in_flight = 2). ai_locked frame skipping. Integration tests against fixture in-process server (AC-1: happy path 30 fps/10 s, AC-2: reconnect, AC-3: budget drops, AC-4: ai_locked skipping). AZ-661: Schema validation (hard SchemaMismatch error on version mismatch), model_version latch with ModelVersionChanged events, sliding-window p99 latency tracker with Tier1Degraded/Tier1Recovered transitions. Integration tests (AC-1, AC-2, AC-3). Also: update module-layout.md for frame_ingest and detection_client to reflect the streaming API shape; code review report batch_18. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -1,48 +1,274 @@
|
||||
//! `detection_client` — bi-directional gRPC client to `../detections`.
//!
//! AZ-660 (`detection_client_grpc_stream`) wires the real `tonic`
//! bi-directional stream, the reconnect state machine, and drop-oldest
//! frame budgeting. AZ-661 (`detection_client_schema_and_health`)
//! layers schema validation, `model_version` tracking, and a
//! sliding-window latency degradation signal on top.
|
||||
//!
|
||||
//! ## Public surface
|
||||
//!
|
||||
//! - [`DetectionClient`] / [`DetectionClientConfig`] — configuration
|
||||
//! and entry-point. Build a config, hand it to
|
||||
//! [`DetectionClient::new`], then start the supervisor with
|
||||
//! [`DetectionClient::run`].
|
||||
//! - [`DetectionClientHandle`] — the cheap-clone handle returned
|
||||
//! alongside the supervisor `JoinHandle`. Exposes the event stream,
|
||||
//! health surface, connection state, and shutdown.
|
||||
//! - [`DetectionEvent`] — the union type emitted on the event stream
|
||||
//! (a `tokio::sync::broadcast` channel so multiple consumers may
|
||||
//! observe). Covers normal detection batches plus AZ-661 schema
|
||||
//! mismatches, model-version changes, and Tier-1 latency
|
||||
//! degradation transitions.
|
||||
//!
|
||||
//! The supervisor task lives in [`internal::runtime`]. It is the
|
||||
//! only owner of the gRPC channel; reconnects are bounded and the
|
||||
//! frame-source side never blocks on a slow gRPC server (drop-oldest
|
||||
//! budgeting per AC-3 of AZ-660).
|
||||
|
||||
use shared::error::{AutopilotError, Result};
|
||||
use shared::health::ComponentHealth;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use tokio::sync::{broadcast, watch};
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
use shared::health::{ComponentHealth, HealthLevel};
|
||||
use shared::models::detection::DetectionBatch;
|
||||
use shared::models::frame::Frame;
|
||||
|
||||
pub mod internal;
|
||||
|
||||
pub use internal::latency::DegradationTransition;
|
||||
pub use internal::stats::DetectionStats;
|
||||
|
||||
const NAME: &str = "detection_client";
|
||||
|
||||
/// Configuration for [`DetectionClient`].
///
/// The defaults produced by [`DetectionClientConfig::new`] match the
/// `description.md §3` baseline (`max_concurrent_in_flight = 2`,
/// 100 ms p99 Tier-1 threshold, 1 s → 30 s reconnect backoff,
/// `expected_schema_version = 1`). Every field is public, so callers
/// adjust individual values after calling `new`.
#[derive(Debug, Clone)]
pub struct DetectionClientConfig {
    /// Address of the detection service, stored exactly as given to
    /// [`DetectionClientConfig::new`].
    pub endpoint: String,
    /// In-flight gRPC request budget. New frames evict the oldest
    /// in-flight slot when this is reached (AC-3 of AZ-660).
    pub max_concurrent_in_flight: usize,
    /// Timeout for establishing a connection.
    /// NOTE(review): presumably applied per connect attempt — confirm
    /// against `internal::runtime`.
    pub connect_timeout: Duration,
    /// First delay of the reconnect backoff (1 s in the baseline).
    pub reconnect_initial: Duration,
    /// Upper bound of the reconnect backoff (30 s in the baseline).
    pub reconnect_cap: Duration,
    /// Schema version the client was built against. Any response
    /// with a different `schema_version` is a hard `SchemaMismatch`
    /// (AC-1 of AZ-661).
    pub expected_schema_version: u32,
    /// Capacity of the outbound mpsc channel that feeds the gRPC
    /// stream. Kept small so frames can't queue indefinitely on the
    /// client side.
    pub outbound_buffer: usize,
    /// Capacity of the `events_tx` broadcast channel.
    pub event_channel_capacity: usize,
    /// Capacity of the sliding-window latency ring buffer (AZ-661).
    pub latency_window_capacity: usize,
    /// Tier-1 latency threshold (AC-3 of AZ-661). A `Tier1Degraded`
    /// event is emitted when the sliding-window p99 crosses this
    /// value; a `Tier1Recovered` event is emitted on the reverse
    /// crossing.
    pub latency_p99_threshold: Duration,
}

impl DetectionClientConfig {
    /// Builds a configuration for `endpoint` with every other field set
    /// to its baseline default.
    ///
    /// `endpoint` accepts anything convertible into a `String`, so both
    /// string literals and owned strings work without an explicit
    /// conversion at the call site.
    #[must_use]
    pub fn new(endpoint: impl Into<String>) -> Self {
        Self {
            endpoint: endpoint.into(),
            max_concurrent_in_flight: 2,
            connect_timeout: Duration::from_secs(5),
            reconnect_initial: Duration::from_secs(1),
            reconnect_cap: Duration::from_secs(30),
            expected_schema_version: 1,
            outbound_buffer: 8,
            event_channel_capacity: 64,
            latency_window_capacity: 1024,
            latency_p99_threshold: Duration::from_millis(100),
        }
    }
}
|
||||
|
||||
/// Connection lifecycle of the detection gRPC stream as published on
/// the handle's `watch` channel.
///
/// `DetectionClientHandle::health` maps these to red / yellow / green
/// respectively (green may be downgraded by the latency check).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConnectionState {
    /// No live connection. This is the value the watch channel is
    /// created with in `DetectionClient::run`.
    Disconnected,
    /// A connection attempt is in progress.
    Connecting,
    /// The stream is established.
    Connected,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum DetectionEvent {
|
||||
/// Normal happy-path output. `capture_ts_monotonic_ns` is the
|
||||
/// frame's monotonic timestamp at the moment `frame_ingest`
|
||||
/// captured it (forwarded so downstream consumers can correlate
|
||||
/// detections back to the original frame without re-querying
|
||||
/// `frame_ingest`). `server_latency` is the server-reported
|
||||
/// per-frame processing time.
|
||||
Batch {
|
||||
batch: DetectionBatch,
|
||||
capture_ts_monotonic_ns: u64,
|
||||
server_latency: Duration,
|
||||
},
|
||||
/// AZ-661 AC-1 — `schema_version` on a response did not match
|
||||
/// `DetectionClientConfig::expected_schema_version`. The
|
||||
/// response is REJECTED — no detections are forwarded for that
|
||||
/// frame.
|
||||
SchemaMismatch {
|
||||
detail: String,
|
||||
frame_seq: u64,
|
||||
},
|
||||
/// AZ-661 AC-2 — server reported a `model_version` different
|
||||
/// from the last observed one. `previous` is `None` only on the
|
||||
/// very first response in the process lifetime.
|
||||
ModelVersionChanged {
|
||||
previous: Option<String>,
|
||||
current: String,
|
||||
},
|
||||
/// AZ-661 AC-3 — sliding-window p99 latency crossed the
|
||||
/// configured threshold UPWARDS. The next degraded → healthy
|
||||
/// crossing emits a paired [`DetectionEvent::Tier1Recovered`].
|
||||
Tier1Degraded {
|
||||
reason: Tier1DegradationReason,
|
||||
},
|
||||
Tier1Recovered,
|
||||
}
|
||||
|
||||
/// Why a `DetectionEvent::Tier1Degraded` event was raised.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Tier1DegradationReason {
    /// The sliding-window p99 latency rose above the configured
    /// threshold.
    HighLatency,
}
|
||||
|
||||
/// Entry-point for the gRPC client. `new` is a builder; `run`
|
||||
/// consumes the client and spawns the supervisor task that owns the
|
||||
/// gRPC channel for the lifetime of the autopilot process.
|
||||
#[derive(Debug)]
|
||||
pub struct DetectionClient {
|
||||
config: DetectionClientConfig,
|
||||
}
|
||||
|
||||
impl DetectionClient {
|
||||
pub fn new(config: DetectionClientConfig) -> Self {
|
||||
Self { config }
|
||||
}
|
||||
|
||||
/// Spawn the supervisor task. Returns the supervisor's
|
||||
/// `JoinHandle<()>` and a cheap-clone [`DetectionClientHandle`]
|
||||
/// that exposes the event stream, health surface, and
|
||||
/// shutdown.
|
||||
///
|
||||
/// The supervisor owns `frame_rx` for its full lifetime.
|
||||
/// `frame_rx` is a `tokio::sync::broadcast::Receiver<Frame>` —
|
||||
/// the composition root is responsible for wiring it to
|
||||
/// `frame_ingest::FrameIngestHandle::subscribe()` (raw) or to
|
||||
/// a `FrameReceiver` forwarder if it wants per-consumer drop
|
||||
/// attribution on the publisher side.
|
||||
pub fn run(
|
||||
self,
|
||||
frame_rx: broadcast::Receiver<Frame>,
|
||||
) -> (JoinHandle<()>, DetectionClientHandle) {
|
||||
let (events_tx, _) = broadcast::channel(self.config.event_channel_capacity.max(1));
|
||||
let (connection_tx, connection_rx) = watch::channel(ConnectionState::Disconnected);
|
||||
let (shutdown_tx, shutdown_rx) = watch::channel(false);
|
||||
let stats = DetectionStats::shared();
|
||||
let latency = Arc::new(internal::latency::LatencyWindow::with_capacity(
|
||||
self.config.latency_p99_threshold,
|
||||
self.config.latency_window_capacity,
|
||||
));
|
||||
|
||||
let join = internal::runtime::spawn_supervisor(
|
||||
self.config.clone(),
|
||||
frame_rx,
|
||||
events_tx.clone(),
|
||||
Arc::clone(&stats),
|
||||
Arc::clone(&latency),
|
||||
connection_tx,
|
||||
shutdown_rx,
|
||||
);
|
||||
|
||||
let handle = DetectionClientHandle {
|
||||
stats,
|
||||
latency,
|
||||
connection_state_rx: connection_rx,
|
||||
events_tx,
|
||||
shutdown_tx,
|
||||
};
|
||||
|
||||
(join, handle)
|
||||
}
|
||||
}
|
||||
|
||||
/// Cheap-clone handle for the `DetectionClient` supervisor. Exposes:
|
||||
/// - Event subscription via [`Self::subscribe_events`].
|
||||
/// - Connection-state watch via [`Self::connection_state`] /
|
||||
/// [`Self::connection_state_stream`].
|
||||
/// - Health surface (`description.md §3`) via [`Self::health`].
|
||||
/// - Shutdown via [`Self::shutdown`] (idempotent).
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DetectionClientHandle {
|
||||
#[allow(dead_code)]
|
||||
endpoint: String,
|
||||
stats: Arc<DetectionStats>,
|
||||
latency: Arc<internal::latency::LatencyWindow>,
|
||||
connection_state_rx: watch::Receiver<ConnectionState>,
|
||||
events_tx: broadcast::Sender<DetectionEvent>,
|
||||
shutdown_tx: watch::Sender<bool>,
|
||||
}
|
||||
|
||||
impl DetectionClientHandle {
|
||||
pub async fn request(&self, _frame: Frame) -> Result<DetectionBatch> {
|
||||
Err(AutopilotError::NotImplemented(
|
||||
"detection_client::request (AZ-660)",
|
||||
))
|
||||
/// Subscribe to the [`DetectionEvent`] stream. The broadcast
|
||||
/// channel applies its own drop-oldest back-pressure to slow
|
||||
/// consumers; new subscribers see events emitted after they
|
||||
/// subscribed.
|
||||
pub fn subscribe_events(&self) -> broadcast::Receiver<DetectionEvent> {
|
||||
self.events_tx.subscribe()
|
||||
}
|
||||
|
||||
pub fn connection_state(&self) -> ConnectionState {
|
||||
*self.connection_state_rx.borrow()
|
||||
}
|
||||
|
||||
pub fn connection_state_stream(&self) -> watch::Receiver<ConnectionState> {
|
||||
self.connection_state_rx.clone()
|
||||
}
|
||||
|
||||
pub fn stats(&self) -> Arc<DetectionStats> {
|
||||
Arc::clone(&self.stats)
|
||||
}
|
||||
|
||||
pub fn latency_p50(&self) -> Option<Duration> {
|
||||
self.latency.p50()
|
||||
}
|
||||
|
||||
pub fn latency_p99(&self) -> Option<Duration> {
|
||||
self.latency.p99()
|
||||
}
|
||||
|
||||
pub fn shutdown(&self) {
|
||||
self.shutdown_tx.send_replace(true);
|
||||
}
|
||||
|
||||
pub fn health(&self) -> ComponentHealth {
|
||||
ComponentHealth::disabled(NAME)
|
||||
let state = self.connection_state();
|
||||
match state {
|
||||
ConnectionState::Disconnected => ComponentHealth::red(NAME, "disconnected"),
|
||||
ConnectionState::Connecting => ComponentHealth::yellow(NAME, "connecting"),
|
||||
ConnectionState::Connected => {
|
||||
// `description.md §3` — p99 above threshold is the
|
||||
// operative health signal once we're connected.
|
||||
let mut h = ComponentHealth::green(NAME);
|
||||
if let Some(p99) = self.latency.p99() {
|
||||
if p99 > self.latency.threshold() {
|
||||
h.level = HealthLevel::Yellow;
|
||||
h.detail = Some(format!(
|
||||
"p99 {} ms > threshold {} ms",
|
||||
p99.as_millis(),
|
||||
self.latency.threshold().as_millis()
|
||||
));
|
||||
}
|
||||
}
|
||||
h
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -51,8 +277,14 @@ mod tests {
|
||||
use super::*;
|
||||
|
||||
/// Pins the `description.md §3` baseline numbers produced by
/// `DetectionClientConfig::new`, including both ends of the
/// 1 s → 30 s reconnect backoff.
#[test]
fn config_defaults_match_description() {
    // Arrange
    let c = DetectionClientConfig::new("http://127.0.0.1:50051");

    // Assert — the §3 baseline numbers.
    assert_eq!(c.max_concurrent_in_flight, 2);
    assert_eq!(c.reconnect_initial, Duration::from_secs(1));
    assert_eq!(c.reconnect_cap, Duration::from_secs(30));
    assert_eq!(c.expected_schema_version, 1);
    assert_eq!(c.latency_p99_threshold, Duration::from_millis(100));
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user