mirror of
https://github.com/azaion/autopilot.git
synced 2026-06-22 06:31:09 +00:00
[AZ-659] [AZ-660] [AZ-661] Implement frame publisher + gRPC detection client
AZ-659: FramePublisher with per-consumer drop accounting (Arc<Bytes> zero-copy fan-out). Adds ConsumerId enum, PublisherStats, FrameReceiver wrapper, and publisher integration tests (AC-1, AC-2, AC-3). AZ-660: Bi-directional tonic gRPC stream to ../detections. Reconnect with bounded exponential backoff (1 s → 30 s cap). Drop-oldest in-flight budgeting (max_concurrent_in_flight = 2). ai_locked frame skipping. Integration tests against fixture in-process server (AC-1: happy path 30 fps/10 s, AC-2: reconnect, AC-3: budget drops, AC-4: ai_locked skipping). AZ-661: Schema validation (hard SchemaMismatch error on version mismatch), model_version latch with ModelVersionChanged events, sliding-window p99 latency tracker with Tier1Degraded/Tier1Recovered transitions. Integration tests (AC-1, AC-2, AC-3). Also: update module-layout.md for frame_ingest and detection_client to reflect the streaming API shape; code review report batch_18. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,170 @@
|
||||
//! AZ-660 — in-flight request budgeting.
|
||||
//!
|
||||
//! The Tier-1 NFR (`description.md §6` + AC-3) requires the client
|
||||
//! to keep latency near the per-frame target by NEVER queueing
|
||||
//! frames indefinitely. When `max_concurrent_in_flight` (default 2)
|
||||
//! is reached and a new frame arrives, the OLDEST in-flight frame
|
||||
//! is dropped (its slot is freed for the new one). The drop is
|
||||
//! counted toward `budget_drops_total`; the frame's slot in the
|
||||
//! tracker is removed so a late response for the dropped frame can
|
||||
//! be ignored without crediting it against the latency histogram.
|
||||
//!
|
||||
//! The tracker is intentionally simple: a small `VecDeque` of
|
||||
//! `(frame_seq, capture_ts_ns)` pairs, capped at
|
||||
//! `max_concurrent_in_flight`. Order is FIFO (oldest at the front),
|
||||
//! so "drop oldest" is `pop_front`. Removal-on-response walks the
|
||||
//! deque from the front because responses arrive in roughly the
|
||||
//! same order they were sent; in the worst case (out-of-order
|
||||
//! response) we walk the full deque, which is fine at the default
|
||||
//! capacity of 2.
|
||||
|
||||
use std::collections::VecDeque;
|
||||
|
||||
/// Bookkeeping record for one request that has been handed to the
/// tracker and has not yet been matched with a response.
#[derive(Debug, Clone, Copy)]
pub struct InFlight {
    /// Sequence number used to match a response back to this entry.
    pub frame_seq: u64,
    /// Capture timestamp carried with the request, in nanoseconds.
    pub capture_ts_monotonic_ns: u64,
}

/// Bounded FIFO of [`InFlight`] entries.
///
/// The oldest entry sits at the front of the deque. Once the tracker
/// holds `capacity` entries, inserting another one pushes the oldest
/// entry out.
#[derive(Debug)]
pub struct BudgetTracker {
    inner: VecDeque<InFlight>,
    capacity: usize,
}

impl BudgetTracker {
    /// Creates an empty tracker. A requested capacity of `0` is raised
    /// to `1` so the tracker can always hold at least one entry.
    pub fn new(capacity: usize) -> Self {
        let capacity = capacity.max(1);
        Self {
            inner: VecDeque::with_capacity(capacity),
            capacity,
        }
    }

    /// Maximum number of entries tracked at once (after clamping).
    pub fn capacity(&self) -> usize {
        self.capacity
    }

    /// Number of entries currently tracked.
    pub fn in_flight(&self) -> usize {
        self.inner.len()
    }

    /// Tracks `entry` as the newest request.
    ///
    /// When the tracker is already full, the oldest entry is removed
    /// first and returned as `Some(..)` so the caller can account for
    /// the drop; otherwise `None` is returned.
    pub fn add(&mut self, entry: InFlight) -> Option<InFlight> {
        let is_full = self.inner.len() >= self.capacity;
        let evicted = is_full.then(|| self.inner.pop_front()).flatten();
        self.inner.push_back(entry);
        evicted
    }

    /// Removes and returns the entry whose `frame_seq` matches.
    ///
    /// Returns `None` when no such entry is tracked, e.g. because it
    /// was evicted earlier by [`BudgetTracker::add`]. The scan starts
    /// at the front (oldest) and stops at the first match.
    pub fn remove(&mut self, frame_seq: u64) -> Option<InFlight> {
        self.inner
            .iter()
            .position(|slot| slot.frame_seq == frame_seq)
            .and_then(|idx| self.inner.remove(idx))
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an entry whose timestamp is derived from its sequence
    /// number (1 ms per sequence step).
    fn entry(seq: u64) -> InFlight {
        InFlight {
            frame_seq: seq,
            capture_ts_monotonic_ns: seq * 1_000_000,
        }
    }

    /// Creates a tracker of `capacity` pre-loaded with entries
    /// `1..=count`, ignoring any evictions during setup.
    fn tracker_with(capacity: usize, count: u64) -> BudgetTracker {
        let mut tracker = BudgetTracker::new(capacity);
        for seq in 1..=count {
            tracker.add(entry(seq));
        }
        tracker
    }

    #[test]
    fn capacity_clamps_to_one() {
        // A zero capacity request must still yield a usable tracker.
        let tracker = BudgetTracker::new(0);

        assert_eq!(tracker.capacity(), 1);
    }

    #[test]
    fn add_under_capacity_does_not_evict() {
        let mut tracker = BudgetTracker::new(2);

        let first = tracker.add(entry(1));
        let second = tracker.add(entry(2));

        assert!(first.is_none());
        assert!(second.is_none());
        assert_eq!(tracker.in_flight(), 2);
    }

    #[test]
    fn add_at_capacity_evicts_oldest() {
        let mut tracker = tracker_with(2, 2);

        // The third entry does not fit, so the oldest one is pushed out.
        let evicted = tracker.add(entry(3));

        assert_eq!(evicted.expect("evicted").frame_seq, 1);
        assert_eq!(tracker.in_flight(), 2);
    }

    #[test]
    fn remove_known_frame_returns_entry() {
        let mut tracker = tracker_with(4, 3);

        let removed = tracker.remove(2);

        assert_eq!(removed.expect("removed").frame_seq, 2);
        assert_eq!(tracker.in_flight(), 2);
    }

    #[test]
    fn remove_unknown_frame_returns_none() {
        let mut tracker = tracker_with(2, 1);

        assert!(tracker.remove(999).is_none());
    }

    #[test]
    fn evicted_frame_remove_returns_none() {
        let mut tracker = tracker_with(2, 2);
        let evicted = tracker.add(entry(3));
        assert_eq!(evicted.expect("evicted").frame_seq, 1);

        // A late response for the evicted frame must find nothing, so
        // the caller can discard it.
        let removed = tracker.remove(1);

        assert!(removed.is_none());
    }
}
|
||||
@@ -0,0 +1,189 @@
|
||||
//! AZ-661 — sliding-window latency tracker.
|
||||
//!
|
||||
//! Tracks per-response round-trip latency in a fixed-capacity ring
|
||||
//! buffer. The client polls `p99()` periodically and emits a
|
||||
//! `Tier1Degraded { reason: HighLatency }` event when the percentile
|
||||
//! crosses the configured threshold; it emits a `Tier1Recovered`
|
||||
//! event when latency falls back below the threshold so the operator
|
||||
//! UI can clear the warning.
|
||||
//!
|
||||
//! The buffer holds raw `u64` ns samples — percentile readout sorts
|
||||
//! a snapshot under a `parking_lot::Mutex` (cheap given the bounded
|
||||
//! ring size and the fact that p99 is read at a much lower cadence
|
||||
//! than samples are pushed).
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use parking_lot::Mutex;
|
||||
|
||||
const DEFAULT_CAPACITY: usize = 1024;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct LatencyWindow {
|
||||
inner: Mutex<Ring>,
|
||||
threshold_ns: u64,
|
||||
degraded: parking_lot::Mutex<bool>,
|
||||
}
|
||||
|
||||
impl LatencyWindow {
|
||||
pub fn new(threshold: Duration) -> Self {
|
||||
Self {
|
||||
inner: Mutex::new(Ring::new(DEFAULT_CAPACITY)),
|
||||
threshold_ns: threshold.as_nanos() as u64,
|
||||
degraded: parking_lot::Mutex::new(false),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_capacity(threshold: Duration, capacity: usize) -> Self {
|
||||
Self {
|
||||
inner: Mutex::new(Ring::new(capacity.max(1))),
|
||||
threshold_ns: threshold.as_nanos() as u64,
|
||||
degraded: parking_lot::Mutex::new(false),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn record(&self, latency: Duration) {
|
||||
let ns = latency.as_nanos().min(u128::from(u64::MAX)) as u64;
|
||||
self.inner.lock().push(ns);
|
||||
}
|
||||
|
||||
pub fn p50(&self) -> Option<Duration> {
|
||||
self.percentile_ns(0.50).map(Duration::from_nanos)
|
||||
}
|
||||
|
||||
pub fn p99(&self) -> Option<Duration> {
|
||||
self.percentile_ns(0.99).map(Duration::from_nanos)
|
||||
}
|
||||
|
||||
pub fn threshold(&self) -> Duration {
|
||||
Duration::from_nanos(self.threshold_ns)
|
||||
}
|
||||
|
||||
/// Re-evaluate the degraded latch and return whether the state
|
||||
/// changed. Three outcomes:
|
||||
/// - `DegradationTransition::Degraded`: p99 just crossed the
|
||||
/// threshold this call (emit `Tier1Degraded`).
|
||||
/// - `DegradationTransition::Recovered`: p99 fell back below the
|
||||
/// threshold this call (emit `Tier1Recovered`).
|
||||
/// - `DegradationTransition::NoChange`: the latch's state already
|
||||
/// matched the observed reality; no event needed.
|
||||
///
|
||||
/// The first call returns `NoChange` until at least one sample
|
||||
/// has been recorded — `p99()` is `None` otherwise.
|
||||
pub fn evaluate(&self) -> DegradationTransition {
|
||||
let Some(p99) = self.percentile_ns(0.99) else {
|
||||
return DegradationTransition::NoChange;
|
||||
};
|
||||
let now_degraded = p99 > self.threshold_ns;
|
||||
let mut latch = self.degraded.lock();
|
||||
let prev = *latch;
|
||||
*latch = now_degraded;
|
||||
match (prev, now_degraded) {
|
||||
(false, true) => DegradationTransition::Degraded,
|
||||
(true, false) => DegradationTransition::Recovered,
|
||||
_ => DegradationTransition::NoChange,
|
||||
}
|
||||
}
|
||||
|
||||
fn percentile_ns(&self, q: f64) -> Option<u64> {
|
||||
let buf = self.inner.lock();
|
||||
if buf.len == 0 {
|
||||
return None;
|
||||
}
|
||||
let mut snap: Vec<u64> = buf.iter().collect();
|
||||
snap.sort_unstable();
|
||||
let idx = ((snap.len() as f64) * q).floor() as usize;
|
||||
Some(snap[idx.min(snap.len() - 1)])
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of re-evaluating the latency latch: tells the caller whether
/// an event has to be emitted.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DegradationTransition {
    /// The latch flipped from healthy to degraded.
    Degraded,
    /// The latch flipped from degraded back to healthy.
    Recovered,
    /// The latch kept its previous state.
    NoChange,
}
|
||||
|
||||
/// Fixed-capacity overwrite-oldest buffer of `u64` samples.
///
/// `head` is the slot the next `push` writes to and `len` is the
/// number of valid samples (never more than `cap`).
#[derive(Debug)]
struct Ring {
    buf: Vec<u64>,
    head: usize,
    len: usize,
    cap: usize,
}

impl Ring {
    /// Creates an empty ring with room for `cap` samples.
    ///
    /// A capacity of `0` is raised to `1`; with an empty backing buffer
    /// the first `push` would otherwise panic on an out-of-bounds index.
    fn new(cap: usize) -> Self {
        let cap = cap.max(1);
        Self {
            buf: vec![0; cap],
            head: 0,
            len: 0,
            cap,
        }
    }

    /// Stores `v` in the next slot, overwriting the oldest sample once
    /// the ring is full.
    fn push(&mut self, v: u64) {
        self.buf[self.head] = v;
        self.head = (self.head + 1) % self.cap;
        self.len = (self.len + 1).min(self.cap);
    }

    /// Iterates over the valid samples in storage order (not insertion
    /// order). While the ring is still filling, the valid samples are
    /// exactly the first `len` slots; once full, every slot is valid.
    fn iter(&self) -> impl Iterator<Item = u64> + '_ {
        self.buf[..self.len].iter().copied()
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Threshold shared by every test in this module.
    const THRESHOLD: Duration = Duration::from_millis(100);

    /// Records `count` identical samples of `millis` milliseconds.
    fn fill(window: &LatencyWindow, count: usize, millis: u64) {
        (0..count).for_each(|_| window.record(Duration::from_millis(millis)));
    }

    #[test]
    fn empty_window_returns_no_change() {
        let window = LatencyWindow::new(THRESHOLD);

        assert_eq!(window.evaluate(), DegradationTransition::NoChange);
        assert!(window.p99().is_none());
    }

    #[test]
    fn degraded_then_recovered_transitions() {
        // A tiny window lets a handful of samples flip the state.
        let window = LatencyWindow::with_capacity(THRESHOLD, 8);

        // Samples well above the threshold.
        fill(&window, 8, 150);
        let degraded = window.evaluate();

        // Samples well below the threshold overwrite all earlier ones
        // because the ring only holds 8 entries.
        fill(&window, 8, 10);
        let recovered = window.evaluate();
        let steady = window.evaluate();

        assert_eq!(degraded, DegradationTransition::Degraded);
        assert_eq!(recovered, DegradationTransition::Recovered);
        assert_eq!(steady, DegradationTransition::NoChange);
    }

    #[test]
    fn evaluate_below_threshold_is_no_change_when_already_healthy() {
        let window = LatencyWindow::with_capacity(THRESHOLD, 4);
        fill(&window, 4, 20);

        // The latch starts healthy and stays healthy, so even the very
        // first evaluation reports no transition.
        assert_eq!(window.evaluate(), DegradationTransition::NoChange);
    }
}
|
||||
@@ -0,0 +1,8 @@
|
||||
//! Internal modules for `detection_client`. Not part of the public
|
||||
//! API (see `crates/detection_client/src/lib.rs`).
|
||||
|
||||
pub mod budget;
|
||||
pub mod latency;
|
||||
pub mod proto;
|
||||
pub mod runtime;
|
||||
pub mod stats;
|
||||
@@ -0,0 +1,10 @@
|
||||
//! Generated tonic+prost code for the `../detections` gRPC contract.
|
||||
//!
|
||||
//! The actual `.rs` file is produced at build time by `build.rs`
|
||||
//! (see workspace `tonic-prost-build` / `protoc-bin-vendored` deps)
|
||||
//! and dropped into `OUT_DIR`. We pull it in here under a stable
|
||||
//! module path so the rest of the crate doesn't reach into `OUT_DIR`.
|
||||
|
||||
#![allow(clippy::derive_partial_eq_without_eq)]
|
||||
|
||||
tonic::include_proto!("azaion.detection.v1");
|
||||
@@ -0,0 +1,444 @@
|
||||
//! AZ-660 + AZ-661 — supervisor task + bi-di stream session.
|
||||
//!
|
||||
//! The supervisor owns the gRPC channel: it connects, runs ONE
|
||||
//! stream session, and on session loss (server-side close, network
|
||||
//! drop, transport error) re-connects with exponential backoff
|
||||
//! capped at `DetectionClientConfig::reconnect_cap`. The backoff
|
||||
//! resets to `reconnect_initial` on every successful reconnect so
|
||||
//! a healthy link spends 0 ms in the backoff path.
|
||||
//!
|
||||
//! Each stream session opens a single bi-directional stream against
|
||||
//! `DetectionService::Stream`. Outbound and inbound are driven from
|
||||
//! the same `tokio::select!` loop:
|
||||
//! - On `Frame` arrival: skip if `ai_locked`, otherwise add to the
|
||||
//! budget tracker (evicting the oldest in-flight slot if full)
|
||||
//! and forward as a `FrameRequest` to the gRPC outbound channel.
|
||||
//! - On `DetectionResponse` arrival: validate `schema_version`
|
||||
//! (AZ-661), look up the matching in-flight entry, compute round-
|
||||
//! trip latency, emit a `Batch` event, and update sliding-window
|
||||
//! latency. Track `model_version` and emit `ModelVersionChanged`
|
||||
//! on changes (AZ-661). Re-evaluate the latency window and emit
|
||||
//! `Tier1Degraded` / `Tier1Recovered` on threshold crossings.
|
||||
//!
|
||||
//! The session ends when:
|
||||
//! - `shutdown_rx` flips to `true`,
|
||||
//! - the inbound stream returns `None` (server closed cleanly), or
|
||||
//! - the inbound stream returns an error.
|
||||
//!
|
||||
//! `frame_rx.recv` returning `Closed` ends the session AND the
//! supervisor (no more frames will arrive). Responses still pending
//! at that point are not drained; they are dropped with the stream.
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use parking_lot::Mutex;
|
||||
use tokio::sync::{broadcast, mpsc, watch};
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio_stream::wrappers::ReceiverStream;
|
||||
use tonic::transport::{Channel, Endpoint};
|
||||
|
||||
use shared::models::detection::{Detection as SharedDetection, DetectionBatch};
|
||||
use shared::models::frame::{BoundingBox, Frame, PixelFormat};
|
||||
|
||||
use crate::internal::budget::{BudgetTracker, InFlight};
|
||||
use crate::internal::latency::{DegradationTransition, LatencyWindow};
|
||||
use crate::internal::proto::detection_service_client::DetectionServiceClient;
|
||||
use crate::internal::proto::{
|
||||
BoundingBox as ProtoBoundingBox, Detection as ProtoDetection, DetectionResponse, FrameRequest,
|
||||
PixelFormat as ProtoPixelFormat,
|
||||
};
|
||||
use crate::internal::stats::DetectionStats;
|
||||
use crate::{ConnectionState, DetectionClientConfig, DetectionEvent, Tier1DegradationReason};
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
enum StreamSessionError {
|
||||
#[error("opening stream failed: {0}")]
|
||||
OpenStream(tonic::Status),
|
||||
#[error("inbound stream error: {0}")]
|
||||
Inbound(tonic::Status),
|
||||
#[error("outbound channel closed by the gRPC client")]
|
||||
OutboundClosed,
|
||||
}
|
||||
|
||||
pub fn spawn_supervisor(
|
||||
config: DetectionClientConfig,
|
||||
frame_rx: broadcast::Receiver<Frame>,
|
||||
events_tx: broadcast::Sender<DetectionEvent>,
|
||||
stats: Arc<DetectionStats>,
|
||||
latency: Arc<LatencyWindow>,
|
||||
connection_tx: watch::Sender<ConnectionState>,
|
||||
shutdown_rx: watch::Receiver<bool>,
|
||||
) -> JoinHandle<()> {
|
||||
tokio::spawn(async move {
|
||||
supervisor(
|
||||
config,
|
||||
frame_rx,
|
||||
events_tx,
|
||||
stats,
|
||||
latency,
|
||||
connection_tx,
|
||||
shutdown_rx,
|
||||
)
|
||||
.await;
|
||||
})
|
||||
}
|
||||
|
||||
async fn supervisor(
|
||||
config: DetectionClientConfig,
|
||||
mut frame_rx: broadcast::Receiver<Frame>,
|
||||
events_tx: broadcast::Sender<DetectionEvent>,
|
||||
stats: Arc<DetectionStats>,
|
||||
latency: Arc<LatencyWindow>,
|
||||
connection_tx: watch::Sender<ConnectionState>,
|
||||
mut shutdown_rx: watch::Receiver<bool>,
|
||||
) {
|
||||
let mut backoff = config.reconnect_initial;
|
||||
let last_model_version: Arc<Mutex<Option<String>>> = Arc::new(Mutex::new(None));
|
||||
let mut prior_session = false;
|
||||
|
||||
loop {
|
||||
if *shutdown_rx.borrow() {
|
||||
connection_tx.send_replace(ConnectionState::Disconnected);
|
||||
return;
|
||||
}
|
||||
connection_tx.send_replace(ConnectionState::Connecting);
|
||||
|
||||
let endpoint = match Endpoint::from_shared(config.endpoint.clone()) {
|
||||
Ok(e) => e.connect_timeout(config.connect_timeout),
|
||||
Err(e) => {
|
||||
tracing::error!(
|
||||
error = %e,
|
||||
endpoint = %config.endpoint,
|
||||
"detection_client endpoint is invalid; this is fatal"
|
||||
);
|
||||
stats.note_connect_error();
|
||||
connection_tx.send_replace(ConnectionState::Disconnected);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let channel = tokio::select! {
|
||||
_ = shutdown_rx.changed() => {
|
||||
connection_tx.send_replace(ConnectionState::Disconnected);
|
||||
return;
|
||||
}
|
||||
res = endpoint.connect() => match res {
|
||||
Ok(c) => Some(c),
|
||||
Err(e) => {
|
||||
stats.note_connect_error();
|
||||
tracing::warn!(
|
||||
error = %e,
|
||||
endpoint = %config.endpoint,
|
||||
backoff_ms = backoff.as_millis() as u64,
|
||||
"detection_client connect failed; will retry after backoff"
|
||||
);
|
||||
None
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(channel) = channel {
|
||||
backoff = config.reconnect_initial;
|
||||
connection_tx.send_replace(ConnectionState::Connected);
|
||||
if prior_session {
|
||||
stats.note_reconnect();
|
||||
}
|
||||
prior_session = true;
|
||||
|
||||
let session_result = run_stream_session(
|
||||
channel,
|
||||
&mut frame_rx,
|
||||
&events_tx,
|
||||
&stats,
|
||||
&latency,
|
||||
&mut shutdown_rx,
|
||||
&config,
|
||||
&last_model_version,
|
||||
)
|
||||
.await;
|
||||
connection_tx.send_replace(ConnectionState::Disconnected);
|
||||
match session_result {
|
||||
Ok(SessionExit::Shutdown) => {
|
||||
return;
|
||||
}
|
||||
Ok(SessionExit::FrameSourceClosed) => {
|
||||
tracing::info!("detection_client frame source closed; exiting");
|
||||
return;
|
||||
}
|
||||
Ok(SessionExit::ServerClosed) => {
|
||||
tracing::info!("detection_client server closed stream; will reconnect");
|
||||
}
|
||||
Err(e) => {
|
||||
stats.note_stream_error();
|
||||
tracing::warn!(error = %e, "detection_client stream session ended with error");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for backoff before the next attempt unless shutdown
|
||||
// fires first. `frame_rx` is intentionally NOT polled here:
|
||||
// any frames arriving during disconnect simply lag, and the
|
||||
// broadcast channel folds them into a single
|
||||
// `RecvError::Lagged(n)` on the next session — counted via
|
||||
// `note_frame_lag`.
|
||||
tokio::select! {
|
||||
_ = tokio::time::sleep(backoff) => {}
|
||||
_ = shutdown_rx.changed() => {
|
||||
connection_tx.send_replace(ConnectionState::Disconnected);
|
||||
return;
|
||||
}
|
||||
}
|
||||
backoff = backoff.saturating_mul(2).min(config.reconnect_cap);
|
||||
}
|
||||
}
|
||||
|
||||
/// Non-error reasons for a stream session to end.
#[derive(Clone, Copy, Debug)]
enum SessionExit {
    /// The shutdown watch channel signalled a change.
    Shutdown,
    /// The frame broadcast channel reported `Closed`.
    FrameSourceClosed,
    /// The inbound stream ended without an error.
    ServerClosed,
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn run_stream_session(
|
||||
channel: Channel,
|
||||
frame_rx: &mut broadcast::Receiver<Frame>,
|
||||
events_tx: &broadcast::Sender<DetectionEvent>,
|
||||
stats: &Arc<DetectionStats>,
|
||||
latency: &Arc<LatencyWindow>,
|
||||
shutdown_rx: &mut watch::Receiver<bool>,
|
||||
config: &DetectionClientConfig,
|
||||
last_model_version: &Arc<Mutex<Option<String>>>,
|
||||
) -> Result<SessionExit, StreamSessionError> {
|
||||
let mut client = DetectionServiceClient::new(channel);
|
||||
let (req_tx, req_rx) = mpsc::channel::<FrameRequest>(config.outbound_buffer.max(1));
|
||||
let req_stream = ReceiverStream::new(req_rx);
|
||||
|
||||
let response = client
|
||||
.stream(req_stream)
|
||||
.await
|
||||
.map_err(StreamSessionError::OpenStream)?;
|
||||
let mut inbound = response.into_inner();
|
||||
|
||||
let mut budget = BudgetTracker::new(config.max_concurrent_in_flight);
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
_ = shutdown_rx.changed() => return Ok(SessionExit::Shutdown),
|
||||
|
||||
frame_res = frame_rx.recv() => {
|
||||
match frame_res {
|
||||
Ok(frame) => {
|
||||
if frame.ai_locked {
|
||||
stats.note_ai_locked_skipped();
|
||||
continue;
|
||||
}
|
||||
let entry = InFlight {
|
||||
frame_seq: frame.seq,
|
||||
capture_ts_monotonic_ns: frame.capture_ts_monotonic_ns,
|
||||
};
|
||||
if let Some(evicted) = budget.add(entry) {
|
||||
stats.note_in_flight_dropped();
|
||||
tracing::debug!(
|
||||
evicted_seq = evicted.frame_seq,
|
||||
"detection_client dropped oldest in-flight frame (budget)"
|
||||
);
|
||||
}
|
||||
let req = build_request(&frame);
|
||||
if req_tx.send(req).await.is_err() {
|
||||
return Err(StreamSessionError::OutboundClosed);
|
||||
}
|
||||
stats.note_sent();
|
||||
}
|
||||
Err(broadcast::error::RecvError::Lagged(n)) => {
|
||||
stats.note_frame_lag(n);
|
||||
tracing::warn!(
|
||||
dropped = n,
|
||||
"detection_client frame_rx lagged; counted as frame_lag_total"
|
||||
);
|
||||
}
|
||||
Err(broadcast::error::RecvError::Closed) => {
|
||||
return Ok(SessionExit::FrameSourceClosed);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inbound_res = inbound.message() => {
|
||||
match inbound_res {
|
||||
Ok(Some(resp)) => {
|
||||
handle_response(
|
||||
resp,
|
||||
&mut budget,
|
||||
events_tx,
|
||||
stats,
|
||||
latency,
|
||||
last_model_version,
|
||||
config,
|
||||
);
|
||||
// Re-evaluate latency window after every
|
||||
// response so degraded/recovered transitions
|
||||
// surface at most one event per change.
|
||||
match latency.evaluate() {
|
||||
DegradationTransition::Degraded => {
|
||||
let _ = events_tx.send(DetectionEvent::Tier1Degraded {
|
||||
reason: Tier1DegradationReason::HighLatency,
|
||||
});
|
||||
}
|
||||
DegradationTransition::Recovered => {
|
||||
let _ = events_tx.send(DetectionEvent::Tier1Recovered);
|
||||
}
|
||||
DegradationTransition::NoChange => {}
|
||||
}
|
||||
}
|
||||
Ok(None) => return Ok(SessionExit::ServerClosed),
|
||||
Err(status) => return Err(StreamSessionError::Inbound(status)),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn build_request(frame: &Frame) -> FrameRequest {
|
||||
FrameRequest {
|
||||
frame_seq: frame.seq,
|
||||
capture_ts_monotonic_ns: frame.capture_ts_monotonic_ns,
|
||||
width: frame.width,
|
||||
height: frame.height,
|
||||
pix_fmt: pix_fmt_to_proto(frame.pix_fmt) as i32,
|
||||
pixels: frame.pixels.to_vec(),
|
||||
}
|
||||
}
|
||||
|
||||
fn pix_fmt_to_proto(p: PixelFormat) -> ProtoPixelFormat {
|
||||
match p {
|
||||
PixelFormat::Nv12 => ProtoPixelFormat::Nv12,
|
||||
PixelFormat::Yuv420p => ProtoPixelFormat::Yuv420p,
|
||||
PixelFormat::Rgb24 => ProtoPixelFormat::Rgb24,
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_response(
|
||||
resp: DetectionResponse,
|
||||
budget: &mut BudgetTracker,
|
||||
events_tx: &broadcast::Sender<DetectionEvent>,
|
||||
stats: &Arc<DetectionStats>,
|
||||
latency: &Arc<LatencyWindow>,
|
||||
last_model_version: &Arc<Mutex<Option<String>>>,
|
||||
config: &DetectionClientConfig,
|
||||
) {
|
||||
// AZ-661 — schema handshake first. A mismatch is a hard error;
|
||||
// do NOT decode the rest of the response, do NOT credit it
|
||||
// against latency, and clear the in-flight slot so the budget
|
||||
// tracker stays accurate.
|
||||
if resp.schema_version != config.expected_schema_version {
|
||||
stats.note_schema_mismatch();
|
||||
// Free the in-flight slot if we can match it.
|
||||
let _ = budget.remove(resp.frame_seq);
|
||||
let detail = format!(
|
||||
"expected schema_version {} got {}",
|
||||
config.expected_schema_version, resp.schema_version
|
||||
);
|
||||
tracing::error!(
|
||||
expected = config.expected_schema_version,
|
||||
actual = resp.schema_version,
|
||||
frame_seq = resp.frame_seq,
|
||||
"detection_client schema mismatch"
|
||||
);
|
||||
let _ = events_tx.send(DetectionEvent::SchemaMismatch {
|
||||
detail,
|
||||
frame_seq: resp.frame_seq,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// Look up the in-flight request. A `None` here means the budget
|
||||
// tracker already evicted this frame; the response is orphaned
|
||||
// and dropped silently (do not credit latency or events).
|
||||
let Some(in_flight) = budget.remove(resp.frame_seq) else {
|
||||
stats.note_orphan_response();
|
||||
tracing::debug!(
|
||||
frame_seq = resp.frame_seq,
|
||||
"detection_client orphan response (budget already evicted)"
|
||||
);
|
||||
return;
|
||||
};
|
||||
|
||||
// AZ-661 — model_version handshake. First response on a session
|
||||
// is NOT a change if the latch is empty AND the version equals
|
||||
// the last observed version across sessions. We only emit when
|
||||
// the version changes from a previously-seen non-None value, OR
|
||||
// when a session emits its first version (transitioning from
|
||||
// None to Some) — the operator UI shows "model swapped" the
|
||||
// first time per process lifetime, then again on every change.
|
||||
{
|
||||
let mut latch = last_model_version.lock();
|
||||
let changed = match latch.as_ref() {
|
||||
None => true, // first observation in this process
|
||||
Some(prev) => prev != &resp.model_version,
|
||||
};
|
||||
if changed {
|
||||
let previous = latch.clone();
|
||||
*latch = Some(resp.model_version.clone());
|
||||
stats.note_model_version_change();
|
||||
let _ = events_tx.send(DetectionEvent::ModelVersionChanged {
|
||||
previous,
|
||||
current: resp.model_version.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Use the server-reported processing time as the RTT proxy.
|
||||
// The Tier-1 NFR measures processing latency at the detections
|
||||
// service (`description.md §8`), not round-trip transport time.
|
||||
// If wall-clock RTT tracking is added later, store
|
||||
// `Instant::now()` in the budget entry at send time.
|
||||
let server_side = Duration::from_millis(u64::from(resp.latency_ms));
|
||||
latency.record(server_side);
|
||||
|
||||
stats.note_received();
|
||||
|
||||
let batch = response_to_batch(resp);
|
||||
let _ = events_tx.send(DetectionEvent::Batch {
|
||||
batch,
|
||||
capture_ts_monotonic_ns: in_flight.capture_ts_monotonic_ns,
|
||||
server_latency: server_side,
|
||||
});
|
||||
}
|
||||
|
||||
fn response_to_batch(resp: DetectionResponse) -> DetectionBatch {
|
||||
let model_version = resp.model_version.clone();
|
||||
let frame_seq = resp.frame_seq;
|
||||
let latency_ms = resp.latency_ms;
|
||||
let detections = resp
|
||||
.detections
|
||||
.into_iter()
|
||||
.map(proto_detection_to_shared)
|
||||
.collect();
|
||||
DetectionBatch {
|
||||
frame_seq,
|
||||
detections,
|
||||
latency_ms,
|
||||
model_version,
|
||||
}
|
||||
}
|
||||
|
||||
fn proto_detection_to_shared(d: ProtoDetection) -> SharedDetection {
|
||||
SharedDetection {
|
||||
class_id: d.class_id,
|
||||
class_name: d.class_name,
|
||||
confidence: d.confidence,
|
||||
bbox_normalized: bbox_to_shared(d.bbox_normalized.unwrap_or_default()),
|
||||
mask_or_polyline: d.mask_or_polyline,
|
||||
source_frame_seq: d.source_frame_seq,
|
||||
}
|
||||
}
|
||||
|
||||
fn bbox_to_shared(b: ProtoBoundingBox) -> BoundingBox {
|
||||
BoundingBox {
|
||||
x_min: b.x_min,
|
||||
y_min: b.y_min,
|
||||
x_max: b.x_max,
|
||||
y_max: b.y_max,
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,129 @@
|
||||
//! AZ-660 + AZ-661 — atomic counter surface for `DetectionClient`.
|
||||
//!
|
||||
//! `description.md §3` requires:
|
||||
//! - `gRPC_connection_state` (watch, not in this struct — see
|
||||
//! `runtime.rs`)
|
||||
//! - `requests_in_flight` (atomic gauge maintained by the supervisor)
|
||||
//! - `latency_p50`, `latency_p99` (live in [`crate::internal::latency`])
|
||||
//! - `errors_by_kind` (counters per kind, this struct)
|
||||
//! - `budget_drops_total` (this struct)
|
||||
//!
|
||||
//! AZ-661 adds:
|
||||
//! - `schema_mismatch_total` (one of the `errors_by_kind` buckets,
|
||||
//! surfaced explicitly because it is the loudest failure mode)
|
||||
//! - `model_version_changes_total` (visibility for the operator UI)
|
||||
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Lock-free counters shared between the supervisor task and the
/// `DetectionClientHandle`. Every field is `AtomicU64`; readers
/// snapshot each counter independently with `Ordering::Relaxed`, so a
/// set of reads is not a consistent snapshot across fields.
#[derive(Debug, Default)]
pub struct DetectionStats {
    /// Requests handed to the outbound channel.
    pub requests_sent_total: AtomicU64,
    /// Responses matched to a tracked in-flight request.
    pub responses_received_total: AtomicU64,
    /// In-flight entries evicted by the budget tracker.
    pub budget_drops_total: AtomicU64,
    /// Frames reported as lagged by the frame receiver.
    pub frame_lag_total: AtomicU64,
    /// Responses rejected because of a schema version mismatch.
    pub schema_mismatch_total: AtomicU64,
    /// Model version changes observed.
    pub model_version_changes_total: AtomicU64,
    /// Successful connects after the first one.
    pub reconnects_total: AtomicU64,
    /// Failed connection attempts.
    pub connect_errors_total: AtomicU64,
    /// Stream-level errors; orphan responses are counted here as well.
    pub stream_errors_total: AtomicU64,
    /// Gauge: requests sent and not yet answered or dropped.
    pub requests_in_flight: AtomicU64,
    /// Frames skipped because they were flagged `ai_locked`.
    pub ai_locked_skipped_total: AtomicU64,
}

impl DetectionStats {
    /// Creates a zeroed counter set behind an `Arc`.
    pub fn shared() -> Arc<Self> {
        Arc::new(Self::default())
    }

    /// Decrements the in-flight gauge, stopping at zero. A plain
    /// `fetch_sub` would wrap to `u64::MAX` on an unmatched decrement
    /// and make the gauge meaningless.
    fn dec_in_flight(&self) {
        let _ = self
            .requests_in_flight
            .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |n| n.checked_sub(1));
    }

    /// Records one request sent: bumps the sent counter and the gauge.
    pub fn note_sent(&self) {
        self.requests_sent_total.fetch_add(1, Ordering::Relaxed);
        self.requests_in_flight.fetch_add(1, Ordering::Relaxed);
    }

    /// Records one matched response: bumps the received counter and
    /// lowers the gauge.
    pub fn note_received(&self) {
        self.responses_received_total
            .fetch_add(1, Ordering::Relaxed);
        // The gauge goes down here on a normal response and in
        // `note_in_flight_dropped` on a budget eviction.
        self.dec_in_flight();
    }

    /// Records one budget eviction: bumps the drop counter and lowers
    /// the gauge.
    pub fn note_in_flight_dropped(&self) {
        self.budget_drops_total.fetch_add(1, Ordering::Relaxed);
        self.dec_in_flight();
    }

    /// Records a response that had no tracked in-flight entry.
    ///
    /// The gauge is left alone (the eviction already lowered it) and
    /// the response is not credited to `responses_received_total`; it
    /// is counted in `stream_errors_total`.
    pub fn note_orphan_response(&self) {
        self.stream_errors_total.fetch_add(1, Ordering::Relaxed);
    }

    /// Adds `n` lagged frames to `frame_lag_total`.
    pub fn note_frame_lag(&self, n: u64) {
        self.frame_lag_total.fetch_add(n, Ordering::Relaxed);
    }

    /// Records one frame skipped because it was `ai_locked`.
    pub fn note_ai_locked_skipped(&self) {
        self.ai_locked_skipped_total.fetch_add(1, Ordering::Relaxed);
    }

    /// Records one schema version mismatch.
    pub fn note_schema_mismatch(&self) {
        self.schema_mismatch_total.fetch_add(1, Ordering::Relaxed);
    }

    /// Records one model version change.
    pub fn note_model_version_change(&self) {
        self.model_version_changes_total
            .fetch_add(1, Ordering::Relaxed);
    }

    /// Records one reconnect.
    pub fn note_reconnect(&self) {
        self.reconnects_total.fetch_add(1, Ordering::Relaxed);
    }

    /// Records one failed connection attempt.
    pub fn note_connect_error(&self) {
        self.connect_errors_total.fetch_add(1, Ordering::Relaxed);
    }

    /// Records one stream-level error.
    pub fn note_stream_error(&self) {
        self.stream_errors_total.fetch_add(1, Ordering::Relaxed);
    }

    /// Current value of the in-flight gauge.
    pub fn requests_in_flight(&self) -> u64 {
        self.requests_in_flight.load(Ordering::Relaxed)
    }

    /// Total budget evictions so far.
    pub fn budget_drops_total(&self) -> u64 {
        self.budget_drops_total.load(Ordering::Relaxed)
    }

    /// Total requests sent so far.
    pub fn requests_sent_total(&self) -> u64 {
        self.requests_sent_total.load(Ordering::Relaxed)
    }

    /// Total matched responses so far.
    pub fn responses_received_total(&self) -> u64 {
        self.responses_received_total.load(Ordering::Relaxed)
    }

    /// Total schema mismatches so far.
    pub fn schema_mismatch_total(&self) -> u64 {
        self.schema_mismatch_total.load(Ordering::Relaxed)
    }

    /// Total model version changes so far.
    pub fn model_version_changes_total(&self) -> u64 {
        self.model_version_changes_total.load(Ordering::Relaxed)
    }

    /// Total reconnects so far.
    pub fn reconnects_total(&self) -> u64 {
        self.reconnects_total.load(Ordering::Relaxed)
    }

    /// Total `ai_locked` frames skipped so far.
    pub fn ai_locked_skipped_total(&self) -> u64 {
        self.ai_locked_skipped_total.load(Ordering::Relaxed)
    }

    /// Total lagged frames so far.
    pub fn frame_lag_total(&self) -> u64 {
        self.frame_lag_total.load(Ordering::Relaxed)
    }

    /// Total failed connection attempts so far.
    pub fn connect_errors_total(&self) -> u64 {
        self.connect_errors_total.load(Ordering::Relaxed)
    }

    /// Total stream-level errors so far (orphan responses included).
    pub fn stream_errors_total(&self) -> u64 {
        self.stream_errors_total.load(Ordering::Relaxed)
    }
}
|
||||
Reference in New Issue
Block a user