[AZ-659] [AZ-660] [AZ-661] Implement frame publisher + gRPC detection client

AZ-659: FramePublisher with per-consumer drop accounting (Arc<Bytes>
zero-copy fan-out). Adds ConsumerId enum, PublisherStats, FrameReceiver
wrapper, and publisher integration tests (AC-1, AC-2, AC-3).

AZ-660: Bi-directional tonic gRPC stream to ../detections. Reconnect
with bounded exponential backoff (1 s → 30 s cap). Drop-oldest
in-flight budgeting (max_concurrent_in_flight = 2). ai_locked frame
skipping. Integration tests against fixture in-process server
(AC-1: happy path 30 fps/10 s, AC-2: reconnect, AC-3: budget drops,
AC-4: ai_locked skipping).

AZ-661: Schema validation (hard SchemaMismatch error on version
mismatch), model_version latch with ModelVersionChanged events,
sliding-window p99 latency tracker with Tier1Degraded/Tier1Recovered
transitions. Integration tests (AC-1, AC-2, AC-3).

Also: update module-layout.md for frame_ingest and detection_client
to reflect the streaming API shape; code review report batch_18.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-20 18:23:56 +03:00
parent a7df02d434
commit 0854d3be1c
18 changed files with 2738 additions and 55 deletions
@@ -0,0 +1,170 @@
//! AZ-660 — in-flight request budgeting.
//!
//! The Tier-1 NFR (`description.md §6` + AC-3) requires the client
//! to keep latency near the per-frame target by NEVER queueing
//! frames indefinitely. When `max_concurrent_in_flight` (default 2)
//! is reached and a new frame arrives, the OLDEST in-flight frame
//! is dropped (its slot is freed for the new one). The drop is
//! counted toward `budget_drops_total`; the frame's slot in the
//! tracker is removed so a late response for the dropped frame can
//! be ignored without crediting it against the latency histogram.
//!
//! The tracker is intentionally simple: a small `VecDeque` of
//! `(frame_seq, capture_ts_ns)` pairs, capped at
//! `max_concurrent_in_flight`. Order is FIFO (oldest at the front),
//! so "drop oldest" is `pop_front`. Removal-on-response walks the
//! deque from the front because responses arrive in roughly the
//! same order they were sent; in the worst case (out-of-order
//! response) we walk the full deque, which is fine at the default
//! capacity of 2.
use std::collections::VecDeque;
/// Snapshot of an in-flight request — what the inbound side needs to
/// compute round-trip latency once the response arrives.
///
/// The type is `Copy`, so entries are handed around by value.
#[derive(Debug, Clone, Copy)]
pub struct InFlight {
/// Sequence number of the tracked frame; `BudgetTracker::remove`
/// looks entries up by this value.
pub frame_seq: u64,
/// Capture timestamp carried along with the frame.
/// NOTE(review): presumably nanoseconds on a monotonic clock, going
/// by the field name — confirm against the frame producer.
pub capture_ts_monotonic_ns: u64,
}
/// FIFO ledger of requests that are still waiting for a response,
/// bounded at `capacity` entries.
///
/// The front of the deque is always the oldest request, so pushing
/// into a full ledger evicts the front entry.
#[derive(Debug)]
pub struct BudgetTracker {
    inner: VecDeque<InFlight>,
    capacity: usize,
}

impl BudgetTracker {
    /// Create an empty tracker. A requested capacity of `0` is raised
    /// to `1` so the tracker can always hold the newest request.
    pub fn new(capacity: usize) -> Self {
        let capacity = std::cmp::max(capacity, 1);
        let inner = VecDeque::with_capacity(capacity);
        Self { inner, capacity }
    }

    /// Maximum number of requests tracked at once (never below `1`).
    pub fn capacity(&self) -> usize {
        self.capacity
    }

    /// Number of requests currently tracked.
    pub fn in_flight(&self) -> usize {
        self.inner.len()
    }

    /// Track a new request.
    ///
    /// When the tracker is already full, the oldest entry is popped
    /// first and returned as `Some(InFlight)`; the caller counts that
    /// toward `budget_drops_total`. Otherwise returns `None`.
    pub fn add(&mut self, entry: InFlight) -> Option<InFlight> {
        let mut evicted = None;
        if self.capacity <= self.inner.len() {
            evicted = self.inner.pop_front();
        }
        self.inner.push_back(entry);
        evicted
    }

    /// Stop tracking the request with the given `frame_seq` and hand
    /// its entry back.
    ///
    /// Returns `None` when no such entry exists — typically a late
    /// response for a frame that was already budget-dropped. The
    /// caller discards such responses so they cannot skew the latency
    /// histogram.
    pub fn remove(&mut self, frame_seq: u64) -> Option<InFlight> {
        // Front-to-back scan: the oldest request is the most likely match.
        let hit = self
            .inner
            .iter()
            .position(|candidate| candidate.frame_seq == frame_seq);
        hit.and_then(|idx| self.inner.remove(idx))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Entry whose timestamp is derived from its sequence number.
    fn entry(seq: u64) -> InFlight {
        InFlight {
            frame_seq: seq,
            capture_ts_monotonic_ns: seq * 1_000_000,
        }
    }

    /// Tracker of the given capacity pre-loaded with `seqs`, in order.
    fn tracker_with(capacity: usize, seqs: &[u64]) -> BudgetTracker {
        let mut tracker = BudgetTracker::new(capacity);
        for &seq in seqs {
            tracker.add(entry(seq));
        }
        tracker
    }

    #[test]
    fn capacity_clamps_to_one() {
        // A zero capacity request must still yield a usable tracker.
        assert_eq!(BudgetTracker::new(0).capacity(), 1);
    }

    #[test]
    fn add_under_capacity_does_not_evict() {
        let mut tracker = BudgetTracker::new(2);

        let first = tracker.add(entry(1));
        let second = tracker.add(entry(2));

        assert!(first.is_none());
        assert!(second.is_none());
        assert_eq!(tracker.in_flight(), 2);
    }

    #[test]
    fn add_at_capacity_evicts_oldest() {
        let mut tracker = tracker_with(2, &[1, 2]);

        // The third entry does not fit, so the oldest one has to go.
        let evicted = tracker.add(entry(3));

        assert_eq!(evicted.expect("evicted").frame_seq, 1);
        assert_eq!(tracker.in_flight(), 2);
    }

    #[test]
    fn remove_known_frame_returns_entry() {
        let mut tracker = tracker_with(4, &[1, 2, 3]);

        let removed = tracker.remove(2);

        assert_eq!(removed.expect("removed").frame_seq, 2);
        assert_eq!(tracker.in_flight(), 2);
    }

    #[test]
    fn remove_unknown_frame_returns_none() {
        let mut tracker = tracker_with(2, &[1]);

        assert!(tracker.remove(999).is_none());
    }

    #[test]
    fn evicted_frame_remove_returns_none() {
        let mut tracker = tracker_with(2, &[1, 2]);
        let evicted = tracker.add(entry(3));
        assert_eq!(evicted.expect("evicted").frame_seq, 1);

        // A late response for the evicted frame finds nothing, so the
        // caller drops it.
        assert!(tracker.remove(1).is_none());
    }
}
@@ -0,0 +1,189 @@
//! AZ-661 — sliding-window latency tracker.
//!
//! Tracks per-response round-trip latency in a fixed-capacity ring
//! buffer. The client polls `p99()` periodically and emits a
//! `Tier1Degraded { reason: HighLatency }` event when the percentile
//! crosses the configured threshold; it emits a `Tier1Recovered`
//! event when latency falls back below the threshold so the operator
//! UI can clear the warning.
//!
//! The buffer holds raw `u64` ns samples — percentile readout sorts
//! a snapshot under a `parking_lot::Mutex` (cheap given the bounded
//! ring size and the fact that p99 is read at a much lower cadence
//! than samples are pushed).
use std::time::Duration;
use parking_lot::Mutex;
const DEFAULT_CAPACITY: usize = 1024;
/// Sliding-window latency tracker with a degraded/healthy latch.
///
/// Samples are stored as nanoseconds in a fixed-capacity ring; once the
/// ring is full each new sample overwrites the oldest one. Percentiles
/// are computed on demand from a copy of the ring contents.
#[derive(Debug)]
pub struct LatencyWindow {
    inner: Mutex<Ring>,
    threshold_ns: u64,
    degraded: parking_lot::Mutex<bool>,
}

impl LatencyWindow {
    /// Create a window with the default ring capacity.
    pub fn new(threshold: Duration) -> Self {
        Self::with_capacity(threshold, DEFAULT_CAPACITY)
    }

    /// Create a window holding at most `capacity` samples (a capacity
    /// of `0` is raised to `1`).
    ///
    /// `threshold` is stored in nanoseconds and saturates at
    /// `u64::MAX`, so an effectively-infinite threshold such as
    /// `Duration::MAX` stays effectively infinite instead of wrapping
    /// to an arbitrary value.
    pub fn with_capacity(threshold: Duration, capacity: usize) -> Self {
        Self {
            inner: Mutex::new(Ring::new(capacity.max(1))),
            threshold_ns: Self::saturating_ns(threshold),
            degraded: parking_lot::Mutex::new(false),
        }
    }

    /// Record one latency sample (saturating at `u64::MAX` ns).
    pub fn record(&self, latency: Duration) {
        let ns = Self::saturating_ns(latency);
        self.inner.lock().push(ns);
    }

    /// Median of the samples currently in the window, or `None` when
    /// nothing has been recorded yet.
    pub fn p50(&self) -> Option<Duration> {
        self.percentile_ns(0.50).map(Duration::from_nanos)
    }

    /// 99th percentile of the samples currently in the window, or
    /// `None` when nothing has been recorded yet.
    pub fn p99(&self) -> Option<Duration> {
        self.percentile_ns(0.99).map(Duration::from_nanos)
    }

    /// The configured degradation threshold.
    pub fn threshold(&self) -> Duration {
        Duration::from_nanos(self.threshold_ns)
    }

    /// Re-evaluate the degraded latch and return whether the state
    /// changed. Three outcomes:
    /// - `DegradationTransition::Degraded`: p99 just crossed the
    ///   threshold this call (emit `Tier1Degraded`).
    /// - `DegradationTransition::Recovered`: p99 fell back to or below
    ///   the threshold this call (emit `Tier1Recovered`).
    /// - `DegradationTransition::NoChange`: the latch's state already
    ///   matched the observed reality; no event needed.
    ///
    /// Returns `NoChange` until at least one sample has been recorded —
    /// `p99()` is `None` before that.
    pub fn evaluate(&self) -> DegradationTransition {
        let Some(p99) = self.percentile_ns(0.99) else {
            return DegradationTransition::NoChange;
        };
        // Strictly greater: a p99 exactly at the threshold is healthy.
        let now_degraded = p99 > self.threshold_ns;
        let mut latch = self.degraded.lock();
        let prev = *latch;
        *latch = now_degraded;
        match (prev, now_degraded) {
            (false, true) => DegradationTransition::Degraded,
            (true, false) => DegradationTransition::Recovered,
            _ => DegradationTransition::NoChange,
        }
    }

    /// Convert a duration to whole nanoseconds, clamping at `u64::MAX`
    /// instead of truncating the high bits.
    fn saturating_ns(d: Duration) -> u64 {
        d.as_nanos().min(u128::from(u64::MAX)) as u64
    }

    /// Value at index `floor(len * q)` (clamped to the last index) of
    /// the ascending-sorted samples, or `None` for an empty window.
    fn percentile_ns(&self, q: f64) -> Option<u64> {
        let ring = self.inner.lock();
        if ring.len == 0 {
            return None;
        }
        let mut snap: Vec<u64> = ring.iter().collect();
        // Release the ring before doing the selection so `record` is
        // blocked only for the duration of the copy.
        drop(ring);
        let idx = (((snap.len() as f64) * q).floor() as usize).min(snap.len() - 1);
        // Only one order statistic is needed; selection yields the same
        // element a full sort would put at `idx`.
        let (_, nth, _) = snap.select_nth_unstable(idx);
        Some(*nth)
    }
}
/// Result of `LatencyWindow::evaluate`: how the degraded latch moved
/// on that call.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DegradationTransition {
/// The latch was healthy and p99 is now above the threshold.
Degraded,
/// The latch was degraded and p99 is no longer above the threshold.
Recovered,
/// The latch already matched the observed state, or the window holds
/// no samples yet.
NoChange,
}
/// Fixed-capacity overwrite-oldest sample buffer.
///
/// `head` is the slot the next sample is written to; `len` counts the
/// valid samples and never exceeds `cap`. Callers must construct it
/// with a non-zero `cap`.
#[derive(Debug)]
struct Ring {
    buf: Vec<u64>,
    head: usize,
    len: usize,
    cap: usize,
}

impl Ring {
    /// Allocate a zero-filled buffer of `cap` slots with no valid samples.
    fn new(cap: usize) -> Self {
        let buf = vec![0; cap];
        Self {
            buf,
            head: 0,
            len: 0,
            cap,
        }
    }

    /// Store `v` in the current write slot and advance the slot,
    /// wrapping to the start after the last one.
    fn push(&mut self, v: u64) {
        let slot = self.head;
        self.buf[slot] = v;
        let next = slot + 1;
        self.head = if next == self.cap { 0 } else { next };
        // Grows until the buffer is full, then stays pinned at `cap`.
        self.len = (self.len + 1).min(self.cap);
    }

    /// Iterate over the valid samples in storage order (not insertion
    /// order). While the buffer is filling, the valid samples occupy
    /// the first `len` slots; once full, every slot is valid.
    fn iter(&self) -> impl Iterator<Item = u64> + '_ {
        self.buf[..self.len].iter().copied()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Threshold shared by every test in this module.
    const THRESHOLD: Duration = Duration::from_millis(100);

    /// Record `count` identical samples of `millis` milliseconds.
    fn record_n(window: &LatencyWindow, count: usize, millis: u64) {
        for _ in 0..count {
            window.record(Duration::from_millis(millis));
        }
    }

    #[test]
    fn empty_window_returns_no_change() {
        let window = LatencyWindow::new(THRESHOLD);

        assert_eq!(window.evaluate(), DegradationTransition::NoChange);
        assert!(window.p99().is_none());
    }

    #[test]
    fn degraded_then_recovered_transitions() {
        // A tiny window lets a handful of samples flip the state.
        let window = LatencyWindow::with_capacity(THRESHOLD, 8);

        // Fill the window with samples well above the threshold.
        record_n(&window, 8, 150);
        let degraded = window.evaluate();

        // Fill it again with samples well below the threshold; with a
        // capacity of 8 this displaces every earlier sample.
        record_n(&window, 8, 10);
        let recovered = window.evaluate();
        let steady = window.evaluate();

        assert_eq!(degraded, DegradationTransition::Degraded);
        assert_eq!(recovered, DegradationTransition::Recovered);
        assert_eq!(steady, DegradationTransition::NoChange);
    }

    #[test]
    fn evaluate_below_threshold_is_no_change_when_already_healthy() {
        let window = LatencyWindow::with_capacity(THRESHOLD, 4);
        record_n(&window, 4, 20);

        // The latch starts at `false` and stays there, so even the
        // very first evaluation reports no change.
        assert_eq!(window.evaluate(), DegradationTransition::NoChange);
    }
}
@@ -0,0 +1,8 @@
//! Internal modules for `detection_client`. Not part of the public
//! API (see `crates/detection_client/src/lib.rs`).
pub mod budget;
pub mod latency;
pub mod proto;
pub mod runtime;
pub mod stats;
@@ -0,0 +1,10 @@
//! Generated tonic+prost code for the `../detections` gRPC contract.
//!
//! The actual `.rs` file is produced at build time by `build.rs`
//! (see workspace `tonic-prost-build` / `protoc-bin-vendored` deps)
//! and dropped into `OUT_DIR`. We pull it in here under a stable
//! module path so the rest of the crate doesn't reach into `OUT_DIR`.
// The lint below is silenced for the whole module because the code
// pulled in by `include_proto!` is produced at build time and cannot be
// adjusted at each derive site.
// NOTE(review): presumably the generated message types derive
// `PartialEq` without `Eq` — confirm against the generated output.
#![allow(clippy::derive_partial_eq_without_eq)]
tonic::include_proto!("azaion.detection.v1");
@@ -0,0 +1,444 @@
//! AZ-660 + AZ-661 — supervisor task + bi-di stream session.
//!
//! The supervisor owns the gRPC channel: it connects, runs ONE
//! stream session, and on session loss (server-side close, network
//! drop, transport error) re-connects with exponential backoff
//! capped at `DetectionClientConfig::reconnect_cap`. The backoff
//! resets to `reconnect_initial` on every successful reconnect so
//! a healthy link spends 0 ms in the backoff path.
//!
//! Each stream session opens a single bi-directional stream against
//! `DetectionService::Stream`. Outbound and inbound are driven from
//! the same `tokio::select!` loop:
//! - On `Frame` arrival: skip if `ai_locked`, otherwise add to the
//! budget tracker (evicting the oldest in-flight slot if full)
//! and forward as a `FrameRequest` to the gRPC outbound channel.
//! - On `DetectionResponse` arrival: validate `schema_version`
//! (AZ-661) and look up the matching in-flight entry. Track
//! `model_version` and emit `ModelVersionChanged` on changes
//! (AZ-661). Record the server-reported `latency_ms` in the
//! sliding latency window and emit a `Batch` event. Re-evaluate the
//! latency window and emit `Tier1Degraded` / `Tier1Recovered` on
//! threshold crossings.
//!
//! The session ends when:
//! - `shutdown_rx` flips to `true`,
//! - the inbound stream returns `None` (server closed cleanly), or
//! - the inbound stream returns an error.
//!
//! `frame_rx.recv` returning `Closed` ends the session AND the
//! supervisor (no more frames will arrive). Responses that are still
//! in flight at that point are not drained; they are discarded
//! together with the stream.
use std::sync::Arc;
use std::time::Duration;
use parking_lot::Mutex;
use tokio::sync::{broadcast, mpsc, watch};
use tokio::task::JoinHandle;
use tokio_stream::wrappers::ReceiverStream;
use tonic::transport::{Channel, Endpoint};
use shared::models::detection::{Detection as SharedDetection, DetectionBatch};
use shared::models::frame::{BoundingBox, Frame, PixelFormat};
use crate::internal::budget::{BudgetTracker, InFlight};
use crate::internal::latency::{DegradationTransition, LatencyWindow};
use crate::internal::proto::detection_service_client::DetectionServiceClient;
use crate::internal::proto::{
BoundingBox as ProtoBoundingBox, Detection as ProtoDetection, DetectionResponse, FrameRequest,
PixelFormat as ProtoPixelFormat,
};
use crate::internal::stats::DetectionStats;
use crate::{ConnectionState, DetectionClientConfig, DetectionEvent, Tier1DegradationReason};
/// Reasons a single stream session can end with an error. The
/// supervisor logs the value, counts a stream error, and retries after
/// the backoff delay.
#[derive(Debug, thiserror::Error)]
enum StreamSessionError {
/// The `stream` call on the gRPC client returned a status, so no
/// response stream was obtained.
#[error("opening stream failed: {0}")]
OpenStream(tonic::Status),
/// Reading the next message from the inbound response stream failed.
#[error("inbound stream error: {0}")]
Inbound(tonic::Status),
/// Sending a `FrameRequest` into the outbound mpsc channel failed
/// because its receiving half was gone.
#[error("outbound channel closed by the gRPC client")]
OutboundClosed,
}
/// Launch the supervisor on the tokio runtime and return its handle.
///
/// All arguments are moved into the spawned task; the task finishes
/// when the supervisor returns (shutdown, closed frame source, or an
/// invalid endpoint).
pub fn spawn_supervisor(
    config: DetectionClientConfig,
    frame_rx: broadcast::Receiver<Frame>,
    events_tx: broadcast::Sender<DetectionEvent>,
    stats: Arc<DetectionStats>,
    latency: Arc<LatencyWindow>,
    connection_tx: watch::Sender<ConnectionState>,
    shutdown_rx: watch::Receiver<bool>,
) -> JoinHandle<()> {
    // Building the future runs nothing yet; it is first polled by the
    // runtime once spawned.
    let task = supervisor(
        config,
        frame_rx,
        events_tx,
        stats,
        latency,
        connection_tx,
        shutdown_rx,
    );
    tokio::spawn(task)
}
async fn supervisor(
config: DetectionClientConfig,
mut frame_rx: broadcast::Receiver<Frame>,
events_tx: broadcast::Sender<DetectionEvent>,
stats: Arc<DetectionStats>,
latency: Arc<LatencyWindow>,
connection_tx: watch::Sender<ConnectionState>,
mut shutdown_rx: watch::Receiver<bool>,
) {
let mut backoff = config.reconnect_initial;
let last_model_version: Arc<Mutex<Option<String>>> = Arc::new(Mutex::new(None));
let mut prior_session = false;
loop {
if *shutdown_rx.borrow() {
connection_tx.send_replace(ConnectionState::Disconnected);
return;
}
connection_tx.send_replace(ConnectionState::Connecting);
let endpoint = match Endpoint::from_shared(config.endpoint.clone()) {
Ok(e) => e.connect_timeout(config.connect_timeout),
Err(e) => {
tracing::error!(
error = %e,
endpoint = %config.endpoint,
"detection_client endpoint is invalid; this is fatal"
);
stats.note_connect_error();
connection_tx.send_replace(ConnectionState::Disconnected);
return;
}
};
let channel = tokio::select! {
_ = shutdown_rx.changed() => {
connection_tx.send_replace(ConnectionState::Disconnected);
return;
}
res = endpoint.connect() => match res {
Ok(c) => Some(c),
Err(e) => {
stats.note_connect_error();
tracing::warn!(
error = %e,
endpoint = %config.endpoint,
backoff_ms = backoff.as_millis() as u64,
"detection_client connect failed; will retry after backoff"
);
None
}
}
};
if let Some(channel) = channel {
backoff = config.reconnect_initial;
connection_tx.send_replace(ConnectionState::Connected);
if prior_session {
stats.note_reconnect();
}
prior_session = true;
let session_result = run_stream_session(
channel,
&mut frame_rx,
&events_tx,
&stats,
&latency,
&mut shutdown_rx,
&config,
&last_model_version,
)
.await;
connection_tx.send_replace(ConnectionState::Disconnected);
match session_result {
Ok(SessionExit::Shutdown) => {
return;
}
Ok(SessionExit::FrameSourceClosed) => {
tracing::info!("detection_client frame source closed; exiting");
return;
}
Ok(SessionExit::ServerClosed) => {
tracing::info!("detection_client server closed stream; will reconnect");
}
Err(e) => {
stats.note_stream_error();
tracing::warn!(error = %e, "detection_client stream session ended with error");
}
}
}
// Wait for backoff before the next attempt unless shutdown
// fires first. `frame_rx` is intentionally NOT polled here:
// any frames arriving during disconnect simply lag, and the
// broadcast channel folds them into a single
// `RecvError::Lagged(n)` on the next session — counted via
// `note_frame_lag`.
tokio::select! {
_ = tokio::time::sleep(backoff) => {}
_ = shutdown_rx.changed() => {
connection_tx.send_replace(ConnectionState::Disconnected);
return;
}
}
backoff = backoff.saturating_mul(2).min(config.reconnect_cap);
}
}
/// Non-error reasons for `run_stream_session` to return.
#[derive(Debug, Clone, Copy)]
enum SessionExit {
/// The shutdown watch channel signalled a change; the supervisor exits.
Shutdown,
/// The frame broadcast channel reported `Closed`; the supervisor exits.
FrameSourceClosed,
/// The inbound stream yielded `None`; the supervisor reconnects
/// after the backoff delay.
ServerClosed,
}
#[allow(clippy::too_many_arguments)]
async fn run_stream_session(
channel: Channel,
frame_rx: &mut broadcast::Receiver<Frame>,
events_tx: &broadcast::Sender<DetectionEvent>,
stats: &Arc<DetectionStats>,
latency: &Arc<LatencyWindow>,
shutdown_rx: &mut watch::Receiver<bool>,
config: &DetectionClientConfig,
last_model_version: &Arc<Mutex<Option<String>>>,
) -> Result<SessionExit, StreamSessionError> {
let mut client = DetectionServiceClient::new(channel);
let (req_tx, req_rx) = mpsc::channel::<FrameRequest>(config.outbound_buffer.max(1));
let req_stream = ReceiverStream::new(req_rx);
let response = client
.stream(req_stream)
.await
.map_err(StreamSessionError::OpenStream)?;
let mut inbound = response.into_inner();
let mut budget = BudgetTracker::new(config.max_concurrent_in_flight);
loop {
tokio::select! {
_ = shutdown_rx.changed() => return Ok(SessionExit::Shutdown),
frame_res = frame_rx.recv() => {
match frame_res {
Ok(frame) => {
if frame.ai_locked {
stats.note_ai_locked_skipped();
continue;
}
let entry = InFlight {
frame_seq: frame.seq,
capture_ts_monotonic_ns: frame.capture_ts_monotonic_ns,
};
if let Some(evicted) = budget.add(entry) {
stats.note_in_flight_dropped();
tracing::debug!(
evicted_seq = evicted.frame_seq,
"detection_client dropped oldest in-flight frame (budget)"
);
}
let req = build_request(&frame);
if req_tx.send(req).await.is_err() {
return Err(StreamSessionError::OutboundClosed);
}
stats.note_sent();
}
Err(broadcast::error::RecvError::Lagged(n)) => {
stats.note_frame_lag(n);
tracing::warn!(
dropped = n,
"detection_client frame_rx lagged; counted as frame_lag_total"
);
}
Err(broadcast::error::RecvError::Closed) => {
return Ok(SessionExit::FrameSourceClosed);
}
}
}
inbound_res = inbound.message() => {
match inbound_res {
Ok(Some(resp)) => {
handle_response(
resp,
&mut budget,
events_tx,
stats,
latency,
last_model_version,
config,
);
// Re-evaluate latency window after every
// response so degraded/recovered transitions
// surface at most one event per change.
match latency.evaluate() {
DegradationTransition::Degraded => {
let _ = events_tx.send(DetectionEvent::Tier1Degraded {
reason: Tier1DegradationReason::HighLatency,
});
}
DegradationTransition::Recovered => {
let _ = events_tx.send(DetectionEvent::Tier1Recovered);
}
DegradationTransition::NoChange => {}
}
}
Ok(None) => return Ok(SessionExit::ServerClosed),
Err(status) => return Err(StreamSessionError::Inbound(status)),
}
}
}
}
}
/// Translate a captured frame into the wire-level `FrameRequest`.
///
/// The pixel payload is copied into a fresh `Vec<u8>` owned by the
/// request; the frame itself is left untouched.
fn build_request(frame: &Frame) -> FrameRequest {
    // The message stores the pixel-format enum as its `i32` value.
    let pix_fmt = pix_fmt_to_proto(frame.pix_fmt) as i32;
    let pixels = frame.pixels.to_vec();
    FrameRequest {
        frame_seq: frame.seq,
        capture_ts_monotonic_ns: frame.capture_ts_monotonic_ns,
        width: frame.width,
        height: frame.height,
        pix_fmt,
        pixels,
    }
}
/// Map the shared-model pixel format onto the protobuf enum variant
/// of the same name.
fn pix_fmt_to_proto(p: PixelFormat) -> ProtoPixelFormat {
    // Deliberately no wildcard arm: a new shared variant must be
    // mapped explicitly before this compiles again.
    let wire = match p {
        PixelFormat::Nv12 => ProtoPixelFormat::Nv12,
        PixelFormat::Yuv420p => ProtoPixelFormat::Yuv420p,
        PixelFormat::Rgb24 => ProtoPixelFormat::Rgb24,
    };
    wire
}
/// Process one inbound `DetectionResponse`.
///
/// Steps, in order:
/// 1. Schema check — a `schema_version` other than
///    `config.expected_schema_version` is a hard error: the response is
///    not decoded, a `SchemaMismatch` event is emitted, and any matching
///    in-flight slot is released.
/// 2. In-flight lookup — a response with no tracked request (already
///    evicted by the budget) is counted as an orphan and dropped.
/// 3. Model-version latch — emits `ModelVersionChanged` when the
///    version differs from the last one seen, including the very first
///    observation.
/// 4. Records the server-reported latency and emits a `Batch` event.
///
/// Event sends are best-effort: a send error (no subscribers) is
/// ignored.
fn handle_response(
    resp: DetectionResponse,
    budget: &mut BudgetTracker,
    events_tx: &broadcast::Sender<DetectionEvent>,
    stats: &Arc<DetectionStats>,
    latency: &Arc<LatencyWindow>,
    last_model_version: &Arc<Mutex<Option<String>>>,
    config: &DetectionClientConfig,
) {
    // AZ-661 — schema handshake first. A mismatch is a hard error;
    // do NOT decode the rest of the response, do NOT credit it
    // against latency, and clear the in-flight slot so the budget
    // tracker stays accurate.
    if resp.schema_version != config.expected_schema_version {
        stats.note_schema_mismatch();
        // Free the in-flight slot if we can match it, and keep the
        // `requests_in_flight` gauge in step with the tracker: the
        // request was counted by `note_sent` and will never reach
        // `note_received`. The decrement is checked so the gauge
        // cannot wrap below zero.
        if budget.remove(resp.frame_seq).is_some() {
            let _ = stats.requests_in_flight.fetch_update(
                std::sync::atomic::Ordering::Relaxed,
                std::sync::atomic::Ordering::Relaxed,
                |current| current.checked_sub(1),
            );
        }
        let detail = format!(
            "expected schema_version {} got {}",
            config.expected_schema_version, resp.schema_version
        );
        tracing::error!(
            expected = config.expected_schema_version,
            actual = resp.schema_version,
            frame_seq = resp.frame_seq,
            "detection_client schema mismatch"
        );
        let _ = events_tx.send(DetectionEvent::SchemaMismatch {
            detail,
            frame_seq: resp.frame_seq,
        });
        return;
    }
    // Look up the in-flight request. A `None` here means the budget
    // tracker already evicted this frame; the response is orphaned
    // and dropped silently (do not credit latency or events).
    let Some(in_flight) = budget.remove(resp.frame_seq) else {
        stats.note_orphan_response();
        tracing::debug!(
            frame_seq = resp.frame_seq,
            "detection_client orphan response (budget already evicted)"
        );
        return;
    };
    // AZ-661 — model_version latch. The latch is shared across
    // sessions, so a reconnect that reports the same version is NOT a
    // change. An event is emitted when the latch is still empty (first
    // observation in this process, `previous` is `None`) and whenever
    // the reported version differs from the latched one.
    {
        let mut latch = last_model_version.lock();
        if latch.as_deref() != Some(resp.model_version.as_str()) {
            // `replace` stores the new version and hands back the old one.
            let previous = latch.replace(resp.model_version.clone());
            stats.note_model_version_change();
            let _ = events_tx.send(DetectionEvent::ModelVersionChanged {
                previous,
                current: resp.model_version.clone(),
            });
        }
    }
    // Use the server-reported processing time as the RTT proxy.
    // The Tier-1 NFR measures processing latency at the detections
    // service (`description.md §8`), not round-trip transport time.
    // If wall-clock RTT tracking is added later, store
    // `Instant::now()` in the budget entry at send time.
    let server_side = Duration::from_millis(u64::from(resp.latency_ms));
    latency.record(server_side);
    stats.note_received();
    let batch = response_to_batch(resp);
    let _ = events_tx.send(DetectionEvent::Batch {
        batch,
        capture_ts_monotonic_ns: in_flight.capture_ts_monotonic_ns,
        server_latency: server_side,
    });
}
/// Convert an owned wire response into the shared `DetectionBatch`.
///
/// Every field is moved out of `resp`; nothing is cloned. Each
/// detection is converted with `proto_detection_to_shared`.
fn response_to_batch(resp: DetectionResponse) -> DetectionBatch {
    DetectionBatch {
        frame_seq: resp.frame_seq,
        detections: resp
            .detections
            .into_iter()
            .map(proto_detection_to_shared)
            .collect(),
        latency_ms: resp.latency_ms,
        model_version: resp.model_version,
    }
}
fn proto_detection_to_shared(d: ProtoDetection) -> SharedDetection {
SharedDetection {
class_id: d.class_id,
class_name: d.class_name,
confidence: d.confidence,
bbox_normalized: bbox_to_shared(d.bbox_normalized.unwrap_or_default()),
mask_or_polyline: d.mask_or_polyline,
source_frame_seq: d.source_frame_seq,
}
}
fn bbox_to_shared(b: ProtoBoundingBox) -> BoundingBox {
BoundingBox {
x_min: b.x_min,
y_min: b.y_min,
x_max: b.x_max,
y_max: b.y_max,
}
}
@@ -0,0 +1,129 @@
//! AZ-660 + AZ-661 — atomic counter surface for `DetectionClient`.
//!
//! `description.md §3` requires:
//! - `gRPC_connection_state` (watch, not in this struct — see
//! `runtime.rs`)
//! - `requests_in_flight` (atomic gauge maintained by the supervisor)
//! - `latency_p50`, `latency_p99` (live in [`crate::internal::latency`])
//! - `errors_by_kind` (counters per kind, this struct)
//! - `budget_drops_total` (this struct)
//!
//! AZ-661 adds:
//! - `schema_mismatch_total` (one of the `errors_by_kind` buckets,
//! surfaced explicitly because it is the loudest failure mode)
//! - `model_version_changes_total` (visibility for the operator UI)
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
/// Lock-free counters shared between the supervisor task and the
/// `DetectionClientHandle`. Every field is `AtomicU64`; readers
/// snapshot independently with `Ordering::Relaxed`.
///
/// All `*_total` fields only ever grow. `requests_in_flight` is a
/// gauge: it goes up in `note_sent` and down in `note_received` /
/// `note_in_flight_dropped`, and never drops below zero.
#[derive(Debug, Default)]
pub struct DetectionStats {
    /// Bumped by `note_sent`.
    pub requests_sent_total: AtomicU64,
    /// Bumped by `note_received`.
    pub responses_received_total: AtomicU64,
    /// Bumped by `note_in_flight_dropped`.
    pub budget_drops_total: AtomicU64,
    /// Grows by the lag count passed to `note_frame_lag`.
    pub frame_lag_total: AtomicU64,
    /// Bumped by `note_schema_mismatch`.
    pub schema_mismatch_total: AtomicU64,
    /// Bumped by `note_model_version_change`.
    pub model_version_changes_total: AtomicU64,
    /// Bumped by `note_reconnect`.
    pub reconnects_total: AtomicU64,
    /// Bumped by `note_connect_error`.
    pub connect_errors_total: AtomicU64,
    /// Bumped by `note_stream_error` and by `note_orphan_response`.
    pub stream_errors_total: AtomicU64,
    /// Gauge of requests sent and not yet answered or dropped.
    pub requests_in_flight: AtomicU64,
    /// Bumped by `note_ai_locked_skipped`.
    pub ai_locked_skipped_total: AtomicU64,
}

impl DetectionStats {
    /// Fresh, all-zero counters behind an `Arc` for sharing.
    pub fn shared() -> Arc<Self> {
        Arc::new(Self::default())
    }

    /// A request was sent: count it and occupy one in-flight slot.
    pub fn note_sent(&self) {
        self.requests_sent_total.fetch_add(1, Ordering::Relaxed);
        self.requests_in_flight.fetch_add(1, Ordering::Relaxed);
    }

    /// A response for a tracked request arrived: count it and release
    /// its in-flight slot.
    pub fn note_received(&self) {
        self.responses_received_total
            .fetch_add(1, Ordering::Relaxed);
        // `requests_in_flight` decrements via `note_in_flight_dropped`
        // on budget eviction and via this fn on a normal response.
        self.release_in_flight_slot();
    }

    /// The budget evicted an in-flight request: count the drop and
    /// release its in-flight slot.
    pub fn note_in_flight_dropped(&self) {
        self.budget_drops_total.fetch_add(1, Ordering::Relaxed);
        self.release_in_flight_slot();
    }

    /// A response arrived for a frame the budget already evicted.
    ///
    /// Counted under `stream_errors_total`. `requests_in_flight` is NOT
    /// decremented here (the budget eviction already did) and
    /// `responses_received_total` is NOT credited (the response does
    /// not correspond to a currently-tracked in-flight request).
    pub fn note_orphan_response(&self) {
        self.stream_errors_total.fetch_add(1, Ordering::Relaxed);
    }

    /// The frame receiver skipped `n` frames.
    pub fn note_frame_lag(&self, n: u64) {
        self.frame_lag_total.fetch_add(n, Ordering::Relaxed);
    }

    /// A frame was skipped because it was marked `ai_locked`.
    pub fn note_ai_locked_skipped(&self) {
        self.ai_locked_skipped_total.fetch_add(1, Ordering::Relaxed);
    }

    /// A response carried an unexpected schema version.
    pub fn note_schema_mismatch(&self) {
        self.schema_mismatch_total.fetch_add(1, Ordering::Relaxed);
    }

    /// The reported model version changed.
    pub fn note_model_version_change(&self) {
        self.model_version_changes_total
            .fetch_add(1, Ordering::Relaxed);
    }

    /// A connection was established after an earlier session.
    pub fn note_reconnect(&self) {
        self.reconnects_total.fetch_add(1, Ordering::Relaxed);
    }

    /// A connection attempt (or endpoint construction) failed.
    pub fn note_connect_error(&self) {
        self.connect_errors_total.fetch_add(1, Ordering::Relaxed);
    }

    /// A stream session ended with an error.
    pub fn note_stream_error(&self) {
        self.stream_errors_total.fetch_add(1, Ordering::Relaxed);
    }

    /// Current value of the in-flight gauge.
    pub fn requests_in_flight(&self) -> u64 {
        self.requests_in_flight.load(Ordering::Relaxed)
    }

    /// Current value of `budget_drops_total`.
    pub fn budget_drops_total(&self) -> u64 {
        self.budget_drops_total.load(Ordering::Relaxed)
    }

    /// Current value of `requests_sent_total`.
    pub fn requests_sent_total(&self) -> u64 {
        self.requests_sent_total.load(Ordering::Relaxed)
    }

    /// Current value of `responses_received_total`.
    pub fn responses_received_total(&self) -> u64 {
        self.responses_received_total.load(Ordering::Relaxed)
    }

    /// Current value of `frame_lag_total`.
    pub fn frame_lag_total(&self) -> u64 {
        self.frame_lag_total.load(Ordering::Relaxed)
    }

    /// Current value of `schema_mismatch_total`.
    pub fn schema_mismatch_total(&self) -> u64 {
        self.schema_mismatch_total.load(Ordering::Relaxed)
    }

    /// Current value of `model_version_changes_total`.
    pub fn model_version_changes_total(&self) -> u64 {
        self.model_version_changes_total.load(Ordering::Relaxed)
    }

    /// Current value of `reconnects_total`.
    pub fn reconnects_total(&self) -> u64 {
        self.reconnects_total.load(Ordering::Relaxed)
    }

    /// Current value of `connect_errors_total`.
    pub fn connect_errors_total(&self) -> u64 {
        self.connect_errors_total.load(Ordering::Relaxed)
    }

    /// Current value of `stream_errors_total`.
    pub fn stream_errors_total(&self) -> u64 {
        self.stream_errors_total.load(Ordering::Relaxed)
    }

    /// Current value of `ai_locked_skipped_total`.
    pub fn ai_locked_skipped_total(&self) -> u64 {
        self.ai_locked_skipped_total.load(Ordering::Relaxed)
    }

    /// Decrement the in-flight gauge, stopping at zero.
    ///
    /// A plain `fetch_sub` would wrap to roughly 1.8e19 on a single
    /// unmatched decrement and poison the gauge for the rest of the
    /// process; the checked form simply leaves it at zero.
    fn release_in_flight_slot(&self) {
        let _ = self.requests_in_flight.fetch_update(
            Ordering::Relaxed,
            Ordering::Relaxed,
            |current| current.checked_sub(1),
        );
    }
}