mirror of
https://github.com/azaion/autopilot.git
synced 2026-06-22 02:21:10 +00:00
[AZ-675] telemetry_stream Tonic gRPC server + per-client lossy queue
ci/woodpecker/push/build-arm Pipeline failed
ci/woodpecker/push/build-arm Pipeline failed
Pins operator-link transport to gRPC server-streaming (closes architecture Q2 in favour of gRPC). Adds first-time tonic / prost / tonic-build infrastructure to the workspace; uses protoc-bin-vendored so neither dev machines nor CI need system protoc installed. Design — back-pressure lives in the per-topic tokio::sync::broadcast ring, drained directly by the tonic-streamed response via BroadcastStream + StreamMap. No intermediate mpsc buffer that could absorb back-pressure invisibly. Slow client overrun -> Lagged(n) event -> per-(client_id, topic) drop counter incremented; healthy clients on the same topic are unaffected. Service surface — Subscribe(SubscribeRequest) -> stream TelemetryMessage; five topics (TelemetrySample, GimbalState, DetectionEvent, MovementCandidate, MapObjectsBundle); empty topics list defaults to subscribe-all; empty client_id rejected; stream drop decrements subscribed_clients via StreamGuard. TelemetrySink push_detections is now real; push_frame still NotImplemented(AZ-676 video path). Tests — 6 unit + 5 integration (AC-1..AC-3 via in-process gRPC client, plus subscribe-all default + empty-client_id rejection). Clippy on telemetry_stream clean. Pre-existing mission_executor ac3 test polling race surfaces more reliably under the new tonic build pressure; documented as _docs/_process_leftovers/2026-05-20_mission_executor_ac3_flake.md and unchanged by this batch. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,127 @@
|
||||
//! AZ-675 — gRPC `TelemetryStream::Subscribe` service implementation.
|
||||
//!
|
||||
//! The client sends a single `SubscribeRequest`; the server returns a
|
||||
//! server-streaming response built directly from per-topic
|
||||
//! `BroadcastStream`s merged with `StreamMap`. The tonic transport
|
||||
//! is what polls our stream — when the wire (or the operator client)
|
||||
//! cannot keep up, the broadcast ring overflows that client's cursor
|
||||
//! and `BroadcastStream` yields `Err(BroadcastStreamRecvError::Lagged(n))`
|
||||
//! on the next poll. That is the *only* place drop accounting
|
||||
//! happens: there is no intermediate mpsc buffer that could absorb
|
||||
//! back-pressure and hide lag.
|
||||
//!
|
||||
//! `StreamGuard` decrements `subscribed_clients` on stream drop.
|
||||
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
use tokio_stream::wrappers::errors::BroadcastStreamRecvError;
|
||||
use tokio_stream::wrappers::BroadcastStream;
|
||||
use tokio_stream::{Stream, StreamExt, StreamMap};
|
||||
use tonic::{Request, Response, Status};
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::internal::proto::telemetry_stream_server::TelemetryStream;
|
||||
use crate::internal::proto::{SubscribeRequest, TelemetryMessage, Topic};
|
||||
use crate::internal::publisher::{TelemetryPublisher, ALL_TOPICS};
|
||||
|
||||
pub struct TelemetryService {
|
||||
publisher: Arc<TelemetryPublisher>,
|
||||
}
|
||||
|
||||
impl TelemetryService {
|
||||
pub fn new(publisher: Arc<TelemetryPublisher>) -> Self {
|
||||
Self { publisher }
|
||||
}
|
||||
}
|
||||
|
||||
/// Boxed, type-erased, `Send` response stream handed back to tonic by
/// `subscribe`; each item is either a `TelemetryMessage` or a `Status`.
type SubscribeStream = Pin<Box<dyn Stream<Item = Result<TelemetryMessage, Status>> + Send>>;
|
||||
|
||||
#[tonic::async_trait]
|
||||
impl TelemetryStream for TelemetryService {
|
||||
type SubscribeStream = SubscribeStream;
|
||||
|
||||
async fn subscribe(
|
||||
&self,
|
||||
request: Request<SubscribeRequest>,
|
||||
) -> Result<Response<Self::SubscribeStream>, Status> {
|
||||
let req = request.into_inner();
|
||||
if req.client_id.trim().is_empty() {
|
||||
return Err(Status::invalid_argument("client_id is required"));
|
||||
}
|
||||
let client_id = req.client_id.clone();
|
||||
|
||||
let requested: Vec<Topic> = if req.topics.is_empty() {
|
||||
ALL_TOPICS.to_vec()
|
||||
} else {
|
||||
let mut out = Vec::with_capacity(req.topics.len());
|
||||
for raw in &req.topics {
|
||||
let t = Topic::try_from(*raw)
|
||||
.map_err(|_| Status::invalid_argument(format!("unknown topic {raw}")))?;
|
||||
if matches!(t, Topic::Unspecified) {
|
||||
return Err(Status::invalid_argument("TOPIC_UNSPECIFIED not allowed"));
|
||||
}
|
||||
out.push(t);
|
||||
}
|
||||
out
|
||||
};
|
||||
|
||||
let mut map: StreamMap<Topic, BroadcastStream<TelemetryMessage>> = StreamMap::new();
|
||||
for &t in &requested {
|
||||
match self.publisher.subscribe_topic(t) {
|
||||
Some(rx) => {
|
||||
map.insert(t, BroadcastStream::new(rx));
|
||||
}
|
||||
None => {
|
||||
return Err(Status::failed_precondition(format!(
|
||||
"topic {t:?} not registered"
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.publisher.register_client();
|
||||
info!(client_id = %client_id, topics = ?requested, "telemetry subscribe");
|
||||
|
||||
let publisher = Arc::clone(&self.publisher);
|
||||
let cid = client_id.clone();
|
||||
let stream = map.filter_map(move |(topic, item)| match item {
|
||||
Ok(msg) => Some(Ok(msg)),
|
||||
Err(BroadcastStreamRecvError::Lagged(n)) => {
|
||||
warn!(client_id = %cid, ?topic, dropped = n, "slow client lagged");
|
||||
publisher.record_drops(&cid, topic, n);
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
let stream = StreamGuard {
|
||||
inner: stream,
|
||||
publisher: Arc::clone(&self.publisher),
|
||||
};
|
||||
|
||||
Ok(Response::new(Box::pin(stream) as Self::SubscribeStream))
|
||||
}
|
||||
}
|
||||
|
||||
/// Decrement `subscribed_clients` when the per-client outbound
|
||||
/// stream is dropped (tonic drops the stream when the client side
|
||||
/// goes away).
|
||||
struct StreamGuard<S> {
|
||||
inner: S,
|
||||
publisher: Arc<TelemetryPublisher>,
|
||||
}
|
||||
|
||||
impl<S: Stream + Unpin> Stream for StreamGuard<S> {
|
||||
type Item = S::Item;
|
||||
|
||||
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
Pin::new(&mut self.inner).poll_next(cx)
|
||||
}
|
||||
}
|
||||
|
||||
impl<S> Drop for StreamGuard<S> {
|
||||
fn drop(&mut self) {
|
||||
self.publisher.deregister_client();
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user