Files
autopilot/crates/telemetry_stream/tests/grpc_subscribe.rs
T
Oleksandr Bezdieniezhnykh ff790bd639
ci/woodpecker/push/build-arm Pipeline failed
[AZ-675] telemetry_stream Tonic gRPC server + per-client lossy queue
Pins operator-link transport to gRPC server-streaming (closes
architecture Q2 in favour of gRPC). Adds first-time tonic / prost /
tonic-build infrastructure to the workspace; uses
protoc-bin-vendored so neither dev machines nor CI need system
protoc installed.

Design — back-pressure lives in the per-topic tokio::sync::broadcast
ring, drained directly by the tonic-streamed response via
BroadcastStream + StreamMap. No intermediate mpsc buffer that could
absorb back-pressure invisibly. Slow client overrun -> Lagged(n)
event -> per-(client_id, topic) drop counter incremented; healthy
clients on the same topic are unaffected.

Service surface — Subscribe(SubscribeRequest) -> stream
TelemetryMessage; five topics (TelemetrySample, GimbalState,
DetectionEvent, MovementCandidate, MapObjectsBundle); empty topics
list defaults to subscribe-all; empty client_id rejected; stream
drop decrements subscribed_clients via StreamGuard. TelemetrySink
push_detections is now real; push_frame is still NotImplemented
(AZ-676 video path).

Tests — 6 unit + 5 integration (AC-1..AC-3 via in-process gRPC
client, plus subscribe-all default + empty-client_id rejection).
Clippy on telemetry_stream clean.

Pre-existing mission_executor ac3 test polling race surfaces more
reliably under the new tonic build pressure; documented as
_docs/_process_leftovers/2026-05-20_mission_executor_ac3_flake.md
and unchanged by this batch.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-20 12:44:39 +03:00

367 lines
12 KiB
Rust

//! AZ-675 integration tests — Tonic server + per-client lossy queue
//! exercised through an in-process gRPC client (the only stub allowed
//! per the task's Runtime Completeness gate).
use std::net::TcpListener;
use std::time::Duration;
use serde::{Deserialize, Serialize};
use tokio::time::timeout;
use tokio_stream::StreamExt;
use tonic::transport::{Channel, Endpoint};
use tonic::Request;
use telemetry_stream::{SubscribeRequest, TelemetryStream, TelemetryStreamClient, TelemetryTopic};
/// Small JSON-serializable payload used as the published message body in
/// these tests.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
struct Sample {
    /// Sequence number; the ordering test compares received values of `n`
    /// against the order in which they were published.
    n: u32,
}
/// Reserve an OS-assigned loopback port for a test server.
///
/// Binds `127.0.0.1:0` so the operating system picks a free port, then
/// returns the still-open listener together with the port it resolved to.
/// Every test binds its own ephemeral port, so integration tests in the same
/// suite never collide on an address.
///
/// # Panics
///
/// Panics if the bind fails or the bound address cannot be read back.
fn bind_ephemeral() -> (TcpListener, u16) {
    let listener = TcpListener::bind("127.0.0.1:0").expect("bind ephemeral");
    let bound_addr = listener.local_addr().unwrap();
    (listener, bound_addr.port())
}
/// Open a gRPC client to the test server listening on `127.0.0.1:{port}`.
///
/// Makes up to 50 connection attempts, sleeping 20 ms after each failure.
/// Each individual attempt is bounded by a 2 s connect timeout.
///
/// # Panics
///
/// Panics if the loopback URL is rejected as an endpoint, or if every
/// attempt fails; in the latter case the panic message carries the last
/// connection error so the cause is visible in the test output.
async fn connect(port: u16) -> TelemetryStreamClient<Channel> {
    let url = format!("http://127.0.0.1:{port}");
    let endpoint = Endpoint::from_shared(url)
        .expect("loopback URL is a valid endpoint URI")
        .connect_timeout(Duration::from_secs(2));
    // Tiny retry loop — the listener is already bound, but the server may
    // need a few ms before it starts serving connections.
    let mut last_err = None;
    for _ in 0..50 {
        match TelemetryStreamClient::connect(endpoint.clone()).await {
            Ok(client) => return client,
            // Keep the most recent failure instead of discarding it.
            Err(err) => last_err = Some(err),
        }
        tokio::time::sleep(Duration::from_millis(20)).await;
    }
    panic!("gRPC client failed to connect within ~1s (last error: {last_err:?})");
}
/// AC-1 — every subscriber on a topic receives the same stream, and the
/// publish order is preserved for each of them.
#[tokio::test]
async fn ac1_multiple_subscribers_receive_same_stream() {
    /// Pull exactly 100 messages off `stream`, decoding each JSON payload
    /// into a `Sample`. Each individual receive is given at most 2 s.
    async fn collect100(
        stream: &mut tonic::Streaming<telemetry_stream::TelemetryMessage>,
    ) -> Vec<Sample> {
        let mut samples: Vec<Sample> = Vec::new();
        while samples.len() < 100 {
            let next = timeout(Duration::from_secs(2), stream.next()).await;
            let msg = next
                .expect("ac1 client did not receive 100 in 2s")
                .expect("stream ended")
                .expect("server status");
            samples.push(serde_json::from_slice(&msg.payload_json).unwrap());
        }
        samples
    }

    // Arrange
    let (listener, port) = bind_ephemeral();
    let server = TelemetryStream::new(8);
    let handle = server.handle();
    let (_join, _guard) = server.spawn_grpc_server_on(listener).unwrap();

    let subscribe_request = |client_id: &str| {
        Request::new(SubscribeRequest {
            client_id: client_id.to_owned(),
            topics: vec![TelemetryTopic::TelemetrySample as i32],
        })
    };

    let mut client_a = connect(port).await;
    let mut client_b = connect(port).await;
    let mut client_c = connect(port).await;

    let mut stream_a = client_a
        .subscribe(subscribe_request("a"))
        .await
        .unwrap()
        .into_inner();
    let mut stream_b = client_b
        .subscribe(subscribe_request("b"))
        .await
        .unwrap()
        .into_inner();
    let mut stream_c = client_c
        .subscribe(subscribe_request("c"))
        .await
        .unwrap()
        .into_inner();

    // Poll (at most 50 times, 10 ms apart) until the server reports all
    // three subscribers, so nothing is published before they are registered.
    let mut polls_left = 50;
    while polls_left > 0 && handle.snapshot().subscribed_clients != 3 {
        tokio::time::sleep(Duration::from_millis(10)).await;
        polls_left -= 1;
    }
    assert_eq!(handle.snapshot().subscribed_clients, 3);

    // Act
    for seq in 0..100u32 {
        let sample = Sample { n: seq };
        handle
            .publish(TelemetryTopic::TelemetrySample, &sample)
            .unwrap();
    }
    let got_a = collect100(&mut stream_a).await;
    let got_b = collect100(&mut stream_b).await;
    let got_c = collect100(&mut stream_c).await;

    // Assert — all three receive all 100 in order.
    let expected: Vec<Sample> = (0..100).map(|n| Sample { n }).collect();
    for got in [&got_a, &got_b, &got_c] {
        assert_eq!(got, &expected);
    }
}
/// AC-2 — slow subscriber drops oldest, healthy unaffected.
///
/// We can't simulate "client never polls" through real tonic+H2
/// because the OS TCP + HTTP/2 receive buffers (often 256 KB+) will
/// absorb a small JSON burst entirely with no observable drop. The
/// realistic AC-2 condition is "slow drain rate" — the slow client
/// polls but at a fraction of the publish rate. With large payloads
/// and a small per-topic broadcast capacity, the slow client's
/// broadcast cursor falls behind the producer and `BroadcastStream`
/// emits `Lagged(n)` — the only signal AZ-675 wires into the per-
/// `(client_id, topic)` drop counters.
#[tokio::test]
async fn ac2_slow_subscriber_drops_oldest_healthy_unaffected() {
    // Arrange — capacity 4, big payloads so wire back-pressure
    // propagates fast and broadcast lag is observable end-to-end.
    let (listener, port) = bind_ephemeral();
    let cfg = telemetry_stream::TelemetryStreamConfig {
        topic_capacity: 4,
        ..telemetry_stream::TelemetryStreamConfig::default()
    };
    // Read the capacity back from the config so the accounting assertion at
    // the end follows the configured value instead of repeating the literal.
    let capacity = cfg.topic_capacity as u64;
    let server = TelemetryStream::with_config(cfg);
    let handle = server.handle();
    let (_join, _guard) = server.spawn_grpc_server_on(listener).unwrap();

    #[derive(Serialize)]
    struct Big {
        n: u32,
        // 64 KB filler per message — fills H2 receive window quickly.
        pad: Vec<u8>,
    }

    let mut c_fast = connect(port).await;
    let mut c_slow = connect(port).await;
    let req = |id: &str| {
        Request::new(SubscribeRequest {
            client_id: id.to_string(),
            topics: vec![TelemetryTopic::TelemetrySample as i32],
        })
    };
    let mut s_fast = c_fast.subscribe(req("fast")).await.unwrap().into_inner();
    let mut s_slow = c_slow.subscribe(req("slow")).await.unwrap().into_inner();

    // Wait (up to 50 polls, 10 ms apart) for both subscribers to register.
    for _ in 0..50 {
        if handle.snapshot().subscribed_clients == 2 {
            break;
        }
        tokio::time::sleep(Duration::from_millis(10)).await;
    }
    assert_eq!(handle.snapshot().subscribed_clients, 2);

    let total: u32 = 300;

    // Fast drains immediately, slow drains 1 msg every 50ms (way below publish rate).
    let fast_task = tokio::spawn(async move {
        let mut seen = 0u32;
        while seen < total {
            // The 15 s limit applies to each individual receive.
            let _msg = timeout(Duration::from_secs(15), s_fast.next())
                .await
                .expect("fast did not receive everything in 15s")
                .expect("stream ended")
                .expect("server status");
            seen += 1;
        }
        seen
    });
    let slow_task = tokio::spawn(async move {
        let mut received = 0u32;
        let deadline = std::time::Instant::now() + Duration::from_secs(30);
        while std::time::Instant::now() < deadline {
            match timeout(Duration::from_millis(500), s_slow.next()).await {
                Ok(Some(Ok(_msg))) => received += 1,
                // Stop counting on a server status, on end of stream, or
                // once nothing has arrived for 500 ms.
                Ok(Some(Err(_))) | Ok(None) | Err(_) => break,
            }
            tokio::time::sleep(Duration::from_millis(50)).await;
        }
        received
    });

    // Act — burst publishes at ~2 ms cadence with 64 KB payloads so
    // broadcast capacity is overrun for the slow consumer.
    let payload_pad = vec![0u8; 64 * 1024];
    for n in 0..total {
        handle
            .publish(
                TelemetryTopic::TelemetrySample,
                &Big {
                    n,
                    pad: payload_pad.clone(),
                },
            )
            .unwrap();
        tokio::time::sleep(Duration::from_millis(2)).await;
    }

    let fast_count = fast_task.await.unwrap();
    let slow_count = slow_task.await.unwrap();
    let snap = handle.snapshot();
    let slow_drops = snap
        .drops_total
        .get(&("slow".to_string(), TelemetryTopic::TelemetrySample))
        .copied()
        .unwrap_or(0);
    let fast_drops = snap
        .drops_total
        .get(&("fast".to_string(), TelemetryTopic::TelemetrySample))
        .copied()
        .unwrap_or(0);

    // Assert
    assert_eq!(fast_count, total, "fast client receives every message");
    assert_eq!(fast_drops, 0, "fast client MUST NOT have drops");
    assert!(
        slow_drops > 0,
        "slow client MUST have lagged at some point (got 0 drops; slow_count={slow_count})"
    );
    assert!(
        slow_count < total,
        "slow client should have lost messages (got {slow_count}/{total})"
    );
    // Received + dropped may exceed the number published by at most the
    // configured topic capacity.
    let accounted = u64::from(slow_count) + slow_drops;
    assert!(
        accounted <= u64::from(total) + capacity,
        "accounted={} should not exceed total+capacity",
        accounted
    );
}
/// AC-3 — dropping a client's stream removes it from the subscriber count.
#[tokio::test]
async fn ac3_disconnect_decrements_subscribed_clients() {
    // Arrange
    let (listener, port) = bind_ephemeral();
    let server = TelemetryStream::new(8);
    let handle = server.handle();
    let (_join, _guard) = server.spawn_grpc_server_on(listener).unwrap();

    let mut client = connect(port).await;
    let request = Request::new(SubscribeRequest {
        client_id: String::from("ephemeral"),
        topics: Vec::new(),
    });
    let stream = client.subscribe(request).await.unwrap().into_inner();

    // Poll (at most 50 times, 10 ms apart) until the subscriber is counted.
    let mut polls_left = 50;
    while polls_left > 0 && handle.snapshot().subscribed_clients != 1 {
        tokio::time::sleep(Duration::from_millis(10)).await;
        polls_left -= 1;
    }
    assert_eq!(handle.snapshot().subscribed_clients, 1);

    // Act — drop the stream (simulates client cancel).
    drop(stream);
    drop(client);

    // Assert — register/deregister is eventually consistent; wait briefly
    // (at most 100 polls, 20 ms apart) for the count to reach zero.
    let mut remaining = 99;
    let mut attempts = 0;
    while attempts < 100 {
        remaining = handle.snapshot().subscribed_clients;
        if remaining == 0 {
            break;
        }
        tokio::time::sleep(Duration::from_millis(20)).await;
        attempts += 1;
    }
    assert_eq!(
        remaining, 0,
        "disconnect must decrement subscribed_clients"
    );
}
/// Subscribing with an empty `topics` list defaults to ALL topics.
///
/// One message is published on each of the five topics; the test passes
/// once the single subscriber has seen five distinct `topic` values.
#[tokio::test]
async fn empty_topics_list_defaults_to_all() {
    // Arrange
    let (listener, port) = bind_ephemeral();
    let server = TelemetryStream::new(8);
    let handle = server.handle();
    let (_join, _guard) = server.spawn_grpc_server_on(listener).unwrap();
    let mut client = connect(port).await;
    let mut stream = client
        .subscribe(Request::new(SubscribeRequest {
            client_id: "all".to_string(),
            topics: vec![],
        }))
        .await
        .unwrap()
        .into_inner();
    for _ in 0..50 {
        if handle.snapshot().subscribed_clients == 1 {
            break;
        }
        tokio::time::sleep(Duration::from_millis(10)).await;
    }
    // Fail here, with a clear cause, if the subscriber never registered —
    // otherwise the problem would only surface later as a publish error or
    // a receive timeout. Mirrors the registration check in the AC tests.
    assert_eq!(handle.snapshot().subscribed_clients, 1);

    // Act — publish one message on each topic, numbered 1..=5 in this order.
    let topics = [
        TelemetryTopic::TelemetrySample,
        TelemetryTopic::GimbalState,
        TelemetryTopic::DetectionEvent,
        TelemetryTopic::MovementCandidate,
        TelemetryTopic::MapObjectsBundle,
    ];
    for (n, topic) in (1u32..).zip(topics) {
        handle.publish(topic, &Sample { n }).unwrap();
    }

    // Keep receiving until five distinct topic values have been observed;
    // each individual receive is bounded by a 2 s timeout.
    let mut seen_topics = std::collections::HashSet::new();
    while seen_topics.len() < 5 {
        let msg = timeout(Duration::from_secs(2), stream.next())
            .await
            .expect("did not receive all 5 topics in 2s")
            .unwrap()
            .unwrap();
        seen_topics.insert(msg.topic);
    }

    // Assert
    assert_eq!(seen_topics.len(), 5);
}
/// A subscribe request with an empty `client_id` must fail with
/// `InvalidArgument`.
#[tokio::test]
async fn empty_client_id_is_rejected() {
    // Arrange
    let (listener, port) = bind_ephemeral();
    let server = TelemetryStream::new(8);
    let _handle = server.handle();
    let (_join, _guard) = server.spawn_grpc_server_on(listener).unwrap();
    let mut client = connect(port).await;
    let request = Request::new(SubscribeRequest {
        client_id: String::new(),
        topics: Vec::new(),
    });

    // Act
    let status = client
        .subscribe(request)
        .await
        .expect_err("empty client_id must return Status error");

    // Assert
    assert_eq!(status.code(), tonic::Code::InvalidArgument);
}