mirror of
https://github.com/azaion/autopilot.git
synced 2026-06-22 19:01:10 +00:00
[AZ-653] gimbal_controller ViewPro A40 vendor UDP transport (batch 10)
ci/woodpecker/push/build-arm Pipeline failed
ci/woodpecker/push/build-arm Pipeline failed
Implements the vendor wire protocol for the A40 gimbal (XOR-8 checksum, not CRC16 — task spec corrected against ArduPilot AP_Mount_Viewpro.h): frame encode/decode, typed FrameId/CameraCommand/ImageSensor, A1 angles, C1 camera, C2 set-zoom command builders, and a tokio UdpSocket transport with bounded retry, per-command deadline, and atomic vendor-fault counters surfaced via faults()/health(). GimbalControllerHandle::set_pose and zoom now ride the transport when wired; remain disabled when no transport is bound. 32/32 gimbal_controller tests green; workspace test suite green except for a pre-existing flake in mission_executor::state_machine::ac3_bounded_retry_then_success that reproduces only under parallel workspace test load (passes 5/5 in isolation; flagged in batch 8 report, unrelated to this batch). Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,330 @@
|
||||
//! UDP transport for the ViewPro A40.
|
||||
//!
|
||||
//! Owns the [`UdpSocket`], the rolling frame counter, the bounded
|
||||
//! retry policy, and the vendor-fault counters that feed the
|
||||
//! component's health surface. Inbound frames are checksum-validated
|
||||
//! by [`super::a40_protocol::decode_frame`]; mismatches are counted
|
||||
//! as `vendor_faults_total{kind="crc"}` and dropped.
|
||||
//!
|
||||
//! The transport is **command/response**, matched on the reply's `FrameId`:
|
||||
//! each `send_with_response` issues a frame, awaits the next
|
||||
//! matching inbound frame within a per-command deadline, and retries
|
||||
//! up to `max_retries` on timeout. Unmatched inbound frames (e.g.
|
||||
//! the gimbal's HEARTBEAT) are still surfaced through the
|
||||
//! broadcast stream so a future telemetry pump can consume them.
|
||||
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use tokio::net::UdpSocket;
|
||||
use tokio::sync::{broadcast, Mutex};
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio::time::{timeout, Instant};
|
||||
|
||||
use super::a40_protocol::frame::{decode_frame, encode_frame, Frame, FrameDecodeError, FrameId};
|
||||
|
||||
/// Default time a single command attempt waits for its reply. The NFR
/// is ≤200 ms on a healthy link; 150 ms leaves headroom for the
/// bounded-retry budget.
pub const DEFAULT_COMMAND_DEADLINE: Duration = Duration::from_millis(150);

/// Default value of the transport's `max_retries` setting, which caps
/// the number of send attempts made by `send_with_response`. The vendor
/// link is best-effort UDP; bounded retries match the AZ-651 ladder
/// pattern.
pub const DEFAULT_MAX_RETRIES: u8 = 3;

/// Number of inbound frames the broadcast channel is created with.
/// Slow consumers see `Lagged`; the transport itself is unaffected.
pub const INBOUND_CHANNEL_CAPACITY: usize = 64;
|
||||
|
||||
/// Vendor-fault counters surfaced through `health()`.
///
/// Each counter is an independent atomic updated by the transport.
/// Readers should go through [`VendorFaults::snapshot`]; the three
/// values are loaded one after another, so a snapshot taken while the
/// transport is running is accurate per counter but is not a single
/// atomic cut across all three.
#[derive(Debug, Default)]
pub struct VendorFaults {
    /// Inbound datagrams rejected by checksum validation, or by any
    /// other framing error apart from an unknown frame id.
    pub crc: std::sync::atomic::AtomicU64,
    /// Command attempts that ended without a matching response (the
    /// per-command deadline elapsed or the inbound receiver lagged).
    /// A command that exhausts its retry budget contributes one count
    /// per attempt, not one count in total.
    pub timeout: std::sync::atomic::AtomicU64,
    /// Inbound frames whose `FrameId` could not be decoded.
    pub unknown_frame_id: std::sync::atomic::AtomicU64,
}
|
||||
|
||||
impl VendorFaults {
|
||||
fn inc_crc(&self) {
|
||||
self.crc.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||
}
|
||||
fn inc_timeout(&self) {
|
||||
self.timeout
|
||||
.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||
}
|
||||
fn inc_unknown_frame_id(&self) {
|
||||
self.unknown_frame_id
|
||||
.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||
}
|
||||
pub fn snapshot(&self) -> VendorFaultsSnapshot {
|
||||
VendorFaultsSnapshot {
|
||||
crc: self.crc.load(std::sync::atomic::Ordering::Relaxed),
|
||||
timeout: self.timeout.load(std::sync::atomic::Ordering::Relaxed),
|
||||
unknown_frame_id: self
|
||||
.unknown_frame_id
|
||||
.load(std::sync::atomic::Ordering::Relaxed),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Plain-data copy of the [`VendorFaults`] counters, as produced by
/// [`VendorFaults::snapshot`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct VendorFaultsSnapshot {
    /// Value of the `crc` counter when the snapshot was taken.
    pub crc: u64,
    /// Value of the `timeout` counter when the snapshot was taken.
    pub timeout: u64,
    /// Value of the `unknown_frame_id` counter when the snapshot was taken.
    pub unknown_frame_id: u64,
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum A40Error {
|
||||
#[error("frame too large for vendor protocol (max body 63 bytes)")]
|
||||
FrameTooLarge,
|
||||
#[error("max retries exceeded ({attempts} attempts) waiting for {expected:?}")]
|
||||
MaxRetriesExceeded { attempts: u8, expected: FrameId },
|
||||
#[error("UDP I/O: {0}")]
|
||||
Io(#[from] std::io::Error),
|
||||
#[error("inbound broadcast channel closed")]
|
||||
InboundChannelClosed,
|
||||
}
|
||||
|
||||
/// UDP transport for the A40. Cheap to clone — both the socket and
|
||||
/// the inbound broadcast sender are wrapped in `Arc`.
|
||||
#[derive(Clone)]
|
||||
pub struct A40Transport {
|
||||
socket: Arc<UdpSocket>,
|
||||
peer: SocketAddr,
|
||||
inbound_tx: broadcast::Sender<Frame>,
|
||||
faults: Arc<VendorFaults>,
|
||||
frame_counter: Arc<Mutex<u8>>,
|
||||
command_deadline: Duration,
|
||||
max_retries: u8,
|
||||
}
|
||||
|
||||
impl A40Transport {
|
||||
/// Build a transport bound to a local UDP port and pre-connected
|
||||
/// to `peer`. The receive task is spawned and returned alongside
|
||||
/// the transport so the caller owns the join handle.
|
||||
pub async fn bind(
|
||||
local: SocketAddr,
|
||||
peer: SocketAddr,
|
||||
) -> Result<(Self, JoinHandle<()>), A40Error> {
|
||||
let socket = UdpSocket::bind(local).await?;
|
||||
socket.connect(peer).await?;
|
||||
Self::from_socket(Arc::new(socket), peer)
|
||||
}
|
||||
|
||||
/// Construct a transport directly from a pre-bound socket. Used
|
||||
/// by tests that need to control both endpoints.
|
||||
pub fn from_socket(
|
||||
socket: Arc<UdpSocket>,
|
||||
peer: SocketAddr,
|
||||
) -> Result<(Self, JoinHandle<()>), A40Error> {
|
||||
let (inbound_tx, _rx) = broadcast::channel::<Frame>(INBOUND_CHANNEL_CAPACITY);
|
||||
let faults = Arc::new(VendorFaults::default());
|
||||
let transport = Self {
|
||||
socket: socket.clone(),
|
||||
peer,
|
||||
inbound_tx: inbound_tx.clone(),
|
||||
faults: faults.clone(),
|
||||
frame_counter: Arc::new(Mutex::new(0)),
|
||||
command_deadline: DEFAULT_COMMAND_DEADLINE,
|
||||
max_retries: DEFAULT_MAX_RETRIES,
|
||||
};
|
||||
let recv_task = tokio::spawn(receive_loop(socket, inbound_tx, faults));
|
||||
Ok((transport, recv_task))
|
||||
}
|
||||
|
||||
pub fn with_command_deadline(mut self, deadline: Duration) -> Self {
|
||||
self.command_deadline = deadline;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_max_retries(mut self, retries: u8) -> Self {
|
||||
self.max_retries = retries;
|
||||
self
|
||||
}
|
||||
|
||||
/// Subscribe to inbound frames. Receivers that lag past the
|
||||
/// channel capacity see `RecvError::Lagged` and are responsible
|
||||
/// for resyncing.
|
||||
pub fn subscribe_inbound(&self) -> broadcast::Receiver<Frame> {
|
||||
self.inbound_tx.subscribe()
|
||||
}
|
||||
|
||||
pub fn faults(&self) -> VendorFaultsSnapshot {
|
||||
self.faults.snapshot()
|
||||
}
|
||||
|
||||
/// Send a fire-and-forget frame; no response is awaited and no
|
||||
/// retry is performed. Use for outbound packets the vendor does
|
||||
/// not acknowledge (e.g. `M_AHRS` attitude pushes).
|
||||
pub async fn send_oneway(&self, frame_id: FrameId, data: &[u8]) -> Result<(), A40Error> {
|
||||
let counter = self.next_counter().await;
|
||||
let bytes = encode_frame(frame_id, data, counter).ok_or(A40Error::FrameTooLarge)?;
|
||||
self.socket.send(&bytes).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Send a frame and await the first inbound frame whose
|
||||
/// `FrameId` matches `expected_reply` within the per-command
|
||||
/// deadline. Retries up to `max_retries` times on timeout;
|
||||
/// returns `Err(MaxRetriesExceeded)` on cap exhaustion.
|
||||
///
|
||||
/// Inbound frames with non-matching ids are still broadcast to
|
||||
/// subscribers; they just don't satisfy *this* call.
|
||||
pub async fn send_with_response(
|
||||
&self,
|
||||
frame_id: FrameId,
|
||||
data: &[u8],
|
||||
expected_reply: FrameId,
|
||||
) -> Result<Frame, A40Error> {
|
||||
let bytes_template = {
|
||||
// Re-encode per attempt because the counter increments;
|
||||
// do one bounds check up-front so we never enter the
|
||||
// retry loop with a doomed frame.
|
||||
let probe_counter = 0u8;
|
||||
encode_frame(frame_id, data, probe_counter).ok_or(A40Error::FrameTooLarge)?
|
||||
};
|
||||
// Use `bytes_template` purely as a size validator above; the
|
||||
// counter we actually use is fresh per attempt.
|
||||
drop(bytes_template);
|
||||
|
||||
let mut inbound_rx = self.inbound_tx.subscribe();
|
||||
let deadline = self.command_deadline;
|
||||
let max_retries = self.max_retries.max(1);
|
||||
|
||||
let mut attempts: u8 = 0;
|
||||
while attempts < max_retries {
|
||||
attempts += 1;
|
||||
let counter = self.next_counter().await;
|
||||
let bytes = encode_frame(frame_id, data, counter).ok_or(A40Error::FrameTooLarge)?;
|
||||
self.socket.send(&bytes).await?;
|
||||
|
||||
// Await the next matching inbound frame within the
|
||||
// deadline. We re-loop on non-matching frames so the
|
||||
// gimbal's HEARTBEAT etc. doesn't cancel our wait.
|
||||
let started = Instant::now();
|
||||
loop {
|
||||
let remaining = deadline.saturating_sub(started.elapsed());
|
||||
if remaining.is_zero() {
|
||||
break;
|
||||
}
|
||||
match timeout(remaining, inbound_rx.recv()).await {
|
||||
Ok(Ok(frame)) if frame.frame_id == expected_reply => {
|
||||
return Ok(frame);
|
||||
}
|
||||
Ok(Ok(_other)) => continue,
|
||||
Ok(Err(broadcast::error::RecvError::Lagged(_))) => {
|
||||
// We may have missed the reply; treat as
|
||||
// timeout for this attempt rather than
|
||||
// hanging.
|
||||
break;
|
||||
}
|
||||
Ok(Err(broadcast::error::RecvError::Closed)) => {
|
||||
return Err(A40Error::InboundChannelClosed);
|
||||
}
|
||||
Err(_elapsed) => break, // timed out
|
||||
}
|
||||
}
|
||||
self.faults.inc_timeout();
|
||||
tracing::warn!(
|
||||
attempts,
|
||||
max_retries,
|
||||
?frame_id,
|
||||
?expected_reply,
|
||||
"A40 command timeout; retrying"
|
||||
);
|
||||
}
|
||||
Err(A40Error::MaxRetriesExceeded {
|
||||
attempts,
|
||||
expected: expected_reply,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn peer(&self) -> SocketAddr {
|
||||
self.peer
|
||||
}
|
||||
|
||||
async fn next_counter(&self) -> u8 {
|
||||
let mut c = self.frame_counter.lock().await;
|
||||
let v = *c;
|
||||
*c = (*c).wrapping_add(1) & 0b11;
|
||||
v
|
||||
}
|
||||
}
|
||||
|
||||
async fn receive_loop(
|
||||
socket: Arc<UdpSocket>,
|
||||
inbound_tx: broadcast::Sender<Frame>,
|
||||
faults: Arc<VendorFaults>,
|
||||
) {
|
||||
// Vendor packet ceiling is 63 bytes; round up to 128 for safety.
|
||||
let mut buf = [0u8; 128];
|
||||
loop {
|
||||
match socket.recv(&mut buf).await {
|
||||
Ok(len) => match decode_frame(&buf[..len]) {
|
||||
Ok(frame) => {
|
||||
let _ = inbound_tx.send(frame);
|
||||
}
|
||||
Err(FrameDecodeError::BadChecksum { .. }) => {
|
||||
faults.inc_crc();
|
||||
tracing::debug!("A40 inbound checksum mismatch; dropping frame");
|
||||
}
|
||||
Err(FrameDecodeError::UnknownFrameId(_)) => {
|
||||
faults.inc_unknown_frame_id();
|
||||
}
|
||||
Err(e) => {
|
||||
// Other framing errors share the crc counter
|
||||
// (they are all "frame envelope invalid" faults
|
||||
// from the operator's perspective).
|
||||
faults.inc_crc();
|
||||
tracing::debug!(error=?e, "A40 inbound frame rejected");
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
tracing::error!(error=%e, "A40 transport recv error; shutting down receive loop");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed counter set reports zero for every fault kind.
    #[test]
    fn faults_default_zero() {
        // Arrange + Act
        let faults = VendorFaults::default();

        // Assert
        let expected = VendorFaultsSnapshot {
            crc: 0,
            timeout: 0,
            unknown_frame_id: 0,
        };
        assert_eq!(faults.snapshot(), expected);
    }

    /// Bumping one counter leaves the other counters untouched.
    #[test]
    fn faults_counters_increment_independently() {
        // Arrange
        let faults = VendorFaults::default();

        // Act
        for _ in 0..2 {
            faults.inc_crc();
        }
        faults.inc_timeout();

        // Assert
        let VendorFaultsSnapshot {
            crc,
            timeout,
            unknown_frame_id,
        } = faults.snapshot();
        assert_eq!((crc, timeout, unknown_frame_id), (2, 1, 0));
    }
}
|
||||
Reference in New Issue
Block a user