mirror of
https://github.com/azaion/autopilot.git
synced 2026-06-22 07:31:10 +00:00
288e7f8c46
ci/woodpecker/push/build-arm Pipeline failed
Implements the vendor wire protocol for the A40 gimbal (XOR-8 checksum, not CRC16 — task spec corrected against ArduPilot AP_Mount_Viewpro.h): frame encode/decode, typed FrameId/CameraCommand/ImageSensor, A1 angles, C1 camera, C2 set-zoom command builders, and a tokio UdpSocket transport with bounded retry, per-command deadline, and atomic vendor-fault counters surfaced via faults()/health(). GimbalControllerHandle::set_pose and zoom now ride the transport when wired; remain disabled when no transport is bound. 32/32 gimbal_controller tests green; workspace test suite green except for a pre-existing flake in mission_executor::state_machine::ac3_bounded_retry_then_success that reproduces only under parallel workspace test load (passes 5/5 in isolation; flagged in batch 8 report, unrelated to this batch). Co-authored-by: Cursor <cursoragent@cursor.com>
331 lines
12 KiB
Rust
331 lines
12 KiB
Rust
//! UDP transport for the ViewPro A40.
|
|
//!
|
|
//! Owns the [`UdpSocket`], the rolling frame counter, the bounded
|
|
//! retry policy, and the vendor-fault counters that feed the
|
|
//! component's health surface. Inbound frames are checksum-validated
|
|
//! by [`super::a40_protocol::decode_frame`]; mismatches are counted
|
|
//! as `vendor_faults_total{kind="crc"}` and dropped.
|
|
//!
|
|
//! The transport is **command/response**; replies are matched on `FrameId` only (the frame counter is not compared):
|
|
//! each `send_with_response` issues a frame, awaits the next
|
|
//! matching inbound frame within a per-command deadline, and retries
|
|
//! up to `max_retries` on timeout. Unmatched inbound frames (e.g.
|
|
//! the gimbal's HEARTBEAT) are still surfaced through the
|
|
//! broadcast stream so a future telemetry pump can consume them.
|
|
|
|
use std::net::SocketAddr;
|
|
use std::sync::Arc;
|
|
use std::time::Duration;
|
|
|
|
use tokio::net::UdpSocket;
|
|
use tokio::sync::{broadcast, Mutex};
|
|
use tokio::task::JoinHandle;
|
|
use tokio::time::{timeout, Instant};
|
|
|
|
use super::a40_protocol::frame::{decode_frame, encode_frame, Frame, FrameDecodeError, FrameId};
|
|
|
|
/// Default per-attempt response deadline. The NFR is ≤200 ms on a
/// healthy link; 150 ms leaves headroom for the bounded-retry budget.
pub const DEFAULT_COMMAND_DEADLINE: Duration = Duration::from_millis(150);

/// Default attempt budget for `send_with_response`. The value counts
/// every send, including the first one, so `3` means at most three
/// transmissions of a command. Vendor link is best-effort UDP; bounded
/// retries match the AZ-651 ladder pattern.
pub const DEFAULT_MAX_RETRIES: u8 = 3;

/// Broadcast channel capacity for inbound frames. Slow consumers
/// see `Lagged`; the transport itself is unaffected.
pub const INBOUND_CHANNEL_CAPACITY: usize = 64;
|
|
|
|
/// Vendor-fault counters surfaced through `health()`.
///
/// Each counter is an independent atomic that the transport bumps with
/// relaxed ordering. [`VendorFaults::snapshot`] loads the three counters
/// one after another, so a snapshot is a per-counter reading rather than
/// a single atomic view across all of them.
#[derive(Debug, Default)]
pub struct VendorFaults {
    /// Inbound frames rejected for a bad checksum or any other framing
    /// error (everything except an unknown frame id).
    pub crc: std::sync::atomic::AtomicU64,
    /// Command attempts that ended without a matching response. Bumped
    /// once per failed attempt, so a command that burns its whole attempt
    /// budget contributes one count per attempt.
    pub timeout: std::sync::atomic::AtomicU64,
    /// Inbound frames whose `FrameId` could not be decoded.
    pub unknown_frame_id: std::sync::atomic::AtomicU64,
}
|
|
|
|
impl VendorFaults {
|
|
fn inc_crc(&self) {
|
|
self.crc.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
|
}
|
|
fn inc_timeout(&self) {
|
|
self.timeout
|
|
.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
|
}
|
|
fn inc_unknown_frame_id(&self) {
|
|
self.unknown_frame_id
|
|
.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
|
}
|
|
pub fn snapshot(&self) -> VendorFaultsSnapshot {
|
|
VendorFaultsSnapshot {
|
|
crc: self.crc.load(std::sync::atomic::Ordering::Relaxed),
|
|
timeout: self.timeout.load(std::sync::atomic::Ordering::Relaxed),
|
|
unknown_frame_id: self
|
|
.unknown_frame_id
|
|
.load(std::sync::atomic::Ordering::Relaxed),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Read-side snapshot of [`VendorFaults`]: plain integer copies of the
/// counters at the moment each one was loaded.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct VendorFaultsSnapshot {
    /// Value of the `crc` counter.
    pub crc: u64,
    /// Value of the `timeout` counter.
    pub timeout: u64,
    /// Value of the `unknown_frame_id` counter.
    pub unknown_frame_id: u64,
}
|
|
|
|
#[derive(Debug, thiserror::Error)]
|
|
pub enum A40Error {
|
|
#[error("frame too large for vendor protocol (max body 63 bytes)")]
|
|
FrameTooLarge,
|
|
#[error("max retries exceeded ({attempts} attempts) waiting for {expected:?}")]
|
|
MaxRetriesExceeded { attempts: u8, expected: FrameId },
|
|
#[error("UDP I/O: {0}")]
|
|
Io(#[from] std::io::Error),
|
|
#[error("inbound broadcast channel closed")]
|
|
InboundChannelClosed,
|
|
}
|
|
|
|
/// UDP transport for the A40. Cheap to clone — both the socket and
|
|
/// the inbound broadcast sender are wrapped in `Arc`.
|
|
#[derive(Clone)]
|
|
pub struct A40Transport {
|
|
socket: Arc<UdpSocket>,
|
|
peer: SocketAddr,
|
|
inbound_tx: broadcast::Sender<Frame>,
|
|
faults: Arc<VendorFaults>,
|
|
frame_counter: Arc<Mutex<u8>>,
|
|
command_deadline: Duration,
|
|
max_retries: u8,
|
|
}
|
|
|
|
impl A40Transport {
|
|
/// Build a transport bound to a local UDP port and pre-connected
|
|
/// to `peer`. The receive task is spawned and returned alongside
|
|
/// the transport so the caller owns the join handle.
|
|
pub async fn bind(
|
|
local: SocketAddr,
|
|
peer: SocketAddr,
|
|
) -> Result<(Self, JoinHandle<()>), A40Error> {
|
|
let socket = UdpSocket::bind(local).await?;
|
|
socket.connect(peer).await?;
|
|
Self::from_socket(Arc::new(socket), peer)
|
|
}
|
|
|
|
/// Construct a transport directly from a pre-bound socket. Used
|
|
/// by tests that need to control both endpoints.
|
|
pub fn from_socket(
|
|
socket: Arc<UdpSocket>,
|
|
peer: SocketAddr,
|
|
) -> Result<(Self, JoinHandle<()>), A40Error> {
|
|
let (inbound_tx, _rx) = broadcast::channel::<Frame>(INBOUND_CHANNEL_CAPACITY);
|
|
let faults = Arc::new(VendorFaults::default());
|
|
let transport = Self {
|
|
socket: socket.clone(),
|
|
peer,
|
|
inbound_tx: inbound_tx.clone(),
|
|
faults: faults.clone(),
|
|
frame_counter: Arc::new(Mutex::new(0)),
|
|
command_deadline: DEFAULT_COMMAND_DEADLINE,
|
|
max_retries: DEFAULT_MAX_RETRIES,
|
|
};
|
|
let recv_task = tokio::spawn(receive_loop(socket, inbound_tx, faults));
|
|
Ok((transport, recv_task))
|
|
}
|
|
|
|
pub fn with_command_deadline(mut self, deadline: Duration) -> Self {
|
|
self.command_deadline = deadline;
|
|
self
|
|
}
|
|
|
|
pub fn with_max_retries(mut self, retries: u8) -> Self {
|
|
self.max_retries = retries;
|
|
self
|
|
}
|
|
|
|
/// Subscribe to inbound frames. Receivers that lag past the
|
|
/// channel capacity see `RecvError::Lagged` and are responsible
|
|
/// for resyncing.
|
|
pub fn subscribe_inbound(&self) -> broadcast::Receiver<Frame> {
|
|
self.inbound_tx.subscribe()
|
|
}
|
|
|
|
pub fn faults(&self) -> VendorFaultsSnapshot {
|
|
self.faults.snapshot()
|
|
}
|
|
|
|
/// Send a fire-and-forget frame; no response is awaited and no
|
|
/// retry is performed. Use for outbound packets the vendor does
|
|
/// not acknowledge (e.g. `M_AHRS` attitude pushes).
|
|
pub async fn send_oneway(&self, frame_id: FrameId, data: &[u8]) -> Result<(), A40Error> {
|
|
let counter = self.next_counter().await;
|
|
let bytes = encode_frame(frame_id, data, counter).ok_or(A40Error::FrameTooLarge)?;
|
|
self.socket.send(&bytes).await?;
|
|
Ok(())
|
|
}
|
|
|
|
/// Send a frame and await the first inbound frame whose
|
|
/// `FrameId` matches `expected_reply` within the per-command
|
|
/// deadline. Retries up to `max_retries` times on timeout;
|
|
/// returns `Err(MaxRetriesExceeded)` on cap exhaustion.
|
|
///
|
|
/// Inbound frames with non-matching ids are still broadcast to
|
|
/// subscribers; they just don't satisfy *this* call.
|
|
pub async fn send_with_response(
|
|
&self,
|
|
frame_id: FrameId,
|
|
data: &[u8],
|
|
expected_reply: FrameId,
|
|
) -> Result<Frame, A40Error> {
|
|
let bytes_template = {
|
|
// Re-encode per attempt because the counter increments;
|
|
// do one bounds check up-front so we never enter the
|
|
// retry loop with a doomed frame.
|
|
let probe_counter = 0u8;
|
|
encode_frame(frame_id, data, probe_counter).ok_or(A40Error::FrameTooLarge)?
|
|
};
|
|
// Use `bytes_template` purely as a size validator above; the
|
|
// counter we actually use is fresh per attempt.
|
|
drop(bytes_template);
|
|
|
|
let mut inbound_rx = self.inbound_tx.subscribe();
|
|
let deadline = self.command_deadline;
|
|
let max_retries = self.max_retries.max(1);
|
|
|
|
let mut attempts: u8 = 0;
|
|
while attempts < max_retries {
|
|
attempts += 1;
|
|
let counter = self.next_counter().await;
|
|
let bytes = encode_frame(frame_id, data, counter).ok_or(A40Error::FrameTooLarge)?;
|
|
self.socket.send(&bytes).await?;
|
|
|
|
// Await the next matching inbound frame within the
|
|
// deadline. We re-loop on non-matching frames so the
|
|
// gimbal's HEARTBEAT etc. doesn't cancel our wait.
|
|
let started = Instant::now();
|
|
loop {
|
|
let remaining = deadline.saturating_sub(started.elapsed());
|
|
if remaining.is_zero() {
|
|
break;
|
|
}
|
|
match timeout(remaining, inbound_rx.recv()).await {
|
|
Ok(Ok(frame)) if frame.frame_id == expected_reply => {
|
|
return Ok(frame);
|
|
}
|
|
Ok(Ok(_other)) => continue,
|
|
Ok(Err(broadcast::error::RecvError::Lagged(_))) => {
|
|
// We may have missed the reply; treat as
|
|
// timeout for this attempt rather than
|
|
// hanging.
|
|
break;
|
|
}
|
|
Ok(Err(broadcast::error::RecvError::Closed)) => {
|
|
return Err(A40Error::InboundChannelClosed);
|
|
}
|
|
Err(_elapsed) => break, // timed out
|
|
}
|
|
}
|
|
self.faults.inc_timeout();
|
|
tracing::warn!(
|
|
attempts,
|
|
max_retries,
|
|
?frame_id,
|
|
?expected_reply,
|
|
"A40 command timeout; retrying"
|
|
);
|
|
}
|
|
Err(A40Error::MaxRetriesExceeded {
|
|
attempts,
|
|
expected: expected_reply,
|
|
})
|
|
}
|
|
|
|
pub fn peer(&self) -> SocketAddr {
|
|
self.peer
|
|
}
|
|
|
|
async fn next_counter(&self) -> u8 {
|
|
let mut c = self.frame_counter.lock().await;
|
|
let v = *c;
|
|
*c = (*c).wrapping_add(1) & 0b11;
|
|
v
|
|
}
|
|
}
|
|
|
|
async fn receive_loop(
|
|
socket: Arc<UdpSocket>,
|
|
inbound_tx: broadcast::Sender<Frame>,
|
|
faults: Arc<VendorFaults>,
|
|
) {
|
|
// Vendor packet ceiling is 63 bytes; round up to 128 for safety.
|
|
let mut buf = [0u8; 128];
|
|
loop {
|
|
match socket.recv(&mut buf).await {
|
|
Ok(len) => match decode_frame(&buf[..len]) {
|
|
Ok(frame) => {
|
|
let _ = inbound_tx.send(frame);
|
|
}
|
|
Err(FrameDecodeError::BadChecksum { .. }) => {
|
|
faults.inc_crc();
|
|
tracing::debug!("A40 inbound checksum mismatch; dropping frame");
|
|
}
|
|
Err(FrameDecodeError::UnknownFrameId(_)) => {
|
|
faults.inc_unknown_frame_id();
|
|
}
|
|
Err(e) => {
|
|
// Other framing errors share the crc counter
|
|
// (they are all "frame envelope invalid" faults
|
|
// from the operator's perspective).
|
|
faults.inc_crc();
|
|
tracing::debug!(error=?e, "A40 inbound frame rejected");
|
|
}
|
|
},
|
|
Err(e) => {
|
|
tracing::error!(error=%e, "A40 transport recv error; shutting down receive loop");
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed counter set reads zero everywhere.
    #[test]
    fn faults_default_zero() {
        // Arrange + Act
        let f = VendorFaults::default();

        // Assert
        let s = f.snapshot();
        assert_eq!(s.crc, 0);
        assert_eq!(s.timeout, 0);
        assert_eq!(s.unknown_frame_id, 0);
    }

    /// Bumping one counter leaves the others untouched.
    #[test]
    fn faults_counters_increment_independently() {
        // Arrange
        let f = VendorFaults::default();

        // Act
        f.inc_crc();
        f.inc_crc();
        f.inc_timeout();

        // Assert
        let s = f.snapshot();
        assert_eq!(s.crc, 2);
        assert_eq!(s.timeout, 1);
        assert_eq!(s.unknown_frame_id, 0);
    }

    /// The unknown-frame-id counter is tracked separately as well.
    #[test]
    fn faults_unknown_frame_id_increments() {
        // Arrange
        let f = VendorFaults::default();

        // Act
        f.inc_unknown_frame_id();

        // Assert
        let s = f.snapshot();
        assert_eq!(s.crc, 0);
        assert_eq!(s.timeout, 0);
        assert_eq!(s.unknown_frame_id, 1);
    }
}
|