mirror of
https://github.com/azaion/autopilot.git
synced 2026-06-21 22:41:09 +00:00
[AZ-666] [AZ-673] [AZ-648] ignored set + UDS VLM + mission FSM batch 5
ci/woodpecker/push/build-arm Pipeline failed
ci/woodpecker/push/build-arm Pipeline failed
AZ-666 mapobjects_store: - internal/ignored.rs (HashSet<(mgrs, class_group)> for O(1) suppression) - internal/passes.rs (per-region PassTracker with observed-id set and end-of-pass removed-candidate sweep) - Classification::Ignored wired into classify; apply_decline + is_ignored + pass_start + end_of_pass on MapObjectsStoreHandle - new tests/ignored_and_sweep.rs (3 AC + 2 supplementary) AZ-673 vlm_client: - internal/peer_cred.rs (Linux SO_PEERCRED via libc getsockopt; PeerCredOutcome::SkippedNonLinux on macOS dev hosts per description.md §8) - internal/prompt.rs (pre-send ROI size + format + prompt non-emptiness validation) - internal/wire.rs (length-prefixed JSON envelope with base64 ROI) - internal/uds_client.rs (tokio UnixStream client; bounded reconnect; hard-stop on peer-cred mismatch; per-request deadline) - VlmClient with both eager (open/connect) and lazy (new) ctor - workspace Cargo.toml: base64 + libc as workspace deps AZ-648 mission_executor: - internal/types.rs (Variant, MissionState, TransitionKey, Telemetry, TransitionEvent, StepOutcome) - internal/driver.rs (MissionDriver trait + DriverError + DriverAction) - internal/fsm.rs (variant-agnostic Transition + FsmCore + step_one with per-transition retry budget keyed by TransitionKey) - internal/multirotor.rs + internal/fixed_wing.rs (typed transition tables; multirotor has Armed/TakeOff, fixed-wing parks in WaitAuto for operator AUTO) - public API: MissionExecutor::run spawns the FSM task and returns a clone-safe MissionExecutorHandle (state, health, subscribe, paused_reason, retry_count) - new tests/state_machine.rs (AC-1..AC-4 via ScriptedDriver fake; SITL conformance lands with AZ-649 telemetry forwarding) Workspace: cargo fmt + clippy -D warnings clean; full cargo test --workspace --all-features green (1 ignored = AZ-665 perf gate). Tasks moved todo/ → done/, autodev state set to batch 6 selection. Refs: _docs/03_implementation/batch_05_cycle1_report.md Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -9,12 +9,23 @@ authors.workspace = true
|
||||
|
||||
[features]
|
||||
default = []
|
||||
# Real NanoLLM/VILA IPC path. With `vlm` off, `VlmClient` returns the disabled
|
||||
# no-op assessment (architecture.md §7.6 Optionality model).
|
||||
vlm = []
|
||||
# Real NanoLLM/VILA IPC path. With `vlm` off, the crate exports only
|
||||
# `PROVIDER_NAME` — there is no `VlmClient` type and no IPC code is
|
||||
# compiled. With `vlm` on, the IPC client + peer-cred check + pre-send
|
||||
# validation are pulled in (AZ-673), plus schema validation (AZ-674).
|
||||
vlm = ["dep:serde", "dep:serde_json", "dep:thiserror", "dep:base64", "dep:libc"]
|
||||
|
||||
[dependencies]
|
||||
shared = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
async-trait = { workspace = true }
|
||||
serde = { workspace = true, optional = true }
|
||||
serde_json = { workspace = true, optional = true }
|
||||
thiserror = { workspace = true, optional = true }
|
||||
base64 = { workspace = true, optional = true }
|
||||
libc = { workspace = true, optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = { workspace = true }
|
||||
tokio = { workspace = true, features = ["macros", "rt-multi-thread", "net", "io-util", "time", "sync"] }
|
||||
|
||||
@@ -1,54 +1,131 @@
|
||||
//! Feature-gated entry point. Compiled only when `--features vlm` is on.
|
||||
//!
|
||||
//! AZ-672 installs the trait + a placeholder constructor; the real IPC
|
||||
//! body lands in AZ-673 (`vlm_client_nanollm_ipc`). Until then `assess`
|
||||
//! returns `VlmAssessment::disabled()` so the runtime can be wired
|
||||
//! end-to-end without a working NanoLLM peer.
|
||||
//! AZ-672 installed the trait + a placeholder constructor; AZ-673
|
||||
//! replaces the placeholder with the real `NanoLlmClient` (UDS
|
||||
//! connection, peer-cred check, pre-send validation, bounded request
|
||||
//! deadline, bounded reconnect).
|
||||
//!
|
||||
//! Two construction paths are supported:
|
||||
//!
|
||||
//! - `VlmClient::new(path)` — synchronous, **lazy**. Composition-root
|
||||
//! wiring in `crates/autopilot/src/runtime.rs` uses this so the
|
||||
//! runtime can be built without requiring the NanoLLM peer to be
|
||||
//! reachable yet. The UDS connection and peer-cred check happen on
|
||||
//! the first `assess` call. A peer-cred mismatch on that first
|
||||
//! call surfaces as `VlmAssessment { status: IpcError, .. }` and
|
||||
//! subsequent calls also fail because the inner client locks.
|
||||
//!
|
||||
//! - `VlmClient::open(path)` / `VlmClient::connect(options)` —
|
||||
//! asynchronous, **eager**. Used by integration tests and by
|
||||
//! startup code that wants peer-cred mismatch to hard-fail at
|
||||
//! process boot.
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use tokio::sync::OnceCell;
|
||||
|
||||
use shared::contracts::VlmProvider;
|
||||
use shared::error::Result;
|
||||
use shared::health::ComponentHealth;
|
||||
use shared::models::vlm::VlmAssessment;
|
||||
use shared::models::vlm::{VlmAssessment, VlmLabel, VlmStatus};
|
||||
|
||||
use super::PROVIDER_NAME;
|
||||
use crate::internal::uds_client::{ConnectError, NanoLlmClient, NanoLlmClientOptions};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Clone)]
|
||||
pub struct VlmClient {
|
||||
ipc_socket: String,
|
||||
options: NanoLlmClientOptions,
|
||||
inner: Arc<OnceCell<NanoLlmClient>>,
|
||||
}
|
||||
|
||||
impl VlmClient {
|
||||
/// Construct the feature-enabled client. Until AZ-673 lands, the
|
||||
/// returned instance still answers `assess` with the disabled
|
||||
/// no-op assessment — the difference vs `DisabledVlmProvider` is
|
||||
/// that this socket address has been validated and the IPC
|
||||
/// connection will be established here in AZ-673.
|
||||
pub fn new(ipc_socket: impl Into<String>) -> Self {
|
||||
/// Synchronous, lazy. The first `assess` call dials the UDS peer
|
||||
/// and performs the SO_PEERCRED check. Use this when the
|
||||
/// composition root must stay sync.
|
||||
pub fn new(socket_path: impl Into<PathBuf>) -> Self {
|
||||
Self {
|
||||
ipc_socket: ipc_socket.into(),
|
||||
options: NanoLlmClientOptions::new(socket_path),
|
||||
inner: Arc::new(OnceCell::new()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ipc_socket(&self) -> &str {
|
||||
&self.ipc_socket
|
||||
/// Asynchronous, eager. Opens the UDS connection and performs the
|
||||
/// peer-cred check up front. Use this when startup must hard-fail
|
||||
/// on peer-cred mismatch (AZ-673 AC-2).
|
||||
pub async fn open(socket_path: impl Into<PathBuf>) -> std::result::Result<Self, ConnectError> {
|
||||
Self::connect(NanoLlmClientOptions::new(socket_path)).await
|
||||
}
|
||||
|
||||
/// Asynchronous, eager, with full options (peer-cred expectations,
|
||||
/// timeouts, payload limits).
|
||||
pub async fn connect(options: NanoLlmClientOptions) -> std::result::Result<Self, ConnectError> {
|
||||
let inner_client = NanoLlmClient::connect(options.clone()).await?;
|
||||
let cell = OnceCell::new();
|
||||
cell.set(inner_client)
|
||||
.ok()
|
||||
.expect("freshly constructed OnceCell must be empty");
|
||||
Ok(Self {
|
||||
options,
|
||||
inner: Arc::new(cell),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn ipc_socket(&self) -> &std::path::Path {
|
||||
&self.options.socket_path
|
||||
}
|
||||
|
||||
pub fn health(&self) -> ComponentHealth {
|
||||
// Until AZ-673 connects, we surface yellow with the configured
|
||||
// socket so the operator sees the build *did* enable VLM but
|
||||
// the IPC peer is not yet wired.
|
||||
ComponentHealth::yellow(PROVIDER_NAME, format!("ipc_pending: {}", self.ipc_socket))
|
||||
let connected = self.inner.initialized();
|
||||
let level = if connected {
|
||||
ComponentHealth::green(PROVIDER_NAME)
|
||||
} else {
|
||||
ComponentHealth::yellow(PROVIDER_NAME, "ipc connect deferred")
|
||||
};
|
||||
level.with_detail(format!("ipc_socket={}", self.options.socket_path.display()))
|
||||
}
|
||||
|
||||
/// Reference to the lazily-initialised inner client (`None` if no
|
||||
/// `assess` has been made yet on a `new()`-constructed instance).
|
||||
pub fn inner(&self) -> Option<&NanoLlmClient> {
|
||||
self.inner.get()
|
||||
}
|
||||
|
||||
async fn ensure_connected(&self) -> std::result::Result<&NanoLlmClient, ConnectError> {
|
||||
let options = self.options.clone();
|
||||
self.inner
|
||||
.get_or_try_init(|| async move { NanoLlmClient::connect(options).await })
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
trait HealthDetail {
|
||||
fn with_detail(self, detail: impl Into<String>) -> Self;
|
||||
}
|
||||
|
||||
impl HealthDetail for ComponentHealth {
|
||||
fn with_detail(mut self, detail: impl Into<String>) -> Self {
|
||||
self.detail = Some(detail.into());
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl VlmProvider for VlmClient {
|
||||
async fn assess(&self, _roi: Vec<u8>, _prompt: String) -> Result<VlmAssessment> {
|
||||
// Real IPC call lands in AZ-673. Returning disabled keeps the
|
||||
// runtime end-to-end exercisable until that task completes.
|
||||
Ok(VlmAssessment::disabled())
|
||||
async fn assess(&self, roi: Vec<u8>, prompt: String) -> Result<VlmAssessment> {
|
||||
match self.ensure_connected().await {
|
||||
Ok(c) => Ok(c.assess(roi, prompt).await),
|
||||
Err(e) => Ok(VlmAssessment {
|
||||
label: VlmLabel::Error,
|
||||
confidence: 0.0,
|
||||
evidence_spans: Vec::new(),
|
||||
reason: format!("lazy connect: {e}"),
|
||||
status: VlmStatus::IpcError,
|
||||
latency_ms: 0,
|
||||
model_version: String::new(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn name(&self) -> &'static str {
|
||||
@@ -59,20 +136,205 @@ impl VlmProvider for VlmClient {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
#[cfg(target_os = "linux")]
|
||||
use crate::internal::peer_cred::ExpectedPeer;
|
||||
use crate::internal::prompt::Limits;
|
||||
use shared::models::vlm::VlmStatus;
|
||||
use tempfile::tempdir;
|
||||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||
use tokio::net::UnixListener;
|
||||
|
||||
/// Spawn a tiny fixture NanoLLM that reads one request frame and
|
||||
/// writes back the supplied assessment JSON (or just hangs if
|
||||
/// `respond` is `None`).
|
||||
async fn fixture(
|
||||
path: std::path::PathBuf,
|
||||
respond: Option<serde_json::Value>,
|
||||
) -> tokio::task::JoinHandle<()> {
|
||||
let listener = UnixListener::bind(&path).unwrap();
|
||||
tokio::spawn(async move {
|
||||
let (mut s, _) = listener.accept().await.unwrap();
|
||||
let mut lenbuf = [0u8; 4];
|
||||
if s.read_exact(&mut lenbuf).await.is_err() {
|
||||
return;
|
||||
}
|
||||
let len = u32::from_be_bytes(lenbuf) as usize;
|
||||
let mut req = vec![0u8; len];
|
||||
if s.read_exact(&mut req).await.is_err() {
|
||||
return;
|
||||
}
|
||||
let Some(body) = respond else {
|
||||
std::future::pending::<()>().await;
|
||||
return;
|
||||
};
|
||||
let bytes = serde_json::to_vec(&body).unwrap();
|
||||
let len = (bytes.len() as u32).to_be_bytes();
|
||||
let _ = s.write_all(&len).await;
|
||||
let _ = s.write_all(&bytes).await;
|
||||
let _ = s.flush().await;
|
||||
})
|
||||
}
|
||||
|
||||
fn ok_response_json() -> serde_json::Value {
|
||||
serde_json::json!({
|
||||
"label": "confirmed_concealed_position",
|
||||
"confidence": 0.91,
|
||||
"evidence_spans": ["thicket", "tarp"],
|
||||
"reason": "high foliage + tarp edge",
|
||||
"status": "ok",
|
||||
"latency_ms": 42,
|
||||
"model_version": "VILA1.5-3B-int4"
|
||||
})
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn placeholder_assess_returns_disabled_until_az_673() {
|
||||
async fn ac1_happy_path_round_trip() {
|
||||
// Arrange
|
||||
let c = VlmClient::new("/run/vila/ipc.sock");
|
||||
let dir = tempdir().unwrap();
|
||||
let path = dir.path().join("nanollm.sock");
|
||||
let fixture_handle = fixture(path.clone(), Some(ok_response_json())).await;
|
||||
let client = VlmClient::open(&path).await.expect("connect");
|
||||
|
||||
// Act
|
||||
let r = c
|
||||
.assess(Vec::new(), String::new())
|
||||
let result = client
|
||||
.assess(b"\xff\xd8\xff".to_vec(), "describe".into())
|
||||
.await
|
||||
.expect("placeholder path is infallible");
|
||||
.expect("assess returns Ok envelope");
|
||||
|
||||
// Assert
|
||||
assert_eq!(r.status, VlmStatus::Disabled);
|
||||
assert_eq!(c.name(), "vlm_client");
|
||||
assert_eq!(c.ipc_socket(), "/run/vila/ipc.sock");
|
||||
assert_eq!(result.status, VlmStatus::Ok);
|
||||
assert_eq!(result.confidence, 0.91);
|
||||
assert_eq!(result.model_version, "VILA1.5-3B-int4");
|
||||
assert_eq!(result.latency_ms, 42);
|
||||
fixture_handle.abort();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn ac3_oversize_roi_rejected_pre_send() {
|
||||
// Arrange — fixture exists but should never see a request.
|
||||
let dir = tempdir().unwrap();
|
||||
let path = dir.path().join("nanollm.sock");
|
||||
let _listener = UnixListener::bind(&path).unwrap();
|
||||
let mut opts = NanoLlmClientOptions::new(&path);
|
||||
opts.limits = Limits {
|
||||
max_roi_bytes: 4,
|
||||
max_prompt_bytes: 1024,
|
||||
};
|
||||
let client = VlmClient::connect(opts).await.expect("connect");
|
||||
|
||||
// Act
|
||||
let result = client
|
||||
.assess(vec![0u8; 5], "p".into())
|
||||
.await
|
||||
.expect("assess returns SchemaInvalid envelope, not Err");
|
||||
|
||||
// Assert
|
||||
assert_eq!(result.status, VlmStatus::SchemaInvalid);
|
||||
assert!(result.reason.contains("roi too large"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn ac4_response_timeout_returns_explicit_status() {
|
||||
// Arrange — fixture accepts the connection but never responds.
|
||||
let dir = tempdir().unwrap();
|
||||
let path = dir.path().join("nanollm.sock");
|
||||
let fixture_handle = fixture(path.clone(), None).await;
|
||||
let mut opts = NanoLlmClientOptions::new(&path);
|
||||
opts.request_deadline = std::time::Duration::from_millis(150);
|
||||
let client = VlmClient::connect(opts).await.expect("connect");
|
||||
|
||||
// Act
|
||||
let started = std::time::Instant::now();
|
||||
let result = client
|
||||
.assess(b"r".to_vec(), "p".into())
|
||||
.await
|
||||
.expect("assess returns Timeout envelope, not Err");
|
||||
let elapsed = started.elapsed();
|
||||
|
||||
// Assert
|
||||
assert_eq!(result.status, VlmStatus::Timeout);
|
||||
assert!(
|
||||
elapsed >= std::time::Duration::from_millis(150),
|
||||
"timeout fired too early: {elapsed:?}",
|
||||
);
|
||||
assert!(
|
||||
elapsed < std::time::Duration::from_secs(1),
|
||||
"timeout overshoot: {elapsed:?}",
|
||||
);
|
||||
fixture_handle.abort();
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[tokio::test]
|
||||
async fn ac2_peer_cred_mismatch_hard_fails_connect() {
|
||||
// Arrange
|
||||
let dir = tempdir().unwrap();
|
||||
let path = dir.path().join("nanollm.sock");
|
||||
let _listener = UnixListener::bind(&path).unwrap();
|
||||
let our_uid = unsafe { libc::geteuid() };
|
||||
let bogus_uid = if our_uid == 0 { 1 } else { 0 };
|
||||
let mut opts = NanoLlmClientOptions::new(&path);
|
||||
opts.expected_peer = ExpectedPeer {
|
||||
uid: Some(bogus_uid),
|
||||
gid: None,
|
||||
};
|
||||
|
||||
// Act
|
||||
let err = VlmClient::connect(opts).await.expect_err("must reject");
|
||||
|
||||
// Assert
|
||||
match err {
|
||||
ConnectError::PeerCredMismatch {
|
||||
expected_uid,
|
||||
actual_uid,
|
||||
..
|
||||
} => {
|
||||
assert_eq!(expected_uid, Some(bogus_uid));
|
||||
assert_eq!(actual_uid, our_uid);
|
||||
}
|
||||
other => panic!("expected PeerCredMismatch, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn rejects_empty_prompt_and_empty_roi() {
|
||||
// Arrange
|
||||
let dir = tempdir().unwrap();
|
||||
let path = dir.path().join("nanollm.sock");
|
||||
let _listener = UnixListener::bind(&path).unwrap();
|
||||
let client = VlmClient::open(&path).await.unwrap();
|
||||
|
||||
// Act + Assert — empty roi.
|
||||
let r = client.assess(Vec::new(), "describe".into()).await.unwrap();
|
||||
assert_eq!(r.status, VlmStatus::SchemaInvalid);
|
||||
|
||||
// Act + Assert — empty prompt.
|
||||
let r = client.assess(vec![1u8, 2, 3], String::new()).await.unwrap();
|
||||
assert_eq!(r.status, VlmStatus::SchemaInvalid);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn lazy_new_connects_on_first_assess() {
|
||||
// Arrange — fixture process binds the socket after the client
|
||||
// is constructed; the lazy client must succeed because connect
|
||||
// happens on demand, not at construction.
|
||||
let dir = tempdir().unwrap();
|
||||
let path = dir.path().join("nanollm.sock");
|
||||
|
||||
// Construct the client *before* the fixture exists. With the
|
||||
// old eager constructor this would fail; with lazy it must
|
||||
// succeed.
|
||||
let client = VlmClient::new(&path);
|
||||
assert!(client.inner().is_none(), "should not be connected yet");
|
||||
|
||||
// Bring the fixture up, then call assess.
|
||||
let fixture_handle = fixture(path.clone(), Some(ok_response_json())).await;
|
||||
let result = client
|
||||
.assess(b"r".to_vec(), "p".into())
|
||||
.await
|
||||
.expect("lazy assess");
|
||||
assert_eq!(result.status, VlmStatus::Ok);
|
||||
assert!(client.inner().is_some(), "lazy connect should have run");
|
||||
fixture_handle.abort();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
//! Internal modules used only by the feature-gated `vlm` build.
|
||||
|
||||
pub mod peer_cred;
|
||||
pub mod prompt;
|
||||
pub mod uds_client;
|
||||
pub mod wire;
|
||||
@@ -0,0 +1,164 @@
|
||||
//! `SO_PEERCRED` peer credential check.
|
||||
//!
|
||||
//! Production target is Jetson Linux. On Linux we call `getsockopt`
|
||||
//! with `SO_PEERCRED` and compare the peer's UID/GID against the
|
||||
//! configured expected values; mismatch returns `PeerCredOutcome::Mismatch`.
|
||||
//!
|
||||
//! On macOS dev hosts there is no equivalent that returns both UID
|
||||
//! and GID through `getsockopt` (LOCAL_PEERCRED returns a `xucred`
|
||||
//! with up to NGROUPS, and `LOCAL_PEEREPID` returns only the PID).
|
||||
//! Per the task brief we log a warning and return `SkippedNonLinux`
|
||||
//! so dev workflows do not require sudo / matching service users.
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
use std::os::unix::io::AsRawFd;
|
||||
|
||||
use tokio::net::UnixStream;
|
||||
|
||||
/// Result of a peer-credential check on a connected Unix socket.
#[derive(Clone, Debug, Eq, PartialEq)]
#[allow(dead_code)] // some variants only constructed on certain target_os builds
pub enum PeerCredOutcome {
    /// Credentials were read and satisfy every configured expectation.
    /// Carries the uid/gid that were actually observed.
    Match { uid: u32, gid: u32 },
    /// Credentials were read but at least one configured expectation
    /// was not met. Connect MUST fail with
    /// `ConnectError::PeerCredMismatch`.
    Mismatch {
        expected_uid: Option<u32>,
        expected_gid: Option<u32>,
        actual_uid: u32,
        actual_gid: u32,
    },
    /// Non-Linux dev host: no check was performed because SO_PEERCRED
    /// is not available with the same shape. The task brief allows the
    /// connection to proceed here for development purposes.
    SkippedNonLinux,
    /// `getsockopt` itself failed (the kernel rejected the call or the
    /// socket is not actually a UDS). Callers treat this as a hard
    /// failure: the connection MUST NOT proceed.
    SystemError(String),
}
|
||||
|
||||
/// Credentials the peer is required to present. A `None` field places
/// no constraint on that id ("accept any").
#[derive(Clone, Copy, Debug, Default)]
pub struct ExpectedPeer {
    /// Required peer uid, if any.
    pub uid: Option<u32>,
    /// Required peer gid, if any.
    pub gid: Option<u32>,
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
pub fn check(stream: &UnixStream, expected: ExpectedPeer) -> PeerCredOutcome {
|
||||
let fd = stream.as_raw_fd();
|
||||
let mut cred: libc::ucred = unsafe { std::mem::zeroed() };
|
||||
let mut len = std::mem::size_of::<libc::ucred>() as libc::socklen_t;
|
||||
let rc = unsafe {
|
||||
libc::getsockopt(
|
||||
fd,
|
||||
libc::SOL_SOCKET,
|
||||
libc::SO_PEERCRED,
|
||||
&mut cred as *mut libc::ucred as *mut libc::c_void,
|
||||
&mut len,
|
||||
)
|
||||
};
|
||||
if rc != 0 {
|
||||
let e = std::io::Error::last_os_error();
|
||||
return PeerCredOutcome::SystemError(format!("SO_PEERCRED getsockopt: {e}"));
|
||||
}
|
||||
let actual_uid = cred.uid;
|
||||
let actual_gid = cred.gid;
|
||||
let uid_ok = expected.uid.map(|u| u == actual_uid).unwrap_or(true);
|
||||
let gid_ok = expected.gid.map(|g| g == actual_gid).unwrap_or(true);
|
||||
if uid_ok && gid_ok {
|
||||
PeerCredOutcome::Match {
|
||||
uid: actual_uid,
|
||||
gid: actual_gid,
|
||||
}
|
||||
} else {
|
||||
PeerCredOutcome::Mismatch {
|
||||
expected_uid: expected.uid,
|
||||
expected_gid: expected.gid,
|
||||
actual_uid,
|
||||
actual_gid,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Non-Linux stand-in for the credential check.
///
/// Performs no check at all: both arguments are ignored, a warning is
/// logged, and `SkippedNonLinux` is returned.
#[cfg(not(target_os = "linux"))]
pub fn check(_stream: &UnixStream, _expected: ExpectedPeer) -> PeerCredOutcome {
    let outcome = PeerCredOutcome::SkippedNonLinux;
    tracing::warn!(
        "SO_PEERCRED check skipped: non-Linux build (dev host). Production deployments MUST run on Linux."
    );
    outcome
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// With no expectations configured, checking a loopback UDS must end
    /// in `Match` (Linux) or `SkippedNonLinux` (other hosts).
    #[tokio::test]
    async fn peer_cred_check_on_self_socketpair() {
        // Arrange — this process sits on both ends of a tempdir UDS, so
        // the peer's credentials are our own.
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("peer.sock");
        let listener = tokio::net::UnixListener::bind(&path).unwrap();
        let accept_task = tokio::spawn(async move {
            let (accepted, _addr) = listener.accept().await.unwrap();
            accepted
        });
        let client = tokio::net::UnixStream::connect(&path).await.unwrap();
        // Keep the accepted end alive until the check has run.
        let _server = accept_task.await.unwrap();

        // Act — default expectations accept any UID/GID.
        let outcome = check(&client, ExpectedPeer::default());

        // Assert
        let acceptable = matches!(
            outcome,
            PeerCredOutcome::Match { .. } | PeerCredOutcome::SkippedNonLinux
        );
        assert!(acceptable, "expected Match or SkippedNonLinux, got {outcome:?}");
    }

    /// Expecting a uid that is guaranteed to differ from ours must yield
    /// `Mismatch` carrying both the expected and the observed uid.
    #[cfg(target_os = "linux")]
    #[tokio::test]
    async fn peer_cred_mismatch_when_uid_differs() {
        // Arrange — connect to a fixture peer owned by this process.
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("peer-mismatch.sock");
        let listener = tokio::net::UnixListener::bind(&path).unwrap();
        let _server = tokio::spawn(async move {
            let (accepted, _addr) = listener.accept().await.unwrap();
            accepted
        });
        let client = tokio::net::UnixStream::connect(&path).await.unwrap();

        // Act — expect the *opposite* of the current uid: root when we are
        // unprivileged, uid 1 when the tests themselves run as root.
        // SAFETY: `geteuid` takes no arguments and has no preconditions.
        let our_uid = unsafe { libc::geteuid() };
        let bogus_uid = if our_uid == 0 { 1 } else { 0 };
        let wanted = ExpectedPeer {
            uid: Some(bogus_uid),
            gid: None,
        };
        let outcome = check(&client, wanted);

        // Assert
        let PeerCredOutcome::Mismatch {
            expected_uid,
            actual_uid,
            ..
        } = outcome
        else {
            panic!("expected Mismatch, got {outcome:?}");
        };
        assert_eq!(expected_uid, Some(bogus_uid));
        assert_eq!(actual_uid, our_uid);
    }
}
|
||||
@@ -0,0 +1,112 @@
|
||||
//! Pre-send ROI + prompt validation.
|
||||
//!
|
||||
//! Per AZ-673 §Scope and `description.md §8`: payload size is
|
||||
//! validated BEFORE crossing the IPC boundary. We refuse oversize
|
||||
//! ROIs synchronously rather than waste the 5 s deadline on a
|
||||
//! request the peer will reject anyway.
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum ValidateError {
|
||||
#[error("roi too large: {size} bytes > max {max} bytes")]
|
||||
OversizeRoi { size: usize, max: usize },
|
||||
|
||||
#[error("prompt too large: {size} bytes > max {max} bytes")]
|
||||
OversizePrompt { size: usize, max: usize },
|
||||
|
||||
#[error("roi is empty")]
|
||||
EmptyRoi,
|
||||
|
||||
#[error("prompt is empty")]
|
||||
EmptyPrompt,
|
||||
}
|
||||
|
||||
/// Upper bounds, in bytes, that a request must respect before it is sent.
#[derive(Clone, Copy, Debug)]
pub struct Limits {
    /// Largest accepted ROI length in bytes.
    pub max_roi_bytes: usize,
    /// Largest accepted prompt length in bytes.
    pub max_prompt_bytes: usize,
}

impl Default for Limits {
    /// Defaults per `description.md §8`: ROI up to 1 MiB raw, prompt up
    /// to 4 KiB of UTF-8.
    fn default() -> Self {
        const KIB: usize = 1024;
        Self {
            max_roi_bytes: KIB * KIB,
            max_prompt_bytes: 4 * KIB,
        }
    }
}
|
||||
|
||||
pub fn validate(roi: &[u8], prompt: &str, limits: Limits) -> Result<(), ValidateError> {
|
||||
if roi.is_empty() {
|
||||
return Err(ValidateError::EmptyRoi);
|
||||
}
|
||||
if prompt.is_empty() {
|
||||
return Err(ValidateError::EmptyPrompt);
|
||||
}
|
||||
if roi.len() > limits.max_roi_bytes {
|
||||
return Err(ValidateError::OversizeRoi {
|
||||
size: roi.len(),
|
||||
max: limits.max_roi_bytes,
|
||||
});
|
||||
}
|
||||
if prompt.len() > limits.max_prompt_bytes {
|
||||
return Err(ValidateError::OversizePrompt {
|
||||
size: prompt.len(),
|
||||
max: limits.max_prompt_bytes,
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A small, non-empty payload passes under the default limits.
    #[test]
    fn accepts_payload_within_limits() {
        // Arrange / Act
        let verdict = validate(b"hello", "describe", Limits::default());
        // Assert
        assert!(verdict.is_ok());
    }

    /// A 5-byte ROI against a 4-byte cap reports both numbers.
    #[test]
    fn rejects_oversize_roi() {
        // Arrange
        let tight = Limits {
            max_roi_bytes: 4,
            max_prompt_bytes: 1024,
        };
        // Act
        let rejection = validate(&[0u8; 5], "p", tight).unwrap_err();
        // Assert
        let as_expected = matches!(rejection, ValidateError::OversizeRoi { size: 5, max: 4 });
        assert!(as_expected);
    }

    /// A 6-byte prompt against a 4-byte cap is refused.
    #[test]
    fn rejects_oversize_prompt() {
        // Arrange
        let tight = Limits {
            max_roi_bytes: 1024,
            max_prompt_bytes: 4,
        };
        // Act
        let rejection = validate(b"r", "hellos", tight).unwrap_err();
        // Assert
        let as_expected = matches!(rejection, ValidateError::OversizePrompt { .. });
        assert!(as_expected);
    }

    /// Empty ROI and empty prompt each map to their own variant.
    #[test]
    fn rejects_empty_inputs() {
        let empty_roi = validate(b"", "p", Limits::default());
        assert!(matches!(empty_roi, Err(ValidateError::EmptyRoi)));

        let empty_prompt = validate(b"r", "", Limits::default());
        assert!(matches!(empty_prompt, Err(ValidateError::EmptyPrompt)));
    }
}
|
||||
@@ -0,0 +1,320 @@
|
||||
//! Tokio-based UDS client for NanoLLM.
|
||||
//!
|
||||
//! State invariants:
|
||||
//!
|
||||
//! - At most one request in flight at a time. The caller serialises
|
||||
//! through a `tokio::sync::Mutex` around the connection.
|
||||
//! - On transport loss, the client reconnects up to `reconnect_max`
|
||||
//! times with exponential backoff.
|
||||
//! - On `PeerCredMismatch`, the client refuses to reconnect — peer
|
||||
//! credential failures are treated as security incidents that
|
||||
//! require operator intervention (AZ-673 AC-2).
|
||||
//! - Every `assess` call is bounded by `request_deadline`. A timeout
|
||||
//! produces a `VlmAssessment { status: Timeout, .. }` and the
|
||||
//! socket is dropped + reconnected so a slow response can't poison
|
||||
//! the next request.
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use shared::models::vlm::{VlmAssessment, VlmLabel, VlmStatus};
|
||||
use tokio::net::UnixStream;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::time::timeout;
|
||||
|
||||
use super::peer_cred::{check as check_peer, ExpectedPeer, PeerCredOutcome};
|
||||
use super::prompt::{self, Limits};
|
||||
use super::wire::{read_response, write_request, WireError};
|
||||
|
||||
/// Errors returned from `connect`.
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum ConnectError {
|
||||
/// Socket file could not be opened (no such file, permission, etc.).
|
||||
#[error("uds connect: {0}")]
|
||||
Io(#[from] std::io::Error),
|
||||
|
||||
/// `SO_PEERCRED` returned credentials that did not match the
|
||||
/// configured expected uid/gid. No automatic retry — operator
|
||||
/// intervention required.
|
||||
#[error("peer credential mismatch: expected_uid={expected_uid:?} expected_gid={expected_gid:?} actual_uid={actual_uid} actual_gid={actual_gid}")]
|
||||
PeerCredMismatch {
|
||||
expected_uid: Option<u32>,
|
||||
expected_gid: Option<u32>,
|
||||
actual_uid: u32,
|
||||
actual_gid: u32,
|
||||
},
|
||||
|
||||
/// `getsockopt` itself failed — usually a kernel-level rejection.
|
||||
/// Treated as a hard failure (no retry).
|
||||
#[error("peer credential system error: {0}")]
|
||||
PeerCredSystemError(String),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct NanoLlmClientOptions {
|
||||
pub socket_path: PathBuf,
|
||||
pub expected_peer: ExpectedPeer,
|
||||
pub request_deadline: Duration,
|
||||
pub reconnect_max: u32,
|
||||
pub reconnect_base: Duration,
|
||||
pub reconnect_cap: Duration,
|
||||
pub limits: Limits,
|
||||
}
|
||||
|
||||
impl NanoLlmClientOptions {
|
||||
pub fn new(socket_path: impl Into<PathBuf>) -> Self {
|
||||
Self {
|
||||
socket_path: socket_path.into(),
|
||||
expected_peer: ExpectedPeer::default(),
|
||||
// Per `description.md §8` 5 s ceiling.
|
||||
request_deadline: Duration::from_secs(5),
|
||||
reconnect_max: 3,
|
||||
reconnect_base: Duration::from_millis(100),
|
||||
reconnect_cap: Duration::from_secs(2),
|
||||
limits: Limits::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Long-lived NanoLLM UDS client. Cloneable handle (the inner state
|
||||
/// is an `Arc<Mutex<...>>`); a single backing connection is shared.
|
||||
#[derive(Clone)]
|
||||
pub struct NanoLlmClient {
|
||||
inner: Arc<Mutex<Inner>>,
|
||||
options: Arc<NanoLlmClientOptions>,
|
||||
}
|
||||
|
||||
struct Inner {
|
||||
/// `None` between `disconnect_locked` and the next reconnect, or
|
||||
/// when the connection has never been opened.
|
||||
stream: Option<UnixStream>,
|
||||
/// Set when `PeerCredMismatch` was observed. Hard-stops every
|
||||
/// subsequent `assess`/connect attempt until the operator
|
||||
/// rebuilds the client (i.e., restarts the process).
|
||||
peer_cred_locked: bool,
|
||||
/// Diagnostic counter for health surfaces.
|
||||
peer_cred_check_pass: u64,
|
||||
peer_cred_check_total: u64,
|
||||
/// Latency samples for `p50` / `p99` surfaces. Kept ring-buffer
|
||||
/// style to bound memory.
|
||||
latency_samples: Vec<Duration>,
|
||||
}
|
||||
|
||||
const LATENCY_RING_CAPACITY: usize = 128;
|
||||
|
||||
impl NanoLlmClient {
|
||||
/// Open the UDS connection and verify the peer's credentials.
|
||||
/// Caller-side mutex is initialised here.
|
||||
pub async fn connect(options: NanoLlmClientOptions) -> Result<Self, ConnectError> {
|
||||
let stream = open_and_check(&options.socket_path, options.expected_peer).await?;
|
||||
let inner = Inner {
|
||||
stream: Some(stream),
|
||||
peer_cred_locked: false,
|
||||
peer_cred_check_pass: 1,
|
||||
peer_cred_check_total: 1,
|
||||
latency_samples: Vec::with_capacity(LATENCY_RING_CAPACITY),
|
||||
};
|
||||
Ok(Self {
|
||||
inner: Arc::new(Mutex::new(inner)),
|
||||
options: Arc::new(options),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn socket_path(&self) -> &Path {
|
||||
&self.options.socket_path
|
||||
}
|
||||
|
||||
/// Latency samples snapshot (cloned). Caller computes p50/p99.
|
||||
pub async fn latency_samples(&self) -> Vec<Duration> {
|
||||
self.inner.lock().await.latency_samples.clone()
|
||||
}
|
||||
|
||||
/// `(passed, total)` peer-cred check counts since process start.
|
||||
pub async fn peer_cred_stats(&self) -> (u64, u64) {
|
||||
let g = self.inner.lock().await;
|
||||
(g.peer_cred_check_pass, g.peer_cred_check_total)
|
||||
}
|
||||
|
||||
/// True if a peer-cred mismatch ever occurred. Diagnostic only —
|
||||
/// every public method already short-circuits on the lock.
|
||||
pub async fn peer_cred_locked(&self) -> bool {
|
||||
self.inner.lock().await.peer_cred_locked
|
||||
}
|
||||
|
||||
/// Send a single ROI + prompt and await one assessment. Failure
|
||||
/// modes (validate / timeout / IPC error) are encoded in the
|
||||
/// returned `VlmAssessment.status` — `assess` never returns an
|
||||
/// `Err` for these recoverable cases. Hard failures (peer-cred
|
||||
/// lock, exhausted reconnect budget) DO propagate as
|
||||
/// `VlmStatus::IpcError` with `label: Error`.
|
||||
pub async fn assess(&self, roi: Vec<u8>, prompt: String) -> VlmAssessment {
|
||||
// Pre-send validation — never spend IPC time on a known-bad
|
||||
// payload (AZ-673 AC-3).
|
||||
if let Err(e) = prompt::validate(&roi, &prompt, self.options.limits) {
|
||||
return schema_invalid(format!("pre-send validate: {e}"));
|
||||
}
|
||||
|
||||
// Hard-locked by peer-cred mismatch — refuse without IPC.
|
||||
if self.inner.lock().await.peer_cred_locked {
|
||||
return ipc_error("peer-cred mismatch lock active");
|
||||
}
|
||||
|
||||
let started = std::time::Instant::now();
|
||||
let mut guard = self.inner.lock().await;
|
||||
|
||||
// Lazy reconnect if the previous request dropped the stream.
|
||||
if guard.stream.is_none() {
|
||||
match reconnect_locked(&mut guard, &self.options).await {
|
||||
Ok(()) => {}
|
||||
Err(e) => return e,
|
||||
}
|
||||
}
|
||||
|
||||
// Single shot. On any IO error we drop the stream so the next
|
||||
// call reconnects fresh.
|
||||
let stream = guard
|
||||
.stream
|
||||
.as_mut()
|
||||
.expect("stream present after reconnect");
|
||||
match timeout(
|
||||
self.options.request_deadline,
|
||||
send_and_recv(stream, &prompt, &roi),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Ok(mut assessment)) => {
|
||||
let elapsed = started.elapsed();
|
||||
push_latency(&mut guard.latency_samples, elapsed);
|
||||
if assessment.latency_ms == 0 {
|
||||
assessment.latency_ms = elapsed.as_millis().min(u32::MAX as u128) as u32;
|
||||
}
|
||||
assessment
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
tracing::warn!(error = %e, "vlm_client uds io error; dropping connection");
|
||||
guard.stream = None;
|
||||
ipc_error(format!("ipc io: {e}"))
|
||||
}
|
||||
Err(_elapsed) => {
|
||||
tracing::warn!(
|
||||
deadline_ms = self.options.request_deadline.as_millis() as u64,
|
||||
"vlm_client assess timeout"
|
||||
);
|
||||
// Drop the stream — a half-responded peer might still
|
||||
// write bytes on the next call and corrupt the frame.
|
||||
guard.stream = None;
|
||||
timeout_status(self.options.request_deadline)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn open_and_check(path: &Path, expected: ExpectedPeer) -> Result<UnixStream, ConnectError> {
|
||||
let stream = UnixStream::connect(path).await?;
|
||||
match check_peer(&stream, expected) {
|
||||
PeerCredOutcome::Match { uid, gid } => {
|
||||
tracing::info!(uid, gid, "vlm_client uds peer credential check passed");
|
||||
Ok(stream)
|
||||
}
|
||||
PeerCredOutcome::SkippedNonLinux => Ok(stream),
|
||||
PeerCredOutcome::Mismatch {
|
||||
expected_uid,
|
||||
expected_gid,
|
||||
actual_uid,
|
||||
actual_gid,
|
||||
} => Err(ConnectError::PeerCredMismatch {
|
||||
expected_uid,
|
||||
expected_gid,
|
||||
actual_uid,
|
||||
actual_gid,
|
||||
}),
|
||||
PeerCredOutcome::SystemError(s) => Err(ConnectError::PeerCredSystemError(s)),
|
||||
}
|
||||
}
|
||||
|
||||
async fn reconnect_locked(
|
||||
guard: &mut Inner,
|
||||
options: &NanoLlmClientOptions,
|
||||
) -> Result<(), VlmAssessment> {
|
||||
let mut delay = options.reconnect_base;
|
||||
for attempt in 1..=options.reconnect_max {
|
||||
match open_and_check(&options.socket_path, options.expected_peer).await {
|
||||
Ok(s) => {
|
||||
guard.stream = Some(s);
|
||||
guard.peer_cred_check_pass = guard.peer_cred_check_pass.saturating_add(1);
|
||||
guard.peer_cred_check_total = guard.peer_cred_check_total.saturating_add(1);
|
||||
return Ok(());
|
||||
}
|
||||
Err(ConnectError::PeerCredMismatch { .. }) => {
|
||||
guard.peer_cred_locked = true;
|
||||
guard.peer_cred_check_total = guard.peer_cred_check_total.saturating_add(1);
|
||||
return Err(ipc_error("peer-cred mismatch on reconnect"));
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
error = %e,
|
||||
attempt,
|
||||
max = options.reconnect_max,
|
||||
"vlm_client reconnect failed; backing off"
|
||||
);
|
||||
tokio::time::sleep(delay).await;
|
||||
delay = (delay * 2).min(options.reconnect_cap);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(ipc_error("reconnect budget exhausted"))
|
||||
}
|
||||
|
||||
async fn send_and_recv(
|
||||
stream: &mut UnixStream,
|
||||
prompt: &str,
|
||||
roi: &[u8],
|
||||
) -> Result<VlmAssessment, WireError> {
|
||||
write_request(stream, prompt, roi).await?;
|
||||
let resp = read_response(stream).await?;
|
||||
Ok(resp)
|
||||
}
|
||||
|
||||
fn push_latency(samples: &mut Vec<Duration>, d: Duration) {
|
||||
if samples.len() == LATENCY_RING_CAPACITY {
|
||||
samples.remove(0);
|
||||
}
|
||||
samples.push(d);
|
||||
}
|
||||
|
||||
fn schema_invalid(reason: impl Into<String>) -> VlmAssessment {
|
||||
VlmAssessment {
|
||||
label: VlmLabel::Inconclusive,
|
||||
confidence: 0.0,
|
||||
evidence_spans: Vec::new(),
|
||||
reason: reason.into(),
|
||||
status: VlmStatus::SchemaInvalid,
|
||||
latency_ms: 0,
|
||||
model_version: String::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn ipc_error(reason: impl Into<String>) -> VlmAssessment {
|
||||
VlmAssessment {
|
||||
label: VlmLabel::Error,
|
||||
confidence: 0.0,
|
||||
evidence_spans: Vec::new(),
|
||||
reason: reason.into(),
|
||||
status: VlmStatus::IpcError,
|
||||
latency_ms: 0,
|
||||
model_version: String::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn timeout_status(deadline: Duration) -> VlmAssessment {
|
||||
VlmAssessment {
|
||||
label: VlmLabel::Inconclusive,
|
||||
confidence: 0.0,
|
||||
evidence_spans: Vec::new(),
|
||||
reason: format!("ipc deadline {} ms elapsed", deadline.as_millis()),
|
||||
status: VlmStatus::Timeout,
|
||||
latency_ms: deadline.as_millis().min(u32::MAX as u128) as u32,
|
||||
model_version: String::new(),
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,156 @@
|
||||
//! Wire framing for NanoLLM UDS IPC.
|
||||
//!
|
||||
//! Single request → single response, length-prefixed JSON:
|
||||
//!
|
||||
//! ```text
|
||||
//! uint32 BE length || JSON payload
|
||||
//! ```
|
||||
//!
|
||||
//! The request payload is `{"prompt": "...", "roi_b64": "..."}`. The
|
||||
//! response payload is a `shared::models::vlm::VlmAssessment` JSON
|
||||
//! object — the same shape `VlmProvider::assess` returns. AZ-674 will
|
||||
//! add schema-version validation on top of this; AZ-673 leaves the
|
||||
//! body un-validated beyond `serde_json::from_slice`.
|
||||
|
||||
use base64::Engine;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use shared::models::vlm::VlmAssessment;
|
||||
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
|
||||
|
||||
/// Hard maximum on any single inbound frame. Defends against a peer
|
||||
/// (or a corrupted peer) declaring an arbitrarily large length.
/// `write_request` applies the same cap to outbound request bodies.
/// The value is 8 MiB.
|
||||
pub const MAX_FRAME_BYTES: u32 = 8 * 1024 * 1024;
|
||||
|
||||
/// JSON request envelope that `write_request` serialises into one
/// length-prefixed frame.
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct AssessRequest {
|
||||
/// Prompt text, copied verbatim from the caller.
pub prompt: String,
|
||||
/// Base64-encoded ROI bytes. Kept inline in the JSON envelope so
|
||||
/// the wire is one read/write per direction.
/// Encoded with the standard base64 alphabet engine.
|
||||
pub roi_b64: String,
|
||||
}
|
||||
|
||||
/// Errors produced while writing a request frame or reading a
/// response frame.
#[derive(Debug, thiserror::Error)]
|
||||
pub enum WireError {
|
||||
/// Underlying read/write/flush failure on the transport.
#[error("io: {0}")]
|
||||
Io(#[from] std::io::Error),
|
||||
|
||||
/// A frame length exceeded `MAX_FRAME_BYTES`; carries the offending length.
#[error("frame too large: {0} bytes (max {MAX_FRAME_BYTES})")]
|
||||
FrameTooLarge(u32),
|
||||
|
||||
/// Request serialisation or response deserialisation failed.
#[error("json: {0}")]
|
||||
Json(#[from] serde_json::Error),
|
||||
|
||||
/// The frame body ended before the declared length was read.
#[error("unexpected eof while reading frame body")]
|
||||
UnexpectedEof,
|
||||
}
|
||||
|
||||
pub async fn write_request<W: AsyncWrite + Unpin>(
|
||||
w: &mut W,
|
||||
prompt: &str,
|
||||
roi: &[u8],
|
||||
) -> Result<(), WireError> {
|
||||
let req = AssessRequest {
|
||||
prompt: prompt.to_string(),
|
||||
roi_b64: base64::engine::general_purpose::STANDARD.encode(roi),
|
||||
};
|
||||
let body = serde_json::to_vec(&req)?;
|
||||
let len = body.len() as u32;
|
||||
if len > MAX_FRAME_BYTES {
|
||||
return Err(WireError::FrameTooLarge(len));
|
||||
}
|
||||
w.write_all(&len.to_be_bytes()).await?;
|
||||
w.write_all(&body).await?;
|
||||
w.flush().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn read_response<R: AsyncRead + Unpin>(r: &mut R) -> Result<VlmAssessment, WireError> {
|
||||
let mut lenbuf = [0u8; 4];
|
||||
r.read_exact(&mut lenbuf).await?;
|
||||
let len = u32::from_be_bytes(lenbuf);
|
||||
if len > MAX_FRAME_BYTES {
|
||||
return Err(WireError::FrameTooLarge(len));
|
||||
}
|
||||
let mut body = vec![0u8; len as usize];
|
||||
let n = r.read_exact(&mut body).await?;
|
||||
if n != body.len() {
|
||||
return Err(WireError::UnexpectedEof);
|
||||
}
|
||||
let assessment: VlmAssessment = serde_json::from_slice(&body)?;
|
||||
Ok(assessment)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use shared::models::vlm::{VlmLabel, VlmStatus};
    use tokio::io::duplex;

    /// Stand-in ROI payload (JFIF-style header bytes).
    const ROI_BYTES: &[u8] = b"\xff\xd8\xff\xe0\x00\x10JFIF";

    #[tokio::test]
    async fn round_trip_request_and_response() {
        // Arrange
        let (mut client_end, mut peer_end) = duplex(64 * 1024);
        let prompt = "describe";
        let roi = ROI_BYTES.to_vec();

        // Act — the spawned fixture plays the peer: it decodes the
        // request frame, checks it, and answers with a canned response.
        let fixture = tokio::spawn(async move {
            // Read request frame.
            let mut prefix = [0u8; 4];
            peer_end.read_exact(&mut prefix).await.unwrap();
            let mut raw_request = vec![0u8; u32::from_be_bytes(prefix) as usize];
            peer_end.read_exact(&mut raw_request).await.unwrap();

            let req: AssessRequest = serde_json::from_slice(&raw_request).unwrap();
            assert_eq!(req.prompt, "describe");
            let decoded_roi = base64::engine::general_purpose::STANDARD
                .decode(req.roi_b64)
                .unwrap();
            assert_eq!(decoded_roi.as_slice(), ROI_BYTES);

            // Write canned response.
            let canned = VlmAssessment {
                label: VlmLabel::ConfirmedConcealedPosition,
                confidence: 0.91,
                evidence_spans: vec!["foliage".into()],
                reason: "match".into(),
                status: VlmStatus::Ok,
                latency_ms: 12,
                model_version: "VILA1.5-3B-int4".into(),
            };
            let payload = serde_json::to_vec(&canned).unwrap();
            let payload_len = payload.len() as u32;
            peer_end
                .write_all(&payload_len.to_be_bytes())
                .await
                .unwrap();
            peer_end.write_all(&payload).await.unwrap();
            peer_end.flush().await.unwrap();
        });

        write_request(&mut client_end, prompt, &roi).await.unwrap();
        let resp = read_response(&mut client_end).await.unwrap();
        fixture.await.unwrap();

        // Assert
        assert_eq!(resp.status, VlmStatus::Ok);
        assert_eq!(resp.label, VlmLabel::ConfirmedConcealedPosition);
        assert_eq!(resp.model_version, "VILA1.5-3B-int4");
    }

    #[tokio::test]
    async fn rejects_oversized_inbound_frame() {
        // Arrange — the peer declares a length one byte over the cap.
        let (mut client_end, mut peer_end) = duplex(64);
        let declared = MAX_FRAME_BYTES + 1;
        peer_end.write_all(&declared.to_be_bytes()).await.unwrap();
        peer_end.flush().await.unwrap();

        // Act
        let outcome = read_response(&mut client_end).await;

        // Assert
        let err = outcome.unwrap_err();
        assert!(matches!(err, WireError::FrameTooLarge(n) if n == declared));
    }
}
|
||||
@@ -6,17 +6,26 @@
|
||||
//! never references `vlm_client::VlmClient`.
|
||||
//!
|
||||
//! With the `vlm` feature **on**, `VlmClient` is the real NanoLLM IPC
|
||||
//! client. The IPC plumbing itself lands in:
|
||||
//! - AZ-673 `vlm_client_nanollm_ipc`
|
||||
//! - AZ-674 `vlm_client_schema_and_model_version`
|
||||
//!
|
||||
//! AZ-672 only wires the trait contract + feature flag.
|
||||
//! client:
|
||||
//! - AZ-672 wired the trait contract + feature flag.
|
||||
//! - AZ-673 (this revision) added the UDS connection, SO_PEERCRED
|
||||
//! check, pre-send validation, bounded request deadline, bounded
|
||||
//! reconnect.
|
||||
//! - AZ-674 will add `VlmAssessment` schema-version validation on top.
|
||||
|
||||
#[cfg(feature = "vlm")]
|
||||
mod enabled;
|
||||
#[cfg(feature = "vlm")]
|
||||
mod internal;
|
||||
|
||||
#[cfg(feature = "vlm")]
|
||||
pub use enabled::VlmClient;
|
||||
#[cfg(feature = "vlm")]
|
||||
pub use internal::peer_cred::ExpectedPeer;
|
||||
#[cfg(feature = "vlm")]
|
||||
pub use internal::prompt::Limits;
|
||||
#[cfg(feature = "vlm")]
|
||||
pub use internal::uds_client::{ConnectError, NanoLlmClient, NanoLlmClientOptions};
|
||||
|
||||
/// Stable name used by tracing + `/health` to identify this crate's
|
||||
/// build-time configuration. Mirrors `VlmProvider::name()`.
|
||||
|
||||
Reference in New Issue
Block a user