//! NanoLLM response → `VlmAssessment` parsing + model-version tracking. //! //! AZ-674 introduces a separation between the wire layer (which //! returns raw bytes once the length prefix has been consumed) and //! the parsing layer (this module), which: //! //! 1. Validates the JSON against the `VlmAssessment` schema. Missing //! required fields, wrong types, or anything else that fails //! `serde_json::from_slice` returns //! `VlmAssessment { status: SchemaInvalid, … }` — **NOT** an //! `Err`. Schema-invalid is a recoverable outcome, observable by //! `scan_controller`. //! 2. Logs the raw response (size-capped) at `warn` level whenever a //! schema-invalid is returned. The cap is configurable; default //! 4 KiB per AZ-674 §Scope. //! 3. Tracks `model_version` across calls and emits a single //! `info!` log line the first time a new version is observed. //! //! Required schema fields: `label`, `confidence`, `status`, //! `model_version`, `latency_ms`. `evidence_spans` and `reason` are //! optional (serde defaults to `Vec::new()` / `String::new()`). use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Mutex; use serde::Deserialize; use shared::models::vlm::{VlmAssessment, VlmLabel, VlmStatus}; /// Default size cap for the raw-response log on schema-invalid. pub const DEFAULT_LOG_TRUNCATION_BYTES: usize = 4 * 1024; /// Parser + model-version tracker. Cloneable via `Arc` if a single /// instance must be shared across tasks; the inner state is internally /// synchronised. pub struct AssessmentParser { last_model_version: Mutex>, schema_invalid_count: AtomicU64, model_version_changes: AtomicU64, log_truncation_bytes: usize, } impl AssessmentParser { pub fn new() -> Self { Self::with_truncation_bytes(DEFAULT_LOG_TRUNCATION_BYTES) } pub fn with_truncation_bytes(bytes: usize) -> Self { Self { last_model_version: Mutex::new(None), schema_invalid_count: AtomicU64::new(0), model_version_changes: AtomicU64::new(0), log_truncation_bytes: bytes, } } /// Parse a raw response body into a `VlmAssessment`. A /// schema-invalid response returns `VlmAssessment { status: /// SchemaInvalid, … }`; never returns `Err`. pub fn parse(&self, raw: &[u8]) -> VlmAssessment { let assessment: VlmAssessment = match serde_json::from_slice::(raw) { Ok(wire) => wire.into(), Err(e) => { self.schema_invalid_count.fetch_add(1, Ordering::Relaxed); let excerpt = excerpt(raw, self.log_truncation_bytes); tracing::warn!( error = %e, raw_excerpt = %excerpt, raw_bytes = raw.len(), "vlm_client schema-invalid response" ); return schema_invalid(format!("json: {e}")); } }; self.track_model_version(&assessment.model_version); assessment } /// Cumulative count of schema-invalid responses observed by this /// parser instance. Used by the health surface. pub fn schema_invalid_count(&self) -> u64 { self.schema_invalid_count.load(Ordering::Relaxed) } /// Cumulative count of `model_version` change events emitted. /// First successful parse counts as one change (None → "v1.0"). pub fn model_version_changes(&self) -> u64 { self.model_version_changes.load(Ordering::Relaxed) } /// Latest seen `model_version` (`None` before the first /// successful parse). pub fn current_model_version(&self) -> Option { self.last_model_version .lock() .map(|g| g.clone()) .unwrap_or(None) } fn track_model_version(&self, current: &str) { let mut guard = match self.last_model_version.lock() { Ok(g) => g, Err(_) => return, }; let changed = !matches!(guard.as_deref(), Some(prev) if prev == current); if changed { let previous = guard.clone(); *guard = Some(current.to_string()); self.model_version_changes.fetch_add(1, Ordering::Relaxed); tracing::info!( previous = previous.as_deref().unwrap_or(""), current = current, "vlm_client model_version changed" ); } } } impl Default for AssessmentParser { fn default() -> Self { Self::new() } } /// Wire-side parse target. Matches the production NanoLLM envelope /// per `description.md §8`. Required fields are non-`Option`; serde /// will refuse to deserialise without them. Optional fields default /// to empty. #[derive(Debug, Deserialize)] struct VlmAssessmentWire { label: VlmLabel, confidence: f32, #[serde(default)] evidence_spans: Vec, #[serde(default)] reason: String, status: VlmStatus, latency_ms: u32, model_version: String, } impl From for VlmAssessment { fn from(w: VlmAssessmentWire) -> Self { Self { label: w.label, confidence: w.confidence, evidence_spans: w.evidence_spans, reason: w.reason, status: w.status, latency_ms: w.latency_ms, model_version: w.model_version, } } } fn schema_invalid(reason: impl Into) -> VlmAssessment { VlmAssessment { label: VlmLabel::Inconclusive, confidence: 0.0, evidence_spans: Vec::new(), reason: reason.into(), status: VlmStatus::SchemaInvalid, latency_ms: 0, model_version: String::new(), } } fn excerpt(raw: &[u8], cap: usize) -> String { let cap = cap.min(raw.len()); let slice = &raw[..cap]; let mut s = String::from_utf8_lossy(slice).into_owned(); if raw.len() > cap { s.push_str(&format!("…[truncated, {} more bytes]", raw.len() - cap)); } s } #[cfg(test)] mod tests { use super::*; fn ok_response_bytes() -> Vec { let s = r#"{ "label":"confirmed_concealed_position", "confidence":0.85, "evidence_spans":["foliage"], "reason":"match", "status":"ok", "latency_ms":42, "model_version":"VILA1.5-3B-int4" }"#; s.as_bytes().to_vec() } #[test] fn parses_valid_payload() { // Arrange let parser = AssessmentParser::new(); // Act let a = parser.parse(&ok_response_bytes()); // Assert assert_eq!(a.status, VlmStatus::Ok); assert_eq!(a.model_version, "VILA1.5-3B-int4"); assert_eq!(parser.schema_invalid_count(), 0); } #[test] fn missing_required_field_returns_schema_invalid() { // Arrange — drop `model_version` from the payload. let raw = br#"{ "label":"confirmed_concealed_position", "confidence":0.85, "status":"ok", "latency_ms":42 }"#; let parser = AssessmentParser::new(); // Act let a = parser.parse(raw); // Assert assert_eq!(a.status, VlmStatus::SchemaInvalid); assert_eq!(parser.schema_invalid_count(), 1); } #[test] fn excerpt_truncates_long_bodies() { // Arrange let raw = vec![b'a'; 8192]; // Act let s = excerpt(&raw, 16); // Assert assert!(s.starts_with("aaaaaaaaaaaaaaaa")); assert!(s.contains("truncated")); } }