// autopilot/crates/vlm_client/src/internal/parser.rs

//! NanoLLM response → `VlmAssessment` parsing + model-version tracking.
//!
//! AZ-674 introduces a separation between the wire layer (which
//! returns raw bytes once the length prefix has been consumed) and
//! the parsing layer (this module), which:
//!
//! 1. Validates the JSON against the `VlmAssessment` schema. Missing
//!    required fields, wrong types, or anything else that fails
//!    `serde_json::from_slice` returns
//!    `VlmAssessment { status: SchemaInvalid, … }` — **NOT** an
//!    `Err`. Schema-invalid is a recoverable outcome, observable by
//!    `scan_controller`.
//! 2. Logs the raw response (size-capped) at `warn` level whenever a
//!    schema-invalid is returned. The cap is configurable; default
//!    4 KiB per AZ-674 §Scope.
//! 3. Tracks `model_version` across calls and emits a single
//!    `info!` log line the first time a new version is observed.
//!
//! Required schema fields: `label`, `confidence`, `status`,
//! `model_version`, `latency_ms`. `evidence_spans` and `reason` are
//! optional (serde defaults to `Vec::new()` / `String::new()`).

use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Mutex;

use serde::Deserialize;
use shared::models::vlm::{VlmAssessment, VlmLabel, VlmStatus};

/// Default cap, in bytes, on how much of the raw response is quoted in
/// the log line emitted for a schema-invalid response (4 KiB).
pub const DEFAULT_LOG_TRUNCATION_BYTES: usize = 4_096;

/// Parser + model-version tracker.
///
/// The type is not `Clone`; wrap it in an `Arc` if a single instance
/// must be shared across tasks. The inner state is internally
/// synchronised (a `Mutex` plus atomic counters), so shared references
/// are sufficient to use it.
#[derive(Debug)]
pub struct AssessmentParser {
    /// Most recently observed `model_version`; `None` until one has
    /// been recorded.
    last_model_version: Mutex<Option<String>>,
    /// Running count of responses that failed schema validation.
    schema_invalid_count: AtomicU64,
    /// Running count of observed `model_version` changes.
    model_version_changes: AtomicU64,
    /// Maximum number of raw response bytes quoted in the
    /// schema-invalid log line.
    log_truncation_bytes: usize,
}

impl AssessmentParser {
    pub fn new() -> Self {
        Self::with_truncation_bytes(DEFAULT_LOG_TRUNCATION_BYTES)
    }

    pub fn with_truncation_bytes(bytes: usize) -> Self {
        Self {
            last_model_version: Mutex::new(None),
            schema_invalid_count: AtomicU64::new(0),
            model_version_changes: AtomicU64::new(0),
            log_truncation_bytes: bytes,
        }
    }

    /// Parse a raw response body into a `VlmAssessment`. A
    /// schema-invalid response returns `VlmAssessment { status:
    /// SchemaInvalid, … }`; never returns `Err`.
    pub fn parse(&self, raw: &[u8]) -> VlmAssessment {
        let assessment: VlmAssessment = match serde_json::from_slice::<VlmAssessmentWire>(raw) {
            Ok(wire) => wire.into(),
            Err(e) => {
                self.schema_invalid_count.fetch_add(1, Ordering::Relaxed);
                let excerpt = excerpt(raw, self.log_truncation_bytes);
                tracing::warn!(
                    error = %e,
                    raw_excerpt = %excerpt,
                    raw_bytes = raw.len(),
                    "vlm_client schema-invalid response"
                );
                return schema_invalid(format!("json: {e}"));
            }
        };
        self.track_model_version(&assessment.model_version);
        assessment
    }

    /// Cumulative count of schema-invalid responses observed by this
    /// parser instance. Used by the health surface.
    pub fn schema_invalid_count(&self) -> u64 {
        self.schema_invalid_count.load(Ordering::Relaxed)
    }

    /// Cumulative count of `model_version` change events emitted.
    /// First successful parse counts as one change (None → "v1.0").
    pub fn model_version_changes(&self) -> u64 {
        self.model_version_changes.load(Ordering::Relaxed)
    }

    /// Latest seen `model_version` (`None` before the first
    /// successful parse).
    pub fn current_model_version(&self) -> Option<String> {
        self.last_model_version
            .lock()
            .map(|g| g.clone())
            .unwrap_or(None)
    }

    fn track_model_version(&self, current: &str) {
        let mut guard = match self.last_model_version.lock() {
            Ok(g) => g,
            Err(_) => return,
        };
        let changed = !matches!(guard.as_deref(), Some(prev) if prev == current);
        if changed {
            let previous = guard.clone();
            *guard = Some(current.to_string());
            self.model_version_changes.fetch_add(1, Ordering::Relaxed);
            tracing::info!(
                previous = previous.as_deref().unwrap_or("<none>"),
                current = current,
                "vlm_client model_version changed"
            );
        }
    }
}

impl Default for AssessmentParser {
    fn default() -> Self {
        Self::new()
    }
}

/// Wire-side parse target. Matches the production NanoLLM envelope
/// per `description.md §8`. Required fields are non-`Option`; serde
/// will refuse to deserialise without them. Optional fields default
/// to empty.
///
/// The struct is private to this module and is converted into the
/// shared `VlmAssessment` via the `From` impl below. No
/// `deny_unknown_fields` attribute is set, so serde's default handling
/// of extra keys applies.
#[derive(Debug, Deserialize)]
struct VlmAssessmentWire {
    /// Required. Deserialised as a `VlmLabel`.
    label: VlmLabel,
    /// Required. NOTE(review): valid range is not checked here —
    /// confirm whether callers rely on a particular range.
    confidence: f32,
    /// Optional. Absent key deserialises to an empty `Vec`.
    #[serde(default)]
    evidence_spans: Vec<String>,
    /// Optional. Absent key deserialises to an empty `String`.
    #[serde(default)]
    reason: String,
    /// Required. Deserialised as a `VlmStatus`.
    status: VlmStatus,
    /// Required. Presumably milliseconds, going by the name — confirm
    /// against the envelope spec.
    latency_ms: u32,
    /// Required. Passed to the parser's model-version tracking after a
    /// successful parse.
    model_version: String,
}

impl From<VlmAssessmentWire> for VlmAssessment {
    fn from(w: VlmAssessmentWire) -> Self {
        Self {
            label: w.label,
            confidence: w.confidence,
            evidence_spans: w.evidence_spans,
            reason: w.reason,
            status: w.status,
            latency_ms: w.latency_ms,
            model_version: w.model_version,
        }
    }
}

/// Builds the placeholder assessment returned for a response that
/// failed schema validation: status `SchemaInvalid`, label
/// `Inconclusive`, zero confidence and latency, no evidence spans, an
/// empty model version, and `reason` carrying the failure description.
fn schema_invalid(reason: impl Into<String>) -> VlmAssessment {
    let reason: String = reason.into();
    VlmAssessment {
        status: VlmStatus::SchemaInvalid,
        label: VlmLabel::Inconclusive,
        reason,
        confidence: 0.0,
        latency_ms: 0,
        evidence_spans: vec![],
        model_version: String::new(),
    }
}

/// Renders at most `cap` leading bytes of `raw` as text for logging.
///
/// Bytes that are not valid UTF-8 are rendered lossily (U+FFFD). When
/// `raw` is longer than the excerpt, a `…[truncated, N more bytes]`
/// marker is appended, where `N` is the number of bytes left out.
///
/// The cut never splits a valid multi-byte UTF-8 character: if `cap`
/// lands inside one, the excerpt ends just before that character (so
/// it may be up to three bytes shorter than `cap`) instead of ending
/// in a spurious replacement character.
fn excerpt(raw: &[u8], cap: usize) -> String {
    let cut = utf8_safe_cut(raw, cap.min(raw.len()));
    let mut s = String::from_utf8_lossy(&raw[..cut]).into_owned();
    if raw.len() > cut {
        s.push_str(&format!("…[truncated, {} more bytes]", raw.len() - cut));
    }
    s
}

/// Returns the largest index `<= cut` that does not fall inside a
/// valid UTF-8 character of `raw`. Requires `cut <= raw.len()`.
fn utf8_safe_cut(raw: &[u8], cut: usize) -> usize {
    // A UTF-8 character is at most 4 bytes long, so one that straddles
    // `cut` must start within the 3 bytes preceding it.
    for start in (cut.saturating_sub(3)..cut).rev() {
        let width = match raw[start] {
            // Continuation byte: keep walking back to find the lead byte.
            0x80..=0xBF => continue,
            0xC2..=0xDF => 2,
            0xE0..=0xEF => 3,
            0xF0..=0xF4 => 4,
            // ASCII or a byte that can never start a character: the
            // cut is not inside a valid character.
            _ => break,
        };
        let end = start + width;
        // Only back off when the full character really is present and
        // valid in `raw`; otherwise leave invalid bytes visible in the
        // excerpt as replacement characters.
        if end > cut && end <= raw.len() && std::str::from_utf8(&raw[start..end]).is_ok() {
            return start;
        }
        break;
    }
    cut
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Bytes of a well-formed response that carries every required and
    /// optional field of the wire schema.
    fn ok_response_bytes() -> Vec<u8> {
        br#"{
            "label":"confirmed_concealed_position",
            "confidence":0.85,
            "evidence_spans":["foliage"],
            "reason":"match",
            "status":"ok",
            "latency_ms":42,
            "model_version":"VILA1.5-3B-int4"
        }"#
        .to_vec()
    }

    /// A complete payload parses to an `Ok` assessment and leaves the
    /// schema-invalid counter untouched.
    #[test]
    fn parses_valid_payload() {
        // Arrange
        let body = ok_response_bytes();
        let sut = AssessmentParser::new();

        // Act
        let assessment = sut.parse(&body);

        // Assert
        assert_eq!(assessment.status, VlmStatus::Ok);
        assert_eq!(assessment.model_version, "VILA1.5-3B-int4");
        assert_eq!(sut.schema_invalid_count(), 0);
    }

    /// A payload without the required `model_version` key is reported
    /// as schema-invalid and counted once.
    #[test]
    fn missing_required_field_returns_schema_invalid() {
        // Arrange — `model_version` is intentionally absent.
        let sut = AssessmentParser::new();
        let body: &[u8] = br#"{
            "label":"confirmed_concealed_position",
            "confidence":0.85,
            "status":"ok",
            "latency_ms":42
        }"#;

        // Act
        let assessment = sut.parse(body);

        // Assert
        assert_eq!(assessment.status, VlmStatus::SchemaInvalid);
        assert_eq!(sut.schema_invalid_count(), 1);
    }

    /// A body longer than the cap is cut to the cap and tagged with
    /// the truncation marker.
    #[test]
    fn excerpt_truncates_long_bodies() {
        // Arrange
        let body = [b'a'; 8192];

        // Act
        let rendered = excerpt(&body, 16);

        // Assert
        assert!(rendered.starts_with("aaaaaaaaaaaaaaaa"));
        assert!(rendered.contains("truncated"));
    }
}