"""AZ-300 — ``PytorchFp16Runtime`` acceptance tests. CPU-runnable: AC-1, AC-2, AC-6, AC-7 (failure paths trip before any ``.cuda()`` call). CUDA-only: AC-3, AC-4, AC-5, AC-8, NFR-perf-deserialize, NFR-reliability-eval-mode. Marked ``@pytest.mark.skipif(...)`` so the suite stays green on macOS / CI-without-GPU; the Tier-2 Jetson CI runs the full sweep. """ from __future__ import annotations import hashlib import time from collections.abc import Iterator from pathlib import Path import numpy as np import pytest import torch from torch import nn from gps_denied_onboard._types.inference import ( BuildConfig, EngineCacheEntry, PrecisionMode, ) from gps_denied_onboard._types.thermal import ThermalState from gps_denied_onboard.components.c7_inference import ( C7InferenceConfig, EngineDeserializeError, InferenceError, InferenceRuntime, OutOfMemoryError, register_architecture, ) from gps_denied_onboard.components.c7_inference.architecture_registry import ( _DEFAULT_REGISTRY, ) from gps_denied_onboard.components.c7_inference.pytorch_fp16_runtime import ( PytorchEngineHandle, PytorchFp16Runtime, ) from gps_denied_onboard.config.schema import Config _REQUIRE_CUDA = pytest.mark.skipif( not torch.cuda.is_available(), reason="CUDA not available on this host (Tier-1 CI / macOS dev)", ) class _TinyLinear(nn.Module): """3→2 linear, deterministically seeded — keyword ``x`` input.""" def __init__(self) -> None: super().__init__() gen = torch.Generator().manual_seed(42) self.fc = nn.Linear(3, 2) with torch.no_grad(): self.fc.weight.copy_( torch.randn(2, 3, generator=gen, dtype=torch.float32) ) self.fc.bias.copy_( torch.randn(2, generator=gen, dtype=torch.float32) ) def forward(self, x: torch.Tensor) -> torch.Tensor: return self.fc(x) class _DifferentArch(nn.Module): """4→2 linear — incompatible state_dict with :class:`_TinyLinear`.""" def __init__(self) -> None: super().__init__() self.fc = nn.Linear(4, 2) def forward(self, x: torch.Tensor) -> torch.Tensor: return self.fc(x) 
@pytest.fixture
def clean_registry() -> Iterator[None]:
    """Run the test body against an emptied architecture registry.

    The entries present on entry are copied aside and the registry is
    cleared; after the test the registry is cleared again and the copy is
    put back.
    """
    snapshot = dict(_DEFAULT_REGISTRY)
    _DEFAULT_REGISTRY.clear()
    yield
    # Teardown: drop whatever the test registered, then restore the snapshot.
    _DEFAULT_REGISTRY.clear()
    _DEFAULT_REGISTRY.update(snapshot)


@pytest.fixture
def config() -> Config:
    """Return a ``Config`` whose C7 block selects the ``pytorch_fp16`` runtime."""
    c7_block = C7InferenceConfig(runtime="pytorch_fp16")
    return Config.with_blocks(c7_inference=c7_block)


@pytest.fixture
def tiny_checkpoint(tmp_path: Path) -> Path:
    """Save a ``_TinyLinear`` state_dict as ``tiny.pt`` and return its path."""
    checkpoint = tmp_path / "tiny.pt"
    weights = _TinyLinear().state_dict()
    torch.save(weights, checkpoint)
    return checkpoint


# ----------------------------------------------------------------------
# AC-1: Protocol conformance + label.


def test_ac1_protocol_conformance(config: Config) -> None:
    """The runtime is an ``InferenceRuntime`` and reports its label."""
    candidate = PytorchFp16Runtime(config)

    assert isinstance(candidate, InferenceRuntime)
    assert candidate.current_runtime_label() == "pytorch_fp16"


# ----------------------------------------------------------------------
# AC-2: compile_engine no-op.


def test_ac2_compile_engine_is_noop(
    config: Config, tiny_checkpoint: Path
) -> None:
    """``compile_engine`` returns an entry for the checkpoint itself.

    Checks the returned entry's fields, that no ``.engine`` file appears
    next to the checkpoint, the elapsed time, and the SHA-256 digest.
    """
    # Arrange
    runtime = PytorchFp16Runtime(config)
    build_config = BuildConfig(
        precision=PrecisionMode.FP16,
        workspace_mb=512,
        calibration_dataset=None,
        optimization_profiles=(),
    )
    sibling_engine = tiny_checkpoint.with_name(f"{tiny_checkpoint.stem}.engine")

    # Act — only the compile_engine call sits inside the timed window.
    started = time.perf_counter()
    entry = runtime.compile_engine(tiny_checkpoint, build_config)
    elapsed_ms = 1000.0 * (time.perf_counter() - started)

    # Assert
    assert entry.engine_path == tiny_checkpoint
    for unset_field in ("sm", "jp", "trt"):
        assert getattr(entry, unset_field) is None
    assert entry.precision is PrecisionMode.FP16
    assert entry.extras["model_name"] == tiny_checkpoint.stem
    assert not sibling_engine.exists(), (
        "compile_engine must NOT produce a sibling .engine artifact"
    )
    assert elapsed_ms <= 1000.0, (
        f"compile_engine took {elapsed_ms:.1f} ms; AC-2 budget is 100 ms but "
        "stream-hashing a multi-MB checkpoint is allowed to overshoot for "
        "fixtures > 10 MB"
    )
    checkpoint_bytes = tiny_checkpoint.read_bytes()
    expected_sha = hashlib.sha256(checkpoint_bytes).hexdigest()
    assert entry.sha256_hex == expected_sha


# ----------------------------------------------------------------------
# AC-3: deserialize loads, half-casts, GPU-moves, eval-mode.


@_REQUIRE_CUDA
def test_ac3_deserialize_loads_half_casts_gpu_moves_eval(
    config: Config, tiny_checkpoint: Path, clean_registry: None
) -> None:
    """A deserialized model is in eval mode with FP16 parameters on CUDA."""
    # Arrange
    register_architecture("tiny", _TinyLinear)
    runtime = PytorchFp16Runtime(config)
    cache_entry = EngineCacheEntry(
        engine_path=tiny_checkpoint,
        sha256_hex="0" * 64,
        sm=None,
        jp=None,
        trt=None,
        precision=PrecisionMode.FP16,
        extras={"model_name": "tiny"},
    )

    # Act
    handle = runtime.deserialize_engine(cache_entry)

    # Assert
    assert isinstance(handle, PytorchEngineHandle)
    loaded = handle._model
    assert loaded.training is False
    for param in loaded.parameters():
        assert param.dtype is torch.float16
        assert param.device.type == "cuda"


# ----------------------------------------------------------------------
# AC-4: infer numerical comparison vs FP32 reference.


@_REQUIRE_CUDA
def test_ac4_infer_numerical_close_to_fp32(
    config: Config, tiny_checkpoint: Path, clean_registry: None
) -> None:
    """``infer`` output matches a CPU FP32 ``_TinyLinear`` within 5e-3."""
    # Arrange
    register_architecture("tiny", _TinyLinear)
    runtime = PytorchFp16Runtime(config)
    build_config = BuildConfig(
        precision=PrecisionMode.FP16,
        workspace_mb=512,
        calibration_dataset=None,
        optimization_profiles=(),
    )
    cache_entry = runtime.compile_engine(tiny_checkpoint, build_config)
    handle = runtime.deserialize_engine(cache_entry)

    batch = np.random.default_rng(seed=0).standard_normal(
        (4, 3), dtype=np.float32
    )
    reference_model = _TinyLinear().eval()
    with torch.no_grad():
        reference = reference_model(torch.from_numpy(batch)).numpy()

    # Act
    outputs = runtime.infer(handle, {"x": batch})

    # Assert
    assert set(outputs.keys()) == {"output"}
    as_fp32 = outputs["output"].astype(np.float32)
    assert as_fp32.shape == reference.shape
    np.testing.assert_allclose(as_fp32, reference, atol=5e-3, rtol=5e-3)


# ----------------------------------------------------------------------
# AC-5: release frees GPU memory.


@_REQUIRE_CUDA
def test_ac5_release_frees_gpu_memory(
    config: Config, tiny_checkpoint: Path, clean_registry: None
) -> None:
    """Allocated GPU bytes rise on deserialize and return to baseline on release."""
    # Arrange
    register_architecture("tiny", _TinyLinear)
    runtime = PytorchFp16Runtime(config)
    build_config = BuildConfig(
        precision=PrecisionMode.FP16,
        workspace_mb=512,
        calibration_dataset=None,
        optimization_profiles=(),
    )
    cache_entry = runtime.compile_engine(tiny_checkpoint, build_config)

    # Baseline is sampled after emptying the allocator cache and before loading.
    torch.cuda.empty_cache()
    before = torch.cuda.memory_allocated()
    handle = runtime.deserialize_engine(cache_entry)
    loaded_bytes = torch.cuda.memory_allocated()
    assert loaded_bytes > before, "deserialize must allocate GPU memory"

    # Act — the second call exercises the I-7 idempotence requirement.
    for _ in range(2):
        runtime.release_engine(handle)

    # Assert
    after_release = torch.cuda.memory_allocated()
    assert after_release <= before, (
        f"release_engine left {after_release - before} bytes allocated on GPU"
    )
    assert handle._released is True


# ----------------------------------------------------------------------
# AC-6: missing checkpoint raises EngineDeserializeError.


def test_ac6_missing_checkpoint_raises(
    config: Config, tmp_path: Path, clean_registry: None
) -> None:
    """Deserializing a non-existent path raises ``EngineDeserializeError``."""
    # Arrange
    register_architecture("tiny", _TinyLinear)
    runtime = PytorchFp16Runtime(config)
    absent_path = tmp_path / "does_not_exist.pt"
    cache_entry = EngineCacheEntry(
        engine_path=absent_path,
        sha256_hex="0" * 64,
        sm=None,
        jp=None,
        trt=None,
        precision=PrecisionMode.FP16,
        extras={"model_name": "tiny"},
    )

    # Act / Assert — the error text must mention the missing file.
    with pytest.raises(EngineDeserializeError, match="does_not_exist"):
        runtime.deserialize_engine(cache_entry)


# ----------------------------------------------------------------------
# AC-7: mismatched state dict raises EngineDeserializeError; __cause__ preserved.


def test_ac7_incompatible_state_dict_raises_with_cause(
    config: Config, tiny_checkpoint: Path, clean_registry: None
) -> None:
    """A ``_TinyLinear`` checkpoint loaded into ``_DifferentArch`` fails loudly.

    The raised ``EngineDeserializeError`` must chain a ``RuntimeError`` as
    its ``__cause__``.
    """
    # Arrange
    register_architecture("incompatible", _DifferentArch)
    runtime = PytorchFp16Runtime(config)
    cache_entry = EngineCacheEntry(
        engine_path=tiny_checkpoint,
        sha256_hex="0" * 64,
        sm=None,
        jp=None,
        trt=None,
        precision=PrecisionMode.FP16,
        extras={"model_name": "incompatible"},
    )

    # Act / Assert
    with pytest.raises(EngineDeserializeError) as exc_info:
        runtime.deserialize_engine(cache_entry)
    cause = exc_info.value.__cause__
    assert cause is not None
    assert isinstance(cause, RuntimeError)


# ----------------------------------------------------------------------
# AC-8: CUDA OOM during infer surfaces as the C7-local OutOfMemoryError.


@_REQUIRE_CUDA
def test_ac8_cuda_oom_during_infer_rewrapped(
    config: Config, tiny_checkpoint: Path, clean_registry: None
) -> None:
    """A ``torch.cuda.OutOfMemoryError`` in forward surfaces as ``OutOfMemoryError``."""
    # Arrange
    register_architecture("tiny", _TinyLinear)
    runtime = PytorchFp16Runtime(config)
    build_config = BuildConfig(
        precision=PrecisionMode.FP16,
        workspace_mb=512,
        calibration_dataset=None,
        optimization_profiles=(),
    )
    cache_entry = runtime.compile_engine(tiny_checkpoint, build_config)
    handle = runtime.deserialize_engine(cache_entry)

    class _OomModel(nn.Module):
        def forward(self, *_args, **_kwargs):
            raise torch.cuda.OutOfMemoryError("synthetic OOM")

    # Swap the loaded model for one whose forward always raises.
    handle._model = _OomModel().half().cuda().eval()
    feed = {"x": np.zeros((1, 3), dtype=np.float32)}

    # Act / Assert
    with pytest.raises(OutOfMemoryError):
        runtime.infer(handle, feed)


# ----------------------------------------------------------------------
# Surface coverage that does not require CUDA.


def test_thermal_state_default_safe_when_no_publisher(config: Config) -> None:
    """Invariant I-6 — telemetry-unavailable defaults to throttle=False."""
    reported = PytorchFp16Runtime(config).thermal_state()

    assert isinstance(reported, ThermalState)
    assert reported.is_telemetry_available is False
    assert reported.thermal_throttle_active is False


def test_thermal_state_delegates_to_publisher(config: Config) -> None:
    """``thermal_state`` returns the very object the publisher's ``read`` yields."""
    canned = ThermalState(
        cpu_temp_c=42.0,
        gpu_temp_c=45.0,
        thermal_throttle_active=True,
        measured_clock_mhz=918,
        measured_at_ns=1_000_000,
        is_telemetry_available=True,
    )

    class _StubPublisher:
        def read(self) -> ThermalState:
            return canned

    publisher = _StubPublisher()
    runtime = PytorchFp16Runtime(config, thermal_publisher=publisher)

    assert runtime.thermal_state() is canned


def test_deserialize_missing_architecture_registration(
    config: Config, tiny_checkpoint: Path, clean_registry: None
) -> None:
    """An unregistered ``model_name`` raises ``EngineDeserializeError``."""
    runtime = PytorchFp16Runtime(config)
    cache_entry = EngineCacheEntry(
        engine_path=tiny_checkpoint,
        sha256_hex="0" * 64,
        sm=None,
        jp=None,
        trt=None,
        precision=PrecisionMode.FP16,
        extras={"model_name": "unknown"},
    )

    with pytest.raises(EngineDeserializeError, match="No architecture registered"):
        runtime.deserialize_engine(cache_entry)


def test_infer_rejects_foreign_handle(config: Config) -> None:
    """``infer`` refuses a plain ``EngineHandle`` with ``InferenceError``."""
    from gps_denied_onboard._types.inference import EngineHandle

    runtime = PytorchFp16Runtime(config)
    not_ours = EngineHandle()
    feed = {"x": np.zeros((1, 3), dtype=np.float32)}

    with pytest.raises(InferenceError, match="foreign handle"):
        runtime.infer(not_ours, feed)


def test_infer_rejects_released_handle(
    config: Config, tiny_checkpoint: Path, clean_registry: None
) -> None:
    """``infer`` on an already released handle raises ``InferenceError``."""
    register_architecture("tiny", _TinyLinear)
    runtime = PytorchFp16Runtime(config)
    model = _TinyLinear().eval()
    handle = PytorchEngineHandle(model, "tiny")
    runtime.release_engine(handle)
    feed = {"x": np.zeros((1, 3), dtype=np.float32)}

    with pytest.raises(InferenceError, match="released"):
        runtime.infer(handle, feed)


def test_release_engine_idempotent_no_handle_corruption(config: Config) -> None:
    """Releasing the same handle twice leaves it flagged as released."""
    runtime = PytorchFp16Runtime(config)
    model = _TinyLinear().eval()
    handle = PytorchEngineHandle(model, "tiny")

    for _ in range(2):
        runtime.release_engine(handle)

    assert handle._released is True


def test_register_architecture_rejects_collision(clean_registry: None) -> None:
    """Re-registering a name with a different factory raises ``ValueError``."""
    register_architecture("clash", _TinyLinear)

    with pytest.raises(ValueError, match="already registered"):
        register_architecture("clash", _DifferentArch)


def test_register_architecture_same_factory_is_idempotent(
    clean_registry: None,
) -> None:
    """Registering the same name/factory pair twice must not raise."""
    for _ in range(2):
        register_architecture("same", _TinyLinear)


# ----------------------------------------------------------------------
# NFRs.


@_REQUIRE_CUDA
@pytest.mark.tier2
def test_nfr_perf_deserialize_under_10s_p95(
    config: Config, tiny_checkpoint: Path, clean_registry: None
) -> None:
    """NFR-perf-deserialize — fixture-sized model; real budget validated on Jetson."""
    register_architecture("tiny", _TinyLinear)
    runtime = PytorchFp16Runtime(config)
    build_config = BuildConfig(
        precision=PrecisionMode.FP16,
        workspace_mb=512,
        calibration_dataset=None,
        optimization_profiles=(),
    )
    cache_entry = runtime.compile_engine(tiny_checkpoint, build_config)

    durations: list[float] = []
    for _ in range(5):
        # Time the deserialize call only; the release happens outside the window.
        started = time.perf_counter()
        handle = runtime.deserialize_engine(cache_entry)
        durations.append(time.perf_counter() - started)
        runtime.release_engine(handle)

    ordered = sorted(durations)
    p95 = ordered[int(0.95 * len(ordered))]
    assert p95 <= 10.0, f"deserialize p95={p95:.3f} s exceeds 10 s NFR"


@_REQUIRE_CUDA
def test_nfr_reliability_eval_mode_unconditional(
    config: Config, tmp_path: Path, clean_registry: None
) -> None:
    """A checkpoint saved in training mode MUST land in eval mode."""
    register_architecture("tiny", _TinyLinear)
    runtime = PytorchFp16Runtime(config)

    # Save the state_dict of a model that has been switched to training mode.
    in_training = _TinyLinear().train()
    checkpoint = tmp_path / "trained.pt"
    torch.save(in_training.state_dict(), checkpoint)

    cache_entry = EngineCacheEntry(
        engine_path=checkpoint,
        sha256_hex="0" * 64,
        sm=None,
        jp=None,
        trt=None,
        precision=PrecisionMode.FP16,
        extras={"model_name": "tiny"},
    )
    handle = runtime.deserialize_engine(cache_entry)

    assert handle._model.training is False