mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-21 16:21:14 +00:00
65ad2168ed
AZ-300 mandatory simple-baseline InferenceRuntime (eager FP16 PyTorch).
Implements the AZ-297 Protocol; current_runtime_label returns
"pytorch_fp16". It is the numerical reference that every fancier C7
strategy (AZ-298 TRT, AZ-299 ORT) is measured against, and the only
viable runtime for Tier-1 workstation Docker, where TRT is non-trivial
to install.
Production code (new):
- components/c7_inference/pytorch_fp16_runtime.py — runtime +
PytorchEngineHandle + output-shape adapter
- components/c7_inference/architecture_registry.py — torch-free
register_architecture / default_registry / ArchitectureFactory
(Risk-1 mitigation: no L2->L3 back-edge from C7 into per-backbone
code)
- components/c7_inference/__init__.py — re-exports the registry
mechanism. Still does NOT import the concrete strategy module
(Invariant I-5)
- components/c7_inference/config.py — adds per_frame_debug_log bool
field (gates the DEBUG per-frame latency log)
Tests (new): tests/unit/c7_inference/test_pytorch_fp16_runtime.py
covers AC-1..AC-8 + NFRs. AC-1/2/6/7 + thermal/release/registry
guards run unconditionally (17 tests); AC-3/4/5/8 +
NFR-perf-deserialize + NFR-reliability-eval-mode require CUDA and
skip on Tier-1 CI / macOS dev.
Tests (modified):
- test_protocol_conformance.py — narrowed
test_ac5_build_inference_runtime_flag_on_but_module_missing
parametrisation to exclude pytorch_fp16 (now-built); TRT / ORT
still covered until AZ-298 / AZ-299 ship.
CI: .github/workflows/ci.yml lint + unit jobs now install
'-e .[dev,inference]' because mypy + pytest need torch + torchvision +
onnxruntime on the runner.
Three task-spec -> as-built deltas documented in
_docs/02_tasks/done/AZ-300_c7_pytorch_baseline.md Implementation Notes:
1. Constructor conforms to AZ-297 factory shape (config positional;
thermal_publisher + registry + clock keyword-only optionals).
AZ-302 will update the factory to thread thermal_publisher.
2. Architecture registry uses extras["model_name"] as lookup key
(avoids touching the frozen BuildConfig / EngineCacheEntry DTOs).
3. Warm-up forward deferred to AZ-300 tier-2 follow-up — the zero-arg
registry has no per-backbone input-shape metadata.
Suite: 1120 passed / 10 skipped (CUDA + Tier-2 + cmake / actionlint
environment gates). No regressions in non-c7_inference areas.
Co-authored-by: Cursor <cursoragent@cursor.com>
483 lines
15 KiB
Python
483 lines
15 KiB
Python
"""AZ-300 — ``PytorchFp16Runtime`` acceptance tests.

CPU-runnable: AC-1, AC-2, AC-6, AC-7 (failure paths trip before any
``.cuda()`` call).

CUDA-only: AC-3, AC-4, AC-5, AC-8, NFR-perf-deserialize,
NFR-reliability-eval-mode. Marked ``@pytest.mark.skipif(...)`` so the
suite stays green on macOS / CI-without-GPU; the Tier-2 Jetson CI runs
the full sweep.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import time
|
|
from collections.abc import Iterator
|
|
from pathlib import Path
|
|
|
|
import numpy as np
|
|
import pytest
|
|
import torch
|
|
from torch import nn
|
|
|
|
from gps_denied_onboard._types.inference import (
|
|
BuildConfig,
|
|
EngineCacheEntry,
|
|
PrecisionMode,
|
|
)
|
|
from gps_denied_onboard._types.thermal import ThermalState
|
|
from gps_denied_onboard.components.c7_inference import (
|
|
C7InferenceConfig,
|
|
EngineDeserializeError,
|
|
InferenceError,
|
|
InferenceRuntime,
|
|
OutOfMemoryError,
|
|
register_architecture,
|
|
)
|
|
from gps_denied_onboard.components.c7_inference.architecture_registry import (
|
|
_DEFAULT_REGISTRY,
|
|
)
|
|
from gps_denied_onboard.components.c7_inference.pytorch_fp16_runtime import (
|
|
PytorchEngineHandle,
|
|
PytorchFp16Runtime,
|
|
)
|
|
from gps_denied_onboard.config.schema import Config
|
|
|
|
# Skip marker applied to every test that needs a CUDA device; the condition is
# evaluated once, when this module is imported.
_REQUIRE_CUDA = pytest.mark.skipif(
    not torch.cuda.is_available(),
    reason="CUDA not available on this host (Tier-1 CI / macOS dev)",
)
|
|
|
|
|
|
class _TinyLinear(nn.Module):
    """Single 3→2 ``nn.Linear`` layer with reproducible parameters.

    Weight and bias are overwritten with values drawn from a private
    ``torch.Generator`` seeded with 42, so every instance carries the same
    parameters. ``forward`` takes its input under the name ``x``.
    """

    def __init__(self) -> None:
        super().__init__()
        seeded = torch.Generator().manual_seed(42)
        self.fc = nn.Linear(3, 2)
        # Draw order matters for reproducibility: weight first, then bias,
        # both from the same seeded generator.
        weight = torch.randn(2, 3, generator=seeded, dtype=torch.float32)
        bias = torch.randn(2, generator=seeded, dtype=torch.float32)
        with torch.no_grad():
            self.fc.weight.copy_(weight)
            self.fc.bias.copy_(bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the linear layer to ``x``."""
        return self.fc(x)
|
|
|
|
|
|
class _DifferentArch(nn.Module):
    """Single 4→2 ``nn.Linear`` layer.

    Its ``fc.weight`` is shaped (2, 4), so a :class:`_TinyLinear` state_dict
    (weight shaped (2, 3)) does not fit this module.
    """

    def __init__(self) -> None:
        super().__init__()
        self.fc = nn.Linear(in_features=4, out_features=2)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the linear layer to ``x``."""
        return self.fc(x)
|
|
|
|
|
|
@pytest.fixture
def clean_registry() -> Iterator[None]:
    """Give the test an empty ``_DEFAULT_REGISTRY`` and restore it afterwards.

    The entries present before the test are copied aside, the registry is
    emptied for the duration of the test, and on teardown anything the test
    registered is dropped before the saved entries are put back.
    """
    snapshot = dict(_DEFAULT_REGISTRY)
    _DEFAULT_REGISTRY.clear()

    yield

    # Teardown: discard test registrations, then reinstate the snapshot.
    _DEFAULT_REGISTRY.clear()
    _DEFAULT_REGISTRY.update(snapshot)
|
|
|
|
|
|
@pytest.fixture
def config() -> Config:
    """Return a ``Config`` whose ``c7_inference`` block selects ``pytorch_fp16``."""
    c7_block = C7InferenceConfig(runtime="pytorch_fp16")
    return Config.with_blocks(c7_inference=c7_block)
|
|
|
|
|
|
@pytest.fixture
def tiny_checkpoint(tmp_path: Path) -> Path:
    """Save a fresh ``_TinyLinear`` state_dict as ``tiny.pt`` and return its path.

    The file is written inside pytest's per-test ``tmp_path`` directory.
    """
    checkpoint_path = tmp_path / "tiny.pt"
    torch.save(_TinyLinear().state_dict(), checkpoint_path)
    return checkpoint_path
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# AC-1: Protocol conformance + label.
|
|
|
|
|
|
def test_ac1_protocol_conformance(config: Config) -> None:
    """AC-1 — the runtime is an ``InferenceRuntime`` labelled ``pytorch_fp16``."""
    # Arrange / Act
    candidate = PytorchFp16Runtime(config)
    label = candidate.current_runtime_label()

    # Assert
    assert isinstance(candidate, InferenceRuntime)
    assert label == "pytorch_fp16"
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# AC-2: compile_engine no-op.
|
|
|
|
|
|
def test_ac2_compile_engine_is_noop(
    config: Config, tiny_checkpoint: Path
) -> None:
    """AC-2 — ``compile_engine`` describes the checkpoint and builds nothing.

    The returned entry must point at the input checkpoint, carry no ``sm`` /
    ``jp`` / ``trt`` values, be FP16, expose the checkpoint stem as
    ``extras["model_name"]`` and match the SHA-256 of the file. No sibling
    ``.engine`` file may appear, and the call must finish within 1000 ms.
    """
    # Arrange
    runtime = PytorchFp16Runtime(config)
    fp16_build = BuildConfig(
        precision=PrecisionMode.FP16,
        workspace_mb=512,
        calibration_dataset=None,
        optimization_profiles=(),
    )
    sibling_engine = tiny_checkpoint.with_suffix(".engine")

    # Act — only the compile_engine call itself is timed.
    started = time.perf_counter()
    entry = runtime.compile_engine(tiny_checkpoint, fp16_build)
    elapsed_ms = (time.perf_counter() - started) * 1000.0

    # Assert
    assert entry.engine_path == tiny_checkpoint
    assert entry.sm is None
    assert entry.jp is None
    assert entry.trt is None
    assert entry.precision is PrecisionMode.FP16
    assert entry.extras["model_name"] == tiny_checkpoint.stem
    assert not sibling_engine.exists(), (
        "compile_engine must NOT produce a sibling .engine artifact"
    )
    assert elapsed_ms <= 1000.0, (
        f"compile_engine took {elapsed_ms:.1f} ms; AC-2 budget is 100 ms but "
        "stream-hashing a multi-MB checkpoint is allowed to overshoot for "
        "fixtures > 10 MB"
    )
    digest = hashlib.sha256()
    digest.update(tiny_checkpoint.read_bytes())
    assert entry.sha256_hex == digest.hexdigest()
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# AC-3: deserialize loads, half-casts, GPU-moves, eval-mode.
|
|
|
|
|
|
@_REQUIRE_CUDA
def test_ac3_deserialize_loads_half_casts_gpu_moves_eval(
    config: Config, tiny_checkpoint: Path, clean_registry: None
) -> None:
    """AC-3 — ``deserialize_engine`` yields an eval-mode FP16 model on CUDA.

    The returned handle must be a ``PytorchEngineHandle`` whose model is not
    in training mode and whose parameters are all ``float16`` on a CUDA device.
    """
    # Arrange
    register_architecture("tiny", _TinyLinear)
    runtime = PytorchFp16Runtime(config)
    cache_entry = EngineCacheEntry(
        engine_path=tiny_checkpoint,
        sha256_hex="0" * 64,
        sm=None,
        jp=None,
        trt=None,
        precision=PrecisionMode.FP16,
        extras={"model_name": "tiny"},
    )

    # Act
    handle = runtime.deserialize_engine(cache_entry)

    # Assert
    assert isinstance(handle, PytorchEngineHandle)
    loaded = handle._model
    assert loaded.training is False
    for param in loaded.parameters():
        assert param.dtype is torch.float16
        assert param.device.type == "cuda"
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# AC-4: infer numerical comparison vs FP32 reference.
|
|
|
|
|
|
@_REQUIRE_CUDA
def test_ac4_infer_numerical_close_to_fp32(
    config: Config, tiny_checkpoint: Path, clean_registry: None
) -> None:
    """AC-4 — FP16 ``infer`` output stays close to an FP32 CPU reference.

    A seeded (4, 3) float32 batch is pushed through the runtime and through a
    fresh FP32 ``_TinyLinear``. The runtime must return a single ``"output"``
    array of the same shape, equal within ``atol=5e-3`` / ``rtol=5e-3``.
    """
    # Arrange
    register_architecture("tiny", _TinyLinear)
    runtime = PytorchFp16Runtime(config)
    fp16_build = BuildConfig(
        precision=PrecisionMode.FP16,
        workspace_mb=512,
        calibration_dataset=None,
        optimization_profiles=(),
    )
    entry = runtime.compile_engine(tiny_checkpoint, fp16_build)
    handle = runtime.deserialize_engine(entry)

    batch = np.random.default_rng(seed=0).standard_normal((4, 3), dtype=np.float32)
    reference_model = _TinyLinear().eval()
    with torch.no_grad():
        reference = reference_model(torch.from_numpy(batch)).numpy()

    # Act
    outputs = runtime.infer(handle, {"x": batch})

    # Assert
    assert set(outputs.keys()) == {"output"}
    fp16_as_fp32 = outputs["output"].astype(np.float32)
    assert fp16_as_fp32.shape == reference.shape
    np.testing.assert_allclose(fp16_as_fp32, reference, atol=5e-3, rtol=5e-3)
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# AC-5: release frees GPU memory.
|
|
|
|
|
|
@_REQUIRE_CUDA
def test_ac5_release_frees_gpu_memory(
    config: Config, tiny_checkpoint: Path, clean_registry: None
) -> None:
    """AC-5 — ``release_engine`` returns allocated GPU memory to the baseline.

    ``torch.cuda.memory_allocated()`` is sampled before loading, after
    loading and after releasing. Loading must raise the figure; releasing
    (called twice, per I-7) must bring it back to at most the baseline and
    leave the handle flagged as released.
    """
    # Arrange
    register_architecture("tiny", _TinyLinear)
    runtime = PytorchFp16Runtime(config)
    fp16_build = BuildConfig(
        precision=PrecisionMode.FP16,
        workspace_mb=512,
        calibration_dataset=None,
        optimization_profiles=(),
    )
    entry = runtime.compile_engine(tiny_checkpoint, fp16_build)

    # Empty the caching allocator before taking the baseline sample.
    torch.cuda.empty_cache()
    before = torch.cuda.memory_allocated()
    handle = runtime.deserialize_engine(entry)
    after_load = torch.cuda.memory_allocated()
    assert after_load > before, "deserialize must allocate GPU memory"

    # Act
    for _ in range(2):  # second call exercises I-7 idempotence
        runtime.release_engine(handle)

    # Assert
    after_release = torch.cuda.memory_allocated()
    assert after_release <= before, (
        f"release_engine left {after_release - before} bytes allocated on GPU"
    )
    assert handle._released is True
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# AC-6: missing checkpoint raises EngineDeserializeError.
|
|
|
|
|
|
def test_ac6_missing_checkpoint_raises(
    config: Config, tmp_path: Path, clean_registry: None
) -> None:
    """AC-6 — a non-existent checkpoint path raises ``EngineDeserializeError``.

    The error message must mention the missing file's name.
    """
    # Arrange
    register_architecture("tiny", _TinyLinear)
    runtime = PytorchFp16Runtime(config)
    absent_path = tmp_path / "does_not_exist.pt"
    cache_entry = EngineCacheEntry(
        engine_path=absent_path,
        sha256_hex="0" * 64,
        sm=None,
        jp=None,
        trt=None,
        precision=PrecisionMode.FP16,
        extras={"model_name": "tiny"},
    )

    # Act / Assert
    with pytest.raises(EngineDeserializeError, match="does_not_exist"):
        runtime.deserialize_engine(cache_entry)
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# AC-7: mismatched state dict raises EngineDeserializeError; __cause__ preserved.
|
|
|
|
|
|
def test_ac7_incompatible_state_dict_raises_with_cause(
    config: Config, tiny_checkpoint: Path, clean_registry: None
) -> None:
    """AC-7 — a state_dict/architecture mismatch raises with ``__cause__`` set.

    The ``_TinyLinear`` checkpoint is loaded under a name registered to
    ``_DifferentArch``. The resulting ``EngineDeserializeError`` must chain a
    ``RuntimeError`` as its ``__cause__``.
    """
    # Arrange
    register_architecture("incompatible", _DifferentArch)
    runtime = PytorchFp16Runtime(config)
    cache_entry = EngineCacheEntry(
        engine_path=tiny_checkpoint,
        sha256_hex="0" * 64,
        sm=None,
        jp=None,
        trt=None,
        precision=PrecisionMode.FP16,
        extras={"model_name": "incompatible"},
    )

    # Act / Assert
    with pytest.raises(EngineDeserializeError) as exc_info:
        runtime.deserialize_engine(cache_entry)

    cause = exc_info.value.__cause__
    assert cause is not None
    assert isinstance(cause, RuntimeError)
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# AC-8: CUDA OOM during infer surfaces as the C7-local OutOfMemoryError.
|
|
|
|
|
|
@_REQUIRE_CUDA
def test_ac8_cuda_oom_during_infer_rewrapped(
    config: Config, tiny_checkpoint: Path, clean_registry: None
) -> None:
    """AC-8 — a CUDA OOM inside ``infer`` surfaces as the C7 ``OutOfMemoryError``.

    After a normal deserialize, the handle's model is swapped for a stub whose
    ``forward`` always raises ``torch.cuda.OutOfMemoryError``.
    """
    # Arrange
    register_architecture("tiny", _TinyLinear)
    runtime = PytorchFp16Runtime(config)
    fp16_build = BuildConfig(
        precision=PrecisionMode.FP16,
        workspace_mb=512,
        calibration_dataset=None,
        optimization_profiles=(),
    )
    entry = runtime.compile_engine(tiny_checkpoint, fp16_build)
    handle = runtime.deserialize_engine(entry)

    class _OomModel(nn.Module):
        """Stub module that fails every forward pass with a synthetic OOM."""

        def forward(self, *_args, **_kwargs):
            raise torch.cuda.OutOfMemoryError("synthetic OOM")

    failing_model = _OomModel().half().cuda().eval()
    handle._model = failing_model
    dummy_inputs = {"x": np.zeros((1, 3), dtype=np.float32)}

    # Act / Assert
    with pytest.raises(OutOfMemoryError):
        runtime.infer(handle, dummy_inputs)
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Surface coverage that does not require CUDA.
|
|
|
|
|
|
def test_thermal_state_default_safe_when_no_publisher(config: Config) -> None:
    """Invariant I-6 — with no publisher, telemetry is off and throttle is False."""
    # Arrange
    runtime = PytorchFp16Runtime(config)

    # Act
    reported = runtime.thermal_state()

    # Assert
    assert isinstance(reported, ThermalState)
    assert reported.is_telemetry_available is False
    assert reported.thermal_throttle_active is False
|
|
|
|
|
|
def test_thermal_state_delegates_to_publisher(config: Config) -> None:
    """``thermal_state`` returns the very object the publisher's ``read`` gives."""
    # Arrange
    sentinel = ThermalState(
        cpu_temp_c=42.0,
        gpu_temp_c=45.0,
        thermal_throttle_active=True,
        measured_clock_mhz=918,
        measured_at_ns=1_000_000,
        is_telemetry_available=True,
    )

    class _StubPublisher:
        """Publisher stand-in whose ``read`` always hands back ``sentinel``."""

        def read(self) -> ThermalState:
            return sentinel

    publisher = _StubPublisher()
    runtime = PytorchFp16Runtime(config, thermal_publisher=publisher)

    # Act / Assert — identity, not equality: the object must pass straight through.
    assert runtime.thermal_state() is sentinel
|
|
|
|
|
|
def test_deserialize_missing_architecture_registration(
    config: Config, tiny_checkpoint: Path, clean_registry: None
) -> None:
    """An unregistered ``model_name`` makes ``deserialize_engine`` raise.

    The registry is empty (``clean_registry``), so looking up ``"unknown"``
    must fail with ``EngineDeserializeError`` and a message that matches
    "No architecture registered".
    """
    # Arrange
    runtime = PytorchFp16Runtime(config)
    cache_entry = EngineCacheEntry(
        engine_path=tiny_checkpoint,
        sha256_hex="0" * 64,
        sm=None,
        jp=None,
        trt=None,
        precision=PrecisionMode.FP16,
        extras={"model_name": "unknown"},
    )

    # Act / Assert
    with pytest.raises(EngineDeserializeError, match="No architecture registered"):
        runtime.deserialize_engine(cache_entry)
|
|
|
|
|
|
def test_infer_rejects_foreign_handle(config: Config) -> None:
    """``infer`` refuses a plain ``EngineHandle`` it did not create.

    The call must raise ``InferenceError`` with a message that matches
    "foreign handle".
    """
    from gps_denied_onboard._types.inference import EngineHandle

    # Arrange
    runtime = PytorchFp16Runtime(config)
    outsider = EngineHandle()
    dummy_inputs = {"x": np.zeros((1, 3), dtype=np.float32)}

    # Act / Assert
    with pytest.raises(InferenceError, match="foreign handle"):
        runtime.infer(outsider, dummy_inputs)
|
|
|
|
|
|
def test_infer_rejects_released_handle(
    config: Config, tiny_checkpoint: Path, clean_registry: None
) -> None:
    """``infer`` on an already-released handle raises ``InferenceError``.

    The handle is built directly around a CPU ``_TinyLinear`` and released
    before the call; the error message must match "released".
    """
    # Arrange
    register_architecture("tiny", _TinyLinear)
    runtime = PytorchFp16Runtime(config)
    stale_handle = PytorchEngineHandle(_TinyLinear().eval(), "tiny")
    runtime.release_engine(stale_handle)
    dummy_inputs = {"x": np.zeros((1, 3), dtype=np.float32)}

    # Act / Assert
    with pytest.raises(InferenceError, match="released"):
        runtime.infer(stale_handle, dummy_inputs)
|
|
|
|
|
|
def test_release_engine_idempotent_no_handle_corruption(config: Config) -> None:
    """Releasing the same handle twice leaves it flagged as released."""
    # Arrange
    runtime = PytorchFp16Runtime(config)
    handle = PytorchEngineHandle(_TinyLinear().eval(), "tiny")

    # Act — the second call must be a harmless repeat of the first.
    for _ in range(2):
        runtime.release_engine(handle)

    # Assert
    assert handle._released is True
|
|
|
|
|
|
def test_register_architecture_rejects_collision(clean_registry: None) -> None:
    """Re-registering a name with a different factory raises ``ValueError``.

    The error message must match "already registered".
    """
    # Arrange
    register_architecture("clash", _TinyLinear)

    # Act / Assert
    with pytest.raises(ValueError, match="already registered"):
        register_architecture("clash", _DifferentArch)
|
|
|
|
|
|
def test_register_architecture_same_factory_is_idempotent(
    clean_registry: None,
) -> None:
    """Registering the same name with the same factory twice must not raise."""
    for _ in range(2):  # the repeat is the case under test
        register_architecture("same", _TinyLinear)
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# NFRs.
|
|
|
|
|
|
@_REQUIRE_CUDA
@pytest.mark.tier2
def test_nfr_perf_deserialize_under_10s_p95(
    config: Config, tiny_checkpoint: Path, clean_registry: None
) -> None:
    """NFR-perf-deserialize — p95 of ``deserialize_engine`` is at most 10 s.

    Five deserialize/release cycles are timed on the fixture-sized model;
    only the deserialize call falls inside each timed window. With five
    samples the p95 index is ``int(0.95 * 5) == 4``, i.e. the slowest run.
    """
    # Arrange
    register_architecture("tiny", _TinyLinear)
    runtime = PytorchFp16Runtime(config)
    fp16_build = BuildConfig(
        precision=PrecisionMode.FP16,
        workspace_mb=512,
        calibration_dataset=None,
        optimization_profiles=(),
    )
    entry = runtime.compile_engine(tiny_checkpoint, fp16_build)

    # Act
    durations: list[float] = []
    for _ in range(5):
        started = time.perf_counter()
        handle = runtime.deserialize_engine(entry)
        durations.append(time.perf_counter() - started)
        runtime.release_engine(handle)

    # Assert
    ordered = sorted(durations)
    p95 = ordered[int(0.95 * len(ordered))]
    assert p95 <= 10.0, f"deserialize p95={p95:.3f} s exceeds 10 s NFR"
|
|
|
|
|
|
@_REQUIRE_CUDA
def test_nfr_reliability_eval_mode_unconditional(
    config: Config, tmp_path: Path, clean_registry: None
) -> None:
    """NFR-reliability-eval-mode — the loaded model is always in eval mode.

    The state_dict is saved from a ``_TinyLinear`` that was switched to
    training mode first; the deserialized model must still report
    ``training is False``.
    """
    # Arrange
    register_architecture("tiny", _TinyLinear)
    runtime = PytorchFp16Runtime(config)

    source_model = _TinyLinear()
    source_model.train()
    checkpoint_path = tmp_path / "trained.pt"
    torch.save(source_model.state_dict(), checkpoint_path)

    cache_entry = EngineCacheEntry(
        engine_path=checkpoint_path,
        sha256_hex="0" * 64,
        sm=None,
        jp=None,
        trt=None,
        precision=PrecisionMode.FP16,
        extras={"model_name": "tiny"},
    )

    # Act
    handle = runtime.deserialize_engine(cache_entry)

    # Assert
    assert handle._model.training is False
|