mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-21 10:21:13 +00:00
0ad3278b12
Land the fallback InferenceRuntime strategy that satisfies C7-IT-05: when the TRT-direct path (AZ-298) cannot deserialise a cached engine or when the operator explicitly selects ORT, the system stays in the air at degraded latency rather than dropping the request. Conforms to the AZ-297 Protocol; current_runtime_label() == "onnx_trt_ep". Production - onnx_trt_ep_runtime.py: compile_engine is a no-op returning an EngineCacheEntry pointing at the source .onnx; deserialize_engine is gate-first for .engine entries and gate-skip for .onnx, builds an ORT InferenceSession with the provider list [TensorrtExecutionProvider, CUDAExecutionProvider, CPUExecutionProvider], stages cached engines into the ORT TRT EP cache directory via symlink-or-copy, warms up with one session.run after construction, and honours config.inference.ort_disallow_cpu_ fallback by raising EngineDeserializeError when the active provider resolves to CPU; infer emits a one-shot c7.fallback_to_onnx_trt_ep WARN log plus gcs_alert callback on first call when is_fallback= True; release_engine is idempotent. _build_provider_args is the single point that pins TRT EP option-key names (Risk-3) and caps trt_max_workspace_size at gpu_memory_budget_bytes // 4 (AC-8). - config.py: adds ort_trt_cache_dir (validated non-empty) and ort_disallow_cpu_fallback to C7InferenceConfig. - fdr_client/records.py: adds c7.fallback_to_onnx_trt_ep and c7.cpu_fallback FDR record kinds. Tests - test_onnx_trt_ep_runtime.py: covers AC-1..AC-8 + Risk-2 CPU-fallback alert + Risk-3 option-key pin + NFR-reliability error rewrap; Tier-1 via fake ORT session; Tier-2 placeholders skip on macOS dev for numerical FP16 comparison and session-creation perf NFR. - test_protocol_conformance.py: drops onnx_trt_ep from the missing- module parametrize now that the module ships. - test_az272_fdr_record_schema.py: extends per-kind fixture builder to cover the two new C7 FDR kinds in the roundtrip / schema-version AC tests. Docs - module-layout.md: replaces the pending onnx_trt_runtime row with the shipped onnx_trt_ep_runtime row + capabilities list. - batch_32_cycle1_report.md + reviews/batch_32_review.md: full batch + self-review (PASS_WITH_WARNINGS, 4 Low findings accepted). Tests run: c7_inference 139 passing + 17 Tier-2 skips; combined unit suite (excluding pending components) 529 passing, 19 env-skipped. Co-authored-by: Cursor <cursoragent@cursor.com>
510 lines
16 KiB
Python
510 lines
16 KiB
Python
"""AZ-297 — C7 inference runtime Protocol + DTO + error + factory conformance.
|
|
|
|
Covers all 8 ACs of AZ-297 plus NFR-perf-factory and
|
|
NFR-reliability-error-family. The factory ACs (AC-4 / AC-5) substitute
|
|
fake strategy modules at ``sys.modules`` boundaries so the test never
|
|
touches TensorRT, ONNX Runtime, or PyTorch.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import dataclasses
|
|
import re
|
|
import sys
|
|
import time
|
|
import types
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from gps_denied_onboard.components.c7_inference import (
|
|
BuildConfig,
|
|
C7InferenceConfig,
|
|
CalibrationCacheError,
|
|
EngineBuildError,
|
|
EngineCacheEntry,
|
|
EngineDeserializeError,
|
|
EngineHandle,
|
|
EngineHashMismatchError,
|
|
EngineSchemaMismatchError,
|
|
EngineSidecarMissingError,
|
|
InferenceError,
|
|
InferenceRuntime,
|
|
OptimizationProfile,
|
|
OutOfMemoryError,
|
|
PrecisionMode,
|
|
TelemetryUnavailableError,
|
|
ThermalState,
|
|
)
|
|
from gps_denied_onboard.components.c7_inference import (
|
|
RuntimeError as C7RuntimeError,
|
|
)
|
|
from gps_denied_onboard.components.c7_inference.config import KNOWN_RUNTIMES
|
|
from gps_denied_onboard.config.schema import Config, ConfigError
|
|
from gps_denied_onboard.runtime_root.errors import RuntimeNotAvailableError
|
|
from gps_denied_onboard.runtime_root.inference_factory import (
|
|
build_inference_runtime,
|
|
)
|
|
|
|
_CONTRACT_PATH = (
|
|
Path(__file__).resolve().parents[3]
|
|
/ "_docs/02_document/contracts/c7_inference/inference_runtime_protocol.md"
|
|
)
|
|
_STRATEGY_MODULES: dict[str, tuple[str, str, str]] = {
|
|
"tensorrt": (
|
|
"gps_denied_onboard.components.c7_inference.tensorrt_runtime",
|
|
"TensorrtRuntime",
|
|
"BUILD_TENSORRT_RUNTIME",
|
|
),
|
|
"onnx_trt_ep": (
|
|
"gps_denied_onboard.components.c7_inference.onnx_trt_ep_runtime",
|
|
"OnnxTrtEpRuntime",
|
|
"BUILD_ONNX_TRT_EP_RUNTIME",
|
|
),
|
|
"pytorch_fp16": (
|
|
"gps_denied_onboard.components.c7_inference.pytorch_fp16_runtime",
|
|
"PytorchFp16Runtime",
|
|
"BUILD_PYTORCH_FP16_RUNTIME",
|
|
),
|
|
}
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Fakes that structurally satisfy the InferenceRuntime Protocol.
|
|
|
|
|
|
class _FullInferenceRuntime:
|
|
def __init__(self, config: Config) -> None:
|
|
self.config = config
|
|
self._label = config.components["c7_inference"].runtime
|
|
|
|
def compile_engine(self, model_path, build_config):
|
|
raise NotImplementedError
|
|
|
|
def deserialize_engine(self, entry):
|
|
raise NotImplementedError
|
|
|
|
def infer(self, handle, inputs):
|
|
raise NotImplementedError
|
|
|
|
def release_engine(self, handle):
|
|
return None
|
|
|
|
def thermal_state(self):
|
|
return ThermalState(
|
|
cpu_temp_c=None,
|
|
gpu_temp_c=None,
|
|
thermal_throttle_active=False,
|
|
measured_clock_mhz=None,
|
|
measured_at_ns=0,
|
|
is_telemetry_available=False,
|
|
)
|
|
|
|
def current_runtime_label(self):
|
|
return self._label
|
|
|
|
|
|
class _PartialInferenceRuntime:
|
|
def compile_engine(self, model_path, build_config):
|
|
raise NotImplementedError
|
|
|
|
def deserialize_engine(self, entry):
|
|
raise NotImplementedError
|
|
|
|
def infer(self, handle, inputs):
|
|
raise NotImplementedError
|
|
|
|
def release_engine(self, handle):
|
|
return None
|
|
|
|
def thermal_state(self):
|
|
raise NotImplementedError
|
|
|
|
|
|
def _config_with_runtime(runtime: str) -> Config:
|
|
return Config.with_blocks(
|
|
c7_inference=C7InferenceConfig(runtime=runtime)
|
|
)
|
|
|
|
|
|
def _install_fake_strategy(runtime_label: str) -> type:
|
|
module_name, class_name, _flag = _STRATEGY_MODULES[runtime_label]
|
|
|
|
class _FakeStrategy(_FullInferenceRuntime):
|
|
pass
|
|
|
|
_FakeStrategy.__name__ = class_name
|
|
module = types.ModuleType(module_name)
|
|
setattr(module, class_name, _FakeStrategy)
|
|
sys.modules[module_name] = module
|
|
return _FakeStrategy
|
|
|
|
|
|
@pytest.fixture
|
|
def strategy_module_cleanup():
|
|
"""Pop every fake strategy module before/after each factory test."""
|
|
for module_name, _, _ in _STRATEGY_MODULES.values():
|
|
sys.modules.pop(module_name, None)
|
|
yield
|
|
for module_name, _, _ in _STRATEGY_MODULES.values():
|
|
sys.modules.pop(module_name, None)
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# AC-1: Protocol is conformance-checkable.
|
|
|
|
|
|
def test_ac1_inference_runtime_conformance_full() -> None:
|
|
instance = _FullInferenceRuntime(_config_with_runtime("pytorch_fp16"))
|
|
assert isinstance(instance, InferenceRuntime)
|
|
|
|
|
|
def test_ac1_inference_runtime_conformance_partial_missing_label() -> None:
|
|
assert not isinstance(_PartialInferenceRuntime(), InferenceRuntime)
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# AC-2: frozen DTOs reject mutation.
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"dto, field_name, new_value",
|
|
[
|
|
(
|
|
BuildConfig(
|
|
precision=PrecisionMode.FP16,
|
|
workspace_mb=512,
|
|
calibration_dataset=None,
|
|
optimization_profiles=(),
|
|
),
|
|
"precision",
|
|
PrecisionMode.INT8,
|
|
),
|
|
(
|
|
EngineCacheEntry(
|
|
engine_path=Path("/var/lib/x.engine"),
|
|
sha256_hex="a" * 64,
|
|
sm=87,
|
|
jp="6.2",
|
|
trt="10.3",
|
|
precision=PrecisionMode.FP16,
|
|
extras={},
|
|
),
|
|
"sha256_hex",
|
|
"b" * 64,
|
|
),
|
|
(
|
|
ThermalState(
|
|
cpu_temp_c=40.0,
|
|
gpu_temp_c=45.0,
|
|
thermal_throttle_active=False,
|
|
measured_clock_mhz=918,
|
|
measured_at_ns=1_000_000,
|
|
is_telemetry_available=True,
|
|
),
|
|
"thermal_throttle_active",
|
|
True,
|
|
),
|
|
(
|
|
OptimizationProfile(
|
|
input_name="input",
|
|
min_shape=(1, 3, 224, 224),
|
|
opt_shape=(1, 3, 384, 384),
|
|
max_shape=(1, 3, 512, 512),
|
|
),
|
|
"input_name",
|
|
"renamed",
|
|
),
|
|
],
|
|
)
|
|
def test_ac2_frozen_dtos_reject_mutation(dto, field_name: str, new_value) -> None:
|
|
original_value = getattr(dto, field_name)
|
|
with pytest.raises(dataclasses.FrozenInstanceError):
|
|
setattr(dto, field_name, new_value)
|
|
assert getattr(dto, field_name) == original_value
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# AC-3: error hierarchy catchable as a single family.
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"exc_factory",
|
|
[
|
|
EngineBuildError,
|
|
EngineDeserializeError,
|
|
EngineHashMismatchError,
|
|
EngineSchemaMismatchError,
|
|
EngineSidecarMissingError,
|
|
CalibrationCacheError,
|
|
InferenceError,
|
|
OutOfMemoryError,
|
|
TelemetryUnavailableError,
|
|
],
|
|
)
|
|
def test_ac3_all_runtime_errors_caught_as_family(exc_factory) -> None:
|
|
with pytest.raises(C7RuntimeError):
|
|
raise exc_factory("boom")
|
|
|
|
|
|
def test_ac3_unrelated_exception_not_caught_as_family() -> None:
|
|
with pytest.raises(ValueError):
|
|
try:
|
|
raise ValueError("not us")
|
|
except C7RuntimeError:
|
|
pytest.fail("ValueError must not be caught as c7 RuntimeError")
|
|
|
|
|
|
def test_ac3_runtime_not_available_outside_family() -> None:
|
|
with pytest.raises(RuntimeNotAvailableError):
|
|
try:
|
|
raise RuntimeNotAvailableError("composition-time")
|
|
except C7RuntimeError:
|
|
pytest.fail(
|
|
"RuntimeNotAvailableError is a composition-root error and "
|
|
"MUST NOT be in the c7 runtime family"
|
|
)
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# AC-4 + AC-5: factory honours config + BUILD flag gate.
|
|
|
|
|
|
@pytest.mark.parametrize("runtime", sorted(_STRATEGY_MODULES))
|
|
def test_ac4_build_inference_runtime_returns_protocol_impl(
|
|
monkeypatch, strategy_module_cleanup, runtime
|
|
) -> None:
|
|
_, _, flag = _STRATEGY_MODULES[runtime]
|
|
monkeypatch.setenv(flag, "ON")
|
|
fake_cls = _install_fake_strategy(runtime)
|
|
config = _config_with_runtime(runtime)
|
|
instance = build_inference_runtime(config)
|
|
assert isinstance(instance, fake_cls)
|
|
assert isinstance(instance, InferenceRuntime)
|
|
|
|
|
|
@pytest.mark.parametrize("runtime", sorted(_STRATEGY_MODULES))
|
|
def test_ac5_build_inference_runtime_flag_off_no_import(
|
|
monkeypatch, strategy_module_cleanup, runtime
|
|
) -> None:
|
|
module_name, _, flag = _STRATEGY_MODULES[runtime]
|
|
monkeypatch.delenv(flag, raising=False)
|
|
config = _config_with_runtime(runtime)
|
|
with pytest.raises(RuntimeNotAvailableError) as exc_info:
|
|
build_inference_runtime(config)
|
|
assert runtime in str(exc_info.value)
|
|
assert flag in str(exc_info.value)
|
|
assert module_name not in sys.modules
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"runtime",
|
|
sorted(
|
|
rt
|
|
for rt in _STRATEGY_MODULES
|
|
if rt not in {"pytorch_fp16", "tensorrt", "onnx_trt_ep"}
|
|
),
|
|
)
|
|
def test_ac5_build_inference_runtime_flag_on_but_module_missing(
|
|
monkeypatch, strategy_module_cleanup, runtime
|
|
) -> None:
|
|
"""``BUILD_*=ON`` but the strategy module hasn't been written yet.
|
|
|
|
AZ-298 (TensorrtRuntime), AZ-299 (OnnxTrtEpRuntime), and AZ-300
|
|
(PytorchFp16Runtime) have all shipped their concrete modules and
|
|
are excluded; their protocol conformance is covered in the
|
|
per-strategy test files. This parameterisation guards the factory
|
|
path for any future strategy whose `BUILD_*` flag is wired in
|
|
`inference_factory._RUNTIME_TO_MODULE` ahead of its module landing.
|
|
"""
|
|
_, _, flag = _STRATEGY_MODULES[runtime]
|
|
monkeypatch.setenv(flag, "ON")
|
|
config = _config_with_runtime(runtime)
|
|
with pytest.raises(RuntimeNotAvailableError) as exc_info:
|
|
build_inference_runtime(config)
|
|
assert runtime in str(exc_info.value)
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# AC-6: unknown runtime label rejected at config load.
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"bad_label",
|
|
["tensorflow_lite", "onnx", "trt", "TENSORRT", ""],
|
|
)
|
|
def test_ac6_unknown_runtime_rejected_at_config_load(bad_label: str) -> None:
|
|
with pytest.raises(ConfigError) as exc_info:
|
|
C7InferenceConfig(runtime=bad_label)
|
|
msg = str(exc_info.value)
|
|
assert bad_label in msg or "runtime" in msg
|
|
for valid in KNOWN_RUNTIMES:
|
|
assert valid in msg
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# AC-7: current_runtime_label() matches config exactly.
|
|
|
|
|
|
@pytest.mark.parametrize("runtime", sorted(_STRATEGY_MODULES))
|
|
def test_ac7_current_runtime_label_matches_config(
|
|
monkeypatch, strategy_module_cleanup, runtime
|
|
) -> None:
|
|
_, _, flag = _STRATEGY_MODULES[runtime]
|
|
monkeypatch.setenv(flag, "ON")
|
|
_install_fake_strategy(runtime)
|
|
config = _config_with_runtime(runtime)
|
|
instance = build_inference_runtime(config)
|
|
assert instance.current_runtime_label() == runtime
|
|
assert instance.current_runtime_label() == config.components["c7_inference"].runtime
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# AC-8: contract file matches Protocol shape.
|
|
|
|
|
|
_METHOD_TABLE_RE = re.compile(r"^\|\s*`(?P<name>[a-z_][a-z0-9_]*)`\s*\|", re.MULTILINE)
|
|
|
|
|
|
def _methods_from_contract() -> set[str]:
|
|
text = _CONTRACT_PATH.read_text(encoding="utf-8")
|
|
surface_start = text.index("### Protocol surface")
|
|
next_section = text.find("\n### ", surface_start + len("### Protocol surface"))
|
|
section = text[surface_start:next_section] if next_section != -1 else text[surface_start:]
|
|
return {m.group("name") for m in _METHOD_TABLE_RE.finditer(section)}
|
|
|
|
|
|
def _protocol_methods(proto: type) -> set[str]:
|
|
return {
|
|
name
|
|
for name in dir(proto)
|
|
if not name.startswith("_") and callable(getattr(proto, name))
|
|
}
|
|
|
|
|
|
def test_ac8_contract_methods_match_protocol() -> None:
|
|
contract_methods = _methods_from_contract()
|
|
protocol_methods = _protocol_methods(InferenceRuntime)
|
|
missing_in_protocol = contract_methods - protocol_methods
|
|
missing_in_contract = protocol_methods - contract_methods
|
|
assert not missing_in_protocol, (
|
|
"Methods declared in inference_runtime_protocol.md Shape section "
|
|
f"but missing from the Protocol: {sorted(missing_in_protocol)}"
|
|
)
|
|
assert not missing_in_contract, (
|
|
"Methods present on the Protocol but missing from the contract "
|
|
f"Shape section: {sorted(missing_in_contract)}"
|
|
)
|
|
|
|
|
|
def test_ac8_contract_lists_all_nine_error_subtypes() -> None:
|
|
text = _CONTRACT_PATH.read_text(encoding="utf-8")
|
|
expected = {
|
|
"EngineBuildError",
|
|
"EngineDeserializeError",
|
|
"EngineHashMismatchError",
|
|
"EngineSchemaMismatchError",
|
|
"EngineSidecarMissingError",
|
|
"CalibrationCacheError",
|
|
"InferenceError",
|
|
"OutOfMemoryError",
|
|
"TelemetryUnavailableError",
|
|
}
|
|
for name in expected:
|
|
assert name in text, (
|
|
f"Contract file is missing the documented error subtype {name!r}"
|
|
)
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# NFRs.
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"exc_type",
|
|
[
|
|
EngineBuildError,
|
|
EngineDeserializeError,
|
|
EngineHashMismatchError,
|
|
EngineSchemaMismatchError,
|
|
EngineSidecarMissingError,
|
|
CalibrationCacheError,
|
|
InferenceError,
|
|
OutOfMemoryError,
|
|
TelemetryUnavailableError,
|
|
],
|
|
)
|
|
def test_nfr_reliability_all_runtime_errors_subclass_family(exc_type) -> None:
|
|
assert issubclass(exc_type, C7RuntimeError)
|
|
|
|
|
|
def test_nfr_reliability_runtime_not_available_not_in_family() -> None:
|
|
assert not issubclass(RuntimeNotAvailableError, C7RuntimeError)
|
|
|
|
|
|
def test_nfr_perf_factory_under_200ms_p99(
|
|
monkeypatch, strategy_module_cleanup
|
|
) -> None:
|
|
"""Factory p99 ≤ 200 ms across 1000 calls (NFR-perf-factory)."""
|
|
runtime = "pytorch_fp16"
|
|
_, _, flag = _STRATEGY_MODULES[runtime]
|
|
monkeypatch.setenv(flag, "ON")
|
|
_install_fake_strategy(runtime)
|
|
config = _config_with_runtime(runtime)
|
|
|
|
durations_ms: list[float] = []
|
|
for _ in range(1000):
|
|
t0 = time.perf_counter()
|
|
build_inference_runtime(config)
|
|
durations_ms.append((time.perf_counter() - t0) * 1000.0)
|
|
|
|
durations_ms.sort()
|
|
p99 = durations_ms[int(0.99 * len(durations_ms))]
|
|
assert p99 <= 200.0, (
|
|
f"build_inference_runtime() p99={p99:.3f} ms exceeds 200 ms NFR"
|
|
)
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Surface coverage.
|
|
|
|
|
|
def test_engine_handle_is_class_not_protocol() -> None:
|
|
"""C7 EngineHandle is an opaque class — not a runtime_checkable Protocol.
|
|
|
|
Distinguishes it from the LightGlue ``_types.manifests.EngineHandle``
|
|
Protocol (intentional dual-name design; see manifests.py docstring).
|
|
"""
|
|
assert isinstance(EngineHandle, type)
|
|
assert not hasattr(EngineHandle, "_is_runtime_protocol")
|
|
|
|
|
|
def test_c7_config_thermal_poll_hz_validation() -> None:
|
|
with pytest.raises(ConfigError):
|
|
C7InferenceConfig(thermal_poll_hz=0.0)
|
|
with pytest.raises(ConfigError):
|
|
C7InferenceConfig(thermal_poll_hz=-1.0)
|
|
|
|
|
|
def test_c7_config_engine_cache_dir_validation() -> None:
|
|
with pytest.raises(ConfigError):
|
|
C7InferenceConfig(engine_cache_dir="")
|
|
|
|
|
|
def test_precision_mode_enum_surface() -> None:
|
|
assert {v.value for v in PrecisionMode} == {"fp16", "int8", "mixed"}
|
|
|
|
|
|
def test_thermal_state_invariant_i6_default_safe() -> None:
|
|
"""When telemetry is unavailable, throttle MUST be False (Invariant I-6)."""
|
|
ts = ThermalState(
|
|
cpu_temp_c=None,
|
|
gpu_temp_c=None,
|
|
thermal_throttle_active=False,
|
|
measured_clock_mhz=None,
|
|
measured_at_ns=0,
|
|
is_telemetry_available=False,
|
|
)
|
|
assert ts.thermal_throttle_active is False
|
|
assert ts.is_telemetry_available is False
|