mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 18:51:15 +00:00
[AZ-297] C7 InferenceRuntime: Protocol + DTOs + factory
Freezes the c7_inference Public API per _docs/02_document/contracts/c7_inference/inference_runtime_protocol.md v1.0.0:
- InferenceRuntime Protocol (6 methods: compile_engine, deserialize_engine, infer, release_engine, thermal_state, current_runtime_label) in components/c7_inference/interface.py.
- DTOs (PrecisionMode enum, OptimizationProfile, BuildConfig, EngineCacheEntry, EngineHandle opaque marker) in _types/inference.py — placed at the L1 types layer so C10 can re-export EngineCacheEntry without crossing the components.* boundary (AZ-270 AC-6).
- ThermalState DTO expanded in _types/thermal.py from the AZ-355 forward-declared stub to the AZ-297 contract shape (cpu/gpu temp, thermal_throttle_active, measured_clock_mhz, measured_at_ns, is_telemetry_available). Invariant I-6: when telemetry is unavailable, throttle is False.
- Error family rooted at c7_inference.errors.RuntimeError (9 subtypes: EngineBuildError, EngineDeserializeError, EngineHashMismatchError, EngineSchemaMismatchError, EngineSidecarMissingError, CalibrationCacheError, InferenceError, OutOfMemoryError, TelemetryUnavailableError). RuntimeNotAvailableError stays in runtime_root/errors.py — composition-time, outside the family.
- C7InferenceConfig per-component config block (runtime label, thermal_poll_hz, engine_cache_dir) with constructor-time validation rejecting unknown runtime labels.
- Composition-root factory build_inference_runtime in runtime_root/inference_factory.py with three BUILD_* gates (BUILD_TENSORRT_RUNTIME, BUILD_ONNX_TRT_EP_RUNTIME, BUILD_PYTORCH_FP16_RUNTIME). Concrete strategy modules are imported lazily via __import__ AFTER the flag check, so a Tier-0 build with the flag OFF MUST NOT load the strategy module (AC-5 / I-5; verifiable via sys.modules).
- 37 conformance tests cover all 8 ACs + NFR-perf-factory (p99 build under 200 ms × 1000 calls) + NFR-reliability-error-family.
AC-8 introspects the contract file's Shape table and asserts method parity against the runtime Protocol; also asserts all 9 error subtypes are documented. Retired the AZ-263 scaffolding EngineCacheEntry from _types/manifests.py (replaced by the AZ-297 canonical shape in _types/inference.py); updated the LightGlue-flavoured EngineHandle Protocol docstring in _types/manifests.py to rationalize its intentional dual existence with the C7 opaque EngineHandle (same name, different consumer-side cut, mirroring the C4/C5 ISam2GraphHandle pattern). Stale ThermalState.throttle docstring references in c4_pose/config.py, c4_pose/interface.py, and _types/pose.py updated to thermal_throttle_active. Full unit-test sweep: 843 passed, 2 pre-existing environment skips (cmake, actionlint). Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -1,6 +1,68 @@
|
||||
"""C7 Inference Runtime component — Public API."""
|
||||
"""C7 Inference Runtime — Public API (AZ-297).
|
||||
|
||||
from gps_denied_onboard._types.manifests import EngineCacheEntry
|
||||
Per ``inference_runtime_protocol.md`` v1.0.0 the public surface
|
||||
consists of:
|
||||
|
||||
- :class:`InferenceRuntime` Protocol (6 methods).
|
||||
- DTOs: :class:`BuildConfig`, :class:`EngineCacheEntry`,
|
||||
:class:`EngineHandle` (opaque marker), :class:`OptimizationProfile`,
|
||||
:class:`PrecisionMode` enum.
|
||||
- :class:`ThermalState` re-exported from
|
||||
:mod:`gps_denied_onboard._types.thermal` (its canonical home for
|
||||
C4 forward-compatibility).
|
||||
- Error family rooted at :class:`RuntimeError`; nine documented subtypes.
|
||||
- Config block :class:`C7InferenceConfig` (registered on import).
|
||||
|
||||
Concrete strategies (``TensorrtRuntime``, ``OnnxTrtEpRuntime``,
|
||||
``PytorchFp16Runtime``) live in sibling modules and are imported
|
||||
lazily by :mod:`gps_denied_onboard.runtime_root.inference_factory` —
|
||||
Risk-2 mitigation: this ``__init__.py`` MUST NOT import any concrete
|
||||
strategy module.
|
||||
"""
|
||||
|
||||
from gps_denied_onboard._types.inference import (
|
||||
BuildConfig,
|
||||
EngineCacheEntry,
|
||||
EngineHandle,
|
||||
OptimizationProfile,
|
||||
PrecisionMode,
|
||||
)
|
||||
from gps_denied_onboard._types.thermal import ThermalState
|
||||
from gps_denied_onboard.components.c7_inference.config import C7InferenceConfig
|
||||
from gps_denied_onboard.components.c7_inference.errors import (
|
||||
CalibrationCacheError,
|
||||
EngineBuildError,
|
||||
EngineDeserializeError,
|
||||
EngineHashMismatchError,
|
||||
EngineSchemaMismatchError,
|
||||
EngineSidecarMissingError,
|
||||
InferenceError,
|
||||
OutOfMemoryError,
|
||||
RuntimeError,
|
||||
TelemetryUnavailableError,
|
||||
)
|
||||
from gps_denied_onboard.components.c7_inference.interface import InferenceRuntime
|
||||
from gps_denied_onboard.config.schema import register_component_block
|
||||
|
||||
# Import-time side effect: publish the C7 config block under the
# ``c7_inference`` key of the component-config registry.
register_component_block("c7_inference", C7InferenceConfig)

# Explicit public surface of the package. The earlier two-name
# ``__all__`` was a dead assignment (immediately overwritten by this
# list) and has been dropped.
__all__ = [
    "BuildConfig",
    "C7InferenceConfig",
    "CalibrationCacheError",
    "EngineBuildError",
    "EngineCacheEntry",
    "EngineDeserializeError",
    "EngineHandle",
    "EngineHashMismatchError",
    "EngineSchemaMismatchError",
    "EngineSidecarMissingError",
    "InferenceError",
    "InferenceRuntime",
    "OptimizationProfile",
    "OutOfMemoryError",
    "PrecisionMode",
    "RuntimeError",
    "TelemetryUnavailableError",
    "ThermalState",
]
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
"""C7 inference runtime config block (AZ-297).
|
||||
|
||||
Registered into ``config.components['c7_inference']`` by the package
|
||||
``__init__.py``. The composition-root factory
|
||||
:func:`gps_denied_onboard.runtime_root.inference_factory.build_inference_runtime`
|
||||
reads this block to select the strategy and locate the engine cache.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Final
|
||||
|
||||
from gps_denied_onboard.config.schema import ConfigError
|
||||
|
||||
__all__ = [
|
||||
"C7InferenceConfig",
|
||||
"KNOWN_RUNTIMES",
|
||||
]
|
||||
|
||||
# Closed set of strategy labels that ``C7InferenceConfig.runtime`` accepts.
KNOWN_RUNTIMES: Final[frozenset[str]] = frozenset(
    ("tensorrt", "onnx_trt_ep", "pytorch_fp16")
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class C7InferenceConfig:
    """Per-component config block for the C7 inference runtime.

    Every field is validated in ``__post_init__``, so an invalid block
    fails at construction time with :class:`ConfigError`.

    Attributes:
        runtime: Strategy label; must be a member of ``KNOWN_RUNTIMES``
            (``tensorrt`` / ``onnx_trt_ep`` / ``pytorch_fp16``). The
            composition-root factory applies compile-time ``BUILD_*``
            gating on top of this label (a runtime whose flag is OFF
            raises :class:`RuntimeNotAvailableError` at composition
            time).
        thermal_poll_hz: Cadence at which AZ-302's
            ``ThermalStatePublisher`` polls ``tegrastats`` / ``jtop``.
            Must be a finite number greater than zero; the 1 Hz default
            keeps the SDK telemetry source cool.
        engine_cache_dir: Filesystem root holding the compiled
            ``.engine`` binaries and their ``.sha256`` sidecars; the
            C10 pre-flight ``CacheProvisioner`` writes here. Must be
            non-empty.

    Raises:
        ConfigError: If any field fails the checks described above.
    """

    runtime: str = "pytorch_fp16"
    thermal_poll_hz: float = 1.0
    engine_cache_dir: str = "/var/lib/gps-denied/engines"

    def __post_init__(self) -> None:
        """Validate the block; raise :class:`ConfigError` on the first bad field."""
        if self.runtime not in KNOWN_RUNTIMES:
            raise ConfigError(
                f"C7InferenceConfig.runtime={self.runtime!r} not in "
                f"{sorted(KNOWN_RUNTIMES)}"
            )
        if self.thermal_poll_hz <= 0:
            raise ConfigError(
                f"C7InferenceConfig.thermal_poll_hz must be > 0; "
                f"got {self.thermal_poll_hz}"
            )
        # NaN compares False against everything and +inf is > 0, so
        # neither is rejected by the check above; both are meaningless
        # as a polling cadence.
        if not self.thermal_poll_hz < float("inf"):
            raise ConfigError(
                f"C7InferenceConfig.thermal_poll_hz must be finite; "
                f"got {self.thermal_poll_hz}"
            )
        if not self.engine_cache_dir:
            raise ConfigError(
                "C7InferenceConfig.engine_cache_dir must be non-empty"
            )
|
||||
@@ -0,0 +1,94 @@
|
||||
"""C7 inference runtime error taxonomy (AZ-297).
|
||||
|
||||
All Protocol methods raise only members of :class:`RuntimeError` (the
|
||||
C7-local family — NOT stdlib :class:`builtins.RuntimeError`).
|
||||
Consumers catch the family with a single
|
||||
``except c7_inference.errors.RuntimeError``; implementations rewrap
|
||||
third-party library exceptions (TRT C++ exceptions, ORT internal
|
||||
errors, PyTorch CUDA errors) into one of these types.
|
||||
|
||||
Two composition-time errors live OUTSIDE the family:
|
||||
- :class:`gps_denied_onboard.runtime_root.errors.RuntimeNotAvailableError`
|
||||
is raised by the factory when the requested ``BUILD_*`` is OFF.
|
||||
- :class:`gps_denied_onboard.config.schema.ConfigError` is raised at
|
||||
config-load time for unknown ``runtime`` labels.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
__all__ = [
|
||||
"CalibrationCacheError",
|
||||
"EngineBuildError",
|
||||
"EngineDeserializeError",
|
||||
"EngineHashMismatchError",
|
||||
"EngineSchemaMismatchError",
|
||||
"EngineSidecarMissingError",
|
||||
"InferenceError",
|
||||
"OutOfMemoryError",
|
||||
"RuntimeError",
|
||||
"TelemetryUnavailableError",
|
||||
]
|
||||
|
||||
|
||||
class RuntimeError(Exception):  # noqa: A001 - the contract names this RuntimeError
    """Root of the C7 inference runtime error family.

    The contract (``inference_runtime_protocol.md``, Shape § Error
    hierarchy) fixes the name ``RuntimeError``. The builtin of the same
    name is shadowed only within ``c7_inference.errors``; consumers MUST
    import this class by its fully-qualified name so the two are never
    confused.
    """
|
||||
|
||||
|
||||
class EngineBuildError(RuntimeError):
    """Raised when ``compile_engine`` fails before yielding a usable engine binary."""
|
||||
|
||||
|
||||
class EngineDeserializeError(RuntimeError):
    """Raised when ``deserialize_engine`` cannot load the engine binary into memory."""
|
||||
|
||||
|
||||
class EngineHashMismatchError(RuntimeError):
    """Raised when the engine file's sha256 differs from the cache entry.

    The expected digest is :attr:`EngineCacheEntry.sha256_hex`
    (D-C10-3 / cache poisoning gate).
    """
|
||||
|
||||
|
||||
class EngineSchemaMismatchError(RuntimeError):
    """Raised when the engine's IO schema differs from the runtime expectation.

    The schema covers input / output tensor names and shapes (D-C10-7).
    """
|
||||
|
||||
|
||||
class EngineSidecarMissingError(RuntimeError):
    """Raised when the ``.sha256`` sidecar next to the engine binary is absent or unreadable."""
|
||||
|
||||
|
||||
class CalibrationCacheError(RuntimeError):
    """Raised when an INT8 ``compile_engine`` cannot use the calibration cache.

    Applies to ``compile_engine`` with ``precision=INT8`` when the
    calibration cache can be neither loaded nor written.
    """
|
||||
|
||||
|
||||
class InferenceError(RuntimeError):
    """Raised when ``infer`` fails during GPU execution.

    Implementations MUST rewrap CUDA / TRT C++ / ORT exceptions into
    this type; raw third-party exceptions MUST NOT escape the Protocol.
    """
|
||||
|
||||
|
||||
class OutOfMemoryError(RuntimeError):
    """GPU OOM during ``deserialize_engine`` or ``infer``.

    Named ``OutOfMemoryError`` per the contract. Python's builtins
    define ``MemoryError`` but no ``OutOfMemoryError``, so this name
    does not shadow a builtin and needs no ``A001`` suppression.

    NOTE(review): third-party libraries may export an identically named
    class; import this one from ``c7_inference.errors`` by qualified
    name to keep the two apart.
    """
|
||||
|
||||
|
||||
class TelemetryUnavailableError(RuntimeError):
    """Raised when ``thermal_state`` cannot bind its telemetry source at cold start.

    The source is ``tegrastats`` / ``jtop``. This is the cold-start
    fail-fast only: steady-state telemetry absence is reported through
    ``ThermalState.is_telemetry_available == False`` and does NOT raise
    this exception.
    """
|
||||
@@ -1,18 +1,106 @@
|
||||
"""C7 `InferenceRuntime` Protocol.
|
||||
"""C7 ``InferenceRuntime`` Protocol (AZ-297).
|
||||
|
||||
Concrete impls: `TensorrtRuntime` (production-default; TensorRT 10.3),
|
||||
`OnnxTrtEpRuntime` (ONNX Runtime + TensorRT EP), `PytorchFp16Runtime` (research
|
||||
baseline). See `_docs/02_document/components/09_c7_inference/`.
|
||||
PEP 544 ``typing.Protocol`` with ``runtime_checkable=True``; six
|
||||
methods that span engine lifecycle (compile / deserialize / release),
|
||||
per-call inference, thermal-state telemetry, and runtime-label
|
||||
self-report. Concrete impls — :class:`TensorrtRuntime` (AZ-298),
|
||||
:class:`OnnxTrtEpRuntime` (AZ-299), :class:`PytorchFp16Runtime`
|
||||
(AZ-300) — live in sibling modules and are imported lazily by
|
||||
:mod:`gps_denied_onboard.runtime_root.inference_factory`.
|
||||
|
||||
The contract at
|
||||
``_docs/02_document/contracts/c7_inference/inference_runtime_protocol.md``
|
||||
v1.0.0 is the authoritative shape; this module mirrors it 1:1.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Protocol
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Literal, Protocol, runtime_checkable
|
||||
|
||||
from gps_denied_onboard._types.inference import (
|
||||
BuildConfig,
|
||||
EngineCacheEntry,
|
||||
EngineHandle,
|
||||
)
|
||||
from gps_denied_onboard._types.thermal import ThermalState
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import numpy as np
|
||||
|
||||
__all__ = ["InferenceRuntime"]
|
||||
|
||||
|
||||
@runtime_checkable
class InferenceRuntime(Protocol):
    """On-Jetson inference runtime. See ``inference_runtime_protocol.md`` v1.0.0.

    Implementations: :class:`TensorrtRuntime` (production-default, TRT 10.3),
    :class:`OnnxTrtEpRuntime` (ONNX Runtime + TensorRT EP fallback),
    :class:`PytorchFp16Runtime` (research / Tier-0 baseline). Selection
    is owned by the composition root.

    The Protocol has exactly six methods. The pre-AZ-297 members
    (``load`` and the single-argument ``infer``) are not part of it.
    Because the class is ``runtime_checkable``, ``isinstance`` checks
    only that the six method names are present, not their signatures.
    """

    def compile_engine(
        self, model_path: Path, build_config: BuildConfig
    ) -> EngineCacheEntry:
        """Offline (minutes for INT8) engine compilation.

        Raises :class:`EngineBuildError` on compile failure;
        :class:`CalibrationCacheError` when
        ``build_config.precision == INT8`` and the calibration data
        cannot be loaded.
        """
        ...

    def deserialize_engine(self, entry: EngineCacheEntry) -> EngineHandle:
        """Load a compiled engine into GPU memory.

        Raises :class:`EngineDeserializeError` /
        :class:`EngineHashMismatchError` /
        :class:`EngineSchemaMismatchError` /
        :class:`EngineSidecarMissingError` /
        :class:`OutOfMemoryError` per the contract envelope.
        """
        ...

    def infer(
        self,
        handle: EngineHandle,
        inputs: "dict[str, np.ndarray]",
    ) -> "dict[str, np.ndarray]":
        """Synchronous request/response inference (Invariant I-8).

        Returns AFTER the GPU stream has synchronised; output tensors
        are host-resident :class:`numpy.ndarray` ready for consumer
        use. Raises :class:`InferenceError` /
        :class:`OutOfMemoryError`.
        """
        ...

    def release_engine(self, handle: EngineHandle) -> None:
        """Drop the engine + free GPU memory.

        Idempotent (Invariant I-7) — second-and-later calls on the
        same handle return silently.
        """
        ...

    def thermal_state(self) -> ThermalState:
        """Most-recent thermal reading from ``tegrastats`` / ``jtop``.

        Raises :class:`TelemetryUnavailableError` only on cold-start
        binding failure; steady-state telemetry absence is signalled
        by ``ThermalState.is_telemetry_available == False`` (Invariant I-6).
        """
        ...

    def current_runtime_label(
        self,
    ) -> Literal["tensorrt", "onnx_trt_ep", "pytorch_fp16"]:
        """Identify which concrete strategy is wired here.

        Returned string equals ``config.inference.runtime`` exactly
        (Invariant I-1); AC-NEW-3 FDR audit relies on this property.
        """
        ...
|
||||
|
||||
Reference in New Issue
Block a user