mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 15:11:12 +00:00
[AZ-297] C7 InferenceRuntime: Protocol + DTOs + factory
Freezes the c7_inference Public API per _docs/02_document/contracts/c7_inference/inference_runtime_protocol.md v1.0.0:

- InferenceRuntime Protocol (6 methods: compile_engine, deserialize_engine, infer, release_engine, thermal_state, current_runtime_label) in components/c7_inference/interface.py.
- DTOs (PrecisionMode enum, OptimizationProfile, BuildConfig, EngineCacheEntry, EngineHandle opaque marker) in _types/inference.py — placed at the L1 types layer so C10 can re-export EngineCacheEntry without crossing the components.* boundary (AZ-270 AC-6).
- ThermalState DTO expanded in _types/thermal.py from the AZ-355 forward-declared stub to the AZ-297 contract shape (cpu/gpu temp, thermal_throttle_active, measured_clock_mhz, measured_at_ns, is_telemetry_available). Invariant I-6: when telemetry is unavailable, throttle is False.
- Error family rooted at c7_inference.errors.RuntimeError (9 subtypes: EngineBuildError, EngineDeserializeError, EngineHashMismatchError, EngineSchemaMismatchError, EngineSidecarMissingError, CalibrationCacheError, InferenceError, OutOfMemoryError, TelemetryUnavailableError). RuntimeNotAvailableError stays in runtime_root/errors.py — composition-time, outside the family.
- C7InferenceConfig per-component config block (runtime label, thermal_poll_hz, engine_cache_dir) with constructor-time validation rejecting unknown runtime labels.
- Composition-root factory build_inference_runtime in runtime_root/inference_factory.py with three BUILD_* gates (BUILD_TENSORRT_RUNTIME, BUILD_ONNX_TRT_EP_RUNTIME, BUILD_PYTORCH_FP16_RUNTIME). Concrete strategy modules are imported lazily via __import__ AFTER the flag check, so a Tier-0 build with the flag OFF MUST NOT load the strategy module (AC-5 / I-5; verifiable via sys.modules).
- 37 conformance tests cover all 8 ACs + NFR-perf-factory (p99 build under 200 ms × 1000 calls) + NFR-reliability-error-family. AC-8 introspects the contract file's Shape table and asserts method parity against the runtime Protocol; also asserts all 9 error subtypes are documented.

Retired the AZ-263 scaffolding EngineCacheEntry from _types/manifests.py (replaced by the AZ-297 canonical shape in _types/inference.py); updated the LightGlue-flavoured EngineHandle Protocol docstring in _types/manifests.py to rationalize its intentional dual existence with the C7 opaque EngineHandle (same name, different consumer-side cut, mirroring the C4/C5 ISam2GraphHandle pattern).

Stale ThermalState.throttle docstring references in c4_pose/config.py, c4_pose/interface.py, and _types/pose.py updated to thermal_throttle_active.

Full unit-test sweep: 843 passed, 2 pre-existing environment skips (cmake, actionlint).

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,118 @@
|
||||
"""C7 inference-runtime composition-root factory (AZ-297).

:func:`build_inference_runtime` selects exactly one strategy by
``config.components['c7_inference'].runtime`` and respects compile-time
``BUILD_*`` gating: requesting a strategy whose flag is OFF raises
:class:`RuntimeNotAvailableError` at composition time (NOT at first
inference call).

The concrete strategy modules (``tensorrt_runtime``, ``onnx_trt_ep_runtime``,
``pytorch_fp16_runtime``) are imported lazily — a Tier-0 workstation
build with ``BUILD_TENSORRT_RUNTIME=OFF`` MUST NOT load
``c7_inference.tensorrt_runtime`` (Invariant I-5; verifiable via
``sys.modules``).
"""
|
||||
|
||||
from __future__ import annotations

import importlib
import os
from typing import TYPE_CHECKING

from gps_denied_onboard.runtime_root.errors import RuntimeNotAvailableError

if TYPE_CHECKING:
    from gps_denied_onboard.components.c7_inference import (
        C7InferenceConfig,
        InferenceRuntime,
    )
    from gps_denied_onboard.config.schema import Config
|
||||
|
||||
__all__ = ["build_inference_runtime"]
|
||||
|
||||
|
||||
# Runtime label -> name of the ``BUILD_*`` environment flag that gates it.
# A label that is absent from this table has no gate and is rejected by
# the factory as not buildable.
_RUNTIME_TO_BUILD_FLAG: dict[str, str] = dict(
    tensorrt="BUILD_TENSORRT_RUNTIME",
    onnx_trt_ep="BUILD_ONNX_TRT_EP_RUNTIME",
    pytorch_fp16="BUILD_PYTORCH_FP16_RUNTIME",
)
|
||||
|
||||
# Runtime label -> (dotted module path, class name) of the concrete
# strategy. Only strings are stored here; the module is resolved by the
# factory at call time.
_RUNTIME_TO_MODULE: dict[str, tuple[str, str]] = dict(
    tensorrt=(
        "gps_denied_onboard.components.c7_inference.tensorrt_runtime",
        "TensorrtRuntime",
    ),
    onnx_trt_ep=(
        "gps_denied_onboard.components.c7_inference.onnx_trt_ep_runtime",
        "OnnxTrtEpRuntime",
    ),
    pytorch_fp16=(
        "gps_denied_onboard.components.c7_inference.pytorch_fp16_runtime",
        "PytorchFp16Runtime",
    ),
)
|
||||
|
||||
|
||||
def _is_build_flag_on(flag_name: str) -> bool:
    """Report whether the ``BUILD_*`` flag *flag_name* is enabled.

    The flag is looked up in the process environment. After stripping
    surrounding whitespace and lower-casing, the values ``on``, ``1``,
    ``true`` and ``yes`` count as enabled. Every other value — and an
    unset variable — counts as disabled, so each strategy has to be
    opted into explicitly.
    """
    value = os.environ.get(flag_name)
    if value is None:
        # Unset flag: OFF by default.
        return False
    normalized = value.strip().lower()
    return normalized in ("on", "1", "true", "yes")
|
||||
|
||||
|
||||
def _c7_config(config: "Config") -> "C7InferenceConfig":
    """Return the ``'c7_inference'`` entry of ``config.components``.

    The block is expected to have been registered when the
    ``c7_inference`` package was imported. No fallback is attempted:
    if the entry is absent, the plain mapping lookup raises
    ``KeyError`` so the missing registration is noticed as a developer
    error instead of being papered over.
    """
    registered_blocks = config.components
    return registered_blocks["c7_inference"]
|
||||
|
||||
|
||||
def build_inference_runtime(config: "Config") -> "InferenceRuntime":
    """Construct the :class:`InferenceRuntime` impl selected by config.

    The factory:

    1. Reads ``config.components['c7_inference'].runtime``.
    2. Checks the matching ``BUILD_*`` flag — if OFF, raises
       :class:`RuntimeNotAvailableError` BEFORE any import.
    3. Lazily imports the concrete strategy module.
    4. Constructs and returns the strategy instance, passing ``config``.

    Args:
        config: The composed application config; its ``components``
            mapping must contain the ``'c7_inference'`` block.

    Returns:
        The instance produced by calling the selected strategy class
        with ``config``.

    Raises:
        RuntimeNotAvailableError: When

            - the runtime label has no entry in the flag/module tables;
            - the compile-time flag is OFF (the canonical Tier-0 path);
            - the concrete strategy module has not been built yet
              (AZ-298 / AZ-299 / AZ-300 are still pending) — the import
              fails and the factory wraps :class:`ModuleNotFoundError`;
            - the strategy module exists but a module it imports is
              missing — reported with a message naming that module.
        KeyError: When the ``'c7_inference'`` block is not registered.
        AttributeError: When the strategy module imports but does not
            define the expected class.
    """
    runtime = _c7_config(config).runtime
    flag_name = _RUNTIME_TO_BUILD_FLAG.get(runtime)
    module_info = _RUNTIME_TO_MODULE.get(runtime)
    if flag_name is None or module_info is None:
        raise RuntimeNotAvailableError(
            f"InferenceRuntime runtime {runtime!r} is not buildable in "
            "this binary."
        )

    # Gate check comes first so that a disabled strategy is never imported.
    if not _is_build_flag_on(flag_name):
        raise RuntimeNotAvailableError(
            f"InferenceRuntime runtime {runtime!r} requires "
            f"{flag_name}=ON in this binary; the flag is OFF."
        )

    module_name, class_name = module_info
    try:
        module = importlib.import_module(module_name)
    except ModuleNotFoundError as exc:
        missing = exc.name
        # ``exc.name`` is the module that could not be found. It is the
        # strategy itself (or one of its parent packages) only when it is
        # a dotted prefix of ``module_name``; anything else is a module
        # the strategy tried to import.
        strategy_itself_missing = (
            missing is None
            or missing == module_name
            or module_name.startswith(f"{missing}.")
        )
        if not strategy_itself_missing:
            raise RuntimeNotAvailableError(
                f"InferenceRuntime runtime {runtime!r} is configured but its "
                f"concrete impl module {module_name!r} could not be imported "
                f"because the module {missing!r} it depends on is missing."
            ) from exc
        raise RuntimeNotAvailableError(
            f"InferenceRuntime runtime {runtime!r} is configured but its "
            f"concrete impl module {module_name!r} has not been built into "
            "this binary yet (AZ-298 / AZ-299 / AZ-300 pending)."
        ) from exc

    strategy_cls = getattr(module, class_name)
    return strategy_cls(config)
|
||||
Reference in New Issue
Block a user