mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 17:11:14 +00:00
18a69022b3
Implement the production-default InferenceRuntime strategy on JetPack 6.2 + TensorRT 10.3 (per D-C7-9). The runtime owns the full TRT lifecycle: compile_engine via the Polygraphy + trtexec + IBuilderConfig hybrid (FP16 / INT8 / Mixed precision), deserialize_engine with EngineGate-first ordering and a pre-allocation GPU memory budget gate, infer via H2D -> enqueueV3 -> D2H -> stream sync on the owned CUDA stream, idempotent release_engine, and an injected ThermalStatePublisher delegation for thermal_state. INT8 calibration cache trust (D-C10-6, AC-2/3/4) is enforced by a .calib_cache.sha256 file-integrity sidecar (AZ-280) plus a new .calib_cache.dataset_sha256 sidecar that records the dataset content hash at compile time; reuse only when both agree, rebuild silently on dataset hash mismatch, raise CalibrationCacheError on corrupt sidecar (never silently overwritten). GPU memory budget (NFT-LIM-01, default 4 GiB) is checked BEFORE any TRT call beyond the gate (AC-6); a pre-allocation refusal raises OutOfMemoryError and leaves the resident state unchanged. TensorRT 10.3 / Polygraphy / PyCUDA are lazy-imported inside the methods that need them so the module loads cleanly on Tier-0 hosts. A standalone CLI entry (python -m gps_denied_onboard.components.c7_inference.tensorrt_runtime compile <onnx> <build_config.json>) is wired for C10 CacheProvisioner (AZ-321) to invoke pre-flight without holding a runtime instance. C7InferenceConfig gains gpu_memory_budget_bytes (default 4 GiB) and trtexec_timeout_s (default 600 s, Risk 4 mitigation), both validated in __post_init__. Tests: 26 active + 6 Tier-2-gated skips; AC-1 / AC-3 / AC-4 / AC-5 / AC-6 / AC-7 / AC-10 + NFR-reliability fully covered on Tier-1 via fake CUDA / TRT modules; AC-2 / AC-8 / AC-9 / NFR-perf-deserialize placeholders skip with prerequisite reason and live in the AZ-298 Tier-2 microbench harness. Code review verdict PASS_WITH_WARNINGS (1 Medium hot-path hoist fix auto-applied). 
Co-authored-by: Cursor <cursoragent@cursor.com>
88 lines
3.2 KiB
Python
88 lines
3.2 KiB
Python
"""C7 inference runtime config block (AZ-297).

Registered into ``config.components['c7_inference']`` by the package
``__init__.py``. The composition-root factory
:func:`gps_denied_onboard.runtime_root.inference_factory.build_inference_runtime`
reads this block to select the strategy and locate the engine cache.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from typing import Final
|
|
|
|
from gps_denied_onboard.config.schema import ConfigError
|
|
|
|
# Names exported by ``from <this module> import *``.
__all__ = ["KNOWN_RUNTIMES", "C7InferenceConfig"]
|
|
|
|
# Closed set of strategy labels accepted by ``C7InferenceConfig.runtime``.
KNOWN_RUNTIMES: Final[frozenset[str]] = frozenset(
    {"tensorrt", "onnx_trt_ep", "pytorch_fp16"}
)


@dataclass(frozen=True)
class C7InferenceConfig:
    """Per-component config for C7 inference.

    ``runtime`` selects exactly one of the three strategies
    (``tensorrt`` / ``onnx_trt_ep`` / ``pytorch_fp16``); the
    composition-root factory respects compile-time ``BUILD_*`` gating
    on top of this label (a runtime whose flag is OFF raises
    :class:`RuntimeNotAvailableError` at composition time).

    ``thermal_poll_hz`` is the cadence at which AZ-302's
    ``ThermalStatePublisher`` polls ``tegrastats`` / ``jtop``;
    default 1 Hz keeps the SDK telemetry source cool.

    ``engine_cache_dir`` is the filesystem root where compiled
    ``.engine`` binaries + ``.sha256`` sidecars live; the C10
    pre-flight ``CacheProvisioner`` writes here.

    ``per_frame_debug_log`` is a boolean flag, off by default. It is
    neither validated nor consumed in this module (presumably it
    toggles per-frame debug logging in the runtime -- confirm against
    the consumer).

    ``gpu_memory_budget_bytes`` caps the aggregate GPU memory the
    ``TensorrtRuntime`` is allowed to hold across resident engines
    (C7-PT-02 / NFT-LIM-01); default 4 GiB. The ``TensorrtRuntime``
    enforces this at :meth:`deserialize_engine` time and refuses with
    :class:`OutOfMemoryError` BEFORE allocating buffers when a new
    engine would push past the cap.

    ``trtexec_timeout_s`` bounds the ``trtexec`` subprocess used by
    ``TensorrtRuntime.compile_engine`` when ``BuildConfig.use_trtexec``
    is true (AZ-298 Risk 4); default 10 minutes.

    Raises:
        ConfigError: from ``__post_init__`` when ``runtime`` is not in
            ``KNOWN_RUNTIMES``, when ``engine_cache_dir`` is empty, or
            when ``thermal_poll_hz``, ``gpu_memory_budget_bytes`` or
            ``trtexec_timeout_s`` is not strictly positive (NaN
            included).
    """

    runtime: str = "pytorch_fp16"
    thermal_poll_hz: float = 1.0
    engine_cache_dir: str = "/var/lib/gps-denied/engines"
    per_frame_debug_log: bool = False
    gpu_memory_budget_bytes: int = 4 * 1024 * 1024 * 1024
    trtexec_timeout_s: int = 600

    def __post_init__(self) -> None:
        """Validate field values; raise ``ConfigError`` on the first violation."""
        if self.runtime not in KNOWN_RUNTIMES:
            raise ConfigError(
                f"C7InferenceConfig.runtime={self.runtime!r} not in "
                f"{sorted(KNOWN_RUNTIMES)}"
            )
        # The numeric checks below are written ``not (x > 0)`` instead of
        # ``x <= 0``: NaN compares False against everything, so the ``<=``
        # form would silently accept ``float("nan")`` (e.g. YAML ``.nan``).
        if not (self.thermal_poll_hz > 0):
            raise ConfigError(
                f"C7InferenceConfig.thermal_poll_hz must be > 0; "
                f"got {self.thermal_poll_hz}"
            )
        if not self.engine_cache_dir:
            raise ConfigError(
                "C7InferenceConfig.engine_cache_dir must be non-empty"
            )
        if not (self.gpu_memory_budget_bytes > 0):
            raise ConfigError(
                "C7InferenceConfig.gpu_memory_budget_bytes must be > 0; "
                f"got {self.gpu_memory_budget_bytes}"
            )
        if not (self.trtexec_timeout_s > 0):
            raise ConfigError(
                "C7InferenceConfig.trtexec_timeout_s must be > 0; "
                f"got {self.trtexec_timeout_s}"
            )
|