"""C7 inference runtime config block (AZ-297).

Registered into ``config.components['c7_inference']`` by the package
``__init__.py``. The composition-root factory
:func:`gps_denied_onboard.runtime_root.inference_factory.build_inference_runtime`
reads this block to select the strategy and locate the engine cache.
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Final

from gps_denied_onboard.config.schema import ConfigError

__all__ = [
    "KNOWN_RUNTIMES",
    "C7InferenceConfig",
]

# Closed set of runtime labels accepted by ``C7InferenceConfig.runtime``.
KNOWN_RUNTIMES: Final[frozenset[str]] = frozenset(
    {"tensorrt", "onnx_trt_ep", "pytorch_fp16"}
)


@dataclass(frozen=True)
class C7InferenceConfig:
    """Per-component config for C7 inference.

    ``runtime`` selects exactly one of the three strategies
    (``tensorrt`` / ``onnx_trt_ep`` / ``pytorch_fp16``); the
    composition-root factory respects compile-time ``BUILD_*`` gating on
    top of this label (a runtime whose flag is OFF raises
    :class:`RuntimeNotAvailableError` at composition time).

    ``thermal_poll_hz`` is the cadence at which AZ-302's
    ``ThermalStatePublisher`` polls ``tegrastats`` / ``jtop``; default
    1 Hz keeps the SDK telemetry source cool.

    ``engine_cache_dir`` is the filesystem root where compiled
    ``.engine`` binaries + ``.sha256`` sidecars live; the C10 pre-flight
    ``CacheProvisioner`` writes here.

    ``per_frame_debug_log`` is a boolean flag, off by default. It is
    neither validated nor consumed in this module; presumably it enables
    per-frame debug logging in the runtime -- confirm against the
    consumer.

    ``gpu_memory_budget_bytes`` caps the aggregate GPU memory the
    ``TensorrtRuntime`` is allowed to hold across resident engines
    (C7-PT-02 / NFT-LIM-01); default 4 GiB. The ``TensorrtRuntime``
    enforces this at :meth:`deserialize_engine` time and refuses with
    :class:`OutOfMemoryError` BEFORE allocating buffers when a new engine
    would push past the cap.

    ``trtexec_timeout_s`` bounds the ``trtexec`` subprocess used by
    ``TensorrtRuntime.compile_engine`` when ``BuildConfig.use_trtexec``
    is true (AZ-298 Risk 4); default 10 minutes.

    Raises:
        ConfigError: at construction time when ``runtime`` is not one of
            ``KNOWN_RUNTIMES``, when ``thermal_poll_hz`` is not a
            positive number (NaN included), when ``engine_cache_dir`` is
            empty, or when ``gpu_memory_budget_bytes`` /
            ``trtexec_timeout_s`` is not positive.
    """

    runtime: str = "pytorch_fp16"
    thermal_poll_hz: float = 1.0
    engine_cache_dir: str = "/var/lib/gps-denied/engines"
    per_frame_debug_log: bool = False
    gpu_memory_budget_bytes: int = 4 * 1024 * 1024 * 1024
    trtexec_timeout_s: int = 600

    def __post_init__(self) -> None:
        """Validate every field, raising ``ConfigError`` on the first bad one."""
        if self.runtime not in KNOWN_RUNTIMES:
            raise ConfigError(
                f"C7InferenceConfig.runtime={self.runtime!r} not in "
                f"{sorted(KNOWN_RUNTIMES)}"
            )
        # Written as ``not (x > 0)`` rather than ``x <= 0`` so that NaN is
        # rejected too: every ordered comparison against NaN is False, so
        # the ``<=`` form would silently accept it.
        if not (self.thermal_poll_hz > 0):
            raise ConfigError(
                f"C7InferenceConfig.thermal_poll_hz must be > 0; "
                f"got {self.thermal_poll_hz}"
            )
        if not self.engine_cache_dir:
            raise ConfigError(
                "C7InferenceConfig.engine_cache_dir must be non-empty"
            )
        if self.gpu_memory_budget_bytes <= 0:
            raise ConfigError(
                "C7InferenceConfig.gpu_memory_budget_bytes must be > 0; "
                f"got {self.gpu_memory_budget_bytes}"
            )
        if self.trtexec_timeout_s <= 0:
            raise ConfigError(
                "C7InferenceConfig.trtexec_timeout_s must be > 0; "
                f"got {self.trtexec_timeout_s}"
            )