# gps-denied-onboard/src/gps_denied_onboard/components/c7_inference/config.py

"""C7 inference runtime config block (AZ-297).

Registered into ``config.components['c7_inference']`` by the package
``__init__.py``. The composition-root factory
:func:`gps_denied_onboard.runtime_root.inference_factory.build_inference_runtime`
reads this block to select the strategy and locate the engine cache.
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Final

from gps_denied_onboard.config.schema import ConfigError

# Public names exported by this module: the set of valid runtime labels
# and the config dataclass itself.
__all__ = [
    "KNOWN_RUNTIMES",
    "C7InferenceConfig",
]

# Closed, immutable set of runtime labels; ``C7InferenceConfig.runtime``
# is validated against it at construction time.
KNOWN_RUNTIMES: Final[frozenset[str]] = frozenset(
    ("tensorrt", "onnx_trt_ep", "pytorch_fp16")
)


@dataclass(frozen=True)
class C7InferenceConfig:
    """Per-component config for C7 inference.

    ``runtime`` selects exactly one of the three strategies
    (``tensorrt`` / ``onnx_trt_ep`` / ``pytorch_fp16``); the
    composition-root factory respects compile-time ``BUILD_*`` gating
    on top of this label (a runtime whose flag is OFF raises
    :class:`RuntimeNotAvailableError` at composition time).

    ``thermal_poll_hz`` is the cadence at which AZ-302's
    ``ThermalStatePublisher`` polls ``tegrastats`` / ``jtop``;
    default 1 Hz keeps the SDK telemetry source cool. Must be a
    finite, strictly positive number.

    ``engine_cache_dir`` is the filesystem root where compiled
    ``.engine`` binaries + ``.sha256`` sidecars live; the C10
    pre-flight ``CacheProvisioner`` writes here.

    ``per_frame_debug_log`` is a boolean flag, off by default. It is
    neither validated nor consumed in this module; presumably it gates
    per-frame debug logging in the runtime (confirm against the
    consumer).

    ``gpu_memory_budget_bytes`` caps the aggregate GPU memory the
    ``TensorrtRuntime`` is allowed to hold across resident engines
    (C7-PT-02 / NFT-LIM-01); default 4 GiB. The ``TensorrtRuntime``
    enforces this at :meth:`deserialize_engine` time and refuses with
    :class:`OutOfMemoryError` BEFORE allocating buffers when a new
    engine would push past the cap.

    ``trtexec_timeout_s`` bounds the ``trtexec`` subprocess used by
    ``TensorrtRuntime.compile_engine`` when ``BuildConfig.use_trtexec``
    is true (AZ-298 Risk 4); default 10 minutes.

    Raises:
        ConfigError: from ``__post_init__`` when ``runtime`` is not in
            ``KNOWN_RUNTIMES``, ``thermal_poll_hz`` is not a finite
            value > 0, ``engine_cache_dir`` is empty, or either
            ``gpu_memory_budget_bytes`` or ``trtexec_timeout_s`` is
            not > 0.
    """

    runtime: str = "pytorch_fp16"
    thermal_poll_hz: float = 1.0
    engine_cache_dir: str = "/var/lib/gps-denied/engines"
    per_frame_debug_log: bool = False
    gpu_memory_budget_bytes: int = 4 * 1024 * 1024 * 1024
    trtexec_timeout_s: int = 600

    def __post_init__(self) -> None:
        """Validate every field; raise ``ConfigError`` on the first violation."""
        if self.runtime not in KNOWN_RUNTIMES:
            raise ConfigError(
                f"C7InferenceConfig.runtime={self.runtime!r} not in "
                f"{sorted(KNOWN_RUNTIMES)}"
            )
        if self.thermal_poll_hz <= 0:
            raise ConfigError(
                "C7InferenceConfig.thermal_poll_hz must be > 0; "
                f"got {self.thermal_poll_hz}"
            )
        # NaN compares False against everything, so it slips past the
        # ``<= 0`` guard above; +inf passes it too. Neither is a usable
        # poll rate, so reject anything that is not strictly below +inf.
        if not self.thermal_poll_hz < float("inf"):
            raise ConfigError(
                "C7InferenceConfig.thermal_poll_hz must be finite; "
                f"got {self.thermal_poll_hz}"
            )
        if not self.engine_cache_dir:
            raise ConfigError(
                "C7InferenceConfig.engine_cache_dir must be non-empty"
            )
        if self.gpu_memory_budget_bytes <= 0:
            raise ConfigError(
                "C7InferenceConfig.gpu_memory_budget_bytes must be > 0; "
                f"got {self.gpu_memory_budget_bytes}"
            )
        if self.trtexec_timeout_s <= 0:
            raise ConfigError(
                "C7InferenceConfig.trtexec_timeout_s must be > 0; "
                f"got {self.trtexec_timeout_s}"
            )