diff --git a/_docs/02_tasks/todo/AZ-297_c7_runtime_protocol.md b/_docs/02_tasks/done/AZ-297_c7_runtime_protocol.md similarity index 100% rename from _docs/02_tasks/todo/AZ-297_c7_runtime_protocol.md rename to _docs/02_tasks/done/AZ-297_c7_runtime_protocol.md diff --git a/src/gps_denied_onboard/_types/inference.py b/src/gps_denied_onboard/_types/inference.py new file mode 100644 index 0000000..7f9a915 --- /dev/null +++ b/src/gps_denied_onboard/_types/inference.py @@ -0,0 +1,106 @@ +"""C7 inference runtime DTOs + enums (AZ-297) — L1 shared types. + +Hosted at the ``_types`` layer (not under ``components/c7_inference``) +because C10 ``CacheProvisioner`` re-exports :class:`EngineCacheEntry` +as part of its own Public API; per the architecture rule +(``test_az270_compose_root.py`` AC-6) components MUST NOT import +other components, so cross-component DTOs live in ``_types/``. + +C7's :class:`gps_denied_onboard.components.c7_inference` package +re-exports these names for the canonical contract surface; consumers +SHOULD import from ``c7_inference`` (the contract namespace) rather +than from here. The contract at +``_docs/02_document/contracts/c7_inference/inference_runtime_protocol.md`` +v1.0.0 is the authoritative shape; this module mirrors it 1:1. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum +from pathlib import Path + +__all__ = [ + "BuildConfig", + "EngineCacheEntry", + "EngineHandle", + "OptimizationProfile", + "PrecisionMode", +] + + +class PrecisionMode(str, Enum): + """TensorRT / ORT / PyTorch precision the engine is compiled for.""" + + FP16 = "fp16" + INT8 = "int8" + MIXED = "mixed" + + +@dataclass(frozen=True) +class OptimizationProfile: + """One named TRT optimisation profile. + + The runtime picks an actual shape inside ``[min_shape, max_shape]`` + based on the input dict supplied to ``infer``; the Protocol does + NOT auto-batch (see ``inference_runtime_protocol.md`` Non-Goals). 
+ """ + + input_name: str + min_shape: tuple[int, ...] + opt_shape: tuple[int, ...] + max_shape: tuple[int, ...] + + +@dataclass(frozen=True) +class BuildConfig: + """Engine-build hyperparameters passed to ``compile_engine``. + + ``calibration_dataset`` is required when ``precision == INT8`` and + is ``None`` for FP16 / MIXED. ``use_trtexec`` is a TRT-only hint; + the ORT / PyTorch strategies ignore it. + """ + + precision: PrecisionMode + workspace_mb: int + calibration_dataset: Path | None + optimization_profiles: tuple[OptimizationProfile, ...] + use_trtexec: bool = False + + +@dataclass(frozen=True) +class EngineCacheEntry: + """Self-describing pointer to a compiled engine on disk. + + The five-tuple ``(model, sm, jp, trt, precision)`` matches the + filename schema from AZ-281 (``engine_filename_schema.md``); + ``sha256_hex`` is produced via the AZ-280 sidecar pattern. For + PyTorch baselines the ``sm`` / ``jp`` / ``trt`` fields are ``None`` + (the runtime is hardware-portable). ``extras`` carries + strategy-specific metadata (e.g., calibration cache path). + """ + + engine_path: Path + sha256_hex: str + sm: int | None + jp: str | None + trt: str | None + precision: PrecisionMode + extras: dict[str, str] + + +class EngineHandle: + """Opaque token returned by C7 ``deserialize_engine``. + + Consumers MUST pass the handle back to the same runtime + (``infer`` / ``release_engine``) and MUST NOT introspect fields — + Invariant I-4 of ``inference_runtime_protocol.md``. Each concrete + strategy subclasses this with implementation-private state. + + A separate :class:`gps_denied_onboard._types.manifests.EngineHandle` + Protocol exists for the LightGlue consumer-side cut (descriptor_dim + + forward). That is a duck-typed structural cut, intentionally + distinct from this opaque marker; concrete C7 strategies subclass + THIS class and structurally satisfy the LightGlue Protocol when + paired with a matching engine binary. 
+ """ diff --git a/src/gps_denied_onboard/_types/manifests.py b/src/gps_denied_onboard/_types/manifests.py index e0e9c20..8535153 100644 --- a/src/gps_denied_onboard/_types/manifests.py +++ b/src/gps_denied_onboard/_types/manifests.py @@ -22,19 +22,6 @@ class Manifest: metadata: dict[str, Any] = field(default_factory=dict) -@dataclass(frozen=True) -class EngineCacheEntry: - """TensorRT engine + calibration cache, keyed by SM/JP/TRT/precision (D-C10-7).""" - - engine_path: str - sm_arch: str - jetpack_version: str - tensorrt_version: str - precision: str - content_hash: str - int8_calibration_path: str | None = None - - @dataclass(frozen=True) class EngineCacheKey: """Parsed tuple of a self-describing `.engine` filename (D-C10-7, AZ-281). @@ -53,12 +40,23 @@ class EngineCacheKey: @runtime_checkable class EngineHandle(Protocol): - """Opaque Protocol for an inference engine handle (C7-owned implementation). + """Consumer-side structural cut of a C7 engine handle for the + LightGlue helper (AZ-278 / R14 fix). - The production handle is created by C7's - ``InferenceRuntime.deserialize_engine`` and injected by the - composition root into ``LightGlueRuntime``. The helper depends on - this Protocol from `_types` so Layer 1 never imports C7 (R14 fix). + Intentionally distinct from + :class:`gps_denied_onboard.components.c7_inference.EngineHandle` + (the AZ-297 opaque marker class): same name, different shape, by + design. The C7 Protocol says EngineHandle is opaque to general + consumers (Invariant I-4); LightGlueRuntime is a specific helper + that needs a narrower structural Protocol with + ``descriptor_dim`` + ``forward``. Concrete C7 strategies + (TensorrtRuntime / OnnxTrtEpRuntime / PytorchFp16Runtime) + subclass the c7_inference opaque marker AND structurally satisfy + this Protocol when paired with a matching engine binary. + + The helper depends on THIS Protocol from ``_types`` so Layer 1 + never imports C7 (R14 fix). 
See ADR for the dual-Protocol + design rationale (mirrors the C4/C5 ISam2GraphHandle split). """ @property diff --git a/src/gps_denied_onboard/_types/pose.py b/src/gps_denied_onboard/_types/pose.py index 43ffe50..cf825fd 100644 --- a/src/gps_denied_onboard/_types/pose.py +++ b/src/gps_denied_onboard/_types/pose.py @@ -54,7 +54,7 @@ class CovarianceMode(Enum): iSAM2 graph (ADR-003 substrate). The cost-accurate path. * ``JACOBIAN`` — D-CROSS-LATENCY-1 thermal-throttle fallback (ADR-006). Cheaper but ~5-10% less accurate; engaged per-frame - when ``ThermalState.throttle == True``. + when ``ThermalState.thermal_throttle_active == True``. AZ-355 owns the enum; AZ-358 (Marginals) + AZ-361 (Hybrid) own the producer paths. diff --git a/src/gps_denied_onboard/_types/thermal.py b/src/gps_denied_onboard/_types/thermal.py index 400959c..6b4992e 100644 --- a/src/gps_denied_onboard/_types/thermal.py +++ b/src/gps_denied_onboard/_types/thermal.py @@ -1,14 +1,17 @@ -"""C7 ``ThermalState`` DTO stub (forward-declared for AZ-355). +"""C7 ``ThermalState`` DTO (frozen at AZ-297 v1.0.0). -AZ-355 (C4 PoseEstimator Protocol) needs a ``ThermalState`` type to -annotate :meth:`PoseEstimator.estimate`. The full producer -(``ThermalStatePublisher`` in C7) is owned by AZ-302; this module -holds the minimal DTO surface C4 needs so the Protocol typechecks -without a circular dependency or a ``TYPE_CHECKING`` workaround. +Originally introduced as a forward-declared stub for AZ-355 (C4 needs +the type before C7 lands); AZ-297 expanded the surface to the full +contract shape (cpu/gpu temp, throttle, measured clock, monotonic +timestamp, telemetry-availability bit) and AZ-302 will own the +producer-side polling thread. -When AZ-302 lands, it MAY add fields here (temperature reading, -thermal-zone source, captured_at) but MUST keep the ``throttle`` -boolean — it is the only field the C4 Protocol contract pins. 
+C4 consumers read ``thermal_throttle_active`` (the canonical name); +the older docstring references to ``throttle`` are stale and now +point at this field. Invariant I-6 of +``_docs/02_document/contracts/c7_inference/inference_runtime_protocol.md`` +binds ``thermal_throttle_active == False`` whenever +``is_telemetry_available == False``. """ from __future__ import annotations @@ -22,11 +25,22 @@ __all__ = ["ThermalState"] class ThermalState: """C7-reported thermal state consumed by C4 for the per-frame mode switch. - ``throttle == True`` triggers the Jacobian path (D-CROSS-LATENCY-1 - / ADR-006). ``False`` keeps the production Marginals path. + ``thermal_throttle_active == True`` triggers the Jacobian path + (D-CROSS-LATENCY-1 / ADR-006); ``False`` keeps the production + Marginals path. ``is_telemetry_available`` is the default-safe + bit: when the source is hung or absent, consumers see + ``thermal_throttle_active == False`` regardless of any other + field's value. - The full C7 publisher (AZ-302) emits these on a fixed cadence; - C4 reads the latest value at every ``estimate`` call entry. + Temperatures are degrees Celsius; ``measured_clock_mhz`` is the + most-recent GPU clock report (None when unavailable); + ``measured_at_ns`` is :func:`time.monotonic_ns` at poll time so + consumers can age-gate the reading. """ - throttle: bool + cpu_temp_c: float | None + gpu_temp_c: float | None + thermal_throttle_active: bool + measured_clock_mhz: int | None + measured_at_ns: int + is_telemetry_available: bool diff --git a/src/gps_denied_onboard/components/c10_provisioning/__init__.py b/src/gps_denied_onboard/components/c10_provisioning/__init__.py index 2bb6bed..5484cb7 100644 --- a/src/gps_denied_onboard/components/c10_provisioning/__init__.py +++ b/src/gps_denied_onboard/components/c10_provisioning/__init__.py @@ -1,6 +1,12 @@ -"""C10 Cache Provisioning component — Public API.""" +"""C10 Cache Provisioning component — Public API. 
-from gps_denied_onboard._types.manifests import EngineCacheEntry, Manifest +``EngineCacheEntry`` is the C7 canonical DTO (frozen at AZ-297) and +lives at the L1 ``_types`` layer so C10 can re-export it without +crossing the components.* boundary (architecture rule AC-6). +""" + +from gps_denied_onboard._types.inference import EngineCacheEntry +from gps_denied_onboard._types.manifests import Manifest from gps_denied_onboard.components.c10_provisioning.interface import CacheProvisioner __all__ = ["CacheProvisioner", "EngineCacheEntry", "Manifest"] diff --git a/src/gps_denied_onboard/components/c4_pose/config.py b/src/gps_denied_onboard/components/c4_pose/config.py index 3a2ae1d..ab24140 100644 --- a/src/gps_denied_onboard/components/c4_pose/config.py +++ b/src/gps_denied_onboard/components/c4_pose/config.py @@ -34,7 +34,7 @@ class C4PoseConfig: * ``ransac_reprojection_threshold_px`` — RANSAC inlier-distance threshold. Default 4.0 pixels per the contract. * ``thermal_throttle_threshold_celsius`` — informational only; - the actual ``ThermalState.throttle`` decision is owned by C7 + the actual ``ThermalState.thermal_throttle_active`` decision is owned by C7 (AZ-302). Default 75.0 °C. """ diff --git a/src/gps_denied_onboard/components/c4_pose/interface.py b/src/gps_denied_onboard/components/c4_pose/interface.py index 258938b..f26553d 100644 --- a/src/gps_denied_onboard/components/c4_pose/interface.py +++ b/src/gps_denied_onboard/components/c4_pose/interface.py @@ -51,7 +51,7 @@ class PoseEstimator(Protocol): ) -> PoseEstimate: """Run PnP → factor add → covariance recovery. - Per-frame thermal decision: ``thermal_state.throttle == True`` + Per-frame thermal decision: ``thermal_state.thermal_throttle_active == True`` engages the Jacobian path (cheap, ~5-10 % accuracy loss); ``False`` engages the Marginals path (production default). 
diff --git a/src/gps_denied_onboard/components/c7_inference/__init__.py b/src/gps_denied_onboard/components/c7_inference/__init__.py index 48be5b5..452f419 100644 --- a/src/gps_denied_onboard/components/c7_inference/__init__.py +++ b/src/gps_denied_onboard/components/c7_inference/__init__.py @@ -1,6 +1,68 @@ -"""C7 Inference Runtime component — Public API.""" +"""C7 Inference Runtime — Public API (AZ-297). -from gps_denied_onboard._types.manifests import EngineCacheEntry +Per ``inference_runtime_protocol.md`` v1.0.0 the public surface +consists of: + +- :class:`InferenceRuntime` Protocol (6 methods). +- DTOs: :class:`BuildConfig`, :class:`EngineCacheEntry`, + :class:`EngineHandle` (opaque marker), :class:`OptimizationProfile`, + :class:`PrecisionMode` enum. +- :class:`ThermalState` re-exported from + :mod:`gps_denied_onboard._types.thermal` (its canonical home for + C4 forward-compatibility). +- Error family rooted at :class:`RuntimeError`; nine documented subtypes. +- Config block :class:`C7InferenceConfig` (registered on import). + +Concrete strategies (``TensorrtRuntime``, ``OnnxTrtEpRuntime``, +``PytorchFp16Runtime``) live in sibling modules and are imported +lazily by :mod:`gps_denied_onboard.runtime_root.inference_factory` — +Risk-2 mitigation: this ``__init__.py`` MUST NOT import any concrete +strategy module. 
+""" + +from gps_denied_onboard._types.inference import ( + BuildConfig, + EngineCacheEntry, + EngineHandle, + OptimizationProfile, + PrecisionMode, +) +from gps_denied_onboard._types.thermal import ThermalState +from gps_denied_onboard.components.c7_inference.config import C7InferenceConfig +from gps_denied_onboard.components.c7_inference.errors import ( + CalibrationCacheError, + EngineBuildError, + EngineDeserializeError, + EngineHashMismatchError, + EngineSchemaMismatchError, + EngineSidecarMissingError, + InferenceError, + OutOfMemoryError, + RuntimeError, + TelemetryUnavailableError, +) from gps_denied_onboard.components.c7_inference.interface import InferenceRuntime +from gps_denied_onboard.config.schema import register_component_block -__all__ = ["EngineCacheEntry", "InferenceRuntime"] +register_component_block("c7_inference", C7InferenceConfig) + +__all__ = [ + "BuildConfig", + "C7InferenceConfig", + "CalibrationCacheError", + "EngineBuildError", + "EngineCacheEntry", + "EngineDeserializeError", + "EngineHandle", + "EngineHashMismatchError", + "EngineSchemaMismatchError", + "EngineSidecarMissingError", + "InferenceError", + "InferenceRuntime", + "OptimizationProfile", + "OutOfMemoryError", + "PrecisionMode", + "RuntimeError", + "TelemetryUnavailableError", + "ThermalState", +] diff --git a/src/gps_denied_onboard/components/c7_inference/config.py b/src/gps_denied_onboard/components/c7_inference/config.py new file mode 100644 index 0000000..d82e4ed --- /dev/null +++ b/src/gps_denied_onboard/components/c7_inference/config.py @@ -0,0 +1,63 @@ +"""C7 inference runtime config block (AZ-297). + +Registered into ``config.components['c7_inference']`` by the package +``__init__.py``. The composition-root factory +:func:`gps_denied_onboard.runtime_root.inference_factory.build_inference_runtime` +reads this block to select the strategy and locate the engine cache. 
+""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Final + +from gps_denied_onboard.config.schema import ConfigError + +__all__ = [ + "C7InferenceConfig", + "KNOWN_RUNTIMES", +] + +KNOWN_RUNTIMES: Final[frozenset[str]] = frozenset( + {"tensorrt", "onnx_trt_ep", "pytorch_fp16"} +) + + +@dataclass(frozen=True) +class C7InferenceConfig: + """Per-component config for C7 inference. + + ``runtime`` selects exactly one of the three strategies + (``tensorrt`` / ``onnx_trt_ep`` / ``pytorch_fp16``); the + composition-root factory respects compile-time ``BUILD_*`` gating + on top of this label (a runtime whose flag is OFF raises + :class:`RuntimeNotAvailableError` at composition time). + + ``thermal_poll_hz`` is the cadence at which AZ-302's + ``ThermalStatePublisher`` polls ``tegrastats`` / ``jtop``; + default 1 Hz keeps the SDK telemetry source cool. + + ``engine_cache_dir`` is the filesystem root where compiled + ``.engine`` binaries + ``.sha256`` sidecars live; the C10 + pre-flight ``CacheProvisioner`` writes here. + """ + + runtime: str = "pytorch_fp16" + thermal_poll_hz: float = 1.0 + engine_cache_dir: str = "/var/lib/gps-denied/engines" + + def __post_init__(self) -> None: + if self.runtime not in KNOWN_RUNTIMES: + raise ConfigError( + f"C7InferenceConfig.runtime={self.runtime!r} not in " + f"{sorted(KNOWN_RUNTIMES)}" + ) + if self.thermal_poll_hz <= 0: + raise ConfigError( + f"C7InferenceConfig.thermal_poll_hz must be > 0; " + f"got {self.thermal_poll_hz}" + ) + if not self.engine_cache_dir: + raise ConfigError( + "C7InferenceConfig.engine_cache_dir must be non-empty" + ) diff --git a/src/gps_denied_onboard/components/c7_inference/errors.py b/src/gps_denied_onboard/components/c7_inference/errors.py new file mode 100644 index 0000000..356e986 --- /dev/null +++ b/src/gps_denied_onboard/components/c7_inference/errors.py @@ -0,0 +1,94 @@ +"""C7 inference runtime error taxonomy (AZ-297). 
+ +All Protocol methods raise only members of :class:`RuntimeError` (the +C7-local family — NOT stdlib :class:`builtins.RuntimeError`). +Consumers catch the family with a single +``except c7_inference.errors.RuntimeError``; implementations rewrap +third-party library exceptions (TRT C++ exceptions, ORT internal +errors, PyTorch CUDA errors) into one of these types. + +Two composition-time errors live OUTSIDE the family: +- :class:`gps_denied_onboard.runtime_root.errors.RuntimeNotAvailableError` + is raised by the factory when the requested ``BUILD_*`` is OFF. +- :class:`gps_denied_onboard.config.schema.ConfigError` is raised at + config-load time for unknown ``runtime`` labels. +""" + +from __future__ import annotations + +__all__ = [ + "CalibrationCacheError", + "EngineBuildError", + "EngineDeserializeError", + "EngineHashMismatchError", + "EngineSchemaMismatchError", + "EngineSidecarMissingError", + "InferenceError", + "OutOfMemoryError", + "RuntimeError", + "TelemetryUnavailableError", +] + + +class RuntimeError(Exception): # noqa: A001 - the contract names this RuntimeError + """Base class for the C7 inference runtime error family. + + Named ``RuntimeError`` per the contract (``inference_runtime_protocol.md`` + Shape § Error hierarchy); this shadows the stdlib name only inside + ``c7_inference.errors``. Consumers MUST import from this module by + fully-qualified name to avoid collision. 
+ """ + + +class EngineBuildError(RuntimeError): + """``compile_engine`` failed before producing a usable engine binary.""" + + +class EngineDeserializeError(RuntimeError): + """``deserialize_engine`` could not load the engine binary into memory.""" + + +class EngineHashMismatchError(RuntimeError): + """The sha256 of the engine file does not match + :attr:`EngineCacheEntry.sha256_hex` (D-C10-3 / cache poisoning gate).""" + + +class EngineSchemaMismatchError(RuntimeError): + """The engine's IO schema (input / output tensor names + shapes) does + not match the runtime expectation (D-C10-7).""" + + +class EngineSidecarMissingError(RuntimeError): + """The ``.sha256`` sidecar file expected next to the engine binary is + absent or unreadable.""" + + +class CalibrationCacheError(RuntimeError): + """``compile_engine`` with ``precision=INT8`` could not load or write + the calibration cache.""" + + +class InferenceError(RuntimeError): + """``infer`` failed during GPU execution. + + The impl MUST rewrap CUDA / TRT C++ / ORT exceptions into this + type; raw exceptions MUST NOT escape the Protocol. + """ + + +class OutOfMemoryError(RuntimeError): # noqa: A001 - the contract names this OutOfMemoryError + """GPU OOM during ``deserialize_engine`` or ``infer``. + + Named ``OutOfMemoryError`` per the contract; this shadows the + stdlib name only inside ``c7_inference.errors``. + """ + + +class TelemetryUnavailableError(RuntimeError): + """``thermal_state`` cold-start could not bind to the + ``tegrastats`` / ``jtop`` source. + + Steady-state telemetry absence is signalled by + ``ThermalState.is_telemetry_available == False`` (NOT this + exception); this error is the cold-start fail-fast. 
+ """ diff --git a/src/gps_denied_onboard/components/c7_inference/interface.py b/src/gps_denied_onboard/components/c7_inference/interface.py index 2ade7e6..2f8e7ec 100644 --- a/src/gps_denied_onboard/components/c7_inference/interface.py +++ b/src/gps_denied_onboard/components/c7_inference/interface.py @@ -1,18 +1,106 @@ -"""C7 `InferenceRuntime` Protocol. +"""C7 ``InferenceRuntime`` Protocol (AZ-297). -Concrete impls: `TensorrtRuntime` (production-default; TensorRT 10.3), -`OnnxTrtEpRuntime` (ONNX Runtime + TensorRT EP), `PytorchFp16Runtime` (research -baseline). See `_docs/02_document/components/09_c7_inference/`. +PEP 544 ``typing.Protocol`` decorated with ``@runtime_checkable``; six +methods that span engine lifecycle (compile / deserialize / release), +per-call inference, thermal-state telemetry, and runtime-label +self-report. Concrete impls — :class:`TensorrtRuntime` (AZ-298), +:class:`OnnxTrtEpRuntime` (AZ-299), :class:`PytorchFp16Runtime` +(AZ-300) — live in sibling modules and are imported lazily by +:mod:`gps_denied_onboard.runtime_root.inference_factory`. + +The contract at +``_docs/02_document/contracts/c7_inference/inference_runtime_protocol.md`` +v1.0.0 is the authoritative shape; this module mirrors it 1:1. """ from __future__ import annotations -from typing import Any, Protocol +from pathlib import Path +from typing import TYPE_CHECKING, Literal, Protocol, runtime_checkable + +from gps_denied_onboard._types.inference import ( + BuildConfig, + EngineCacheEntry, + EngineHandle, +) +from gps_denied_onboard._types.thermal import ThermalState + +if TYPE_CHECKING: + import numpy as np + +__all__ = ["InferenceRuntime"] +@runtime_checkable class InferenceRuntime(Protocol): - """Compiled-engine inference runtime.""" + """On-Jetson inference runtime. See ``inference_runtime_protocol.md`` v1.0.0. - def infer(self, inputs: Any) -> Any: ...
+ Implementations: :class:`TensorrtRuntime` (production-default, TRT 10.3), + :class:`OnnxTrtEpRuntime` (ONNX Runtime + TensorRT EP fallback), + :class:`PytorchFp16Runtime` (research / Tier-0 baseline). Selection + is owned by the composition root. + """ - def load(self, engine_path: str) -> None: ... + def compile_engine( + self, model_path: Path, build_config: BuildConfig + ) -> EngineCacheEntry: + """Offline (minutes for INT8) engine compilation. + + Raises :class:`EngineBuildError` on compile failure; + :class:`CalibrationCacheError` when + ``build_config.precision == INT8`` and the calibration data + cannot be loaded. + """ + ... + + def deserialize_engine(self, entry: EngineCacheEntry) -> EngineHandle: + """Load a compiled engine into GPU memory. + + Raises :class:`EngineDeserializeError` / + :class:`EngineHashMismatchError` / + :class:`EngineSchemaMismatchError` / + :class:`EngineSidecarMissingError` / + :class:`OutOfMemoryError` per the contract envelope. + """ + ... + + def infer( + self, + handle: EngineHandle, + inputs: "dict[str, np.ndarray]", + ) -> "dict[str, np.ndarray]": + """Synchronous request/response inference (Invariant I-8). + + Returns AFTER the GPU stream has synchronised; output tensors + are host-resident :class:`numpy.ndarray` ready for consumer + use. Raises :class:`InferenceError` / + :class:`OutOfMemoryError`. + """ + ... + + def release_engine(self, handle: EngineHandle) -> None: + """Drop the engine + free GPU memory. + + Idempotent (Invariant I-7) — second-and-later calls on the + same handle return silently. + """ + ... + + def thermal_state(self) -> ThermalState: + """Most-recent thermal reading from ``tegrastats`` / ``jtop``. + + Raises :class:`TelemetryUnavailableError` only on cold-start + binding failure; steady-state telemetry absence is signalled + by ``ThermalState.is_telemetry_available == False`` (Invariant I-6). + """ + ... 
+ + def current_runtime_label( + self, + ) -> Literal["tensorrt", "onnx_trt_ep", "pytorch_fp16"]: + """Identify which concrete strategy is wired here. + + Returned string equals ``config.components['c7_inference'].runtime`` exactly + (Invariant I-1); AC-NEW-3 FDR audit relies on this property. + """ + ... diff --git a/src/gps_denied_onboard/runtime_root/inference_factory.py b/src/gps_denied_onboard/runtime_root/inference_factory.py new file mode 100644 index 0000000..9cf3782 --- /dev/null +++ b/src/gps_denied_onboard/runtime_root/inference_factory.py @@ -0,0 +1,118 @@ +"""C7 inference-runtime composition-root factory (AZ-297). + +:func:`build_inference_runtime` selects exactly one strategy by +``config.components['c7_inference'].runtime`` and respects compile-time +``BUILD_*`` gating: requesting a strategy whose flag is OFF raises +:class:`RuntimeNotAvailableError` at composition time (NOT at first +inference call). + +The concrete strategy modules (``tensorrt_runtime``, ``onnx_trt_ep_runtime``, +``pytorch_fp16_runtime``) are imported lazily — a Tier-0 workstation +build with ``BUILD_TENSORRT_RUNTIME=OFF`` MUST NOT load +``c7_inference.tensorrt_runtime`` (Invariant I-5; verifiable via +``sys.modules``).
+""" + +from __future__ import annotations + +import os +from typing import TYPE_CHECKING + +from gps_denied_onboard.runtime_root.errors import RuntimeNotAvailableError + +if TYPE_CHECKING: + from gps_denied_onboard.components.c7_inference import ( + C7InferenceConfig, + InferenceRuntime, + ) + from gps_denied_onboard.config.schema import Config + +__all__ = ["build_inference_runtime"] + + +_RUNTIME_TO_BUILD_FLAG: dict[str, str] = { + "tensorrt": "BUILD_TENSORRT_RUNTIME", + "onnx_trt_ep": "BUILD_ONNX_TRT_EP_RUNTIME", + "pytorch_fp16": "BUILD_PYTORCH_FP16_RUNTIME", +} + +_RUNTIME_TO_MODULE: dict[str, tuple[str, str]] = { + "tensorrt": ( + "gps_denied_onboard.components.c7_inference.tensorrt_runtime", + "TensorrtRuntime", + ), + "onnx_trt_ep": ( + "gps_denied_onboard.components.c7_inference.onnx_trt_ep_runtime", + "OnnxTrtEpRuntime", + ), + "pytorch_fp16": ( + "gps_denied_onboard.components.c7_inference.pytorch_fp16_runtime", + "PytorchFp16Runtime", + ), +} + + +def _is_build_flag_on(flag_name: str) -> bool: + """Read a compile-time ``BUILD_*`` flag from the environment. + + ``ON`` / ``1`` / ``true`` / ``yes`` (case-insensitive) → ``True``; + anything else (including unset) → ``False``. Defaults to OFF so + test environments must opt-in explicitly per strategy. + """ + raw = os.environ.get(flag_name, "") + return raw.strip().lower() in {"on", "1", "true", "yes"} + + +def _c7_config(config: "Config") -> "C7InferenceConfig": + """Pull the registered C7 config block. + + ``c7_inference.__init__`` registers it on import; a missing + registration is a developer error and surfaces as ``KeyError`` + rather than a silent fallback. + """ + return config.components["c7_inference"] + + +def build_inference_runtime(config: "Config") -> "InferenceRuntime": + """Construct the :class:`InferenceRuntime` impl selected by config. + + The factory: + + 1. Reads ``config.components['c7_inference'].runtime``. + 2. 
Checks the matching ``BUILD_*`` flag — if OFF, raises + :class:`RuntimeNotAvailableError` BEFORE any import. + 3. Lazily imports the concrete strategy module. + 4. Constructs and returns the strategy instance, passing ``config``. + + Raises :class:`RuntimeNotAvailableError` when: + + - The compile-time flag is OFF (the canonical Tier-0 path). + - The concrete strategy module has not been built yet (AZ-298 / + AZ-299 / AZ-300 are still pending) — the import fails and the + factory wraps :class:`ModuleNotFoundError`. + """ + block = _c7_config(config) + runtime = block.runtime + flag_name = _RUNTIME_TO_BUILD_FLAG.get(runtime) + module_info = _RUNTIME_TO_MODULE.get(runtime) + if flag_name is None or module_info is None: + raise RuntimeNotAvailableError( + f"InferenceRuntime runtime {runtime!r} is not buildable in " + "this binary." + ) + if not _is_build_flag_on(flag_name): + raise RuntimeNotAvailableError( + f"InferenceRuntime runtime {runtime!r} requires " + f"{flag_name}=ON in this binary; the flag is OFF." + ) + module_name, class_name = module_info + try: + module = __import__(module_name, fromlist=[class_name]) + except ModuleNotFoundError as exc: + raise RuntimeNotAvailableError( + f"InferenceRuntime runtime {runtime!r} is configured but its " + f"concrete impl module {module_name!r} has not been built into " + "this binary yet (AZ-298 / AZ-299 / AZ-300 pending)." + ) from exc + strategy_cls = getattr(module, class_name) + return strategy_cls(config) diff --git a/tests/unit/c7_inference/test_protocol_conformance.py b/tests/unit/c7_inference/test_protocol_conformance.py new file mode 100644 index 0000000..8ecc2d4 --- /dev/null +++ b/tests/unit/c7_inference/test_protocol_conformance.py @@ -0,0 +1,492 @@ +"""AZ-297 — C7 inference runtime Protocol + DTO + error + factory conformance. + +Covers all 8 ACs of AZ-297 plus NFR-perf-factory and +NFR-reliability-error-family. 
The factory ACs (AC-4 / AC-5) substitute +fake strategy modules at ``sys.modules`` boundaries so the test never +touches TensorRT, ONNX Runtime, or PyTorch. +""" + +from __future__ import annotations + +import dataclasses +import re +import sys +import time +import types +from pathlib import Path + +import pytest + +from gps_denied_onboard.components.c7_inference import ( + BuildConfig, + C7InferenceConfig, + CalibrationCacheError, + EngineBuildError, + EngineCacheEntry, + EngineDeserializeError, + EngineHandle, + EngineHashMismatchError, + EngineSchemaMismatchError, + EngineSidecarMissingError, + InferenceError, + InferenceRuntime, + OptimizationProfile, + OutOfMemoryError, + PrecisionMode, + RuntimeError as C7RuntimeError, + TelemetryUnavailableError, + ThermalState, +) +from gps_denied_onboard.components.c7_inference.config import KNOWN_RUNTIMES +from gps_denied_onboard.config.schema import Config, ConfigError +from gps_denied_onboard.runtime_root.errors import RuntimeNotAvailableError +from gps_denied_onboard.runtime_root.inference_factory import ( + build_inference_runtime, +) + + +_CONTRACT_PATH = ( + Path(__file__).resolve().parents[3] + / "_docs/02_document/contracts/c7_inference/inference_runtime_protocol.md" +) +_STRATEGY_MODULES: dict[str, tuple[str, str, str]] = { + "tensorrt": ( + "gps_denied_onboard.components.c7_inference.tensorrt_runtime", + "TensorrtRuntime", + "BUILD_TENSORRT_RUNTIME", + ), + "onnx_trt_ep": ( + "gps_denied_onboard.components.c7_inference.onnx_trt_ep_runtime", + "OnnxTrtEpRuntime", + "BUILD_ONNX_TRT_EP_RUNTIME", + ), + "pytorch_fp16": ( + "gps_denied_onboard.components.c7_inference.pytorch_fp16_runtime", + "PytorchFp16Runtime", + "BUILD_PYTORCH_FP16_RUNTIME", + ), +} + + +# ---------------------------------------------------------------------- +# Fakes that structurally satisfy the InferenceRuntime Protocol. 
+ + +class _FullInferenceRuntime: + def __init__(self, config: Config) -> None: + self.config = config + self._label = config.components["c7_inference"].runtime + + def compile_engine(self, model_path, build_config): + raise NotImplementedError + + def deserialize_engine(self, entry): + raise NotImplementedError + + def infer(self, handle, inputs): + raise NotImplementedError + + def release_engine(self, handle): + return None + + def thermal_state(self): + return ThermalState( + cpu_temp_c=None, + gpu_temp_c=None, + thermal_throttle_active=False, + measured_clock_mhz=None, + measured_at_ns=0, + is_telemetry_available=False, + ) + + def current_runtime_label(self): + return self._label + + +class _PartialInferenceRuntime: + def compile_engine(self, model_path, build_config): + raise NotImplementedError + + def deserialize_engine(self, entry): + raise NotImplementedError + + def infer(self, handle, inputs): + raise NotImplementedError + + def release_engine(self, handle): + return None + + def thermal_state(self): + raise NotImplementedError + + +def _config_with_runtime(runtime: str) -> Config: + return Config.with_blocks( + c7_inference=C7InferenceConfig(runtime=runtime) + ) + + +def _install_fake_strategy(runtime_label: str) -> type: + module_name, class_name, _flag = _STRATEGY_MODULES[runtime_label] + + class _FakeStrategy(_FullInferenceRuntime): + pass + + _FakeStrategy.__name__ = class_name + module = types.ModuleType(module_name) + setattr(module, class_name, _FakeStrategy) + sys.modules[module_name] = module + return _FakeStrategy + + +@pytest.fixture +def strategy_module_cleanup(): + """Pop every fake strategy module before/after each factory test.""" + for module_name, _, _ in _STRATEGY_MODULES.values(): + sys.modules.pop(module_name, None) + yield + for module_name, _, _ in _STRATEGY_MODULES.values(): + sys.modules.pop(module_name, None) + + +# ---------------------------------------------------------------------- +# AC-1: Protocol is 
# conformance-checkable.


def test_ac1_inference_runtime_conformance_full() -> None:
    """The complete fake passes ``isinstance`` against ``InferenceRuntime``."""
    complete_fake = _FullInferenceRuntime(_config_with_runtime("pytorch_fp16"))
    assert isinstance(complete_fake, InferenceRuntime)


def test_ac1_inference_runtime_conformance_partial_missing_label() -> None:
    """The fake without ``current_runtime_label`` fails the ``isinstance`` check."""
    incomplete_fake = _PartialInferenceRuntime()
    assert not isinstance(incomplete_fake, InferenceRuntime)


# ----------------------------------------------------------------------
# AC-2: frozen DTOs reject mutation.


# One (instance, attribute to overwrite, replacement value) triple per DTO.
_AC2_MUTATION_CASES = [
    (
        BuildConfig(
            precision=PrecisionMode.FP16,
            workspace_mb=512,
            calibration_dataset=None,
            optimization_profiles=(),
        ),
        "precision",
        PrecisionMode.INT8,
    ),
    (
        EngineCacheEntry(
            engine_path=Path("/var/lib/x.engine"),
            sha256_hex="a" * 64,
            sm=87,
            jp="6.2",
            trt="10.3",
            precision=PrecisionMode.FP16,
            extras={},
        ),
        "sha256_hex",
        "b" * 64,
    ),
    (
        ThermalState(
            cpu_temp_c=40.0,
            gpu_temp_c=45.0,
            thermal_throttle_active=False,
            measured_clock_mhz=918,
            measured_at_ns=1_000_000,
            is_telemetry_available=True,
        ),
        "thermal_throttle_active",
        True,
    ),
    (
        OptimizationProfile(
            input_name="input",
            min_shape=(1, 3, 224, 224),
            opt_shape=(1, 3, 384, 384),
            max_shape=(1, 3, 512, 512),
        ),
        "input_name",
        "renamed",
    ),
]


@pytest.mark.parametrize("dto, field_name, new_value", _AC2_MUTATION_CASES)
def test_ac2_frozen_dtos_reject_mutation(dto, field_name: str, new_value) -> None:
    """Assigning to a field raises ``FrozenInstanceError`` and leaves it unchanged."""
    value_before = getattr(dto, field_name)
    with pytest.raises(dataclasses.FrozenInstanceError):
        setattr(dto, field_name, new_value)
    value_after = getattr(dto, field_name)
    assert value_after == value_before


# ----------------------------------------------------------------------
# AC-3: error hierarchy catchable as a single family.
# Error types expected to be catchable through the shared C7 base class.
_AC3_FAMILY_MEMBERS = (
    EngineBuildError,
    EngineDeserializeError,
    EngineHashMismatchError,
    EngineSchemaMismatchError,
    EngineSidecarMissingError,
    CalibrationCacheError,
    InferenceError,
    OutOfMemoryError,
    TelemetryUnavailableError,
)


@pytest.mark.parametrize("exc_factory", _AC3_FAMILY_MEMBERS)
def test_ac3_all_runtime_errors_caught_as_family(exc_factory) -> None:
    """Raising any listed error type is caught by expecting ``C7RuntimeError``."""
    error = exc_factory("boom")
    with pytest.raises(C7RuntimeError):
        raise error


def test_ac3_unrelated_exception_not_caught_as_family() -> None:
    """A plain ``ValueError`` escapes an ``except C7RuntimeError`` handler."""
    unrelated = ValueError("not us")
    with pytest.raises(ValueError):
        try:
            raise unrelated
        except C7RuntimeError:
            # Reaching this handler means the family base is too broad.
            pytest.fail("ValueError must not be caught as c7 RuntimeError")


def test_ac3_runtime_not_available_outside_family() -> None:
    """``RuntimeNotAvailableError`` escapes an ``except C7RuntimeError`` handler."""
    composition_error = RuntimeNotAvailableError("composition-time")
    with pytest.raises(RuntimeNotAvailableError):
        try:
            raise composition_error
        except C7RuntimeError:
            pytest.fail(
                "RuntimeNotAvailableError is a composition-root error and "
                "MUST NOT be in the c7 runtime family"
            )


# ----------------------------------------------------------------------
# AC-4 + AC-5: factory honours config + BUILD flag gate.
@pytest.mark.parametrize("runtime", sorted(_STRATEGY_MODULES))
def test_ac4_build_inference_runtime_returns_protocol_impl(
    monkeypatch, strategy_module_cleanup, runtime
) -> None:
    """Flag ON plus an installed fake module: the factory returns that fake."""
    build_flag = _STRATEGY_MODULES[runtime][2]
    monkeypatch.setenv(build_flag, "ON")
    expected_cls = _install_fake_strategy(runtime)
    cfg = _config_with_runtime(runtime)

    built = build_inference_runtime(cfg)

    assert isinstance(built, expected_cls)
    assert isinstance(built, InferenceRuntime)


@pytest.mark.parametrize("runtime", sorted(_STRATEGY_MODULES))
def test_ac5_build_inference_runtime_flag_off_no_import(
    monkeypatch, strategy_module_cleanup, runtime
) -> None:
    """Flag unset: the factory raises and the strategy module is never imported."""
    strategy_module, _class_name, build_flag = _STRATEGY_MODULES[runtime]
    monkeypatch.delenv(build_flag, raising=False)
    cfg = _config_with_runtime(runtime)

    with pytest.raises(RuntimeNotAvailableError) as exc_info:
        build_inference_runtime(cfg)

    # The error text must name both the runtime and the gating flag.
    message = str(exc_info.value)
    assert runtime in message
    assert build_flag in message
    assert strategy_module not in sys.modules


@pytest.mark.parametrize("runtime", sorted(_STRATEGY_MODULES))
def test_ac5_build_inference_runtime_flag_on_but_module_missing(
    monkeypatch, strategy_module_cleanup, runtime
) -> None:
    """Flag ON but no fake installed: the factory still raises, naming the runtime."""
    build_flag = _STRATEGY_MODULES[runtime][2]
    monkeypatch.setenv(build_flag, "ON")
    cfg = _config_with_runtime(runtime)

    with pytest.raises(RuntimeNotAvailableError) as exc_info:
        build_inference_runtime(cfg)

    message = str(exc_info.value)
    assert runtime in message


# ----------------------------------------------------------------------
# AC-6: unknown runtime label rejected at config load.
@pytest.mark.parametrize(
    "bad_label",
    ["tensorflow_lite", "onnx", "trt", "TENSORRT", ""],
)
def test_ac6_unknown_runtime_rejected_at_config_load(bad_label: str) -> None:
    """Constructing the config with an unknown label raises ``ConfigError``.

    The message must mention the offending label or the word ``runtime``,
    and must list every entry of ``KNOWN_RUNTIMES``.
    """
    with pytest.raises(ConfigError) as exc_info:
        C7InferenceConfig(runtime=bad_label)

    msg = str(exc_info.value)
    mentions_problem = bad_label in msg or "runtime" in msg
    assert mentions_problem

    unlisted = [valid for valid in KNOWN_RUNTIMES if valid not in msg]
    assert not unlisted


# ----------------------------------------------------------------------
# AC-7: current_runtime_label() matches config exactly.


@pytest.mark.parametrize("runtime", sorted(_STRATEGY_MODULES))
def test_ac7_current_runtime_label_matches_config(
    monkeypatch, strategy_module_cleanup, runtime
) -> None:
    """The built runtime reports the same label that the config carries."""
    build_flag = _STRATEGY_MODULES[runtime][2]
    monkeypatch.setenv(build_flag, "ON")
    _install_fake_strategy(runtime)
    cfg = _config_with_runtime(runtime)

    built = build_inference_runtime(cfg)

    configured_label = cfg.components["c7_inference"].runtime
    assert built.current_runtime_label() == runtime
    assert built.current_runtime_label() == configured_label


# ----------------------------------------------------------------------
# AC-8: contract file matches Protocol shape.
# Matches the first cell of a markdown table row when that cell is a
# back-ticked lowercase identifier, e.g. a row starting "| `infer` |".
# Group 1 is the identifier.  A plain capturing group is used because the
# previous named-group form was malformed and made re.compile() raise when
# this module was imported.
_METHOD_TABLE_RE = re.compile(r"^\|\s*`([a-z_][a-z0-9_]*)`\s*\|", re.MULTILINE)


def _methods_from_contract() -> set[str]:
    """Return the method names tabulated under ``### Protocol surface``.

    Reads the contract markdown at ``_CONTRACT_PATH`` and scans only the text
    from the ``### Protocol surface`` heading up to the next ``### `` heading
    (or end of file when there is none).

    Raises:
        ValueError: if the ``### Protocol surface`` heading is absent
            (propagated from ``str.index``).
    """
    text = _CONTRACT_PATH.read_text(encoding="utf-8")
    heading = "### Protocol surface"
    surface_start = text.index(heading)
    next_section = text.find("\n### ", surface_start + len(heading))
    # str.find returns -1 when no later heading exists; slice to EOF then.
    section = text[surface_start:next_section] if next_section != -1 else text[surface_start:]
    return {m.group(1) for m in _METHOD_TABLE_RE.finditer(section)}


def _protocol_methods(proto: type) -> set[str]:
    """Return the public callable attribute names found on *proto* via ``dir``."""
    return {
        name
        for name in dir(proto)
        if not name.startswith("_") and callable(getattr(proto, name))
    }


def test_ac8_contract_methods_match_protocol() -> None:
    """The contract's method table and the Protocol's public methods are the same set."""
    contract_methods = _methods_from_contract()
    protocol_methods = _protocol_methods(InferenceRuntime)
    missing_in_protocol = contract_methods - protocol_methods
    missing_in_contract = protocol_methods - contract_methods
    assert not missing_in_protocol, (
        "Methods declared in inference_runtime_protocol.md Shape section "
        f"but missing from the Protocol: {sorted(missing_in_protocol)}"
    )
    assert not missing_in_contract, (
        "Methods present on the Protocol but missing from the contract "
        f"Shape section: {sorted(missing_in_contract)}"
    )


def test_ac8_contract_lists_all_nine_error_subtypes() -> None:
    """Each of the nine error subtype names appears somewhere in the contract text."""
    text = _CONTRACT_PATH.read_text(encoding="utf-8")
    expected = {
        "EngineBuildError",
        "EngineDeserializeError",
        "EngineHashMismatchError",
        "EngineSchemaMismatchError",
        "EngineSidecarMissingError",
        "CalibrationCacheError",
        "InferenceError",
        "OutOfMemoryError",
        "TelemetryUnavailableError",
    }
    # Sorted so the first reported failure is the same on every run.
    for name in sorted(expected):
        assert name in text, (
            f"Contract file is missing the documented error subtype {name!r}"
        )


# ----------------------------------------------------------------------
# NFRs.
# Error types that must derive from the shared C7 base class.
_NFR_FAMILY_MEMBERS = (
    EngineBuildError,
    EngineDeserializeError,
    EngineHashMismatchError,
    EngineSchemaMismatchError,
    EngineSidecarMissingError,
    CalibrationCacheError,
    InferenceError,
    OutOfMemoryError,
    TelemetryUnavailableError,
)


@pytest.mark.parametrize("exc_type", _NFR_FAMILY_MEMBERS)
def test_nfr_reliability_all_runtime_errors_subclass_family(exc_type) -> None:
    """Every listed error type is a subclass of ``C7RuntimeError``."""
    assert issubclass(exc_type, C7RuntimeError)


def test_nfr_reliability_runtime_not_available_not_in_family() -> None:
    """``RuntimeNotAvailableError`` is not a subclass of ``C7RuntimeError``."""
    in_family = issubclass(RuntimeNotAvailableError, C7RuntimeError)
    assert not in_family


def test_nfr_perf_factory_under_200ms_p99(
    monkeypatch, strategy_module_cleanup
) -> None:
    """Time 1000 factory calls against a fake strategy; the p99 must not exceed 200 ms."""
    runtime = "pytorch_fp16"
    build_flag = _STRATEGY_MODULES[runtime][2]
    monkeypatch.setenv(build_flag, "ON")
    _install_fake_strategy(runtime)
    config = _config_with_runtime(runtime)

    def _one_call_ms() -> float:
        # Only the factory call itself sits between the two clock reads.
        started = time.perf_counter()
        build_inference_runtime(config)
        return (time.perf_counter() - started) * 1000.0

    samples_ms = sorted(_one_call_ms() for _ in range(1000))
    p99 = samples_ms[int(0.99 * len(samples_ms))]
    assert p99 <= 200.0, (
        f"build_inference_runtime() p99={p99:.3f} ms exceeds 200 ms NFR"
    )


# ----------------------------------------------------------------------
# Surface coverage.


def test_engine_handle_is_class_not_protocol() -> None:
    """``EngineHandle`` is an ordinary class without the runtime-Protocol marker.

    The check looks for the ``_is_runtime_protocol`` attribute, which is
    presumably the private marker the ``typing`` machinery puts on Protocol
    classes — TODO confirm it still holds when the interpreter is upgraded.
    """
    assert isinstance(EngineHandle, type)
    assert not hasattr(EngineHandle, "_is_runtime_protocol")


def test_c7_config_thermal_poll_hz_validation() -> None:
    """Zero and negative ``thermal_poll_hz`` values are both rejected."""
    for rejected_hz in (0.0, -1.0):
        with pytest.raises(ConfigError):
            C7InferenceConfig(thermal_poll_hz=rejected_hz)


def test_c7_config_engine_cache_dir_validation() -> None:
    """An empty ``engine_cache_dir`` is rejected."""
    with pytest.raises(ConfigError):
        C7InferenceConfig(engine_cache_dir="")


def test_precision_mode_enum_surface() -> None:
    """``PrecisionMode`` exposes exactly the three expected string values."""
    actual_values = {member.value for member in PrecisionMode}
    expected_values = {"fp16", "int8", "mixed"}
    assert actual_values == expected_values


def test_thermal_state_invariant_i6_default_safe() -> None:
    """A telemetry-unavailable snapshot built with throttle off reads back as such."""
    snapshot = ThermalState(
        cpu_temp_c=None,
        gpu_temp_c=None,
        thermal_throttle_active=False,
        measured_clock_mhz=None,
        measured_at_ns=0,
        is_telemetry_available=False,
    )
    assert snapshot.thermal_throttle_active is False
    assert snapshot.is_telemetry_available is False


# NOTE: the patch that introduced this module also deleted the old smoke
# test; its hunk is kept below verbatim (commented out) for reference.
#
# diff --git a/tests/unit/c7_inference/test_smoke.py b/tests/unit/c7_inference/test_smoke.py
# deleted file mode 100644
# index 9ef840f..0000000
# --- a/tests/unit/c7_inference/test_smoke.py
# +++ /dev/null
# @@ -1,12 +0,0 @@
# -"""C7 InferenceRuntime smoke test — AC-9."""
# -
# -
# -def test_interface_importable() -> None:
# -    # Assert
# -    from gps_denied_onboard.components.c7_inference import (
# -        EngineCacheEntry,
# -        InferenceRuntime,
# -    )
# -
# -    assert InferenceRuntime is not None
# -    assert EngineCacheEntry is not None