"""C10 ``EngineCompiler`` — per-model TRT compile + hardware-tied cache reuse (AZ-321). Public surface frozen by `_docs/02_document/components/11_c10_provisioning/description.md` §5 (error handling) + §7 (D-C10-6 calibration-cache reuse, D-C10-7 self-describing filename). Responsibilities ---------------- For every :class:`BackboneSpec` in :class:`EngineCompileRequest` the compiler: 1. Computes the canonical engine filename via AZ-281 :class:`EngineFilenameSchema` from the host's :class:`HostCapabilities` plus the request precision. 2. If the engine is already on disk at ``{cache_root}/{filename}`` AND :meth:`Sha256Sidecar.verify` returns ``True`` for that path: treats it as a cache hit (``CompileOutcome.REUSED``) and returns a canonical :class:`EngineCacheEntry` synthesised from the sidecar. Zero calls to the injected :class:`InferenceRuntime`. 3. Otherwise delegates to :meth:`InferenceRuntime.compile_engine` (AZ-298 / AZ-299 / AZ-300 own the write path; the runtime atomically writes both the ``.engine`` binary and its ``.sha256`` sidecar). The compiler does NOT double-write the file — the task spec's "engine bytes are returned by compile_engine then written via the sidecar" wording contradicts the actual AZ-297 Protocol (``compile_engine`` returns an :class:`EngineCacheEntry`, not raw bytes); the Protocol shipped first and wins. Hardware-tied cache reuse (D-C10-6) is satisfied by the filename construction: an engine compiled on ``(sm=87, jp=6.2, trt=10.3, fp16)`` lives at a different path than one compiled on ``(sm=89, jp=6.3, trt=10.5, fp16)`` so a hardware change naturally forces a rebuild — the compiler does NOT load nor delete stale engines (AC-4). """ from __future__ import annotations import logging import time from dataclasses import dataclass from enum import Enum from pathlib import Path from typing import Protocol, runtime_checkable from gps_denied_onboard._types.inference import ( BuildConfig, EngineCacheEntry, OptimizationProfile, PrecisionMode, ) from gps_denied_onboard._types.inference_errors import ( CalibrationCacheError, EngineBuildError, ) from gps_denied_onboard._types.manifests import HostCapabilities from gps_denied_onboard.helpers.engine_filename_schema import ( EngineFilenameSchema, ) from gps_denied_onboard.helpers.sha256_sidecar import ( Sha256Sidecar, Sha256SidecarError, ) __all__ = [ "BackboneSpec", "CompileEngineCallable", "CompileOutcome", "EngineCompileRequest", "EngineCompileResult", "EngineCompileSummary", "EngineCompiler", ] _DEFAULT_WORKSPACE_MB: int = 4096 @runtime_checkable class CompileEngineCallable(Protocol): """Structural cut of the C7 ``InferenceRuntime`` Protocol (AZ-297). The compiler only ever calls :meth:`InferenceRuntime.compile_engine`, so it accepts any object that structurally satisfies this narrow Protocol. This keeps the c10 component free of cross-component imports (architecture rule ``test_az270_compose_root.test_ac6``) while still letting the real :class:`gps_denied_onboard.components.c7_inference.InferenceRuntime` plug in unchanged via duck typing — the composition root wires the concrete strategy in. Same dual-Protocol pattern used by the LightGlue ``EngineHandle`` consumer cut in ``_types/manifests.py``. """ def compile_engine( self, model_path: Path, build_config: BuildConfig ) -> EngineCacheEntry: ... class CompileOutcome(str, Enum): """Per-backbone outcome of one ``compile_engines_for_corpus`` call.""" BUILT = "built" REUSED = "reused" @dataclass(frozen=True) class BackboneSpec: """One model the corpus needs an engine for. ``input_name`` defaults to ``"input"`` because most exported ONNX graphs in this project use that name; backbones with a different input name must override it. ``expected_input_shape`` is used to synthesise a single :class:`OptimizationProfile` with ``min == opt == max``; backbones that need explicit dynamic ranges should be split into separate :class:`OptimizationProfile`-aware helpers and supplied via ``custom_profiles`` (out of scope for the AZ-321 corpus; reserved for a later extension). """ model_name: str onnx_path: Path expected_input_shape: tuple[int, ...] input_name: str = "input" @dataclass(frozen=True) class EngineCompileRequest: """Inputs to one ``compile_engines_for_corpus`` invocation. ``host`` is passed in (rather than introspected via the runtime) because the AZ-297 :class:`InferenceRuntime` Protocol does not expose host-info; the composition root resolves :class:`HostCapabilities` from device probes (Tier-2) or test fixtures (Tier-1) and threads it through here. This keeps the compiler decoupled from the runtime's introspection surface and makes the AC-4 (hardware change) test trivial. """ backbones: tuple[BackboneSpec, ...] calibration_path: Path | None cache_root: Path precision: PrecisionMode host: HostCapabilities workspace_mb: int = _DEFAULT_WORKSPACE_MB @dataclass(frozen=True) class EngineCompileResult: """One backbone's outcome record after ``compile_engines_for_corpus``. ``entry`` is the canonical :class:`gps_denied_onboard._types.inference.EngineCacheEntry` — same shape whether the engine was freshly built or reused. The surrounding ``outcome`` + ``compile_duration_s`` are c10-local bookkeeping (the AZ-321 task spec called this combined record ``EngineCacheEntry`` but that name is already taken by the AZ-297 canonical DTO; the canonical shape wins and the wrapper takes a new name). """ entry: EngineCacheEntry outcome: CompileOutcome compile_duration_s: float | None @dataclass(frozen=True) class EngineCompileSummary: """Aggregate counts surfaced via the ``c10.engine.compile.summary`` log.""" engines_built: int engines_reused: int cache_hit_ratio: float class EngineCompiler: """Compile or reuse TensorRT engines for every backbone in a corpus. The compiler is stateless across calls; ``__init__`` only injects the collaborators it cannot construct itself (the :class:`InferenceRuntime` is composition-root-owned; the logger is named per component). """ def __init__( self, *, inference_runtime: CompileEngineCallable, logger: logging.Logger, ) -> None: self._runtime = inference_runtime self._log = logger def compile_engines_for_corpus( self, request: EngineCompileRequest ) -> tuple[EngineCompileResult, ...]: """Compile or reuse one engine per backbone in ``request.backbones``. Empty ``backbones`` → empty result and a summary log with all-zero counts (AC-10). Errors from :meth:`InferenceRuntime.compile_engine` are NOT caught here — they propagate to the caller (AC-6 / AC-7). Side effects on backbones implemented before the failing one are visible on disk; the compiler does NOT roll back (AZ-298's atomic-write guarantees no half-engine). """ engines_dir = request.cache_root engines_dir.mkdir(parents=True, exist_ok=True) results: list[EngineCompileResult] = [] for backbone in request.backbones: result = self._compile_one(backbone, request) results.append(result) summary = _summarise(results) self._log.info( "c10.engine.compile.summary", extra={ "kind": "c10.engine.compile.summary", "kv": { "engines_built": summary.engines_built, "engines_reused": summary.engines_reused, "cache_hit_ratio": summary.cache_hit_ratio, "total": len(results), }, }, ) return tuple(results) def _compile_one( self, backbone: BackboneSpec, request: EngineCompileRequest, ) -> EngineCompileResult: filename = EngineFilenameSchema.build( model_name=backbone.model_name, sm=request.host.sm, jetpack=request.host.jetpack, trt=request.host.trt, precision=request.precision.value, ) target_path = request.cache_root / filename cache_hit_entry = self._maybe_reuse( target_path, backbone, request ) if cache_hit_entry is not None: self._log.info( "c10.engine.cache.hit", extra={ "kind": "c10.engine.cache.hit", "kv": { "model_name": backbone.model_name, "engine_path": str(target_path), }, }, ) return EngineCompileResult( entry=cache_hit_entry, outcome=CompileOutcome.REUSED, compile_duration_s=None, ) self._log.warning( "c10.engine.cache.miss", extra={ "kind": "c10.engine.cache.miss", "kv": { "model_name": backbone.model_name, "target_filename": filename, }, }, ) build_config = _build_config_for_backbone(backbone, request) t0 = time.perf_counter() try: entry = self._runtime.compile_engine( backbone.onnx_path, build_config ) except (EngineBuildError, CalibrationCacheError) as exc: # AZ-507 narrowed the catch to the documented C7 typed-error # envelope (`_types/inference_errors.py` re-exports # `EngineBuildError` + `CalibrationCacheError` from # `c7_inference.errors` without violating the AZ-270 lint). # Unknown exceptions intentionally propagate unhandled — they # are programmer errors, not C7 contract failures, and must # not be swallowed under a structured "compile.error" log. self._log.error( "c10.engine.compile.error", extra={ "kind": "c10.engine.compile.error", "kv": { "model_name": backbone.model_name, "calibration_path": ( str(request.calibration_path) if request.calibration_path is not None else None ), "error_class": type(exc).__name__, "message": str(exc), }, }, ) raise elapsed_s = time.perf_counter() - t0 return EngineCompileResult( entry=entry, outcome=CompileOutcome.BUILT, compile_duration_s=elapsed_s, ) def _maybe_reuse( self, target_path: Path, backbone: BackboneSpec, request: EngineCompileRequest, ) -> EngineCacheEntry | None: """Return a synthesised :class:`EngineCacheEntry` on cache hit; ``None`` on miss. Side effect: emits a WARN log on a tampered / missing sidecar (the engine file exists but its sidecar is invalid). The recompile-on-miss branch is owned by the caller. """ if not target_path.exists(): return None try: verified = Sha256Sidecar.verify(target_path) except Sha256SidecarError as exc: self._log.warning( "c10.engine.sidecar.mismatch", extra={ "kind": "c10.engine.sidecar.mismatch", "kv": { "model_name": backbone.model_name, "engine_path": str(target_path), "reason": str(exc), }, }, ) return None if not verified: self._log.warning( "c10.engine.sidecar.mismatch", extra={ "kind": "c10.engine.sidecar.mismatch", "kv": { "model_name": backbone.model_name, "engine_path": str(target_path), "reason": "digest_mismatch", }, }, ) return None sidecar_text = ( Path(str(target_path) + ".sha256").read_text().strip() ) return EngineCacheEntry( engine_path=target_path, sha256_hex=sidecar_text, sm=request.host.sm, jp=request.host.jetpack, trt=request.host.trt, precision=request.precision, extras={}, ) def _build_config_for_backbone( backbone: BackboneSpec, request: EngineCompileRequest ) -> BuildConfig: """Synthesise a :class:`BuildConfig` from a :class:`BackboneSpec`. Constructs exactly one :class:`OptimizationProfile` with ``min == opt == max == expected_input_shape``; backbones with dynamic input ranges are out of scope for AZ-321 and would need a richer ``BackboneSpec`` variant. """ profile = OptimizationProfile( input_name=backbone.input_name, min_shape=backbone.expected_input_shape, opt_shape=backbone.expected_input_shape, max_shape=backbone.expected_input_shape, ) return BuildConfig( precision=request.precision, workspace_mb=request.workspace_mb, calibration_dataset=request.calibration_path, optimization_profiles=(profile,), ) def _summarise( results: list[EngineCompileResult], ) -> EngineCompileSummary: built = sum( 1 for r in results if r.outcome is CompileOutcome.BUILT ) reused = sum( 1 for r in results if r.outcome is CompileOutcome.REUSED ) total = len(results) ratio = reused / total if total > 0 else 0.0 return EngineCompileSummary( engines_built=built, engines_reused=reused, cache_hit_ratio=ratio, )