mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-21 22:31:13 +00:00
[AZ-321] C10 EngineCompiler: hardware-tied TRT compile + cache reuse
Land the C10 per-model engine compile + cache-reuse orchestrator. `EngineCompiler.compile_engines_for_corpus(request)` walks the corpus, computes the canonical engine filename via AZ-281 `EngineFilenameSchema.build`, and either reuses the cached binary (cache hit, AZ-280 `Sha256Sidecar.verify` returns True) or delegates to the AZ-297 `compile_engine` on the injected runtime (cache miss; the runtime owns the write path). Returns one `EngineCompileResult` per backbone carrying the canonical `EngineCacheEntry`, outcome (BUILT / REUSED), and `compile_duration_s` (None on reuse). Hardware-tied reuse (D-C10-6 / D-C10-7) falls out of the filename schema — a host change rebuilds at the new path and leaves the old files untouched (AC-4). Design corrections vs. the task spec body: - The spec proposed a c10-local `EngineCacheEntry` carrying outcome and duration; that name is already taken by the AZ-297 canonical DTO. The wrapper is renamed `EngineCompileResult`; the canonical shape wins. - The spec called `InferenceRuntime.host_info()`, which is not in the AZ-297 Protocol. `HostCapabilities` is threaded through `EngineCompileRequest` instead so the composition root owns host probing and the compiler stays decoupled. - The c10 layer cannot import `components.c7_inference` (arch rule `test_az270_compose_root.test_ac6`). `engine_compiler.py` defines `CompileEngineCallable` — a structural Protocol cut of `InferenceRuntime` exposing only `compile_engine` — and catches broad `Exception` (re-raising preserves the original type; `error_class` is recorded in the ERROR log payload). Production - engine_compiler.py: `CompileOutcome` enum, `BackboneSpec`, `EngineCompileRequest`, `EngineCompileResult`, `EngineCompileSummary` DTOs; `CompileEngineCallable` Protocol; `EngineCompiler` with the single public method. 
- config.py: `BackboneConfig` + `C10ProvisioningConfig` (`workspace_mb` default 4 GiB to match C7 NFT-LIM-01); validate positive shape dims and duplicate model_name detection in `__post_init__`. - runtime_root/c10_factory.py: `build_engine_compiler(config)` wires the existing `build_inference_runtime` factory through; `build_backbone_specs(config)` materialises the `BackboneSpec` tuple from the config block. - components/c10_provisioning/__init__.py: re-exports the AZ-321 surface and registers the new config block. Tests - test_engine_compiler.py: covers AC-1..AC-10 + missing-sidecar sibling case for AC-5. Tier-1 via fake runtime that writes through the REAL `Sha256Sidecar.write_atomic_and_sidecar`. Tier-2 placeholders for the cache-hit p99 NFR (200 MB engine sweep) and kill-during-compile atomic-write NFR. Docs - module-layout.md: c10_provisioning Per-Component Mapping lists the new internal modules (engine_compiler.py, config.py), the composition-root c10_factory.py, the AZ-321 public re-export surface, and the registered config block. - batch_33_cycle1_report.md + reviews/batch_33_review.md: PASS_WITH_WARNINGS (4 Low findings accepted). Tests run: c10_provisioning 13 passing + 2 Tier-2 skips; combined unit suite (excluding pending components) 543 passing, 21 env-skipped. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -3,10 +3,45 @@
|
||||
``EngineCacheEntry`` is the C7 canonical DTO (frozen at AZ-297) and
|
||||
lives at the L1 ``_types`` layer so C10 can re-export it without
|
||||
crossing the components.* boundary (architecture rule AC-6).
|
||||
|
||||
The AZ-321 ``EngineCompiler`` plus its DTOs are re-exported here so
|
||||
the composition root and downstream operator-tooling code consume
|
||||
them through this single contract surface.
|
||||
"""
|
||||
|
||||
from gps_denied_onboard._types.inference import EngineCacheEntry
|
||||
from gps_denied_onboard._types.manifests import Manifest
|
||||
from gps_denied_onboard.components.c10_provisioning.interface import CacheProvisioner
|
||||
from gps_denied_onboard.components.c10_provisioning.config import (
|
||||
BackboneConfig,
|
||||
C10ProvisioningConfig,
|
||||
)
|
||||
from gps_denied_onboard.components.c10_provisioning.engine_compiler import (
|
||||
BackboneSpec,
|
||||
CompileEngineCallable,
|
||||
CompileOutcome,
|
||||
EngineCompileRequest,
|
||||
EngineCompileResult,
|
||||
EngineCompileSummary,
|
||||
EngineCompiler,
|
||||
)
|
||||
from gps_denied_onboard.components.c10_provisioning.interface import (
|
||||
CacheProvisioner,
|
||||
)
|
||||
from gps_denied_onboard.config.schema import register_component_block
|
||||
|
||||
__all__ = ["CacheProvisioner", "EngineCacheEntry", "Manifest"]
|
||||
register_component_block("c10_provisioning", C10ProvisioningConfig)
|
||||
|
||||
__all__ = [
|
||||
"BackboneConfig",
|
||||
"BackboneSpec",
|
||||
"C10ProvisioningConfig",
|
||||
"CacheProvisioner",
|
||||
"CompileEngineCallable",
|
||||
"CompileOutcome",
|
||||
"EngineCacheEntry",
|
||||
"EngineCompileRequest",
|
||||
"EngineCompileResult",
|
||||
"EngineCompileSummary",
|
||||
"EngineCompiler",
|
||||
"Manifest",
|
||||
]
|
||||
|
||||
@@ -0,0 +1,109 @@
|
||||
"""C10 cache-provisioning config block (AZ-321).
|
||||
|
||||
Registered into ``config.components['c10_provisioning']`` by the
|
||||
package ``__init__.py``. The composition-root factory
|
||||
:func:`gps_denied_onboard.runtime_root.c10_factory.build_engine_compiler`
|
||||
reads this block to enumerate the project's backbones and to bound
|
||||
the workspace memory passed to
|
||||
:meth:`InferenceRuntime.compile_engine`.
|
||||
|
||||
Backbone enumeration is config-driven (not hardcoded) so a new model
|
||||
is a YAML change rather than a code change — see the AZ-321 task
|
||||
spec §Constraints.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from gps_denied_onboard.config.schema import ConfigError
|
||||
|
||||
__all__ = [
|
||||
"BackboneConfig",
|
||||
"C10ProvisioningConfig",
|
||||
]
|
||||
|
||||
|
||||
# Default for ``C10ProvisioningConfig.workspace_mb`` (4096 MB, i.e. 4 GiB).
_DEFAULT_WORKSPACE_MB: int = 4096
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class BackboneConfig:
    """One backbone the C10 corpus needs an engine for.

    ``onnx_path`` points at the source ``.onnx`` file. It is kept as a
    plain string here for cheap YAML round-trip; converting it into a
    :class:`pathlib.Path` is left to the composition root.

    ``expected_input_shape`` is stored as a tuple so that instances stay
    hashable. A ``list`` (e.g. a sequence loaded from YAML) is accepted
    and normalised to a tuple before validation.

    Raises:
        ConfigError: if ``model_name``, ``onnx_path`` or ``input_name``
            is empty, if ``expected_input_shape`` is empty, or if any
            dim is not a strictly positive ``int`` (``bool`` is
            rejected).
    """

    model_name: str
    onnx_path: str
    expected_input_shape: tuple[int, ...]
    input_name: str = "input"

    def __post_init__(self) -> None:
        if isinstance(self.expected_input_shape, list):
            # The dataclass is frozen, so go through object.__setattr__
            # to swap the list for an equivalent (hashable) tuple.
            object.__setattr__(
                self,
                "expected_input_shape",
                tuple(self.expected_input_shape),
            )
        if not self.model_name:
            raise ConfigError(
                "BackboneConfig.model_name must be a non-empty string"
            )
        if not self.onnx_path:
            raise ConfigError(
                f"BackboneConfig({self.model_name!r}).onnx_path must "
                "be a non-empty string"
            )
        if not self.expected_input_shape:
            raise ConfigError(
                f"BackboneConfig({self.model_name!r}).expected_input_shape "
                "must be a non-empty tuple of positive ints"
            )
        for dim in self.expected_input_shape:
            # bool is a subclass of int, so it has to be excluded explicitly.
            if not isinstance(dim, int) or isinstance(dim, bool) or dim <= 0:
                raise ConfigError(
                    f"BackboneConfig({self.model_name!r}).expected_input_shape "
                    f"contains non-positive or non-int dim: {dim!r}"
                )
        if not self.input_name:
            raise ConfigError(
                f"BackboneConfig({self.model_name!r}).input_name must "
                "be a non-empty string"
            )
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class C10ProvisioningConfig:
    """Per-component config for C10 cache provisioning.

    ``backbones`` enumerates the project's engine corpus; the default is
    an empty tuple so a run that has no use for engines can leave the
    block unconfigured.

    ``workspace_mb`` is the per-engine workspace allocation; it defaults
    to 4096 (4 GiB).

    Raises:
        ConfigError: if ``workspace_mb`` is not a strictly positive
            ``int`` (``bool`` is rejected), or if two backbones share
            the same ``model_name``.
    """

    backbones: tuple[BackboneConfig, ...] = field(default_factory=tuple)
    workspace_mb: int = _DEFAULT_WORKSPACE_MB

    def __post_init__(self) -> None:
        # Type-check before comparing so a mis-typed value (e.g. the
        # string "4096") surfaces as ConfigError rather than TypeError.
        if not isinstance(self.workspace_mb, int) or isinstance(
            self.workspace_mb, bool
        ):
            raise ConfigError(
                "C10ProvisioningConfig.workspace_mb must be an int; "
                f"got {self.workspace_mb!r}"
            )
        if self.workspace_mb <= 0:
            raise ConfigError(
                "C10ProvisioningConfig.workspace_mb must be > 0; "
                f"got {self.workspace_mb}"
            )
        seen: set[str] = set()
        for backbone in self.backbones:
            if backbone.model_name in seen:
                raise ConfigError(
                    "C10ProvisioningConfig.backbones contains duplicate "
                    f"model_name {backbone.model_name!r}"
                )
            seen.add(backbone.model_name)
|
||||
@@ -0,0 +1,407 @@
|
||||
"""C10 ``EngineCompiler`` — per-model TRT compile + hardware-tied cache reuse (AZ-321).
|
||||
|
||||
Public surface frozen by `_docs/02_document/components/11_c10_provisioning/description.md`
|
||||
§5 (error handling) + §7 (D-C10-6 calibration-cache reuse, D-C10-7 self-describing
|
||||
filename).
|
||||
|
||||
Responsibilities
|
||||
----------------
|
||||
|
||||
For every :class:`BackboneSpec` in :class:`EngineCompileRequest` the
|
||||
compiler:
|
||||
|
||||
1. Computes the canonical engine filename via AZ-281
|
||||
:class:`EngineFilenameSchema` from the host's
|
||||
:class:`HostCapabilities` plus the request precision.
|
||||
2. If the engine is already on disk at
|
||||
``{cache_root}/{filename}`` AND
|
||||
:meth:`Sha256Sidecar.verify` returns ``True`` for that path:
|
||||
treats it as a cache hit (``CompileOutcome.REUSED``) and returns a
|
||||
canonical :class:`EngineCacheEntry` synthesised from the sidecar.
|
||||
Zero calls to the injected :class:`InferenceRuntime`.
|
||||
3. Otherwise delegates to
|
||||
:meth:`InferenceRuntime.compile_engine` (AZ-298 / AZ-299 / AZ-300
|
||||
own the write path; the runtime atomically writes both the
|
||||
``.engine`` binary and its ``.sha256`` sidecar). The compiler does
|
||||
NOT double-write the file — the task spec's "engine bytes are
|
||||
returned by compile_engine then written via the sidecar" wording
|
||||
contradicts the actual AZ-297 Protocol (``compile_engine`` returns
|
||||
an :class:`EngineCacheEntry`, not raw bytes); the Protocol shipped
|
||||
first and wins.
|
||||
|
||||
Hardware-tied cache reuse (D-C10-6) is satisfied by the filename
|
||||
construction: an engine compiled on ``(sm=87, jp=6.2, trt=10.3, fp16)``
|
||||
lives at a different path than one compiled on
|
||||
``(sm=89, jp=6.3, trt=10.5, fp16)`` so a hardware change naturally
|
||||
forces a rebuild — the compiler does NOT load nor delete stale
|
||||
engines (AC-4).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Protocol, runtime_checkable
|
||||
|
||||
from gps_denied_onboard._types.inference import (
|
||||
BuildConfig,
|
||||
EngineCacheEntry,
|
||||
OptimizationProfile,
|
||||
PrecisionMode,
|
||||
)
|
||||
from gps_denied_onboard._types.manifests import HostCapabilities
|
||||
from gps_denied_onboard.helpers.engine_filename_schema import (
|
||||
EngineFilenameSchema,
|
||||
)
|
||||
from gps_denied_onboard.helpers.sha256_sidecar import (
|
||||
Sha256Sidecar,
|
||||
Sha256SidecarError,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"BackboneSpec",
|
||||
"CompileEngineCallable",
|
||||
"CompileOutcome",
|
||||
"EngineCompileRequest",
|
||||
"EngineCompileResult",
|
||||
"EngineCompileSummary",
|
||||
"EngineCompiler",
|
||||
]
|
||||
|
||||
|
||||
# Default for ``EngineCompileRequest.workspace_mb`` (4096 MB, i.e. 4 GiB).
_DEFAULT_WORKSPACE_MB: int = 4096
|
||||
|
||||
|
||||
@runtime_checkable
class CompileEngineCallable(Protocol):
    """Narrow structural view of the C7 ``InferenceRuntime`` Protocol (AZ-297).

    ``compile_engine`` is the only runtime method the C10 compiler
    invokes, so any object providing it is accepted. Typing against
    this cut instead of the real runtime class keeps the c10 component
    free of cross-component imports (architecture rule
    ``test_az270_compose_root.test_ac6``); the composition root wires
    the concrete runtime in, and it satisfies this Protocol by duck
    typing.
    """

    def compile_engine(
        self, model_path: Path, build_config: BuildConfig
    ) -> EngineCacheEntry:
        """Build an engine for ``model_path`` and return its cache entry."""
        ...
|
||||
|
||||
|
||||
class CompileOutcome(str, Enum):
    """How one backbone was satisfied by ``compile_engines_for_corpus``."""

    # The runtime's ``compile_engine`` was invoked for this backbone.
    BUILT = "built"
    # A verified engine already on disk was used; no compile happened.
    REUSED = "reused"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class BackboneSpec:
    """Describes a single model that needs an engine in the corpus.

    ``expected_input_shape`` feeds one static
    :class:`OptimizationProfile` (``min == opt == max``). Backbones
    that need explicit dynamic ranges are out of scope for the AZ-321
    corpus and reserved for a later extension.

    ``input_name`` falls back to ``"input"``; a backbone whose ONNX
    graph names its input differently must override it.
    """

    # Name fed into the canonical engine filename.
    model_name: str
    # Source ``.onnx`` file handed to the runtime's ``compile_engine``.
    onnx_path: Path
    # Static input shape used for min/opt/max of the single profile.
    expected_input_shape: tuple[int, ...]
    # Input tensor name used for the optimization profile.
    input_name: str = "input"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class EngineCompileRequest:
    """Everything one ``compile_engines_for_corpus`` call needs.

    ``host`` is supplied by the caller instead of being queried from
    the runtime: the AZ-297 :class:`InferenceRuntime` Protocol exposes
    no host-info method, so the composition root resolves
    :class:`HostCapabilities` and threads it through this request. The
    compiler thereby stays decoupled from runtime introspection, and a
    hardware change (AC-4) can be exercised just by passing a different
    ``host``.
    """

    # Models to compile or reuse, visited in order.
    backbones: tuple[BackboneSpec, ...]
    # Forwarded as ``BuildConfig.calibration_dataset``; may be ``None``.
    calibration_path: Path | None
    # Directory that holds the engine files; created if missing.
    cache_root: Path
    # Goes into both the engine filename and the ``BuildConfig``.
    precision: PrecisionMode
    # Supplies ``sm`` / ``jetpack`` / ``trt`` for the engine filename.
    host: HostCapabilities
    # Forwarded as ``BuildConfig.workspace_mb``.
    workspace_mb: int = _DEFAULT_WORKSPACE_MB
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class EngineCompileResult:
    """Per-backbone record returned by ``compile_engines_for_corpus``.

    ``entry`` is the canonical
    :class:`gps_denied_onboard._types.inference.EngineCacheEntry` and
    has the same shape for a fresh build and for a reuse. ``outcome``
    and ``compile_duration_s`` are c10-local bookkeeping wrapped around
    it. (The AZ-321 task spec named this combined record
    ``EngineCacheEntry``; that name already belongs to the AZ-297
    canonical DTO, so the wrapper carries a different name.)
    """

    # Canonical cache entry for the engine.
    entry: EngineCacheEntry
    # Whether the engine was built or reused.
    outcome: CompileOutcome
    # Seconds spent in ``compile_engine``; ``None`` on reuse.
    compile_duration_s: float | None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class EngineCompileSummary:
    """Totals reported in the ``c10.engine.compile.summary`` log record."""

    # Number of results with outcome BUILT.
    engines_built: int
    # Number of results with outcome REUSED.
    engines_reused: int
    # ``engines_reused`` divided by the result count (0.0 when empty).
    cache_hit_ratio: float
|
||||
|
||||
|
||||
class EngineCompiler:
    """Compile or reuse TensorRT engines for every backbone in a corpus.

    The compiler keeps no state between calls; ``__init__`` only stores
    the two collaborators it cannot construct itself: the runtime that
    performs the actual compile and the logger to report through.
    """

    def __init__(
        self,
        *,
        inference_runtime: CompileEngineCallable,
        logger: logging.Logger,
    ) -> None:
        self._runtime = inference_runtime
        self._log = logger

    def compile_engines_for_corpus(
        self, request: EngineCompileRequest
    ) -> tuple[EngineCompileResult, ...]:
        """Compile or reuse one engine per backbone in ``request.backbones``.

        Creates ``request.cache_root`` if it does not exist, processes
        the backbones in order, then emits one
        ``c10.engine.compile.summary`` log record.

        Empty ``backbones`` → empty result and a summary log with
        all-zero counts (AC-10).

        Errors raised by the runtime's ``compile_engine`` are logged
        and re-raised unchanged, so they propagate to the caller
        (AC-6 / AC-7); no summary is logged in that case. Engines
        compiled for backbones before the failing one stay on disk;
        the compiler does NOT roll back.

        Returns:
            One :class:`EngineCompileResult` per backbone, in request
            order.
        """
        request.cache_root.mkdir(parents=True, exist_ok=True)
        results = [
            self._compile_one(backbone, request)
            for backbone in request.backbones
        ]

        summary = _summarise(results)
        self._log.info(
            "c10.engine.compile.summary",
            extra={
                "kind": "c10.engine.compile.summary",
                "kv": {
                    "engines_built": summary.engines_built,
                    "engines_reused": summary.engines_reused,
                    "cache_hit_ratio": summary.cache_hit_ratio,
                    "total": len(results),
                },
            },
        )
        return tuple(results)

    def _compile_one(
        self,
        backbone: BackboneSpec,
        request: EngineCompileRequest,
    ) -> EngineCompileResult:
        """Reuse the cached engine for ``backbone`` or compile a new one.

        The engine filename is derived from the backbone name, the
        host's ``sm`` / ``jetpack`` / ``trt`` and the request precision,
        so a different host or precision maps to a different path.
        """
        filename = EngineFilenameSchema.build(
            model_name=backbone.model_name,
            sm=request.host.sm,
            jetpack=request.host.jetpack,
            trt=request.host.trt,
            precision=request.precision.value,
        )
        target_path = request.cache_root / filename

        cache_hit_entry = self._maybe_reuse(target_path, backbone, request)
        if cache_hit_entry is not None:
            self._log.info(
                "c10.engine.cache.hit",
                extra={
                    "kind": "c10.engine.cache.hit",
                    "kv": {
                        "model_name": backbone.model_name,
                        "engine_path": str(target_path),
                    },
                },
            )
            return EngineCompileResult(
                entry=cache_hit_entry,
                outcome=CompileOutcome.REUSED,
                compile_duration_s=None,
            )

        self._log.warning(
            "c10.engine.cache.miss",
            extra={
                "kind": "c10.engine.cache.miss",
                "kv": {
                    "model_name": backbone.model_name,
                    "target_filename": filename,
                },
            },
        )
        build_config = _build_config_for_backbone(backbone, request)
        t0 = time.perf_counter()
        try:
            entry = self._runtime.compile_engine(
                backbone.onnx_path, build_config
            )
        except Exception as exc:
            # The c10 layer is forbidden from importing the c7 errors
            # module (architecture rule AC-6 /
            # test_az270_compose_root.test_ac6), so the concrete error
            # types cannot be named here. Catch broadly, record the
            # class name in the log payload, and re-raise so the caller
            # still sees the original exception type.
            self._log.error(
                "c10.engine.compile.error",
                extra={
                    "kind": "c10.engine.compile.error",
                    "kv": {
                        "model_name": backbone.model_name,
                        "calibration_path": (
                            str(request.calibration_path)
                            if request.calibration_path is not None
                            else None
                        ),
                        "error_class": type(exc).__name__,
                        "message": str(exc),
                    },
                },
            )
            raise
        elapsed_s = time.perf_counter() - t0
        return EngineCompileResult(
            entry=entry,
            outcome=CompileOutcome.BUILT,
            compile_duration_s=elapsed_s,
        )

    def _maybe_reuse(
        self,
        target_path: Path,
        backbone: BackboneSpec,
        request: EngineCompileRequest,
    ) -> EngineCacheEntry | None:
        """Return a synthesised :class:`EngineCacheEntry` on cache hit; ``None`` on miss.

        A hit requires that ``target_path`` exists, that
        :meth:`Sha256Sidecar.verify` returns ``True`` for it, and that
        the ``.sha256`` sidecar can be read back. Every other case is a
        miss. Apart from the "file does not exist" case, a miss emits a
        ``c10.engine.sidecar.mismatch`` WARN log. The recompile on miss
        is owned by the caller.
        """
        if not target_path.exists():
            return None
        try:
            verified = Sha256Sidecar.verify(target_path)
        except Sha256SidecarError as exc:
            self._warn_sidecar_mismatch(target_path, backbone, str(exc))
            return None
        if not verified:
            self._warn_sidecar_mismatch(
                target_path, backbone, "digest_mismatch"
            )
            return None

        sidecar_path = target_path.with_name(target_path.name + ".sha256")
        try:
            sha256_hex = sidecar_path.read_text(encoding="utf-8").strip()
        except (OSError, UnicodeDecodeError) as exc:
            # The sidecar disappeared or became unreadable after it was
            # verified; fall back to a recompile instead of aborting
            # the whole corpus run.
            self._warn_sidecar_mismatch(target_path, backbone, str(exc))
            return None

        return EngineCacheEntry(
            engine_path=target_path,
            sha256_hex=sha256_hex,
            sm=request.host.sm,
            jp=request.host.jetpack,
            trt=request.host.trt,
            precision=request.precision,
            extras={},
        )

    def _warn_sidecar_mismatch(
        self,
        target_path: Path,
        backbone: BackboneSpec,
        reason: str,
    ) -> None:
        """Emit the ``c10.engine.sidecar.mismatch`` WARN record."""
        self._log.warning(
            "c10.engine.sidecar.mismatch",
            extra={
                "kind": "c10.engine.sidecar.mismatch",
                "kv": {
                    "model_name": backbone.model_name,
                    "engine_path": str(target_path),
                    "reason": reason,
                },
            },
        )
|
||||
|
||||
|
||||
def _build_config_for_backbone(
    backbone: BackboneSpec, request: EngineCompileRequest
) -> BuildConfig:
    """Derive the :class:`BuildConfig` used to compile ``backbone``.

    The config carries exactly one :class:`OptimizationProfile` whose
    ``min``, ``opt`` and ``max`` shapes all equal
    ``backbone.expected_input_shape``. Precision, workspace size and
    calibration path are taken from ``request``. Dynamic input ranges
    are out of scope for AZ-321.
    """
    static_shape = backbone.expected_input_shape
    static_profile = OptimizationProfile(
        input_name=backbone.input_name,
        min_shape=static_shape,
        opt_shape=static_shape,
        max_shape=static_shape,
    )
    return BuildConfig(
        precision=request.precision,
        workspace_mb=request.workspace_mb,
        calibration_dataset=request.calibration_path,
        optimization_profiles=(static_profile,),
    )
|
||||
|
||||
|
||||
def _summarise(
    results: list[EngineCompileResult],
) -> EngineCompileSummary:
    """Count built / reused outcomes and derive the cache-hit ratio.

    The ratio is ``reused / len(results)``, or ``0.0`` when ``results``
    is empty.
    """
    built_count = 0
    reused_count = 0
    for item in results:
        if item.outcome is CompileOutcome.BUILT:
            built_count += 1
        elif item.outcome is CompileOutcome.REUSED:
            reused_count += 1

    total = len(results)
    if total > 0:
        hit_ratio = reused_count / total
    else:
        hit_ratio = 0.0

    return EngineCompileSummary(
        engines_built=built_count,
        engines_reused=reused_count,
        cache_hit_ratio=hit_ratio,
    )
|
||||
@@ -0,0 +1,85 @@
|
||||
"""C10 cache-provisioning factory (AZ-321).
|
||||
|
||||
Composition-root wiring for the AZ-321 :class:`EngineCompiler`. Reads
|
||||
``config.components['c10_provisioning']`` for the backbone corpus,
|
||||
resolves the :class:`InferenceRuntime` strategy via
|
||||
:func:`gps_denied_onboard.runtime_root.inference_factory.build_inference_runtime`,
|
||||
and returns a ready-to-call :class:`EngineCompiler`.
|
||||
|
||||
Backbone resolution is config-driven: the YAML enumerates the
|
||||
project's engine corpus (initially DINOv2-VPR + LightGlue + ALIKED
|
||||
per the AZ-321 task spec); adding a model is a config change rather
|
||||
than a code change.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from gps_denied_onboard.components.c10_provisioning import (
|
||||
BackboneSpec,
|
||||
EngineCompiler,
|
||||
)
|
||||
from gps_denied_onboard.components.c10_provisioning.config import (
|
||||
BackboneConfig,
|
||||
C10ProvisioningConfig,
|
||||
)
|
||||
from gps_denied_onboard.logging import get_logger
|
||||
from gps_denied_onboard.runtime_root.inference_factory import (
|
||||
build_inference_runtime,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from gps_denied_onboard.config.schema import Config
|
||||
|
||||
__all__ = [
|
||||
"build_backbone_specs",
|
||||
"build_engine_compiler",
|
||||
]
|
||||
|
||||
|
||||
def build_engine_compiler(config: "Config") -> EngineCompiler:
    """Return an :class:`EngineCompiler` wired from ``config``.

    Steps:

    1. Obtain the inference runtime from the existing C7 factory,
       :func:`build_inference_runtime`.
    2. Obtain the logger named ``"c10_provisioning"``.
    3. Pass both to :class:`EngineCompiler`.

    The :class:`BackboneSpec` corpus is deliberately not built here;
    call :func:`build_backbone_specs` for that, so the spec list can be
    obtained without constructing an inference runtime.
    """
    return EngineCompiler(
        inference_runtime=build_inference_runtime(config),
        logger=get_logger("c10_provisioning"),
    )
|
||||
|
||||
|
||||
def build_backbone_specs(config: "Config") -> tuple[BackboneSpec, ...]:
    """Build the :class:`BackboneSpec` tuple for the configured corpus.

    Reads ``config.components['c10_provisioning'].backbones`` and
    converts each :class:`BackboneConfig` into a :class:`BackboneSpec`,
    preserving order. Each ``onnx_path`` string is wrapped in a
    :class:`Path` as given; this function does not make it absolute or
    check that the file exists.
    """
    block: C10ProvisioningConfig = config.components["c10_provisioning"]
    specs = [
        _backbone_spec_from_config(backbone_cfg)
        for backbone_cfg in block.backbones
    ]
    return tuple(specs)
|
||||
|
||||
|
||||
def _backbone_spec_from_config(
    backbone: BackboneConfig,
) -> BackboneSpec:
    """Convert one :class:`BackboneConfig` into a :class:`BackboneSpec`.

    ``onnx_path`` becomes a :class:`Path` and ``expected_input_shape``
    is copied into a tuple; the other fields are passed through.
    """
    onnx_path = Path(backbone.onnx_path)
    input_shape = tuple(backbone.expected_input_shape)
    return BackboneSpec(
        model_name=backbone.model_name,
        onnx_path=onnx_path,
        expected_input_shape=input_shape,
        input_name=backbone.input_name,
    )
|
||||
Reference in New Issue
Block a user