"""Unit tests for AZ-321 :class:`EngineCompiler`. Covers the 10 ACs + 2 NFRs in the AZ-321 task spec. Tier-1 tests use a fake :class:`InferenceRuntime` that writes scripted bytes via the real :class:`Sha256Sidecar` so the cache-hit / cache-miss / tampered- sidecar paths exercise the production helpers. NFR perf + atomic- write skips are Tier-2 placeholders kept for the microbench harness. """ from __future__ import annotations import logging from dataclasses import dataclass, field from pathlib import Path import pytest from gps_denied_onboard._types.inference import ( BuildConfig, EngineCacheEntry, PrecisionMode, ) from gps_denied_onboard._types.manifests import HostCapabilities from gps_denied_onboard.components.c10_provisioning import ( BackboneSpec, CompileOutcome, EngineCompileRequest, EngineCompiler, ) from gps_denied_onboard.components.c7_inference import ( CalibrationCacheError, EngineBuildError, ) from gps_denied_onboard.helpers.engine_filename_schema import ( EngineFilenameSchema, ) from gps_denied_onboard.helpers.sha256_sidecar import Sha256Sidecar # ---------------------------------------------------------------------- # Fixtures # ---------------------------------------------------------------------- _HOST_T2: HostCapabilities = HostCapabilities(sm=87, jetpack="6.2", trt="10.3") _HOST_T2_NEXT: HostCapabilities = HostCapabilities( sm=89, jetpack="6.3", trt="10.5" ) @dataclass class _FakeRuntime: """Stand-in for a real C7 ``InferenceRuntime`` in Tier-1 tests. ``compile_engine`` writes deterministic engine bytes (a tiny payload derived from the model name) via the real :class:`Sha256Sidecar` to the same path the C7 production runtimes would. The compiler under test consumes the returned :class:`EngineCacheEntry` exactly as it would from :class:`TensorrtRuntime`. Behaviour knobs: - ``raise_on``: maps ``model_name`` → exception instance the fake raises instead of writing the file. Used by AC-6 / AC-7 to simulate a failure mid-corpus. 
- ``calls``: records each ``compile_engine`` call so the cache-hit AC can assert zero invocations. """ cache_root: Path host: HostCapabilities = _HOST_T2 raise_on: dict[str, Exception] = field(default_factory=dict) calls: list[tuple[Path, BuildConfig]] = field(default_factory=list) def compile_engine( self, model_path: Path, build_config: BuildConfig ) -> EngineCacheEntry: self.calls.append((model_path, build_config)) model_name = Path(model_path).stem exc = self.raise_on.get(model_name) if exc is not None: raise exc filename = EngineFilenameSchema.build( model_name=model_name, sm=self.host.sm, jetpack=self.host.jetpack, trt=self.host.trt, precision=build_config.precision.value, ) target_path = self.cache_root / filename target_path.parent.mkdir(parents=True, exist_ok=True) payload = ( f"FAKE-ENGINE:{model_name}:{build_config.precision.value}" ).encode("utf-8") sha_hex = Sha256Sidecar.write_atomic_and_sidecar(target_path, payload) return EngineCacheEntry( engine_path=target_path, sha256_hex=sha_hex, sm=self.host.sm, jp=self.host.jetpack, trt=self.host.trt, precision=build_config.precision, extras={"fake": "true"}, ) @pytest.fixture def cache_root(tmp_path: Path) -> Path: root = tmp_path / "engines" root.mkdir(parents=True, exist_ok=True) return root @pytest.fixture def backbones(tmp_path: Path) -> tuple[BackboneSpec, ...]: onnx_dir = tmp_path / "onnx" onnx_dir.mkdir(parents=True, exist_ok=True) specs: list[BackboneSpec] = [] for model_name in ("dinov2_vpr", "lightglue", "aliked"): onnx_path = onnx_dir / f"{model_name}.onnx" onnx_path.write_bytes(b"ONNX:" + model_name.encode("ascii")) specs.append( BackboneSpec( model_name=model_name, onnx_path=onnx_path, expected_input_shape=(1, 3, 224, 224), ) ) return tuple(specs) @pytest.fixture def logger() -> logging.Logger: return logging.getLogger("test.c10_provisioning") def _request( backbones: tuple[BackboneSpec, ...], cache_root: Path, host: HostCapabilities = _HOST_T2, precision: PrecisionMode = PrecisionMode.FP16, 
calibration_path: Path | None = None, ) -> EngineCompileRequest: return EngineCompileRequest( backbones=backbones, calibration_path=calibration_path, cache_root=cache_root, precision=precision, host=host, ) def _populate_cache( backbones: tuple[BackboneSpec, ...], cache_root: Path, host: HostCapabilities = _HOST_T2, precision: PrecisionMode = PrecisionMode.FP16, ) -> dict[str, Path]: """Pre-write engine + sidecar for every backbone; return name→path map.""" cache_root.mkdir(parents=True, exist_ok=True) paths: dict[str, Path] = {} for spec in backbones: filename = EngineFilenameSchema.build( model_name=spec.model_name, sm=host.sm, jetpack=host.jetpack, trt=host.trt, precision=precision.value, ) target_path = cache_root / filename payload = ( f"PRE-WRITTEN:{spec.model_name}:{precision.value}" ).encode("utf-8") Sha256Sidecar.write_atomic_and_sidecar(target_path, payload) paths[spec.model_name] = target_path return paths # ---------------------------------------------------------------------- # AC-1: cold cache compiles every backbone # ---------------------------------------------------------------------- def test_ac1_cold_cache_compiles_every_backbone( cache_root: Path, backbones: tuple[BackboneSpec, ...], logger: logging.Logger, caplog: pytest.LogCaptureFixture, ) -> None: # Arrange runtime = _FakeRuntime(cache_root=cache_root) compiler = EngineCompiler(inference_runtime=runtime, logger=logger) request = _request(backbones, cache_root) # Act with caplog.at_level(logging.DEBUG, logger=logger.name): results = compiler.compile_engines_for_corpus(request) # Assert assert len(results) == 3 for r in results: assert r.outcome is CompileOutcome.BUILT assert r.compile_duration_s is not None assert r.compile_duration_s >= 0.0 assert r.entry.engine_path.exists() sidecar = Path(str(r.entry.engine_path) + ".sha256") assert sidecar.exists() assert Sha256Sidecar.verify(r.entry.engine_path) is True assert len(runtime.calls) == 3 miss_kinds = [ rec for rec in caplog.records if 
rec.__dict__.get("kind") == "c10.engine.cache.miss" ] summary_kinds = [ rec for rec in caplog.records if rec.__dict__.get("kind") == "c10.engine.compile.summary" ] assert len(miss_kinds) == 3 assert len(summary_kinds) == 1 assert summary_kinds[0].__dict__["kv"]["engines_built"] == 3 assert summary_kinds[0].__dict__["kv"]["engines_reused"] == 0 # ---------------------------------------------------------------------- # AC-2: warm cache reuses every backbone # ---------------------------------------------------------------------- def test_ac2_warm_cache_reuses_every_backbone( cache_root: Path, backbones: tuple[BackboneSpec, ...], logger: logging.Logger, caplog: pytest.LogCaptureFixture, ) -> None: # Arrange _populate_cache(backbones, cache_root) runtime = _FakeRuntime(cache_root=cache_root) compiler = EngineCompiler(inference_runtime=runtime, logger=logger) request = _request(backbones, cache_root) # Act with caplog.at_level(logging.DEBUG, logger=logger.name): results = compiler.compile_engines_for_corpus(request) # Assert assert len(results) == 3 for r in results: assert r.outcome is CompileOutcome.REUSED assert r.compile_duration_s is None assert runtime.calls == [] hit_kinds = [ rec for rec in caplog.records if rec.__dict__.get("kind") == "c10.engine.cache.hit" ] summary = [ rec for rec in caplog.records if rec.__dict__.get("kind") == "c10.engine.compile.summary" ] assert len(hit_kinds) == 3 assert len(summary) == 1 assert summary[0].__dict__["kv"]["engines_reused"] == 3 # ---------------------------------------------------------------------- # AC-3: mixed cache (1 hit + 2 miss) # ---------------------------------------------------------------------- def test_ac3_mixed_cache_hits_and_misses( cache_root: Path, backbones: tuple[BackboneSpec, ...], logger: logging.Logger, ) -> None: # Arrange only_dinov2 = (backbones[0],) _populate_cache(only_dinov2, cache_root) runtime = _FakeRuntime(cache_root=cache_root) compiler = EngineCompiler(inference_runtime=runtime, 
logger=logger) request = _request(backbones, cache_root) # Act results = compiler.compile_engines_for_corpus(request) # Assert outcomes = {r.entry.engine_path.name: r.outcome for r in results} dinov2_outcomes = [ v for k, v in outcomes.items() if k.startswith("dinov2_vpr__") ] other_outcomes = [ v for k, v in outcomes.items() if not k.startswith("dinov2_vpr__") ] assert dinov2_outcomes == [CompileOutcome.REUSED] assert other_outcomes.count(CompileOutcome.BUILT) == 2 assert len(runtime.calls) == 2 # ---------------------------------------------------------------------- # AC-4: hardware change invalidates cache (all rebuilt; old files untouched) # ---------------------------------------------------------------------- def test_ac4_hardware_change_invalidates_cache( cache_root: Path, backbones: tuple[BackboneSpec, ...], logger: logging.Logger, ) -> None: # Arrange old_paths = _populate_cache(backbones, cache_root, host=_HOST_T2) runtime = _FakeRuntime(cache_root=cache_root, host=_HOST_T2_NEXT) compiler = EngineCompiler(inference_runtime=runtime, logger=logger) request = _request(backbones, cache_root, host=_HOST_T2_NEXT) # Act results = compiler.compile_engines_for_corpus(request) # Assert for r in results: assert r.outcome is CompileOutcome.BUILT for old_path in old_paths.values(): assert old_path.exists(), ( f"old engine {old_path} should be untouched on hardware change" ) # ---------------------------------------------------------------------- # AC-5: tampered sidecar invalidates that one engine # ---------------------------------------------------------------------- def test_ac5_tampered_sidecar_invalidates_that_engine( cache_root: Path, backbones: tuple[BackboneSpec, ...], logger: logging.Logger, caplog: pytest.LogCaptureFixture, ) -> None: # Arrange paths = _populate_cache(backbones, cache_root) tampered = paths["lightglue"] sidecar = Path(str(tampered) + ".sha256") sidecar.write_text( "0" * 64 ) runtime = _FakeRuntime(cache_root=cache_root) compiler = 
EngineCompiler(inference_runtime=runtime, logger=logger) request = _request(backbones, cache_root) # Act with caplog.at_level(logging.WARNING, logger=logger.name): results = compiler.compile_engines_for_corpus(request) # Assert outcome_by_name = { Path(r.entry.engine_path).stem.split("__")[0]: r.outcome for r in results } assert outcome_by_name["dinov2_vpr"] is CompileOutcome.REUSED assert outcome_by_name["lightglue"] is CompileOutcome.BUILT assert outcome_by_name["aliked"] is CompileOutcome.REUSED mismatch_kinds = [ rec for rec in caplog.records if rec.__dict__.get("kind") == "c10.engine.sidecar.mismatch" ] assert len(mismatch_kinds) == 1 assert ( mismatch_kinds[0].__dict__["kv"]["model_name"] == "lightglue" ) # ---------------------------------------------------------------------- # AC-6: ``EngineBuildError`` propagates without partial state corruption # ---------------------------------------------------------------------- def test_ac6_engine_build_error_propagates_and_third_backbone_untouched( cache_root: Path, backbones: tuple[BackboneSpec, ...], logger: logging.Logger, ) -> None: # Arrange pre_populated = _populate_cache((backbones[0],), cache_root) runtime = _FakeRuntime( cache_root=cache_root, raise_on={"lightglue": EngineBuildError("CUDA OOM")}, ) compiler = EngineCompiler(inference_runtime=runtime, logger=logger) request = _request(backbones, cache_root) # Act + Assert with pytest.raises(EngineBuildError, match="CUDA OOM"): compiler.compile_engines_for_corpus(request) # Backbone 1 reused → untouched on disk assert pre_populated["dinov2_vpr"].exists() # Backbone 2 raised before write → no half-engine on disk aliked_filename = EngineFilenameSchema.build( model_name="aliked", sm=_HOST_T2.sm, jetpack=_HOST_T2.jetpack, trt=_HOST_T2.trt, precision="fp16", ) assert not (cache_root / aliked_filename).exists() # Backbone 2 was attempted once; backbone 3 never reached assert [c[0].stem for c in runtime.calls] == ["lightglue"] # 
# ----------------------------------------------------------------------
# AC-7: ``CalibrationCacheError`` propagates with diagnostic
# ----------------------------------------------------------------------


def test_ac7_calibration_cache_error_propagates(
    cache_root: Path,
    backbones: tuple[BackboneSpec, ...],
    logger: logging.Logger,
    caplog: pytest.LogCaptureFixture,
    tmp_path: Path,
) -> None:
    """An INT8 calibration failure re-raises and logs one structured error."""
    # Arrange
    calib_dir = tmp_path / "calib_dataset"
    calib_dir.mkdir(parents=True, exist_ok=True)
    failure = CalibrationCacheError("calibration table missing for INT8")
    fake = _FakeRuntime(cache_root=cache_root, raise_on={"dinov2_vpr": failure})
    compiler = EngineCompiler(inference_runtime=fake, logger=logger)
    request = _request(
        backbones,
        cache_root,
        precision=PrecisionMode.INT8,
        calibration_path=calib_dir,
    )
    expected_error = pytest.raises(
        CalibrationCacheError, match="calibration table"
    )

    # Act + Assert
    with caplog.at_level(logging.ERROR, logger=logger.name), expected_error:
        compiler.compile_engines_for_corpus(request)

    error_records = [
        record
        for record in caplog.records
        if vars(record).get("kind") == "c10.engine.compile.error"
    ]
    assert len(error_records) == 1
    details = vars(error_records[0])["kv"]
    assert details["model_name"] == "dinov2_vpr"
    assert details["calibration_path"] == str(calib_dir)
    assert details["error_class"] == "CalibrationCacheError"


# ----------------------------------------------------------------------
# AC-8: filename + sidecar layout matches AZ-281 schema
# ----------------------------------------------------------------------


def test_ac8_filename_and_sidecar_layout(
    cache_root: Path,
    backbones: tuple[BackboneSpec, ...],
    logger: logging.Logger,
) -> None:
    """A built engine has the schema filename and a 64-char sidecar digest."""
    # Arrange
    fake = _FakeRuntime(cache_root=cache_root)
    compiler = EngineCompiler(inference_runtime=fake, logger=logger)
    request = _request(backbones, cache_root)

    # Act
    results = compiler.compile_engines_for_corpus(request)

    # Assert
    dinov2_result = next(
        result
        for result in results
        if Path(result.entry.engine_path).stem.startswith("dinov2_vpr")
    )
    engine_path = dinov2_result.entry.engine_path
    assert engine_path.name == "dinov2_vpr__sm87_jp6.2_trt10.3_fp16.engine"

    digest_file = Path(f"{engine_path}.sha256")
    assert digest_file.exists()
    assert len(digest_file.read_text().strip()) == 64

    fields = EngineFilenameSchema.parse(engine_path.name)
    assert fields.sm == 87
    assert fields.jetpack == "6.2"
    assert fields.trt == "10.3"
    assert fields.precision == "fp16"
    assert Sha256Sidecar.verify(engine_path) is True


# ----------------------------------------------------------------------
# AC-9: compile_duration_s recorded for ``built``, ``None`` for ``reused``
# ----------------------------------------------------------------------


def test_ac9_compile_duration_recorded_for_built_only(
    cache_root: Path,
    backbones: tuple[BackboneSpec, ...],
    logger: logging.Logger,
) -> None:
    """Built results carry a non-negative float duration; others carry None."""
    # Arrange
    _populate_cache((backbones[0],), cache_root)
    fake = _FakeRuntime(cache_root=cache_root)
    compiler = EngineCompiler(inference_runtime=fake, logger=logger)
    request = _request(backbones, cache_root)

    # Act
    results = compiler.compile_engines_for_corpus(request)

    # Assert
    for result in results:
        duration = result.compile_duration_s
        if result.outcome is not CompileOutcome.BUILT:
            assert duration is None
            continue
        assert duration is not None
        assert duration >= 0.0
        assert isinstance(duration, float)


# ----------------------------------------------------------------------
# AC-10: empty backbones returns empty result with no side effects
# ----------------------------------------------------------------------


def test_ac10_empty_backbones_returns_empty(
    cache_root: Path,
    logger: logging.Logger,
    caplog: pytest.LogCaptureFixture,
) -> None:
    """No backbones: empty tuple, no runtime calls, no files, one summary."""
    # Arrange
    fake = _FakeRuntime(cache_root=cache_root)
    compiler = EngineCompiler(inference_runtime=fake, logger=logger)
    request = _request((), cache_root)

    # Act
    with caplog.at_level(logging.DEBUG, logger=logger.name):
        results = compiler.compile_engines_for_corpus(request)

    # Assert
    assert results == ()
    assert fake.calls == []
    assert list(cache_root.iterdir()) == []
    summaries = [
        record
        for record in caplog.records
        if vars(record).get("kind") == "c10.engine.compile.summary"
    ]
    assert len(summaries) == 1
    counters = vars(summaries[0])["kv"]
    assert counters["engines_built"] == 0
    assert counters["engines_reused"] == 0
    assert counters["total"] == 0


# ----------------------------------------------------------------------
# Sidecar-missing path (AC-5 sibling): engine on disk but no sidecar at all
# ----------------------------------------------------------------------


def test_missing_sidecar_treated_as_cache_miss(
    cache_root: Path,
    backbones: tuple[BackboneSpec, ...],
    logger: logging.Logger,
    caplog: pytest.LogCaptureFixture,
) -> None:
    """Deleting one sidecar rebuilds that engine and logs a mismatch."""
    # Arrange
    cached = _populate_cache(backbones, cache_root)
    Path(f"{cached['lightglue']}.sha256").unlink()
    fake = _FakeRuntime(cache_root=cache_root)
    compiler = EngineCompiler(inference_runtime=fake, logger=logger)
    request = _request(backbones, cache_root)

    # Act
    with caplog.at_level(logging.WARNING, logger=logger.name):
        results = compiler.compile_engines_for_corpus(request)

    # Assert
    outcomes = {
        Path(result.entry.engine_path).stem.split("__")[0]: result.outcome
        for result in results
    }
    assert outcomes["lightglue"] is CompileOutcome.BUILT
    mismatches = [
        record
        for record in caplog.records
        if vars(record).get("kind") == "c10.engine.sidecar.mismatch"
    ]
    assert any(
        vars(record)["kv"]["model_name"] == "lightglue"
        for record in mismatches
    )


# ----------------------------------------------------------------------
# AZ-507 AC-3: non-typed exceptions propagate without the compile.error log
# ----------------------------------------------------------------------


def test_az507_ac3_non_typed_exception_propagates_without_structured_log(
    cache_root: Path,
    backbones: tuple[BackboneSpec, ...],
    logger: logging.Logger,
    caplog: pytest.LogCaptureFixture,
) -> None:
    """An untyped runtime error re-raises and emits no compile.error record."""
    # Arrange — the runtime raises a stdlib RuntimeError, which is NOT
    # in the C7 typed-error envelope. AZ-507 narrows the catch to
    # `(EngineBuildError, CalibrationCacheError)` so the unknown error
    # must propagate unchanged and the c10.engine.compile.error log
    # must NOT fire (the structured log is the typed-failure contract,
    # not a catch-all).
    unexpected = RuntimeError("unexpected programmer error")
    fake = _FakeRuntime(
        cache_root=cache_root, raise_on={"dinov2_vpr": unexpected}
    )
    compiler = EngineCompiler(inference_runtime=fake, logger=logger)
    request = _request(backbones, cache_root)
    expected_error = pytest.raises(
        RuntimeError, match="unexpected programmer error"
    )

    # Act + Assert — propagation
    with caplog.at_level(logging.ERROR, logger=logger.name), expected_error:
        compiler.compile_engines_for_corpus(request)

    # Assert — no structured compile.error log for the unknown type
    error_records = [
        record
        for record in caplog.records
        if vars(record).get("kind") == "c10.engine.compile.error"
    ]
    assert error_records == []


def test_az507_ac3_typed_exception_still_logs_structured_diagnostic(
    cache_root: Path,
    backbones: tuple[BackboneSpec, ...],
    logger: logging.Logger,
    caplog: pytest.LogCaptureFixture,
) -> None:
    """A typed C7 error re-raises and emits exactly one compile.error record."""
    # Arrange — typed C7 error MUST still produce the structured log
    # and re-raise (regression guard for AZ-321's diagnostic contract
    # that AZ-507 must not break).
    typed_failure = EngineBuildError("typed failure")
    fake = _FakeRuntime(
        cache_root=cache_root, raise_on={"dinov2_vpr": typed_failure}
    )
    compiler = EngineCompiler(inference_runtime=fake, logger=logger)
    request = _request(backbones, cache_root)
    expected_error = pytest.raises(EngineBuildError, match="typed failure")

    # Act + Assert
    with caplog.at_level(logging.ERROR, logger=logger.name), expected_error:
        compiler.compile_engines_for_corpus(request)

    error_records = [
        record
        for record in caplog.records
        if vars(record).get("kind") == "c10.engine.compile.error"
    ]
    assert len(error_records) == 1
    assert vars(error_records[0])["kv"]["error_class"] == "EngineBuildError"


# ----------------------------------------------------------------------
# NFR placeholders (Tier-2 microbench harness owns these on Jetson)
# ----------------------------------------------------------------------


_TIER2_REASON = (
    "AZ-321 Tier-2 microbench harness owns the cache-hit and atomic-"
    "write NFR asserts (200 MB engine sweep, kill-during-compile "
    "scenarios); skipped on Tier-1 CI / macOS dev."
)


@pytest.mark.tier2
def test_nfr_perf_cache_hit_p99_under_1500ms_for_200mb_engine() -> None:
    """Placeholder: always skipped here with the shared Tier-2 reason."""
    pytest.skip(_TIER2_REASON)


@pytest.mark.tier2
def test_nfr_reliability_atomic_write_no_half_engine_after_kill() -> None:
    """Placeholder: always skipped here with the shared Tier-2 reason."""
    pytest.skip(_TIER2_REASON)