gps-denied-onboard/tests/unit/c7_inference/test_tensorrt_runtime.py

"""AZ-298 — ``TensorrtRuntime`` acceptance tests.

Most production paths (Polygraphy + ``IBuilderConfig`` + ``enqueueV3``
+ CUDA streams) require TensorRT 10.3 + a Tier-2 Jetson host; those
tests are guarded by :data:`_REQUIRE_TENSORRT` and skip cleanly on
Tier-1 / macOS dev. CPU-runnable coverage focuses on the gates that
keep the system safe BEFORE any GPU is touched: protocol conformance
(AC-1), the calibration cache trust pipeline (AC-3 / AC-4), the
EngineGate-first ordering (AC-5), the GPU memory budget (AC-6),
idempotent release (AC-10), and the InferenceError rewrap envelope
(NFR-reliability).
"""

from __future__ import annotations

import hashlib
from pathlib import Path
from typing import Any

import numpy as np
import pytest

from gps_denied_onboard._types.inference import (
    BuildConfig,
    EngineCacheEntry,
    OptimizationProfile,
    PrecisionMode,
)
from gps_denied_onboard._types.thermal import ThermalState
from gps_denied_onboard.components.c7_inference import (
    C7InferenceConfig,
    DeploymentManifest,
    EngineGate,
    EngineSchemaMismatchError,
    HostTuple,
    InferenceError,
    InferenceRuntime,
    OutOfMemoryError,
)
from gps_denied_onboard.components.c7_inference.errors import (
    CalibrationCacheError,
)
from gps_denied_onboard.components.c7_inference.tensorrt_runtime import (
    CALIB_CACHE_DATASET_SHA_SUFFIX,
    CALIB_CACHE_SUFFIX,
    TensorrtRuntime,
    TrtEngineHandle,
    _dataset_content_hash,
    _persist_calibration_cache_sidecars,
    _plan_calibration_cache,
    _predicted_deserialize_bytes,
    _profile_buffer_bytes,
)
from gps_denied_onboard.config.schema import Config
from gps_denied_onboard.helpers.sha256_sidecar import (
    SIDECAR_SUFFIX,
    Sha256Sidecar,
)

# Probe for the TensorRT python binding once at import time; the resulting
# flag drives the skip marker for tests that need the real library.
try:
    import tensorrt  # type: ignore[import-not-found]  # noqa: F401
except ImportError:
    _HAS_TENSORRT = False
else:
    _HAS_TENSORRT = True

# Skip marker applied to tests that require the real TensorRT binding.
_REQUIRE_TENSORRT = pytest.mark.skipif(
    not _HAS_TENSORRT,
    reason="TensorRT python binding not installed (Tier-2 Jetson only)",
)


# Host tuple handed to runtimes under test through ``host_tuple_provider``.
_TIER2_HOST = HostTuple(
    sm=87,
    jp="6.2",
    trt="10.3",
    precision=PrecisionMode.FP16,
)


# ----------------------------------------------------------------------
# Fixtures.


@pytest.fixture
def config() -> Config:
    """Provide a ``Config`` whose C7 inference block selects the TensorRT runtime."""
    c7_block = C7InferenceConfig(runtime="tensorrt")
    return Config.with_blocks(c7_inference=c7_block)


@pytest.fixture
def runtime_basic(config: Config) -> TensorrtRuntime:
    """Provide a ``TensorrtRuntime`` built from ``config`` with no injected providers."""
    runtime = TensorrtRuntime(config)
    return runtime


@pytest.fixture
def dataset_dir(tmp_path: Path) -> Path:
    """Create ``calib_dataset`` under ``tmp_path`` holding 3 deterministic blobs.

    File ``img_00N.bin`` stores the raw bytes of a float32 array of shape
    ``(3, 4, 4)`` filled with the value ``N``.
    """
    root = tmp_path / "calib_dataset"
    root.mkdir()
    for fill in (0, 1, 2):
        blob = np.full((3, 4, 4), fill, dtype=np.float32).tobytes()
        target = root / f"img_{fill:03d}.bin"
        target.write_bytes(blob)
    return root


def _make_engine_artifact(
    tmp_path: Path,
    *,
    sm: int = 87,
    jp: str = "6.2",
    trt: str = "10.3",
    precision: PrecisionMode = PrecisionMode.FP16,
    payload: bytes = b"fake-engine-bytes",
    extras_buffer_bytes: int | None = 1_024,
) -> tuple[EngineCacheEntry, Path]:
    """Write a fake engine file plus sha256 sidecar; return ``(entry, engine_path)``.

    The file name embeds ``sm`` / ``jp`` / ``trt`` / ``precision`` (AZ-281
    schema), the sidecar holds the hex digest of ``payload``, and
    ``extras["opt_buffer_bytes"]`` is populated (as a string) only when
    ``extras_buffer_bytes`` is not ``None``.
    """
    digest = hashlib.sha256(payload).hexdigest()
    file_name = f"ultravpr__sm{sm}_jp{jp}_trt{trt}_{precision.value}.engine"
    engine_path = tmp_path / file_name
    engine_path.write_bytes(payload)

    sidecar_path = Path(str(engine_path) + SIDECAR_SUFFIX)
    sidecar_path.write_text(digest, encoding="utf-8")

    extras: dict[str, str] = (
        {}
        if extras_buffer_bytes is None
        else {"opt_buffer_bytes": str(extras_buffer_bytes)}
    )
    cache_entry = EngineCacheEntry(
        engine_path=engine_path,
        sha256_hex=digest,
        sm=sm,
        jp=jp,
        trt=trt,
        precision=precision,
        extras=extras,
    )
    return cache_entry, engine_path


def _manifest_for(engine_path: Path) -> DeploymentManifest:
    """Build a single-entry manifest rooted at the engine's parent directory.

    The entry maps the engine file name to the sha256 hex digest of the bytes
    currently stored at ``engine_path``.
    """
    digest = hashlib.sha256(engine_path.read_bytes()).hexdigest()
    entries = {engine_path.name: digest}
    return DeploymentManifest(root=engine_path.parent, entries=entries)


# ----------------------------------------------------------------------
# AC-1: Protocol conformance + label (CPU-runnable).


def test_ac1_protocol_conformance(runtime_basic: TensorrtRuntime) -> None:
    """AC-1: the runtime satisfies ``InferenceRuntime`` and is labelled "tensorrt"."""
    assert isinstance(runtime_basic, InferenceRuntime)
    label = runtime_basic.current_runtime_label()
    assert label == "tensorrt"


# ----------------------------------------------------------------------
# AC-3: stale calibration cache forces rebuild (CPU-runnable).


def test_ac3_stale_calibration_cache_forces_rebuild(
    tmp_path: Path, dataset_dir: Path
) -> None:
    """AC-3: a recorded dataset hash that differs from the dataset disables reuse."""
    stale_hash = "deadbeef" * 8  # not the current dataset hash
    old_payload = b"old-cache-payload"
    # Arrange — pretend a previous compile produced cache + both sidecars.
    engine_path = tmp_path / "engine.engine"
    engine_str = str(engine_path)
    cache_path = Path(engine_str + CALIB_CACHE_SUFFIX)
    cache_path.write_bytes(old_payload)
    Sha256Sidecar.write_atomic_and_sidecar(cache_path, old_payload)
    Path(engine_str + CALIB_CACHE_DATASET_SHA_SUFFIX).write_text(
        stale_hash, encoding="utf-8"
    )
    # Act
    plan = _plan_calibration_cache(engine_path, dataset_dir)
    # Assert
    assert plan.reuse is False
    assert plan.current_hash == _dataset_content_hash(dataset_dir)
    assert plan.current_hash != stale_hash


def test_ac3_matching_dataset_hash_reuses_cache(
    tmp_path: Path, dataset_dir: Path
) -> None:
    """AC-3: a dataset-hash sidecar equal to the current hash lets the cache be reused."""
    cache_bytes = b"cache-payload"
    # Arrange — pretend the calibrator just wrote the cache + correct sidecars.
    engine_path = tmp_path / "engine.engine"
    engine_str = str(engine_path)
    cache_path = Path(engine_str + CALIB_CACHE_SUFFIX)
    cache_path.write_bytes(cache_bytes)
    Sha256Sidecar.write_atomic_and_sidecar(cache_path, cache_bytes)
    expected_hash = _dataset_content_hash(dataset_dir)
    Path(engine_str + CALIB_CACHE_DATASET_SHA_SUFFIX).write_text(
        expected_hash, encoding="utf-8"
    )
    # Act
    plan = _plan_calibration_cache(engine_path, dataset_dir)
    # Assert
    assert plan.reuse is True
    assert plan.current_hash == expected_hash


# ----------------------------------------------------------------------
# AC-4: corrupted calibration cache raises CalibrationCacheError.


def test_ac4_corrupted_calibration_cache_raises(
    tmp_path: Path, dataset_dir: Path
) -> None:
    """AC-4: a cache whose sha256 sidecar does not match its bytes is rejected."""
    # Arrange — cache + dataset sidecars exist but sha256 sidecar mismatches.
    engine_path = tmp_path / "engine.engine"
    cache_path = Path(str(engine_path) + CALIB_CACHE_SUFFIX)
    cache_path.write_bytes(b"real-payload")
    # Wrong sidecar: hash of different bytes.
    wrong_digest = hashlib.sha256(b"tampered").hexdigest()
    sha_sidecar = Path(str(cache_path) + SIDECAR_SUFFIX)
    sha_sidecar.write_text(wrong_digest, encoding="utf-8")
    dataset_sidecar = Path(str(engine_path) + CALIB_CACHE_DATASET_SHA_SUFFIX)
    dataset_sidecar.write_text(_dataset_content_hash(dataset_dir), encoding="utf-8")
    # Act / Assert
    with pytest.raises(CalibrationCacheError):
        _plan_calibration_cache(engine_path, dataset_dir)


def test_ac4_malformed_dataset_sidecar_raises(
    tmp_path: Path, dataset_dir: Path
) -> None:
    """AC-4: a dataset-hash sidecar that is not a sha256 digest is rejected."""
    cache_bytes = b"cache-payload"
    # Arrange — cache + sha sidecar OK, but dataset sidecar contains garbage.
    engine_path = tmp_path / "engine.engine"
    cache_path = Path(str(engine_path) + CALIB_CACHE_SUFFIX)
    cache_path.write_bytes(cache_bytes)
    Sha256Sidecar.write_atomic_and_sidecar(cache_path, cache_bytes)
    dataset_sidecar = Path(str(engine_path) + CALIB_CACHE_DATASET_SHA_SUFFIX)
    dataset_sidecar.write_text("not-a-sha256", encoding="utf-8")
    # Act / Assert
    with pytest.raises(CalibrationCacheError, match="malformed"):
        _plan_calibration_cache(engine_path, dataset_dir)


def test_ac4_empty_dataset_raises(tmp_path: Path) -> None:
    """AC-4: planning against a dataset directory with no files is rejected."""
    # Arrange
    engine_path = tmp_path / "engine.engine"
    empty_dataset = tmp_path / "empty"
    empty_dataset.mkdir()
    # Act / Assert
    with pytest.raises(CalibrationCacheError, match="empty"):
        _plan_calibration_cache(engine_path, empty_dataset)


def test_persist_calibration_cache_sidecars_writes_both(
    tmp_path: Path, dataset_dir: Path
) -> None:
    """Persisting a non-reuse plan writes the sha256 and dataset-hash sidecars."""
    raw_cache = b"fresh-cache-bytes"
    # Arrange — fake calibrator dropped a binary cache; no sidecars yet.
    engine_path = tmp_path / "engine.engine"
    cache_path = Path(str(engine_path) + CALIB_CACHE_SUFFIX)
    cache_path.write_bytes(raw_cache)
    plan = _plan_calibration_cache(engine_path, dataset_dir)
    assert plan.reuse is False
    # Act
    _persist_calibration_cache_sidecars(plan)
    # Assert
    sha_sidecar = Path(str(cache_path) + SIDECAR_SUFFIX)
    recorded_digest = sha_sidecar.read_text(encoding="utf-8").strip()
    assert recorded_digest == hashlib.sha256(raw_cache).hexdigest()
    recorded_dataset_hash = plan.dataset_sha_sidecar.read_text(encoding="utf-8")
    assert recorded_dataset_hash == plan.current_hash


# ----------------------------------------------------------------------
# AC-5: deserialize_engine invokes EngineGate BEFORE any TRT call.


def test_ac5_gate_refusal_precedes_trt_import(
    tmp_path: Path, config: Config
) -> None:
    """AC-5: a schema mismatch is raised without either lazy loader being invoked."""
    # Arrange — engine filename says sm=86 but the host tuple is sm=87.
    entry, engine_path = _make_engine_artifact(tmp_path, sm=86)

    # Loader replacement that fails loudly if the runtime ever calls it.
    def _forbidden_loader() -> Any:  # pragma: no cover - assertion path
        raise AssertionError(
            "AC-5: TRT must NOT be loaded before EngineGate.validate"
        )

    runtime = TensorrtRuntime(
        config,
        host_tuple_provider=lambda _precision: _TIER2_HOST,
        manifest_provider=lambda: _manifest_for(engine_path),
    )
    runtime._load_trt = _forbidden_loader  # type: ignore[method-assign]
    runtime._load_pycuda = _forbidden_loader  # type: ignore[method-assign]
    # Act / Assert
    with pytest.raises(EngineSchemaMismatchError, match="sm=86"):
        runtime.deserialize_engine(entry)


# ----------------------------------------------------------------------
# AC-6: GPU memory budget pre-allocation gate.


def test_ac6_budget_helper_refuses_overshoot(
    runtime_basic: TensorrtRuntime,
) -> None:
    """AC-6: resident + predicted bytes beyond the budget raise ``OutOfMemoryError``."""
    gib = 1024 * 1024 * 1024
    # Arrange
    runtime_basic._resident_bytes = 3 * gib  # 3 GiB resident
    # Budget is 4 GiB (config default); predicted 1.2 GiB → over.
    predicted_bytes = int(1.2 * gib)
    # Act / Assert
    with pytest.raises(OutOfMemoryError, match="ultravpr"):
        runtime_basic._raise_if_over_budget(predicted_bytes, "ultravpr.engine")


def test_ac6_budget_helper_accepts_within(
    runtime_basic: TensorrtRuntime,
) -> None:
    """AC-6: 1 GiB resident plus 2 GiB predicted passes the budget check silently."""
    gib = 1024 * 1024 * 1024
    # Arrange
    runtime_basic._resident_bytes = 1 * gib
    predicted_bytes = 2 * gib
    # Act / Assert — the helper must simply return without raising.
    runtime_basic._raise_if_over_budget(predicted_bytes, "ok.engine")


def test_ac6_deserialize_budget_raises_before_trt_load(
    tmp_path: Path, config: Config
) -> None:
    """AC-6: ``deserialize_engine`` raises ``OutOfMemoryError`` before loading TRT.

    The error message must name the engine file, and the runtime's resident
    byte counter must be left untouched by the refused load.
    """
    # Local import: only needed to escape the literal file name used as a regex.
    import re

    resident_before = 3 * 1024 * 1024 * 1024
    # Arrange — entry stamps 1.2 GiB in extras; runtime already holds 3 GiB.
    entry, engine_path = _make_engine_artifact(
        tmp_path, extras_buffer_bytes=int(1.2 * 1024 * 1024 * 1024)
    )

    class _ShouldNotImport:
        def __call__(self) -> Any:  # pragma: no cover - assertion path
            raise AssertionError(
                "AC-6: TRT must NOT be loaded once budget pre-check raises"
            )

    runtime = TensorrtRuntime(
        config,
        host_tuple_provider=lambda _precision: _TIER2_HOST,
        manifest_provider=lambda: _manifest_for(engine_path),
    )
    runtime._resident_bytes = resident_before
    runtime._load_trt = _ShouldNotImport()  # type: ignore[method-assign]
    # Act / Assert — ``match`` is a regex, so the dots in the engine file name
    # are escaped to require the literal name in the message.
    with pytest.raises(OutOfMemoryError, match=re.escape(entry.engine_path.name)):
        runtime.deserialize_engine(entry)
    # Resident state must be unchanged.
    assert runtime._resident_bytes == resident_before


# ----------------------------------------------------------------------
# AC-10: release_engine is idempotent (CPU-runnable via fake handle).


class _FakeFreeable:
    """Stand-in resource that counts how many times ``free`` was invoked."""

    def __init__(self) -> None:
        self.free_count: int = 0

    def free(self) -> None:
        """Record one more release request."""
        self.free_count = self.free_count + 1


def _make_fake_handle(allocated_bytes: int = 1024) -> TrtEngineHandle:
    """Build a ``TrtEngineHandle`` named "fake.engine" backed by ``_FakeFreeable``s.

    One input binding ``"x"`` and one output binding ``"y"`` are registered.
    """
    in_buffers = {"x": _FakeFreeable()}
    out_buffers = {"y": _FakeFreeable()}
    return TrtEngineHandle(
        engine=_FakeFreeable(),
        exec_context=_FakeFreeable(),
        stream=_FakeFreeable(),
        input_names=("x",),
        output_names=("y",),
        input_buffers=in_buffers,
        output_buffers=out_buffers,
        allocated_bytes=allocated_bytes,
        engine_name="fake.engine",
    )


def test_ac10_release_is_idempotent(
    runtime_basic: TensorrtRuntime,
) -> None:
    """AC-10: a second ``release_engine`` call frees nothing and changes no counters."""
    size = 2048
    # Arrange
    handle = _make_fake_handle(allocated_bytes=size)
    runtime_basic._resident_bytes = size
    in_buf = handle._input_buffers["x"]
    out_buf = handle._output_buffers["y"]

    def _assert_freed_exactly_once() -> None:
        assert in_buf.free_count == 1
        assert out_buf.free_count == 1
        assert runtime_basic._resident_bytes == 0

    # Act — first release.
    runtime_basic.release_engine(handle)
    # Assert
    assert handle._released is True
    _assert_freed_exactly_once()
    # Act — second release (must be a no-op).
    runtime_basic.release_engine(handle)
    _assert_freed_exactly_once()


def test_release_engine_ignores_foreign_handle_type(
    runtime_basic: TensorrtRuntime,
) -> None:
    """Releasing an object that is not a ``TrtEngineHandle`` must not raise."""

    class _Foreign:  # pragma: no cover - sentinel
        """Arbitrary type unrelated to the runtime's handle class."""

    foreign = _Foreign()
    runtime_basic.release_engine(foreign)  # type: ignore[arg-type]


# ----------------------------------------------------------------------
# NFR-reliability: every Protocol method rewraps third-party exceptions.


class _FakeStream:
    """Stream double exposing a fixed ``handle`` and counting ``synchronize`` calls."""

    def __init__(self) -> None:
        self.handle = 0xCAFEBABE
        self.synced = 0

    def synchronize(self) -> None:
        """Record one synchronisation request."""
        self.synced = self.synced + 1


class _FakeCuda:
    """CUDA-driver double that logs the arguments of each async memcpy call."""

    def __init__(self) -> None:
        # One ``(dst, src, stream)`` tuple per recorded call, in call order.
        self.htod_calls: list[tuple[Any, Any, Any]] = []
        self.dtoh_calls: list[tuple[Any, Any, Any]] = []

    def memcpy_htod_async(self, dst: Any, src: Any, stream: Any) -> None:
        """Log a host-to-device copy request without copying anything."""
        record = (dst, src, stream)
        self.htod_calls.append(record)

    def memcpy_dtoh_async(self, dst: Any, src: Any, stream: Any) -> None:
        """Log a device-to-host copy request without copying anything."""
        record = (dst, src, stream)
        self.dtoh_calls.append(record)


class _FakeBuffer:
    """Device-buffer double whose ``int()`` value is a caller-chosen address."""

    def __init__(self, address: int) -> None:
        self._address = address

    def __int__(self) -> int:
        address = self._address
        return address

    def free(self) -> None:  # for release_engine compat
        """Do nothing; present only so the buffer can be released."""
        return None


class _FakeExecContext:
    """Execution-context double: stores bound addresses and counts enqueues."""

    def __init__(self, output_shape: tuple[int, ...]) -> None:
        self._output_shape = output_shape
        self.exec_calls = 0
        self.tensor_addresses: dict[str, int] = {}

    def set_tensor_address(self, name: str, address: int) -> None:
        """Remember the address bound to tensor ``name``."""
        self.tensor_addresses.update({name: address})

    def get_tensor_shape(self, name: str) -> tuple[int, ...]:
        """Return the configured output shape regardless of ``name``."""
        shape = self._output_shape
        return shape

    def execute_async_v3(self, stream_handle: int) -> bool:
        """Count the enqueue and report success."""
        self.exec_calls = self.exec_calls + 1
        return True


class _FakeEngine:
    """Engine double that reports one fixed dtype for every tensor name."""

    def __init__(self, dtype: Any) -> None:
        self._dtype = dtype

    def get_tensor_dtype(self, name: str) -> Any:
        """Return the dtype supplied at construction, ignoring ``name``."""
        dtype = self._dtype
        return dtype


class _FakeTrt:
    """Minimal stand-in for the lazy ``import tensorrt as trt`` call."""

    @staticmethod
    def nptype(dtype: Any) -> Any:
        """Hand back ``dtype`` unchanged (identity mapping)."""
        mapped = dtype
        return mapped


def _make_infer_handle() -> TrtEngineHandle:
    """Build a ``TrtEngineHandle`` wired to the infer-path doubles.

    The fake engine reports float32 tensors, the fake context reports a
    ``(1, 2)`` shape, and bindings ``"x"`` / ``"y"`` use fixed fake addresses.
    """
    engine = _FakeEngine(np.float32)
    context = _FakeExecContext((1, 2))
    in_buffers = {"x": _FakeBuffer(0xDEADBEEF)}
    out_buffers = {"y": _FakeBuffer(0xBEEFDEAD)}
    return TrtEngineHandle(
        engine=engine,
        exec_context=context,
        stream=_FakeStream(),
        input_names=("x",),
        output_names=("y",),
        input_buffers=in_buffers,
        output_buffers=out_buffers,
        allocated_bytes=128,
        engine_name="fake.engine",
    )


def test_infer_orders_h2d_enqueue_d2h_sync(
    runtime_basic: TensorrtRuntime,
) -> None:
    """``infer`` issues HtoD copy, enqueue, DtoH copy and stream sync in that order.

    Recording doubles append to one shared event log, so the sequence itself
    is asserted in addition to the per-step call counts.
    """
    # Arrange — every double logs into ``events`` before delegating to its base.
    events: list[str] = []

    class _RecordingCuda(_FakeCuda):
        def memcpy_htod_async(self, dst: Any, src: Any, stream: Any) -> None:
            events.append("htod")
            super().memcpy_htod_async(dst, src, stream)

        def memcpy_dtoh_async(self, dst: Any, src: Any, stream: Any) -> None:
            events.append("dtoh")
            super().memcpy_dtoh_async(dst, src, stream)

    class _RecordingContext(_FakeExecContext):
        def execute_async_v3(self, stream_handle: int) -> bool:
            events.append("enqueue")
            return super().execute_async_v3(stream_handle)

    class _RecordingStream(_FakeStream):
        def synchronize(self) -> None:
            events.append("sync")
            super().synchronize()

    handle = TrtEngineHandle(
        engine=_FakeEngine(np.float32),
        exec_context=_RecordingContext((1, 2)),
        stream=_RecordingStream(),
        input_names=("x",),
        output_names=("y",),
        input_buffers={"x": _FakeBuffer(0xDEADBEEF)},
        output_buffers={"y": _FakeBuffer(0xBEEFDEAD)},
        allocated_bytes=128,
        engine_name="fake.engine",
    )
    runtime_basic._resident_bytes = handle._allocated_bytes
    fake_cuda = _RecordingCuda()
    runtime_basic._load_pycuda = lambda: (fake_cuda, None)  # type: ignore[method-assign]
    runtime_basic._load_trt = lambda: _FakeTrt()  # type: ignore[method-assign]
    inputs = {"x": np.ones((1, 3), dtype=np.float32)}
    # Act
    outputs = runtime_basic.infer(handle, inputs)
    # Assert — each step ran exactly once...
    assert len(fake_cuda.htod_calls) == 1
    assert handle._exec_context.exec_calls == 1
    assert len(fake_cuda.dtoh_calls) == 1
    assert handle._stream.synced == 1
    # ...and in the required order: HtoD before enqueueV3, enqueueV3 before
    # DtoH, DtoH before sync.
    assert events == ["htod", "enqueue", "dtoh", "sync"]
    assert set(outputs.keys()) == {"y"}
    assert outputs["y"].dtype == np.float32
    assert outputs["y"].shape == (1, 2)


def test_infer_rewraps_third_party_exception(
    runtime_basic: TensorrtRuntime,
) -> None:
    """A ``RuntimeError`` raised during enqueue surfaces as a chained ``InferenceError``."""

    # Arrange
    class _ExplodingContext(_FakeExecContext):
        """Context double whose enqueue always fails."""

        def execute_async_v3(self, stream_handle: int) -> bool:
            raise RuntimeError("TRT C++ exception: enqueueV3 fault")

    failing_context = _ExplodingContext((1, 2))
    handle = TrtEngineHandle(
        engine=_FakeEngine(np.float32),
        exec_context=failing_context,
        stream=_FakeStream(),
        input_names=("x",),
        output_names=("y",),
        input_buffers={"x": _FakeBuffer(1)},
        output_buffers={"y": _FakeBuffer(2)},
        allocated_bytes=64,
        engine_name="fake.engine",
    )
    runtime_basic._load_pycuda = lambda: (_FakeCuda(), None)  # type: ignore[method-assign]
    runtime_basic._load_trt = lambda: _FakeTrt()  # type: ignore[method-assign]
    feed = {"x": np.ones((1, 3), dtype=np.float32)}
    # Act / Assert
    with pytest.raises(InferenceError, match="enqueueV3 fault") as exc_info:
        runtime_basic.infer(handle, feed)
    cause = exc_info.value.__cause__
    assert isinstance(cause, RuntimeError)


def test_infer_rejects_foreign_handle(runtime_basic: TensorrtRuntime) -> None:
    """``infer`` raises ``InferenceError`` for a handle of an unrelated type."""

    class _Foreign(_FakeFreeable):
        """Freeable object that is not a ``TrtEngineHandle``."""

    foreign = _Foreign()
    with pytest.raises(InferenceError, match="foreign handle"):
        runtime_basic.infer(foreign, {})  # type: ignore[arg-type]


def test_infer_rejects_released_handle(runtime_basic: TensorrtRuntime) -> None:
    """``infer`` raises ``InferenceError`` when the handle is flagged as released."""
    handle = _make_infer_handle()
    handle._released = True
    feed = {"x": np.ones((1, 3), dtype=np.float32)}
    with pytest.raises(InferenceError, match="released handle"):
        runtime_basic.infer(handle, feed)


def test_infer_missing_input_binding_rewraps(
    runtime_basic: TensorrtRuntime,
) -> None:
    """Calling ``infer`` with no inputs raises ``InferenceError`` naming the gap."""
    handle = _make_infer_handle()
    runtime_basic._load_pycuda = lambda: (_FakeCuda(), None)  # type: ignore[method-assign]
    runtime_basic._load_trt = lambda: _FakeTrt()  # type: ignore[method-assign]
    no_inputs: dict[str, Any] = {}
    with pytest.raises(InferenceError, match="missing input binding"):
        runtime_basic.infer(handle, no_inputs)


# ----------------------------------------------------------------------
# thermal_state delegation (default-safe + provider-injected).


def test_thermal_state_default_safe(runtime_basic: TensorrtRuntime) -> None:
    """Without a publisher the snapshot reports no telemetry and no throttling."""
    # Act
    state = runtime_basic.thermal_state()
    # Assert
    assert isinstance(state, ThermalState)
    telemetry_available = state.is_telemetry_available
    assert telemetry_available is False
    throttle_active = state.thermal_throttle_active
    assert throttle_active is False


def test_thermal_state_delegates_to_publisher(config: Config) -> None:
    """With a publisher injected, ``thermal_state`` returns the publisher's object."""
    # Arrange
    expected = ThermalState(
        cpu_temp_c=42.0,
        gpu_temp_c=55.0,
        thermal_throttle_active=True,
        measured_clock_mhz=624,
        measured_at_ns=1_000_000_000,
        is_telemetry_available=True,
    )

    class _CannedPublisher:
        """Publisher double that always hands back ``expected``."""

        def read(self) -> ThermalState:
            return expected

    publisher = _CannedPublisher()
    runtime = TensorrtRuntime(config, thermal_publisher=publisher)
    # Act
    observed = runtime.thermal_state()
    # Assert — identity, not mere equality.
    assert observed is expected


# ----------------------------------------------------------------------
# Helpers: predicted_deserialize_bytes / profile_buffer_bytes / dataset hash.


def test_predicted_deserialize_bytes_uses_extras(tmp_path: Path) -> None:
    """The prediction is the engine file size plus the buffer size from extras."""
    buffer_bytes = 2048
    # Arrange
    entry, engine_path = _make_engine_artifact(
        tmp_path, extras_buffer_bytes=buffer_bytes
    )
    # Act
    predicted = _predicted_deserialize_bytes(entry)
    # Assert
    file_size = engine_path.stat().st_size
    assert predicted == file_size + buffer_bytes


def test_predicted_deserialize_bytes_falls_back_when_extras_missing(
    tmp_path: Path,
) -> None:
    """With no buffer size in extras the prediction adds 256 MiB to the file size."""
    fallback_bytes = 256 * 1024 * 1024
    entry, engine_path = _make_engine_artifact(tmp_path, extras_buffer_bytes=None)
    predicted = _predicted_deserialize_bytes(entry)
    file_size = engine_path.stat().st_size
    assert predicted == file_size + fallback_bytes


def test_profile_buffer_bytes_sums_opt_shape() -> None:
    """A single static profile yields ``element_count * 2`` bytes."""
    shape = (1, 3, 224, 224)
    profile = OptimizationProfile(
        input_name="in",
        min_shape=shape,
        opt_shape=shape,
        max_shape=shape,
    )
    # Multiply the dimensions out by hand to get the element count.
    element_count = 1
    for dim in shape:
        element_count *= dim
    assert _profile_buffer_bytes((profile,)) == element_count * 2


def test_dataset_content_hash_changes_with_content(tmp_path: Path) -> None:
    """Two datasets with the same file name but different bytes hash differently."""
    # Arrange — directories "a" and "b" each hold one ``x.bin`` with distinct content.
    first = tmp_path / "a"
    second = tmp_path / "b"
    for directory, content in ((first, b"hello"), (second, b"world")):
        directory.mkdir()
        (directory / "x.bin").write_bytes(content)
    # Act / Assert
    hash_first = _dataset_content_hash(first)
    hash_second = _dataset_content_hash(second)
    assert hash_first != hash_second


# ----------------------------------------------------------------------
# CLI smoke tests — argparse wiring only (no real compile).


def test_cli_build_config_from_json_round_trips() -> None:
    """An FP16 JSON payload is converted into a ``BuildConfig`` with matching fields."""
    from gps_denied_onboard.components.c7_inference.tensorrt_runtime import (
        _build_config_from_json,
    )

    static_shape = [1, 3, 224, 224]
    profile_json = {
        "input_name": "x",
        # Fresh list per key so the three shapes stay independent objects.
        "min_shape": list(static_shape),
        "opt_shape": list(static_shape),
        "max_shape": list(static_shape),
    }
    payload = {
        "precision": "fp16",
        "workspace_mb": 512,
        "optimization_profiles": [profile_json],
        "use_trtexec": False,
    }
    build_config = _build_config_from_json(payload)
    assert isinstance(build_config, BuildConfig)
    assert build_config.precision is PrecisionMode.FP16
    assert build_config.workspace_mb == 512
    assert build_config.calibration_dataset is None
    assert len(build_config.optimization_profiles) == 1
    assert build_config.use_trtexec is False


def test_cli_build_config_int8_requires_calibration_dataset() -> None:
    """An INT8 payload without ``calibration_dataset`` raises ``EngineBuildError``."""
    from gps_denied_onboard.components.c7_inference.tensorrt_runtime import (
        _build_config_from_json,
    )
    from gps_denied_onboard.components.c7_inference.errors import (
        EngineBuildError,
    )

    int8_payload = {"precision": "int8", "workspace_mb": 512}
    with pytest.raises(EngineBuildError, match="calibration_dataset"):
        _build_config_from_json(int8_payload)


# ----------------------------------------------------------------------
# Tier-2 only — real TRT compile / deserialize / infer paths.


# Skip message shared by every Tier-2 placeholder test in this section; each of
# them passes it to ``pytest.skip``.
_TIER2_REASON = (
    "AZ-298 Tier-2 microbench harness owns the real-engine perf/memory "
    "asserts (C7-PT-01 / C7-PT-02); skipped on Tier-1 CI / macOS dev."
)


@_REQUIRE_TENSORRT
@pytest.mark.tier2
def test_ac1_real_fp16_compile_produces_engine_and_sidecar(
    tmp_path: Path, config: Config
) -> None:  # pragma: no cover - Tier-2 only
    """Placeholder for a real FP16 compile check (scope inferred from the name).

    The body only calls ``pytest.skip``, so the test is reported as skipped
    even on hosts where TensorRT is importable.
    """
    pytest.skip(_TIER2_REASON)


@_REQUIRE_TENSORRT
@pytest.mark.tier2
def test_ac2_int8_compile_reuses_calibration_cache_under_30s(
    tmp_path: Path, config: Config
) -> None:  # pragma: no cover - Tier-2 only
    """Placeholder for an INT8 cache-reuse timing check (scope inferred from the name).

    The body only calls ``pytest.skip``, so the test is reported as skipped
    even on hosts where TensorRT is importable.
    """
    pytest.skip(_TIER2_REASON)


@_REQUIRE_TENSORRT
@pytest.mark.tier2
def test_ac7_real_infer_records_cuda_event_sequence(
    tmp_path: Path, config: Config
) -> None:  # pragma: no cover - Tier-2 only
    """Placeholder for a real-GPU infer sequencing check (scope inferred from the name).

    The body only calls ``pytest.skip``, so the test is reported as skipped
    even on hosts where TensorRT is importable.
    """
    pytest.skip(_TIER2_REASON)


@_REQUIRE_TENSORRT
@pytest.mark.tier2
def test_ac8_per_model_p95_latency_within_budget(
    tmp_path: Path, config: Config
) -> None:  # pragma: no cover - Tier-2 only
    """Placeholder for a per-model p95 latency check (scope inferred from the name).

    The body only calls ``pytest.skip``, so the test is reported as skipped
    even on hosts where TensorRT is importable.
    """
    pytest.skip(_TIER2_REASON)


@_REQUIRE_TENSORRT
@pytest.mark.tier2
def test_ac9_concurrent_engine_resident_memory_within_budget(
    tmp_path: Path, config: Config
) -> None:  # pragma: no cover - Tier-2 only
    """Placeholder for a multi-engine resident-memory check (scope inferred from the name).

    The body only calls ``pytest.skip``, so the test is reported as skipped
    even on hosts where TensorRT is importable.
    """
    pytest.skip(_TIER2_REASON)


@_REQUIRE_TENSORRT
@pytest.mark.tier2
def test_nfr_perf_deserialize_p95_under_5s(
    tmp_path: Path, config: Config
) -> None:  # pragma: no cover - Tier-2 only
    """Placeholder for a deserialize p95 latency check (scope inferred from the name).

    The body only calls ``pytest.skip``, so the test is reported as skipped
    even on hosts where TensorRT is importable.
    """
    pytest.skip(_TIER2_REASON)