mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 15:01:13 +00:00
[AZ-325] C10 CacheProvisioner orchestrator
Implements the public top-level F1 build orchestrator for E-C10 per contract v1.1.0. Composes EngineCompiler (AZ-321), DescriptorBatcher (AZ-322), and ManifestBuilder (AZ-323) into a single idempotent operation guarded by an fcntl-backed cache_root/.c10.lock and a post-build coverage walk.
Adds:
- CacheProvisionerImpl + FilelockFileLockFactory (provisioner.py)
- BuildRequest/BuildReport/BuildOutcome/SectorClassification DTOs + FileLockFactory Protocol + replaced placeholder CacheProvisioner Protocol with v1.1.0 surface (interface.py)
- C10ProvisionerConfig wired into C10ProvisioningConfig (config.py)
- BuildLockHeldError + ManifestCoverageError (errors.py)
- build_cache_provisioner composition root (c10_factory.py)
- 18 tests covering AC-1..AC-16 + NFR-perf-coverage-walk
- filelock>=3.13,<4.0 (single new third-party dep)
Idempotence (CP-INV-1) reuses AZ-323's _compute_manifest_hash / _aggregate_tile_hash so the build-identity decision agrees byte-for-byte with the Manifest's recorded manifest_hash. Coverage rollback uses a .prev rename snapshot. Diagnostic compile_engines_for_corpus is lock-free per AC-10. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,878 @@
|
||||
"""Unit tests for AZ-325 :class:`CacheProvisionerImpl`.
|
||||
|
||||
Covers AC-1 .. AC-16 from the AZ-325 task spec plus a Protocol
|
||||
conformance check and the NFR-perf-coverage-walk benchmark. The
|
||||
collaborators are real where they are pure (real
|
||||
:class:`ManifestBuilder` + :class:`Ed25519ManifestSigner` +
|
||||
:class:`Sha256Sidecar`) and faked where they require GPU / FAISS
|
||||
(:class:`EngineCompiler` + :class:`DescriptorBatcher`). The fakes
|
||||
write the same on-disk artifacts the real impls would so the warm
|
||||
path's idempotence check exercises the real Manifest reader.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import time
|
||||
from collections.abc import Iterator
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import pytest
|
||||
from cryptography.hazmat.primitives import serialization
|
||||
from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey
|
||||
from filelock import FileLock as _RealFileLock
|
||||
|
||||
from gps_denied_onboard._types.geo import BoundingBox, LatLonAlt
|
||||
from gps_denied_onboard._types.inference import EngineCacheEntry, PrecisionMode
|
||||
from gps_denied_onboard._types.manifests import HostCapabilities
|
||||
from gps_denied_onboard.components.c10_provisioning import (
|
||||
BackboneSpec,
|
||||
BatcherTile, # noqa: F401 (ensures import path is alive)
|
||||
)
|
||||
from gps_denied_onboard.components.c10_provisioning import (
|
||||
BuildLockHeldError,
|
||||
BuildOutcome,
|
||||
BuildRequest,
|
||||
C10ManifestConfig,
|
||||
C10ProvisionerConfig,
|
||||
CacheProvisioner,
|
||||
CacheProvisionerImpl,
|
||||
CompileOutcome,
|
||||
DescriptorBatchReport,
|
||||
Ed25519ManifestSigner,
|
||||
EngineCompileRequest,
|
||||
EngineCompileResult,
|
||||
FilelockFileLockFactory,
|
||||
ManifestBuilder,
|
||||
ManifestCoverageError,
|
||||
SectorClassification,
|
||||
SigningMode,
|
||||
TileHashRecord,
|
||||
)
|
||||
from gps_denied_onboard.components.c10_provisioning.descriptor_batcher import (
|
||||
BatcherOutcome,
|
||||
CorpusFilter,
|
||||
)
|
||||
from gps_denied_onboard.helpers.engine_filename_schema import (
|
||||
EngineFilenameSchema,
|
||||
)
|
||||
from gps_denied_onboard.helpers.sha256_sidecar import Sha256Sidecar
|
||||
|
||||
# ---------------------------------------------------------------------- helpers
|
||||
|
||||
|
||||
# Shared fixture values reused by every test in this module.

# Scalar build parameters handed to the provisioner / engine-compile requests.
_DEFAULT_WORKSPACE_MB = 4096
_PRECISION = PrecisionMode.FP16
_ZOOM_LEVELS = (16, 17, 18)

# Host capability triple used when building engine filenames.
_HOST = HostCapabilities(sm=87, jetpack="6.2", trt="10.3")

# Default half-degree bounding box for build requests.
_BBOX = BoundingBox(min_lat_deg=50.0, min_lon_deg=36.0, max_lat_deg=50.5, max_lon_deg=36.5)
|
||||
|
||||
|
||||
def _make_backbones() -> tuple[BackboneSpec, ...]:
    """Return the two backbone specs (``dinov2_vpr`` then ``lightglue``) used by the tests."""
    dinov2 = BackboneSpec(
        model_name="dinov2_vpr",
        onnx_path=Path("/models/dinov2_vpr.onnx"),
        expected_input_shape=(1, 3, 322, 322),
    )
    lightglue = BackboneSpec(
        model_name="lightglue",
        onnx_path=Path("/models/lightglue.onnx"),
        expected_input_shape=(1, 256, 1024),
    )
    return dinov2, lightglue
|
||||
|
||||
|
||||
def _write_pkcs8_key(tmp_path: Path, name: str = "operator.key") -> tuple[Path, str]:
    """Generate an Ed25519 key, write it as unencrypted PKCS8 PEM, and fingerprint it.

    Args:
        tmp_path: Directory that receives the key file.
        name: File name of the key inside ``tmp_path``.

    Returns:
        The path of the written private key and the SHA-256 hex digest of the
        raw public-key bytes.
    """
    private_key = Ed25519PrivateKey.generate()

    destination = tmp_path / name
    destination.write_bytes(
        private_key.private_bytes(
            encoding=serialization.Encoding.PEM,
            format=serialization.PrivateFormat.PKCS8,
            encryption_algorithm=serialization.NoEncryption(),
        )
    )

    public_raw = private_key.public_key().public_bytes(
        encoding=serialization.Encoding.Raw,
        format=serialization.PublicFormat.Raw,
    )
    fingerprint = hashlib.sha256(public_raw).hexdigest()
    return destination, fingerprint
|
||||
|
||||
|
||||
def _make_calibration(tmp_path: Path, payload: bytes = b"int8-calibration-v1") -> Path:
    """Write ``payload`` to ``calibration/int8_calibration.json`` under ``tmp_path``.

    The ``calibration`` directory is created when missing; an existing file
    is overwritten. Returns the path of the written file.
    """
    calibration_file = tmp_path / "calibration" / "int8_calibration.json"
    calibration_file.parent.mkdir(parents=True, exist_ok=True)
    calibration_file.write_bytes(payload)
    return calibration_file
|
||||
|
||||
|
||||
def _make_tile_records(n: int = 4) -> tuple[TileHashRecord, ...]:
    """Build ``n`` deterministic zoom-18 ``googlemaps`` tile records.

    Record ``i`` sits ``i * 0.001`` degrees away from (50.0, 36.0) on both
    axes and carries the SHA-256 hex digest of the text ``tile-<i>``.
    """
    records: list[TileHashRecord] = []
    for index in range(n):
        offset = index * 0.001
        digest = hashlib.sha256(f"tile-{index}".encode()).hexdigest()
        records.append(
            TileHashRecord(
                zoom=18,
                lat=50.0 + offset,
                lon=36.0 + offset,
                source="googlemaps",
                sha256_hex=digest,
            )
        )
    return tuple(records)
|
||||
|
||||
|
||||
@dataclass
class _FakeClock:
    """Deterministic test clock.

    ``monotonic_ns`` moves the stored reading forward by ``step_ns`` (1 ms by
    default) on every call, ``time_ns`` reports the current reading without
    moving it, and ``sleep_until_ns`` returns immediately.
    """

    base_ns: int = 1_700_000_000_000_000_000
    step_ns: int = 1_000_000

    def monotonic_ns(self) -> int:
        """Advance the clock by one step and return the new reading."""
        advanced = self.base_ns + self.step_ns
        self.base_ns = advanced
        return advanced

    def time_ns(self) -> int:
        """Return the current reading without advancing the clock."""
        return self.base_ns

    def sleep_until_ns(self, target_ns: int) -> None:
        """Ignore ``target_ns``; the fake clock never blocks."""
        return None
|
||||
|
||||
|
||||
@dataclass
class _FakeTilesByBboxQuery:
    """Tile-metadata fake that serves a fixed record set.

    Every ``query_by_bbox`` call is logged in ``calls`` as a dict of its
    keyword arguments so tests can assert on it, and is answered with a fresh
    iterator over the same ``records`` tuple.
    """

    records: tuple[TileHashRecord, ...]
    calls: list[dict[str, Any]] = field(default_factory=list)

    def query_by_bbox(
        self,
        *,
        bbox: BoundingBox,
        zoom_levels: tuple[int, ...],
        sector_class: str,
    ) -> Iterator[TileHashRecord]:
        """Record the query arguments and iterate over the canned records."""
        observed: dict[str, Any] = dict(
            bbox=bbox,
            zoom_levels=zoom_levels,
            sector_class=sector_class,
        )
        self.calls.append(observed)
        return iter(self.records)
|
||||
|
||||
|
||||
@dataclass
class _FakeEngineCompiler:
    """Stand-in for :class:`EngineCompiler` that writes fake engine files.

    For every backbone in the request it writes the bytes
    ``f"engine-{model_name}".encode()`` (plus sidecar, via
    :class:`Sha256Sidecar`) under ``request.cache_root`` at the filename
    produced by :meth:`EngineFilenameSchema.build`. The payload depends only
    on the model name, so repeating a request rewrites identical bytes.

    Set ``raise_exc`` to make the call raise instead; every request is
    recorded in ``calls`` before anything else happens.
    """

    raise_exc: Exception | None = None
    calls: list[EngineCompileRequest] = field(default_factory=list)

    def compile_engines_for_corpus(
        self, request: EngineCompileRequest
    ) -> tuple[EngineCompileResult, ...]:
        """Record ``request``, then raise ``raise_exc`` or write one engine per backbone."""
        self.calls.append(request)
        if self.raise_exc is not None:
            raise self.raise_exc

        request.cache_root.mkdir(parents=True, exist_ok=True)
        host = request.host
        compiled: list[EngineCompileResult] = []
        for spec in request.backbones:
            engine_path = request.cache_root / EngineFilenameSchema.build(
                model_name=spec.model_name,
                sm=host.sm,
                jetpack=host.jetpack,
                trt=host.trt,
                precision=request.precision.value,
            )
            engine_bytes = f"engine-{spec.model_name}".encode()
            Sha256Sidecar.write_atomic_and_sidecar(engine_path, engine_bytes)

            cache_entry = EngineCacheEntry(
                engine_path=engine_path,
                sha256_hex=hashlib.sha256(engine_bytes).hexdigest(),
                sm=host.sm,
                jp=host.jetpack,
                trt=host.trt,
                precision=request.precision,
                extras={},
            )
            compiled.append(
                EngineCompileResult(
                    entry=cache_entry,
                    outcome=CompileOutcome.BUILT,
                    compile_duration_s=0.1,
                )
            )
        return tuple(compiled)
|
||||
|
||||
|
||||
@dataclass
class _FakeDescriptorBatcher:
    """Stand-in for :class:`DescriptorBatcher` that writes a fake ``corpus.index``.

    Behaviour is selected by the fields, checked in this order:

    * ``raise_exc`` set: the call raises it.
    * ``failure_outcome`` true: a FAILURE report carrying ``failure_reason``
      is returned and nothing is written.
    * otherwise: ``corpus.index`` (plus sidecar) is written under
      ``cache_root`` and a SUCCESS report with ``descriptors_count`` is returned.

    Every filter passed in is recorded in ``calls`` first.
    """

    cache_root: Path
    descriptors_count: int = 100
    raise_exc: Exception | None = None
    failure_outcome: bool = False
    failure_reason: str | None = None
    calls: list[CorpusFilter] = field(default_factory=list)

    def populate_descriptors(self, corpus_filter: CorpusFilter) -> DescriptorBatchReport:
        """Record ``corpus_filter`` and produce the configured outcome."""
        self.calls.append(corpus_filter)

        if self.raise_exc is not None:
            raise self.raise_exc

        if not self.failure_outcome:
            index_path = self.cache_root / "corpus.index"
            Sha256Sidecar.write_atomic_and_sidecar(index_path, b"faiss-binary-v1")
            return DescriptorBatchReport(
                descriptors_generated=self.descriptors_count,
                tiles_consumed=self.descriptors_count,
                oom_retries=0,
                elapsed_s=0.5,
                outcome=BatcherOutcome.SUCCESS,
                failure_reason=None,
            )

        return DescriptorBatchReport(
            descriptors_generated=0,
            tiles_consumed=0,
            oom_retries=0,
            elapsed_s=0.05,
            outcome=BatcherOutcome.FAILURE,
            failure_reason=self.failure_reason,
        )
|
||||
|
||||
|
||||
def _make_provisioner(
    *,
    tmp_path: Path,
    tile_records: tuple[TileHashRecord, ...],
    backbones: tuple[BackboneSpec, ...] | None = None,
    config: C10ProvisionerConfig | None = None,
    engine_compiler: _FakeEngineCompiler | None = None,
    descriptor_batcher: _FakeDescriptorBatcher | None = None,
    lock_factory: Any | None = None,
    clock: _FakeClock | None = None,
) -> tuple[
    CacheProvisionerImpl,
    _FakeEngineCompiler,
    _FakeDescriptorBatcher,
    _FakeTilesByBboxQuery,
    Path,
    Path,
]:
    """Assemble a real-Manifest, fake-phase orchestrator on ``tmp_path``.

    Args:
        tmp_path: Working directory; the cache root is ``tmp_path / "cache"``
            and the operator key is written directly into ``tmp_path``.
        tile_records: Records served by the fake tile-metadata store.
        backbones: Backbone specs; ``_make_backbones()`` when omitted.
        config: Provisioner config; ``C10ProvisionerConfig()`` when omitted.
        engine_compiler: Engine-compiler fake; a fresh one when omitted.
        descriptor_batcher: Batcher fake; when omitted, one that writes into
            ``tmp_path / "cache"``.
        lock_factory: Lock factory; ``FilelockFileLockFactory()`` when omitted.
        clock: Clock handed to the provisioner; a fresh ``_FakeClock`` when omitted.

    Returns:
        ``(provisioner, engine_fake, batcher_fake, tiles_fake, cache_root,
        key_path)``. ``key_path`` is the :class:`~pathlib.Path` of the operator
        private key, ready to be passed to ``_make_request``.
    """

    cache_root = tmp_path / "cache"
    # parents=True also creates ``tmp_path`` itself when a test passes a
    # not-yet-existing subdirectory; the key write below relies on that.
    cache_root.mkdir(parents=True, exist_ok=True)
    key_path, fingerprint = _write_pkcs8_key(tmp_path)
    backbones = backbones or _make_backbones()

    fake_engine = engine_compiler or _FakeEngineCompiler()
    fake_batcher = descriptor_batcher or _FakeDescriptorBatcher(cache_root=cache_root)
    fake_tiles = _FakeTilesByBboxQuery(records=tile_records)

    # The Manifest side is real: real builder, signer and sidecar, configured
    # to accept only the operator key generated above.
    signer = Ed25519ManifestSigner()
    manifest_logger = logging.getLogger("test.manifest_builder")
    manifest_builder = ManifestBuilder(
        sidecar=Sha256Sidecar(),
        signer=signer,
        tile_metadata_store=fake_tiles,
        logger=manifest_logger,
        clock=_FakeClock(),
        config=C10ManifestConfig(
            signing_mode=SigningMode.OPERATOR,
            allowed_operator_fingerprints=(fingerprint,),
        ),
    )

    provisioner = CacheProvisionerImpl(
        engine_compiler=fake_engine,  # type: ignore[arg-type]
        descriptor_batcher=fake_batcher,  # type: ignore[arg-type]
        manifest_builder=manifest_builder,
        tile_metadata_store=fake_tiles,
        lock_factory=lock_factory or FilelockFileLockFactory(),
        backbones=backbones,
        host=_HOST,
        precision=_PRECISION,
        workspace_mb=_DEFAULT_WORKSPACE_MB,
        logger=logging.getLogger("test.provisioner"),
        clock=clock or _FakeClock(),
        config=config or C10ProvisionerConfig(),
    )
    return provisioner, fake_engine, fake_batcher, fake_tiles, cache_root, key_path
|
||||
|
||||
|
||||
def _make_request(
    *,
    cache_root: Path,
    key_path: Path,
    calibration_path: Path,
    bbox: BoundingBox = _BBOX,
    sector_class: SectorClassification = SectorClassification.ACTIVE_CONFLICT,
    takeoff_origin: LatLonAlt | None = None,
    flight_id: UUID | None = None,
) -> BuildRequest:
    """Build a :class:`BuildRequest` with the module's default zoom levels.

    All arguments are forwarded unchanged; ``zoom_levels`` is always
    ``_ZOOM_LEVELS``.
    """
    request = BuildRequest(
        bbox=bbox,
        zoom_levels=_ZOOM_LEVELS,
        sector_class=sector_class,
        calibration_path=calibration_path,
        cache_root=cache_root,
        key_path=key_path,
        takeoff_origin=takeoff_origin,
        flight_id=flight_id,
    )
    return request
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- AC tests
|
||||
|
||||
|
||||
def test_ac1_cold_build_composes_phases_and_writes_manifest(tmp_path: Path) -> None:
    """AC-1: a cold build calls each phase once and leaves a Manifest with sig + sidecar."""
    # Arrange
    provisioner, engine_fake, batcher_fake, _tiles, cache_root, key_path = _make_provisioner(
        tmp_path=tmp_path,
        tile_records=_make_tile_records(),
    )
    request = _make_request(
        cache_root=cache_root,
        key_path=key_path,
        calibration_path=_make_calibration(tmp_path),
    )
    manifest_path = cache_root / "Manifest.json"

    # Act
    report = provisioner.build_cache_artifacts(request)

    # Assert: report contents
    assert report.outcome is BuildOutcome.SUCCESS
    assert report.engines_built == len(_make_backbones())
    assert report.descriptors_generated == 100
    assert report.elapsed_s > 0
    assert report.manifest_hash is not None
    assert report.manifest_path == manifest_path

    # Assert: Manifest, signature and checksum sidecar are all on disk
    for suffix in ("", ".sig", ".sha256"):
        assert (cache_root / f"Manifest.json{suffix}").exists()

    # Assert: each fake phase ran exactly once
    assert len(engine_fake.calls) == 1
    assert len(batcher_fake.calls) == 1

    # Lockfile is removed on clean exit (release path)
    assert not (cache_root / ".c10.lock").exists()
|
||||
|
||||
|
||||
def test_ac2_warm_idempotent_re_run_skips_everything(tmp_path: Path) -> None:
    """AC-2: repeating an identical request is a no-op that leaves the Manifest untouched."""
    # Arrange: perform the cold build and snapshot the observable state
    provisioner, engine_fake, batcher_fake, _tiles, cache_root, key_path = _make_provisioner(
        tmp_path=tmp_path,
        tile_records=_make_tile_records(),
    )
    request = _make_request(
        cache_root=cache_root,
        key_path=key_path,
        calibration_path=_make_calibration(tmp_path),
    )
    manifest_path = cache_root / "Manifest.json"
    cold = provisioner.build_cache_artifacts(request)
    mtime_after_cold = manifest_path.stat().st_mtime_ns
    compile_calls_after_cold = len(engine_fake.calls)
    batch_calls_after_cold = len(batcher_fake.calls)

    # Act
    warm = provisioner.build_cache_artifacts(request)

    # Assert
    assert warm.outcome is BuildOutcome.IDEMPOTENT_NO_OP
    assert warm.engines_built == 0
    assert warm.engines_reused == 0
    assert warm.descriptors_generated == 0
    assert warm.manifest_hash == cold.manifest_hash
    # No additional compile or batcher calls were made by the warm run
    assert len(engine_fake.calls) == compile_calls_after_cold
    assert len(batcher_fake.calls) == batch_calls_after_cold
    # The Manifest file was not rewritten
    assert manifest_path.stat().st_mtime_ns == mtime_after_cold
|
||||
|
||||
|
||||
def test_ac3_different_bbox_triggers_full_rebuild_atomic_replace(tmp_path: Path) -> None:
    """AC-3: a request with a different bbox rebuilds and leaves no ``.prev`` snapshot."""
    # Arrange: first build with the default bbox
    provisioner, *_, cache_root, key_path = _make_provisioner(
        tmp_path=tmp_path,
        tile_records=_make_tile_records(),
    )
    calibration = _make_calibration(tmp_path)
    original = provisioner.build_cache_artifacts(
        _make_request(
            cache_root=cache_root,
            key_path=key_path,
            calibration_path=calibration,
        )
    )

    # Act: rebuild into the same cache root with a shifted bbox
    shifted_bbox = BoundingBox(
        min_lat_deg=51.0,
        min_lon_deg=37.0,
        max_lat_deg=51.5,
        max_lon_deg=37.5,
    )
    rebuilt = provisioner.build_cache_artifacts(
        _make_request(
            cache_root=cache_root,
            key_path=key_path,
            calibration_path=calibration,
            bbox=shifted_bbox,
        )
    )

    # Assert
    assert rebuilt.outcome is BuildOutcome.SUCCESS
    assert rebuilt.manifest_hash != original.manifest_hash
    # No `.prev` snapshot is left behind, and the Manifest itself is present
    assert not (cache_root / "Manifest.json.prev").exists()
    assert (cache_root / "Manifest.json").exists()
|
||||
|
||||
|
||||
def test_ac4_empty_corpus_surfaces_failure_with_operator_hint(tmp_path: Path) -> None:
    """AC-4: with no tiles the build reports FAILURE, names C11, and runs no phase."""
    # Arrange: the tile store yields nothing
    provisioner, engine_fake, batcher_fake, _tiles, cache_root, key_path = _make_provisioner(
        tmp_path=tmp_path,
        tile_records=(),
    )
    request = _make_request(
        cache_root=cache_root,
        key_path=key_path,
        calibration_path=_make_calibration(tmp_path),
    )

    # Act
    report = provisioner.build_cache_artifacts(request)

    # Assert
    assert report.outcome is BuildOutcome.FAILURE
    reason = report.failure_reason
    assert reason is not None
    assert "C11 TileDownloader" in reason
    # Neither fake phase was invoked
    assert not engine_fake.calls
    assert not batcher_fake.calls
    # Lock file is gone on the FAILURE exit too
    assert not (cache_root / ".c10.lock").exists()
|
||||
|
||||
|
||||
def test_ac5_concurrent_invocation_raises_build_lock_held_error(tmp_path: Path) -> None:
    """AC-5: building while another holder owns ``.c10.lock`` raises BuildLockHeldError."""
    # Arrange: a short lock timeout keeps the test fast
    provisioner, *_, cache_root, key_path = _make_provisioner(
        tmp_path=tmp_path,
        tile_records=_make_tile_records(),
        config=C10ProvisionerConfig(lock_timeout_s=0.1),
    )
    request = _make_request(
        cache_root=cache_root,
        key_path=key_path,
        calibration_path=_make_calibration(tmp_path),
    )
    lock_path = cache_root / ".c10.lock"

    # The context manager acquires on entry and releases on exit, even when
    # an assertion inside the block fails.
    with _RealFileLock(str(lock_path)):
        # Act / Assert
        with pytest.raises(BuildLockHeldError):
            provisioner.build_cache_artifacts(request)
        # Lockfile is NOT deleted while the external holder owns it
        assert lock_path.exists()
|
||||
|
||||
|
||||
def test_ac6_manifest_coverage_error_rolls_back_to_prior(tmp_path: Path) -> None:
    """AC-6: an orphan file makes the rebuild raise and the prior Manifest is restored."""
    # Arrange: establish a clean Manifest first
    provisioner, *_, cache_root, key_path = _make_provisioner(
        tmp_path=tmp_path,
        tile_records=_make_tile_records(),
    )
    calibration = _make_calibration(tmp_path)
    manifest_path = cache_root / "Manifest.json"
    provisioner.build_cache_artifacts(
        _make_request(
            cache_root=cache_root,
            key_path=key_path,
            calibration_path=calibration,
        )
    )
    manifest_before = manifest_path.read_bytes()

    # Act: plant an orphan in the cache root, then request a different
    # sector_class so the build is not treated as an idempotent repeat. The
    # orphan is still on disk when the rebuild runs.
    (cache_root / "leftover.bin").write_bytes(b"orphan-data")
    rebuild_request = _make_request(
        cache_root=cache_root,
        key_path=key_path,
        calibration_path=calibration,
        sector_class=SectorClassification.STABLE_REAR,
    )

    # Assert
    with pytest.raises(ManifestCoverageError) as raised:
        provisioner.build_cache_artifacts(rebuild_request)
    assert "leftover.bin" in str(raised.value)
    # Prior-good Manifest is back, byte for byte
    assert manifest_path.read_bytes() == manifest_before
    # Lock file is gone after the rollback path
    assert not (cache_root / ".c10.lock").exists()
|
||||
|
||||
|
||||
def test_ac7_coverage_non_strict_mode_warns_but_continues(tmp_path: Path) -> None:
    """AC-7: with ``coverage_strict=False`` an orphan file does not fail the build."""
    # Arrange: non-strict coverage plus a pre-existing orphan in the cache root
    provisioner, *_, cache_root, key_path = _make_provisioner(
        tmp_path=tmp_path,
        tile_records=_make_tile_records(),
        config=C10ProvisionerConfig(coverage_strict=False),
    )
    calibration = _make_calibration(tmp_path)
    orphan = cache_root / "leftover.bin"
    orphan.write_bytes(b"orphan-data")
    request = _make_request(
        cache_root=cache_root,
        key_path=key_path,
        calibration_path=calibration,
    )

    # Act
    report = provisioner.build_cache_artifacts(request)

    # Assert
    assert report.outcome is BuildOutcome.SUCCESS
    assert orphan.exists()  # the orphan is left in place
    assert (cache_root / "Manifest.json").exists()
|
||||
|
||||
|
||||
def test_ac8_lock_released_on_every_exit_path(tmp_path: Path) -> None:
    """AC-8: no ``.c10.lock`` remains after SUCCESS, IDEMPOTENT_NO_OP, FAILURE or a raise."""
    lock_name = ".c10.lock"

    # Arrange: one provisioner serves the SUCCESS and IDEMPOTENT_NO_OP paths
    provisioner, *_, cache_root, key_path = _make_provisioner(
        tmp_path=tmp_path,
        tile_records=_make_tile_records(),
    )
    calibration = _make_calibration(tmp_path)
    request = _make_request(
        cache_root=cache_root,
        key_path=key_path,
        calibration_path=calibration,
    )

    # Act / Assert: SUCCESS
    provisioner.build_cache_artifacts(request)
    assert not (cache_root / lock_name).exists()

    # IDEMPOTENT_NO_OP: same request again
    provisioner.build_cache_artifacts(request)
    assert not (cache_root / lock_name).exists()

    # FAILURE: a fresh provisioner with an empty tile set, pointed at its own
    # cache root
    failure_root = tmp_path / "cache_2"
    failure_root.mkdir()
    empty_provisioner, *_, empty_key = _make_provisioner(
        tmp_path=tmp_path / "second",
        tile_records=(),
    )
    empty_provisioner.build_cache_artifacts(
        _make_request(
            cache_root=failure_root,
            key_path=empty_key,
            calibration_path=calibration,
        )
    )
    assert not (failure_root / lock_name).exists()

    # Hard error path: the engine compiler raises
    error_root = tmp_path / "cache_3"
    error_root.mkdir()
    raising_provisioner, *_, raising_key = _make_provisioner(
        tmp_path=tmp_path / "third",
        tile_records=_make_tile_records(),
        engine_compiler=_FakeEngineCompiler(raise_exc=RuntimeError("simulated GPU OOM")),
    )
    error_request = _make_request(
        cache_root=error_root,
        key_path=raising_key,
        calibration_path=calibration,
    )
    with pytest.raises(RuntimeError):
        raising_provisioner.build_cache_artifacts(error_request)
    assert not (error_root / lock_name).exists()
|
||||
|
||||
|
||||
def test_ac9_hard_errors_propagate_without_state_corruption(tmp_path: Path) -> None:
    """AC-9: a raising engine compiler propagates and the existing Manifest is unchanged."""
    # Arrange: establish a prior-good Manifest
    provisioner, *_, cache_root, key_path = _make_provisioner(
        tmp_path=tmp_path,
        tile_records=_make_tile_records(),
    )
    calibration = _make_calibration(tmp_path)
    manifest_path = cache_root / "Manifest.json"
    provisioner.build_cache_artifacts(
        _make_request(
            cache_root=cache_root,
            key_path=key_path,
            calibration_path=calibration,
        )
    )
    manifest_before = manifest_path.read_bytes()

    # Act: a second provisioner whose engine compiler raises an
    # EngineBuildError-flavoured RuntimeError
    raising_provisioner, *_ = _make_provisioner(
        tmp_path=tmp_path / "second",
        tile_records=_make_tile_records(),
        engine_compiler=_FakeEngineCompiler(raise_exc=RuntimeError("EngineBuildError simulated")),
    )
    # Target the first cache_root so the prior Manifest is in play
    rebuild_request = _make_request(
        cache_root=cache_root,
        key_path=key_path,
        calibration_path=calibration,
        sector_class=SectorClassification.STABLE_REAR,
    )
    with pytest.raises(RuntimeError):
        raising_provisioner.build_cache_artifacts(rebuild_request)

    # Assert: Manifest bytes are unchanged and the lock file is gone
    assert manifest_path.read_bytes() == manifest_before
    assert not (cache_root / ".c10.lock").exists()
    # Partial engines from the failed attempt: AC-9 says they MAY remain;
    # we don't assert presence/absence — only that the Manifest is intact.
|
||||
|
||||
|
||||
def test_ac10_compile_engines_for_corpus_passthrough(tmp_path: Path) -> None:
    """AC-10: the diagnostic compile forwards the request object as-is and takes no lock."""
    # Arrange
    provisioner, engine_fake, batcher_fake, _tiles, cache_root, _key = _make_provisioner(
        tmp_path=tmp_path,
        tile_records=_make_tile_records(),
    )
    compile_request = EngineCompileRequest(
        backbones=_make_backbones(),
        calibration_path=_make_calibration(tmp_path),
        cache_root=cache_root,
        precision=_PRECISION,
        host=_HOST,
        workspace_mb=_DEFAULT_WORKSPACE_MB,
    )

    # Act
    entries = provisioner.compile_engines_for_corpus(compile_request)

    # Assert: a tuple of cache entries comes back
    assert isinstance(entries, tuple)
    for entry in entries:
        assert isinstance(entry, EngineCacheEntry)
    # The compiler saw exactly one call, with the very same request instance
    assert len(engine_fake.calls) == 1
    assert engine_fake.calls[0] is compile_request
    # No descriptor work was triggered
    assert len(batcher_fake.calls) == 0
    # No lock file exists after the diagnostic-mode passthrough
    assert not (cache_root / ".c10.lock").exists()
|
||||
|
||||
|
||||
def test_ac11_protocol_conformance_isinstance(tmp_path: Path) -> None:
    """AC-11: the implementation passes an ``isinstance`` check against the Protocol."""
    # Arrange
    provisioner, *_ = _make_provisioner(
        tmp_path=tmp_path,
        tile_records=_make_tile_records(),
    )

    # Assert: structural conformance checked at runtime via isinstance
    conforms = isinstance(provisioner, CacheProvisioner)
    assert conforms
|
||||
|
||||
|
||||
@pytest.mark.slow
@pytest.mark.gpu
def test_ac12_cold_build_benchmark_within_envelope(tmp_path: Path) -> None:
    """AC-12 placeholder: Tier-1 dev workstation cold build ≤ 12 min.

    Carries the ``slow`` and ``gpu`` markers, but the body currently skips
    unconditionally: the benchmark needs a GPU and a 1000-tile corpus and is
    meant to be run by hand.
    """
    skip_reason = "Cold-build benchmark requires GPU + 1000-tile corpus; run manually."
    pytest.skip(skip_reason)
|
||||
|
||||
|
||||
def test_ac13_warm_idempotent_benchmark_within_envelope(tmp_path: Path) -> None:
    """AC-13: the warm (idempotent) re-run finishes within 5 s of wall-clock time."""
    # Arrange: perform the cold build so the second call takes the warm path
    provisioner, *_, cache_root, key_path = _make_provisioner(
        tmp_path=tmp_path,
        tile_records=_make_tile_records(),
    )
    request = _make_request(
        cache_root=cache_root,
        key_path=key_path,
        calibration_path=_make_calibration(tmp_path),
    )
    provisioner.build_cache_artifacts(request)  # cold

    # Act: time only the warm invocation
    started = time.perf_counter()
    report = provisioner.build_cache_artifacts(request)  # warm
    elapsed_s = time.perf_counter() - started

    # Assert
    assert report.outcome is BuildOutcome.IDEMPOTENT_NO_OP
    # Tier-0 dev host benchmark (no GPU): well under the 60-second envelope
    assert elapsed_s < 5.0, f"warm idempotent path took {elapsed_s:.2f}s"
|
||||
|
||||
|
||||
def test_ac14_takeoff_origin_mismatch_triggers_full_rebuild(tmp_path: Path) -> None:
    """AC-14: a takeoff origin differing in the 9th decimal of latitude forces a rebuild."""
    # Arrange
    provisioner, *_, cache_root, key_path = _make_provisioner(
        tmp_path=tmp_path,
        tile_records=_make_tile_records(),
    )
    calibration = _make_calibration(tmp_path)
    baseline_origin = LatLonAlt(lat_deg=50.123456789, lon_deg=36.987654321, alt_m=180.5)
    # Only lat_deg differs, by 1e-9 degrees (roughly 0.1 mm on the ground)
    nudged_origin = LatLonAlt(lat_deg=50.123456788, lon_deg=36.987654321, alt_m=180.5)
    baseline = provisioner.build_cache_artifacts(
        _make_request(
            cache_root=cache_root,
            key_path=key_path,
            calibration_path=calibration,
            takeoff_origin=baseline_origin,
        )
    )

    # Act
    nudged = provisioner.build_cache_artifacts(
        _make_request(
            cache_root=cache_root,
            key_path=key_path,
            calibration_path=calibration,
            takeoff_origin=nudged_origin,
        )
    )

    # Assert: a real rebuild happened, NOT an IDEMPOTENT_NO_OP
    assert nudged.outcome is BuildOutcome.SUCCESS
    assert nudged.manifest_hash != baseline.manifest_hash
|
||||
|
||||
|
||||
def test_ac15_takeoff_origin_none_propagates_with_no_flight_block(tmp_path: Path) -> None:
    """AC-15: with no takeoff origin the Manifest has none, and idempotence still holds."""
    # Arrange
    provisioner, *_, cache_root, key_path = _make_provisioner(
        tmp_path=tmp_path,
        tile_records=_make_tile_records(),
    )
    request = _make_request(
        cache_root=cache_root,
        key_path=key_path,
        calibration_path=_make_calibration(tmp_path),
        takeoff_origin=None,
        flight_id=None,
    )

    # Act: run the same None-origin request twice
    cold = provisioner.build_cache_artifacts(request)
    warm = provisioner.build_cache_artifacts(request)

    # Assert — no takeoff_origin in the Manifest body (AZ-323 AC-14)
    import orjson

    manifest_body = orjson.loads((cache_root / "Manifest.json").read_bytes())
    flight_block = manifest_body.get("flight", {})
    assert "takeoff_origin" not in flight_block
    # Identical None-origin requests are still recognised as a repeat
    assert warm.outcome is BuildOutcome.IDEMPOTENT_NO_OP
    assert cold.outcome is BuildOutcome.SUCCESS
|
||||
|
||||
|
||||
def test_ac16_flight_id_participation_in_idempotence(tmp_path: Path) -> None:
    """AC-16: two requests that differ only in ``flight_id`` yield different Manifests."""
    # Arrange
    provisioner, *_, cache_root, key_path = _make_provisioner(
        tmp_path=tmp_path,
        tile_records=_make_tile_records(),
    )
    calibration = _make_calibration(tmp_path)
    origin = LatLonAlt(lat_deg=50.0, lon_deg=36.0, alt_m=180.0)
    first_flight = uuid4()
    second_flight = uuid4()
    baseline = provisioner.build_cache_artifacts(
        _make_request(
            cache_root=cache_root,
            key_path=key_path,
            calibration_path=calibration,
            takeoff_origin=origin,
            flight_id=first_flight,
        )
    )

    # Act: identical request apart from the flight id
    other = provisioner.build_cache_artifacts(
        _make_request(
            cache_root=cache_root,
            key_path=key_path,
            calibration_path=calibration,
            takeoff_origin=origin,
            flight_id=second_flight,
        )
    )

    # Assert
    assert other.outcome is BuildOutcome.SUCCESS
    assert other.manifest_hash != baseline.manifest_hash
|
||||
|
||||
|
||||
def test_nfr_perf_coverage_walk_under_one_second(tmp_path: Path) -> None:
    """NFR-perf-coverage-walk: a build over a cache root holding 2000 extra files stays fast.

    The timed region is the whole ``build_cache_artifacts`` call (fake
    phases, Manifest write and coverage walk together), not the walk alone,
    so the asserted bound is a loose 5 s ceiling rather than the 1 s the
    test name refers to.
    """
    file_count = 2000  # keeps the test fast on CI
    budget_s = 5.0

    # Arrange — non-strict coverage so the bulk files (which the build does
    # not produce) do not abort it.
    provisioner, _, _, _, cache_root, key_path = _make_provisioner(
        tmp_path=tmp_path,
        tile_records=_make_tile_records(),
        config=C10ProvisionerConfig(coverage_strict=False),
    )
    calibration = _make_calibration(tmp_path)
    # Generate many small files to stress the coverage walk
    bulk_dir = cache_root / "bulk"
    bulk_dir.mkdir()
    for i in range(file_count):
        (bulk_dir / f"f{i}.dat").write_bytes(b"x")
    request = _make_request(
        cache_root=cache_root,
        key_path=key_path,
        calibration_path=calibration,
    )

    # Act
    t0 = time.perf_counter()
    report = provisioner.build_cache_artifacts(request)
    elapsed_s = time.perf_counter() - t0

    # Assert — the build, including the walk over the bulk files, stays
    # inside the budget
    assert report.outcome is BuildOutcome.SUCCESS
    assert elapsed_s < budget_s, (
        f"build incl. coverage walk over {file_count} files took {elapsed_s:.2f}s"
    )
|
||||
|
||||
|
||||
def test_diagnostic_engine_compile_does_not_acquire_lock(tmp_path: Path) -> None:
    """AC-10 (lock-free part): the diagnostic compile works while ``.c10.lock`` is held."""
    # Arrange — checked separately from the main passthrough test: a
    # diagnostic call must not contend with a lock someone else holds.
    provisioner, *_, cache_root, _key = _make_provisioner(
        tmp_path=tmp_path,
        tile_records=_make_tile_records(),
    )
    compile_request = EngineCompileRequest(
        backbones=_make_backbones(),
        calibration_path=_make_calibration(tmp_path),
        cache_root=cache_root,
        precision=_PRECISION,
        host=_HOST,
        workspace_mb=_DEFAULT_WORKSPACE_MB,
    )

    # Hold the lock externally for the duration of the call; the context
    # manager releases it again even if an assertion fails.
    with _RealFileLock(str(cache_root / ".c10.lock")):
        # Act
        entries = provisioner.compile_engines_for_corpus(compile_request)

        # Assert
        assert len(entries) == len(_make_backbones())
|
||||
Reference in New Issue
Block a user