Files
gps-denied-onboard/src/gps_denied_onboard/runtime_root/c10_factory.py
T
Oleksandr Bezdieniezhnykh f7b2e70085 [AZ-325] C10 CacheProvisioner orchestrator
Implements the public top-level F1 build orchestrator for E-C10 per
contract v1.1.0. Composes EngineCompiler (AZ-321), DescriptorBatcher
(AZ-322), and ManifestBuilder (AZ-323) into a single idempotent
operation guarded by a fcntl-backed cache_root/.c10.lock and a
post-build coverage walk.

Adds:
- CacheProvisionerImpl + FilelockFileLockFactory (provisioner.py)
- BuildRequest/BuildReport/BuildOutcome/SectorClassification DTOs +
  FileLockFactory Protocol + replaced placeholder CacheProvisioner
  Protocol with v1.1.0 surface (interface.py)
- C10ProvisionerConfig wired into C10ProvisioningConfig (config.py)
- BuildLockHeldError + ManifestCoverageError (errors.py)
- build_cache_provisioner composition root (c10_factory.py)
- 18 tests covering AC-1..AC-16 + NFR-perf-coverage-walk
- filelock>=3.13,<4.0 (single new third-party dep)

Idempotence (CP-INV-1) reuses AZ-323's _compute_manifest_hash /
_aggregate_tile_hash so the build-identity decision agrees byte-for-
byte with the Manifest's recorded manifest_hash. Coverage rollback
uses a .prev rename snapshot. Diagnostic compile_engines_for_corpus
is lock-free per AC-10.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-13 05:00:16 +03:00

493 lines
17 KiB
Python

"""C10 cache-provisioning factory (AZ-321).
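
Composition-root wiring for the C10 provisioning components, starting
with the AZ-321 :class:`EngineCompiler`: :func:`build_backbone_specs`
reads ``config.components['c10_provisioning']`` for the backbone corpus,
and :func:`build_engine_compiler` resolves the :class:`InferenceRuntime`
strategy via
:func:`gps_denied_onboard.runtime_root.inference_factory.build_inference_runtime`
and returns a ready-to-call :class:`EngineCompiler`.

The module also exposes the factories for the :class:`DescriptorBatcher`
(AZ-322), :class:`ManifestBuilder` (AZ-323), :class:`ManifestVerifierImpl`
(AZ-324) and :class:`CacheProvisionerImpl` (AZ-325), together with the
C6 -> C10 adapters those factories share.

Backbone resolution is config-driven: the YAML enumerates the
project's engine corpus (initially DINOv2-VPR + LightGlue + ALIKED
per the AZ-321 task spec); adding a model is a config change rather
than a code change.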
"""
from __future__ import annotations
from pathlib import Path
from typing import TYPE_CHECKING, Any
from gps_denied_onboard.components.c10_provisioning import (
BackboneSpec,
C10BatcherConfig,
CacheProvisionerImpl,
DescriptorBatcher,
DescriptorIndexRebuilder,
Ed25519ManifestSigner,
EngineCompiler,
FilelockFileLockFactory,
ManifestBuilder,
ManifestVerifierImpl,
TileBboxRecord,
TileHashRecord,
TilePixelOpener,
TilesByBboxBatchQuery,
TilesByBboxQuery,
)
from gps_denied_onboard.components.c10_provisioning.config import (
BackboneConfig,
C10ProvisioningConfig,
)
from gps_denied_onboard.components.c10_provisioning.interface import (
BackboneEmbedder,
)
from gps_denied_onboard.helpers.sha256_sidecar import Sha256Sidecar
from gps_denied_onboard.logging import get_logger
from gps_denied_onboard.runtime_root.inference_factory import (
build_inference_runtime,
)
if TYPE_CHECKING:
from gps_denied_onboard._types.inference import PrecisionMode
from gps_denied_onboard._types.manifests import HostCapabilities
from gps_denied_onboard.clock import Clock
from gps_denied_onboard.components.c6_tile_cache import (
DescriptorIndex,
TileMetadataStore,
TileStore,
)
from gps_denied_onboard.config.schema import Config
# Public surface of this module: the ``build_*`` factories plus the
# ``c6_*`` adapter constructors that bridge C6 stores to the C10 cuts.
__all__ = [
    "build_backbone_specs",
    "build_cache_provisioner",
    "build_descriptor_batcher",
    "build_engine_compiler",
    "build_manifest_builder",
    "build_manifest_verifier",
    "c6_descriptor_index_to_rebuilder",
    "c6_tile_metadata_store_to_tiles_batch_query",
    "c6_tile_metadata_store_to_tiles_query",
    "c6_tile_store_to_pixel_opener",
]
def build_engine_compiler(config: Config) -> EngineCompiler:
    """Build an :class:`EngineCompiler` wired from ``config``.

    Two collaborators are resolved and handed to the compiler:

    * the :class:`InferenceRuntime`, obtained from the existing C7
      factory :func:`build_inference_runtime` (which honours the
      ``BUILD_*`` gating and the runtime selection in
      ``config.components['c7_inference']``);
    * a c10-scoped structured logger named ``"c10_provisioning"``.

    The :class:`BackboneSpec` corpus is NOT materialised here — call
    :func:`build_backbone_specs` separately so the operator binary can
    pick up the spec list without dragging an :class:`InferenceRuntime`
    along.
    """
    return EngineCompiler(
        inference_runtime=build_inference_runtime(config),
        logger=get_logger("c10_provisioning"),
    )
def build_backbone_specs(config: Config) -> tuple[BackboneSpec, ...]:
    """Return the :class:`BackboneSpec` corpus declared in ``config``.

    Reads ``config.components['c10_provisioning'].backbones`` and maps
    every :class:`BackboneConfig` entry through
    :func:`_backbone_spec_from_config`, preserving the configured order.
    The ``onnx_path`` string of each entry is wrapped in a :class:`Path`
    as-is; validation of the entries happened at load time via
    :meth:`BackboneConfig.__post_init__`.
    """
    provisioning: C10ProvisioningConfig = config.components["c10_provisioning"]
    specs = [_backbone_spec_from_config(entry) for entry in provisioning.backbones]
    return tuple(specs)
def _backbone_spec_from_config(
    backbone: BackboneConfig,
) -> BackboneSpec:
    """Project one :class:`BackboneConfig` entry onto a :class:`BackboneSpec`.

    ``onnx_path`` is converted to a :class:`Path` and
    ``expected_input_shape`` to a ``tuple``; ``model_name`` and
    ``input_name`` are copied through unchanged.
    """
    spec_fields = {
        "model_name": backbone.model_name,
        "onnx_path": Path(backbone.onnx_path),
        "expected_input_shape": tuple(backbone.expected_input_shape),
        "input_name": backbone.input_name,
    }
    return BackboneSpec(**spec_fields)
def build_manifest_builder(
    config: Config,
    *,
    tile_metadata_store: TileMetadataStore,
    clock: Clock,
) -> ManifestBuilder:
    """Build a wired :class:`ManifestBuilder` (AZ-323).

    ``tile_metadata_store`` is the AZ-303 C6 store. It is wrapped in the
    consumer-side :class:`TilesByBboxQuery` adapter before being handed
    over, so the C10 module never imports ``components.c6_tile_cache``
    directly (AZ-270 + AZ-507 boundary).

    ``clock`` is taken as an argument instead of being re-resolved
    through a clock factory: the composition root picks the clock
    strategy (WallClock for live, TlogDerivedClock for replay) per
    AZ-398 and threads the SAME instance through every consumer.

    The builder's own settings come from the ``manifest`` attribute of
    ``config.components['c10_provisioning']``.
    """
    provisioning: C10ProvisioningConfig = config.components["c10_provisioning"]
    return ManifestBuilder(
        sidecar=Sha256Sidecar(),
        signer=Ed25519ManifestSigner(),
        logger=get_logger("c10_provisioning.manifest"),
        tile_metadata_store=c6_tile_metadata_store_to_tiles_query(
            tile_metadata_store
        ),
        clock=clock,
        config=provisioning.manifest,
    )
def build_manifest_verifier(
    config: Config,
    *,
    clock: Clock,
    tile_metadata_store: TileMetadataStore | None = None,
    with_tile_store: bool = False,
) -> ManifestVerifierImpl:
    """Construct a wired :class:`ManifestVerifierImpl` (AZ-324).

    ``with_tile_store=True`` (operator C12 mode) requires
    ``tile_metadata_store`` to be supplied — it is wrapped in the
    :class:`TilesByBboxQuery` adapter so the verifier can re-derive
    ``tiles_coverage_sha256`` from C6 and report drift.

    ``with_tile_store=False`` (airborne C5 mode) trusts the recorded
    aggregate after the Ed25519 signature passes (MV-INV-5); the
    verifier receives ``None`` for its tile query and any
    ``tile_metadata_store`` argument is ignored.

    ``config`` is not read by this factory. ``clock`` is passed through
    to the verifier unchanged.

    Raises :class:`ValueError` when ``with_tile_store`` is true but no
    ``tile_metadata_store`` was given. The check runs before any
    collaborator is constructed, so a mis-wired composition root fails
    fast.
    """
    # Resolve the optional tile query first: this is the only branch that
    # can reject the call, and it needs no other collaborator.
    tiles_query: TilesByBboxQuery | None
    if not with_tile_store:
        # Airborne path: a store passed alongside with_tile_store=False is
        # deliberately dropped rather than wired in, so this path never
        # performs the C6 drift check.
        tiles_query = None
    elif tile_metadata_store is None:
        raise ValueError(
            "build_manifest_verifier(with_tile_store=True) requires "
            "tile_metadata_store; supply one or set with_tile_store=False"
        )
    else:
        tiles_query = c6_tile_metadata_store_to_tiles_query(tile_metadata_store)

    sidecar = Sha256Sidecar()
    logger = get_logger("c10_provisioning.verify")
    return ManifestVerifierImpl(
        sidecar=sidecar,
        logger=logger,
        clock=clock,
        tile_metadata_store=tiles_query,
    )
def c6_tile_metadata_store_to_tiles_query(
    tile_metadata_store: TileMetadataStore,
) -> TilesByBboxQuery:
    """Wrap a C6 ``TileMetadataStore`` as a C10 ``TilesByBboxQuery``.

    The adapter lives in the composition root because this is the only
    layer permitted to import both C6 and C10 (the AZ-270 lint allows
    ``runtime_root``). C6 hands back ``TileMetadata`` rows; AZ-323 only
    needs a ``TileHashRecord`` carrying
    ``(zoom, lat, lon, source, sha256_hex)``, so every row is projected
    down to that shape and the result is returned as a tuple.
    """
    from gps_denied_onboard.components.c6_tile_cache import (
        SectorClassification as C6SectorClassification,
    )

    def _to_hash_record(row):
        # ``source`` may be an enum-like object (has ``.value``) or
        # something that is only stringifiable.
        tile = row.tile_id
        raw_source = row.source
        if hasattr(raw_source, "value"):
            source_label = raw_source.value
        else:
            source_label = str(raw_source)
        return TileHashRecord(
            zoom=tile.zoom_level,
            lat=tile.lat,
            lon=tile.lon,
            source=source_label,
            sha256_hex=row.content_sha256_hex,
        )

    class _C6TilesAdapter:
        def __init__(self, store: TileMetadataStore) -> None:
            self._store = store

        def query_by_bbox(self, *, bbox, zoom_levels, sector_class):
            # The C10-side sector value is converted to the C6 enum
            # before the store is queried.
            # NOTE(review): this call passes ``zoom_levels=`` and
            # ``sector_class=`` to C6 ``query_by_bbox``, while
            # ``c6_tile_metadata_store_to_tiles_batch_query`` calls the
            # same method with a ``Bbox`` DTO and a single ``zoom=`` —
            # confirm which signature the C6 store really exposes.
            sector = C6SectorClassification(sector_class)
            matching_rows = self._store.query_by_bbox(
                bbox=bbox,
                zoom_levels=zoom_levels,
                sector_class=sector,
            )
            return tuple(map(_to_hash_record, matching_rows))

    return _C6TilesAdapter(tile_metadata_store)
def build_descriptor_batcher(
    config: Config,
    *,
    backbone_embedder: BackboneEmbedder,
    tile_metadata_store: TileMetadataStore,
    tile_store: TileStore,
    descriptor_index: DescriptorIndex,
    clock: Clock,
) -> DescriptorBatcher:
    """Build a wired :class:`DescriptorBatcher` (AZ-322).

    Each C6 collaborator is bridged to the matching C10 cut:

    * ``TileMetadataStore`` -> :class:`TilesByBboxBatchQuery`
    * ``TileStore`` -> :class:`TilePixelOpener`
    * ``DescriptorIndex`` -> :class:`DescriptorIndexRebuilder`

    The batcher knobs are currently the :class:`C10BatcherConfig`
    defaults; ``config`` is not read here yet — a dedicated config block
    lands when AZ-326 wires the T5 orchestrator.

    ``backbone_embedder`` is injected by the operator binary
    (composition root); the most common impl is the
    :class:`C7EngineBackboneEmbedder`. Injecting it instead of building
    it inside the factory lets E-C2 (AZ-255) swap in its public embed
    API later via a one-line factory swap, per the AZ-322 spec
    § Risk-1 mitigation.
    """
    batcher_logger = get_logger("c10_provisioning.descriptor_batcher")
    batch_query = c6_tile_metadata_store_to_tiles_batch_query(tile_metadata_store)
    pixel_opener = c6_tile_store_to_pixel_opener(tile_store)
    index_rebuilder = c6_descriptor_index_to_rebuilder(descriptor_index)
    default_knobs = C10BatcherConfig()
    return DescriptorBatcher(
        backbone_embedder=backbone_embedder,
        tiles_query=batch_query,
        tile_pixel_opener=pixel_opener,
        descriptor_index=index_rebuilder,
        clock=clock,
        logger=batcher_logger,
        config=default_knobs,
    )
def c6_tile_metadata_store_to_tiles_batch_query(
    tile_metadata_store: TileMetadataStore,
) -> TilesByBboxBatchQuery:
    """Wrap a C6 ``TileMetadataStore`` as a C10 ``TilesByBboxBatchQuery``.

    The batcher cut takes ``zoom_levels: tuple[int, ...]`` and a 4-tuple
    bbox, whereas this adapter queries C6 with a ``Bbox`` DTO and one
    ``zoom`` at a time. It therefore issues one C6 query per zoom level,
    concatenates the rows in zoom order and projects each
    :class:`TileMetadata` row down to a :class:`TileBboxRecord`
    (zoom + lat + lon + source — the rest of the metadata row is
    irrelevant to the descriptor pipeline).

    Lives in ``runtime_root`` because it is the only layer allowed to
    import both C6 and C10 (AZ-270 lint).
    """
    from gps_denied_onboard.components.c6_tile_cache import (
        Bbox as C6Bbox,
        SectorClassification as C6SectorClassification,
    )

    def _to_bbox_record(row) -> TileBboxRecord:
        # ``source`` may be an enum-like object (has ``.value``) or
        # something that is only stringifiable.
        raw_source = row.source
        if hasattr(raw_source, "value"):
            source_label = raw_source.value
        else:
            source_label = str(raw_source)
        tile = row.tile_id
        return TileBboxRecord(
            zoom=tile.zoom_level,
            lat=tile.lat,
            lon=tile.lon,
            source=source_label,
        )

    class _C6BatchTilesAdapter:
        def __init__(self, store: TileMetadataStore) -> None:
            self._store = store

        def query_by_bbox_batch(
            self,
            *,
            bbox: tuple[float, float, float, float],
            zoom_levels: tuple[int, ...],
            sector_class: str,
        ) -> list[TileBboxRecord]:
            # ``sector_class`` is a soft filter for now: it is only
            # validated against the C6 enum (an invalid value raises
            # here) and is not forwarded to the store query below.
            # NOTE(review): ``c6_tile_metadata_store_to_tiles_query``
            # calls the same C6 ``query_by_bbox`` with ``zoom_levels=``
            # and ``sector_class=`` keywords — confirm which signature
            # the C6 store really exposes.
            C6SectorClassification(sector_class)

            lat_lo, lon_lo, lat_hi, lon_hi = bbox
            area = C6Bbox(
                min_lat=lat_lo,
                min_lon=lon_lo,
                max_lat=lat_hi,
                max_lon=lon_hi,
            )
            return [
                _to_bbox_record(row)
                for level in zoom_levels
                for row in self._store.query_by_bbox(bbox=area, zoom=level)
            ]

    return _C6BatchTilesAdapter(tile_metadata_store)
def c6_tile_store_to_pixel_opener(
    tile_store: TileStore,
) -> TilePixelOpener:
    """Wrap a C6 ``TileStore`` as a C10 ``TilePixelOpener``.

    C6 contract: ``read_tile_pixels(tile_id) -> TilePixelHandle``, where
    :class:`TilePixelHandle` is itself a context manager (an mmap handle
    that closes on ``__exit__``). Batcher cut:
    ``open_tile(zoom, lat, lon) -> ContextManager``. The adapter only
    has to assemble a ``TileId`` from the three coordinates and hand
    back whatever C6 returns, since the handle already implements
    ``__enter__`` / ``__exit__``.
    """
    from gps_denied_onboard.components.c6_tile_cache import TileId

    class _C6PixelOpenerAdapter:
        def __init__(self, store: TileStore) -> None:
            self._store = store

        def open_tile(self, *, zoom: int, lat: float, lon: float) -> Any:
            # The C6 handle is returned untouched; the caller enters it.
            requested = TileId(zoom_level=zoom, lat=lat, lon=lon)
            handle = self._store.read_tile_pixels(requested)
            return handle

    return _C6PixelOpenerAdapter(tile_store)
def build_cache_provisioner(
    config: Config,
    *,
    engine_compiler: EngineCompiler,
    descriptor_batcher: DescriptorBatcher,
    manifest_builder: ManifestBuilder,
    tile_metadata_store: TileMetadataStore,
    host: HostCapabilities,
    precision: PrecisionMode,
    clock: Clock,
) -> CacheProvisionerImpl:
    """Build a wired :class:`CacheProvisionerImpl` (AZ-325).

    The orchestrator is the public top-level seam C12 calls. It is
    composed from phase impls that were built beforehand, so the same
    ``engine_compiler`` / ``descriptor_batcher`` / ``manifest_builder``
    instances can be reused across multiple ``build_cache_artifacts``
    invocations within an operator session.

    ``host`` and ``precision`` are supplied by the composition root
    because AZ-321's :class:`EngineCompileRequest` expects host-info
    threaded in (the AZ-297 :class:`InferenceRuntime` does not
    introspect it), and they participate in the build-identity hash via
    :class:`EngineFilenameSchema`. Tier-1 dev workstations probe the GPU
    via :mod:`pynvml`; replay / unit tests construct fixed
    :class:`HostCapabilities` so AC-1..AC-16 are deterministic.

    From ``config.components['c10_provisioning']`` this factory takes
    the backbone corpus (via :func:`build_backbone_specs`),
    ``workspace_mb`` and the ``provisioner`` settings. The
    :class:`TileMetadataStore` is wrapped in the C10
    :class:`TilesByBboxQuery` cut so the orchestrator never imports
    ``components.c6_tile_cache``; the lock factory is a fresh
    :class:`FilelockFileLockFactory`.
    """
    provisioning: C10ProvisioningConfig = config.components["c10_provisioning"]
    backbone_corpus = build_backbone_specs(config)
    coverage_query = c6_tile_metadata_store_to_tiles_query(tile_metadata_store)
    provisioner_logger = get_logger("c10_provisioning.provisioner")

    provisioner = CacheProvisionerImpl(
        engine_compiler=engine_compiler,
        descriptor_batcher=descriptor_batcher,
        manifest_builder=manifest_builder,
        tile_metadata_store=coverage_query,
        lock_factory=FilelockFileLockFactory(),
        backbones=backbone_corpus,
        host=host,
        precision=precision,
        workspace_mb=provisioning.workspace_mb,
        logger=provisioner_logger,
        clock=clock,
        config=provisioning.provisioner,
    )
    return provisioner
def c6_descriptor_index_to_rebuilder(
    descriptor_index: DescriptorIndex,
) -> DescriptorIndexRebuilder:
    """Wrap a C6 ``DescriptorIndex`` as a C10 ``DescriptorIndexRebuilder``.

    C6 side (AZ-303 / AZ-306 contract):
    ``rebuild_from_descriptors(descriptors, tile_ids: list[TileId],
    hnsw_params: HnswParams)``. Batcher side (transport-decoupled):
    ``rebuild(*, descriptors, tile_records, hnsw_*)``. The adapter turns
    every ``TileBboxRecord`` into a ``TileId``, folds the four HNSW
    kwargs into one :class:`HnswParams` DTO and delegates to C6.
    ``rebuild`` returns ``None``; whatever C6 returns is discarded.
    """
    from gps_denied_onboard.components.c6_tile_cache import (
        HnswParams,
        TileId,
    )

    def _tile_id_for(record):
        return TileId(zoom_level=record.zoom, lat=record.lat, lon=record.lon)

    class _C6RebuilderAdapter:
        def __init__(self, index: DescriptorIndex) -> None:
            self._index = index

        def rebuild(
            self,
            *,
            descriptors,
            tile_records,
            hnsw_m,
            hnsw_ef_construction,
            hnsw_ef_search,
            hnsw_metric,
        ):
            # Tile ids are materialised as a list, in record order,
            # before the HNSW parameter DTO is assembled.
            c6_tile_ids = list(map(_tile_id_for, tile_records))
            c6_params = HnswParams(
                m=hnsw_m,
                ef_construction=hnsw_ef_construction,
                ef_search=hnsw_ef_search,
                metric=hnsw_metric,
            )
            self._index.rebuild_from_descriptors(
                descriptors=descriptors,
                tile_ids=c6_tile_ids,
                hnsw_params=c6_params,
            )

    return _C6RebuilderAdapter(descriptor_index)