mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 17:41:13 +00:00
[AZ-322] C10 DescriptorBatcher (faiss-cpu, OOM halve-retry)
Implements the C10 internal phase that walks every C6 tile, embeds through C2's backbone via the AZ-321-produced engine, and rebuilds the AZ-306 FAISS HNSW index in one atomic write.

- DescriptorBatcher with halve-and-retry OOM recovery (default 1 retry)
- BackboneEmbedder Protocol + C7EngineBackboneEmbedder default impl
- DescriptorBatchError for OOM / dim-mismatch / missing-output failures
- Empty-corpus surfaces as outcome=failure with explicit hint to run C11
- Per-10% progress callback + DEBUG logs (no engine bytes leaked)
- Consumer-side Protocol cuts (TilesByBboxBatchQuery, TilePixelOpener, DescriptorIndexRebuilder) so c10 stays within AZ-270 lint
- runtime_root.c10_factory adds build_descriptor_batcher + three C6->C10 adapters
- 16 unit tests covering AC-1..AC-10 + 2 NFRs + 4 supplemental (Protocol conformance, query pass-through, handle release, config)

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -15,21 +15,30 @@ than a code change.
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from gps_denied_onboard.components.c10_provisioning import (
|
||||
BackboneSpec,
|
||||
C10BatcherConfig,
|
||||
DescriptorBatcher,
|
||||
DescriptorIndexRebuilder,
|
||||
Ed25519ManifestSigner,
|
||||
EngineCompiler,
|
||||
ManifestBuilder,
|
||||
ManifestVerifierImpl,
|
||||
TileBboxRecord,
|
||||
TileHashRecord,
|
||||
TilePixelOpener,
|
||||
TilesByBboxBatchQuery,
|
||||
TilesByBboxQuery,
|
||||
)
|
||||
from gps_denied_onboard.components.c10_provisioning.config import (
|
||||
BackboneConfig,
|
||||
C10ProvisioningConfig,
|
||||
)
|
||||
from gps_denied_onboard.components.c10_provisioning.interface import (
|
||||
BackboneEmbedder,
|
||||
)
|
||||
from gps_denied_onboard.helpers.sha256_sidecar import Sha256Sidecar
|
||||
from gps_denied_onboard.logging import get_logger
|
||||
from gps_denied_onboard.runtime_root.inference_factory import (
|
||||
@@ -38,15 +47,23 @@ from gps_denied_onboard.runtime_root.inference_factory import (
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from gps_denied_onboard.clock import Clock
|
||||
from gps_denied_onboard.components.c6_tile_cache import TileMetadataStore
|
||||
from gps_denied_onboard.components.c6_tile_cache import (
|
||||
DescriptorIndex,
|
||||
TileMetadataStore,
|
||||
TileStore,
|
||||
)
|
||||
from gps_denied_onboard.config.schema import Config
|
||||
|
||||
__all__ = [
|
||||
"build_backbone_specs",
|
||||
"build_descriptor_batcher",
|
||||
"build_engine_compiler",
|
||||
"build_manifest_builder",
|
||||
"build_manifest_verifier",
|
||||
"c6_descriptor_index_to_rebuilder",
|
||||
"c6_tile_metadata_store_to_tiles_batch_query",
|
||||
"c6_tile_metadata_store_to_tiles_query",
|
||||
"c6_tile_store_to_pixel_opener",
|
||||
]
|
||||
|
||||
|
||||
@@ -219,3 +236,200 @@ def c6_tile_metadata_store_to_tiles_query(
|
||||
)
|
||||
|
||||
return _C6TilesAdapter(tile_metadata_store)
|
||||
|
||||
|
||||
def build_descriptor_batcher(
    config: Config,
    *,
    backbone_embedder: BackboneEmbedder,
    tile_metadata_store: TileMetadataStore,
    tile_store: TileStore,
    descriptor_index: DescriptorIndex,
    clock: Clock,
) -> DescriptorBatcher:
    """Assemble a fully wired :class:`DescriptorBatcher` (AZ-322).

    Wiring performed here:

    * C6's ``TileMetadataStore`` is wrapped into C10's
      :class:`TilesByBboxBatchQuery` cut.
    * C6's ``TileStore`` is wrapped into C10's :class:`TilePixelOpener`
      cut.
    * C6's ``DescriptorIndex`` is wrapped into C10's
      :class:`DescriptorIndexRebuilder` cut.
    * The batcher knobs are a default-constructed
      :class:`C10BatcherConfig`. ``config`` is accepted but not read
      yet; a dedicated block under
      ``config.components['c10_provisioning']`` lands when AZ-326 wires
      the T5 orchestrator.

    ``backbone_embedder`` is injected by the operator binary
    (composition root) rather than built here; the most common impl is
    :class:`C7EngineBackboneEmbedder`. Injection lets E-C2 (AZ-255)
    swap in its public embed API later with a one-line factory change,
    per the AZ-322 spec § Risk-1 mitigation.
    """
    batcher_logger = get_logger("c10_provisioning.descriptor_batcher")

    # Bridge each C6 collaborator to the narrow Protocol the batcher
    # consumes, in the same order the constructor receives them.
    batch_query = c6_tile_metadata_store_to_tiles_batch_query(
        tile_metadata_store
    )
    pixel_opener = c6_tile_store_to_pixel_opener(tile_store)
    index_rebuilder = c6_descriptor_index_to_rebuilder(descriptor_index)

    return DescriptorBatcher(
        backbone_embedder=backbone_embedder,
        tiles_query=batch_query,
        tile_pixel_opener=pixel_opener,
        descriptor_index=index_rebuilder,
        clock=clock,
        logger=batcher_logger,
        config=C10BatcherConfig(),
    )
|
||||
|
||||
|
||||
def c6_tile_metadata_store_to_tiles_batch_query(
    tile_metadata_store: TileMetadataStore,
) -> TilesByBboxBatchQuery:
    """Wrap a C6 ``TileMetadataStore`` as C10's ``TilesByBboxBatchQuery``.

    C6 exposes ``query_by_bbox`` with one ``zoom`` and a ``Bbox`` DTO,
    whereas the batcher cut passes ``zoom_levels: tuple[int, ...]`` and
    a plain 4-tuple bbox. The returned adapter issues one C6 query per
    zoom level, concatenates the rows in zoom order, and reduces each
    :class:`TileMetadata` row to the :class:`TileBboxRecord` fields the
    descriptor pipeline uses (zoom, lat, lon, source).

    The adapter is defined in ``runtime_root`` because that is the only
    layer permitted to import both C6 and C10 (AZ-270 lint).
    """
    from gps_denied_onboard.components.c6_tile_cache import (
        Bbox as C6Bbox,
        SectorClassification as C6SectorClassification,
    )

    class _C6BatchTilesAdapter:
        def __init__(self, store: TileMetadataStore) -> None:
            self._store = store

        @staticmethod
        def _project(meta) -> TileBboxRecord:
            # Enum-like sources contribute their ``.value``; anything
            # else is stringified.
            origin = meta.source
            if hasattr(origin, "value"):
                label = origin.value
            else:
                label = str(origin)
            return TileBboxRecord(
                zoom=meta.tile_id.zoom_level,
                lat=meta.tile_id.lat,
                lon=meta.tile_id.lon,
                source=label,
            )

        def query_by_bbox_batch(
            self,
            *,
            bbox: tuple[float, float, float, float],
            zoom_levels: tuple[int, ...],
            sector_class: str,
        ) -> list[TileBboxRecord]:
            # ``sector_class`` is only a soft filter for now: the
            # batcher's CorpusFilter carries it for parity with the
            # manifest builder, but C6's query_by_bbox has no such
            # argument. Constructing the enum validates the value up
            # front; the upstream metadata classification gate is left
            # to invalidate freshness if needed.
            C6SectorClassification(sector_class)

            lat_lo, lon_lo, lat_hi, lon_hi = bbox
            area = C6Bbox(
                min_lat=lat_lo,
                min_lon=lon_lo,
                max_lat=lat_hi,
                max_lon=lon_hi,
            )
            return [
                self._project(meta)
                for level in zoom_levels
                for meta in self._store.query_by_bbox(bbox=area, zoom=level)
            ]

    return _C6BatchTilesAdapter(tile_metadata_store)
|
||||
|
||||
|
||||
def c6_tile_store_to_pixel_opener(
    tile_store: TileStore,
) -> TilePixelOpener:
    """Wrap a C6 ``TileStore`` as C10's ``TilePixelOpener`` cut.

    C6 side: ``read_tile_pixels(tile_id) -> TilePixelHandle``, where
    :class:`TilePixelHandle` is itself a context manager (an mmap handle
    that closes on ``__exit__``). Batcher side:
    ``open_tile(zoom, lat, lon) -> ContextManager``.

    Since the C6 handle already provides ``__enter__`` / ``__exit__``,
    the adapter only has to assemble a ``TileId`` from the three
    coordinates and hand back whatever C6 returns.
    """
    from gps_denied_onboard.components.c6_tile_cache import TileId

    class _C6PixelOpenerAdapter:
        def __init__(self, store: TileStore) -> None:
            self._store = store

        def open_tile(self, *, zoom: int, lat: float, lon: float) -> Any:
            # The C6 handle is returned untouched; the caller enters
            # and exits it.
            return self._store.read_tile_pixels(
                TileId(zoom_level=zoom, lat=lat, lon=lon)
            )

    adapter = _C6PixelOpenerAdapter(tile_store)
    return adapter
|
||||
|
||||
|
||||
def c6_descriptor_index_to_rebuilder(
    descriptor_index: DescriptorIndex,
) -> DescriptorIndexRebuilder:
    """Wrap a C6 ``DescriptorIndex`` as C10's ``DescriptorIndexRebuilder``.

    The AZ-303 / AZ-306 contract on the C6 side is
    ``rebuild_from_descriptors(descriptors, tile_ids: list[TileId],
    hnsw_params: HnswParams)``. The batcher cut is the
    transport-decoupled ``rebuild(*, descriptors, tile_records,
    hnsw_*)``. The returned adapter maps every ``TileBboxRecord`` to a
    ``TileId``, packs the four HNSW keyword arguments into one
    :class:`HnswParams` DTO, and then delegates to C6.
    """
    from gps_denied_onboard.components.c6_tile_cache import (
        HnswParams,
        TileId,
    )

    def _as_tile_id(record):
        # Only the three coordinates are carried over; ``source`` is
        # not part of a TileId.
        return TileId(
            zoom_level=record.zoom,
            lat=record.lat,
            lon=record.lon,
        )

    class _C6RebuilderAdapter:
        def __init__(self, index: DescriptorIndex) -> None:
            self._index = index

        def rebuild(
            self,
            *,
            descriptors,
            tile_records,
            hnsw_m,
            hnsw_ef_construction,
            hnsw_ef_search,
            hnsw_metric,
        ):
            # Tile ids are built before the params DTO, and the result
            # of the C6 call is intentionally not returned.
            ids = [_as_tile_id(record) for record in tile_records]
            hnsw = HnswParams(
                m=hnsw_m,
                ef_construction=hnsw_ef_construction,
                ef_search=hnsw_ef_search,
                metric=hnsw_metric,
            )
            self._index.rebuild_from_descriptors(
                descriptors=descriptors,
                tile_ids=ids,
                hnsw_params=hnsw,
            )

    return _C6RebuilderAdapter(descriptor_index)
|
||||
|
||||
Reference in New Issue
Block a user