"""Operator pre-flight cache assembly driver (AZ-839 / Epic AZ-835 C3). Replaces the placeholder ``operator_pre_flight_setup`` fixture stub at ``conftest.py`` lines 293-310 with a real driver that wires together the four operator-side production components: 1. **C1 / AZ-836 RouteSpec** — already extracted by the caller via :func:`gps_denied_onboard.replay_input.tlog_route.extract_route_from_tlog` and handed in as :paramref:`populate_c6_from_route.route_spec`. 2. **C2 / AZ-838 SatelliteProviderRouteClient** — POSTs the route to satellite-provider, polls ``mapsReady``. 3. **C11 / AZ-316 + AZ-777 Phase 1 HttpTileDownloader** — pulls the seeded tiles from satellite-provider into C6 over a bbox derived from the route waypoints. 4. **C10 / AZ-322 DescriptorBatcher** — rebuilds the FAISS HNSW descriptor index over the populated C6 cache (NetVLAD backbone per ``c2_vpr/config.py:67``). The descriptor index sidecar coherence (AZ-306 triple-consistency: ``.index`` + ``.sha256`` + ``.meta.json``) is verified by re-loading the index after rebuild via the caller-supplied ``descriptor_index_factory``; any tampering surfaces as :class:`IndexUnavailableError`. Public surface — re-exported from this module: * :class:`PopulatedC6Cache` — frozen dataclass returned on success. * :func:`populate_c6_from_route` — the driver function. Cleanup-on-failure removes any FAISS sidecar files produced inside the driver if any later step raises. Tile-store rows written by C11 are NOT deleted (the C6 store owns its own rollback semantics — leaving those rows enables idempotent re-runs via the C11 download journal). 
""" from __future__ import annotations import logging import time from collections.abc import Callable from dataclasses import dataclass from pathlib import Path from typing import Any from uuid import UUID, uuid4 from gps_denied_onboard.components.c10_provisioning.descriptor_batcher import ( BatcherOutcome, CorpusFilter, DescriptorBatcher, ) from gps_denied_onboard.components.c11_tile_manager import ( DownloadOutcome, DownloadRequest, HttpTileDownloader, SectorClassification, ) from gps_denied_onboard.components.c11_tile_manager.errors import ( RouteTerminalFailureError, RouteTransientError, RouteValidationError, ) from gps_denied_onboard.components.c11_tile_manager.route_client import ( SatelliteProviderRouteClient, ) from gps_denied_onboard.components.c6_tile_cache.errors import ( IndexUnavailableError, ) from gps_denied_onboard.components.c6_tile_cache.faiss_descriptor_index import ( META_SUFFIX, ) from gps_denied_onboard.helpers.sha256_sidecar import SIDECAR_SUFFIX from gps_denied_onboard._types.route import RouteSpec __all__ = [ "PopulatedC6Cache", "populate_c6_from_route", ] # Mirror C11's existing schedule so the fixture does not introduce a # parallel retry budget. AC-5 ties our per-attempt cap (3) to the # documented pause cadence; the schedule itself lives in the # downloader module and is re-exported here so tests can override. _DEFAULT_RETRY_SCHEDULE_S: tuple[float, ...] = (1.0, 2.0, 4.0, 8.0) _DEFAULT_MAX_RETRY_ATTEMPTS: int = 3 _DEFAULT_ZOOM_LEVEL: int = 18 _DEFAULT_SECTOR_CLASS: SectorClassification = SectorClassification.ACTIVE_CONFLICT # Per-degree-of-latitude metres at WGS84 mean radius — reused from C11 # route-coverage enumeration (route_client._enumerate_route_tile_coords). # Re-stated here so the driver does not depend on a private constant. 
_METERS_PER_DEGREE_LAT: float = 111_320.0

_LOGGER = logging.getLogger("tests.e2e.replay.operator_pre_flight")


@dataclass(frozen=True, slots=True)
class PopulatedC6Cache:
    """Output of :func:`populate_c6_from_route`.

    Mirrors the public-surface dataclass documented in the AZ-839 spec.
    All paths point at on-disk artifacts that survive the fixture's
    ``session`` scope (when mounted on the named docker volume the
    e2e-runner declares); ``elapsed_seconds`` powers the AC-1 / AC-2
    perf budget assertions.
    """

    cache_root: Path
    tile_store_path: Path
    faiss_index_path: Path
    faiss_sidecar_sha256_path: Path
    faiss_sidecar_meta_path: Path
    route_spec: RouteSpec
    tile_count: int
    elapsed_seconds: float


def populate_c6_from_route(
    *,
    route_spec: RouteSpec,
    route_client: SatelliteProviderRouteClient,
    tile_downloader: HttpTileDownloader,
    descriptor_batcher: DescriptorBatcher,
    descriptor_index_factory: Callable[[], Any],
    cache_root: Path,
    tile_store_path: Path,
    faiss_index_path: Path,
    flight_id: UUID | None = None,
    sector_class: SectorClassification = _DEFAULT_SECTOR_CLASS,
    zoom_level: int = _DEFAULT_ZOOM_LEVEL,
    region_size_meters: float | None = None,
    retry_schedule_s: tuple[float, ...] = _DEFAULT_RETRY_SCHEDULE_S,
    max_retry_attempts: int = _DEFAULT_MAX_RETRY_ATTEMPTS,
    sleep: Callable[[float], None] = time.sleep,
    monotonic: Callable[[], float] = time.monotonic,
    logger: logging.Logger | None = None,
) -> PopulatedC6Cache:
    """Drive the full C1+C2+C11+C10 pipeline end-to-end.

    Args:
        route_spec: Coarsened route from AZ-836's
            :func:`extract_route_from_tlog`. The caller chooses the tlog
            (typically a session-scoped fixture); this driver is
            tlog-agnostic.
        route_client: Configured C2 client. Built from env vars by the
            production fixture; injected as a stub by unit tests.
        tile_downloader: Configured C11 downloader. Same wiring rules.
        descriptor_batcher: Configured C10 batcher; its rebuild path owns
            the on-disk FAISS write (atomic via :class:`Sha256Sidecar`).
        descriptor_index_factory: Zero-arg callable that constructs a
            FRESH descriptor index pointed at ``faiss_index_path``.
            Production passes
            ``lambda: FaissDescriptorIndex.from_config(config)``; the
            constructor auto-loads via
            :meth:`FaissDescriptorIndex._load`, raising
            :class:`IndexUnavailableError` on triple-consistency failure
            (AC-3 / AC-6 verification).
        cache_root: Root directory mounted on the named docker volume
            that survives across pytest sessions.
        tile_store_path: Where C6's :class:`TileStore` writes JPEG blobs.
            Carried on the result for downstream tests.
        faiss_index_path: Final ``.index`` blob path. Sidecars live at
            ``.sha256`` + ``.meta.json``.
        flight_id: C11 download-journal key; defaults to a fresh UUID so
            two fixture sessions never collide their journals.
        sector_class: C11 / C6 sector classification. Defaults to
            ``ACTIVE_CONFLICT`` — Derkachi is an active-conflict
            corridor; ``STABLE_REAR`` is for non-Ukraine clips.
        zoom_level: Single Web-Mercator zoom level the fixture populates.
            AZ-839 spec defaults to 18 to match ``seed_route.py``
            ergonomics; tests override for speed.
        region_size_meters: Per-waypoint coverage radius in metres.
            ``None`` falls back to
            :attr:`RouteSpec.suggested_region_size_meters`.
        retry_schedule_s: Pause cadence between transient retries.
            Defaults to C11's documented ``_DEFAULT_BACKOFF_SCHEDULE_S``.
            An empty tuple means "retry immediately, no pause".
        max_retry_attempts: Total :meth:`seed_route` attempts on
            transient error before propagating (AC-5 — final attempt's
            exception is propagated unchanged).
        sleep: Test override for the retry pause; production passes
            :func:`time.sleep`.
        monotonic: Test override for elapsed-time measurement.
        logger: Optional logger. Defaults to the module logger.

    Returns:
        :class:`PopulatedC6Cache` on success.

    Raises:
        ValueError: ``max_retry_attempts < 1``, a non-positive effective
            region size, empty ``route_spec.waypoints``, or a degenerate
            bbox derived from the waypoints.
        RouteValidationError: Pre-emptive validation or HTTP 4xx —
            propagated unchanged with original cause (AC-4).
        RouteTerminalFailureError: ``mapsReady`` never reached or
            terminal failure status — propagated unchanged (AC-4).
        RouteTransientError: 5xx / network / timeout AFTER all retry
            attempts have been exhausted (AC-5).
        IndexUnavailableError: Triple-consistency check failed after
            rebuild — sidecars are corrupt / mismatched (AC-3 / AC-6).
        RuntimeError: C11 ``download_tiles_for_area`` returned a
            non-success outcome OR C10 ``populate_descriptors`` returned
            :attr:`BatcherOutcome.FAILURE`.

    Notes:
        Cleanup behaviour (AC-7) — if any step raises after the rebuild
        has begun writing sidecar files, the partial files (.index,
        .sha256, .meta.json) are removed before the exception propagates
        so a re-run starts from a clean slate. Tile-store rows are NOT
        deleted on cleanup; the C11 download journal owns idempotent
        re-run semantics.
    """
    log = logger or _LOGGER
    if max_retry_attempts < 1:
        raise ValueError(
            f"max_retry_attempts must be >= 1; got {max_retry_attempts}"
        )

    started_monotonic = monotonic()
    # Explicit None test: the default is "no id supplied", not "falsy id".
    effective_flight_id = flight_id if flight_id is not None else uuid4()
    effective_region_size = float(
        region_size_meters
        if region_size_meters is not None
        else route_spec.suggested_region_size_meters
    )
    if effective_region_size <= 0:
        raise ValueError(
            f"region_size_meters must be > 0; got {effective_region_size}"
        )
    if not route_spec.waypoints:
        raise ValueError("route_spec.waypoints must be non-empty")

    sidecar_paths = (
        faiss_index_path,
        Path(str(faiss_index_path) + SIDECAR_SUFFIX),
        Path(str(faiss_index_path) + META_SUFFIX),
    )
    # Snapshot which artifacts already exist so that cleanup-on-failure
    # only removes files created during this invocation.
    pre_existing_sidecar = {p: p.is_file() for p in sidecar_paths}

    try:
        seed_result = _seed_route_with_retry(
            route_client=route_client,
            spec=route_spec,
            region_size_meters=effective_region_size,
            zoom_level=zoom_level,
            retry_schedule_s=retry_schedule_s,
            max_retry_attempts=max_retry_attempts,
            sleep=sleep,
            logger=log,
        )

        bbox = _route_bbox(
            waypoints=route_spec.waypoints,
            region_size_meters=effective_region_size,
        )
        download_request = DownloadRequest(
            flight_id=effective_flight_id,
            bbox_min_lat=bbox[0],
            bbox_min_lon=bbox[1],
            bbox_max_lat=bbox[2],
            bbox_max_lon=bbox[3],
            zoom_levels=(int(zoom_level),),
            sector_class=sector_class,
            cache_root=cache_root,
        )
        download_report = tile_downloader.download_tiles_for_area(download_request)
        if download_report.outcome not in {
            DownloadOutcome.SUCCESS,
            DownloadOutcome.IDEMPOTENT_NO_OP,
        }:
            raise RuntimeError(
                "C11 download_tiles_for_area returned non-success outcome "
                f"{download_report.outcome.value!r}; "
                f"requested={download_report.tiles_requested} "
                f"downloaded={download_report.tiles_downloaded} "
                f"rejected_resolution={download_report.tiles_rejected_resolution} "
                f"rejected_freshness={download_report.tiles_rejected_freshness}"
            )
        log.info(
            "operator_pre_flight: tiles populated",
            extra={
                "kind": "operator_pre_flight.tiles_populated",
                "kv": {
                    "route_id": str(seed_result.route_id),
                    "seeded_tile_count": seed_result.tile_count,
                    "downloaded_tiles": download_report.tiles_downloaded,
                    "request_hash": download_report.request_hash,
                },
            },
        )

        corpus_filter = CorpusFilter(
            bbox=bbox,
            zoom_levels=(int(zoom_level),),
            sector_class=sector_class.value,
        )
        batcher_report = descriptor_batcher.populate_descriptors(corpus_filter)
        if batcher_report.outcome is not BatcherOutcome.SUCCESS:
            raise RuntimeError(
                "C10 populate_descriptors returned FAILURE: "
                f"{batcher_report.failure_reason}"
            )

        # Constructing a fresh index is the verification step: the factory
        # is expected to raise if the on-disk artifacts are inconsistent.
        verifier_index = descriptor_index_factory()
        log.debug(
            "operator_pre_flight: sidecar coherence verified",
            extra={
                "kind": "operator_pre_flight.sidecar_verified",
                "kv": {
                    "faiss_index_path": str(faiss_index_path),
                    "verifier_type": type(verifier_index).__name__,
                },
            },
        )

        elapsed_seconds = max(0.0, monotonic() - started_monotonic)
        return PopulatedC6Cache(
            cache_root=cache_root,
            tile_store_path=tile_store_path,
            faiss_index_path=faiss_index_path,
            faiss_sidecar_sha256_path=sidecar_paths[1],
            faiss_sidecar_meta_path=sidecar_paths[2],
            route_spec=route_spec,
            tile_count=batcher_report.tiles_consumed,
            elapsed_seconds=elapsed_seconds,
        )
    except BaseException:
        # BaseException on purpose: interrupts (KeyboardInterrupt etc.)
        # must also leave no partial artifacts behind. Always re-raised.
        _cleanup_partial_sidecars(
            sidecar_paths=sidecar_paths,
            pre_existing=pre_existing_sidecar,
            logger=log,
        )
        raise


def _seed_route_with_retry(
    *,
    route_client: SatelliteProviderRouteClient,
    spec: RouteSpec,
    region_size_meters: float,
    zoom_level: int,
    retry_schedule_s: tuple[float, ...],
    max_retry_attempts: int,
    sleep: Callable[[float], None],
    logger: logging.Logger,
) -> Any:
    """Call ``seed_route`` with bounded transient retries (AC-5).

    Validation / terminal-failure errors propagate IMMEDIATELY with
    their original cause (AC-4 — no silent swallow). Only
    :class:`RouteTransientError` triggers the retry ladder; the final
    attempt's exception is re-raised unchanged so the caller sees the
    actual transient signal that exhausted the budget.

    The pause before retry ``k`` is ``retry_schedule_s[k - 1]``, with the
    last entry reused once the schedule is exhausted. An empty schedule
    means "retry immediately" — ``sleep`` is not called at all.

    Returns:
        Whatever ``route_client.seed_route`` returns on the first
        successful attempt.

    Raises:
        RouteValidationError: Propagated unchanged, never retried.
        RouteTerminalFailureError: Propagated unchanged, never retried.
        RouteTransientError: The final attempt's error once
            ``max_retry_attempts`` attempts have failed.
        RuntimeError: ``max_retry_attempts < 1`` so no attempt was made.
    """
    for attempt in range(1, max_retry_attempts + 1):
        try:
            return route_client.seed_route(
                spec,
                region_size_meters=region_size_meters,
                zoom_level=zoom_level,
            )
        except (RouteValidationError, RouteTerminalFailureError):
            raise
        except RouteTransientError as exc:
            logger.warning(
                "operator_pre_flight: route seed transient failure",
                extra={
                    "kind": "operator_pre_flight.route_seed.transient",
                    "kv": {
                        "attempt": attempt,
                        "max_attempts": max_retry_attempts,
                        "exc": repr(exc),
                    },
                },
            )
            if attempt >= max_retry_attempts:
                raise
            # Guard the lookup: indexing an empty schedule would raise
            # IndexError here and mask the transient error being handled.
            if retry_schedule_s:
                pause_s = retry_schedule_s[
                    min(attempt - 1, len(retry_schedule_s) - 1)
                ]
                sleep(pause_s)

    # Only reachable when the loop body never ran (max_retry_attempts < 1):
    # every executed iteration either returns, raises, or continues.
    raise RuntimeError(
        "operator_pre_flight: seed_route loop exited without result"
    )


def _route_bbox(
    *,
    waypoints: tuple[tuple[float, float], ...],
    region_size_meters: float,
) -> tuple[float, float, float, float]:
    """Bounding box of every waypoint's coverage square.

    Mirrors the local enumeration in
    :func:`gps_denied_onboard.components.c11_tile_manager.route_client._enumerate_route_tile_coords`
    by taking ``region_size_meters`` as the per-waypoint square edge and
    unioning the lat/lon extents. The result is a single bbox that the
    C11 :meth:`HttpTileDownloader.download_tiles_for_area` Protocol
    consumes; C11 then runs the standard slippy-map enumeration over
    that bbox at the requested zoom level.

    Args:
        waypoints: ``(lat_deg, lon_deg)`` pairs.
        region_size_meters: Edge length of each waypoint's square.

    Returns:
        ``(min_lat, min_lon, max_lat, max_lon)`` — matching
        :class:`DownloadRequest`'s field order.

    Raises:
        ValueError: The union is empty or has zero extent on an axis
            (this includes an empty ``waypoints`` tuple).
    """
    import math

    half = region_size_meters / 2.0
    # The latitude half-extent does not depend on the waypoint.
    lat_delta_deg = half / _METERS_PER_DEGREE_LAT

    min_lat = float("inf")
    max_lat = float("-inf")
    min_lon = float("inf")
    max_lon = float("-inf")
    for lat_deg, lon_deg in waypoints:
        # Clamp cos(lat) away from zero so the division stays finite
        # for waypoints at (or numerically at) the poles.
        cos_lat = max(math.cos(math.radians(lat_deg)), 1e-9)
        lon_delta_deg = half / (_METERS_PER_DEGREE_LAT * cos_lat)
        min_lat = min(min_lat, lat_deg - lat_delta_deg)
        max_lat = max(max_lat, lat_deg + lat_delta_deg)
        min_lon = min(min_lon, lon_deg - lon_delta_deg)
        max_lon = max(max_lon, lon_deg + lon_delta_deg)

    if min_lat >= max_lat or min_lon >= max_lon:
        raise ValueError(
            "operator_pre_flight: degenerate bbox from route waypoints "
            f"(min_lat={min_lat}, max_lat={max_lat}, "
            f"min_lon={min_lon}, max_lon={max_lon})"
        )
    return (min_lat, min_lon, max_lat, max_lon)


def _cleanup_partial_sidecars(
    *,
    sidecar_paths: tuple[Path, ...],
    pre_existing: dict[Path, bool],
    logger: logging.Logger,
) -> None:
    """Remove sidecar files this driver may have produced.

    Only files that did NOT exist when the driver started AND now exist
    are removed — pre-existing files (a warm cache from a prior run) are
    preserved. OS errors during cleanup are logged but do not mask the
    original exception.

    Args:
        sidecar_paths: Candidate artifact paths to consider.
        pre_existing: Snapshot mapping each path to whether it existed
            before the driver ran; paths missing from the map are
            treated as not pre-existing.
        logger: Destination for the removal / failure records.
    """
    for path in sidecar_paths:
        if pre_existing.get(path, False):
            continue
        # EAFP: the existence probe and the delete are a single call, so
        # no OSError can escape from a separate pre-check and replace the
        # exception that triggered this cleanup.
        try:
            path.unlink()
        except FileNotFoundError:
            # Never created (or already gone) — nothing to clean up.
            continue
        except OSError as exc:
            logger.error(
                "operator_pre_flight: cleanup unlink failed",
                extra={
                    "kind": "operator_pre_flight.cleanup.failed",
                    "kv": {"path": str(path), "exc": repr(exc)},
                },
            )
        else:
            logger.warning(
                "operator_pre_flight: cleaned up partial sidecar",
                extra={
                    "kind": "operator_pre_flight.cleanup.removed",
                    "kv": {"path": str(path)},
                },
            )