mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-21 08:31:13 +00:00
bfcac2cb9f
Replace the placeholder operator_pre_flight_setup pytest fixture (the mkdir stub at tests/e2e/replay/conftest.py:293-310) with a real driver that wires C1 (AZ-836 RouteSpec) + C2 (AZ-838 SatelliteProviderRouteClient) + C11 (AZ-316 HttpTileDownloader) + C10 (AZ-322 DescriptorBatcher) end-to-end and yields a typed PopulatedC6Cache. AZ-306 FAISS sidecar triple-consistency is verified post-rebuild via a caller-supplied descriptor_index_factory; partial sidecars are cleaned up on failure (AC-7) while pre-existing warm-cache files are preserved. Algorithm lives in tests/e2e/replay/_operator_pre_flight.py with pure dependency injection so the AC-8 unit suite (11 tests covering happy / transient-retry / terminal-failure / validation-error / tamper-detection / cleanup-on-failure) runs against stubs and the AC-9 Tier-2 integration test runs the same algorithm against the real Jetson harness. The conftest fixture skip-gates on RUN_REPLAY_E2E + SATELLITE_PROVIDER_URL/API_KEY + BUILD_FAISS_INDEX + GPS_DENIED_OPERATOR_CONFIG_PATH and wires deps through the existing runtime_root factories. Supersedes AZ-777 Phase 3. Co-authored-by: Cursor <cursoragent@cursor.com>
475 lines
18 KiB
Python
475 lines
18 KiB
Python
"""Operator pre-flight cache assembly driver (AZ-839 / Epic AZ-835 C3).
|
|
|
|
Replaces the placeholder ``operator_pre_flight_setup`` fixture stub at
|
|
``conftest.py`` lines 293-310 with a real driver that wires together
|
|
the four operator-side production components:
|
|
|
|
1. **C1 / AZ-836 RouteSpec** — already extracted by the caller via
|
|
:func:`gps_denied_onboard.replay_input.tlog_route.extract_route_from_tlog`
|
|
and handed in as :paramref:`populate_c6_from_route.route_spec`.
|
|
2. **C2 / AZ-838 SatelliteProviderRouteClient** — POSTs the route to
|
|
satellite-provider, polls ``mapsReady``.
|
|
3. **C11 / AZ-316 + AZ-777 Phase 1 HttpTileDownloader** — pulls the
|
|
seeded tiles from satellite-provider into C6 over a bbox derived
|
|
from the route waypoints.
|
|
4. **C10 / AZ-322 DescriptorBatcher** — rebuilds the FAISS HNSW
|
|
descriptor index over the populated C6 cache (NetVLAD backbone per
|
|
``c2_vpr/config.py:67``).
|
|
|
|
The descriptor index sidecar coherence (AZ-306 triple-consistency:
|
|
``.index`` + ``.sha256`` + ``.meta.json``) is verified by re-loading
|
|
the index after rebuild via the caller-supplied
|
|
``descriptor_index_factory``; any tampering surfaces as
|
|
:class:`IndexUnavailableError`.
|
|
|
|
Public surface — re-exported from this module:
|
|
|
|
* :class:`PopulatedC6Cache` — frozen dataclass returned on success.
|
|
* :func:`populate_c6_from_route` — the driver function.
|
|
|
|
Cleanup-on-failure removes any FAISS sidecar files produced inside the
|
|
driver if any later step raises. Tile-store rows written by C11 are
|
|
NOT deleted (the C6 store owns its own rollback semantics — leaving
|
|
those rows enables idempotent re-runs via the C11 download journal).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import time
|
|
from collections.abc import Callable
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Any
|
|
from uuid import UUID, uuid4
|
|
|
|
from gps_denied_onboard.components.c10_provisioning.descriptor_batcher import (
|
|
BatcherOutcome,
|
|
CorpusFilter,
|
|
DescriptorBatcher,
|
|
)
|
|
from gps_denied_onboard.components.c11_tile_manager import (
|
|
DownloadOutcome,
|
|
DownloadRequest,
|
|
HttpTileDownloader,
|
|
SectorClassification,
|
|
)
|
|
from gps_denied_onboard.components.c11_tile_manager.errors import (
|
|
RouteTerminalFailureError,
|
|
RouteTransientError,
|
|
RouteValidationError,
|
|
)
|
|
from gps_denied_onboard.components.c11_tile_manager.route_client import (
|
|
SatelliteProviderRouteClient,
|
|
)
|
|
from gps_denied_onboard.components.c6_tile_cache.errors import (
|
|
IndexUnavailableError,
|
|
)
|
|
from gps_denied_onboard.components.c6_tile_cache.faiss_descriptor_index import (
|
|
META_SUFFIX,
|
|
)
|
|
from gps_denied_onboard.helpers.sha256_sidecar import SIDECAR_SUFFIX
|
|
from gps_denied_onboard.replay_input.tlog_route import RouteSpec
|
|
|
|
__all__ = ["PopulatedC6Cache", "populate_c6_from_route"]


# Pause cadence (seconds) between transient route-seed retries. The values
# are meant to mirror C11's existing schedule so the fixture does not
# introduce a parallel retry budget; they are restated here as a literal
# and callers override them through the ``retry_schedule_s`` argument of
# ``populate_c6_from_route``.
_DEFAULT_RETRY_SCHEDULE_S: tuple[float, ...] = (1.0, 2.0, 4.0, 8.0)
# AC-5: total number of ``seed_route`` attempts before a transient error is
# propagated to the caller.
_DEFAULT_MAX_RETRY_ATTEMPTS: int = 3
# Single Web-Mercator zoom level populated when the caller does not pick one.
_DEFAULT_ZOOM_LEVEL: int = 18
# Sector classification applied when the caller does not pick one.
_DEFAULT_SECTOR_CLASS: SectorClassification = SectorClassification.ACTIVE_CONFLICT
# Metres per degree of latitude at the WGS84 mean radius — the same figure
# C11's route-coverage enumeration uses
# (route_client._enumerate_route_tile_coords). Restated here so the driver
# does not depend on a private constant.
_METERS_PER_DEGREE_LAT: float = 111_320.0

_LOGGER = logging.getLogger("tests.e2e.replay.operator_pre_flight")
|
|
|
|
|
|
@dataclass(frozen=True, slots=True)
|
|
class PopulatedC6Cache:
|
|
"""Output of :func:`populate_c6_from_route`.
|
|
|
|
Mirrors the public-surface dataclass documented in the AZ-839 spec.
|
|
All paths point at on-disk artifacts that survive the fixture's
|
|
``session`` scope (when mounted on the named docker volume the
|
|
e2e-runner declares); ``elapsed_seconds`` powers the AC-1 / AC-2
|
|
perf budget assertions.
|
|
"""
|
|
|
|
cache_root: Path
|
|
tile_store_path: Path
|
|
faiss_index_path: Path
|
|
faiss_sidecar_sha256_path: Path
|
|
faiss_sidecar_meta_path: Path
|
|
route_spec: RouteSpec
|
|
tile_count: int
|
|
elapsed_seconds: float
|
|
|
|
|
|
def populate_c6_from_route(
    *,
    route_spec: RouteSpec,
    route_client: SatelliteProviderRouteClient,
    tile_downloader: HttpTileDownloader,
    descriptor_batcher: DescriptorBatcher,
    descriptor_index_factory: Callable[[], Any],
    cache_root: Path,
    tile_store_path: Path,
    faiss_index_path: Path,
    flight_id: UUID | None = None,
    sector_class: SectorClassification = _DEFAULT_SECTOR_CLASS,
    zoom_level: int = _DEFAULT_ZOOM_LEVEL,
    region_size_meters: float | None = None,
    retry_schedule_s: tuple[float, ...] = _DEFAULT_RETRY_SCHEDULE_S,
    max_retry_attempts: int = _DEFAULT_MAX_RETRY_ATTEMPTS,
    sleep: Callable[[float], None] = time.sleep,
    monotonic: Callable[[], float] = time.monotonic,
    logger: logging.Logger | None = None,
) -> PopulatedC6Cache:
    """Drive the full C1+C2+C11+C10 pipeline end-to-end.

    Steps, in order: seed the route on satellite-provider (with bounded
    transient retries), download the tiles covering the route's bounding
    box, rebuild the descriptor index over that bbox, then construct a
    fresh descriptor index via ``descriptor_index_factory`` as the
    coherence check.

    Args:
        route_spec: Coarsened route from AZ-836's
            :func:`extract_route_from_tlog`. The caller chooses the
            tlog (typically a session-scoped fixture); this driver is
            tlog-agnostic.
        route_client: Configured C2 client. Built from env vars by the
            production fixture; injected as a stub by unit tests.
        tile_downloader: Configured C11 downloader. Same wiring rules.
        descriptor_batcher: Configured C10 batcher; its rebuild path
            owns the on-disk FAISS write (atomic via
            :class:`Sha256Sidecar`).
        descriptor_index_factory: Zero-arg callable that constructs a
            FRESH descriptor index pointed at ``faiss_index_path``.
            Production passes
            ``lambda: FaissDescriptorIndex.from_config(config)``; the
            constructor auto-loads via
            :meth:`FaissDescriptorIndex._load`, raising
            :class:`IndexUnavailableError` on triple-consistency
            failure (AC-3 / AC-6 verification).
        cache_root: Root directory mounted on the named docker volume
            that survives across pytest sessions.
        tile_store_path: Where C6's :class:`TileStore` writes JPEG
            blobs. Carried on the result for downstream tests.
        faiss_index_path: Final ``.index`` blob path. The two sidecars
            live next to it, at ``faiss_index_path`` plus the SHA-256
            and metadata sidecar suffixes.
        flight_id: C11 download-journal key; ``None`` generates a fresh
            UUID so two fixture sessions never collide their journals.
        sector_class: C11 / C6 sector classification. Defaults to
            ``ACTIVE_CONFLICT`` — Derkachi is an active-conflict
            corridor; ``STABLE_REAR`` is for non-Ukraine clips.
        zoom_level: Single Web-Mercator zoom level the fixture
            populates. AZ-839 spec defaults to 18 to match
            ``seed_route.py`` ergonomics; tests override for speed.
        region_size_meters: Edge length, in metres, of the coverage
            square centred on each waypoint (the bbox extends half of
            this value in every direction). ``None`` falls back to
            :attr:`RouteSpec.suggested_region_size_meters`.
        retry_schedule_s: Pause cadence between transient retries.
        max_retry_attempts: Total :meth:`seed_route` attempts on
            transient error before propagating (AC-5 — final
            attempt's exception is propagated unchanged).
        sleep: Test override for the retry pause; production passes
            :func:`time.sleep`.
        monotonic: Test override for elapsed-time measurement.
        logger: Optional logger. Defaults to the module logger.

    Returns:
        :class:`PopulatedC6Cache` on success.

    Raises:
        ValueError: ``max_retry_attempts`` is below 1,
            the effective region size is non-positive or not finite,
            ``route_spec.waypoints`` is empty, or the waypoints yield a
            degenerate bounding box.
        RouteValidationError: Pre-emptive validation or HTTP 4xx —
            propagated unchanged with original cause (AC-4).
        RouteTerminalFailureError: ``mapsReady`` never reached or
            terminal failure status — propagated unchanged (AC-4).
        RouteTransientError: 5xx / network / timeout AFTER all retry
            attempts have been exhausted (AC-5).
        IndexUnavailableError: Triple-consistency check failed after
            rebuild — sidecars are corrupt / mismatched (AC-3 / AC-6).
        RuntimeError: C11 ``download_tiles_for_area`` returned a
            non-success outcome OR C10 ``populate_descriptors``
            returned a non-``SUCCESS`` outcome.

    Notes:
        Cleanup behaviour (AC-7) — if any step raises once the pipeline
        has started, index / sidecar files that did not exist when the
        driver was entered are removed before the exception propagates
        so a re-run starts from a clean slate. Files that already
        existed (a warm cache) are left alone. Tile-store rows are NOT
        deleted on cleanup; the C11 download journal owns idempotent
        re-run semantics.
    """
    import math

    log = logger or _LOGGER
    if max_retry_attempts < 1:
        raise ValueError(
            f"max_retry_attempts must be >= 1; got {max_retry_attempts}"
        )

    started_monotonic = monotonic()
    effective_flight_id = flight_id if flight_id is not None else uuid4()
    effective_region_size = float(
        region_size_meters
        if region_size_meters is not None
        else route_spec.suggested_region_size_meters
    )
    if effective_region_size <= 0:
        raise ValueError(
            f"region_size_meters must be > 0; got {effective_region_size}"
        )
    # NaN and +inf slip past the ``<= 0`` guard above; reject them here so
    # a nonsensical size never reaches the remote seed_route call.
    if not math.isfinite(effective_region_size):
        raise ValueError(
            f"region_size_meters must be finite; got {effective_region_size}"
        )
    if not route_spec.waypoints:
        raise ValueError("route_spec.waypoints must be non-empty")

    sidecar_paths = (
        faiss_index_path,
        Path(str(faiss_index_path) + SIDECAR_SUFFIX),
        Path(str(faiss_index_path) + META_SUFFIX),
    )
    # Snapshot which artifacts already exist so failure cleanup only
    # removes files produced during this run.
    pre_existing_sidecar = {p: p.is_file() for p in sidecar_paths}

    try:
        seed_result = _seed_route_with_retry(
            route_client=route_client,
            spec=route_spec,
            region_size_meters=effective_region_size,
            zoom_level=zoom_level,
            retry_schedule_s=retry_schedule_s,
            max_retry_attempts=max_retry_attempts,
            sleep=sleep,
            logger=log,
        )

        # bbox is (min_lat, min_lon, max_lat, max_lon).
        bbox = _route_bbox(
            waypoints=route_spec.waypoints,
            region_size_meters=effective_region_size,
        )
        download_request = DownloadRequest(
            flight_id=effective_flight_id,
            bbox_min_lat=bbox[0],
            bbox_min_lon=bbox[1],
            bbox_max_lat=bbox[2],
            bbox_max_lon=bbox[3],
            zoom_levels=(int(zoom_level),),
            sector_class=sector_class,
            cache_root=cache_root,
        )
        download_report = tile_downloader.download_tiles_for_area(download_request)
        if download_report.outcome not in {
            DownloadOutcome.SUCCESS,
            DownloadOutcome.IDEMPOTENT_NO_OP,
        }:
            raise RuntimeError(
                "C11 download_tiles_for_area returned non-success outcome "
                f"{download_report.outcome.value!r}; "
                f"requested={download_report.tiles_requested} "
                f"downloaded={download_report.tiles_downloaded} "
                f"rejected_resolution={download_report.tiles_rejected_resolution} "
                f"rejected_freshness={download_report.tiles_rejected_freshness}"
            )

        log.info(
            "operator_pre_flight: tiles populated",
            extra={
                "kind": "operator_pre_flight.tiles_populated",
                "kv": {
                    "route_id": str(seed_result.route_id),
                    "seeded_tile_count": seed_result.tile_count,
                    "downloaded_tiles": download_report.tiles_downloaded,
                    "request_hash": download_report.request_hash,
                },
            },
        )

        corpus_filter = CorpusFilter(
            bbox=bbox,
            zoom_levels=(int(zoom_level),),
            sector_class=sector_class.value,
        )
        batcher_report = descriptor_batcher.populate_descriptors(corpus_filter)
        if batcher_report.outcome is not BatcherOutcome.SUCCESS:
            raise RuntimeError(
                "C10 populate_descriptors returned FAILURE: "
                f"{batcher_report.failure_reason}"
            )

        # Constructing the index is the verification step: the factory is
        # expected to raise if the rebuilt artifacts are inconsistent.
        verifier_index = descriptor_index_factory()
        log.debug(
            "operator_pre_flight: sidecar coherence verified",
            extra={
                "kind": "operator_pre_flight.sidecar_verified",
                "kv": {
                    "faiss_index_path": str(faiss_index_path),
                    "verifier_type": type(verifier_index).__name__,
                },
            },
        )

        # Clamp at zero in case an injected clock is not monotonic.
        elapsed_seconds = max(0.0, monotonic() - started_monotonic)
        return PopulatedC6Cache(
            cache_root=cache_root,
            tile_store_path=tile_store_path,
            faiss_index_path=faiss_index_path,
            faiss_sidecar_sha256_path=sidecar_paths[1],
            faiss_sidecar_meta_path=sidecar_paths[2],
            route_spec=route_spec,
            tile_count=batcher_report.tiles_consumed,
            elapsed_seconds=elapsed_seconds,
        )
    except BaseException:
        # BaseException on purpose: partial artifacts must also be removed
        # on KeyboardInterrupt / SystemExit. The exception is always
        # re-raised unchanged.
        _cleanup_partial_sidecars(
            sidecar_paths=sidecar_paths,
            pre_existing=pre_existing_sidecar,
            logger=log,
        )
        raise
|
|
|
|
|
|
def _seed_route_with_retry(
    *,
    route_client: SatelliteProviderRouteClient,
    spec: RouteSpec,
    region_size_meters: float,
    zoom_level: int,
    retry_schedule_s: tuple[float, ...],
    max_retry_attempts: int,
    sleep: Callable[[float], None],
    logger: logging.Logger,
) -> Any:
    """Call ``seed_route`` with bounded transient retries (AC-5).

    Validation / terminal-failure errors propagate IMMEDIATELY with
    their original cause (AC-4 — no silent swallow). Only
    :class:`RouteTransientError` triggers the retry ladder; the final
    attempt's exception is re-raised unchanged so the caller sees
    the actual transient signal that exhausted the budget.

    Args:
        route_client: Object whose ``seed_route`` method is invoked.
        spec: Route passed positionally to ``seed_route``.
        region_size_meters: Forwarded to ``seed_route`` as a keyword.
        zoom_level: Forwarded to ``seed_route`` as a keyword.
        retry_schedule_s: Pause, in seconds, before retry 1, 2, ...;
            the last entry is reused once the schedule runs out. An
            empty schedule means "retry without pausing".
        max_retry_attempts: Total number of ``seed_route`` calls allowed.
        sleep: Callable used to pause between attempts.
        logger: Receives one warning per transient failure.

    Returns:
        Whatever ``seed_route`` returns on the first successful attempt.

    Raises:
        RouteValidationError: Propagated unchanged, never retried.
        RouteTerminalFailureError: Propagated unchanged, never retried.
        RouteTransientError: The last attempt's error, once the attempt
            budget is exhausted.
        RuntimeError: ``max_retry_attempts`` is below 1, so no attempt
            was made at all.
    """
    for attempt in range(1, max_retry_attempts + 1):
        try:
            return route_client.seed_route(
                spec,
                region_size_meters=region_size_meters,
                zoom_level=zoom_level,
            )
        except (RouteValidationError, RouteTerminalFailureError):
            # Listed before the transient handler so these are never
            # retried, whatever the exception hierarchy looks like.
            raise
        except RouteTransientError as exc:
            logger.warning(
                "operator_pre_flight: route seed transient failure",
                extra={
                    "kind": "operator_pre_flight.route_seed.transient",
                    "kv": {
                        "attempt": attempt,
                        "max_attempts": max_retry_attempts,
                        "exc": repr(exc),
                    },
                },
            )
            if attempt >= max_retry_attempts:
                raise
            if retry_schedule_s:
                # Clamp to the last entry once the schedule is exhausted.
                pause_s = retry_schedule_s[
                    min(attempt - 1, len(retry_schedule_s) - 1)
                ]
            else:
                # Indexing an empty schedule would raise IndexError and
                # hide the transient error; retry immediately instead.
                pause_s = 0.0
            sleep(pause_s)
    # Only reachable when max_retry_attempts < 1 (the loop never ran):
    # every executed iteration either returns or raises.
    raise RuntimeError(
        "operator_pre_flight: seed_route loop exited without result"
    )
|
|
|
|
|
|
def _route_bbox(
    *,
    waypoints: tuple[tuple[float, float], ...],
    region_size_meters: float,
) -> tuple[float, float, float, float]:
    """Return the union bbox of the coverage squares around all waypoints.

    Each ``(lat, lon)`` waypoint is the centre of a square whose edge is
    ``region_size_meters``; the extents of all squares are merged into a
    single box. This mirrors the local enumeration in
    :func:`gps_denied_onboard.components.c11_tile_manager.route_client._enumerate_route_tile_coords`.
    The box is what :meth:`HttpTileDownloader.download_tiles_for_area`
    consumes; C11 then runs the standard slippy-map enumeration over it
    at the requested zoom level.

    Args:
        waypoints: ``(lat_deg, lon_deg)`` pairs.
        region_size_meters: Edge length of each waypoint's square.

    Returns:
        ``(min_lat, min_lon, max_lat, max_lon)`` — matching
        :class:`DownloadRequest`'s field order.

    Raises:
        ValueError: The resulting box is empty or inverted, which
            includes the case of no waypoints at all.
    """
    import math

    half_edge_m = region_size_meters / 2.0
    # The north/south reach is the same for every waypoint.
    lat_reach_deg = half_edge_m / _METERS_PER_DEGREE_LAT

    south, north = float("inf"), float("-inf")
    west, east = float("inf"), float("-inf")
    for lat_deg, lon_deg in waypoints:
        # A degree of longitude shrinks with cos(latitude); floor the
        # factor so the division stays finite near the poles.
        lon_scale = max(math.cos(math.radians(lat_deg)), 1e-9)
        lon_reach_deg = half_edge_m / (_METERS_PER_DEGREE_LAT * lon_scale)

        south = min(south, lat_deg - lat_reach_deg)
        north = max(north, lat_deg + lat_reach_deg)
        west = min(west, lon_deg - lon_reach_deg)
        east = max(east, lon_deg + lon_reach_deg)

    if south >= north or west >= east:
        raise ValueError(
            "operator_pre_flight: degenerate bbox from route waypoints "
            f"(min_lat={south}, max_lat={north}, "
            f"min_lon={west}, max_lon={east})"
        )
    return (south, west, north, east)
|
|
|
|
|
|
def _cleanup_partial_sidecars(
    *,
    sidecar_paths: tuple[Path, ...],
    pre_existing: dict[Path, bool],
    logger: logging.Logger,
) -> None:
    """Remove sidecar files this driver may have produced.

    Only files that did NOT exist when the driver started AND now
    exist are removed — pre-existing files (a warm cache from a prior
    run) are preserved. OS errors during cleanup are logged but do
    not mask the original exception.

    Args:
        sidecar_paths: Candidate paths to clean up.
        pre_existing: Maps each path to whether it already existed when
            the driver started; a path missing from the mapping is
            treated as not pre-existing.
        logger: Receives a warning per removed file and an error per
            failed removal.
    """
    for path in sidecar_paths:
        if pre_existing.get(path, False):
            continue
        try:
            # The existence probe sits inside the ``try`` because it can
            # itself raise OSError (e.g. PermissionError on a parent
            # directory); that must be logged, not allowed to replace
            # the exception that triggered the cleanup.
            if not path.exists():
                continue
            path.unlink()
        except FileNotFoundError:
            # Vanished between the probe and the unlink — already gone.
            continue
        except OSError as exc:
            logger.error(
                "operator_pre_flight: cleanup unlink failed",
                extra={
                    "kind": "operator_pre_flight.cleanup.failed",
                    "kv": {"path": str(path), "exc": repr(exc)},
                },
            )
            continue
        logger.warning(
            "operator_pre_flight: cleaned up partial sidecar",
            extra={
                "kind": "operator_pre_flight.cleanup.removed",
                "kv": {"path": str(path)},
            },
        )
|