mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 08:11:12 +00:00
[AZ-839] [AZ-835] operator_pre_flight_setup real fixture (E-AZ-835 C3)
Replace the placeholder operator_pre_flight_setup pytest fixture (the mkdir stub at tests/e2e/replay/conftest.py:293-310) with a real driver that wires C1 (AZ-836 RouteSpec) + C2 (AZ-838 SatelliteProviderRouteClient) + C11 (AZ-316 HttpTileDownloader) + C10 (AZ-322 Descriptor Batcher) end-to-end and yields a typed PopulatedC6Cache. AZ-306 FAISS sidecar triple-consistency is verified post-rebuild via a caller-supplied descriptor_index_factory; partial sidecars are cleaned up on failure (AC-7) while pre-existing warm-cache files are preserved. Algorithm lives in tests/e2e/replay/_operator_pre_flight.py with pure dependency injection so the AC-8 unit suite (11 tests covering happy / transient-retry / terminal-failure / validation-error / tamper-detection / cleanup-on-failure) runs against stubs and the AC-9 Tier-2 integration test runs the same algorithm against the real Jetson harness. The conftest fixture skip-gates on RUN_REPLAY_E2E + SATELLITE_PROVIDER_URL/API_KEY + BUILD_FAISS_INDEX + GPS_DENIED_OPERATOR_CONFIG_PATH and wires deps through the existing runtime_root factories. Supersedes AZ-777 Phase 3. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
+374
-16
@@ -290,21 +290,379 @@ def replay_runner(derkachi_replay_inputs: DerkachiReplayInputs) -> Any:
|
||||
return _run
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def operator_pre_flight_setup(
    derkachi_replay_inputs: DerkachiReplayInputs,
    tmp_path_factory: pytest.TempPathFactory,
) -> Iterator["PopulatedC6Cache"]:
    """Operator C12 pre-flight: real C1+C2+C11+C10 wiring (AZ-839 / Epic AZ-835 C3).

    Replaces the AZ-404 placeholder (which only created and yielded an
    empty ``operator_cache`` directory). Drives the operator-side
    pre-flight pipeline end-to-end and yields the populated cache so
    AC-8 (operator workflow rehearsal) and the AZ-840 e2e orchestrator
    test can consume it.

    Skip gates (in evaluation order — first match wins), as implemented
    by ``_operator_pre_flight_skip_reason``:

    * ``RUN_REPLAY_E2E`` not in ``{1, true, yes, on}`` — same as
      every other heavy test in this directory.
    * ``SATELLITE_PROVIDER_URL`` / ``SATELLITE_PROVIDER_API_KEY``
      missing — the C2 route client cannot reach the parent suite.
    * ``BUILD_FAISS_INDEX`` not ON — the C6 ``DescriptorIndex``
      runtime is gated by the env flag (``storage_factory.py``).
    * ``GPS_DENIED_OPERATOR_CONFIG_PATH`` missing. The YAML it points
      at is expected to register c6_tile_cache + c7_inference +
      c10_provisioning + c11_tile_manager; the gate itself only checks
      that the variable is set, it does not inspect the file contents.

    See ``tests/e2e/replay/_operator_pre_flight.py::populate_c6_from_route``
    for the algorithm; this fixture only owns the
    runtime-factory wiring + skip gates.

    Yields:
        Whatever ``_build_operator_pre_flight_cache`` yields (annotated
        as ``PopulatedC6Cache``).
    """

    skip_reason = _operator_pre_flight_skip_reason()
    if skip_reason is not None:
        pytest.skip(skip_reason)

    # Delegating with ``yield from`` keeps the builder's teardown
    # (its ``finally`` block) tied to this fixture's teardown.
    yield from _build_operator_pre_flight_cache(
        derkachi_replay_inputs=derkachi_replay_inputs,
        tmp_path_factory=tmp_path_factory,
    )
|
||||
|
||||
|
||||
def _operator_pre_flight_skip_reason() -> str | None:
|
||||
"""Return a SKIP reason string when env / build flags are not viable.
|
||||
|
||||
Centralised so the conditions stay testable + documented in one
|
||||
place. Returns ``None`` when the fixture is allowed to run.
|
||||
"""
|
||||
|
||||
if os.environ.get("RUN_REPLAY_E2E", "").strip().lower() not in {
|
||||
"1",
|
||||
"true",
|
||||
"yes",
|
||||
"on",
|
||||
}:
|
||||
return "AZ-839 operator_pre_flight_setup gated by RUN_REPLAY_E2E=1"
|
||||
sp_url = os.environ.get("SATELLITE_PROVIDER_URL", "").strip()
|
||||
sp_jwt = os.environ.get("SATELLITE_PROVIDER_API_KEY", "").strip()
|
||||
if not sp_url:
|
||||
return (
|
||||
"AZ-839 operator_pre_flight_setup requires SATELLITE_PROVIDER_URL "
|
||||
"(e.g. https://satellite-provider:8080)"
|
||||
)
|
||||
if not sp_jwt:
|
||||
return (
|
||||
"AZ-839 operator_pre_flight_setup requires SATELLITE_PROVIDER_API_KEY "
|
||||
"(Bearer JWT for the parent-suite Route + Inventory APIs)"
|
||||
)
|
||||
if os.environ.get("BUILD_FAISS_INDEX", "").strip().lower() not in {
|
||||
"on",
|
||||
"1",
|
||||
"true",
|
||||
"yes",
|
||||
}:
|
||||
return (
|
||||
"AZ-839 operator_pre_flight_setup requires BUILD_FAISS_INDEX=ON "
|
||||
"(the C6 FaissDescriptorIndex runtime is build-flag-gated per "
|
||||
"runtime_root.storage_factory)"
|
||||
)
|
||||
if not os.environ.get("GPS_DENIED_OPERATOR_CONFIG_PATH", "").strip():
|
||||
return (
|
||||
"AZ-839 operator_pre_flight_setup requires "
|
||||
"GPS_DENIED_OPERATOR_CONFIG_PATH pointing at a YAML that "
|
||||
"registers c6_tile_cache + c7_inference + c10_provisioning + "
|
||||
"c11_tile_manager blocks (Jetson e2e harness sets this; "
|
||||
"dev macOS does not)"
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
def _build_operator_pre_flight_cache(
    *,
    derkachi_replay_inputs: DerkachiReplayInputs,
    tmp_path_factory: pytest.TempPathFactory,
) -> Iterator["PopulatedC6Cache"]:
    """Wire the operator-side runtime graph and run the AZ-839 driver.

    All imports of heavy collaborators (httpx, runtime_root factories,
    c10/c11/c6 modules) live inside this function so collection on
    dev macOS without the e2e env stays cheap (the SKIP path returns
    before reaching this body).

    Steps:

    1. Load the config named by ``GPS_DENIED_OPERATOR_CONFIG_PATH``.
    2. Create a temp cache root with a ``tile_store`` directory and a
       ``descriptor.index`` path.
    3. Extract a route (at most 10 waypoints) from the replay tlog.
    4. Build the route client, stores, tile downloader, backbone
       embedder and descriptor batcher.
    5. Run ``populate_c6_from_route`` and yield its result.

    The HTTP client is closed on every exit path: normal teardown, an
    exception, or a ``pytest.skip`` raised while the graph is being
    built.

    Args:
        derkachi_replay_inputs: Replay inputs; only ``tlog_path`` is
            read here.
        tmp_path_factory: pytest factory used to create the cache root.

    Yields:
        The value returned by ``populate_c6_from_route``.

    Raises:
        pytest.skip.Exception: when the config path is not a regular
            file, or when a nested helper (e.g. the backbone embedder
            builder) decides the environment cannot satisfy the run
            and re-running with the right env is the right next step.
    """

    import httpx

    from gps_denied_onboard.clock.wall_clock import WallClock
    from gps_denied_onboard.components.c11_tile_manager.route_client import (
        SatelliteProviderRouteClient,
    )
    from gps_denied_onboard.config.loader import load_config
    from gps_denied_onboard.replay_input.tlog_route import (
        extract_route_from_tlog,
    )
    from gps_denied_onboard.runtime_root.c10_factory import (
        build_descriptor_batcher,
        build_engine_compiler,
    )
    from gps_denied_onboard.runtime_root.c11_factory import (
        build_tile_downloader,
    )
    from gps_denied_onboard.runtime_root.storage_factory import (
        build_descriptor_index,
        build_tile_metadata_store,
        build_tile_store,
    )

    from tests.e2e.replay._operator_pre_flight import (
        populate_c6_from_route,
    )

    config_path = Path(os.environ["GPS_DENIED_OPERATOR_CONFIG_PATH"])
    if not config_path.is_file():
        pytest.skip(
            f"GPS_DENIED_OPERATOR_CONFIG_PATH points at a non-file: {config_path}"
        )
    config = load_config(os.environ, paths=[config_path])

    cache_root = tmp_path_factory.mktemp("operator_pre_flight_cache")
    tile_store_path = cache_root / "tile_store"
    tile_store_path.mkdir(parents=True, exist_ok=True)
    faiss_index_path = cache_root / "descriptor.index"

    route_spec = extract_route_from_tlog(
        derkachi_replay_inputs.tlog_path,
        max_waypoints=10,
    )

    sp_url = os.environ["SATELLITE_PROVIDER_URL"].strip()
    sp_jwt = os.environ["SATELLITE_PROVIDER_API_KEY"].strip()
    tls_insecure = os.environ.get(
        "SATELLITE_PROVIDER_TLS_INSECURE", ""
    ).strip().lower() in {"1", "true", "yes", "on"}

    route_client = SatelliteProviderRouteClient(
        base_url=sp_url,
        jwt=sp_jwt,
        tls_insecure=tls_insecure,
    )

    # NOTE(review): the stores and the descriptor index are built from
    # ``config``, while ``tile_store_path`` / ``faiss_index_path`` live
    # under the temp ``cache_root`` — confirm the harness config points
    # at the same locations.
    tile_store = build_tile_store(config)
    tile_metadata_store = build_tile_metadata_store(config)
    descriptor_index = build_descriptor_index(config)

    httpx_client = httpx.Client(
        verify=not tls_insecure,
        timeout=httpx.Timeout(30.0),
        headers={"Authorization": f"Bearer {sp_jwt}"},
    )
    # Everything after the client exists runs under try/finally so a
    # failure or skip anywhere in the wiring cannot leak the client.
    try:
        tile_downloader = build_tile_downloader(
            config,
            http_client=httpx_client,
            tile_store=tile_store,
            tile_metadata_store=tile_metadata_store,
            budget_enforcer=tile_store,
        )

        clock = WallClock()
        engine_compiler = build_engine_compiler(config)
        backbone_embedder = _build_replay_backbone_embedder(
            config=config,
            engine_compiler=engine_compiler,
            cache_root=cache_root,
        )

        descriptor_batcher = build_descriptor_batcher(
            config,
            backbone_embedder=backbone_embedder,
            tile_metadata_store=tile_metadata_store,
            tile_store=tile_store,
            descriptor_index=descriptor_index,
            clock=clock,
        )

        def _descriptor_index_factory() -> Any:
            # Builds a fresh index object over the on-disk index path;
            # handed to the driver, which decides when to call it.
            from gps_denied_onboard.components.c6_tile_cache.faiss_descriptor_index import (  # noqa: E501
                FaissDescriptorIndex,
            )
            from gps_denied_onboard.helpers.sha256_sidecar import Sha256Sidecar
            from gps_denied_onboard.logging import get_logger

            return FaissDescriptorIndex(
                index_path=faiss_index_path,
                sidecar=Sha256Sidecar(),
                logger=get_logger("c6_tile_cache.faiss_descriptor_index"),
            )

        populated = populate_c6_from_route(
            route_spec=route_spec,
            route_client=route_client,
            tile_downloader=tile_downloader,
            descriptor_batcher=descriptor_batcher,
            descriptor_index_factory=_descriptor_index_factory,
            cache_root=cache_root,
            tile_store_path=tile_store_path,
            faiss_index_path=faiss_index_path,
        )
        yield populated
    finally:
        httpx_client.close()
|
||||
|
||||
|
||||
def _build_replay_backbone_embedder(
    *,
    config: Any,
    engine_compiler: Any,
    cache_root: Path,
) -> Any:
    """Build the backbone embedder handed to the AZ-322 descriptor batcher.

    The replay-mode operator binary does not exist yet (tracked under
    Epic AZ-835), so the wiring is done inline here:

    1. ``build_backbone_specs(config)`` lists the configured backbones;
       an empty list skips the test.
    2. An ``EngineCompileRequest`` is sent to
       ``engine_compiler.compile_engines_for_corpus``. It uses FP16, no
       calibration data, placeholder host capabilities and an engine
       cache under ``cache_root / "engines"``. An empty result skips
       the test.
    3. The first result's ``entry`` is deserialised by the inference
       runtime from ``build_inference_runtime(config)``.
    4. The descriptor dimension comes from
       ``_resolve_replay_descriptor_dim`` for the first backbone spec.
    5. Everything is wrapped in a ``C7EngineBackboneEmbedder`` that
       decodes tiles with ``_default_tile_decoder`` and reads the
       output named ``"descriptor"``.

    Args:
        config: Loaded configuration; ``components["c10_provisioning"]``
            must expose ``workspace_mb``.
        engine_compiler: Object providing ``compile_engines_for_corpus``.
        cache_root: Directory under which ``engines/`` is created.

    Returns:
        The constructed ``C7EngineBackboneEmbedder``.
    """

    from gps_denied_onboard._types.inference import PrecisionMode
    from gps_denied_onboard._types.manifests import HostCapabilities
    from gps_denied_onboard.components.c10_provisioning.c7_engine_embedder import (
        C7EngineBackboneEmbedder,
    )
    from gps_denied_onboard.components.c10_provisioning.engine_compiler import (
        EngineCompileRequest,
    )
    from gps_denied_onboard.logging import get_logger
    from gps_denied_onboard.runtime_root.c10_factory import (
        build_backbone_specs,
    )
    from gps_denied_onboard.runtime_root.inference_factory import (
        build_inference_runtime,
    )

    backbone_specs = build_backbone_specs(config)
    if not backbone_specs:
        pytest.skip(
            "AZ-839 operator_pre_flight_setup: config has no "
            "c10_provisioning.backbones entries — the e2e harness "
            "config must declare at least one backbone (typically "
            "DINOv2-VPR or NetVLAD per AZ-321)."
        )

    # Placeholder host description; the values are fixed literals, not
    # probed from the machine.
    placeholder_host = HostCapabilities(
        gpu_name="replay-e2e",
        cuda_compute_capability=(0, 0),
        cuda_runtime_version="0.0",
        tensorrt_version="0.0",
        host_arch="unknown",
        host_os="linux",
        driver_version="unknown",
    )

    engines_dir = cache_root / "engines"
    engines_dir.mkdir(parents=True, exist_ok=True)

    compile_request = EngineCompileRequest(
        backbones=backbone_specs,
        calibration_path=None,
        cache_root=engines_dir,
        precision=PrecisionMode.FP16,
        host=placeholder_host,
        workspace_mb=int(config.components["c10_provisioning"].workspace_mb),
    )
    compiled = engine_compiler.compile_engines_for_corpus(compile_request)
    if not compiled:
        pytest.skip(
            "AZ-839 operator_pre_flight_setup: engine compiler returned "
            "empty results — corpus failed to compile."
        )

    # Only the first compiled engine / first backbone spec is used.
    primary_result = compiled[0]
    primary_spec = backbone_specs[0]

    runtime = build_inference_runtime(config)
    handle = runtime.deserialize_engine(primary_result.entry)
    dim = _resolve_replay_descriptor_dim(config, primary_spec)

    embedder_logger = get_logger("c10_provisioning.replay_backbone_embedder")
    return C7EngineBackboneEmbedder(
        inference_runtime=runtime,
        engine_handle=handle,
        input_name=primary_spec.input_name,
        output_name="descriptor",
        descriptor_dim=dim,
        tile_decoder=_default_tile_decoder,
        logger=embedder_logger,
    )
|
||||
|
||||
|
||||
def _resolve_replay_descriptor_dim(config: Any, spec: Any) -> int:
    """Return the descriptor output dimension for the AZ-839 NetVLAD baseline.

    Only ``c2_vpr.strategy == "net_vlad"`` is supported. In that case
    the block's ``netvlad_descriptor_dim`` is returned as an ``int``,
    defaulting to ``4096`` when the attribute is not present on the
    block.

    Every other situation skips the test: no components, no ``c2_vpr``
    block, or a different strategy. Supporting other backbones would
    mean replacing this resolver, which is out of scope for AZ-839.

    Args:
        config: Loaded configuration exposing a ``components`` mapping.
        spec: Backbone spec; only ``model_name`` is read, for the skip
            message.

    Returns:
        The NetVLAD descriptor dimension.
    """

    components = config.components
    vpr_block = components.get("c2_vpr") if components else None

    if vpr_block is None or getattr(vpr_block, "strategy", "") != "net_vlad":
        found_strategy = getattr(vpr_block, "strategy", "<missing>")
        pytest.skip(
            "AZ-839 operator_pre_flight_setup: descriptor_dim resolver "
            "only supports c2_vpr.strategy='net_vlad'; got "
            f"{found_strategy!r} on backbone "
            f"{spec.model_name!r}. See AZ-839 spec § Out of scope."
        )
        # pytest.skip always raises; this line only documents that.
        raise AssertionError("unreachable: pytest.skip raises")

    return int(getattr(vpr_block, "netvlad_descriptor_dim", 4096))
|
||||
|
||||
|
||||
def _default_tile_decoder(handle: Any) -> Any:
    """Decode an encoded tile image into a contiguous CHW float32 array.

    The bytes come from ``handle.read_bytes()`` when the handle has that
    method; otherwise the handle is used as a context manager and
    ``read()`` is called on the opened object. OpenCV decodes the bytes
    to HWC-uint8-BGR, which is then converted to RGB, transposed to CHW
    and scaled to ``[0, 1]`` as float32.

    The result is forced to C-contiguous memory: a transpose followed
    by ``astype`` keeps the permuted memory order, which consumers that
    copy the raw buffer would misread.

    Imports are lazy — no OpenCV penalty when this module is imported
    on dev macOS.

    Args:
        handle: Tile pixel handle (presumably a C6 ``TilePixelHandle``
            over JPEG bytes — confirm against the C6 component).

    Returns:
        A C-contiguous ``np.float32`` array of shape ``(3, H, W)``.

    Raises:
        RuntimeError: if the handle yields no bytes or OpenCV cannot
            decode them.
    """

    import cv2
    import numpy as np

    if hasattr(handle, "read_bytes"):
        blob = handle.read_bytes()
    else:
        with handle as opened:
            blob = opened.read()

    encoded = np.frombuffer(blob, dtype=np.uint8)
    if encoded.size == 0:
        # cv2.imdecode raises an opaque assertion error on an empty
        # buffer; fail with a message that names the real problem.
        raise RuntimeError("tile handle yielded no bytes to decode")

    bgr = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
    if bgr is None:
        raise RuntimeError("cv2.imdecode returned None for tile handle")

    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    chw = np.transpose(rgb, (2, 0, 1)).astype(np.float32) / 255.0
    return np.ascontiguousarray(chw)
|
||||
|
||||
Reference in New Issue
Block a user