mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-23 00:21:12 +00:00
235eb4549e
Closes cumulative review batches 49-51 Finding F1 (Medium / Maintainability) -- the 7-way duplication of _assert_engine_output_dim across c2_vpr secondary VPR strategy modules. Add c2-internal helper assert_engine_output_dim(inference_runtime, handle, preprocessor, descriptor_dim, *, output_key='embedding', input_key='input') in src/gps_denied_onboard/components/c2_vpr/_engine_dim_assertion.py. The helper runs a zero-init dry-run inference at preprocessor.input_shape() and asserts the engine output dict carries (1, descriptor_dim) under output_key. Raises gps_denied_onboard.config.schema.ConfigError on mismatch (preserving the prior error envelope and message wording byte-identically). Migrate 7 strategy modules (ultra_vpr, net_vlad, mega_loc, mix_vpr, sela_vpr, eigen_places, salad) to import the helper and delete the local _assert_engine_output_dim definitions + their inline 'AZ-527 (planned)' comments. NetVLAD is the only call site that overrides output_key='vlad_descriptor'; the other 6 explicitly pass output_key=_OUTPUT_KEY + input_key=_ENGINE_INPUT_KEY (matching helper defaults but documenting strategy contract at the call site). Add tests/unit/c2_vpr/test_az527_engine_dim_assertion.py (14 tests, AAA pattern, Protocol-conforming fakes) covering AC-1..AC-4: helper signature; wrong shape raises ConfigError naming both dims; missing output key raises ConfigError naming the missing key; AST-walk regression guard for stray definitions outside the helper module (modeled on AZ-526's test_ac4_az526_no_module_level_iso_ts_from_clock_outside_helper); import-grep regression guard verifying all 7 strategy modules import the helper. AC-5 (existing AZ-337/338/339/340 AC-6 sub-tests pass unmodified) is exercised transitively: c2_vpr/ full directory 230/230 PASS, no test file modified outside the new test_az527_*. AC-6 (AZ-270 + AZ-507 layer lints) verified by tests/unit/test_az270_compose_root.py 8/8 PASS. Code-review verdict: PASS (zero findings). Ruff clean.
Co-authored-by: Cursor <cursoragent@cursor.com>
434 lines
15 KiB
Python
434 lines
15 KiB
Python
"""``MegaLocStrategy`` — C2 secondary VprStrategy for IT-12 (AZ-339).
|
|
|
|
MegaLoc is one of two secondary backbones (alongside :class:`MixVprStrategy`)
|
|
shipped exclusively in the research binary for the IT-12 comparative-study
|
|
matrix (``components/02_c2_vpr/description.md`` § 1 + § 5). Per ADR-002,
|
|
``BUILD_VPR_MEGALOC`` is ON for the research binary and replay-cli, OFF
|
|
for the airborne and operator-tooling binaries — selecting ``mega_loc``
|
|
on a binary without the flag fails fast at composition-root time via
|
|
:class:`StrategyNotAvailableError` (not at first frame).
|
|
|
|
The strategy runs on the C7 TensorRT runtime (AZ-298), or the ONNX-Runtime
|
|
fallback (AZ-299), via the local :class:`InferenceRuntimeCut` (AZ-507).
|
|
Engine output key is ``"embedding"`` and the strategy applies single-stage
|
|
global L2 normalisation (no NetVLAD-style intra-cluster step). Retrieval
|
|
delegates to :class:`FaissBridge` (AZ-341).
|
|
|
|
Architecture-registry differences from :class:`NetVladStrategy`:
|
|
|
|
MegaLoc consumes a pre-compiled ``.trt`` engine produced by C10's engine
|
|
compiler (AZ-321) — there is no PyTorch ``nn.Module`` to register, so
|
|
the module does NOT expose ``MODEL_NAME`` / ``architecture_factory``.
|
|
:func:`gps_denied_onboard.runtime_root.vpr_factory._register_strategy_architecture`
|
|
no-ops for this strategy.
|
|
|
|
Engine load happens in :func:`create` (NOT at first frame) so the
|
|
engine-output-shape assertion (AC-6) surfaces at startup, not after
|
|
takeoff.
|
|
|
|
Per-frame :meth:`embed_query` pipeline:
|
|
|
|
1. ``preprocessor.preprocess(frame, calibration)`` ->
|
|
``(1, 3, 322, 322)`` FP16 NCHW ndarray.
|
|
2. ``inference_runtime.infer(handle, {"input": tensor})`` ->
|
|
``{"embedding": (1, 2048) FP16 ndarray}``.
|
|
3. ``normaliser.l2_normalise(raw[0])`` -> global L2 (single-stage).
|
|
4. Return :class:`VprQuery` with ``frame_id``, normalised embedding,
|
|
produced_at monotonic ns.
|
|
|
|
Error envelope: every method raises only members of :class:`VprError`.
|
|
``RuntimeError`` from the backbone forward -> rewrapped to
|
|
:class:`VprBackboneError`; :class:`VprPreprocessError` from the
|
|
preprocessor propagates unchanged.
|
|
|
|
Retrieval is a single-line delegation to :class:`FaissBridge.retrieve`;
|
|
see AZ-341 AC-10.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from typing import TYPE_CHECKING, Final, Literal
|
|
|
|
import numpy as np
|
|
|
|
from gps_denied_onboard._types.inference import (
|
|
BuildConfig,
|
|
EngineHandle,
|
|
PrecisionMode,
|
|
)
|
|
from gps_denied_onboard._types.vpr import VprQuery, VprResult
|
|
from gps_denied_onboard.clock import Clock
|
|
from gps_denied_onboard.components.c2_vpr._engine_dim_assertion import (
|
|
assert_engine_output_dim,
|
|
)
|
|
from gps_denied_onboard.components.c2_vpr._faiss_bridge import FaissBridge
|
|
from gps_denied_onboard.components.c2_vpr._preprocessor_mega_loc import (
|
|
MegaLocBackbonePreprocessor,
|
|
)
|
|
from gps_denied_onboard.components.c2_vpr.descriptor_index_cut import (
|
|
DescriptorIndexCut,
|
|
)
|
|
from gps_denied_onboard.components.c2_vpr.errors import (
|
|
VprBackboneError,
|
|
VprPreprocessError,
|
|
)
|
|
from gps_denied_onboard.components.c2_vpr.inference_runtime_cut import (
|
|
InferenceRuntimeCut,
|
|
)
|
|
from gps_denied_onboard.config.schema import ConfigError
|
|
from gps_denied_onboard.fdr_client import EnqueueResult, FdrClient
|
|
from gps_denied_onboard.fdr_client.records import (
|
|
CURRENT_SCHEMA_VERSION,
|
|
FdrRecord,
|
|
)
|
|
from gps_denied_onboard.helpers.descriptor_normaliser import DescriptorNormaliser
|
|
from gps_denied_onboard.helpers.iso_timestamps import (
|
|
iso_ts_from_clock as _iso_ts_from_clock,
|
|
)
|
|
|
|
if TYPE_CHECKING:
|
|
from gps_denied_onboard._types.calibration import CameraCalibration
|
|
from gps_denied_onboard._types.nav import NavCameraFrame
|
|
from gps_denied_onboard.config.schema import Config
|
|
|
|
__all__ = ["DESCRIPTOR_DIM", "MegaLocStrategy", "create"]
|
|
|
|
|
|
# Embedding width published for MegaLoc by the upstream research code drop
# (D=2048). create() asserts the engine output shape against this value, so
# editing it would silently break AC-2 / AC-4 / AC-5 / AC-6.
DESCRIPTOR_DIM: Final[int] = 2048

# Strategy identity and engine tensor names.
_BACKBONE_LABEL: Final[Literal["mega_loc"]] = "mega_loc"
_COMPONENT: Final[str] = "c2_vpr"
_OUTPUT_KEY: Final[str] = "embedding"
_ENGINE_INPUT_KEY: Final[str] = "input"

# Runtime labels under which create() agrees to build the strategy.
_ALLOWED_RUNTIME_LABELS: Final[frozenset[str]] = frozenset(
    ("tensorrt", "onnx_trt_ep")
)

# ``kind`` values attached to structured log records.
_LOG_KIND_READY: Final[str] = "c2.vpr.ready"
_LOG_KIND_BACKBONE_ERROR: Final[str] = "c2.vpr.backbone_error"
_LOG_KIND_PREPROCESS_ERROR: Final[str] = "c2.vpr.preprocess_error"
_LOG_KIND_FDR_OVERRUN: Final[str] = "c2.vpr.fdr_overrun"

# ``kind`` values attached to FDR records.
_FDR_KIND_EMBED: Final[str] = "vpr.embed_query"
_FDR_KIND_BACKBONE_ERROR: Final[str] = "vpr.backbone_error"
_FDR_KIND_PREPROCESS_ERROR: Final[str] = "vpr.preprocess_error"
|
|
|
|
|
|
class MegaLocStrategy:
    """C2 secondary VprStrategy backed by a TRT MegaLoc engine.

    See module docstring for the engine-loading + per-frame pipeline.
    Stateless across frames (INV-2); single-threaded per instance
    (INV-1, per AZ-336).

    Every FDR record emitted by this class goes through :meth:`_enqueue`,
    so a record dropped with ``EnqueueResult.OVERRUN`` is logged for error
    records as well as for embed records.
    """

    def __init__(
        self,
        *,
        inference_runtime: InferenceRuntimeCut,
        engine_handle: EngineHandle,
        descriptor_index: DescriptorIndexCut,
        preprocessor: MegaLocBackbonePreprocessor,
        normaliser: DescriptorNormaliser,
        faiss_bridge: FaissBridge,
        fdr_client: FdrClient,
        clock: Clock,
        logger: logging.Logger,
        descriptor_dim: int = DESCRIPTOR_DIM,
    ) -> None:
        """Store the injected collaborators.

        Raises:
            ValueError: if ``descriptor_dim`` is smaller than 1.
        """
        if descriptor_dim < 1:
            raise ValueError(
                f"MegaLocStrategy.descriptor_dim must be >= 1; "
                f"got {descriptor_dim}"
            )
        self._inference_runtime = inference_runtime
        self._engine_handle = engine_handle
        self._descriptor_index = descriptor_index
        self._preprocessor = preprocessor
        self._normaliser = normaliser
        self._faiss_bridge = faiss_bridge
        self._fdr_client = fdr_client
        self._clock = clock
        self._logger = logger
        self._descriptor_dim = descriptor_dim

    def embed_query(
        self,
        frame: NavCameraFrame,
        calibration: CameraCalibration,
    ) -> VprQuery:
        """Preprocess ``frame``, run the engine and return the normalised query.

        Raises:
            VprPreprocessError: propagated unchanged from the preprocessor,
                after an error record has been emitted.
            VprBackboneError: if the inference call raises, if the output
                has no ``_OUTPUT_KEY`` entry, or if the output shape is not
                ``(1, descriptor_dim)``.
        """
        try:
            tensor = self._preprocessor.preprocess(frame, calibration)
        except VprPreprocessError as exc:
            self._emit_preprocess_error(frame, exc)
            raise

        # Only the inference call itself is timed.
        ns_start = self._clock.monotonic_ns()
        try:
            outputs = self._inference_runtime.infer(
                self._engine_handle, {_ENGINE_INPUT_KEY: tensor}
            )
        except Exception as exc:
            # Keep the error envelope: anything the runtime raises is
            # rewrapped as VprBackboneError with the cause chained.
            wrapped = self._wrap_backbone_error(frame, exc)
            raise wrapped from exc
        ns_end = self._clock.monotonic_ns()
        # Clamp to 1 so a sub-microsecond interval is never recorded as 0.
        latency_us = max(1, (ns_end - ns_start) // 1_000)

        if _OUTPUT_KEY not in outputs:
            err = VprBackboneError(
                f"MegaLoc forward returned no {_OUTPUT_KEY!r} key; "
                f"got {sorted(outputs.keys())!r}"
            )
            self._emit_backbone_error(frame, err)
            raise err

        raw = np.asarray(outputs[_OUTPUT_KEY])
        # Equivalent to checking ndim == 2, shape[0] == 1 and
        # shape[1] == descriptor_dim one by one.
        if raw.shape != (1, self._descriptor_dim):
            err = VprBackboneError(
                f"MegaLoc forward returned shape {raw.shape}; "
                f"expected (1, {self._descriptor_dim})"
            )
            self._emit_backbone_error(frame, err)
            raise err

        flat = np.ascontiguousarray(raw[0], dtype=np.float16)
        normalised = self._normaliser.l2_normalise(flat)

        self._emit_embed_record(
            frame_id=int(frame.frame_id), latency_us=int(latency_us)
        )

        return VprQuery(
            frame_id=int(frame.frame_id),
            embedding=normalised,
            produced_at=ns_end,
        )

    def retrieve_topk(self, query: VprQuery, k: int) -> VprResult:
        """Delegate retrieval to the injected :class:`FaissBridge`."""
        return self._faiss_bridge.retrieve(
            query, k, backbone_label=_BACKBONE_LABEL
        )

    def descriptor_dim(self) -> int:
        """Return the descriptor dimension this instance was built with."""
        return self._descriptor_dim

    def _wrap_backbone_error(
        self, frame: NavCameraFrame, exc: BaseException
    ) -> VprBackboneError:
        """Build a :class:`VprBackboneError` for ``exc`` and emit its records.

        The caller raises the returned error (chained with ``from exc``).
        """
        wrapped = VprBackboneError(
            f"MegaLoc forward raised {type(exc).__name__}: {exc}"
        )
        self._emit_backbone_error(frame, wrapped)
        return wrapped

    def _emit_embed_record(self, *, frame_id: int, latency_us: int) -> None:
        """Enqueue the per-frame ``vpr.embed_query`` FDR record."""
        record = FdrRecord(
            schema_version=CURRENT_SCHEMA_VERSION,
            ts=_iso_ts_from_clock(self._clock),
            producer_id=self._fdr_client.producer_id,
            kind=_FDR_KIND_EMBED,
            payload={
                "frame_id": frame_id,
                "backbone_label": _BACKBONE_LABEL,
                "descriptor_dim": self._descriptor_dim,
                "latency_us": latency_us,
            },
        )
        self._enqueue(record, fdr_kind=_FDR_KIND_EMBED, frame_id=frame_id)

    def _emit_backbone_error(
        self, frame: NavCameraFrame, error: BaseException
    ) -> None:
        """Log a backbone failure and enqueue its FDR record."""
        self._emit_error(
            frame,
            error,
            stage="backbone",
            log_kind=_LOG_KIND_BACKBONE_ERROR,
            fdr_kind=_FDR_KIND_BACKBONE_ERROR,
        )

    def _emit_preprocess_error(
        self, frame: NavCameraFrame, error: BaseException
    ) -> None:
        """Log a preprocessing failure and enqueue its FDR record."""
        self._emit_error(
            frame,
            error,
            stage="preprocess",
            log_kind=_LOG_KIND_PREPROCESS_ERROR,
            fdr_kind=_FDR_KIND_PREPROCESS_ERROR,
        )

    def _emit_error(
        self,
        frame: NavCameraFrame,
        error: BaseException,
        *,
        stage: str,
        log_kind: str,
        fdr_kind: str,
    ) -> None:
        """Shared body of the error emitters: one log line, one FDR record.

        ``stage`` is the word placed in the log message
        (``"MegaLoc <stage> error: ..."``); ``log_kind`` and ``fdr_kind``
        are the structured ``kind`` values for the log record and the FDR
        record respectively.
        """
        frame_id = int(frame.frame_id)
        error_type = type(error).__name__
        self._logger.error(
            f"MegaLoc {stage} error: {error}",
            extra={
                "component": _COMPONENT,
                "kind": log_kind,
                "kv": {
                    "frame_id": frame_id,
                    "backbone_label": _BACKBONE_LABEL,
                    "error_type": error_type,
                },
            },
        )
        self._enqueue(
            FdrRecord(
                schema_version=CURRENT_SCHEMA_VERSION,
                ts=_iso_ts_from_clock(self._clock),
                producer_id=self._fdr_client.producer_id,
                kind=fdr_kind,
                payload={
                    "frame_id": frame_id,
                    "backbone_label": _BACKBONE_LABEL,
                    "error_type": error_type,
                    # Cap the message stored in the FDR payload at 512 chars.
                    "error_message": str(error)[:512],
                },
            ),
            fdr_kind=fdr_kind,
            frame_id=frame_id,
        )

    def _enqueue(
        self, record: FdrRecord, *, fdr_kind: str, frame_id: int
    ) -> None:
        """Enqueue ``record`` and warn when the FDR client reports an overrun.

        A dropped record is never raised as an error; it is only logged, so
        emitting telemetry cannot break the per-frame path.
        """
        result = self._fdr_client.enqueue(record)
        if result == EnqueueResult.OVERRUN:
            self._logger.warning(
                f"FDR enqueue dropped {fdr_kind} record (buffer overrun)",
                extra={
                    "component": _COMPONENT,
                    "kind": _LOG_KIND_FDR_OVERRUN,
                    "kv": {
                        "frame_id": frame_id,
                        "backbone_label": _BACKBONE_LABEL,
                    },
                },
            )
|
|
|
|
|
|
def _build_trt_build_config() -> BuildConfig:
    """Return the :class:`BuildConfig` that :func:`create` hands to ``compile_engine``.

    The configuration requests FP16 precision with ``workspace_mb=0``, no
    calibration dataset and an empty tuple of optimization profiles.
    """
    settings = {
        "precision": PrecisionMode.FP16,
        "workspace_mb": 0,
        "calibration_dataset": None,
        "optimization_profiles": (),
    }
    return BuildConfig(**settings)
|
|
|
|
|
|
def create(
    config: Config,
    *,
    descriptor_index: DescriptorIndexCut,
    inference_runtime: InferenceRuntimeCut,
    fdr_client: FdrClient | None = None,
    clock: Clock | None = None,
    logger: logging.Logger | None = None,
) -> MegaLocStrategy:
    """Module-level factory consumed by :func:`build_vpr_strategy`.

    Steps, in order:

    1. Check ``inference_runtime.current_runtime_label()`` against
       ``_ALLOWED_RUNTIME_LABELS``; any other label raises
       :class:`ConfigError` at composition time.
    2. Read the ``c2_vpr`` block of ``config``.
    3. Require ``fdr_client``; fall back to ``WallClock()`` and the
       ``gps_denied_onboard.c2_vpr.mega_loc`` logger when ``clock`` /
       ``logger`` are not injected (tests inject them for determinism).
    4. Compile and deserialize the engine from ``backbone_weights_path``.
    5. Build the preprocessor, normaliser and :class:`FaissBridge`.
    6. Run ``assert_engine_output_dim`` -- a dry-run inference on a
       zero-init input whose mismatch raises :class:`ConfigError` BEFORE
       the strategy is bound (AC-6).
    7. Log the ``c2.vpr.ready`` record and return the strategy.

    Raises:
        ConfigError: unsupported runtime label, or engine output mismatch.
        ValueError: ``fdr_client`` was not supplied.
    """
    label = inference_runtime.current_runtime_label()
    if label not in _ALLOWED_RUNTIME_LABELS:
        message = (
            "MegaLoc requires BUILD_TENSORRT_RUNTIME=ON (or "
            "BUILD_ONNX_TRT_EP_RUNTIME=ON as fallback); this binary "
            f"has runtime_label={label!r}."
        )
        raise ConfigError(message)

    vpr_block = config.components["c2_vpr"]
    engine_source = vpr_block.backbone_weights_path

    if fdr_client is None:
        raise ValueError(
            "MegaLocStrategy.create: fdr_client is required; the "
            "composition root must inject the running FDR client."
        )
    if clock is None:
        # Imported lazily, only when no clock was injected.
        from gps_denied_onboard.clock.wall_clock import WallClock

        clock = WallClock()
    if logger is None:
        logger = logging.getLogger("gps_denied_onboard.c2_vpr.mega_loc")

    # Engine load happens here, at composition time, not at first frame.
    compiled = inference_runtime.compile_engine(
        engine_source, _build_trt_build_config()
    )
    engine_handle = inference_runtime.deserialize_engine(compiled)

    backbone_preprocessor = MegaLocBackbonePreprocessor(logger=logger)
    descriptor_normaliser = DescriptorNormaliser()
    bridge = FaissBridge(
        descriptor_index=descriptor_index,
        descriptor_dim=DESCRIPTOR_DIM,
        warn_top1_threshold=vpr_block.warn_top1_threshold,
        debug_log_per_frame_distances=vpr_block.debug_per_frame_distances,
        fdr_client=fdr_client,
        logger=logger,
        clock=clock,
    )

    # Keys are passed explicitly to document the strategy contract here.
    assert_engine_output_dim(
        inference_runtime,
        engine_handle,
        backbone_preprocessor,
        DESCRIPTOR_DIM,
        output_key=_OUTPUT_KEY,
        input_key=_ENGINE_INPUT_KEY,
    )

    ready_fields = {
        "component": _COMPONENT,
        "kind": _LOG_KIND_READY,
        "kv": {
            "strategy": _BACKBONE_LABEL,
            "descriptor_dim": DESCRIPTOR_DIM,
        },
    }
    logger.info("C2 VPR strategy ready", extra=ready_fields)

    strategy = MegaLocStrategy(
        inference_runtime=inference_runtime,
        engine_handle=engine_handle,
        descriptor_index=descriptor_index,
        preprocessor=backbone_preprocessor,
        normaliser=descriptor_normaliser,
        faiss_bridge=bridge,
        fdr_client=fdr_client,
        clock=clock,
        logger=logger,
        descriptor_dim=DESCRIPTOR_DIM,
    )
    return strategy
|
|
|
|
|