mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 20:21:13 +00:00
[AZ-339] C2 MegaLoc + MixVPR secondary VPR backbones
Adds two research-only VprStrategy implementations for the IT-12 comparative-study matrix. MegaLocStrategy (D=2048, 322x322) and MixVprStrategy (D=4096, 320x320), both via C7 TensorRT FP16 with their own concrete BackbonePreprocessor. Single-stage global L2 normalisation; retrieval delegated to FaissBridge; FDR records + structured logs identical to UltraVPR. BUILD_VPR_MEGALOC and BUILD_VPR_MIXVPR ON for research/replay-cli only, OFF for airborne and operator-tooling (fail-fast at composition root via existing AZ-336 factory). Uses helpers.iso_ts_from_clock from day 1 — no new timestamp helper duplicates introduced. 36 parametrised AC tests + 25 protocol-conformance + 18 helper regression tests pass; 1690 / 1690 unit tests pass (excluding 1 pre-existing flaky cold-start subprocess test in c12). Verdict: PASS_WITH_WARNINGS — one Medium follow-on (AZ-527 to consolidate 4-way _assert_engine_output_dim) + one Low AC wording drift. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,199 @@
|
||||
"""MegaLoc backbone preprocessor (AZ-339).
|
||||
|
||||
MegaLoc's published preprocessing chain (per the research code drop):
|
||||
decode the nav-camera frame's image to RGB uint8, centre-crop to a
|
||||
square region respecting the camera calibration's principal point (or
|
||||
geometric centre + WARN log when calibration is absent), resize to
|
||||
``(322, 322)``, apply ImageNet mean/std normalisation, cast to FP16,
|
||||
reshape to NCHW.
|
||||
|
||||
Differences from :class:`UltraVprBackbonePreprocessor`:
|
||||
|
||||
- 322x322 input shape (vs UltraVPR's 384x384, MixVPR's 320x320).
|
||||
- Same calibration-aware centre-crop and ImageNet mean/std — these
|
||||
upstream conventions happen to align with UltraVPR but are NOT a
|
||||
shared dependency: the centre-crop logic is duplicated here per
|
||||
``components/02_c2_vpr/description.md`` § 6 so a future MegaLoc
|
||||
code drop can change its preprocessing without coupling other
|
||||
strategies' weights-versions.
|
||||
|
||||
This preprocessor is C2-internal and owned exclusively by
|
||||
:class:`MegaLocStrategy` — sharing across backbones is forbidden per
|
||||
``components/02_c2_vpr/description.md`` § 6.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Final
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from gps_denied_onboard.components.c2_vpr.errors import VprPreprocessError
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from gps_denied_onboard._types.calibration import CameraCalibration
|
||||
from gps_denied_onboard._types.nav import NavCameraFrame
|
||||
|
||||
__all__ = [
|
||||
"IMAGENET_MEAN",
|
||||
"IMAGENET_STD",
|
||||
"MEGA_LOC_INPUT_HW",
|
||||
"MegaLocBackbonePreprocessor",
|
||||
]
|
||||
|
||||
# Engine input resolution as (H, W); the constructor validates the same
# (H, W) layout for any override.
MEGA_LOC_INPUT_HW: Final[tuple[int, int]] = (322, 322)

# Per-channel ImageNet statistics, applied after pixels are scaled to [0, 1].
IMAGENET_MEAN: Final[tuple[float, float, float]] = (0.485, 0.456, 0.406)
IMAGENET_STD: Final[tuple[float, float, float]] = (0.229, 0.224, 0.225)

# Structured-log fields attached to the calibration-fallback warning.
_COMPONENT: Final[str] = "c2_vpr"
_LOG_KIND_CALIBRATION_MISSING: Final[str] = "c2.vpr.calibration_missing"
|
||||
|
||||
|
||||
class MegaLocBackbonePreprocessor:
    """Centre-crop (principal-point-aware) + resize + ImageNet-normalise + FP16 NCHW.

    Per-frame pipeline implemented by :meth:`preprocess`:

    1. validate ``frame.image`` as a uint8 ndarray, replicating a 2-D
       greyscale image to three channels;
    2. crop the largest square that fits the image, positioned around the
       calibration's principal point (geometric centre when the
       calibration is unusable);
    3. resize to ``input_shape``;
    4. scale to ``[0, 1]``, subtract ``mean`` and divide by ``std``;
    5. return a contiguous ``(1, 3, H, W)`` float16 array.
    """

    def __init__(
        self,
        *,
        input_shape: tuple[int, int] = MEGA_LOC_INPUT_HW,
        mean: tuple[float, float, float] = IMAGENET_MEAN,
        std: tuple[float, float, float] = IMAGENET_STD,
        logger: logging.Logger | None = None,
    ) -> None:
        """Validate and store the preprocessing parameters.

        Args:
            input_shape: Target ``(H, W)`` of the resized image.
            mean: Per-channel mean subtracted after scaling to ``[0, 1]``.
            std: Per-channel divisor applied after mean subtraction.
            logger: Destination for the calibration-fallback warning;
                defaults to ``gps_denied_onboard.c2_vpr.mega_loc``.

        Raises:
            ValueError: If ``input_shape`` is not a 2-tuple of positive
                ints, ``mean`` / ``std`` are not length 3, or any ``std``
                component is not strictly positive.
        """
        if (
            not isinstance(input_shape, tuple)
            or len(input_shape) != 2
            or any(not isinstance(v, int) or v <= 0 for v in input_shape)
        ):
            raise ValueError(
                f"MegaLocBackbonePreprocessor.input_shape must be a (H, W) "
                f"tuple of positive ints; got {input_shape!r}"
            )
        if len(mean) != 3 or len(std) != 3:
            raise ValueError(
                "MegaLocBackbonePreprocessor.mean and std must each be "
                "3-tuples (one per channel)"
            )
        # A zero or negative divisor would blow up or flip the normalised range.
        if any(v <= 0 for v in std):
            raise ValueError(
                "MegaLocBackbonePreprocessor.std components must be > 0"
            )
        self._input_shape: tuple[int, int] = input_shape
        # Shaped (1, 1, 3) so both broadcast over an (H, W, 3) image.
        self._mean: np.ndarray = np.array(mean, dtype=np.float32).reshape(1, 1, 3)
        self._std: np.ndarray = np.array(std, dtype=np.float32).reshape(1, 1, 3)
        self._logger: logging.Logger = (
            logger
            if logger is not None
            else logging.getLogger("gps_denied_onboard.c2_vpr.mega_loc")
        )

    def preprocess(
        self,
        frame: NavCameraFrame,
        calibration: CameraCalibration,
    ) -> np.ndarray:
        """Decode -> centre-crop (principal-point-aware) -> resize -> normalise -> FP16 NCHW.

        When ``calibration`` is ``None``, or no finite principal point can
        be read from its ``intrinsics_3x3``, the crop falls back to the
        image's geometric centre and ONE WARN log is emitted for the call
        with ``kind="c2.vpr.calibration_missing"``.

        Args:
            frame: Source frame; ``frame.image`` and ``frame.frame_id`` are read.
            calibration: Camera calibration supplying ``intrinsics_3x3``.

        Returns:
            Contiguous float16 array of shape ``(1, 3, H, W)`` where
            ``(H, W)`` is the configured input shape.

        Raises:
            VprPreprocessError: If the image is not a uint8 ndarray of shape
                ``(H, W)`` or ``(H, W, 3)``, or if ``cv2.resize`` fails.
        """
        image = self._coerce_to_rgb_uint8(frame.image)
        cropped = self._centre_crop_around_principal_point(
            image, calibration, frame_id=frame.frame_id
        )
        target_h, target_w = self._input_shape
        in_h, in_w = cropped.shape[:2]
        # Area interpolation when shrinking in either dimension, cubic otherwise.
        interp = (
            cv2.INTER_AREA
            if (in_h > target_h or in_w > target_w)
            else cv2.INTER_CUBIC
        )
        try:
            # cv2.resize takes the destination size as (width, height).
            resized = cv2.resize(
                cropped, (target_w, target_h), interpolation=interp
            )
        except cv2.error as exc:
            raise VprPreprocessError(
                f"cv2.resize failed: {type(exc).__name__}: {exc}"
            ) from exc
        as_f32 = resized.astype(np.float32) / 255.0
        normalised = (as_f32 - self._mean) / self._std
        chw = normalised.transpose(2, 0, 1)  # HWC -> CHW
        return np.ascontiguousarray(chw[None, :, :, :], dtype=np.float16)

    def input_shape(self) -> tuple[int, int]:
        """Return the configured ``(H, W)`` target resolution."""
        return self._input_shape

    @staticmethod
    def _coerce_to_rgb_uint8(image: object) -> np.ndarray:
        """Return ``image`` as an ``(H, W, 3)`` uint8 array.

        A 2-D image is stacked three times along a new last axis; an
        ``(H, W, 3)`` image is returned as-is (no copy).

        Raises:
            VprPreprocessError: If ``image`` is not an ndarray, is not uint8,
                or has any other shape.
        """
        if not isinstance(image, np.ndarray):
            raise VprPreprocessError(
                f"frame.image must be a numpy array; got {type(image).__name__}"
            )
        if image.dtype != np.uint8:
            raise VprPreprocessError(
                f"frame.image must be uint8 RGB; got dtype {image.dtype}"
            )
        if image.ndim == 2:
            return np.stack([image, image, image], axis=-1)
        if image.ndim == 3 and image.shape[2] == 3:
            return image
        raise VprPreprocessError(
            f"frame.image must be (H,W) or (H,W,3); got shape {image.shape}"
        )

    def _centre_crop_around_principal_point(
        self,
        image: np.ndarray,
        calibration: CameraCalibration | None,
        *,
        frame_id: int,
    ) -> np.ndarray:
        """Crop the largest square that fits ``image`` around the principal point.

        The square's side is ``min(H, W)``. Its origin is clamped so the
        window never leaves the image. When no usable principal point is
        available the geometric centre is used and a WARN log is emitted.

        Returns:
            A ``(side, side, 3)`` slice (view) of ``image``.
        """
        h, w = image.shape[:2]
        side = min(h, w)
        cx_cy = self._extract_principal_point(calibration)
        if cx_cy is None:
            self._logger.warning(
                "MegaLoc calibration unusable; centre-cropping around "
                "geometric centre",
                extra={
                    "component": _COMPONENT,
                    "kind": _LOG_KIND_CALIBRATION_MISSING,
                    "kv": {"frame_id": int(frame_id)},
                },
            )
            cx = w / 2.0
            cy = h / 2.0
        else:
            cx, cy = cx_cy
        half = side // 2
        # Clamp the top-left corner into [0, dim - side] on each axis.
        left = round(max(0.0, min(float(w - side), cx - half)))
        top = round(max(0.0, min(float(h - side), cy - half)))
        return image[top : top + side, left : left + side, :]

    @staticmethod
    def _extract_principal_point(
        calibration: CameraCalibration | None,
    ) -> tuple[float, float] | None:
        """Read ``(cx, cy)`` from ``calibration.intrinsics_3x3``.

        Returns:
            ``(cx, cy)`` taken from elements ``[0, 2]`` and ``[1, 2]`` of
            the 3x3 matrix, or ``None`` when the calibration is ``None``,
            has no ``intrinsics_3x3``, the value cannot be converted to a
            3x3 float array, either coordinate is NaN / infinite, or both
            coordinates are exactly zero.
        """
        if calibration is None:
            return None
        intrinsics = getattr(calibration, "intrinsics_3x3", None)
        if intrinsics is None:
            return None
        try:
            arr = np.asarray(intrinsics, dtype=np.float64)
        except (TypeError, ValueError):
            return None
        if arr.shape != (3, 3):
            return None
        cx = float(arr[0, 2])
        cy = float(arr[1, 2])
        # Non-finite values cannot anchor a crop window. Comparisons against
        # NaN are always false, so the caller's clamp would otherwise pin the
        # window to an image edge without emitting the fallback warning.
        if not (np.isfinite(cx) and np.isfinite(cy)):
            return None
        # NOTE(review): (0, 0) is treated as "not calibrated" — presumably a
        # zero-initialised placeholder; confirm against the calibration loader.
        if cx == 0.0 and cy == 0.0:
            return None
        return cx, cy
|
||||
@@ -0,0 +1,200 @@
|
||||
"""MixVPR backbone preprocessor (AZ-339).
|
||||
|
||||
MixVPR's published preprocessing chain (per the research code drop):
|
||||
decode the nav-camera frame's image to RGB uint8, centre-crop to a
|
||||
square region respecting the camera calibration's principal point (or
|
||||
geometric centre + WARN log when calibration is absent), resize to
|
||||
``(320, 320)``, apply ImageNet mean/std normalisation, cast to FP16,
|
||||
reshape to NCHW.
|
||||
|
||||
Differences from :class:`MegaLocBackbonePreprocessor` /
|
||||
:class:`UltraVprBackbonePreprocessor`:
|
||||
|
||||
- 320x320 input shape (vs MegaLoc's 322x322, UltraVPR's 384x384).
|
||||
- Same calibration-aware centre-crop and ImageNet mean/std — these
|
||||
upstream conventions happen to align with UltraVPR / MegaLoc but
|
||||
are NOT a shared dependency: the centre-crop logic is duplicated
|
||||
here per ``components/02_c2_vpr/description.md`` § 6 so a future
|
||||
MixVPR code drop can change its preprocessing without coupling
|
||||
other strategies' weights-versions.
|
||||
|
||||
This preprocessor is C2-internal and owned exclusively by
|
||||
:class:`MixVprStrategy` — sharing across backbones is forbidden per
|
||||
``components/02_c2_vpr/description.md`` § 6.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Final
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from gps_denied_onboard.components.c2_vpr.errors import VprPreprocessError
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from gps_denied_onboard._types.calibration import CameraCalibration
|
||||
from gps_denied_onboard._types.nav import NavCameraFrame
|
||||
|
||||
__all__ = [
|
||||
"IMAGENET_MEAN",
|
||||
"IMAGENET_STD",
|
||||
"MIX_VPR_INPUT_HW",
|
||||
"MixVprBackbonePreprocessor",
|
||||
]
|
||||
|
||||
# Engine input resolution as (H, W); the constructor validates the same
# (H, W) layout for any override.
MIX_VPR_INPUT_HW: Final[tuple[int, int]] = (320, 320)

# Per-channel ImageNet statistics, applied after pixels are scaled to [0, 1].
IMAGENET_MEAN: Final[tuple[float, float, float]] = (0.485, 0.456, 0.406)
IMAGENET_STD: Final[tuple[float, float, float]] = (0.229, 0.224, 0.225)

# Structured-log fields attached to the calibration-fallback warning.
_COMPONENT: Final[str] = "c2_vpr"
_LOG_KIND_CALIBRATION_MISSING: Final[str] = "c2.vpr.calibration_missing"
|
||||
|
||||
|
||||
class MixVprBackbonePreprocessor:
    """Centre-crop (principal-point-aware) + resize + ImageNet-normalise + FP16 NCHW.

    Per-frame pipeline implemented by :meth:`preprocess`:

    1. validate ``frame.image`` as a uint8 ndarray, replicating a 2-D
       greyscale image to three channels;
    2. crop the largest square that fits the image, positioned around the
       calibration's principal point (geometric centre when the
       calibration is unusable);
    3. resize to ``input_shape``;
    4. scale to ``[0, 1]``, subtract ``mean`` and divide by ``std``;
    5. return a contiguous ``(1, 3, H, W)`` float16 array.
    """

    def __init__(
        self,
        *,
        input_shape: tuple[int, int] = MIX_VPR_INPUT_HW,
        mean: tuple[float, float, float] = IMAGENET_MEAN,
        std: tuple[float, float, float] = IMAGENET_STD,
        logger: logging.Logger | None = None,
    ) -> None:
        """Validate and store the preprocessing parameters.

        Args:
            input_shape: Target ``(H, W)`` of the resized image.
            mean: Per-channel mean subtracted after scaling to ``[0, 1]``.
            std: Per-channel divisor applied after mean subtraction.
            logger: Destination for the calibration-fallback warning;
                defaults to ``gps_denied_onboard.c2_vpr.mix_vpr``.

        Raises:
            ValueError: If ``input_shape`` is not a 2-tuple of positive
                ints, ``mean`` / ``std`` are not length 3, or any ``std``
                component is not strictly positive.
        """
        if (
            not isinstance(input_shape, tuple)
            or len(input_shape) != 2
            or any(not isinstance(v, int) or v <= 0 for v in input_shape)
        ):
            raise ValueError(
                f"MixVprBackbonePreprocessor.input_shape must be a (H, W) "
                f"tuple of positive ints; got {input_shape!r}"
            )
        if len(mean) != 3 or len(std) != 3:
            raise ValueError(
                "MixVprBackbonePreprocessor.mean and std must each be "
                "3-tuples (one per channel)"
            )
        # A zero or negative divisor would blow up or flip the normalised range.
        if any(v <= 0 for v in std):
            raise ValueError(
                "MixVprBackbonePreprocessor.std components must be > 0"
            )
        self._input_shape: tuple[int, int] = input_shape
        # Shaped (1, 1, 3) so both broadcast over an (H, W, 3) image.
        self._mean: np.ndarray = np.array(mean, dtype=np.float32).reshape(1, 1, 3)
        self._std: np.ndarray = np.array(std, dtype=np.float32).reshape(1, 1, 3)
        self._logger: logging.Logger = (
            logger
            if logger is not None
            else logging.getLogger("gps_denied_onboard.c2_vpr.mix_vpr")
        )

    def preprocess(
        self,
        frame: NavCameraFrame,
        calibration: CameraCalibration,
    ) -> np.ndarray:
        """Decode -> centre-crop (principal-point-aware) -> resize -> normalise -> FP16 NCHW.

        When ``calibration`` is ``None``, or no finite principal point can
        be read from its ``intrinsics_3x3``, the crop falls back to the
        image's geometric centre and ONE WARN log is emitted for the call
        with ``kind="c2.vpr.calibration_missing"``.

        Args:
            frame: Source frame; ``frame.image`` and ``frame.frame_id`` are read.
            calibration: Camera calibration supplying ``intrinsics_3x3``.

        Returns:
            Contiguous float16 array of shape ``(1, 3, H, W)`` where
            ``(H, W)`` is the configured input shape.

        Raises:
            VprPreprocessError: If the image is not a uint8 ndarray of shape
                ``(H, W)`` or ``(H, W, 3)``, or if ``cv2.resize`` fails.
        """
        image = self._coerce_to_rgb_uint8(frame.image)
        cropped = self._centre_crop_around_principal_point(
            image, calibration, frame_id=frame.frame_id
        )
        target_h, target_w = self._input_shape
        in_h, in_w = cropped.shape[:2]
        # Area interpolation when shrinking in either dimension, cubic otherwise.
        interp = (
            cv2.INTER_AREA
            if (in_h > target_h or in_w > target_w)
            else cv2.INTER_CUBIC
        )
        try:
            # cv2.resize takes the destination size as (width, height).
            resized = cv2.resize(
                cropped, (target_w, target_h), interpolation=interp
            )
        except cv2.error as exc:
            raise VprPreprocessError(
                f"cv2.resize failed: {type(exc).__name__}: {exc}"
            ) from exc
        as_f32 = resized.astype(np.float32) / 255.0
        normalised = (as_f32 - self._mean) / self._std
        chw = normalised.transpose(2, 0, 1)  # HWC -> CHW
        return np.ascontiguousarray(chw[None, :, :, :], dtype=np.float16)

    def input_shape(self) -> tuple[int, int]:
        """Return the configured ``(H, W)`` target resolution."""
        return self._input_shape

    @staticmethod
    def _coerce_to_rgb_uint8(image: object) -> np.ndarray:
        """Return ``image`` as an ``(H, W, 3)`` uint8 array.

        A 2-D image is stacked three times along a new last axis; an
        ``(H, W, 3)`` image is returned as-is (no copy).

        Raises:
            VprPreprocessError: If ``image`` is not an ndarray, is not uint8,
                or has any other shape.
        """
        if not isinstance(image, np.ndarray):
            raise VprPreprocessError(
                f"frame.image must be a numpy array; got {type(image).__name__}"
            )
        if image.dtype != np.uint8:
            raise VprPreprocessError(
                f"frame.image must be uint8 RGB; got dtype {image.dtype}"
            )
        if image.ndim == 2:
            return np.stack([image, image, image], axis=-1)
        if image.ndim == 3 and image.shape[2] == 3:
            return image
        raise VprPreprocessError(
            f"frame.image must be (H,W) or (H,W,3); got shape {image.shape}"
        )

    def _centre_crop_around_principal_point(
        self,
        image: np.ndarray,
        calibration: CameraCalibration | None,
        *,
        frame_id: int,
    ) -> np.ndarray:
        """Crop the largest square that fits ``image`` around the principal point.

        The square's side is ``min(H, W)``. Its origin is clamped so the
        window never leaves the image. When no usable principal point is
        available the geometric centre is used and a WARN log is emitted.

        Returns:
            A ``(side, side, 3)`` slice (view) of ``image``.
        """
        h, w = image.shape[:2]
        side = min(h, w)
        cx_cy = self._extract_principal_point(calibration)
        if cx_cy is None:
            self._logger.warning(
                "MixVPR calibration unusable; centre-cropping around "
                "geometric centre",
                extra={
                    "component": _COMPONENT,
                    "kind": _LOG_KIND_CALIBRATION_MISSING,
                    "kv": {"frame_id": int(frame_id)},
                },
            )
            cx = w / 2.0
            cy = h / 2.0
        else:
            cx, cy = cx_cy
        half = side // 2
        # Clamp the top-left corner into [0, dim - side] on each axis.
        left = round(max(0.0, min(float(w - side), cx - half)))
        top = round(max(0.0, min(float(h - side), cy - half)))
        return image[top : top + side, left : left + side, :]

    @staticmethod
    def _extract_principal_point(
        calibration: CameraCalibration | None,
    ) -> tuple[float, float] | None:
        """Read ``(cx, cy)`` from ``calibration.intrinsics_3x3``.

        Returns:
            ``(cx, cy)`` taken from elements ``[0, 2]`` and ``[1, 2]`` of
            the 3x3 matrix, or ``None`` when the calibration is ``None``,
            has no ``intrinsics_3x3``, the value cannot be converted to a
            3x3 float array, either coordinate is NaN / infinite, or both
            coordinates are exactly zero.
        """
        if calibration is None:
            return None
        intrinsics = getattr(calibration, "intrinsics_3x3", None)
        if intrinsics is None:
            return None
        try:
            arr = np.asarray(intrinsics, dtype=np.float64)
        except (TypeError, ValueError):
            return None
        if arr.shape != (3, 3):
            return None
        cx = float(arr[0, 2])
        cy = float(arr[1, 2])
        # Non-finite values cannot anchor a crop window. Comparisons against
        # NaN are always false, so the caller's clamp would otherwise pin the
        # window to an image edge without emitting the fallback warning.
        if not (np.isfinite(cx) and np.isfinite(cy)):
            return None
        # NOTE(review): (0, 0) is treated as "not calibrated" — presumably a
        # zero-initialised placeholder; confirm against the calibration loader.
        if cx == 0.0 and cy == 0.0:
            return None
        return cx, cy
|
||||
@@ -0,0 +1,451 @@
|
||||
"""``MegaLocStrategy`` — C2 secondary VprStrategy for IT-12 (AZ-339).
|
||||
|
||||
MegaLoc is one of two secondary backbones (alongside :class:`MixVprStrategy`)
|
||||
shipped exclusively in the research binary for the IT-12 comparative-study
|
||||
matrix (``components/02_c2_vpr/description.md`` § 1 + § 5). Per ADR-002,
|
||||
``BUILD_VPR_MEGALOC`` is ON for the research binary and replay-cli, OFF
|
||||
for the airborne and operator-tooling binaries — selecting ``mega_loc``
|
||||
on a binary without the flag fails fast at composition-root time via
|
||||
:class:`StrategyNotAvailableError` (not at first frame).
|
||||
|
||||
The strategy runs on the C7 TensorRT runtime (AZ-298), or the ONNX-Runtime
|
||||
fallback (AZ-299), via the local :class:`InferenceRuntimeCut` (AZ-507).
|
||||
Engine output key is ``"embedding"`` and the strategy applies single-stage
|
||||
global L2 normalisation (no NetVLAD-style intra-cluster step). Retrieval
|
||||
delegates to :class:`FaissBridge` (AZ-341).
|
||||
|
||||
Architecture-registry differences from :class:`NetVladStrategy`:
|
||||
|
||||
MegaLoc consumes a pre-compiled ``.trt`` engine produced by C10's engine
|
||||
compiler (AZ-321) — there is no PyTorch ``nn.Module`` to register, so
|
||||
the module does NOT expose ``MODEL_NAME`` / ``architecture_factory``.
|
||||
:func:`gps_denied_onboard.runtime_root.vpr_factory._register_strategy_architecture`
|
||||
no-ops for this strategy.
|
||||
|
||||
Engine load happens in :func:`create` (NOT at first frame) so the
|
||||
engine-output-shape assertion (AC-6) surfaces at startup, not after
|
||||
takeoff.
|
||||
|
||||
Per-frame :meth:`embed_query` pipeline:
|
||||
|
||||
1. ``preprocessor.preprocess(frame, calibration)`` ->
|
||||
``(1, 3, 322, 322)`` FP16 NCHW ndarray.
|
||||
2. ``inference_runtime.infer(handle, {"input": tensor})`` ->
|
||||
``{"embedding": (1, 2048) FP16 ndarray}``.
|
||||
3. ``normaliser.l2_normalise(raw[0])`` -> global L2 (single-stage).
|
||||
4. Return :class:`VprQuery` with ``frame_id``, normalised embedding,
|
||||
produced_at monotonic ns.
|
||||
|
||||
Error envelope: every method raises only members of :class:`VprError`.
|
||||
``RuntimeError`` from the backbone forward -> rewrapped to
|
||||
:class:`VprBackboneError`; :class:`VprPreprocessError` from the
|
||||
preprocessor propagates unchanged.
|
||||
|
||||
Retrieval is a single-line delegation to :class:`FaissBridge.retrieve`;
|
||||
see AZ-341 AC-10.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Final, Literal
|
||||
|
||||
import numpy as np
|
||||
|
||||
from gps_denied_onboard._types.inference import (
|
||||
BuildConfig,
|
||||
EngineHandle,
|
||||
PrecisionMode,
|
||||
)
|
||||
from gps_denied_onboard._types.vpr import VprQuery, VprResult
|
||||
from gps_denied_onboard.clock import Clock
|
||||
from gps_denied_onboard.components.c2_vpr._faiss_bridge import FaissBridge
|
||||
from gps_denied_onboard.components.c2_vpr._preprocessor_mega_loc import (
|
||||
MegaLocBackbonePreprocessor,
|
||||
)
|
||||
from gps_denied_onboard.components.c2_vpr.descriptor_index_cut import (
|
||||
DescriptorIndexCut,
|
||||
)
|
||||
from gps_denied_onboard.components.c2_vpr.errors import (
|
||||
VprBackboneError,
|
||||
VprPreprocessError,
|
||||
)
|
||||
from gps_denied_onboard.components.c2_vpr.inference_runtime_cut import (
|
||||
InferenceRuntimeCut,
|
||||
)
|
||||
from gps_denied_onboard.config.schema import ConfigError
|
||||
from gps_denied_onboard.fdr_client import EnqueueResult, FdrClient
|
||||
from gps_denied_onboard.fdr_client.records import (
|
||||
CURRENT_SCHEMA_VERSION,
|
||||
FdrRecord,
|
||||
)
|
||||
from gps_denied_onboard.helpers.descriptor_normaliser import DescriptorNormaliser
|
||||
from gps_denied_onboard.helpers.iso_timestamps import (
|
||||
iso_ts_from_clock as _iso_ts_from_clock,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from gps_denied_onboard._types.calibration import CameraCalibration
|
||||
from gps_denied_onboard._types.nav import NavCameraFrame
|
||||
from gps_denied_onboard.config.schema import Config
|
||||
|
||||
__all__ = ["DESCRIPTOR_DIM", "MegaLocStrategy", "create"]
|
||||
|
||||
|
||||
# Embedding width (D=2048) published with the upstream MegaLoc research
# code drop. create() asserts the engine's output shape against this value,
# so editing it would silently break AC-2 / AC-4 / AC-5 / AC-6.
DESCRIPTOR_DIM: Final[int] = 2048

# Strategy identity used in logs, FDR payloads and the FAISS bridge.
_BACKBONE_LABEL: Final[Literal["mega_loc"]] = "mega_loc"
_COMPONENT: Final[str] = "c2_vpr"

# Tensor names exchanged with the inference runtime.
_OUTPUT_KEY: Final[str] = "embedding"
_ENGINE_INPUT_KEY: Final[str] = "input"

# Runtime labels create() accepts; anything else is rejected with ConfigError.
_ALLOWED_RUNTIME_LABELS: Final[frozenset[str]] = frozenset(
    {"tensorrt", "onnx_trt_ep"}
)

# ``kind`` values for structured log lines.
_LOG_KIND_READY: Final[str] = "c2.vpr.ready"
_LOG_KIND_BACKBONE_ERROR: Final[str] = "c2.vpr.backbone_error"
_LOG_KIND_PREPROCESS_ERROR: Final[str] = "c2.vpr.preprocess_error"
_LOG_KIND_FDR_OVERRUN: Final[str] = "c2.vpr.fdr_overrun"

# ``kind`` values for FDR records.
_FDR_KIND_EMBED: Final[str] = "vpr.embed_query"
_FDR_KIND_BACKBONE_ERROR: Final[str] = "vpr.backbone_error"
_FDR_KIND_PREPROCESS_ERROR: Final[str] = "vpr.preprocess_error"
|
||||
|
||||
|
||||
class MegaLocStrategy:
    """C2 secondary VprStrategy that embeds frames with a MegaLoc engine.

    ``embed_query`` preprocesses the frame, runs one forward pass through
    the injected inference runtime, validates the ``"embedding"`` output,
    L2-normalises it and returns a :class:`VprQuery`. ``retrieve_topk``
    delegates to the injected :class:`FaissBridge`.

    Stateless across frames (INV-2); single-threaded per instance
    (INV-1, per AZ-336).
    """

    def __init__(
        self,
        *,
        inference_runtime: InferenceRuntimeCut,
        engine_handle: EngineHandle,
        descriptor_index: DescriptorIndexCut,
        preprocessor: MegaLocBackbonePreprocessor,
        normaliser: DescriptorNormaliser,
        faiss_bridge: FaissBridge,
        fdr_client: FdrClient,
        clock: Clock,
        logger: logging.Logger,
        descriptor_dim: int = DESCRIPTOR_DIM,
    ) -> None:
        """Store the injected collaborators.

        Raises:
            ValueError: If ``descriptor_dim`` is smaller than 1.
        """
        if descriptor_dim < 1:
            raise ValueError(
                f"MegaLocStrategy.descriptor_dim must be >= 1; got {descriptor_dim}"
            )
        self._descriptor_dim = descriptor_dim
        self._inference_runtime = inference_runtime
        self._engine_handle = engine_handle
        self._descriptor_index = descriptor_index
        self._preprocessor = preprocessor
        self._normaliser = normaliser
        self._faiss_bridge = faiss_bridge
        self._fdr_client = fdr_client
        self._clock = clock
        self._logger = logger

    def embed_query(
        self,
        frame: NavCameraFrame,
        calibration: CameraCalibration,
    ) -> VprQuery:
        """Embed one frame and return the L2-normalised descriptor.

        Returns:
            A :class:`VprQuery` carrying the frame id, the normalised
            embedding and the monotonic-ns timestamp taken right after
            the forward pass.

        Raises:
            VprPreprocessError: Propagated unchanged from the preprocessor
                after an error log + FDR record are emitted.
            VprBackboneError: If the forward pass raises, the output lacks
                the ``"embedding"`` key, or its shape is not
                ``(1, descriptor_dim)``.
        """
        try:
            tensor = self._preprocessor.preprocess(frame, calibration)
        except VprPreprocessError as exc:
            self._emit_preprocess_error(frame, exc)
            raise

        # Only the forward pass is timed; preprocessing is excluded.
        started_ns = self._clock.monotonic_ns()
        try:
            outputs = self._inference_runtime.infer(
                self._engine_handle, {_ENGINE_INPUT_KEY: tensor}
            )
        except Exception as exc:
            raise self._wrap_backbone_error(frame, exc) from exc
        finished_ns = self._clock.monotonic_ns()
        # Floor at 1 us so the recorded latency is never zero.
        latency_us = max(1, (finished_ns - started_ns) // 1_000)

        if _OUTPUT_KEY not in outputs:
            missing_key = VprBackboneError(
                f"MegaLoc forward returned no {_OUTPUT_KEY!r} key; "
                f"got {sorted(outputs.keys())!r}"
            )
            self._emit_backbone_error(frame, missing_key)
            raise missing_key

        raw = np.asarray(outputs[_OUTPUT_KEY])
        if raw.shape != (1, self._descriptor_dim):
            bad_shape = VprBackboneError(
                f"MegaLoc forward returned shape {raw.shape}; "
                f"expected (1, {self._descriptor_dim})"
            )
            self._emit_backbone_error(frame, bad_shape)
            raise bad_shape

        embedding = self._normaliser.l2_normalise(
            np.ascontiguousarray(raw[0], dtype=np.float16)
        )

        frame_id = int(frame.frame_id)
        self._emit_embed_record(frame_id=frame_id, latency_us=int(latency_us))
        return VprQuery(
            frame_id=frame_id,
            embedding=embedding,
            produced_at=finished_ns,
        )

    def retrieve_topk(self, query: VprQuery, k: int) -> VprResult:
        """Delegate top-k retrieval to the FAISS bridge under the MegaLoc label."""
        return self._faiss_bridge.retrieve(query, k, backbone_label=_BACKBONE_LABEL)

    def descriptor_dim(self) -> int:
        """Return the embedding width this strategy was built with."""
        return self._descriptor_dim

    def _wrap_backbone_error(
        self, frame: NavCameraFrame, exc: BaseException
    ) -> VprBackboneError:
        """Convert ``exc`` into a :class:`VprBackboneError`, emit it, and return it."""
        wrapped = VprBackboneError(
            f"MegaLoc forward raised {type(exc).__name__}: {exc}"
        )
        self._emit_backbone_error(frame, wrapped)
        return wrapped

    def _emit_embed_record(self, *, frame_id: int, latency_us: int) -> None:
        """Enqueue the per-frame FDR record; warn if the FDR buffer overran."""
        payload = {
            "frame_id": frame_id,
            "backbone_label": _BACKBONE_LABEL,
            "descriptor_dim": self._descriptor_dim,
            "latency_us": latency_us,
        }
        record = FdrRecord(
            schema_version=CURRENT_SCHEMA_VERSION,
            ts=_iso_ts_from_clock(self._clock),
            producer_id=self._fdr_client.producer_id,
            kind=_FDR_KIND_EMBED,
            payload=payload,
        )
        if self._fdr_client.enqueue(record) == EnqueueResult.OVERRUN:
            self._logger.warning(
                "FDR enqueue dropped vpr.embed_query record (buffer overrun)",
                extra={
                    "component": _COMPONENT,
                    "kind": _LOG_KIND_FDR_OVERRUN,
                    "kv": {
                        "frame_id": frame_id,
                        "backbone_label": _BACKBONE_LABEL,
                    },
                },
            )

    def _emit_backbone_error(
        self, frame: NavCameraFrame, error: BaseException
    ) -> None:
        """Log a backbone failure and enqueue the matching FDR record."""
        frame_id = int(frame.frame_id)
        self._report_failure(
            frame_id=frame_id,
            message=f"MegaLoc backbone error: {error}",
            error=error,
            log_kind=_LOG_KIND_BACKBONE_ERROR,
            fdr_kind=_FDR_KIND_BACKBONE_ERROR,
        )

    def _emit_preprocess_error(
        self, frame: NavCameraFrame, error: BaseException
    ) -> None:
        """Log a preprocessing failure and enqueue the matching FDR record."""
        frame_id = int(frame.frame_id)
        self._report_failure(
            frame_id=frame_id,
            message=f"MegaLoc preprocess error: {error}",
            error=error,
            log_kind=_LOG_KIND_PREPROCESS_ERROR,
            fdr_kind=_FDR_KIND_PREPROCESS_ERROR,
        )

    def _report_failure(
        self,
        *,
        frame_id: int,
        message: str,
        error: BaseException,
        log_kind: str,
        fdr_kind: str,
    ) -> None:
        """Emit one ERROR log line followed by one FDR record for ``error``.

        The enqueue result is not inspected. The FDR payload carries the
        error message truncated to 512 characters.
        """
        error_type = type(error).__name__
        self._logger.error(
            message,
            extra={
                "component": _COMPONENT,
                "kind": log_kind,
                "kv": {
                    "frame_id": frame_id,
                    "backbone_label": _BACKBONE_LABEL,
                    "error_type": error_type,
                },
            },
        )
        record = FdrRecord(
            schema_version=CURRENT_SCHEMA_VERSION,
            ts=_iso_ts_from_clock(self._clock),
            producer_id=self._fdr_client.producer_id,
            kind=fdr_kind,
            payload={
                "frame_id": frame_id,
                "backbone_label": _BACKBONE_LABEL,
                "error_type": error_type,
                "error_message": str(error)[:512],
            },
        )
        self._fdr_client.enqueue(record)
|
||||
|
||||
|
||||
def _build_trt_build_config() -> BuildConfig:
    """Return the fixed FP16 build configuration passed to ``compile_engine``.

    No workspace budget, calibration dataset or optimisation profiles are
    requested (``0`` / ``None`` / empty tuple respectively).
    """
    settings = {
        "precision": PrecisionMode.FP16,
        "workspace_mb": 0,
        "calibration_dataset": None,
        "optimization_profiles": (),
    }
    return BuildConfig(**settings)
|
||||
|
||||
|
||||
def create(
    config: Config,
    *,
    descriptor_index: DescriptorIndexCut,
    inference_runtime: InferenceRuntimeCut,
    fdr_client: FdrClient | None = None,
    clock: Clock | None = None,
    logger: logging.Logger | None = None,
) -> MegaLocStrategy:
    """Build a ready-to-use :class:`MegaLocStrategy` (module-level factory).

    Steps, in order: check the runtime label, read the ``c2_vpr`` config
    block, resolve the injected collaborators, compile + deserialize the
    engine, assemble the preprocessor / normaliser / FAISS bridge, run
    the engine-output-shape assertion (AC-6), log readiness, and return
    the strategy.

    Args:
        config: Application config; ``config.components["c2_vpr"]`` is read.
        descriptor_index: Index handed to the FAISS bridge and the strategy.
        inference_runtime: Runtime used to compile, load and run the engine.
        fdr_client: Required despite the ``None`` default — the composition
            root must inject the running FDR client.
        clock: Defaults to a fresh ``WallClock`` when omitted.
        logger: Defaults to ``gps_denied_onboard.c2_vpr.mega_loc``.

    Raises:
        ConfigError: If ``current_runtime_label()`` is not one of
            ``{"tensorrt", "onnx_trt_ep"}``, or the engine's output does
            not match ``(1, DESCRIPTOR_DIM)``.
        ValueError: If ``fdr_client`` is ``None``.
    """
    runtime_label = inference_runtime.current_runtime_label()
    if runtime_label not in _ALLOWED_RUNTIME_LABELS:
        raise ConfigError(
            "MegaLoc requires BUILD_TENSORRT_RUNTIME=ON "
            "(or BUILD_ONNX_TRT_EP_RUNTIME=ON as fallback); "
            f"this binary has runtime_label={runtime_label!r}."
        )

    vpr_block = config.components["c2_vpr"]
    weights_path = vpr_block.backbone_weights_path

    if fdr_client is None:
        raise ValueError(
            "MegaLocStrategy.create: fdr_client is required; "
            "the composition root must inject the running FDR client."
        )
    if clock is None:
        # Imported lazily so the default clock is only loaded when needed.
        from gps_denied_onboard.clock.wall_clock import WallClock

        clock = WallClock()
    if logger is None:
        logger = logging.getLogger("gps_denied_onboard.c2_vpr.mega_loc")

    # Load the engine now so a bad artefact fails at startup, not mid-flight.
    engine_entry = inference_runtime.compile_engine(
        weights_path, _build_trt_build_config()
    )
    handle = inference_runtime.deserialize_engine(engine_entry)

    preprocessor = MegaLocBackbonePreprocessor(logger=logger)
    normaliser = DescriptorNormaliser()
    faiss_bridge = FaissBridge(
        descriptor_index=descriptor_index,
        descriptor_dim=DESCRIPTOR_DIM,
        warn_top1_threshold=vpr_block.warn_top1_threshold,
        debug_log_per_frame_distances=vpr_block.debug_per_frame_distances,
        fdr_client=fdr_client,
        logger=logger,
        clock=clock,
    )

    # Dry-run inference; raises ConfigError before the strategy is bound.
    _assert_engine_output_dim(inference_runtime, handle, preprocessor)

    ready_fields = {
        "component": _COMPONENT,
        "kind": _LOG_KIND_READY,
        "kv": {
            "strategy": _BACKBONE_LABEL,
            "descriptor_dim": DESCRIPTOR_DIM,
        },
    }
    logger.info("C2 VPR strategy ready", extra=ready_fields)

    return MegaLocStrategy(
        inference_runtime=inference_runtime,
        engine_handle=handle,
        descriptor_index=descriptor_index,
        preprocessor=preprocessor,
        normaliser=normaliser,
        faiss_bridge=faiss_bridge,
        fdr_client=fdr_client,
        clock=clock,
        logger=logger,
        descriptor_dim=DESCRIPTOR_DIM,
    )
|
||||
|
||||
|
||||
def _assert_engine_output_dim(
    inference_runtime: InferenceRuntimeCut,
    handle: EngineHandle,
    preprocessor: MegaLocBackbonePreprocessor,
) -> None:
    """Run one zero-input inference and verify the embedding shape.

    Raises:
        ConfigError: If the output lacks the ``"embedding"`` key or its
            shape differs from ``(1, DESCRIPTOR_DIM)``.
    """
    # This helper is intentionally duplicated across ultra_vpr / net_vlad /
    # mega_loc / mix_vpr for now. AZ-527 (hygiene PBI sized in parallel with
    # AZ-339) will consolidate it; extracting earlier would have widened
    # AZ-339 beyond the two new strategies.
    height, width = preprocessor.input_shape()
    outputs = inference_runtime.infer(
        handle,
        {_ENGINE_INPUT_KEY: np.zeros((1, 3, height, width), dtype=np.float16)},
    )
    if _OUTPUT_KEY not in outputs:
        raise ConfigError(
            f"engine output shape mismatch: {_OUTPUT_KEY!r} key absent; "
            f"got keys {sorted(outputs.keys())!r}"
        )
    produced_shape = tuple(np.asarray(outputs[_OUTPUT_KEY]).shape)
    if produced_shape != (1, DESCRIPTOR_DIM):
        raise ConfigError(
            f"engine output shape mismatch: expected (1, {DESCRIPTOR_DIM}), "
            f"got {produced_shape}"
        )
|
||||
@@ -0,0 +1,454 @@
|
||||
"""``MixVprStrategy`` — C2 secondary VprStrategy for IT-12 (AZ-339).
|
||||
|
||||
MixVPR is the second of two secondary backbones (alongside
|
||||
:class:`MegaLocStrategy`) shipped exclusively in the research binary
|
||||
for the IT-12 comparative-study matrix (``components/02_c2_vpr/
|
||||
description.md`` § 1 + § 5). Per ADR-002, ``BUILD_VPR_MIXVPR`` is ON
|
||||
for the research binary and replay-cli, OFF for the airborne and
|
||||
operator-tooling binaries — selecting ``mix_vpr`` on a binary without
|
||||
the flag fails fast at composition-root time via
|
||||
:class:`StrategyNotAvailableError` (not at first frame).
|
||||
|
||||
The strategy runs on the C7 TensorRT runtime (AZ-298), or the ONNX-Runtime
|
||||
fallback (AZ-299), via the local :class:`InferenceRuntimeCut` (AZ-507).
|
||||
Engine output key is ``"embedding"`` and the strategy applies single-stage
|
||||
global L2 normalisation (no NetVLAD-style intra-cluster step). Retrieval
|
||||
delegates to :class:`FaissBridge` (AZ-341).
|
||||
|
||||
Architecture-registry differences from :class:`NetVladStrategy`:
|
||||
|
||||
MixVPR consumes a pre-compiled ``.trt`` engine produced by C10's engine
|
||||
compiler (AZ-321) — there is no PyTorch ``nn.Module`` to register, so
|
||||
the module does NOT expose ``MODEL_NAME`` / ``architecture_factory``.
|
||||
:func:`gps_denied_onboard.runtime_root.vpr_factory._register_strategy_architecture`
|
||||
no-ops for this strategy.
|
||||
|
||||
Engine load happens in :func:`create` (NOT at first frame) so the
|
||||
engine-output-shape assertion (AC-6) surfaces at startup, not after
|
||||
takeoff.
|
||||
|
||||
Per-frame :meth:`embed_query` pipeline:
|
||||
|
||||
1. ``preprocessor.preprocess(frame, calibration)`` ->
|
||||
``(1, 3, 320, 320)`` FP16 NCHW ndarray.
|
||||
2. ``inference_runtime.infer(handle, {"input": tensor})`` ->
|
||||
``{"embedding": (1, 4096) FP16 ndarray}``.
|
||||
3. ``normaliser.l2_normalise(raw[0])`` -> global L2 (single-stage).
|
||||
4. Return :class:`VprQuery` with ``frame_id``, normalised embedding,
|
||||
produced_at monotonic ns.
|
||||
|
||||
Error envelope: every method raises only members of :class:`VprError`.
|
||||
``RuntimeError`` from the backbone forward -> rewrapped to
|
||||
:class:`VprBackboneError`; :class:`VprPreprocessError` from the
|
||||
preprocessor propagates unchanged.
|
||||
|
||||
Retrieval is a single-line delegation to :class:`FaissBridge.retrieve`;
|
||||
see AZ-341 AC-10.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Final, Literal
|
||||
|
||||
import numpy as np
|
||||
|
||||
from gps_denied_onboard._types.inference import (
|
||||
BuildConfig,
|
||||
EngineHandle,
|
||||
PrecisionMode,
|
||||
)
|
||||
from gps_denied_onboard._types.vpr import VprQuery, VprResult
|
||||
from gps_denied_onboard.clock import Clock
|
||||
from gps_denied_onboard.components.c2_vpr._faiss_bridge import FaissBridge
|
||||
from gps_denied_onboard.components.c2_vpr._preprocessor_mix_vpr import (
|
||||
MixVprBackbonePreprocessor,
|
||||
)
|
||||
from gps_denied_onboard.components.c2_vpr.descriptor_index_cut import (
|
||||
DescriptorIndexCut,
|
||||
)
|
||||
from gps_denied_onboard.components.c2_vpr.errors import (
|
||||
VprBackboneError,
|
||||
VprPreprocessError,
|
||||
)
|
||||
from gps_denied_onboard.components.c2_vpr.inference_runtime_cut import (
|
||||
InferenceRuntimeCut,
|
||||
)
|
||||
from gps_denied_onboard.config.schema import ConfigError
|
||||
from gps_denied_onboard.fdr_client import EnqueueResult, FdrClient
|
||||
from gps_denied_onboard.fdr_client.records import (
|
||||
CURRENT_SCHEMA_VERSION,
|
||||
FdrRecord,
|
||||
)
|
||||
from gps_denied_onboard.helpers.descriptor_normaliser import DescriptorNormaliser
|
||||
from gps_denied_onboard.helpers.iso_timestamps import (
|
||||
iso_ts_from_clock as _iso_ts_from_clock,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from gps_denied_onboard._types.calibration import CameraCalibration
|
||||
from gps_denied_onboard._types.nav import NavCameraFrame
|
||||
from gps_denied_onboard.config.schema import Config
|
||||
|
||||
__all__ = ["DESCRIPTOR_DIM", "MixVprStrategy", "create"]
|
||||
|
||||
|
||||
# Embedding width published for MixVPR (D=4096, per the upstream research
# code drop). It is the largest VPR descriptor the project carries, so the
# matching FAISS HNSW corpus costs correspondingly more RAM, and
# researchers must rebuild the corpus when swapping between MixVPR and
# any non-4096 backbone (see the AZ-336 pre-flight dim-mismatch check).
# The engine's output shape is asserted against this value at create() time.
DESCRIPTOR_DIM: Final[int] = 4096

# Identity of this component / backbone in structured logs and FDR payloads.
_COMPONENT: Final[str] = "c2_vpr"
_BACKBONE_LABEL: Final[Literal["mix_vpr"]] = "mix_vpr"

# Tensor names used when talking to the engine.
_ENGINE_INPUT_KEY: Final[str] = "input"
_OUTPUT_KEY: Final[str] = "embedding"

# Runtime labels create() accepts; anything else is rejected.
_ALLOWED_RUNTIME_LABELS: Final[frozenset[str]] = frozenset(
    ("tensorrt", "onnx_trt_ep")
)

# ``kind`` values attached to structured log records.
_LOG_KIND_READY: Final[str] = "c2.vpr.ready"
_LOG_KIND_PREPROCESS_ERROR: Final[str] = "c2.vpr.preprocess_error"
_LOG_KIND_BACKBONE_ERROR: Final[str] = "c2.vpr.backbone_error"
_LOG_KIND_FDR_OVERRUN: Final[str] = "c2.vpr.fdr_overrun"

# ``kind`` values of the FDR records this module enqueues.
_FDR_KIND_EMBED: Final[str] = "vpr.embed_query"
_FDR_KIND_PREPROCESS_ERROR: Final[str] = "vpr.preprocess_error"
_FDR_KIND_BACKBONE_ERROR: Final[str] = "vpr.backbone_error"
|
||||
|
||||
|
||||
class MixVprStrategy:
    """MixVPR-backed secondary ``VprStrategy`` for C2.

    Each query frame is preprocessed, pushed through the injected
    inference runtime using an already-loaded engine handle, globally
    L2-normalised and returned as a :class:`VprQuery`. Top-k retrieval
    is delegated to the injected :class:`FaissBridge`. Successful embeds
    and every preprocess / backbone failure are enqueued to the FDR
    client; failures are also logged at ERROR level.

    Stateless across frames (INV-2); single-threaded per instance
    (INV-1, per AZ-336).
    """

    def __init__(
        self,
        *,
        inference_runtime: InferenceRuntimeCut,
        engine_handle: EngineHandle,
        descriptor_index: DescriptorIndexCut,
        preprocessor: MixVprBackbonePreprocessor,
        normaliser: DescriptorNormaliser,
        faiss_bridge: FaissBridge,
        fdr_client: FdrClient,
        clock: Clock,
        logger: logging.Logger,
        descriptor_dim: int = DESCRIPTOR_DIM,
    ) -> None:
        """Store the injected collaborators; no engine work happens here.

        Raises:
            ValueError: if ``descriptor_dim`` is smaller than 1.
        """
        if descriptor_dim < 1:
            raise ValueError(
                f"MixVprStrategy.descriptor_dim must be >= 1; "
                f"got {descriptor_dim}"
            )
        self._descriptor_dim = descriptor_dim

        # Inference side.
        self._inference_runtime = inference_runtime
        self._engine_handle = engine_handle
        self._preprocessor = preprocessor
        self._normaliser = normaliser

        # Retrieval side.
        self._descriptor_index = descriptor_index
        self._faiss_bridge = faiss_bridge

        # Observability.
        self._fdr_client = fdr_client
        self._clock = clock
        self._logger = logger

    def embed_query(
        self,
        frame: NavCameraFrame,
        calibration: CameraCalibration,
    ) -> VprQuery:
        """Embed one frame into an L2-normalised query descriptor.

        Returns:
            A :class:`VprQuery` carrying the frame id, the normalised
            embedding and ``produced_at`` set to the monotonic-clock
            reading taken right after inference.

        Raises:
            VprPreprocessError: propagated unchanged from the
                preprocessor, after being logged and recorded.
            VprBackboneError: when the runtime's ``infer`` raises, the
                expected output key is missing, or the output shape is
                not ``(1, descriptor_dim)``; logged and recorded first.
        """
        try:
            engine_input = self._preprocessor.preprocess(frame, calibration)
        except VprPreprocessError as preprocess_exc:
            self._emit_preprocess_error(frame, preprocess_exc)
            raise

        # Latency brackets the forward pass only; preprocessing is excluded.
        started_ns = self._clock.monotonic_ns()
        try:
            outputs = self._inference_runtime.infer(
                self._engine_handle, {_ENGINE_INPUT_KEY: engine_input}
            )
        except Exception as forward_exc:
            # Boundary catch: anything the runtime throws is reported and
            # surfaced as a VprBackboneError, chained to the original.
            backbone_error = self._wrap_backbone_error(frame, forward_exc)
            raise backbone_error from forward_exc
        finished_ns = self._clock.monotonic_ns()
        # Reported latency is floored at 1 microsecond.
        latency_us = max(1, (finished_ns - started_ns) // 1_000)

        descriptor = self._extract_descriptor(frame, outputs)
        normalised = self._normaliser.l2_normalise(descriptor)

        frame_id = int(frame.frame_id)
        self._emit_embed_record(frame_id=frame_id, latency_us=int(latency_us))

        return VprQuery(
            frame_id=frame_id,
            embedding=normalised,
            produced_at=finished_ns,
        )

    def retrieve_topk(self, query: VprQuery, k: int) -> VprResult:
        """Delegate top-``k`` retrieval to the FAISS bridge under this backbone's label."""
        bridge = self._faiss_bridge
        return bridge.retrieve(query, k, backbone_label=_BACKBONE_LABEL)

    def descriptor_dim(self) -> int:
        """Return the descriptor width this strategy was configured with."""
        return self._descriptor_dim

    def _extract_descriptor(self, frame: NavCameraFrame, outputs) -> np.ndarray:
        """Validate the engine outputs and return the flat FP16 descriptor.

        A missing output key or an unexpected shape is reported through
        :meth:`_emit_backbone_error` and raised as
        :class:`VprBackboneError`.
        """
        if _OUTPUT_KEY not in outputs:
            failure = VprBackboneError(
                f"MixVPR forward returned no {_OUTPUT_KEY!r} key; "
                f"got {sorted(outputs.keys())!r}"
            )
        else:
            raw = np.asarray(outputs[_OUTPUT_KEY])
            # One tuple comparison checks rank, batch size and width.
            if raw.shape == (1, self._descriptor_dim):
                return np.ascontiguousarray(raw[0], dtype=np.float16)
            failure = VprBackboneError(
                f"MixVPR forward returned shape {raw.shape}; "
                f"expected (1, {self._descriptor_dim})"
            )
        self._emit_backbone_error(frame, failure)
        raise failure

    def _wrap_backbone_error(
        self, frame: NavCameraFrame, exc: BaseException
    ) -> VprBackboneError:
        """Report ``exc`` as a backbone failure and return the wrapper to raise."""
        backbone_error = VprBackboneError(
            f"MixVPR forward raised {type(exc).__name__}: {exc}"
        )
        self._emit_backbone_error(frame, backbone_error)
        return backbone_error

    def _emit_embed_record(self, *, frame_id: int, latency_us: int) -> None:
        """Enqueue the per-frame embed record; warn if the FDR buffer dropped it."""
        payload = {
            "frame_id": frame_id,
            "backbone_label": _BACKBONE_LABEL,
            "descriptor_dim": self._descriptor_dim,
            "latency_us": latency_us,
        }
        record = FdrRecord(
            schema_version=CURRENT_SCHEMA_VERSION,
            ts=_iso_ts_from_clock(self._clock),
            producer_id=self._fdr_client.producer_id,
            kind=_FDR_KIND_EMBED,
            payload=payload,
        )
        if self._fdr_client.enqueue(record) == EnqueueResult.OVERRUN:
            overrun_extra = {
                "component": _COMPONENT,
                "kind": _LOG_KIND_FDR_OVERRUN,
                "kv": {
                    "frame_id": frame_id,
                    "backbone_label": _BACKBONE_LABEL,
                },
            }
            self._logger.warning(
                "FDR enqueue dropped vpr.embed_query record (buffer overrun)",
                extra=overrun_extra,
            )

    def _emit_backbone_error(
        self, frame: NavCameraFrame, error: BaseException
    ) -> None:
        """Log a backbone failure and enqueue the matching FDR record."""
        self._report_failure(
            int(frame.frame_id),
            f"MixVPR backbone error: {error}",
            error,
            log_kind=_LOG_KIND_BACKBONE_ERROR,
            fdr_kind=_FDR_KIND_BACKBONE_ERROR,
        )

    def _emit_preprocess_error(
        self, frame: NavCameraFrame, error: BaseException
    ) -> None:
        """Log a preprocess failure and enqueue the matching FDR record."""
        self._report_failure(
            int(frame.frame_id),
            f"MixVPR preprocess error: {error}",
            error,
            log_kind=_LOG_KIND_PREPROCESS_ERROR,
            fdr_kind=_FDR_KIND_PREPROCESS_ERROR,
        )

    def _report_failure(
        self,
        frame_id: int,
        message: str,
        error: BaseException,
        *,
        log_kind: str,
        fdr_kind: str,
    ) -> None:
        """Shared error path: ERROR log first, then one FDR record.

        The FDR payload carries the error message truncated to 512
        characters. The enqueue result is not inspected here.
        """
        error_type = type(error).__name__
        self._logger.error(
            message,
            extra={
                "component": _COMPONENT,
                "kind": log_kind,
                "kv": {
                    "frame_id": frame_id,
                    "backbone_label": _BACKBONE_LABEL,
                    "error_type": error_type,
                },
            },
        )
        failure_record = FdrRecord(
            schema_version=CURRENT_SCHEMA_VERSION,
            ts=_iso_ts_from_clock(self._clock),
            producer_id=self._fdr_client.producer_id,
            kind=fdr_kind,
            payload={
                "frame_id": frame_id,
                "backbone_label": _BACKBONE_LABEL,
                "error_type": error_type,
                "error_message": str(error)[:512],
            },
        )
        self._fdr_client.enqueue(failure_record)
|
||||
|
||||
|
||||
def _build_trt_build_config() -> BuildConfig:
    """Return the fixed engine build settings used for MixVPR.

    FP16 precision, ``workspace_mb=0``, no calibration dataset and an
    empty tuple of optimisation profiles.
    """
    build_config = BuildConfig(
        precision=PrecisionMode.FP16,
        optimization_profiles=(),
        calibration_dataset=None,
        workspace_mb=0,
    )
    return build_config
|
||||
|
||||
|
||||
def create(
    config: Config,
    *,
    descriptor_index: DescriptorIndexCut,
    inference_runtime: InferenceRuntimeCut,
    fdr_client: FdrClient | None = None,
    clock: Clock | None = None,
    logger: logging.Logger | None = None,
) -> MixVprStrategy:
    """Module-level factory consumed by :func:`build_vpr_strategy`.

    Steps, in order: check the runtime label, read the ``c2_vpr`` config
    block, resolve the injected collaborators, compile and deserialize
    the engine, build the preprocessor / normaliser / FAISS bridge,
    probe the engine output shape with one zero-input inference (AC-6),
    log readiness, and return the bound strategy.

    Args:
        config: Application config; the ``"c2_vpr"`` component block
            supplies the weights path and the FAISS bridge settings.
        descriptor_index: Index handed to the FAISS bridge and strategy.
        inference_runtime: Runtime used to compile, load and run the
            engine.
        fdr_client: Running FDR client. Typed optional, but ``None`` is
            rejected: the composition root must inject it.
        clock: Clock to use; a ``WallClock`` is created when omitted.
        logger: Logger to use; defaults to
            ``gps_denied_onboard.c2_vpr.mix_vpr`` when omitted.

    Raises:
        ConfigError: if ``current_runtime_label()`` is not one of
            ``{"tensorrt", "onnx_trt_ep"}``, or the engine output probe
            fails, both before any strategy is bound.
        ValueError: if ``fdr_client`` is ``None``.
    """
    runtime_label = inference_runtime.current_runtime_label()
    if runtime_label not in _ALLOWED_RUNTIME_LABELS:
        raise ConfigError(
            f"MixVPR requires BUILD_TENSORRT_RUNTIME=ON (or "
            f"BUILD_ONNX_TRT_EP_RUNTIME=ON as fallback); this binary "
            f"has runtime_label={runtime_label!r}."
        )

    vpr_block = config.components["c2_vpr"]
    weights_path = vpr_block.backbone_weights_path

    if fdr_client is None:
        raise ValueError(
            "MixVprStrategy.create: fdr_client is required; the "
            "composition root must inject the running FDR client."
        )
    if clock is None:
        # Imported lazily so the default is only loaded when needed.
        from gps_denied_onboard.clock.wall_clock import WallClock

        clock = WallClock()
    if logger is None:
        logger = logging.getLogger("gps_denied_onboard.c2_vpr.mix_vpr")

    # Load the engine now rather than at first frame.
    handle = inference_runtime.deserialize_engine(
        inference_runtime.compile_engine(
            weights_path, _build_trt_build_config()
        )
    )

    preprocessor = MixVprBackbonePreprocessor(logger=logger)
    normaliser = DescriptorNormaliser()
    faiss_bridge = FaissBridge(
        descriptor_index=descriptor_index,
        descriptor_dim=DESCRIPTOR_DIM,
        warn_top1_threshold=vpr_block.warn_top1_threshold,
        debug_log_per_frame_distances=vpr_block.debug_per_frame_distances,
        fdr_client=fdr_client,
        logger=logger,
        clock=clock,
    )

    # Fail at startup, not after takeoff, if the engine has the wrong shape.
    _assert_engine_output_dim(inference_runtime, handle, preprocessor)

    ready_extra = {
        "component": _COMPONENT,
        "kind": _LOG_KIND_READY,
        "kv": {
            "strategy": _BACKBONE_LABEL,
            "descriptor_dim": DESCRIPTOR_DIM,
        },
    }
    logger.info("C2 VPR strategy ready", extra=ready_extra)

    return MixVprStrategy(
        inference_runtime=inference_runtime,
        engine_handle=handle,
        descriptor_index=descriptor_index,
        preprocessor=preprocessor,
        normaliser=normaliser,
        faiss_bridge=faiss_bridge,
        fdr_client=fdr_client,
        clock=clock,
        logger=logger,
        descriptor_dim=DESCRIPTOR_DIM,
    )
|
||||
|
||||
|
||||
def _assert_engine_output_dim(
    inference_runtime: InferenceRuntimeCut,
    handle: EngineHandle,
    preprocessor: MixVprBackbonePreprocessor,
) -> None:
    """Probe the engine once and check its output is ``(1, DESCRIPTOR_DIM)``.

    A single inference is run on an all-zero FP16 tensor shaped
    ``(1, 3, h, w)``, where ``(h, w)`` is ``preprocessor.input_shape()``.

    Raises:
        ConfigError: if the outputs carry no ``_OUTPUT_KEY`` entry, or
            the tensor under that key is not shaped
            ``(1, DESCRIPTOR_DIM)``.
    """
    # This helper exists four times (ultra_vpr / net_vlad / mega_loc /
    # mix_vpr). AZ-527 (hygiene PBI sized in parallel with AZ-339) will
    # consolidate the copies; extracting it any earlier would have
    # widened AZ-339 beyond the two new strategies, so the duplication
    # is intentional for now.
    height, width = preprocessor.input_shape()
    zero_input = np.zeros((1, 3, height, width), dtype=np.float16)
    engine_outputs = inference_runtime.infer(
        handle, {_ENGINE_INPUT_KEY: zero_input}
    )

    if _OUTPUT_KEY not in engine_outputs:
        present_keys = sorted(engine_outputs.keys())
        raise ConfigError(
            f"engine output shape mismatch: {_OUTPUT_KEY!r} key absent; "
            f"got keys {present_keys!r}"
        )

    # Comparing whole shape tuples covers rank, batch size and width at once.
    observed_shape = tuple(np.asarray(engine_outputs[_OUTPUT_KEY]).shape)
    if observed_shape != (1, DESCRIPTOR_DIM):
        raise ConfigError(
            f"engine output shape mismatch: expected (1, {DESCRIPTOR_DIM}), "
            f"got {observed_shape}"
        )
|
||||
Reference in New Issue
Block a user