mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-23 07:51:13 +00:00
[AZ-270] [AZ-272] [AZ-279] [AZ-281] [AZ-283] Compose root + FDR schema + 3 Layer-1 helpers
AZ-270: composition root with strategy registry, tier-gated lookup, topo-order construction, all-or-nothing teardown, StrategyNotLinkedError payload. AZ-272: orjson-backed FdrRecord serialise/parse with forward-compat for unknown payload + top-level fields and canonical overrun-record shape. AZ-279: pyproj-backed WGS84/ECEF/ENU + OSM slippy-map tile math with WgsConversionError for shape/range/zoom guards. AZ-281: strict EngineFilenameSchema build/parse/matches_host with anchored regex + enum validation; round-trip identity by construction. AZ-283: dtype-preserving (fp16/fp32) single + batch L2 normaliser with zero-norm safety and descriptor_metric() source-of-truth. pyproject.toml pins pyproj>=3.6 and orjson>=3.9 (named-backend deps per the AZ-272 / AZ-279 contracts). New DTOs LatLonAlt + BoundingBox and EngineCacheKey + HostCapabilities land in _types/ to back the helper contracts. 203 unit tests pass (64 new). Review verdict: PASS_WITH_WARNINGS; findings are perf-NFR deferrals + dep amendment + minor docstring polish. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -1,14 +1,97 @@
|
||||
"""Descriptor-normalisation utility — STUB.
|
||||
"""L2 descriptor normaliser aligning cosine similarity to FAISS inner-product (AZ-283).
|
||||
|
||||
Concrete impl owned by AZ-283. Contract:
|
||||
`_docs/02_document/common-helpers/08_helper_descriptor_normaliser.md`.
|
||||
Public surface frozen by
|
||||
``_docs/02_document/contracts/shared_helpers/descriptor_normaliser.md`` v1.0.0.
|
||||
|
||||
Used on both the corpus side (C10 index build) and the query side (C2 runtime
|
||||
lookup). The two sides MUST go through the same helper so the FAISS HNSW
|
||||
search returns useful neighbours.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
from typing import Final
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Names exported by ``from <module> import *``.
__all__ = ["ALLOWED_DTYPES", "DescriptorNormaliser", "DescriptorNormaliserError"]
|
||||
|
||||
# Only half- and single-precision floats are accepted; any other dtype is
# rejected so the FAISS index and the query path stay on the same precision.
ALLOWED_DTYPES: Final[tuple[np.dtype, ...]] = tuple(
    np.dtype(scalar_type) for scalar_type in (np.float16, np.float32)
)

# Metric identifier handed out by ``DescriptorNormaliser.descriptor_metric()``.
_METRIC_VALUE: Final[str] = "inner_product"
|
||||
|
||||
|
||||
def l2_normalise(descriptors: Any) -> Any:
    """Placeholder for the (N, D) descriptor-matrix L2 normaliser.

    No normalisation is performed here: every call raises, naming AZ-283 as
    the work item that owns the concrete implementation.

    Args:
        descriptors: Accepted for signature compatibility only; never read.

    Raises:
        NotImplementedError: Always.
    """
    reason = "descriptor_normaliser concrete impl is AZ-283"
    raise NotImplementedError(reason)
|
||||
class DescriptorNormaliserError(ValueError):
    """Signals a descriptor whose shape or dtype breaks the AZ-283 contract.

    Subclasses ``ValueError`` so callers that already catch ``ValueError``
    also catch this error.
    """
|
||||
|
||||
|
||||
def _validate_dtype(arr: np.ndarray, label: str) -> None:
    """Reject arrays whose dtype is not listed in ``ALLOWED_DTYPES``.

    Args:
        arr: Array whose ``dtype`` is checked; its contents are not inspected.
        label: Name of the calling operation, used as the message prefix.

    Raises:
        DescriptorNormaliserError: If ``arr.dtype`` is not a member of
            ``ALLOWED_DTYPES``.
    """
    if arr.dtype in ALLOWED_DTYPES:
        return
    raise DescriptorNormaliserError(
        f"{label}: dtype {arr.dtype} not in allowed set (float16, float32)"
    )
|
||||
|
||||
|
||||
class DescriptorNormaliser:
    """Static helpers that scale descriptors to unit L2 norm.

    The class keeps no state. Outputs carry the dtype of the input, and any
    vector whose norm evaluates to zero comes back as all zeros rather than
    being divided by zero. Inputs are never modified.
    """

    @staticmethod
    def l2_normalise(descriptor: np.ndarray) -> np.ndarray:
        """Return a unit-norm version of a single 1-D descriptor.

        Args:
            descriptor: Array of shape ``(D,)`` with ``D >= 1`` and a dtype
                accepted by ``_validate_dtype``.

        Returns:
            An array with the same shape and dtype as ``descriptor``; all
            zeros when the norm evaluates to zero.

        Raises:
            DescriptorNormaliserError: If the argument is not an
                ``np.ndarray``, is not 1-D, is empty, or has a rejected dtype.
        """
        if not isinstance(descriptor, np.ndarray):
            raise DescriptorNormaliserError(
                f"l2_normalise: expected np.ndarray; got {type(descriptor).__name__}"
            )
        if len(descriptor.shape) != 1:
            raise DescriptorNormaliserError(
                f"l2_normalise: expected 1-D shape (D,); got shape {descriptor.shape}"
            )
        if descriptor.size == 0:
            raise DescriptorNormaliserError(
                f"l2_normalise: dimension must be >= 1; got shape {descriptor.shape}"
            )
        _validate_dtype(descriptor, "l2_normalise")

        # The arithmetic runs in float32 so float16 inputs cannot overflow or
        # underflow while the norm is accumulated; the result is cast back to
        # the caller's dtype afterwards so precision is never silently widened.
        widened = descriptor.astype(np.float32, copy=False)
        length = float(np.linalg.norm(widened))
        if length == 0.0:
            return np.zeros_like(descriptor)
        return (widened / length).astype(descriptor.dtype, copy=False)

    @staticmethod
    def l2_normalise_batch(descriptors: np.ndarray) -> np.ndarray:
        """Return a row-wise unit-norm version of a 2-D descriptor matrix.

        Args:
            descriptors: Array of shape ``(N, D)`` with ``N >= 1``,
                ``D >= 1`` and a dtype accepted by ``_validate_dtype``.

        Returns:
            An array with the same shape and dtype as ``descriptors``; rows
            whose norm evaluates to zero are all zeros.

        Raises:
            DescriptorNormaliserError: If the argument is not an
                ``np.ndarray``, is not 2-D, has an empty axis, or has a
                rejected dtype.
        """
        if not isinstance(descriptors, np.ndarray):
            raise DescriptorNormaliserError(
                f"l2_normalise_batch: expected np.ndarray; got {type(descriptors).__name__}"
            )
        if len(descriptors.shape) != 2:
            raise DescriptorNormaliserError(
                f"l2_normalise_batch: expected 2-D shape (N, D); got shape {descriptors.shape}"
            )
        if min(descriptors.shape) < 1:
            raise DescriptorNormaliserError(
                f"l2_normalise_batch: N and D must be >= 1; got shape {descriptors.shape}"
            )
        _validate_dtype(descriptors, "l2_normalise_batch")

        widened = descriptors.astype(np.float32, copy=False)
        row_norms = np.linalg.norm(widened, axis=1, keepdims=True)
        zero_rows = row_norms == 0.0
        # Divide zero-norm rows by 1 to avoid a division by zero, then force
        # those rows to exactly zero in the result.
        divisors = np.where(zero_rows, 1.0, row_norms)
        scaled = np.where(zero_rows, 0.0, widened / divisors)
        return scaled.astype(descriptors.dtype, copy=False)

    @staticmethod
    def descriptor_metric() -> str:
        """Return the module-level metric identifier ``_METRIC_VALUE``."""
        return _METRIC_VALUE
|
||||
|
||||
Reference in New Issue
Block a user