[AZ-270] [AZ-272] [AZ-279] [AZ-281] [AZ-283] Compose root + FDR schema + 3 Layer-1 helpers

AZ-270: composition root with strategy registry, tier-gated lookup,
topo-order construction, all-or-nothing teardown, StrategyNotLinkedError
payload.
AZ-272: orjson-backed FdrRecord serialise/parse with forward-compat for
unknown payload + top-level fields and canonical overrun-record shape.
AZ-279: pyproj-backed WGS84/ECEF/ENU + OSM slippy-map tile math with
WgsConversionError for shape/range/zoom guards.
AZ-281: strict EngineFilenameSchema build/parse/matches_host with
anchored regex + enum validation; round-trip identity by construction.
AZ-283: dtype-preserving (fp16/fp32) single + batch L2 normaliser with
zero-norm safety and descriptor_metric() source-of-truth.
pyproject.toml pins pyproj>=3.6 and orjson>=3.9 (named-backend deps per
the AZ-272 / AZ-279 contracts). New DTOs LatLonAlt + BoundingBox and
EngineCacheKey + HostCapabilities land in _types/ to back the helper
contracts.
203 unit tests pass (64 new). Review verdict: PASS_WITH_WARNINGS;
findings are perf-NFR deferrals + dep amendment + minor docstring polish.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-11 02:03:36 +03:00
parent 8e71f6c002
commit 3acc7f33dd
24 changed files with 2381 additions and 97 deletions
@@ -1,14 +1,97 @@
"""Descriptor-normalisation utility — STUB.
"""L2 descriptor normaliser aligning cosine similarity to FAISS inner-product (AZ-283).
Concrete impl owned by AZ-283. Contract:
`_docs/02_document/common-helpers/08_helper_descriptor_normaliser.md`.
Public surface frozen by
``_docs/02_document/contracts/shared_helpers/descriptor_normaliser.md`` v1.0.0.
Used on both the corpus side (C10 index build) and the query side (C2 runtime
lookup). The two sides MUST go through the same helper so the FAISS HNSW
search returns useful neighbours.
"""
from __future__ import annotations
from typing import Any
from typing import Final
import numpy as np
# Names exported by ``from <module> import *``; everything else in this
# module (including underscore-prefixed helpers) is not part of that list.
__all__ = [
    "ALLOWED_DTYPES",
    "DescriptorNormaliser",
    "DescriptorNormaliserError",
]
# Allowed input dtypes; anything else is rejected to keep the FAISS index and
# query path on the same precision.
ALLOWED_DTYPES: Final[tuple[np.dtype, ...]] = (
    np.dtype(np.float16),
    np.dtype(np.float32),
)
# Metric identifier handed out by ``DescriptorNormaliser.descriptor_metric()``.
# For unit-L2-norm vectors the inner product equals the cosine similarity.
_METRIC_VALUE: Final[str] = "inner_product"
def l2_normalise(descriptors: Any) -> Any:
    """Placeholder for a module-level (N, D) descriptor-matrix L2 normaliser.

    This function carries no implementation; every call fails.

    Args:
        descriptors: Ignored.

    Raises:
        NotImplementedError: Always.
    """
    message = "descriptor_normaliser concrete impl is AZ-283"
    raise NotImplementedError(message)
class DescriptorNormaliserError(ValueError):
    """Signal a descriptor whose shape or dtype is not accepted (AZ-283).

    Subclasses ``ValueError``, so handlers written as ``except ValueError``
    catch it as well.
    """
def _validate_dtype(arr: np.ndarray, label: str) -> None:
    """Reject arrays whose dtype is not listed in ``ALLOWED_DTYPES``.

    Args:
        arr: Array whose ``dtype`` is checked.
        label: Name of the calling operation; used as the message prefix.

    Raises:
        DescriptorNormaliserError: If ``arr.dtype`` is not an allowed dtype.
    """
    if arr.dtype in ALLOWED_DTYPES:
        return
    raise DescriptorNormaliserError(
        f"{label}: dtype {arr.dtype} not in allowed set (float16, float32)"
    )
class DescriptorNormaliser:
    """Stateless L2-normalisation helper; dtype-preserving; zero-norm safe.

    Every method is a ``staticmethod``, so the class acts as a namespace and
    never needs to be instantiated. The normalisers do not modify their
    input; they return a new array with the input's shape and dtype.
    """

    @staticmethod
    def l2_normalise(descriptor: np.ndarray) -> np.ndarray:
        """Scale a single descriptor to unit L2 norm.

        Args:
            descriptor: 1-D array of shape ``(D,)`` with ``D >= 1`` and dtype
                float16 or float32.

        Returns:
            A new array with the shape and dtype of ``descriptor``. An
            all-zero descriptor yields an all-zero result instead of a
            division by zero.

        Raises:
            DescriptorNormaliserError: If ``descriptor`` is not an
                ``np.ndarray``, is not 1-D, is empty, or has a dtype other
                than float16 / float32.
        """
        if not isinstance(descriptor, np.ndarray):
            raise DescriptorNormaliserError(
                f"l2_normalise: expected np.ndarray; got {type(descriptor).__name__}"
            )
        if descriptor.ndim != 1:
            raise DescriptorNormaliserError(
                f"l2_normalise: expected 1-D shape (D,); got shape {descriptor.shape}"
            )
        if descriptor.shape[0] < 1:
            raise DescriptorNormaliserError(
                f"l2_normalise: dimension must be >= 1; got shape {descriptor.shape}"
            )
        _validate_dtype(descriptor, "l2_normalise")
        # Work in float64. Squaring in float32 overflows to inf once an
        # element exceeds ~1.8e19 and underflows to 0 below ~3e-23, which
        # made a non-zero float32 descriptor normalise to all zeros. float64
        # can represent the square of every finite float16/float32 value.
        # ``astype`` to a different dtype always copies, so the in-place
        # division below never touches the caller's array.
        wide = descriptor.astype(np.float64)
        norm = float(np.linalg.norm(wide))
        if norm == 0.0:
            return np.zeros_like(descriptor)
        wide /= norm
        # Cast back to the caller dtype so we never silently up-cast.
        return wide.astype(descriptor.dtype)

    @staticmethod
    def l2_normalise_batch(descriptors: np.ndarray) -> np.ndarray:
        """Scale every row of a descriptor matrix to unit L2 norm.

        Args:
            descriptors: 2-D array of shape ``(N, D)`` with ``N >= 1``,
                ``D >= 1`` and dtype float16 or float32.

        Returns:
            A new ``(N, D)`` array with the dtype of ``descriptors`` in which
            each row is normalised independently. All-zero rows stay
            all-zero.

        Raises:
            DescriptorNormaliserError: If ``descriptors`` is not an
                ``np.ndarray``, is not 2-D, has an empty axis, or has a dtype
                other than float16 / float32.
        """
        if not isinstance(descriptors, np.ndarray):
            raise DescriptorNormaliserError(
                f"l2_normalise_batch: expected np.ndarray; got {type(descriptors).__name__}"
            )
        if descriptors.ndim != 2:
            raise DescriptorNormaliserError(
                f"l2_normalise_batch: expected 2-D shape (N, D); got shape {descriptors.shape}"
            )
        if descriptors.shape[0] < 1 or descriptors.shape[1] < 1:
            raise DescriptorNormaliserError(
                f"l2_normalise_batch: N and D must be >= 1; got shape {descriptors.shape}"
            )
        _validate_dtype(descriptors, "l2_normalise_batch")
        # float64 working copy: same overflow / underflow reasoning as in
        # ``l2_normalise`` (float32 sums of squares are not safe for float32
        # inputs with very large or very small magnitudes).
        wide = descriptors.astype(np.float64)
        norms = np.linalg.norm(wide, axis=1, keepdims=True)
        # Divide in place, skipping rows whose norm is zero; those rows are
        # all-zero already and must stay that way (no division by zero).
        np.divide(wide, norms, out=wide, where=norms != 0.0)
        # Cast back to the caller dtype so we never silently up-cast.
        return wide.astype(descriptors.dtype)

    @staticmethod
    def descriptor_metric() -> str:
        """Return the similarity-metric identifier (``_METRIC_VALUE``)."""
        return _METRIC_VALUE