mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-21 21:41:12 +00:00
[AZ-306] C6 FaissDescriptorIndex (faiss-cpu, HNSW32)
Production-default DescriptorIndex strategy backed by the faiss-cpu PyPI wheel (>=1.7,<2.0). Implements the AZ-303 Protocol surface end to end: HNSW32 + IndexIDMap2 search, atomic three-file rebuild (.index + .sha256 sidecar + .meta.json), triple-consistency load check, mmap-backed reads with IO_FLAG_MMAP|IO_FLAG_READ_ONLY, optional warm-up query at construction, FAISS RuntimeError rewrap to IndexUnavailableError / IndexBuildError, and FaissDescriptorIndex.from_config classmethod wired into runtime_root.storage_factory. The original spec required a custom pybind11 wrapper over a vendored FAISS HEAD; the user opted for the upstream faiss-cpu wheel after research fact #92 confirmed ARM64 wheel availability for Jetson and the existing pyproject.toml already pinned faiss-cpu. cpp/faiss_index/ placeholder removed; BUILD_FAISS_INDEX flag retained as a runtime/factory gate (no native target). Spec rewritten end-to-end and archived to _docs/02_tasks/done/. C6TileCacheConfig extended with faiss_index_path and faiss_warmup_query_path fields. tests/conftest.py sets KMP_DUPLICATE_LIB_OK=TRUE to remediate the macOS faiss/torch libomp duplicate-load abort during pytest (no-op on CI Linux). 21 new tests cover AC-1..12 + 2 NFRs + from_config smoke; AZ-303 protocol-conformance fake updated with from_config classmethod. Tests: 124/124 c6_tile_cache pass; 1334 project-wide pass; 1 pre-existing OKVIS2 submodule failure unrelated. Doc sync: module-layout.md, components/08_c6_tile_cache/description.md §5, batch_35_cycle1_report.md. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,650 @@
|
||||
"""AZ-306 — C6 ``FaissDescriptorIndex`` unit tests.
|
||||
|
||||
Covers AC-1 through AC-12 plus NFR-perf-rebuild + NFR-reliability-rewrap
|
||||
from ``_docs/02_tasks/todo/AZ-306_c6_faiss_descriptor_index.md``.
|
||||
|
||||
The tests use the real ``faiss-cpu`` dep (promoted to main deps in this
|
||||
task) — no fake-FAISS shim. AZ-303 already covers the
|
||||
Protocol-conformance / factory-gate boundary; this file exercises the
|
||||
production behaviour end-to-end against a real ``.index`` file.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
import hashlib
|
||||
import importlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import faiss
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from gps_denied_onboard.components.c6_tile_cache import (
|
||||
C6TileCacheConfig,
|
||||
HnswParams,
|
||||
IndexBuildError,
|
||||
IndexUnavailableError,
|
||||
TileId,
|
||||
)
|
||||
from gps_denied_onboard.components.c6_tile_cache import faiss_descriptor_index as fdi_mod
|
||||
from gps_denied_onboard.components.c6_tile_cache.faiss_descriptor_index import (
|
||||
META_SUFFIX,
|
||||
FaissDescriptorIndex,
|
||||
tile_id_to_int64,
|
||||
)
|
||||
from gps_denied_onboard.config.schema import Config
|
||||
from gps_denied_onboard.helpers.sha256_sidecar import SIDECAR_SUFFIX, Sha256Sidecar
|
||||
|
||||
# --------------------------------------------------------------------- helpers
|
||||
|
||||
|
||||
def _sample_tile_ids(n: int, *, zoom: int = 18) -> list[TileId]:
    """Return ``n`` synthetic tiles stepping diagonally away from (49.0, 36.0).

    Tile ``i`` is offset by ``i * 1e-4`` degrees in both latitude and
    longitude, so every tile in the returned list has distinct coordinates.
    """
    tiles: list[TileId] = []
    for step in range(n):
        tiles.append(
            TileId(
                zoom_level=zoom,
                lat=49.0 + (step * 1e-4),
                lon=36.0 + (step * 1e-4),
            )
        )
    return tiles
|
||||
|
||||
|
||||
def _make_descriptors(n: int, d: int, *, seed: int = 0) -> np.ndarray:
    """Return a seeded, C-contiguous ``(n, d)`` float32 array of normal samples.

    The same ``seed`` always yields the same array, which lets tests rebuild
    identical corpora on demand.
    """
    generator = np.random.default_rng(seed)
    return np.ascontiguousarray(
        generator.standard_normal((n, d), dtype=np.float32),
        dtype=np.float32,
    )
|
||||
|
||||
|
||||
def _make_logger() -> logging.Logger:
    """Return the ``tests.az306`` logger with its level set to DEBUG."""
    test_logger = logging.getLogger("tests.az306")
    test_logger.setLevel(logging.DEBUG)
    return test_logger
|
||||
|
||||
|
||||
def _build_initial_index(
    tmp_path: Path,
    *,
    n: int = 32,
    d: int = 16,
    hnsw_params: HnswParams | None = None,
) -> tuple[Path, list[TileId], np.ndarray, HnswParams]:
    """Create ``tmp_path/tiles.index`` and rebuild it through the class under test.

    A baseline index is first written straight through FAISS, then a
    ``FaissDescriptorIndex`` is opened on it and ``rebuild_from_descriptors``
    is run with the same inputs.

    Returns:
        ``(index_path, tile_ids, descriptors, params)`` — the index location
        plus the inputs that were used to build it.
    """
    target = tmp_path / "tiles.index"
    sidecar_helper = Sha256Sidecar()
    test_logger = _make_logger()
    vectors = _make_descriptors(n, d, seed=0)
    tiles = _sample_tile_ids(n)
    if hnsw_params:
        effective_params = hnsw_params
    else:
        effective_params = HnswParams(m=16, ef_construction=40, ef_search=32)

    written_to = _make_bootstrap_index_on_disk(
        index_path=target,
        tile_ids=tiles,
        descriptors=vectors,
        params=effective_params,
    )
    assert written_to == target  # sanity

    under_test = FaissDescriptorIndex(
        index_path=target,
        sidecar=sidecar_helper,
        logger=test_logger,
    )
    under_test.rebuild_from_descriptors(vectors, tiles, effective_params)
    return target, tiles, vectors, effective_params
|
||||
|
||||
|
||||
def _make_bootstrap_index_on_disk(
    *,
    index_path: Path,
    tile_ids: list[TileId],
    descriptors: np.ndarray,
    params: HnswParams,
) -> Path:
    """Write an index file, its sha256 sidecar and its meta JSON using FAISS directly.

    Tests call this when they need an index on disk BEFORE constructing the
    class under test; it does not go through the rebuild path, so rebuild
    tests start from a known-good baseline.

    Returns:
        ``index_path``, unchanged.
    """
    dim = int(descriptors.shape[1])

    # HNSW graph wrapped in an ID map so vectors are addressed by tile int64 id.
    hnsw_index = faiss.IndexHNSWFlat(dim, params.m, faiss.METRIC_L2)
    hnsw_index.hnsw.efConstruction = params.ef_construction
    hnsw_index.hnsw.efSearch = params.ef_search
    id_mapped = faiss.IndexIDMap2(hnsw_index)
    ids = np.asarray([tile_id_to_int64(tile) for tile in tile_ids], dtype=np.int64)
    id_mapped.add_with_ids(descriptors, ids)
    faiss.write_index(id_mapped, str(index_path))

    # The sidecar stores the hex digest of the bytes that were just written.
    sha_hex = hashlib.sha256(index_path.read_bytes()).hexdigest()
    sidecar_file = Path(str(index_path) + SIDECAR_SUFFIX)
    sidecar_file.write_text(sha_hex, encoding="ascii")

    meta = {
        "descriptor_dim": dim,
        "n_vectors": len(tile_ids),
        "backbone_label": "test_backbone",
        "backbone_sha256_hex": "0" * 64,
        "built_at": datetime.now().astimezone().isoformat(),
        "hnsw_params": dataclasses.asdict(params),
        "sidecar_sha256_hex": sha_hex,
        "file_path": str(index_path),
        "id_mapping": [],
    }
    for raw_id, tile in zip(ids.tolist(), tile_ids, strict=True):
        meta["id_mapping"].append(
            {
                "int64_id": int(raw_id),
                "zoom_level": tile.zoom_level,
                "lat": tile.lat,
                "lon": tile.lon,
            }
        )

    meta_file = Path(str(index_path) + META_SUFFIX)
    meta_file.write_text(json.dumps(meta, sort_keys=True, indent=2), encoding="utf-8")
    return index_path
|
||||
|
||||
|
||||
def _open_existing(index_path: Path) -> FaissDescriptorIndex:
    """Construct a ``FaissDescriptorIndex`` on ``index_path`` with a new sidecar helper and the test logger."""
    sidecar_helper = Sha256Sidecar()
    test_logger = _make_logger()
    return FaissDescriptorIndex(
        index_path=index_path, sidecar=sidecar_helper, logger=test_logger
    )
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- AC-1
|
||||
|
||||
|
||||
def test_ac1_search_topk_returns_self_match_first(tmp_path: Path) -> None:
    """AC-1: searching with a stored vector returns that vector's tile first.

    Also checks that exactly ``k`` hits come back and that their distances
    are in ascending order.
    """
    # Arrange
    corpus_size, dim = 1000, 16
    index_file = tmp_path / "tiles.index"
    vectors = _make_descriptors(corpus_size, dim, seed=42)
    tiles = _sample_tile_ids(corpus_size)
    _make_bootstrap_index_on_disk(
        index_path=index_file,
        tile_ids=tiles,
        descriptors=vectors,
        params=HnswParams(m=16, ef_construction=40, ef_search=64),
    )
    index = _open_existing(index_file)

    # Act
    hits = index.search_topk(vectors[0], k=5)

    # Assert
    assert len(hits) == 5
    best_hit = hits[0]
    assert best_hit[0] == tiles[0]
    assert best_hit[1] < 1e-6
    hit_distances = [distance for _, distance in hits]
    assert hit_distances == sorted(hit_distances)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- AC-2
|
||||
|
||||
|
||||
def test_ac2_search_topk_returns_fewer_than_k_when_corpus_is_small(tmp_path: Path) -> None:
    """AC-2: asking for more neighbours than exist returns the whole corpus, no padding."""
    corpus_size, dim = 3, 8
    index_file = tmp_path / "tiles.index"
    vectors = _make_descriptors(corpus_size, dim, seed=1)
    tiles = _sample_tile_ids(corpus_size)
    _make_bootstrap_index_on_disk(
        index_path=index_file,
        tile_ids=tiles,
        descriptors=vectors,
        params=HnswParams(m=8, ef_construction=20, ef_search=16),
    )
    index = _open_existing(index_file)

    hits = index.search_topk(vectors[1], k=10)

    assert len(hits) == corpus_size
    found_tiles = set()
    for tile, _distance in hits:
        found_tiles.add(tile)
    assert found_tiles == set(tiles)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- AC-3
|
||||
|
||||
|
||||
@pytest.fixture
def loaded_index(tmp_path: Path) -> FaissDescriptorIndex:
    """Provide a freshly opened index over a 16-vector, 8-dimensional corpus."""
    index_path, *_unused = _build_initial_index(tmp_path, n=16, d=8)
    return _open_existing(index_path)
|
||||
|
||||
|
||||
def test_ac3_search_topk_rejects_wrong_shape(loaded_index: FaissDescriptorIndex) -> None:
    """AC-3: a query one element longer than the index dimension is rejected."""
    oversized = np.zeros(loaded_index.descriptor_dim() + 1, dtype=np.float32)
    with pytest.raises(IndexUnavailableError) as exc_info:
        loaded_index.search_topk(oversized, k=3)
    message = str(exc_info.value)
    assert "shape" in message
|
||||
|
||||
|
||||
def test_ac3_search_topk_rejects_wrong_dtype(loaded_index: FaissDescriptorIndex) -> None:
    """AC-3: a float64 query of the correct length is rejected; the error names float32."""
    double_query = np.zeros(loaded_index.descriptor_dim(), dtype=np.float64)
    with pytest.raises(IndexUnavailableError) as exc_info:
        loaded_index.search_topk(double_query, k=3)
    message = str(exc_info.value)
    assert "float32" in message
|
||||
|
||||
|
||||
def test_ac3_search_topk_rejects_non_contiguous(loaded_index: FaissDescriptorIndex) -> None:
    """AC-3: a strided (non C-contiguous) float32 query of the right length is rejected.

    The query is built directly as every second element of a buffer twice
    the index dimension. The earlier two-step construction first took a row
    of a row-strided 2-D view, which is always C-contiguous in 1-D, so its
    fallback branch ran unconditionally; the dead first attempt is removed.
    """
    dim = loaded_index.descriptor_dim()
    # Correct length and dtype, but a two-element stride: only the
    # contiguity check can reject this query.
    bad_query = np.zeros(dim * 2, dtype=np.float32)[::2]
    assert bad_query.shape == (dim,)
    assert not bad_query.flags["C_CONTIGUOUS"]
    with pytest.raises(IndexUnavailableError) as exc_info:
        loaded_index.search_topk(bad_query, k=3)
    assert "contiguous" in str(exc_info.value).lower()
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- AC-4
|
||||
|
||||
|
||||
def test_ac4_rebuild_atomic_on_simulated_crash(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """AC-4: a rebuild whose commit step fails leaves the previous index intact.

    ``os.rename`` is patched to raise ``OSError``. The rebuild must surface
    ``IndexBuildError``, and the index file plus its sidecar must keep their
    pre-rebuild contents and still open afterwards.
    """
    # Arrange — bootstrap a valid baseline and snapshot it.
    count, dim = 8, 4
    index_file = tmp_path / "tiles.index"
    sidecar_file = tmp_path / ("tiles.index" + SIDECAR_SUFFIX)
    baseline_vectors = _make_descriptors(count, dim, seed=1)
    tiles = _sample_tile_ids(count)
    hnsw = HnswParams(m=8, ef_construction=20, ef_search=16)
    _make_bootstrap_index_on_disk(
        index_path=index_file,
        tile_ids=tiles,
        descriptors=baseline_vectors,
        params=hnsw,
    )
    index_snapshot = index_file.read_bytes()
    sidecar_snapshot = sidecar_file.read_text()

    index = _open_existing(index_file)

    # Act — simulate a crash AFTER the temp index is written but BEFORE the
    # atomic rename swaps it into place. Real OS rename failures raise
    # ``OSError``; ``Sha256Sidecar`` rewraps it to ``Sha256SidecarError``,
    # which the rebuild path turns into ``IndexBuildError``.
    simulated_failure = OSError("simulated crash before rename")

    def crashing_replace(*args: object, **kwargs: object) -> None:
        raise simulated_failure

    # ``Sha256Sidecar.write_atomic_and_sidecar`` delegates to ``atomicwrites``,
    # which on POSIX commits via ``os.rename``. Patching it makes the commit
    # step fail after the temp file lands — the exact AC-4 fault under test.
    monkeypatch.setattr(os, "rename", crashing_replace)

    replacement_vectors = _make_descriptors(count, dim, seed=2)
    with pytest.raises(IndexBuildError):
        index.rebuild_from_descriptors(replacement_vectors, tiles, hnsw)

    # Assert — original index + sidecar are intact and reload-able.
    assert index_file.read_bytes() == index_snapshot
    assert sidecar_file.read_text() == sidecar_snapshot
    # Put the real ``os.rename`` back before reopening the index.
    monkeypatch.undo()
    reopened = _open_existing(index_file)
    assert reopened.descriptor_dim() == dim
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- AC-5
|
||||
|
||||
|
||||
def test_ac5_rebuild_writes_correct_sidecars(tmp_path: Path) -> None:
    """AC-5: after a rebuild the sha256 sidecar and meta JSON describe the new index.

    The sidecar must equal the digest of the on-disk index, and the meta
    file must carry the dimension, vector count, digest, HNSW parameters and
    a ``built_at`` timestamp within a second of the rebuild call.
    """
    index_path = tmp_path / "tiles.index"
    sidecar_path = Path(str(index_path) + SIDECAR_SUFFIX)
    meta_path = Path(str(index_path) + META_SUFFIX)
    count, dim = 16, 8
    baseline_vectors = _make_descriptors(count, dim, seed=3)
    tiles = _sample_tile_ids(count)
    hnsw = HnswParams(m=8, ef_construction=20, ef_search=16)

    _make_bootstrap_index_on_disk(
        index_path=index_path, tile_ids=tiles, descriptors=baseline_vectors, params=hnsw
    )
    index = _open_existing(index_path)

    replacement_vectors = _make_descriptors(count, dim, seed=99)
    started = time.time()
    index.rebuild_from_descriptors(replacement_vectors, tiles, hnsw)
    finished = time.time()

    expected_sha = hashlib.sha256(index_path.read_bytes()).hexdigest()
    recorded_sha = sidecar_path.read_text(encoding="ascii").strip()
    assert recorded_sha == expected_sha

    meta = json.loads(meta_path.read_bytes().decode("utf-8"))
    assert meta["descriptor_dim"] == dim
    assert meta["n_vectors"] == count
    assert meta["sidecar_sha256_hex"] == expected_sha
    assert meta["hnsw_params"] == dataclasses.asdict(hnsw)
    built_at = datetime.fromisoformat(meta["built_at"]).timestamp()
    # One second of slack on both sides of the rebuild window.
    assert started - 1.0 <= built_at <= finished + 1.0
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- AC-6
|
||||
|
||||
|
||||
def test_ac6_corrupt_sidecar_blocks_construction(tmp_path: Path) -> None:
    """AC-6: an index whose sidecar digest was overwritten cannot be opened."""
    index_path = tmp_path / "tiles.index"
    sidecar_path = Path(str(index_path) + SIDECAR_SUFFIX)
    vectors = _make_descriptors(8, 4, seed=4)
    tiles = _sample_tile_ids(8)
    hnsw = HnswParams(m=8, ef_construction=20, ef_search=16)
    _make_bootstrap_index_on_disk(
        index_path=index_path, tile_ids=tiles, descriptors=vectors, params=hnsw
    )
    # Replace the real digest with 64 zeros, a well-formed but wrong value.
    sidecar_path.write_text("0" * 64, encoding="ascii")

    with pytest.raises(IndexUnavailableError) as exc_info:
        _open_existing(index_path)
    message = str(exc_info.value)
    assert "sidecar mismatch" in message
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- AC-7
|
||||
|
||||
|
||||
def test_ac7_missing_meta_blocks_construction(tmp_path: Path) -> None:
    """AC-7: an index whose meta JSON has been deleted cannot be opened."""
    index_path = tmp_path / "tiles.index"
    meta_path = Path(str(index_path) + META_SUFFIX)
    vectors = _make_descriptors(4, 4, seed=5)
    tiles = _sample_tile_ids(4)
    hnsw = HnswParams(m=8, ef_construction=20, ef_search=16)
    _make_bootstrap_index_on_disk(
        index_path=index_path, tile_ids=tiles, descriptors=vectors, params=hnsw
    )
    meta_path.unlink()

    with pytest.raises(IndexUnavailableError) as exc_info:
        _open_existing(index_path)
    message = str(exc_info.value)
    assert "meta.json missing" in message
|
||||
|
||||
|
||||
def test_ac7_malformed_meta_blocks_construction(tmp_path: Path) -> None:
    """AC-7: an index whose meta file is not valid JSON cannot be opened."""
    index_path = tmp_path / "tiles.index"
    meta_path = Path(str(index_path) + META_SUFFIX)
    vectors = _make_descriptors(4, 4, seed=6)
    tiles = _sample_tile_ids(4)
    hnsw = HnswParams(m=8, ef_construction=20, ef_search=16)
    _make_bootstrap_index_on_disk(
        index_path=index_path, tile_ids=tiles, descriptors=vectors, params=hnsw
    )
    meta_path.write_text("not-json{{", encoding="utf-8")

    with pytest.raises(IndexUnavailableError) as exc_info:
        _open_existing(index_path)
    message = str(exc_info.value)
    assert "malformed" in message
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- AC-8
|
||||
|
||||
|
||||
def test_ac8_warmup_query_pages_in_mmap(tmp_path: Path) -> None:
    """AC-8: an index constructed with a warm-up query serves later searches quickly.

    Ten searches are timed after construction. The slowest sample (index 9
    of 10 is what the p95 formula selects) must stay under a 50 ms sanity
    bound.
    """
    corpus_size, dim = 256, 16
    index_file = tmp_path / "tiles.index"
    vectors = _make_descriptors(corpus_size, dim, seed=7)
    tiles = _sample_tile_ids(corpus_size)
    hnsw = HnswParams(m=16, ef_construction=40, ef_search=32)
    _make_bootstrap_index_on_disk(
        index_path=index_file,
        tile_ids=tiles,
        descriptors=vectors,
        params=hnsw,
    )

    # Force the OS page cache cold (best-effort; no-op on platforms without
    # posix_fadvise — the test still verifies warm-up runs and the
    # subsequent search latency is bounded).
    can_drop_cache = hasattr(os, "posix_fadvise") and hasattr(os, "POSIX_FADV_DONTNEED")
    if can_drop_cache:
        with index_file.open("rb") as handle:
            os.posix_fadvise(handle.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)

    warmup_vector = vectors[7].copy()
    instance = FaissDescriptorIndex(
        index_path=index_file,
        sidecar=Sha256Sidecar(),
        logger=_make_logger(),
        warmup_query=warmup_vector,
    )

    # Subsequent searches should be fast (sanity bound — not the C2-PT-01
    # 5 ms canonical budget; this is a "warm-up succeeded" assertion).
    timings_ms: list[float] = []
    for row in range(10):
        query = vectors[row].copy()
        started = time.perf_counter()
        instance.search_topk(query, k=5)
        timings_ms.append((time.perf_counter() - started) * 1000)
    ordered = sorted(timings_ms)
    p95 = ordered[int(len(ordered) * 0.95)]
    assert p95 < 50.0, f"warm search p95={p95:.3f} ms exceeded 50 ms sanity bound"
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- AC-9
|
||||
|
||||
|
||||
@pytest.mark.slow
def test_ac9_search_topk_p95_latency(tmp_path: Path) -> None:
    """AC-9: p95 ``search_topk`` latency over 1000 random queries is at most 5 ms.

    Runs against a 100 000-vector index after ten untimed warm-up searches.
    """
    corpus_size, dim = 100_000, 16
    index_file = tmp_path / "tiles.index"
    vectors = _make_descriptors(corpus_size, dim, seed=8)
    tiles = _sample_tile_ids(corpus_size)
    hnsw = HnswParams(m=32, ef_construction=200, ef_search=64)
    _make_bootstrap_index_on_disk(
        index_path=index_file,
        tile_ids=tiles,
        descriptors=vectors,
        params=hnsw,
    )
    index = _open_existing(index_file)

    # Untimed warm-up searches.
    for row in range(10):
        index.search_topk(vectors[row], k=10)

    query_rng = np.random.default_rng(123)
    timings_ms: list[float] = []
    for _ in range(1000):
        query = np.ascontiguousarray(query_rng.standard_normal(dim).astype(np.float32))
        started = time.perf_counter()
        index.search_topk(query, k=10)
        timings_ms.append((time.perf_counter() - started) * 1000)
    ordered = sorted(timings_ms)
    p95 = ordered[int(len(ordered) * 0.95)]
    assert p95 <= 5.0, f"search_topk p95={p95:.3f} ms exceeds 5 ms"
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- AC-10
|
||||
|
||||
|
||||
_FDI_FQN = "gps_denied_onboard.components.c6_tile_cache.faiss_descriptor_index"
|
||||
|
||||
|
||||
def test_ac10_factory_gate_off_does_not_import_fdi(monkeypatch: pytest.MonkeyPatch) -> None:
    """AC-10: with ``BUILD_FAISS_INDEX`` unset the factory refuses without importing FAISS code.

    The factory must raise ``RuntimeNotAvailableError`` mentioning
    ``faiss_hnsw`` and must not import the ``faiss_descriptor_index`` module
    as a side effect.
    """
    # Imported lazily so the factory is only loaded when this test runs.
    from gps_denied_onboard.runtime_root.errors import RuntimeNotAvailableError
    from gps_denied_onboard.runtime_root.storage_factory import build_descriptor_index

    monkeypatch.delenv("BUILD_FAISS_INDEX", raising=False)
    # Evict through monkeypatch rather than ``sys.modules.pop`` so the cached
    # module is put back on teardown. A bare pop would leave later tests to
    # re-import a second copy of the module and its classes.
    monkeypatch.delitem(sys.modules, _FDI_FQN, raising=False)

    config = Config.with_blocks(c6_tile_cache=C6TileCacheConfig())
    with pytest.raises(RuntimeNotAvailableError) as exc_info:
        build_descriptor_index(config)
    assert "faiss_hnsw" in str(exc_info.value)
    assert _FDI_FQN not in sys.modules
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- AC-11
|
||||
|
||||
|
||||
def test_ac11_int64_id_collision_raises_index_build_error(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """AC-11: colliding int64 ids abort the rebuild and leave the index file untouched."""
    index_path = tmp_path / "tiles.index"
    baseline_vectors = _make_descriptors(4, 4, seed=9)
    tiles = _sample_tile_ids(4)
    hnsw = HnswParams(m=8, ef_construction=20, ef_search=16)
    _make_bootstrap_index_on_disk(
        index_path=index_path, tile_ids=tiles, descriptors=baseline_vectors, params=hnsw
    )
    index = _open_existing(index_path)
    snapshot = index_path.read_bytes()

    # Force a deterministic collision via monkeypatch — every TileId maps to
    # the same int64.
    monkeypatch.setattr(fdi_mod, "tile_id_to_int64", lambda _tile_id: 42)

    replacement_vectors = _make_descriptors(4, 4, seed=10)
    with pytest.raises(IndexBuildError) as exc_info:
        index.rebuild_from_descriptors(replacement_vectors, tiles, hnsw)
    message = str(exc_info.value)
    assert "collision" in message
    # On-disk index untouched.
    assert index_path.read_bytes() == snapshot
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- AC-12
|
||||
|
||||
|
||||
def test_ac12_index_metadata_round_trip(tmp_path: Path) -> None:
    """AC-12: ``index_metadata()`` reports what the last rebuild wrote.

    Checks dimension, vector count, HNSW parameters, the sha256 of the
    on-disk index and the index path.
    """
    index_path = tmp_path / "tiles.index"
    vectors = _make_descriptors(16, 8, seed=11)
    tiles = _sample_tile_ids(16)
    hnsw = HnswParams(m=8, ef_construction=20, ef_search=16, metric="L2")
    _make_bootstrap_index_on_disk(
        index_path=index_path, tile_ids=tiles, descriptors=vectors, params=hnsw
    )
    index = _open_existing(index_path)
    index.rebuild_from_descriptors(vectors, tiles, hnsw)

    reported = index.index_metadata()
    expected_sha = hashlib.sha256(index_path.read_bytes()).hexdigest()
    assert reported.descriptor_dim == 8
    assert reported.n_vectors == 16
    assert reported.hnsw_params == hnsw
    assert reported.sidecar_sha256_hex == expected_sha
    assert reported.file_path == index_path
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- NFR-perf-rebuild
|
||||
|
||||
|
||||
@pytest.mark.slow
def test_nfr_perf_rebuild_under_5_minutes_for_100k(tmp_path: Path) -> None:
    """NFR-perf-rebuild: rebuilding a 100 000-vector index finishes in under 300 seconds."""
    corpus_size, dim = 100_000, 16
    index_file = tmp_path / "tiles.index"
    vectors = _make_descriptors(corpus_size, dim, seed=12)
    tiles = _sample_tile_ids(corpus_size)
    hnsw = HnswParams(m=32, ef_construction=200, ef_search=64)
    _make_bootstrap_index_on_disk(
        index_path=index_file,
        tile_ids=tiles,
        descriptors=vectors,
        params=hnsw,
    )
    index = _open_existing(index_file)

    # Only the rebuild call itself is timed; bootstrap and open are excluded.
    started = time.perf_counter()
    index.rebuild_from_descriptors(vectors, tiles, hnsw)
    elapsed = time.perf_counter() - started
    assert elapsed < 300.0, f"rebuild took {elapsed:.1f}s; expected < 300s"
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- NFR-reliability-rewrap
|
||||
|
||||
|
||||
def test_nfr_reliability_search_runtime_error_rewrapped(
    loaded_index: FaissDescriptorIndex, monkeypatch: pytest.MonkeyPatch
) -> None:
    """NFR-reliability: a ``RuntimeError`` from the FAISS search becomes ``IndexUnavailableError``.

    The original exception must be preserved as ``__cause__``.
    """
    simulated_failure = RuntimeError("simulated FAISS C++ failure")

    def raising_search(*args: object, **kwargs: object) -> None:
        raise simulated_failure

    # Swap the underlying index's ``search`` so any query fails.
    monkeypatch.setattr(loaded_index._index, "search", raising_search)
    zero_query = np.zeros(loaded_index.descriptor_dim(), dtype=np.float32)
    with pytest.raises(IndexUnavailableError) as exc_info:
        loaded_index.search_topk(zero_query, k=3)
    raised = exc_info.value
    assert raised.__cause__ is simulated_failure
|
||||
|
||||
|
||||
def test_nfr_reliability_rebuild_runtime_error_rewrapped(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """NFR-reliability: a ``RuntimeError`` from ``faiss.write_index`` becomes ``IndexBuildError``.

    The original exception must be preserved as ``__cause__``.
    """
    built = _build_initial_index(tmp_path, n=8, d=4)
    index_path, tiles, vectors, hnsw = built
    index = _open_existing(index_path)

    simulated_failure = RuntimeError("simulated FAISS write failure")

    def raising_write(*args: object, **kwargs: object) -> None:
        raise simulated_failure

    # Patched only after the baseline exists, so just the rebuild under test fails.
    monkeypatch.setattr(faiss, "write_index", raising_write)

    with pytest.raises(IndexBuildError) as exc_info:
        index.rebuild_from_descriptors(vectors, tiles, hnsw)
    raised = exc_info.value
    assert raised.__cause__ is simulated_failure
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- from_config smoke
|
||||
|
||||
|
||||
def test_from_config_resolves_paths_and_warmup(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """``from_config`` opens the index named by explicit index and warm-up paths in the config."""
    index_path = tmp_path / "tiles.index"
    warmup_path = tmp_path / "warmup.npy"
    vectors = _make_descriptors(8, 4, seed=13)
    tiles = _sample_tile_ids(8)
    hnsw = HnswParams(m=8, ef_construction=20, ef_search=16)
    _make_bootstrap_index_on_disk(
        index_path=index_path, tile_ids=tiles, descriptors=vectors, params=hnsw
    )
    # Persist one stored vector to serve as the warm-up query file.
    np.save(warmup_path, vectors[0])

    cache_block = C6TileCacheConfig(
        faiss_index_path=str(index_path),
        faiss_warmup_query_path=str(warmup_path),
    )
    instance = FaissDescriptorIndex.from_config(
        Config.with_blocks(c6_tile_cache=cache_block)
    )
    assert instance.descriptor_dim() == 4
    assert instance.mmap_handle() == index_path
|
||||
|
||||
|
||||
def test_from_config_default_path_uses_root_dir(tmp_path: Path) -> None:
    """With only ``root_dir`` configured, ``from_config`` opens ``<root_dir>/descriptor.index``."""
    root_dir = tmp_path / "root"
    root_dir.mkdir()
    expected_index = root_dir / "descriptor.index"
    vectors = _make_descriptors(4, 4, seed=14)
    tiles = _sample_tile_ids(4)
    hnsw = HnswParams(m=8, ef_construction=20, ef_search=16)
    _make_bootstrap_index_on_disk(
        index_path=expected_index, tile_ids=tiles, descriptors=vectors, params=hnsw
    )

    cache_block = C6TileCacheConfig(root_dir=str(root_dir))
    instance = FaissDescriptorIndex.from_config(
        Config.with_blocks(c6_tile_cache=cache_block)
    )
    assert instance.mmap_handle() == expected_index
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- module-import sanity
|
||||
# The module is intentionally import-clean so the AZ-303 factory gate
|
||||
# can pre-empt the import via the BUILD_FAISS_INDEX env flag.
|
||||
|
||||
|
||||
def test_module_is_import_clean() -> None:
    """A fresh import of the FAISS index module succeeds and exposes its public names.

    The module is evicted from ``sys.modules`` first so the import really
    runs, whether or not an earlier test left it cached. Any previously
    cached module object is put back afterwards, so the rest of the session
    keeps using the classes that other test modules already imported instead
    of a duplicate copy.
    """
    cached = sys.modules.pop(_FDI_FQN, None)
    try:
        fresh = importlib.import_module(_FDI_FQN)
        assert hasattr(fresh, "FaissDescriptorIndex")
        assert hasattr(fresh, "tile_id_to_int64")
    finally:
        if cached is not None:
            sys.modules[_FDI_FQN] = cached
            # The import also rebinds the submodule attribute on the parent
            # package; point it back at the original module object as well.
            parent_name, _, attr = _FDI_FQN.rpartition(".")
            parent = sys.modules.get(parent_name)
            if parent is not None:
                setattr(parent, attr, cached)
|
||||
@@ -297,6 +297,13 @@ def _install_fake_faiss_impl_module() -> type:
|
||||
def __init__(self, config: Config) -> None:
|
||||
self.config = config
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, config: Config) -> _FakeFaissDescriptorIndex:
|
||||
# AZ-306: factory now dispatches via from_config so the production
|
||||
# impl can wire its Sha256Sidecar / logger / warmup query without
|
||||
# the runtime_root touching them. Mirror PostgresFilesystemStore.
|
||||
return cls(config)
|
||||
|
||||
fake_module = types.ModuleType(_FAKE_IMPL_MODULE)
|
||||
fake_module.FaissDescriptorIndex = _FakeFaissDescriptorIndex # type: ignore[attr-defined]
|
||||
sys.modules[_FAKE_IMPL_MODULE] = fake_module
|
||||
|
||||
Reference in New Issue
Block a user