[AZ-306] C6 FaissDescriptorIndex (faiss-cpu, HNSW32)

Production-default DescriptorIndex strategy backed by the faiss-cpu
PyPI wheel (>=1.7,<2.0). Implements the AZ-303 Protocol surface end
to end: HNSW32 + IndexIDMap2 search, atomic three-file rebuild
(.index + .sha256 sidecar + .meta.json), triple-consistency load
check, mmap-backed reads with IO_FLAG_MMAP|IO_FLAG_READ_ONLY, optional
warm-up query at construction, FAISS RuntimeError rewrap to
IndexUnavailableError / IndexBuildError, and FaissDescriptorIndex.from_config
classmethod wired into runtime_root.storage_factory.

The original spec required a custom pybind11 wrapper over a vendored
FAISS HEAD; the user opted for the upstream faiss-cpu wheel after
research fact #92 confirmed ARM64 wheel availability for Jetson and
the existing pyproject.toml already pinned faiss-cpu. cpp/faiss_index/
placeholder removed; BUILD_FAISS_INDEX flag retained as a
runtime/factory gate (no native target). Spec rewritten end-to-end and
archived to _docs/02_tasks/done/.

C6TileCacheConfig extended with faiss_index_path and
faiss_warmup_query_path fields. tests/conftest.py sets
KMP_DUPLICATE_LIB_OK=TRUE to remediate the macOS faiss/torch libomp
duplicate-load abort during pytest (no-op on CI Linux). 21 new tests
cover AC-1..12 + 2 NFRs + from_config smoke; AZ-303 protocol-conformance
fake updated with from_config classmethod.

Tests: 124/124 c6_tile_cache pass; 1334 project-wide pass; 1
pre-existing OKVIS2 submodule failure unrelated.

Doc sync: module-layout.md, components/08_c6_tile_cache/description.md
§5, batch_35_cycle1_report.md.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-13 04:01:37 +03:00
parent ecf76d762d
commit 3b7265757b
17 changed files with 1550 additions and 87 deletions
@@ -0,0 +1,650 @@
"""AZ-306 — C6 ``FaissDescriptorIndex`` unit tests.
Covers AC-1 through AC-12 plus NFR-perf-rebuild + NFR-reliability-rewrap
from ``_docs/02_tasks/done/AZ-306_c6_faiss_descriptor_index.md``.
The tests use the real ``faiss-cpu`` dep (promoted to main deps in this
task) — no fake-FAISS shim. AZ-303 already covers the
Protocol-conformance / factory-gate boundary; this file exercises the
production behaviour end-to-end against a real ``.index`` file.
"""
from __future__ import annotations
import dataclasses
import hashlib
import importlib
import json
import logging
import os
import sys
import time
from datetime import datetime
from pathlib import Path
import faiss
import numpy as np
import pytest
from gps_denied_onboard.components.c6_tile_cache import (
C6TileCacheConfig,
HnswParams,
IndexBuildError,
IndexUnavailableError,
TileId,
)
from gps_denied_onboard.components.c6_tile_cache import faiss_descriptor_index as fdi_mod
from gps_denied_onboard.components.c6_tile_cache.faiss_descriptor_index import (
META_SUFFIX,
FaissDescriptorIndex,
tile_id_to_int64,
)
from gps_denied_onboard.config.schema import Config
from gps_denied_onboard.helpers.sha256_sidecar import SIDECAR_SUFFIX, Sha256Sidecar
# --------------------------------------------------------------------- helpers
def _sample_tile_ids(n: int, *, zoom: int = 18) -> list[TileId]:
    """Return ``n`` synthetic ``TileId`` values at zoom level ``zoom``.

    Tile ``i`` is shifted by ``i * 1e-4`` degrees from (49.0, 36.0) on both
    axes, so every (lat, lon) pair in the result is distinct.
    """
    tiles: list[TileId] = []
    for step in range(n):
        shift = step * 1e-4
        tiles.append(TileId(zoom_level=zoom, lat=49.0 + shift, lon=36.0 + shift))
    return tiles
def _make_descriptors(n: int, d: int, *, seed: int = 0) -> np.ndarray:
    """Return a reproducible ``(n, d)`` float32, C-contiguous Gaussian matrix.

    The same ``seed`` always yields the same values.
    """
    generator = np.random.default_rng(seed)
    samples = generator.standard_normal(size=(n, d), dtype=np.float32)
    return np.ascontiguousarray(samples, dtype=np.float32)
def _make_logger() -> logging.Logger:
    """Return the ``tests.az306`` logger with its level set to DEBUG."""
    az306_logger = logging.getLogger("tests.az306")
    az306_logger.setLevel(logging.DEBUG)
    return az306_logger
def _build_initial_index(
    tmp_path: Path,
    *,
    n: int = 32,
    d: int = 16,
    hnsw_params: HnswParams | None = None,
) -> tuple[Path, list[TileId], np.ndarray, HnswParams]:
    """Create a valid index under ``tmp_path`` and return its path plus inputs.

    The index is first bootstrapped straight through FAISS, then opened with
    ``FaissDescriptorIndex`` and rebuilt from the same descriptors, so the
    rebuild path has run once by the time this helper returns.

    Returns:
        ``(index_path, tile_ids, descriptors, params)``.
    """
    target = tmp_path / "tiles.index"
    vectors = _make_descriptors(n, d, seed=0)
    tiles = _sample_tile_ids(n)
    if hnsw_params:
        effective_params = hnsw_params
    else:
        effective_params = HnswParams(m=16, ef_construction=40, ef_search=32)

    written_to = _make_bootstrap_index_on_disk(
        index_path=target,
        tile_ids=tiles,
        descriptors=vectors,
        params=effective_params,
    )
    assert written_to == target  # sanity

    under_test = FaissDescriptorIndex(
        index_path=target,
        sidecar=Sha256Sidecar(),
        logger=_make_logger(),
    )
    under_test.rebuild_from_descriptors(vectors, tiles, effective_params)
    return target, tiles, vectors, effective_params
def _make_bootstrap_index_on_disk(
    *,
    index_path: Path,
    tile_ids: list[TileId],
    descriptors: np.ndarray,
    params: HnswParams,
) -> Path:
    """Write an index file, its SHA-256 sidecar and its meta JSON via raw FAISS.

    This deliberately does not go through ``FaissDescriptorIndex`` so that
    tests can have a known-good on-disk baseline before the class under test
    is ever constructed.

    Returns:
        ``index_path``, unchanged.
    """
    dim = int(descriptors.shape[1])

    # HNSW graph wrapped in an ID map so vectors are addressed by tile-derived ids.
    hnsw_index = faiss.IndexHNSWFlat(dim, params.m, faiss.METRIC_L2)
    hnsw_index.hnsw.efConstruction = params.ef_construction
    hnsw_index.hnsw.efSearch = params.ef_search
    id_mapped = faiss.IndexIDMap2(hnsw_index)
    ids = np.asarray([tile_id_to_int64(tile) for tile in tile_ids], dtype=np.int64)
    id_mapped.add_with_ids(descriptors, ids)
    faiss.write_index(id_mapped, str(index_path))

    # The sidecar holds the hex digest of exactly the bytes FAISS just wrote.
    sha256_hex = hashlib.sha256(index_path.read_bytes()).hexdigest()
    sidecar_path = Path(str(index_path) + SIDECAR_SUFFIX)
    sidecar_path.write_text(sha256_hex, encoding="ascii")

    meta_doc = {
        "descriptor_dim": dim,
        "n_vectors": len(tile_ids),
        "backbone_label": "test_backbone",
        "backbone_sha256_hex": "0" * 64,
        "built_at": datetime.now().astimezone().isoformat(),
        "hnsw_params": dataclasses.asdict(params),
        "sidecar_sha256_hex": sha256_hex,
        "file_path": str(index_path),
    }
    id_mapping = []
    for raw_id, tile in zip(ids.tolist(), tile_ids, strict=True):
        id_mapping.append(
            {
                "int64_id": int(raw_id),
                "zoom_level": tile.zoom_level,
                "lat": tile.lat,
                "lon": tile.lon,
            }
        )
    meta_doc["id_mapping"] = id_mapping

    meta_path = Path(str(index_path) + META_SUFFIX)
    meta_path.write_text(json.dumps(meta_doc, sort_keys=True, indent=2), encoding="utf-8")
    return index_path
def _open_existing(index_path: Path) -> FaissDescriptorIndex:
    """Construct a ``FaissDescriptorIndex`` over the files already at ``index_path``."""
    fresh_sidecar = Sha256Sidecar()
    test_logger = _make_logger()
    return FaissDescriptorIndex(
        index_path=index_path, sidecar=fresh_sidecar, logger=test_logger
    )
# --------------------------------------------------------------------- AC-1
def test_ac1_search_topk_returns_self_match_first(tmp_path: Path) -> None:
    """AC-1: querying with an indexed vector returns that vector's tile first."""
    # Arrange
    count, dim = 1000, 16
    index_file = tmp_path / "tiles.index"
    vectors = _make_descriptors(count, dim, seed=42)
    tiles = _sample_tile_ids(count)
    _make_bootstrap_index_on_disk(
        index_path=index_file,
        tile_ids=tiles,
        descriptors=vectors,
        params=HnswParams(m=16, ef_construction=40, ef_search=64),
    )
    index = _open_existing(index_file)

    # Act
    hits = index.search_topk(vectors[0], k=5)

    # Assert: five hits, exact self-match on top, distances non-decreasing.
    assert len(hits) == 5
    assert hits[0][0] == tiles[0]
    assert hits[0][1] < 1e-6
    distances = [distance for _, distance in hits]
    assert distances == sorted(distances)
# --------------------------------------------------------------------- AC-2
def test_ac2_search_topk_returns_fewer_than_k_when_corpus_is_small(tmp_path: Path) -> None:
    """AC-2: asking for more neighbours than the corpus holds returns the whole corpus."""
    corpus_size, dim = 3, 8
    index_file = tmp_path / "tiles.index"
    vectors = _make_descriptors(corpus_size, dim, seed=1)
    tiles = _sample_tile_ids(corpus_size)
    _make_bootstrap_index_on_disk(
        index_path=index_file,
        tile_ids=tiles,
        descriptors=vectors,
        params=HnswParams(m=8, ef_construction=20, ef_search=16),
    )
    index = _open_existing(index_file)

    hits = index.search_topk(vectors[1], k=10)

    assert len(hits) == corpus_size
    assert {tile for tile, _ in hits} == set(tiles)
# --------------------------------------------------------------------- AC-3
@pytest.fixture
def loaded_index(tmp_path: Path) -> FaissDescriptorIndex:
    """Provide a freshly opened 16-vector, 8-dimensional index under ``tmp_path``."""
    index_file, *_ = _build_initial_index(tmp_path, n=16, d=8)
    return _open_existing(index_file)
def test_ac3_search_topk_rejects_wrong_shape(loaded_index: FaissDescriptorIndex) -> None:
    """AC-3: a query one element longer than the index dimension is rejected."""
    oversized_len = loaded_index.descriptor_dim() + 1
    oversized = np.zeros(oversized_len, dtype=np.float32)
    with pytest.raises(IndexUnavailableError) as caught:
        loaded_index.search_topk(oversized, k=3)
    message = str(caught.value)
    assert "shape" in message
def test_ac3_search_topk_rejects_wrong_dtype(loaded_index: FaissDescriptorIndex) -> None:
    """AC-3: a float64 query of the right length is rejected."""
    dim = loaded_index.descriptor_dim()
    double_precision = np.zeros(dim, dtype=np.float64)
    with pytest.raises(IndexUnavailableError) as caught:
        loaded_index.search_topk(double_precision, k=3)
    message = str(caught.value)
    assert "float32" in message
def test_ac3_search_topk_rejects_non_contiguous(loaded_index: FaissDescriptorIndex) -> None:
    """AC-3: a float32 query of the right length that is not C-contiguous is rejected."""
    dim = loaded_index.descriptor_dim()
    # Taking every second element of a 2*dim row gives a dim-length view with a
    # two-element stride. Slicing one row out of a 2-D array would not work
    # here: a 1-D row view is always C-contiguous.
    wide = np.zeros((1, dim * 2), dtype=np.float32)
    bad_query = wide[0, ::2]
    # Guard the fixture itself: right length, wrong memory layout.
    assert bad_query.shape == (dim,)
    assert not bad_query.flags["C_CONTIGUOUS"]
    with pytest.raises(IndexUnavailableError) as exc_info:
        loaded_index.search_topk(bad_query, k=3)
    assert "contiguous" in str(exc_info.value).lower()
# --------------------------------------------------------------------- AC-4
def test_ac4_rebuild_atomic_on_simulated_crash(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """AC-4: a rebuild that fails at the rename step leaves the old index usable.

    ``os.rename`` is patched to raise ``OSError`` for the duration of the
    rebuild; the test then checks that the rebuild raises ``IndexBuildError``,
    that the index and sidecar bytes on disk are unchanged, and that the
    index can still be opened.
    """
    # Arrange — bootstrap a valid baseline.
    n, d = 8, 4
    descriptors = _make_descriptors(n, d, seed=1)
    tile_ids = _sample_tile_ids(n)
    params = HnswParams(m=8, ef_construction=20, ef_search=16)
    _make_bootstrap_index_on_disk(
        index_path=tmp_path / "tiles.index",
        tile_ids=tile_ids,
        descriptors=descriptors,
        params=params,
    )
    # Snapshot the baseline files so they can be compared after the failed rebuild.
    original_bytes = (tmp_path / "tiles.index").read_bytes()
    original_sidecar = (tmp_path / ("tiles.index" + SIDECAR_SUFFIX)).read_text()
    index = _open_existing(tmp_path / "tiles.index")
    # Act — simulate a crash AFTER the temp index is written but BEFORE
    # the atomic rename swaps it into place. Real OS rename failures
    # raise ``OSError``; ``Sha256Sidecar`` rewraps it to
    # ``Sha256SidecarError`` which the rebuild path turns into
    # ``IndexBuildError``.
    sentinel = OSError("simulated crash before rename")

    def crashing_replace(*args: object, **kwargs: object) -> None:
        # Stand-in for ``os.rename`` that always fails with the sentinel error.
        raise sentinel

    # ``Sha256Sidecar.write_atomic_and_sidecar`` delegates to ``atomicwrites``
    # which on POSIX commits via ``os.rename``. Patching it simulates the
    # commit step crashing AFTER the temp file lands but BEFORE the
    # atomic swap completes — the exact AC-4 fault we want to verify.
    monkeypatch.setattr(os, "rename", crashing_replace)
    new_descriptors = _make_descriptors(n, d, seed=2)
    with pytest.raises(IndexBuildError):
        index.rebuild_from_descriptors(new_descriptors, tile_ids, params)
    # Assert — original index + sidecar are intact and reload-able.
    assert (tmp_path / "tiles.index").read_bytes() == original_bytes
    assert (tmp_path / ("tiles.index" + SIDECAR_SUFFIX)).read_text() == original_sidecar
    # Put the real ``os.rename`` back before re-opening the index.
    monkeypatch.undo()
    reopened = _open_existing(tmp_path / "tiles.index")
    assert reopened.descriptor_dim() == d
# --------------------------------------------------------------------- AC-5
def test_ac5_rebuild_writes_correct_sidecars(tmp_path: Path) -> None:
    """AC-5: after a rebuild the SHA-256 sidecar and meta JSON describe the new index."""
    count, dim = 16, 8
    index_file = tmp_path / "tiles.index"
    sidecar_file = Path(str(index_file) + SIDECAR_SUFFIX)
    meta_file = Path(str(index_file) + META_SUFFIX)
    baseline = _make_descriptors(count, dim, seed=3)
    tiles = _sample_tile_ids(count)
    hnsw = HnswParams(m=8, ef_construction=20, ef_search=16)
    _make_bootstrap_index_on_disk(
        index_path=index_file, tile_ids=tiles, descriptors=baseline, params=hnsw
    )
    index = _open_existing(index_file)
    replacement = _make_descriptors(count, dim, seed=99)

    # Bracket the rebuild with wall-clock readings to bound ``built_at``.
    started = time.time()
    index.rebuild_from_descriptors(replacement, tiles, hnsw)
    finished = time.time()

    expected_sha = hashlib.sha256(index_file.read_bytes()).hexdigest()
    assert sidecar_file.read_text(encoding="ascii").strip() == expected_sha

    meta = json.loads(meta_file.read_bytes().decode("utf-8"))
    assert meta["descriptor_dim"] == dim
    assert meta["n_vectors"] == count
    assert meta["sidecar_sha256_hex"] == expected_sha
    assert meta["hnsw_params"] == dataclasses.asdict(hnsw)
    built_at = datetime.fromisoformat(meta["built_at"]).timestamp()
    assert started - 1.0 <= built_at <= finished + 1.0
# --------------------------------------------------------------------- AC-6
def test_ac6_corrupt_sidecar_blocks_construction(tmp_path: Path) -> None:
    """AC-6: a sidecar digest that does not match the index file blocks construction."""
    index_file = tmp_path / "tiles.index"
    _make_bootstrap_index_on_disk(
        index_path=index_file,
        tile_ids=_sample_tile_ids(8),
        descriptors=_make_descriptors(8, 4, seed=4),
        params=HnswParams(m=8, ef_construction=20, ef_search=16),
    )
    # Overwrite the sidecar with an all-zero digest.
    sidecar_file = Path(str(index_file) + SIDECAR_SUFFIX)
    sidecar_file.write_text("0" * 64, encoding="ascii")

    with pytest.raises(IndexUnavailableError) as caught:
        _open_existing(index_file)
    message = str(caught.value)
    assert "sidecar mismatch" in message
# --------------------------------------------------------------------- AC-7
def test_ac7_missing_meta_blocks_construction(tmp_path: Path) -> None:
    """AC-7: deleting the meta JSON blocks construction."""
    index_file = tmp_path / "tiles.index"
    _make_bootstrap_index_on_disk(
        index_path=index_file,
        tile_ids=_sample_tile_ids(4),
        descriptors=_make_descriptors(4, 4, seed=5),
        params=HnswParams(m=8, ef_construction=20, ef_search=16),
    )
    meta_file = Path(str(index_file) + META_SUFFIX)
    meta_file.unlink()

    with pytest.raises(IndexUnavailableError) as caught:
        _open_existing(index_file)
    message = str(caught.value)
    assert "meta.json missing" in message
def test_ac7_malformed_meta_blocks_construction(tmp_path: Path) -> None:
    """AC-7: a meta file that is not valid JSON blocks construction."""
    index_file = tmp_path / "tiles.index"
    _make_bootstrap_index_on_disk(
        index_path=index_file,
        tile_ids=_sample_tile_ids(4),
        descriptors=_make_descriptors(4, 4, seed=6),
        params=HnswParams(m=8, ef_construction=20, ef_search=16),
    )
    meta_file = Path(str(index_file) + META_SUFFIX)
    meta_file.write_text("not-json{{", encoding="utf-8")

    with pytest.raises(IndexUnavailableError) as caught:
        _open_existing(index_file)
    message = str(caught.value)
    assert "malformed" in message
# --------------------------------------------------------------------- AC-8
def test_ac8_warmup_query_pages_in_mmap(tmp_path: Path) -> None:
    """AC-8: constructing with a warm-up query leaves later searches fast.

    This is a sanity bound (p95 below 50 ms over ten searches), not the
    canonical latency budget.
    """
    count, dim = 256, 16
    index_file = tmp_path / "tiles.index"
    vectors = _make_descriptors(count, dim, seed=7)
    _make_bootstrap_index_on_disk(
        index_path=index_file,
        tile_ids=_sample_tile_ids(count),
        descriptors=vectors,
        params=HnswParams(m=16, ef_construction=40, ef_search=32),
    )

    # Best-effort attempt to drop the file from the OS page cache; skipped on
    # platforms that lack ``posix_fadvise``.
    can_evict = hasattr(os, "posix_fadvise") and hasattr(os, "POSIX_FADV_DONTNEED")
    if can_evict:
        with index_file.open("rb") as handle:
            os.posix_fadvise(handle.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)

    instance = FaissDescriptorIndex(
        index_path=index_file,
        sidecar=Sha256Sidecar(),
        logger=_make_logger(),
        warmup_query=vectors[7].copy(),
    )

    # Time ten searches, each against a private copy of an indexed vector.
    latencies_ms: list[float] = []
    for row in vectors[:10]:
        query = row.copy()
        started = time.perf_counter()
        instance.search_topk(query, k=5)
        latencies_ms.append((time.perf_counter() - started) * 1000)

    ranked = sorted(latencies_ms)
    p95 = ranked[int(len(ranked) * 0.95)]
    assert p95 < 50.0, f"warm search p95={p95:.3f} ms exceeded 50 ms sanity bound"
# --------------------------------------------------------------------- AC-9
@pytest.mark.slow
def test_ac9_search_topk_p95_latency(tmp_path: Path) -> None:
    """AC-9: p95 ``search_topk`` latency over 1000 random queries is at most 5 ms."""
    count, dim = 100_000, 16
    index_file = tmp_path / "tiles.index"
    vectors = _make_descriptors(count, dim, seed=8)
    _make_bootstrap_index_on_disk(
        index_path=index_file,
        tile_ids=_sample_tile_ids(count),
        descriptors=vectors,
        params=HnswParams(m=32, ef_construction=200, ef_search=64),
    )
    index = _open_existing(index_file)

    # Untimed warm-up searches.
    for warm_query in vectors[:10]:
        index.search_topk(warm_query, k=10)

    rng = np.random.default_rng(123)
    latencies_ms: list[float] = []
    for _ in range(1000):
        query = np.ascontiguousarray(rng.standard_normal(dim).astype(np.float32))
        started = time.perf_counter()
        index.search_topk(query, k=10)
        latencies_ms.append((time.perf_counter() - started) * 1000)

    ranked = sorted(latencies_ms)
    p95 = ranked[int(len(ranked) * 0.95)]
    assert p95 <= 5.0, f"search_topk p95={p95:.3f} ms exceeds 5 ms"
# --------------------------------------------------------------------- AC-10
# Fully-qualified name of the production implementation module. Tests in this
# file use it to evict the module from ``sys.modules`` and to check whether
# it has been imported.
_FDI_FQN = "gps_denied_onboard.components.c6_tile_cache.faiss_descriptor_index"
def test_ac10_factory_gate_off_does_not_import_fdi(monkeypatch: pytest.MonkeyPatch) -> None:
    """AC-10: with ``BUILD_FAISS_INDEX`` unset the factory refuses without importing FAISS code.

    The factory must raise ``RuntimeNotAvailableError`` mentioning
    ``faiss_hnsw`` and must not import the implementation module while
    doing so.
    """
    from gps_denied_onboard.runtime_root.errors import RuntimeNotAvailableError
    from gps_denied_onboard.runtime_root.storage_factory import build_descriptor_index

    monkeypatch.delenv("BUILD_FAISS_INDEX", raising=False)
    # Evict the module through monkeypatch rather than ``sys.modules.pop`` so
    # the original module object is put back at teardown. A bare pop would
    # make later imports create a second module with duplicate classes.
    monkeypatch.delitem(sys.modules, _FDI_FQN, raising=False)
    config = Config.with_blocks(c6_tile_cache=C6TileCacheConfig())
    with pytest.raises(RuntimeNotAvailableError) as exc_info:
        build_descriptor_index(config)
    assert "faiss_hnsw" in str(exc_info.value)
    # The gate must have short-circuited before importing the implementation.
    assert _FDI_FQN not in sys.modules
# --------------------------------------------------------------------- AC-11
def test_ac11_int64_id_collision_raises_index_build_error(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """AC-11: colliding int64 ids abort the rebuild and leave the index file untouched."""
    index_file = tmp_path / "tiles.index"
    baseline = _make_descriptors(4, 4, seed=9)
    tiles = _sample_tile_ids(4)
    hnsw = HnswParams(m=8, ef_construction=20, ef_search=16)
    _make_bootstrap_index_on_disk(
        index_path=index_file, tile_ids=tiles, descriptors=baseline, params=hnsw
    )
    index = _open_existing(index_file)
    bytes_before = index_file.read_bytes()

    # Make every TileId map to the same int64 so a collision is guaranteed.
    def _always_42(_tile_id: object) -> int:
        return 42

    monkeypatch.setattr(fdi_mod, "tile_id_to_int64", _always_42)
    replacement = _make_descriptors(4, 4, seed=10)
    with pytest.raises(IndexBuildError) as caught:
        index.rebuild_from_descriptors(replacement, tiles, hnsw)
    assert "collision" in str(caught.value)

    # On-disk index untouched.
    assert index_file.read_bytes() == bytes_before
# --------------------------------------------------------------------- AC-12
def test_ac12_index_metadata_round_trip(tmp_path: Path) -> None:
    """AC-12: ``index_metadata()`` reflects what the rebuild wrote to disk."""
    index_file = tmp_path / "tiles.index"
    vectors = _make_descriptors(16, 8, seed=11)
    tiles = _sample_tile_ids(16)
    hnsw = HnswParams(m=8, ef_construction=20, ef_search=16, metric="L2")
    _make_bootstrap_index_on_disk(
        index_path=index_file, tile_ids=tiles, descriptors=vectors, params=hnsw
    )
    index = _open_existing(index_file)
    index.rebuild_from_descriptors(vectors, tiles, hnsw)

    reported = index.index_metadata()
    expected_sha = hashlib.sha256(index_file.read_bytes()).hexdigest()

    assert reported.descriptor_dim == 8
    assert reported.n_vectors == 16
    assert reported.hnsw_params == hnsw
    assert reported.sidecar_sha256_hex == expected_sha
    assert reported.file_path == index_file
# --------------------------------------------------------------------- NFR-perf-rebuild
@pytest.mark.slow
def test_nfr_perf_rebuild_under_5_minutes_for_100k(tmp_path: Path) -> None:
    """NFR-perf-rebuild: rebuilding a 100k-vector index finishes within 300 s."""
    count, dim = 100_000, 16
    index_file = tmp_path / "tiles.index"
    vectors = _make_descriptors(count, dim, seed=12)
    tiles = _sample_tile_ids(count)
    hnsw = HnswParams(m=32, ef_construction=200, ef_search=64)
    _make_bootstrap_index_on_disk(
        index_path=index_file, tile_ids=tiles, descriptors=vectors, params=hnsw
    )
    index = _open_existing(index_file)

    # Only the rebuild call itself is timed.
    started = time.perf_counter()
    index.rebuild_from_descriptors(vectors, tiles, hnsw)
    elapsed = time.perf_counter() - started

    assert elapsed < 300.0, f"rebuild took {elapsed:.1f}s; expected < 300s"
# --------------------------------------------------------------------- NFR-reliability-rewrap
def test_nfr_reliability_search_runtime_error_rewrapped(
    loaded_index: FaissDescriptorIndex, monkeypatch: pytest.MonkeyPatch
) -> None:
    """NFR-reliability: a ``RuntimeError`` from the inner search becomes ``IndexUnavailableError``.

    The original exception must be preserved as ``__cause__``.
    """
    faiss_failure = RuntimeError("simulated FAISS C++ failure")

    def _explode(*args: object, **kwargs: object) -> None:
        raise faiss_failure

    monkeypatch.setattr(loaded_index._index, "search", _explode)
    zero_query = np.zeros(loaded_index.descriptor_dim(), dtype=np.float32)
    with pytest.raises(IndexUnavailableError) as caught:
        loaded_index.search_topk(zero_query, k=3)
    assert caught.value.__cause__ is faiss_failure
def test_nfr_reliability_rebuild_runtime_error_rewrapped(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """NFR-reliability: a ``RuntimeError`` from ``faiss.write_index`` becomes ``IndexBuildError``.

    The original exception must be preserved as ``__cause__``.
    """
    index_file, tiles, vectors, hnsw = _build_initial_index(tmp_path, n=8, d=4)
    index = _open_existing(index_file)
    write_failure = RuntimeError("simulated FAISS write failure")

    def _explode(*args: object, **kwargs: object) -> None:
        raise write_failure

    monkeypatch.setattr(faiss, "write_index", _explode)
    with pytest.raises(IndexBuildError) as caught:
        index.rebuild_from_descriptors(vectors, tiles, hnsw)
    assert caught.value.__cause__ is write_failure
# --------------------------------------------------------------------- from_config smoke
def test_from_config_resolves_paths_and_warmup(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """``from_config`` honours explicit index and warm-up query paths."""
    index_file = tmp_path / "tiles.index"
    warmup_file = tmp_path / "warmup.npy"
    vectors = _make_descriptors(8, 4, seed=13)
    _make_bootstrap_index_on_disk(
        index_path=index_file,
        tile_ids=_sample_tile_ids(8),
        descriptors=vectors,
        params=HnswParams(m=8, ef_construction=20, ef_search=16),
    )
    # Persist one indexed vector as the warm-up query.
    np.save(warmup_file, vectors[0])

    config = Config.with_blocks(
        c6_tile_cache=C6TileCacheConfig(
            faiss_index_path=str(index_file),
            faiss_warmup_query_path=str(warmup_file),
        )
    )
    instance = FaissDescriptorIndex.from_config(config)

    assert instance.descriptor_dim() == 4
    assert instance.mmap_handle() == index_file
def test_from_config_default_path_uses_root_dir(tmp_path: Path) -> None:
    """With only ``root_dir`` set, ``from_config`` opens ``<root_dir>/descriptor.index``."""
    root_dir = tmp_path / "root"
    root_dir.mkdir()
    index_file = root_dir / "descriptor.index"
    _make_bootstrap_index_on_disk(
        index_path=index_file,
        tile_ids=_sample_tile_ids(4),
        descriptors=_make_descriptors(4, 4, seed=14),
        params=HnswParams(m=8, ef_construction=20, ef_search=16),
    )

    config = Config.with_blocks(c6_tile_cache=C6TileCacheConfig(root_dir=str(root_dir)))
    instance = FaissDescriptorIndex.from_config(config)

    assert instance.mmap_handle() == index_file
# --------------------------------------------------------------------- module-import sanity
# The module is intentionally import-clean so the AZ-303 factory gate
# can pre-empt the import via the BUILD_FAISS_INDEX env flag.
def test_module_is_import_clean() -> None:
    """The implementation module imports from scratch and exposes its public names.

    The module is evicted from ``sys.modules`` first so the import really
    executes, whether or not an earlier test left it cached. The module
    object that was cached beforehand is restored afterwards; otherwise the
    rest of the session would see a second copy of the module whose classes
    differ from the ones already imported elsewhere.
    """
    parent_name, _, leaf = _FDI_FQN.rpartition(".")
    parent = sys.modules.get(parent_name)
    cached = sys.modules.pop(_FDI_FQN, None)
    try:
        fresh = importlib.import_module(_FDI_FQN)
        assert hasattr(fresh, "FaissDescriptorIndex")
        assert hasattr(fresh, "tile_id_to_int64")
    finally:
        if cached is not None:
            sys.modules[_FDI_FQN] = cached
            # Importing a submodule also rebinds it on the parent package, so
            # that attribute has to be put back as well.
            if parent is not None:
                setattr(parent, leaf, cached)
@@ -297,6 +297,13 @@ def _install_fake_faiss_impl_module() -> type:
def __init__(self, config: Config) -> None:
self.config = config
@classmethod
def from_config(cls, config: Config) -> _FakeFaissDescriptorIndex:
# AZ-306: factory now dispatches via from_config so the production
# impl can wire its Sha256Sidecar / logger / warmup query without
# the runtime_root touching them. Mirror PostgresFilesystemStore.
return cls(config)
fake_module = types.ModuleType(_FAKE_IMPL_MODULE)
fake_module.FaissDescriptorIndex = _FakeFaissDescriptorIndex # type: ignore[attr-defined]
sys.modules[_FAKE_IMPL_MODULE] = fake_module