"""AZ-964 — seed a minimal empty HNSW32 + IndexIDMap2 FAISS index fixture. Shared by: * `scripts/mk_test_faiss_fixture.py` — invoked by the `tile-init` setup service in `docker-compose.test.jetson.yml`. * `tests/e2e/replay/conftest.py::_build_operator_pre_flight_cache` — the AZ-839 C3 fixture, which creates a fresh tmp `root_dir` per test and needs an empty index there before `build_descriptor_index` can call `FaissDescriptorIndex._load()` without raising `IndexUnavailableError`. The seed produces three files under ``root_dir``: * ``descriptor.index`` — HNSW32 / IndexIDMap2 binary * ``descriptor.index.sha256`` — sha256 sidecar (verified by ``_load``) * ``descriptor.index.meta.json`` — metadata with matching ``sidecar_sha256_hex`` (cross-checked by ``_load``) The default ``descriptor_dim=512`` + ``backbone_label="ultra_vpr"`` mirror the prior in-script defaults; callers can override when seeding for a NetVLAD (4096) or DINOv2-VPR run (AZ-965 territory). """ from __future__ import annotations import hashlib import json from datetime import datetime, timezone from pathlib import Path import faiss # type: ignore[import-untyped] __all__ = ["seed_empty_faiss_index"] _HNSW_M = 32 _EF_CONSTRUCTION = 40 _EF_SEARCH = 16 def seed_empty_faiss_index( root_dir: Path, *, descriptor_dim: int = 512, backbone_label: str = "ultra_vpr", ) -> Path: """Create an empty valid HNSW32 FAISS index at ``root_dir/descriptor.index``. Idempotent — re-running overwrites the prior fixture. Returns the path to the written ``.index`` file. """ root_dir.mkdir(parents=True, exist_ok=True) inner = faiss.IndexHNSWFlat(descriptor_dim, _HNSW_M, faiss.METRIC_INNER_PRODUCT) index = faiss.IndexIDMap2(inner) idx_path = root_dir / "descriptor.index" faiss.write_index(index, str(idx_path)) idx_bytes = idx_path.read_bytes() sha256 = hashlib.sha256(idx_bytes).hexdigest() (idx_path.parent / (idx_path.name + ".sha256")).write_text( sha256, encoding="ascii" ) meta = { "descriptor_dim": descriptor_dim, "n_vectors": 0, "backbone_label": backbone_label, "backbone_sha256_hex": "0" * 64, "built_at": datetime.now(timezone.utc).isoformat(), "hnsw_params": { "m": _HNSW_M, "ef_construction": _EF_CONSTRUCTION, "ef_search": _EF_SEARCH, "metric": "INNER_PRODUCT", }, "sidecar_sha256_hex": sha256, "file_path": str(idx_path), "id_mapping": [], } (idx_path.parent / (idx_path.name + ".meta.json")).write_text( json.dumps(meta, sort_keys=True, indent=2), encoding="utf-8" ) return idx_path