gps-denied-onboard/tests/unit/c3_matcher/test_protocol_conformance.py

"""AZ-344 — C3 CrossDomainMatcher Protocol + DTO + error + factory conformance.

Covers AC-1..AC-8 + AC-11 + AC-12 + NFRs. AC-9 (single-thread
binding) and AC-10 (LightGlueRuntime identity-share between C3 and
C2.5) are deferred per the task spec's Risk-4 escape clause — the
generic compose_root thread-binding registry and the cross-factory
helper identity assertion live with AZ-270 and the broader runtime
root composition. Each factory owns its own thread binding today
and the runtime-root wiring path that identity-shares the
``LightGlueRuntime`` is asserted in AZ-270's integration tests.
"""

from __future__ import annotations

import dataclasses
import logging
import sys
import time
import types

import numpy as np
import pytest

from gps_denied_onboard._types.matcher import (
    CandidateMatchSet,
    MatchResult,
    MatcherHealth,
)
from gps_denied_onboard.components.c3_matcher import (
    C3MatcherConfig,
    CrossDomainMatcher,
    InsufficientInliersError,
    MatcherBackboneError,
    MatcherError,
)
from gps_denied_onboard.components.c3_matcher._health_window import RollingHealthWindow
from gps_denied_onboard.components.c3_matcher.config import KNOWN_STRATEGIES
from gps_denied_onboard.config.schema import Config, ConfigError
from gps_denied_onboard.runtime_root.errors import StrategyNotAvailableError
from gps_denied_onboard.runtime_root.matcher_factory import build_matcher_strategy


# Lookup table shared by the factory tests, keyed by strategy label.
# Each value is a ``(module path, class name, build flag)`` triple: the
# module path is used as a ``sys.modules`` key, the class name is the
# attribute installed on the fake module, and the build flag is the
# environment variable the tests set or delete via ``monkeypatch``.
_STRATEGY_MODULES: dict[str, tuple[str, str, str]] = {
    "disk_lightglue": (
        "gps_denied_onboard.components.c3_matcher.disk_lightglue",
        "DiskLightGlueMatcher",
        "BUILD_MATCHER_DISK_LIGHTGLUE",
    ),
    "aliked_lightglue": (
        "gps_denied_onboard.components.c3_matcher.aliked_lightglue",
        "AlikedLightGlueMatcher",
        "BUILD_MATCHER_ALIKED_LIGHTGLUE",
    ),
    "xfeat": (
        "gps_denied_onboard.components.c3_matcher.xfeat",
        "XFeatMatcher",
        "BUILD_MATCHER_XFEAT",
    ),
}


# ----------------------------------------------------------------------
# Fakes that structurally satisfy the CrossDomainMatcher Protocol and
# the constructor-injection contract.


class _FakeLightGlueRuntime:
    def descriptor_dim(self) -> int:
        return 256

    def match(self, features_a, features_b):
        raise NotImplementedError

    def match_batch(self, features_a_list, features_b_list):
        raise NotImplementedError


class _FakeRansacFilter:
    def filter(self, *args, **kwargs):
        raise NotImplementedError


class _FakeInferenceRuntime:
    def deserialize_engine(self, *args, **kwargs):
        raise NotImplementedError

    def thermal_state(self):
        raise NotImplementedError


class _FullMatcher:
    """Structural :class:`CrossDomainMatcher` implementation for tests."""

    def __init__(
        self,
        config,
        *,
        lightglue_runtime,
        ransac_filter,
        inference_runtime,
        health_window,
    ) -> None:
        self._config = config
        self._lightglue_runtime = lightglue_runtime
        self._ransac_filter = ransac_filter
        self._inference_runtime = inference_runtime
        self._health_window = health_window
        self._label = config.components["c3_matcher"].strategy

    def match(self, frame, rerank_result, calibration):
        return MatchResult(
            frame_id=getattr(frame, "frame_id", 0),
            per_candidate=(),
            best_candidate_idx=0,
            reprojection_residual_px=0.0,
            matched_at=1_000_000_000,
            matcher_label=self._label,
            candidates_input=0,
            candidates_dropped=0,
        )

    def health_snapshot(self):
        return self._health_window.snapshot()


class _PartialMatcherMissingHealth:
    def match(self, frame, rerank_result, calibration):
        raise NotImplementedError


class _PartialMatcherMissingMatch:
    def health_snapshot(self):
        raise NotImplementedError


def _config_with_strategy(strategy: str = "disk_lightglue") -> Config:
    """Build a ``Config`` whose ``c3_matcher`` block selects *strategy*."""
    matcher_block = C3MatcherConfig(strategy=strategy)
    return Config.with_blocks(c3_matcher=matcher_block)


def _install_fake_strategy(strategy_label: str) -> type:
    """Register a synthetic strategy module in ``sys.modules``.

    A fresh ``_FullMatcher`` subclass is renamed to the class name listed
    for *strategy_label* in ``_STRATEGY_MODULES`` and attached to a new
    module object stored under the listed module path.

    Returns:
        The newly created subclass.
    """
    module_path, cls_name, _ = _STRATEGY_MODULES[strategy_label]

    class _FakeStrategy(_FullMatcher):
        pass

    _FakeStrategy.__name__ = cls_name

    fake_module = types.ModuleType(module_path)
    setattr(fake_module, cls_name, _FakeStrategy)
    # Written straight into sys.modules; nothing here removes it again.
    sys.modules[module_path] = fake_module
    return _FakeStrategy


@pytest.fixture
def strategy_module_cleanup():
    """Remove every strategy module from ``sys.modules`` before and after a test."""

    def _purge() -> None:
        for module_path, _cls, _flag in _STRATEGY_MODULES.values():
            sys.modules.pop(module_path, None)

    _purge()
    yield
    _purge()


# ----------------------------------------------------------------------
# AC-1: Protocol conformance.


def test_ac1_matcher_conformance_full() -> None:
    """A matcher with both Protocol methods passes the ``isinstance`` check."""
    config = _config_with_strategy()
    collaborators = {
        "lightglue_runtime": _FakeLightGlueRuntime(),
        "ransac_filter": _FakeRansacFilter(),
        "inference_runtime": _FakeInferenceRuntime(),
        "health_window": RollingHealthWindow(min_inliers_threshold=60),
    }
    matcher = _FullMatcher(config, **collaborators)
    assert isinstance(matcher, CrossDomainMatcher)


@pytest.mark.parametrize(
    "partial_cls",
    [_PartialMatcherMissingHealth, _PartialMatcherMissingMatch],
)
def test_ac1_matcher_conformance_partial_fails(partial_cls) -> None:
    """A class missing either Protocol method fails the ``isinstance`` check."""
    incomplete = partial_cls()
    conforms = isinstance(incomplete, CrossDomainMatcher)
    assert not conforms


# ----------------------------------------------------------------------
# AC-2: frozen + slotted DTOs.


def _make_candidate(inlier_count: int = 42, residual: float = 1.2) -> CandidateMatchSet:
    """Build a ``CandidateMatchSet`` with an all-zero ``(inlier_count, 4)`` float32 array."""
    correspondences = np.zeros((inlier_count, 4), dtype=np.float32)
    return CandidateMatchSet(
        tile_id=(18, 49.9, 36.3),
        inlier_count=inlier_count,
        inlier_correspondences=correspondences,
        ransac_outlier_count=5,
        per_candidate_residual_px=residual,
    )


def _make_result(frame_id: int = 7) -> MatchResult:
    """Build a ``MatchResult`` wrapping a single default candidate.

    The result-level residual is copied from that candidate.
    """
    only_candidate = _make_candidate()
    residual_px = only_candidate.per_candidate_residual_px
    return MatchResult(
        frame_id=frame_id,
        per_candidate=(only_candidate,),
        best_candidate_idx=0,
        reprojection_residual_px=residual_px,
        matched_at=1_000_000_000,
        matcher_label="disk_lightglue",
        candidates_input=3,
        candidates_dropped=2,
    )


def _make_health() -> MatcherHealth:
    """Build a ``MatcherHealth`` with zero low-inlier and error counts."""
    fields = {
        "consecutive_low_inlier": 0,
        "mean_inliers_60s": 128.5,
        "backbone_error_count_60s": 0,
    }
    return MatcherHealth(**fields)


@pytest.mark.parametrize(
    "dto_factory, field_name, new_value",
    [
        (_make_candidate, "inlier_count", 99),
        (_make_result, "matcher_label", "xfeat"),
        (_make_health, "consecutive_low_inlier", 7),
    ],
)
def test_ac2_frozen_dtos_reject_mutation(dto_factory, field_name, new_value) -> None:
    """Assigning to a field of any DTO raises ``FrozenInstanceError``."""
    frozen_dto = dto_factory()
    expected_error = dataclasses.FrozenInstanceError
    with pytest.raises(expected_error):
        setattr(frozen_dto, field_name, new_value)


@pytest.mark.parametrize("cls", [CandidateMatchSet, MatchResult, MatcherHealth])
def test_ac2_dtos_have_slots(cls) -> None:
    """Each DTO class declares non-empty ``__slots__`` and its instances lack ``__dict__``."""
    assert hasattr(cls, "__slots__")
    assert cls.__slots__
    # Pick the builder for this DTO; anything not listed falls back to
    # the health builder, mirroring the parametrised class list.
    builders = {
        CandidateMatchSet: _make_candidate,
        MatchResult: _make_result,
    }
    build = builders.get(cls, _make_health)
    instance = build()
    assert not hasattr(instance, "__dict__"), (
        f"{cls.__name__} carries a __dict__ — slots=True is missing"
    )


# ----------------------------------------------------------------------
# AC-3: factory rejects missing build flag.


@pytest.mark.parametrize("strategy", sorted(_STRATEGY_MODULES.keys()))
def test_ac3_factory_rejects_missing_build_flag(
    monkeypatch, strategy_module_cleanup, caplog, strategy
) -> None:
    """With the build flag unset the factory raises and logs ``build_flag_off``."""
    flag = _STRATEGY_MODULES[strategy][2]
    monkeypatch.delenv(flag, raising=False)
    config = _config_with_strategy(strategy)
    error_capture = caplog.at_level(
        logging.ERROR, logger="gps_denied_onboard.c3_matcher"
    )
    with error_capture, pytest.raises(StrategyNotAvailableError) as exc_info:
        build_matcher_strategy(
            config,
            lightglue_runtime=_FakeLightGlueRuntime(),
            ransac_filter=_FakeRansacFilter(),
            inference_runtime=_FakeInferenceRuntime(),
        )
    expected_fragment = f"BUILD_MATCHER_{strategy.upper()} is OFF"
    assert expected_fragment in str(exc_info.value)
    logged_messages = [record.message for record in caplog.records]
    assert "c3.matcher.build_flag_off" in logged_messages


def test_ac3_factory_does_not_load_module_when_flag_off(
    monkeypatch, strategy_module_cleanup
) -> None:
    """After a rejected build the strategy module is absent from ``sys.modules``."""
    strategy = "disk_lightglue"
    module_name, _, flag = _STRATEGY_MODULES[strategy]
    monkeypatch.delenv(flag, raising=False)
    config = _config_with_strategy(strategy)
    injected = {
        "lightglue_runtime": _FakeLightGlueRuntime(),
        "ransac_filter": _FakeRansacFilter(),
        "inference_runtime": _FakeInferenceRuntime(),
    }
    with pytest.raises(StrategyNotAvailableError):
        build_matcher_strategy(config, **injected)
    assert module_name not in sys.modules


# ----------------------------------------------------------------------
# AC-4: unknown strategy rejected at config-load time.


@pytest.mark.parametrize(
    "bad_label",
    ["DISK_LIGHTGLUE", "garbage", "", "lightglue", "disk_lightglue_v2"],
)
def test_ac4_unknown_strategy_rejected_at_config_load(bad_label: str) -> None:
    """An unknown label raises ``ConfigError`` whose text lists every known strategy."""
    with pytest.raises(ConfigError) as exc_info:
        C3MatcherConfig(strategy=bad_label)
    message = str(exc_info.value)
    unmentioned = [valid for valid in KNOWN_STRATEGIES if valid not in message]
    assert not unmentioned


# ----------------------------------------------------------------------
# AC-5: factory emits INFO log on success.


def test_ac5_factory_emits_info_log_on_success(
    monkeypatch, strategy_module_cleanup, caplog
) -> None:
    """A successful build logs exactly one ``strategy_loaded`` record with config fields."""
    strategy = "disk_lightglue"
    flag = _STRATEGY_MODULES[strategy][2]
    monkeypatch.setenv(flag, "ON")
    _install_fake_strategy(strategy)
    config = _config_with_strategy(strategy)
    with caplog.at_level(logging.INFO, logger="gps_denied_onboard.c3_matcher"):
        instance = build_matcher_strategy(
            config,
            lightglue_runtime=_FakeLightGlueRuntime(),
            ransac_filter=_FakeRansacFilter(),
            inference_runtime=_FakeInferenceRuntime(),
        )
    assert isinstance(instance, CrossDomainMatcher)

    loaded_records = [
        rec for rec in caplog.records if rec.message == "c3.matcher.strategy_loaded"
    ]
    assert len(loaded_records) == 1
    loaded = loaded_records[0]
    # Extra attributes expected on the log record; a missing attribute
    # reads as None and therefore fails the comparison.
    expected_extras = {
        "strategy": "disk_lightglue",
        "min_inliers_threshold": 60,
        "residual_warn_threshold_px": 2.5,
    }
    for attr_name, expected_value in expected_extras.items():
        assert getattr(loaded, attr_name, None) == expected_value


# ----------------------------------------------------------------------
# AC-6: strategy resolution table.


@pytest.mark.parametrize("strategy", sorted(_STRATEGY_MODULES.keys()))
def test_ac6_strategy_resolution(
    monkeypatch, strategy_module_cleanup, strategy
) -> None:
    """With the flag ON the factory returns an instance of the installed fake class."""
    module_name, _, flag = _STRATEGY_MODULES[strategy]
    monkeypatch.setenv(flag, "ON")
    fake_cls = _install_fake_strategy(strategy)
    config = _config_with_strategy(strategy)
    injected = {
        "lightglue_runtime": _FakeLightGlueRuntime(),
        "ransac_filter": _FakeRansacFilter(),
        "inference_runtime": _FakeInferenceRuntime(),
    }
    instance = build_matcher_strategy(config, **injected)
    for expected_type in (fake_cls, CrossDomainMatcher):
        assert isinstance(instance, expected_type)
    # The fake module must still be registered after the build.
    registered = sys.modules[module_name]
    assert registered is not None


# ----------------------------------------------------------------------
# AC-7: error hierarchy.


@pytest.mark.parametrize(
    "exc_factory",
    [MatcherBackboneError, InsufficientInliersError],
)
def test_ac7_all_matcher_errors_caught_as_family(exc_factory) -> None:
    """Each concrete matcher error is catchable as ``MatcherError``."""

    def _blow_up() -> None:
        raise exc_factory("boom")

    with pytest.raises(MatcherError):
        _blow_up()


def test_ac7_strategy_not_available_outside_family() -> None:
    """``StrategyNotAvailableError`` is not intercepted by an ``except MatcherError``."""

    def _raise_through_family_handler() -> None:
        # If the handler below catches the error, it belongs to the
        # MatcherError family and the test is failed explicitly.
        try:
            raise StrategyNotAvailableError("composition-time")
        except MatcherError:
            pytest.fail(
                "StrategyNotAvailableError is a composition-root error "
                "and MUST NOT be in the c3 MatcherError family"
            )

    with pytest.raises(StrategyNotAvailableError):
        _raise_through_family_handler()


# ----------------------------------------------------------------------
# AC-8: Public API re-exports.


def test_ac8_public_api_re_exports() -> None:
    """Every public name is listed in the ``c3_matcher`` package ``__all__``."""
    from gps_denied_onboard.components import c3_matcher

    expected_exports = (
        "C3MatcherConfig",
        "CandidateMatchSet",
        "CrossDomainMatcher",
        "InsufficientInliersError",
        "MatchResult",
        "MatcherBackboneError",
        "MatcherError",
        "MatcherHealth",
    )
    exported = c3_matcher.__all__
    for name in expected_exports:
        assert name in exported, f"missing public re-export: {name}"


def test_ac8_internals_not_in_public_api() -> None:
    """Internal helpers and concrete strategies stay out of the package ``__all__``."""
    from gps_denied_onboard.components import c3_matcher

    private_names = (
        "RollingHealthWindow",
        "_health_window",
        "DiskLightGlueMatcher",
        "AlikedLightGlueMatcher",
        "XFeatMatcher",
    )
    exported = c3_matcher.__all__
    for internal in private_names:
        assert internal not in exported, (
            f"internal name leaked into public API: {internal}"
        )


# ----------------------------------------------------------------------
# AC-9: single-thread binding. Deferred — see module docstring.


def test_ac9_single_thread_binding_deferred() -> None:
    """Placeholder for AC-9 (single-thread binding); asserts nothing.

    AC-9 is deferred per task spec Risk 4: the generic ``compose_root``
    thread-binding registry lives with AZ-270 and the broader
    runtime-root composition.  Each factory owns its own thread binding
    today; this protocol task does not add a new binding registry.
    """


# ----------------------------------------------------------------------
# AC-10: LightGlueRuntime identity-share with C2.5. Deferred per
# module docstring.


def test_ac10_lightglue_runtime_identity_share_deferred() -> None:
    """Placeholder for AC-10 (``LightGlueRuntime`` identity-share); asserts nothing.

    AC-10 (identity-share between C3 and C2.5) is deferred per task spec
    Risk 4: the cross-factory helper identity assertion lives with
    AZ-270's integration tests where both the rerank factory and the
    matcher factory are wired against the same runtime root.  This
    protocol task does not own the composition wiring.
    """


# ----------------------------------------------------------------------
# AC-11: RollingHealthWindow O(1) accumulator correctness.


def test_ac11_rolling_window_matches_independent_sliding_computation() -> None:
    """Compare ``RollingHealthWindow`` snapshots with a brute-force recomputation.

    A 90 s, 1 Hz event stream is replayed into a fresh window up to each
    checkpoint (60 s, 70 s and 89 s).  The snapshot taken there must
    agree with an independent list-based computation over the same
    events for all three health fields.
    """
    one_second_ns = 1_000_000_000
    window_ns = 60 * one_second_ns
    threshold = 60
    # 90 s of events at 1 Hz: inlier counts alternate 100 / 30 and every
    # 11th second carries a backbone error.
    events: list[tuple[int, int, bool]] = [
        (sec * one_second_ns, 100 if sec % 2 == 0 else 30, sec % 11 == 0)
        for sec in range(90)
    ]

    def _independent(at_ts: int) -> MatcherHealth:
        """Recompute the expected health at *at_ts* directly from ``events``."""
        seen = [(ts, n, e) for (ts, n, e) in events if ts <= at_ts]
        # Window keeps entries with timestamp > (at_ts - window_ns).
        live = [(ts, n, e) for (ts, n, e) in seen if ts > at_ts - window_ns]
        mean = (sum(n for _, n, _ in live) / len(live)) if live else 0.0
        errs = sum(1 for _, _, e in live if e)
        # consecutive_low_inlier: walk the seen events backwards until
        # the first event at or above the threshold.
        consecutive = 0
        for _, n, _ in reversed(seen):
            if n >= threshold:
                break
            consecutive += 1
        return MatcherHealth(
            consecutive_low_inlier=consecutive,
            mean_inliers_60s=mean,
            backbone_error_count_60s=errs,
        )

    # A snapshot reflects the window's state after its last update, so
    # each checkpoint gets its own fresh window fed only with the events
    # up to and including that time.
    for at_sec in (60, 70, 89):
        at_ts = at_sec * one_second_ns
        replay = RollingHealthWindow(
            min_inliers_threshold=threshold, window_ns=window_ns
        )
        for ts, n, e in events:
            if ts > at_ts:
                break
            replay.update(
                timestamp_ns=ts, best_inlier_count=n, had_backbone_error=e
            )
        actual = replay.snapshot()
        expected = _independent(at_ts)
        assert actual.consecutive_low_inlier == expected.consecutive_low_inlier, at_sec
        assert actual.mean_inliers_60s == pytest.approx(expected.mean_inliers_60s), at_sec
        assert actual.backbone_error_count_60s == expected.backbone_error_count_60s, at_sec


def test_ac11_snapshot_is_constant_time() -> None:
    """The p99 of 1000 ``snapshot()`` calls on a 6000-entry window stays within 50 us."""
    window = RollingHealthWindow(min_inliers_threshold=60)
    # 100 Hz × 60 s = 6000 entries in-window.
    tick_ns = 10_000_000
    for tick in range(6000):
        window.update(
            timestamp_ns=tick * tick_ns,
            best_inlier_count=80,
            had_backbone_error=False,
        )

    def _timed_snapshot_us() -> float:
        started = time.perf_counter()
        window.snapshot()
        return (time.perf_counter() - started) * 1_000_000.0

    durations_us = sorted(_timed_snapshot_us() for _ in range(1000))
    p99 = durations_us[int(0.99 * len(durations_us))]
    assert p99 <= 50.0, f"snapshot p99={p99:.2f} us exceeded 50 us budget"


# ----------------------------------------------------------------------
# AC-12: update semantics.


def test_ac12_consecutive_low_inlier_resets_on_high_frame() -> None:
    """The low-inlier streak counts up, resets on a high frame, then restarts."""
    second_ns = 1_000_000_000
    window = RollingHealthWindow(min_inliers_threshold=60)

    def feed(sec: int, inliers: int) -> None:
        window.update(
            timestamp_ns=sec * second_ns,
            best_inlier_count=inliers,
            had_backbone_error=False,
        )

    # Five low frames in a row.
    for sec in range(5):
        feed(sec, 10)
    assert window.snapshot().consecutive_low_inlier == 5

    # One high frame clears the streak.
    feed(5, 200)
    assert window.snapshot().consecutive_low_inlier == 0

    # The next low frame starts a new streak.
    feed(6, 15)
    assert window.snapshot().consecutive_low_inlier == 1


def test_ac12_threshold_boundary_is_inclusive_for_reset() -> None:
    """A count one below the threshold is low; a count equal to it resets the streak."""
    threshold = 60
    window = RollingHealthWindow(min_inliers_threshold=threshold)

    window.update(
        timestamp_ns=0,
        best_inlier_count=threshold - 1,
        had_backbone_error=False,
    )
    streak_after_low = window.snapshot().consecutive_low_inlier
    assert streak_after_low == 1

    window.update(
        timestamp_ns=1_000_000_000,
        best_inlier_count=threshold,
        had_backbone_error=False,
    )
    streak_after_boundary = window.snapshot().consecutive_low_inlier
    assert streak_after_boundary == 0


def test_ac12_mean_inliers_is_rolling() -> None:
    """After 120 s of 1 Hz updates the mean reflects only the later 200-inlier frames."""
    second_ns = 1_000_000_000
    window = RollingHealthWindow(min_inliers_threshold=60)
    for sec in range(120):
        inliers = 200 if sec >= 60 else 100
        window.update(
            timestamp_ns=sec * second_ns,
            best_inlier_count=inliers,
            had_backbone_error=False,
        )
    # Only the last 60 s remain in-window after eviction.
    rolling_mean = window.snapshot().mean_inliers_60s
    assert rolling_mean == pytest.approx(200.0)


def test_ac12_backbone_error_count_is_rolling() -> None:
    """Backbone errors are counted while in-window and drop to zero once aged out."""
    second_ns = 1_000_000_000
    window = RollingHealthWindow(min_inliers_threshold=60)

    def feed(seconds, had_error: bool) -> None:
        for sec in seconds:
            window.update(
                timestamp_ns=sec * second_ns,
                best_inlier_count=80,
                had_backbone_error=had_error,
            )

    feed(range(60), True)
    assert window.snapshot().backbone_error_count_60s == 60

    # Advance 90 s with no errors — all original errors should age out.
    feed(range(60, 150), False)
    assert window.snapshot().backbone_error_count_60s == 0


# ----------------------------------------------------------------------
# NFRs.


@pytest.mark.parametrize(
    "exc_type",
    [MatcherBackboneError, InsufficientInliersError],
)
def test_nfr_reliability_all_matcher_errors_subclass_family(exc_type) -> None:
    """Each concrete matcher error type is a subclass of ``MatcherError``."""
    in_family = issubclass(exc_type, MatcherError)
    assert in_family


def test_nfr_reliability_strategy_not_available_not_in_family() -> None:
    """``StrategyNotAvailableError`` is not a subclass of ``MatcherError``."""
    in_family = issubclass(StrategyNotAvailableError, MatcherError)
    assert not in_family


def test_nfr_perf_factory_under_50ms_p99(
    monkeypatch, strategy_module_cleanup
) -> None:
    """The p99 of 100 factory builds against a fake strategy stays within 50 ms."""
    strategy = "disk_lightglue"
    flag = _STRATEGY_MODULES[strategy][2]
    monkeypatch.setenv(flag, "ON")
    _install_fake_strategy(strategy)
    config = _config_with_strategy(strategy)
    # Collaborators are created once so only the factory call is timed.
    injected = {
        "lightglue_runtime": _FakeLightGlueRuntime(),
        "ransac_filter": _FakeRansacFilter(),
        "inference_runtime": _FakeInferenceRuntime(),
    }

    def _timed_build_ms() -> float:
        started = time.perf_counter()
        build_matcher_strategy(config, **injected)
        return (time.perf_counter() - started) * 1000.0

    durations_ms = sorted(_timed_build_ms() for _ in range(100))
    p99 = durations_ms[int(0.99 * len(durations_ms))]
    assert p99 <= 50.0


def test_nfr_perf_window_update_under_5us_p99() -> None:
    """The p99 latency of ``RollingHealthWindow.update`` stays within the CI budget.

    Feeds 100 000 updates whose timestamps advance by 0.6 ms each and
    times every call individually.
    """
    sample_count = 100_000
    cadence_ns = 600_000  # 0.6 ms between consecutive updates
    window = RollingHealthWindow(min_inliers_threshold=60)
    durations_us: list[float] = []
    # The step is what makes the timestamps advance at the intended
    # cadence; without it they would move 1 ns per sample.  The samples
    # span roughly 60 s in total.
    # NOTE(review): whether eviction is exercised depends on the
    # window's default length, which is not visible here — confirm.
    for ts_ns in range(0, sample_count * cadence_ns, cadence_ns):
        t0 = time.perf_counter()
        window.update(
            timestamp_ns=ts_ns, best_inlier_count=80, had_backbone_error=False
        )
        durations_us.append((time.perf_counter() - t0) * 1_000_000.0)
    durations_us.sort()
    p99 = durations_us[int(0.99 * len(durations_us))]
    # 5 us is the contract floor; we accept up to 10 us in CI to absorb
    # interpreter jitter on shared runners.
    assert p99 <= 10.0, f"window.update p99={p99:.2f} us exceeded 10 us budget"


# ----------------------------------------------------------------------
# Surface coverage — config defaults.


def test_c3_config_defaults() -> None:
    """A default-constructed ``C3MatcherConfig`` carries the expected field values."""
    cfg = C3MatcherConfig()
    observed = (
        cfg.strategy,
        cfg.min_inliers_threshold,
        cfg.residual_warn_threshold_px,
    )
    assert observed[0] == "disk_lightglue"
    assert observed[1] == 60
    assert observed[2] == 2.5


def test_c3_config_min_inliers_validation() -> None:
    """Zero and negative ``min_inliers_threshold`` values raise ``ConfigError``."""
    for invalid_threshold in (0, -3):
        with pytest.raises(ConfigError):
            C3MatcherConfig(min_inliers_threshold=invalid_threshold)


def test_c3_config_residual_warn_validation() -> None:
    """Zero and negative ``residual_warn_threshold_px`` values raise ``ConfigError``."""
    for invalid_residual in (0.0, -1.0):
        with pytest.raises(ConfigError):
            C3MatcherConfig(residual_warn_threshold_px=invalid_residual)