"""Unit tests for ``runner.helpers.retrieval_evaluator`` (AZ-423). Pure-logic AC-8.6 coverage for FT-P-19 — the scenario in ``e2e/tests/positive/test_ft_p_19_sat_reloc_scale.py`` exercises the same helpers end-to-end when the SITL fixture is prepared; this file covers them in isolation. """ from __future__ import annotations import csv from pathlib import Path from typing import Any import pytest from runner.helpers import retrieval_evaluator as re_ DERKACHI_LAT = 48.275292 DERKACHI_LON = 37.385220 def _candidate( tile_id: str = "tile_001", lat: float = DERKACHI_LAT, lon: float = DERKACHI_LON, ) -> re_.CandidateTile: return re_.CandidateTile(tile_id=tile_id, centre_lat_deg=lat, centre_lon_deg=lon) def _query( image_id: str = "AD000001", *, true_lat: float = DERKACHI_LAT, true_lon: float = DERKACHI_LON, candidates: tuple[re_.CandidateTile, ...] = (), ) -> re_.TopKQuery: return re_.TopKQuery( image_id=image_id, true_centre_lat_deg=true_lat, true_centre_lon_deg=true_lon, candidates=candidates, ) # ─────────────────────── evaluate_top_k_within_distance ─────────────────────── def test_evaluate_top_k_one_candidate_close_passes() -> None: # Arrange — single candidate exactly at GT q = _query(candidates=(_candidate(),)) # Act report = re_.evaluate_top_k_within_distance([q], expected_image_count=1) # Assert assert report.passes assert report.pass_count == 1 assert report.entries[0].min_distance_m == pytest.approx(0.0, abs=0.01) def test_evaluate_top_k_all_candidates_far_fails() -> None: # Arrange — candidate ~ 1 km east at this latitude far = _candidate(tile_id="far", lat=DERKACHI_LAT, lon=DERKACHI_LON + 0.0135) q = _query(candidates=(far,)) # Act report = re_.evaluate_top_k_within_distance([q], expected_image_count=1) # Assert assert not report.passes assert report.entries[0].pass_distance is False assert (report.entries[0].min_distance_m or 0) > 100.0 def test_evaluate_top_k_one_close_candidate_among_far_passes() -> None: # Arrange — 9 far + 1 close in top-K (any rank passes) far_tiles = tuple( _candidate(tile_id=f"far_{i}", lon=DERKACHI_LON + 0.01 * (i + 1)) for i in range(9) ) close = _candidate(tile_id="close", lat=DERKACHI_LAT, lon=DERKACHI_LON) q = _query(candidates=far_tiles + (close,)) # Act report = re_.evaluate_top_k_within_distance([q], expected_image_count=1) # Assert assert report.passes def test_evaluate_top_k_empty_candidates_fails() -> None: # Arrange q = _query(candidates=()) # Act report = re_.evaluate_top_k_within_distance([q], expected_image_count=1) # Assert assert not report.passes assert report.entries[0].min_distance_m is None assert report.entries[0].candidate_count == 0 def test_evaluate_top_k_short_query_count_fails_aggregate() -> None: # Arrange — 1 passing query but expected_image_count=60 q = _query(candidates=(_candidate(),)) # Act report = re_.evaluate_top_k_within_distance([q], expected_image_count=60) # Assert assert not report.passes assert report.pass_count == 1 def test_evaluate_top_k_invalid_tolerance_raises() -> None: with pytest.raises(ValueError, match="max_distance_m"): re_.evaluate_top_k_within_distance( [_query(candidates=(_candidate(),))], max_distance_m=0 ) def test_evaluate_top_k_custom_tolerance() -> None: # Arrange — candidate 200m east; default 100m fails, custom 250m passes far_200m = _candidate(lat=DERKACHI_LAT, lon=DERKACHI_LON + 0.0027) q = _query(candidates=(far_200m,)) # Act strict = re_.evaluate_top_k_within_distance([q], max_distance_m=100, expected_image_count=1) lenient = re_.evaluate_top_k_within_distance([q], max_distance_m=250, expected_image_count=1) # Assert assert not strict.passes assert lenient.passes def test_evaluate_top_k_aggregate_60_all_pass() -> None: # Arrange — 60 queries, each with one close candidate queries = [_query(image_id=f"AD0000{i:02d}", candidates=(_candidate(),)) for i in range(1, 61)] # Act report = re_.evaluate_top_k_within_distance(queries, expected_image_count=60) # Assert assert report.passes assert report.pass_count == 60 def test_evaluate_top_k_aggregate_60_one_fail() -> None: # Arrange — 59 pass + 1 fail queries = [_query(image_id=f"AD0000{i:02d}", candidates=(_candidate(),)) for i in range(1, 60)] queries.append(_query(image_id="AD000060", candidates=())) # Act report = re_.evaluate_top_k_within_distance(queries, expected_image_count=60) # Assert assert not report.passes assert report.pass_count == 59 assert len(report.failing_entries) == 1 # ─────────────────────── evaluate_scene_change_subset ─────────────────────── def test_evaluate_scene_change_both_matched_still_partial() -> None: # Arrange — both pairs matched matches = [ re_.SceneChangeMatch(image_id="AD000001", matched=True, inlier_count=120), re_.SceneChangeMatch(image_id="AD000002", matched=True, inlier_count=98), ] # Act report = re_.evaluate_scene_change_subset(matches) # Assert assert report.coverage_complete assert report.overall_label == re_.SCENE_CHANGE_SUBSET_PARTIAL_LABEL assert report.matched_count == 2 def test_evaluate_scene_change_zero_matched_still_partial() -> None: # Arrange matches = [ re_.SceneChangeMatch(image_id="AD000001", matched=False, inlier_count=0), re_.SceneChangeMatch(image_id="AD000002", matched=False, inlier_count=0), ] # Act report = re_.evaluate_scene_change_subset(matches) # Assert assert report.overall_label == re_.SCENE_CHANGE_SUBSET_PARTIAL_LABEL def test_evaluate_scene_change_one_image_only_coverage_incomplete() -> None: # Arrange matches = [re_.SceneChangeMatch(image_id="AD000001", matched=True, inlier_count=120)] # Act report = re_.evaluate_scene_change_subset(matches) # Assert assert not report.coverage_complete # PARTIAL label still set (decoupled from coverage) assert report.overall_label == re_.SCENE_CHANGE_SUBSET_PARTIAL_LABEL def test_evaluate_scene_change_empty_coverage_incomplete() -> None: # Act report = re_.evaluate_scene_change_subset([]) # Assert assert not report.coverage_complete def test_evaluate_scene_change_extra_image_ids_coverage_incomplete() -> None: # Arrange — image set that doesn't match expected pair ids matches = [ re_.SceneChangeMatch(image_id="AD000099", matched=True, inlier_count=120), re_.SceneChangeMatch(image_id="AD000002", matched=True, inlier_count=80), ] # Act report = re_.evaluate_scene_change_subset(matches) # Assert assert not report.coverage_complete # ─────────────────────── CSV writers ─────────────────────── def test_write_top_k_csv_round_trip(tmp_path: Path) -> None: # Arrange out = tmp_path / "topk.csv" queries = [ _query(image_id="AD000001", candidates=(_candidate(),)), _query(image_id="AD000002", candidates=()), ] report = re_.evaluate_top_k_within_distance(queries, expected_image_count=2) # Act re_.write_top_k_csv(out, report) rows = list(csv.reader(out.open(encoding="utf-8"))) # Assert assert rows[0] == list(re_.TOP_K_CSV_HEADER) assert rows[1][0] == "AD000001" assert rows[1][3] == "true" assert rows[2][0] == "AD000002" assert rows[2][2] == "" # min_distance_m is None when no candidates assert rows[2][3] == "false" def test_write_scene_change_csv_round_trip(tmp_path: Path) -> None: # Arrange out = tmp_path / "scene_change.csv" matches = [ re_.SceneChangeMatch(image_id="AD000001", matched=True, inlier_count=120), re_.SceneChangeMatch(image_id="AD000002", matched=False, inlier_count=None), ] report = re_.evaluate_scene_change_subset(matches) # Act re_.write_scene_change_csv(out, report) rows = list(csv.reader(out.open(encoding="utf-8"))) # Assert assert rows[0] == list(re_.SCENE_CHANGE_CSV_HEADER) assert rows[1] == ["AD000001", "true", "120", re_.SCENE_CHANGE_SUBSET_PARTIAL_LABEL] assert rows[2] == ["AD000002", "false", "", re_.SCENE_CHANGE_SUBSET_PARTIAL_LABEL] def test_write_top_k_csv_missing_parent_dir_raises(tmp_path: Path) -> None: out = tmp_path / "nope" / "topk.csv" report = re_.evaluate_top_k_within_distance( [_query(candidates=(_candidate(),))], expected_image_count=1 ) with pytest.raises(OSError): re_.write_top_k_csv(out, report) # ─────────────────────── project_topk_record_to_query ─────────────────────── def test_project_topk_happy_path() -> None: # Arrange payload = { "image_id": "AD000001", "candidates": [ {"tile_id": "t_a", "centre_lat_deg": DERKACHI_LAT, "centre_lon_deg": DERKACHI_LON}, {"tile_id": "t_b", "centre_lat_deg": DERKACHI_LAT + 0.001, "centre_lon_deg": DERKACHI_LON}, ], } # Act q = re_.project_topk_record_to_query( payload, true_centre_lat_deg=DERKACHI_LAT, true_centre_lon_deg=DERKACHI_LON ) # Assert assert q is not None assert q.image_id == "AD000001" assert len(q.candidates) == 2 assert q.candidates[0].tile_id == "t_a" def test_project_topk_skips_malformed_candidates() -> None: # Arrange payload = { "image_id": "AD000002", "candidates": [ {"tile_id": "ok", "centre_lat_deg": DERKACHI_LAT, "centre_lon_deg": DERKACHI_LON}, "not a dict", {"tile_id": "missing_lat", "centre_lon_deg": DERKACHI_LON}, {"tile_id": "wrong_type", "centre_lat_deg": "stringy", "centre_lon_deg": DERKACHI_LON}, ], } # Act q = re_.project_topk_record_to_query( payload, true_centre_lat_deg=DERKACHI_LAT, true_centre_lon_deg=DERKACHI_LON ) # Assert assert q is not None assert [c.tile_id for c in q.candidates] == ["ok"] def test_project_topk_non_dict_payload_returns_none() -> None: # Act / Assert assert re_.project_topk_record_to_query("not a dict", 0, 0) is None # type: ignore[arg-type] def test_project_topk_missing_image_id_returns_none() -> None: # Act / Assert assert re_.project_topk_record_to_query({"candidates": []}, 0, 0) is None def test_project_topk_missing_candidates_returns_none() -> None: # Act / Assert assert re_.project_topk_record_to_query({"image_id": "AD000001"}, 0, 0) is None # ─────────────────────── project_scene_change_record ─────────────────────── def test_project_scene_change_happy_path() -> None: # Arrange payload = {"image_id": "AD000001", "matched": True, "inlier_count": 120} # Act m = re_.project_scene_change_record(payload) # Assert assert m is not None assert m.matched is True assert m.inlier_count == 120 def test_project_scene_change_inlier_count_missing_is_none() -> None: # Act m = re_.project_scene_change_record({"image_id": "AD000001", "matched": False}) # Assert assert m is not None assert m.inlier_count is None def test_project_scene_change_inlier_count_bool_is_none() -> None: # Act — bool is technically int in Python, but treat as missing for inlier_count m = re_.project_scene_change_record( {"image_id": "AD000001", "matched": True, "inlier_count": True} ) # Assert assert m is not None assert m.inlier_count is None def test_project_scene_change_matched_not_bool_returns_none() -> None: # Act / Assert assert re_.project_scene_change_record({"image_id": "AD000001", "matched": "yes"}) is None def test_project_scene_change_non_dict_returns_none() -> None: # Act / Assert assert re_.project_scene_change_record(None) is None assert re_.project_scene_change_record("nope") is None # ─────────────────────── iter_*_payloads ─────────────────────── class _StubRecord: def __init__(self, record_type: str, payload: Any) -> None: self.record_type = record_type self.payload = payload def test_iter_topk_payloads_filters_by_record_type() -> None: # Arrange records = [ _StubRecord("retrieval-topk", {"image_id": "AD000001"}), _StubRecord("scene-change-match", {"image_id": "AD000002"}), _StubRecord("retrieval-topk", {"image_id": "AD000003"}), _StubRecord("other-kind", {}), ] # Act payloads = list(re_.iter_topk_payloads(records)) # Assert assert [p["image_id"] for p in payloads] == ["AD000001", "AD000003"] def test_iter_scene_change_payloads_filters_by_record_type() -> None: # Arrange records = [ _StubRecord("retrieval-topk", {"image_id": "AD000001"}), _StubRecord("scene-change-match", {"image_id": "AD000002"}), ] # Act payloads = list(re_.iter_scene_change_payloads(records)) # Assert assert payloads == [{"image_id": "AD000002"}]