mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-21 21:21:13 +00:00
f25cae4a82
Implement the AC-8.6 (top-K=10 retrieval scale-ratio + scene-change
PARTIAL) and AC-8.2 / AC-NEW-6 (stale aged-tile rejection) blackbox
scenarios.
AZ-423 (FT-P-19, 3pt) helpers + scenario:
- retrieval_evaluator.py — top-K within-distance evaluator (60 stills
vs 100 m budget), scene-change PARTIAL recorder (always emits
PARTIAL on the 2 _gmaps.png pairs), FDR record projectors, CSV
writers.
- tests/positive/test_ft_p_19_sat_reloc_scale.py (6 parametrised
variants).
AZ-427 (FT-N-05, 2pt) helpers + scenario:
- aged_tile_rejection_evaluator.py — Signal A (stale rejection at
load) + Signal B (per-frame downgrade) decision matrix, reuses
ALLOWED_SOURCE_LABELS from estimate_schema.
- tests/negative/test_ft_n_05_stale_tile_rejection.py (12 parametrised
variants: FC × VIO × {7mo/active-conflict, 13mo/rear}).
48 new unit tests cover every helper branch. Both scenarios skip
when sitl_replay_ready is false and fail loudly when fixture records
are missing.
Per-batch review: PASS_WITH_WARNINGS (2 Low — production-dependency
surface, FDR-kind constant duplication).
Cumulative review 82-84: PASS (2 Low carry-over / hygiene candidate).
Co-authored-by: Cursor <cursoragent@cursor.com>
390 lines
13 KiB
Python
390 lines
13 KiB
Python
"""Unit tests for ``runner.helpers.retrieval_evaluator`` (AZ-423).
|
|
|
|
Pure-logic AC-8.6 coverage for FT-P-19 — the scenario in
|
|
``e2e/tests/positive/test_ft_p_19_sat_reloc_scale.py`` exercises the
|
|
same helpers end-to-end when the SITL fixture is prepared; this file
|
|
covers them in isolation.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import csv
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import pytest
|
|
|
|
from runner.helpers import retrieval_evaluator as re_
|
|
|
|
# Shared reference coordinates for this module: they are the default
# ``centre_lat_deg`` / ``centre_lon_deg`` of ``_candidate`` and the default
# ``true_centre_lat_deg`` / ``true_centre_lon_deg`` of ``_query``, so a
# default candidate coincides with the default ground-truth centre.
# NOTE(review): presumably the Derkachi test area — confirm against fixtures.
DERKACHI_LAT = 48.275292
|
|
DERKACHI_LON = 37.385220
|
|
|
|
|
|
def _candidate(
    tile_id: str = "tile_001",
    lat: float = DERKACHI_LAT,
    lon: float = DERKACHI_LON,
) -> re_.CandidateTile:
    """Build a ``CandidateTile`` for tests.

    With no arguments the tile is centred on the module-level
    ``DERKACHI_LAT`` / ``DERKACHI_LON`` coordinates.
    """
    tile_fields = {
        "tile_id": tile_id,
        "centre_lat_deg": lat,
        "centre_lon_deg": lon,
    }
    return re_.CandidateTile(**tile_fields)
|
|
|
|
|
|
def _query(
    image_id: str = "AD000001",
    *,
    true_lat: float = DERKACHI_LAT,
    true_lon: float = DERKACHI_LON,
    candidates: tuple[re_.CandidateTile, ...] = (),
) -> re_.TopKQuery:
    """Build a ``TopKQuery`` for tests.

    The ground-truth centre defaults to the module-level ``DERKACHI_LAT`` /
    ``DERKACHI_LON`` coordinates and the candidate tuple defaults to empty.
    """
    query_fields = {
        "image_id": image_id,
        "true_centre_lat_deg": true_lat,
        "true_centre_lon_deg": true_lon,
        "candidates": candidates,
    }
    return re_.TopKQuery(**query_fields)
|
|
|
|
|
|
# ─────────────────────── evaluate_top_k_within_distance ───────────────────────
|
|
|
|
|
|
def test_evaluate_top_k_one_candidate_close_passes() -> None:
    """A lone candidate placed exactly on the ground-truth centre passes."""
    # Given: one candidate at the default (ground-truth) coordinates
    on_target = _candidate()
    query = _query(candidates=(on_target,))

    # When
    result = re_.evaluate_top_k_within_distance([query], expected_image_count=1)

    # Then
    assert result.passes
    assert result.pass_count == 1
    first_entry = result.entries[0]
    assert first_entry.min_distance_m == pytest.approx(0.0, abs=0.01)
|
|
|
|
|
|
def test_evaluate_top_k_all_candidates_far_fails() -> None:
    """A query whose only candidate is ~1 km away does not pass."""
    # Given: candidate ~ 1 km east at this latitude
    remote_tile = _candidate(
        tile_id="far",
        lat=DERKACHI_LAT,
        lon=DERKACHI_LON + 0.0135,
    )
    query = _query(candidates=(remote_tile,))

    # When
    result = re_.evaluate_top_k_within_distance([query], expected_image_count=1)

    # Then
    assert not result.passes
    entry = result.entries[0]
    assert entry.pass_distance is False
    # ``or 0`` makes a missing distance fail the comparison instead of raising.
    assert (entry.min_distance_m or 0) > 100.0
|
|
|
|
|
|
def test_evaluate_top_k_one_close_candidate_among_far_passes() -> None:
    """One close candidate is enough, even when it is listed after nine far ones."""
    # Given: 9 far tiles followed by 1 close tile in the top-K (any rank passes)
    top_k = [
        _candidate(tile_id=f"far_{idx}", lon=DERKACHI_LON + 0.01 * (idx + 1))
        for idx in range(9)
    ]
    top_k.append(_candidate(tile_id="close", lat=DERKACHI_LAT, lon=DERKACHI_LON))
    query = _query(candidates=tuple(top_k))

    # When
    result = re_.evaluate_top_k_within_distance([query], expected_image_count=1)

    # Then
    assert result.passes
|
|
|
|
|
|
def test_evaluate_top_k_empty_candidates_fails() -> None:
    """A query without candidates fails and reports no minimum distance."""
    # Given
    empty_query = _query(candidates=())

    # When
    result = re_.evaluate_top_k_within_distance(
        [empty_query],
        expected_image_count=1,
    )

    # Then
    assert not result.passes
    entry = result.entries[0]
    assert entry.min_distance_m is None
    assert entry.candidate_count == 0
|
|
|
|
|
|
def test_evaluate_top_k_short_query_count_fails_aggregate() -> None:
    """One passing query cannot satisfy an expected image count of 60."""
    # Given: a single passing query, but 60 images are expected
    lone_query = _query(candidates=(_candidate(),))

    # When
    result = re_.evaluate_top_k_within_distance(
        [lone_query],
        expected_image_count=60,
    )

    # Then: the aggregate fails while the per-query pass is still counted
    assert not result.passes
    assert result.pass_count == 1
|
|
|
|
|
|
def test_evaluate_top_k_invalid_tolerance_raises() -> None:
    """``max_distance_m=0`` is rejected with a ``ValueError`` naming the parameter."""
    expect_value_error = pytest.raises(ValueError, match="max_distance_m")
    with expect_value_error:
        re_.evaluate_top_k_within_distance(
            [_query(candidates=(_candidate(),))],
            max_distance_m=0,
        )
|
|
|
|
|
|
def test_evaluate_top_k_custom_tolerance() -> None:
    """The same ~200 m offset fails a 100 m budget and passes a 250 m budget."""
    # Given: candidate 200m east of the ground-truth centre
    offset_tile = _candidate(lat=DERKACHI_LAT, lon=DERKACHI_LON + 0.0027)
    queries = [_query(candidates=(offset_tile,))]

    # When: evaluate the identical input under two tolerances
    strict = re_.evaluate_top_k_within_distance(
        queries,
        max_distance_m=100,
        expected_image_count=1,
    )
    lenient = re_.evaluate_top_k_within_distance(
        queries,
        max_distance_m=250,
        expected_image_count=1,
    )

    # Then
    assert not strict.passes
    assert lenient.passes
|
|
|
|
|
|
def test_evaluate_top_k_aggregate_60_all_pass() -> None:
    """Sixty queries that each hold a close candidate pass in aggregate."""
    # Given: ids AD000001 .. AD000060, each with one on-target candidate
    image_ids = [f"AD0000{number:02d}" for number in range(1, 61)]
    queries = [
        _query(image_id=image_id, candidates=(_candidate(),))
        for image_id in image_ids
    ]

    # When
    result = re_.evaluate_top_k_within_distance(queries, expected_image_count=60)

    # Then
    assert result.passes
    assert result.pass_count == 60
|
|
|
|
|
|
def test_evaluate_top_k_aggregate_60_one_fail() -> None:
    """A single candidate-less query among sixty fails the aggregate."""
    # Given: 59 passing queries followed by 1 query without candidates
    passing = [
        _query(image_id=f"AD0000{number:02d}", candidates=(_candidate(),))
        for number in range(1, 60)
    ]
    failing = _query(image_id="AD000060", candidates=())
    queries = [*passing, failing]

    # When
    result = re_.evaluate_top_k_within_distance(queries, expected_image_count=60)

    # Then
    assert not result.passes
    assert result.pass_count == 59
    assert len(result.failing_entries) == 1
|
|
|
|
|
|
# ─────────────────────── evaluate_scene_change_subset ───────────────────────
|
|
|
|
|
|
def test_evaluate_scene_change_both_matched_still_partial() -> None:
    """Even with both pairs matched the overall label remains PARTIAL."""
    # Given: both pairs matched
    matches = [
        re_.SceneChangeMatch(image_id=image_id, matched=True, inlier_count=inliers)
        for image_id, inliers in (("AD000001", 120), ("AD000002", 98))
    ]

    # When
    result = re_.evaluate_scene_change_subset(matches)

    # Then
    assert result.coverage_complete
    assert result.overall_label == re_.SCENE_CHANGE_SUBSET_PARTIAL_LABEL
    assert result.matched_count == 2
|
|
|
|
|
|
def test_evaluate_scene_change_zero_matched_still_partial() -> None:
    """With neither pair matched the overall label is still PARTIAL."""
    # Given: both pairs unmatched with zero inliers
    matches = [
        re_.SceneChangeMatch(image_id=image_id, matched=False, inlier_count=0)
        for image_id in ("AD000001", "AD000002")
    ]

    # When
    result = re_.evaluate_scene_change_subset(matches)

    # Then
    assert result.overall_label == re_.SCENE_CHANGE_SUBSET_PARTIAL_LABEL
|
|
|
|
|
|
def test_evaluate_scene_change_one_image_only_coverage_incomplete() -> None:
    """A single supplied match leaves coverage incomplete; the label stays PARTIAL."""
    # Given: only one match is supplied
    only_match = re_.SceneChangeMatch(
        image_id="AD000001",
        matched=True,
        inlier_count=120,
    )

    # When
    result = re_.evaluate_scene_change_subset([only_match])

    # Then
    assert not result.coverage_complete
    # PARTIAL label still set (decoupled from coverage)
    assert result.overall_label == re_.SCENE_CHANGE_SUBSET_PARTIAL_LABEL
|
|
|
|
|
|
def test_evaluate_scene_change_empty_coverage_incomplete() -> None:
    """An empty match list is reported as incomplete coverage."""
    no_matches: list = []
    result = re_.evaluate_scene_change_subset(no_matches)
    assert not result.coverage_complete
|
|
|
|
|
|
def test_evaluate_scene_change_extra_image_ids_coverage_incomplete() -> None:
    """Two matches including the id AD000099 are reported as incomplete coverage."""
    # Given: image set that doesn't match expected pair ids
    matches = [
        re_.SceneChangeMatch(image_id=image_id, matched=True, inlier_count=inliers)
        for image_id, inliers in (("AD000099", 120), ("AD000002", 80))
    ]

    # When
    result = re_.evaluate_scene_change_subset(matches)

    # Then
    assert not result.coverage_complete
|
|
|
|
|
|
# ─────────────────────── CSV writers ───────────────────────
|
|
|
|
|
|
def test_write_top_k_csv_round_trip(tmp_path: Path) -> None:
    """Write a two-entry top-K report to CSV and verify the rows read back.

    Covers one passing entry (single on-target candidate) and one failing
    entry (no candidates, hence an empty ``min_distance_m`` cell).
    """
    # Arrange
    out = tmp_path / "topk.csv"
    queries = [
        _query(image_id="AD000001", candidates=(_candidate(),)),
        _query(image_id="AD000002", candidates=()),
    ]
    report = re_.evaluate_top_k_within_distance(queries, expected_image_count=2)

    # Act
    re_.write_top_k_csv(out, report)
    # Read back inside a context manager so the handle is closed
    # deterministically (the previous bare ``out.open()`` leaked it), and with
    # ``newline=""`` as the csv module requires for file objects.
    with out.open(encoding="utf-8", newline="") as handle:
        rows = list(csv.reader(handle))

    # Assert
    assert rows[0] == list(re_.TOP_K_CSV_HEADER)
    assert rows[1][0] == "AD000001"
    assert rows[1][3] == "true"
    assert rows[2][0] == "AD000002"
    assert rows[2][2] == ""  # min_distance_m is None when no candidates
    assert rows[2][3] == "false"
|
|
|
|
|
|
def test_write_scene_change_csv_round_trip(tmp_path: Path) -> None:
    """Write a two-match scene-change report to CSV and verify the rows read back.

    The second match has ``inlier_count=None``, which is expected to be
    written as an empty cell.
    """
    # Arrange
    out = tmp_path / "scene_change.csv"
    matches = [
        re_.SceneChangeMatch(image_id="AD000001", matched=True, inlier_count=120),
        re_.SceneChangeMatch(image_id="AD000002", matched=False, inlier_count=None),
    ]
    report = re_.evaluate_scene_change_subset(matches)

    # Act
    re_.write_scene_change_csv(out, report)
    # Read back inside a context manager so the handle is closed
    # deterministically (the previous bare ``out.open()`` leaked it), and with
    # ``newline=""`` as the csv module requires for file objects.
    with out.open(encoding="utf-8", newline="") as handle:
        rows = list(csv.reader(handle))

    # Assert
    assert rows[0] == list(re_.SCENE_CHANGE_CSV_HEADER)
    assert rows[1] == ["AD000001", "true", "120", re_.SCENE_CHANGE_SUBSET_PARTIAL_LABEL]
    assert rows[2] == ["AD000002", "false", "", re_.SCENE_CHANGE_SUBSET_PARTIAL_LABEL]
|
|
|
|
|
|
def test_write_top_k_csv_missing_parent_dir_raises(tmp_path: Path) -> None:
    """Writing under a non-existent parent directory raises ``OSError``."""
    # Given: a target whose parent directory was never created
    missing_dir = tmp_path / "nope"
    out = missing_dir / "topk.csv"
    queries = [_query(candidates=(_candidate(),))]
    report = re_.evaluate_top_k_within_distance(queries, expected_image_count=1)

    # When / Then
    expect_os_error = pytest.raises(OSError)
    with expect_os_error:
        re_.write_top_k_csv(out, report)
|
|
|
|
|
|
# ─────────────────────── project_topk_record_to_query ───────────────────────
|
|
|
|
|
|
def test_project_topk_happy_path() -> None:
    """A well-formed payload projects to a query that keeps candidate order."""
    # Given: two well-formed candidate dicts
    tile_a = {
        "tile_id": "t_a",
        "centre_lat_deg": DERKACHI_LAT,
        "centre_lon_deg": DERKACHI_LON,
    }
    tile_b = {
        "tile_id": "t_b",
        "centre_lat_deg": DERKACHI_LAT + 0.001,
        "centre_lon_deg": DERKACHI_LON,
    }
    payload = {"image_id": "AD000001", "candidates": [tile_a, tile_b]}

    # When
    projected = re_.project_topk_record_to_query(
        payload,
        true_centre_lat_deg=DERKACHI_LAT,
        true_centre_lon_deg=DERKACHI_LON,
    )

    # Then
    assert projected is not None
    assert projected.image_id == "AD000001"
    assert len(projected.candidates) == 2
    assert projected.candidates[0].tile_id == "t_a"
|
|
|
|
|
|
def test_project_topk_skips_malformed_candidates() -> None:
    """Only the well-formed candidate survives projection."""
    # Given: one valid candidate followed by three malformed ones
    valid = {
        "tile_id": "ok",
        "centre_lat_deg": DERKACHI_LAT,
        "centre_lon_deg": DERKACHI_LON,
    }
    not_a_mapping = "not a dict"
    lacks_latitude = {"tile_id": "missing_lat", "centre_lon_deg": DERKACHI_LON}
    string_latitude = {
        "tile_id": "wrong_type",
        "centre_lat_deg": "stringy",
        "centre_lon_deg": DERKACHI_LON,
    }
    payload = {
        "image_id": "AD000002",
        "candidates": [valid, not_a_mapping, lacks_latitude, string_latitude],
    }

    # When
    projected = re_.project_topk_record_to_query(
        payload,
        true_centre_lat_deg=DERKACHI_LAT,
        true_centre_lon_deg=DERKACHI_LON,
    )

    # Then
    assert projected is not None
    surviving_ids = [tile.tile_id for tile in projected.candidates]
    assert surviving_ids == ["ok"]
|
|
|
|
|
|
def test_project_topk_non_dict_payload_returns_none() -> None:
    """A string payload is rejected by returning ``None``."""
    projected = re_.project_topk_record_to_query("not a dict", 0, 0)  # type: ignore[arg-type]
    assert projected is None
|
|
|
|
|
|
def test_project_topk_missing_image_id_returns_none() -> None:
    """A payload without ``image_id`` is rejected by returning ``None``."""
    payload = {"candidates": []}
    projected = re_.project_topk_record_to_query(payload, 0, 0)
    assert projected is None
|
|
|
|
|
|
def test_project_topk_missing_candidates_returns_none() -> None:
    """A payload without ``candidates`` is rejected by returning ``None``."""
    payload = {"image_id": "AD000001"}
    projected = re_.project_topk_record_to_query(payload, 0, 0)
    assert projected is None
|
|
|
|
|
|
# ─────────────────────── project_scene_change_record ───────────────────────
|
|
|
|
|
|
def test_project_scene_change_happy_path() -> None:
    """A complete payload projects with its match flag and inlier count intact."""
    # Given
    payload = dict(image_id="AD000001", matched=True, inlier_count=120)

    # When
    match = re_.project_scene_change_record(payload)

    # Then
    assert match is not None
    assert match.matched is True
    assert match.inlier_count == 120
|
|
|
|
|
|
def test_project_scene_change_inlier_count_missing_is_none() -> None:
    """An absent ``inlier_count`` key projects to ``None``."""
    # Given: payload without an inlier count
    payload = {"image_id": "AD000001", "matched": False}

    # When
    match = re_.project_scene_change_record(payload)

    # Then
    assert match is not None
    assert match.inlier_count is None
|
|
|
|
|
|
def test_project_scene_change_inlier_count_bool_is_none() -> None:
    """A boolean ``inlier_count`` is treated as missing."""
    # Given: bool is technically int in Python, but must not count as inliers
    payload = {"image_id": "AD000001", "matched": True, "inlier_count": True}

    # When
    match = re_.project_scene_change_record(payload)

    # Then
    assert match is not None
    assert match.inlier_count is None
|
|
|
|
|
|
def test_project_scene_change_matched_not_bool_returns_none() -> None:
    """A non-boolean ``matched`` value is rejected by returning ``None``."""
    payload = {"image_id": "AD000001", "matched": "yes"}
    match = re_.project_scene_change_record(payload)
    assert match is None
|
|
|
|
|
|
def test_project_scene_change_non_dict_returns_none() -> None:
    """``None`` and string payloads are both rejected by returning ``None``."""
    for bad_payload in (None, "nope"):
        assert re_.project_scene_change_record(bad_payload) is None
|
|
|
|
|
|
# ─────────────────────── iter_*_payloads ───────────────────────
|
|
|
|
|
|
class _StubRecord:
    """Minimal record stand-in exposing ``record_type`` and ``payload`` attributes."""

    def __init__(self, record_type: str, payload: Any) -> None:
        """Store both constructor arguments unchanged on the instance."""
        self.record_type, self.payload = record_type, payload
|
|
|
|
|
|
def test_iter_topk_payloads_filters_by_record_type() -> None:
    """Only ``retrieval-topk`` payloads are yielded, in input order."""
    # Given: a mixed stream of record kinds
    stream = (
        ("retrieval-topk", {"image_id": "AD000001"}),
        ("scene-change-match", {"image_id": "AD000002"}),
        ("retrieval-topk", {"image_id": "AD000003"}),
        ("other-kind", {}),
    )
    records = [_StubRecord(kind, payload) for kind, payload in stream]

    # When
    payloads = list(re_.iter_topk_payloads(records))

    # Then
    yielded_ids = [payload["image_id"] for payload in payloads]
    assert yielded_ids == ["AD000001", "AD000003"]
|
|
|
|
|
|
def test_iter_scene_change_payloads_filters_by_record_type() -> None:
    """Only ``scene-change-match`` payloads are yielded."""
    # Given: one record of each kind
    topk_record = _StubRecord("retrieval-topk", {"image_id": "AD000001"})
    scene_record = _StubRecord("scene-change-match", {"image_id": "AD000002"})
    records = [topk_record, scene_record]

    # When
    payloads = list(re_.iter_scene_change_payloads(records))

    # Then
    assert payloads == [{"image_id": "AD000002"}]
|