mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 16:11:13 +00:00
[AZ-423] [AZ-427] Add FT-P-19 + FT-N-05 blackbox tests
Implement the AC-8.6 (top-K=10 retrieval scale-ratio + scene-change
PARTIAL) and AC-8.2 / AC-NEW-6 (stale aged-tile rejection) blackbox
scenarios.
AZ-423 (FT-P-19, 3pt) helpers + scenario:
- retrieval_evaluator.py — top-K within-distance evaluator (60 stills
vs 100 m budget), scene-change PARTIAL recorder (always emits
PARTIAL on the 2 _gmaps.png pairs), FDR record projectors, CSV
writers.
- tests/positive/test_ft_p_19_sat_reloc_scale.py (6 parametrised
variants).
AZ-427 (FT-N-05, 2pt) helpers + scenario:
- aged_tile_rejection_evaluator.py — Signal A (stale rejection at
load) + Signal B (per-frame downgrade) decision matrix, reuses
ALLOWED_SOURCE_LABELS from estimate_schema.
- tests/negative/test_ft_n_05_stale_tile_rejection.py (12 parametrised
variants: FC × VIO × {7mo/active-conflict, 13mo/rear}).
48 new unit tests cover every helper branch. Both scenarios skip
when sitl_replay_ready is false and fail loudly when fixture records
are missing.
Per-batch review: PASS_WITH_WARNINGS (2 Low — production-dependency
surface, FDR-kind constant duplication).
Cumulative review 82-84: PASS (2 Low carry-over / hygiene candidate).
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,149 @@
|
||||
"""FT-P-19 — Satellite-relocalization scale-ratio + scene-change PARTIAL (AZ-423 / AC-8.6).
|
||||
|
||||
The full scenario:

1. Push each ``AD0000NN.jpg`` from ``still-image-set-60`` to the SUT's
   frame source, one at a time. For each image, the SUT runs top-K=10
   tile-cache retrieval and writes an FDR ``retrieval-topk`` record
   carrying the 10 candidate tile centres.
2. The test joins the per-image GT (``coordinates.csv``) with the FDR
   stream and asserts that EVERY image's top-K covers a tile centre
   within 100 m of the image's true centre (``AC-1`` / set_contains).
3. For the 2 paired ``_gmaps.png`` reference images the SUT runs the
   cross-domain matcher; the test records per-image match success
   into a CSV and tags the subset as ``PARTIAL`` (``AC-2``).
4. ``AC-3`` — parameterisation across ``(fc_adapter, vio_strategy)``.

What this file owns:

* The AC-1 / AC-2 / AC-3 wiring above.
* CSV evidence emission via the AZ-423-owned ``retrieval_evaluator``.

What this file does NOT own:

* The frame-source push → ``runner.helpers.frame_source_replay`` (stub,
  AZ-441) — skip-gated.
* The SITL message receipt → ``runner.helpers.sitl_observer`` (stub;
  AZ-416/AZ-417) — skip-gated.
* The SUT-side ``retrieval-topk`` + ``scene-change-match`` FDR record
  emission — production gap, surfaced via fail-loud when the fixture
  exists but the records are missing.

When ``E2E_SITL_REPLAY_DIR`` is set and points at a prepared SITL
replay fixture, this file's runtime path activates automatically.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from runner.helpers import accuracy_evaluator as ae
|
||||
from runner.helpers import retrieval_evaluator as re_
|
||||
|
||||
GT_CSV = Path(__file__).resolve().parents[3] / "_docs" / "00_problem" / "input_data" / "coordinates.csv"
|
||||
STILL_IMAGES_DIR = GT_CSV.parent
|
||||
|
||||
|
||||
@pytest.mark.traces_to("AC-8.6,AC-1,AC-2,AC-3")
def test_ft_p_19_sat_reloc_scale(
    fc_adapter: str,
    vio_strategy: str,
    evidence_dir,  # type: ignore[no-untyped-def]
    run_id: str,
    nfr_recorder,  # type: ignore[no-untyped-def]
    sitl_replay_ready: bool,
) -> None:
    """Run the FT-P-19 scenario for one ``(fc_adapter, vio_strategy)`` pair (AC-8.6).

    Flow:

    1. Skip unless ``sitl_replay_ready`` is truthy.
    2. Load the ground-truth rows from ``GT_CSV`` and index them by
       normalised image id.
    3. Read the FDR records under ``<evidence_dir>/../run-<run_id>/fdr`` and
       project them into top-K queries (joined with the ground truth) and
       scene-change matches.
    4. Fail immediately when no top-K query could be built.
    5. Evaluate both reports, write one CSV per report into ``evidence_dir``
       and record three metrics through ``nfr_recorder``.
    6. Assert AC-1 (top-K report passes) and AC-2 (scene-change coverage is
       complete and the overall label is the PARTIAL label).

    AC-3 is covered by the ``fc_adapter`` / ``vio_strategy`` parametrisation,
    whose values also appear in the evidence CSV file names.
    """
    if not sitl_replay_ready:
        pytest.skip(
            "FT-P-19 requires `E2E_SITL_REPLAY_DIR` to point at a SITL replay "
            "fixture emitting `retrieval-topk` + `scene-change-match` FDR "
            "records (AZ-595 + AZ-423 fixture builder). Pure-logic AC-8.6 "
            "coverage lives in e2e/_unit_tests/helpers/test_retrieval_evaluator.py."
        )

    # Imported only once the scenario is known to run.
    from runner.helpers import fdr_reader

    # Ground truth keyed by the extension-less image id; a later duplicate
    # row replaces an earlier one.
    truth_by_image = {}
    for row in ae.load_gt_coordinates(GT_CSV):
        truth_by_image[_normalise_image_id(row.image_id)] = row

    evidence_root = Path(evidence_dir)
    fdr_dir = evidence_root.parent / f"run-{run_id}" / "fdr"

    # Top-K records: keep only dict payloads that carry a string image id
    # known to the ground truth and that project to a query.
    retrieval_queries: list[re_.TopKQuery] = []
    for record in re_.iter_topk_payloads(fdr_reader.iter_records(fdr_dir)):
        raw_id = record.get("image_id") if isinstance(record, dict) else None
        if not isinstance(raw_id, str):
            continue
        truth = truth_by_image.get(_normalise_image_id(raw_id))
        if truth is None:
            continue
        query = re_.project_topk_record_to_query(
            record,
            true_centre_lat_deg=truth.lat_deg,
            true_centre_lon_deg=truth.lon_deg,
        )
        if query is not None:
            retrieval_queries.append(query)

    # Scene-change records: a second pass over the same FDR directory.
    projected_matches = (
        re_.project_scene_change_record(record)
        for record in re_.iter_scene_change_payloads(fdr_reader.iter_records(fdr_dir))
    )
    paired_matches: list[re_.SceneChangeMatch] = [
        match for match in projected_matches if match is not None
    ]

    if not retrieval_queries:
        pytest.fail(
            f"FT-P-19: no `{re_.RETRIEVAL_TOPK_FDR_KIND}` FDR records found at "
            f"{fdr_dir}. SUT must emit a top-K retrieval record per pushed "
            "image; fixture builder must collect them."
        )

    retrieval_report = re_.evaluate_top_k_within_distance(
        retrieval_queries,
        expected_image_count=ae.TOTAL_IMAGES_REQUIRED,
    )
    subset_report = re_.evaluate_scene_change_subset(paired_matches)

    # Evidence is written before the assertions so it exists on failure too.
    variant = f"{fc_adapter}-{vio_strategy}"
    re_.write_top_k_csv(evidence_root / f"ft-p-19-topk-{variant}.csv", retrieval_report)
    re_.write_scene_change_csv(
        evidence_root / f"ft-p-19-scene-change-{variant}.csv", subset_report
    )

    nfr_recorder.record_metric(
        "ft_p_19.topk_pass_count",
        float(retrieval_report.pass_count),
        ac_id="AC-1",
    )
    nfr_recorder.record_metric(
        "ft_p_19.topk_image_count",
        float(len(retrieval_report.entries)),
        ac_id="AC-1",
    )
    nfr_recorder.record_metric(
        "ft_p_19.scene_change_matched_count",
        float(subset_report.matched_count),
        ac_id="AC-2",
    )

    assert retrieval_report.passes, (
        f"AC-1 (top-K=10 within {retrieval_report.max_distance_m} m for "
        f"{retrieval_report.expected_image_count} images) failed: "
        f"pass_count={retrieval_report.pass_count}/{retrieval_report.expected_image_count}, "
        f"failing={[(entry.image_id, entry.min_distance_m) for entry in retrieval_report.failing_entries[:5]]}"
    )
    assert subset_report.coverage_complete, (
        "AC-2 (scene-change subset coverage) failed: paired images observed = "
        f"{[entry.image_id for entry in subset_report.entries]}, expected = "
        f"{list(subset_report.expected_image_ids)}"
    )
    assert subset_report.overall_label == re_.SCENE_CHANGE_SUBSET_PARTIAL_LABEL, (
        f"AC-2 (PARTIAL annotation): got {subset_report.overall_label!r}, "
        f"expected {re_.SCENE_CHANGE_SUBSET_PARTIAL_LABEL!r}"
    )
|
||||
|
||||
|
||||
def _normalise_image_id(image_id: str) -> str:
    """Return *image_id* without a trailing ``.jpg`` (matched case-insensitively).

    ``coordinates.csv`` uses the bare stem, so ids are reduced to that form
    before they are used as lookup keys. Ids with any other ending are
    returned unchanged.
    """
    suffix = ".jpg"
    if image_id.lower().endswith(suffix):
        return image_id[: -len(suffix)]
    return image_id
|
||||
Reference in New Issue
Block a user