mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 11:41:12 +00:00
[AZ-423] [AZ-427] Add FT-P-19 + FT-N-05 blackbox tests
Implement the AC-8.6 (top-K=10 retrieval scale-ratio + scene-change
PARTIAL) and AC-8.2 / AC-NEW-6 (stale aged-tile rejection) blackbox
scenarios.
AZ-423 (FT-P-19, 3pt) helpers + scenario:
- retrieval_evaluator.py — top-K within-distance evaluator (60 stills
vs 100 m budget), scene-change PARTIAL recorder (always emits
PARTIAL on the 2 _gmaps.png pairs), FDR record projectors, CSV
writers.
- tests/positive/test_ft_p_19_sat_reloc_scale.py (6 parametrised
variants).
AZ-427 (FT-N-05, 2pt) helpers + scenario:
- aged_tile_rejection_evaluator.py — Signal A (stale rejection at
load) + Signal B (per-frame downgrade) decision matrix, reuses
ALLOWED_SOURCE_LABELS from estimate_schema.
- tests/negative/test_ft_n_05_stale_tile_rejection.py (12 parametrised
variants: FC × VIO × {7mo/active-conflict, 13mo/rear}).
48 new unit tests cover every helper branch. Both scenarios skip
when sitl_replay_ready is false and fail loudly when fixture records
are missing.
Per-batch review: PASS_WITH_WARNINGS (2 Low — production-dependency
surface, FDR-kind constant duplication).
Cumulative review 82-84: PASS (2 Low carry-over / hygiene candidate).
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,168 @@
|
||||
"""FT-N-05 — Stale-tile rejection on freshness violation (AZ-427 / AC-8.2, AC-NEW-6).
|
||||
|
||||
Two sub-cases:
|
||||
|
||||
* ``synth-age-7mo`` mounted, SUT configured for active-conflict sector.
|
||||
* ``synth-age-13mo`` mounted, SUT configured for rear sector.
|
||||
|
||||
For each sub-case: push the 60 still images, then assert one of:
|
||||
|
||||
1. **Signal A** — the FDR contains at least one ``tile-load-rejected``
|
||||
record with ``reason == "stale"`` AND no outbound emission carried
|
||||
``source_label = satellite_anchored``.
|
||||
2. **Signal B** — the SUT loaded the cache but every emission falls in
|
||||
``{visual_propagated, dead_reckoned}``.
|
||||
|
||||
Either signal is acceptable; the AC fails the moment any frame slips
|
||||
through with ``satellite_anchored`` from these aged tiles.
|
||||
|
||||
What this file owns:
|
||||
|
||||
* The AC-1 / AC-2 / AC-3 wiring for both sub-cases.
|
||||
* Sector-binding declaration via ``AGED_FIXTURE_SECTOR_BINDINGS``.
|
||||
|
||||
What this file does NOT own:
|
||||
|
||||
* The frame-source push → ``runner.helpers.frame_source_replay`` (stub).
|
||||
* The SITL message receipt → ``runner.helpers.sitl_observer`` (stub).
|
||||
* The fixture-builder support for mounting ``synth-age-*`` tiles and
|
||||
switching sector configuration — surfaced as a production-dependency
|
||||
finding in the batch report (gated on ``sitl_replay_ready`` +
|
||||
``E2E_FT_N_05_FIXTURE`` env var; the test skips cleanly when not set
|
||||
and fails loudly when set but the records are missing).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from runner.helpers import accuracy_evaluator as ae
|
||||
from runner.helpers import aged_tile_rejection_evaluator as ate
|
||||
|
||||
# Name of the environment variable that declares which synth-age fixture is
# currently mounted; the test compares its value against the parametrised
# ``fixture`` and skips on a mismatch or when it is unset.
FT_N_05_FIXTURE_ENV = "E2E_FT_N_05_FIXTURE"

# Path to ``coordinates.csv`` under ``_docs/00_problem/input_data`` of the
# fourth ancestor directory of this file (presumably the repository root —
# confirm against the checkout layout). The test globs the ``AD??????.jpg``
# still images from this file's parent directory.
GT_CSV = (
    Path(__file__).resolve().parents[3]
    / "_docs"
    / "00_problem"
    / "input_data"
    / "coordinates.csv"
)
|
||||
|
||||
|
||||
@pytest.mark.traces_to("AC-8.2,AC-NEW-6,AC-1,AC-2,AC-3")
@pytest.mark.parametrize(
    "fixture, sector",
    ate.AGED_FIXTURE_SECTOR_BINDINGS,
    ids=[f"{f}_{s}" for f, s in ate.AGED_FIXTURE_SECTOR_BINDINGS],
)
def test_ft_n_05_stale_tile_rejection(
    fixture: str,
    sector: str,
    fc_adapter: str,
    vio_strategy: str,
    evidence_dir,  # type: ignore[no-untyped-def]
    run_id: str,
    nfr_recorder,  # type: ignore[no-untyped-def]
    sitl_replay_ready: bool,
) -> None:
    """Full FT-N-05 sub-case (AC-1 or AC-2 depending on ``fixture``).

    Flow:

    1. Skip unless ``sitl_replay_ready`` is true and the
       ``E2E_FT_N_05_FIXTURE`` environment variable names this sub-case's
       ``fixture``.
    2. Replay every ``AD??????.jpg`` still next to ``GT_CSV`` and collect the
       ``source_label`` of each outbound message observed within the
       per-image timeout.
    3. Read the stale-rejection payloads from the run's FDR directory.
    4. Hand emissions and rejections to
       ``ate.evaluate_aged_tile_rejection``, record the report counters as
       metrics, and assert on the report.

    Args:
        fixture: Synth-age fixture name from ``AGED_FIXTURE_SECTOR_BINDINGS``.
        sector: Sector bound to ``fixture`` in the same binding table.
        fc_adapter: Flight-controller kind; ``"ardupilot"`` selects the
            ``sitl-ardupilot`` host, any other value selects ``sitl-inav``.
        vio_strategy: Not referenced in the body; presumably requested only
            to expand the test matrix — confirm against the conftest.
        evidence_dir: Evidence directory; its parent is used to locate
            ``run-<run_id>/fdr``.
        run_id: Run identifier used in the FDR directory name.
        nfr_recorder: Recorder exposing ``record_metric(name, value, ac_id=...)``.
        sitl_replay_ready: Gate flag; the test skips when it is false.
    """
    if not sitl_replay_ready:
        pytest.skip(
            "FT-N-05 requires `E2E_SITL_REPLAY_DIR` to point at a SITL replay "
            "fixture mounting one of the synth-age tile sets (AZ-595 + AZ-427 "
            "fixture builder). Pure-logic AC-8.2/AC-NEW-6 coverage lives in "
            "e2e/_unit_tests/helpers/test_aged_tile_rejection_evaluator.py."
        )
    active_fixture = os.environ.get(FT_N_05_FIXTURE_ENV)
    if active_fixture is None:
        pytest.skip(
            f"FT-N-05 needs `{FT_N_05_FIXTURE_ENV}` env var to declare which "
            "synth-age fixture the fixture builder mounted "
            f"(one of {[f for f, _ in ate.AGED_FIXTURE_SECTOR_BINDINGS]}). "
            "The fixture builder publishes this from its per-sub-case orchestration."
        )
    if active_fixture != fixture:
        pytest.skip(
            f"FT-N-05 sub-case [{fixture}/{sector}] does not match the mounted "
            f"fixture (`{FT_N_05_FIXTURE_ENV}` = {active_fixture!r}). "
            "Subsequent matrix sub-cases will run in their own pytest invocations."
        )

    # Imported only after the skip gates, so skipped runs never load the
    # replay/observer helpers.
    from runner.helpers import fdr_reader, sitl_observer
    from runner.helpers.frame_source_replay import FrameSourceReplayer

    image_paths = sorted(GT_CSV.parent.glob("AD??????.jpg"))
    if len(image_paths) != ae.TOTAL_IMAGES_REQUIRED:
        pytest.fail(
            f"FT-N-05 expects {ae.TOTAL_IMAGES_REQUIRED} still images, "
            f"found {len(image_paths)} under {GT_CSV.parent}"
        )

    sitl_host = "sitl-ardupilot" if fc_adapter == "ardupilot" else "sitl-inav"
    observer = sitl_observer.get_observer(fc_kind=fc_adapter, host=sitl_host)
    sink = _resolve_frame_sink()
    replayer = FrameSourceReplayer(sink)

    emissions: list[ate.SourceLabelEmission] = []
    per_image_timeout_s = 5.0
    for path in image_paths:
        image_id = path.name
        replayer.replay_image(path)
        try:
            msg = observer.wait_for_outbound(timeout_s=per_image_timeout_s)
        except TimeoutError:
            # No outbound message for this frame within the timeout: the
            # frame contributes no emission and is not treated as a failure.
            continue
        label = getattr(msg, "source_label", None)
        if not isinstance(label, str):
            pytest.fail(
                f"FT-N-05: outbound emission for {image_id} carries no `source_label` "
                f"(got {type(label).__name__}). The SUT must include `source_label` per "
                "FT-P-03 / AC-1.4 even on downgraded frames."
            )
        emissions.append(ate.SourceLabelEmission(frame_id=image_id, label=label))

    fdr_root = Path(evidence_dir).parent / f"run-{run_id}" / "fdr"
    rejections = list(ate.iter_stale_rejection_payloads(fdr_reader.iter_records(fdr_root)))

    sub_case_id = f"{fixture}_{sector}"
    report = ate.evaluate_aged_tile_rejection(
        sub_case_id=sub_case_id,
        fixture=fixture,
        sector=sector,
        emissions=emissions,
        rejections=rejections,
    )

    # Metrics are recorded before the assertions so they are captured even
    # when the sub-case fails.
    metric_prefix = f"ft_n_05.{sub_case_id}"
    nfr_recorder.record_metric(
        f"{metric_prefix}.emissions_observed", float(report.emissions_observed), ac_id="AC-1"
    )
    nfr_recorder.record_metric(
        f"{metric_prefix}.anchored_count", float(report.anchored_count), ac_id="AC-1"
    )
    nfr_recorder.record_metric(
        f"{metric_prefix}.downgrade_count", float(report.downgrade_frame_count), ac_id="AC-1"
    )
    nfr_recorder.record_metric(
        f"{metric_prefix}.stale_rejection_count",
        float(len(report.stale_rejections)),
        ac_id="AC-1",
    )

    assert not report.illegal_labels, (
        "FT-N-05: outbound emissions include labels outside the FT-P-03 contract: "
        f"{report.illegal_labels}. Fix the SUT before assessing the freshness gate."
    )
    assert report.passes, (
        f"FT-N-05 [{sub_case_id}] failed: signal_a={report.signal_a_holds}, "
        f"signal_b={report.signal_b_holds}; "
        f"anchored_frames={list(report.anchored_frame_ids)[:5]}, "
        f"emissions={report.emissions_observed}, "
        f"downgrade_count={report.downgrade_frame_count}, "
        f"stale_rejections={list(report.stale_rejections)[:5]}"
    )
|
||||
|
||||
|
||||
def _resolve_frame_sink():  # type: ignore[no-untyped-def]
    """Return a replay-mode `FrameSink` (counter-only; AZ-597).

    The sink class is imported inside the function, so
    ``runner.helpers.replay_mode`` is loaded only when a sink is requested.

    Returns:
        A new ``NullFrameSink`` instance.
    """
    from runner.helpers.replay_mode import NullFrameSink

    return NullFrameSink()
|
||||
Reference in New Issue
Block a user