mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-21 09:51:13 +00:00
29ac16cfcb
AZ-409 (3pt) — FT-P-01 still-image frame-center accuracy: - accuracy_evaluator.py: GT loader + Vincenty error + AC-2/AC-3 pass-counts - test_ft_p_01_still_image_accuracy.py: scenario gated on frame_source_replay + sitl_observer NotImplementedError; AC-4 timeout discipline AZ-412 (3pt) — FT-P-04 Derkachi f2f registration >=95% on normal segments: - registration_classifier.py: accel-derived attitude + overlap heuristic + success ratio with AC-3 sharp-turn exclusion - test_ft_p_04_derkachi_f2f_registration.py: scenario gated on frame_source_replay + imu_replay + fdr_reader AZ-413 (3pt) — FT-P-05 + FT-P-06 cross-domain MRE budgets: - mre_evaluator.py: per-image budget (strict <2.5px) + 95th-percentile via numpy linear interp + combined report - test_ft_p_05_sat_anchor.py: cross-domain scenario, reuses accuracy_evaluator for geodesic join - test_ft_p_06_mre_budgets.py: pure piggyback on FT-P-04 + FT-P-05 CSV evidence; skips when either upstream CSV missing Tests: 325 unit tests pass (+77 vs batch 69). Reports: batch_70_report.md, batch_70_review.md (PASS). Co-authored-by: Cursor <cursoragent@cursor.com>
94 lines
3.4 KiB
Python
94 lines
3.4 KiB
Python
"""FT-P-06 — 95th-percentile MRE budgets (AC-2.2).
|
|
|
|
Piggyback test: depends on the FT-P-04 + FT-P-05 evidence CSVs produced
|
|
in the same run. Reads both, aggregates per domain, asserts:
|
|
|
|
* Frame-to-frame p95 MRE < 1.0 px
|
|
* Cross-domain p95 MRE < 2.5 px
|
|
|
|
What this file owns:
|
|
|
|
* The AC-4 assertion + the combined report.
|
|
|
|
What this file does NOT own:
|
|
|
|
* The FT-P-04 evidence collection — owned by ``test_ft_p_04_*``.
|
|
* The FT-P-05 evidence collection — owned by ``test_ft_p_05_*``.
|
|
* Both run in the same pytest session; this test depends on the
|
|
artefacts they wrote to ``evidence_dir``.
|
|
|
|
Skip discipline: if either evidence CSV is missing, the test SKIPS with
|
|
a clear reason (it cannot fail without the upstream evidence; that
|
|
would mask the actual gate, which is whether FT-P-04 / FT-P-05 ran).
|
|
The autodev / Tier-1 runner will only mark this test FAIL if it runs
|
|
AND the evidence is present AND the p95 budgets are exceeded.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from runner.helpers import mre_evaluator as me
|
|
|
|
|
|
@pytest.mark.traces_to("AC-2.2,AC-4,AC-5")
def test_ft_p_06_mre_budgets(
    fc_adapter: str,
    vio_strategy: str,
    evidence_dir,  # type: ignore[no-untyped-def]
    nfr_recorder,  # type: ignore[no-untyped-def]
) -> None:
    """AC-4: 95th-percentile MRE < 1.0 px f2f AND < 2.5 px cross-domain.

    AC-5: parametrized across ``(fc_adapter, vio_strategy)``.

    This test is a pure piggyback — it reads the FT-P-04 + FT-P-05 CSVs
    from the same run. If either is missing the test skips (without
    those, FT-P-06 has nothing to assert on).

    Args:
        fc_adapter: Adapter identifier; interpolated into both evidence
            CSV file names.
        vio_strategy: Strategy identifier; interpolated into both evidence
            CSV file names.
        evidence_dir: Directory object holding the evidence CSVs. It must
            support ``/`` joining, and the joined paths must offer
            ``.exists()`` and ``.name`` (presumably a ``pathlib.Path`` —
            confirm against the fixture).
        nfr_recorder: Recorder exposing
            ``record_metric(name, value, ac_id=...)``.

    Outcomes:
        * SKIP when at least one evidence CSV does not exist.
        * FAIL when the frame-to-frame loader raises ``ValueError``.
        * FAIL when either p95 budget check reports ``passes`` as false;
          both violations are reported together when both occur.
    """
    f2f_csv = evidence_dir / f"ft-p-04-{fc_adapter}-{vio_strategy}.csv"
    xd_csv = evidence_dir / f"ft-p-05-{fc_adapter}-{vio_strategy}.csv"

    # Probe each CSV exactly once so the skip decision and the names listed
    # in the skip reason can never disagree.
    missing = [p.name for p in (f2f_csv, xd_csv) if not p.exists()]
    if missing:
        pytest.skip(
            f"FT-P-06 piggybacks on FT-P-04 + FT-P-05 evidence; missing in this run: {missing}. "
            "Pure-logic ACs covered by e2e/_unit_tests/helpers/test_mre_evaluator.py."
        )

    # Both CSVs present — load and evaluate.
    try:
        f2f_records = me.load_frame_to_frame_csv(f2f_csv)
    except ValueError as exc:
        # mre_px column absent → FT-P-04 evidence does not yet carry MRE.
        # Per the FT-P-06 spec: "if absent, the test fails" — but at this
        # point the failure is on the SUT (it must expose per-frame MRE).
        pytest.fail(f"FT-P-04 evidence is missing per-frame MRE: {exc}")
    # NOTE(review): the cross-domain loader is intentionally left unguarded
    # here, as in the original; any exception it raises propagates as-is.
    xd_records = me.load_cross_domain_csv(xd_csv)

    combined = me.evaluate_combined_p95(f2f_records, xd_records)
    f2f_report = combined.frame_to_frame
    xd_report = combined.cross_domain

    # Record both metrics before asserting so they are captured even when
    # a budget is exceeded.
    nfr_recorder.record_metric(
        "ft_p_06.f2f_p95_mre_px",
        f2f_report.p95_px,
        ac_id="AC-4",
    )
    nfr_recorder.record_metric(
        "ft_p_06.cross_domain_p95_mre_px",
        xd_report.p95_px,
        ac_id="AC-4",
    )

    # Evaluate both budgets before failing so that a frame-to-frame
    # violation does not hide a simultaneous cross-domain violation.
    failures: list[str] = []
    if not f2f_report.passes:
        failures.append(
            f"AC-4 (frame-to-frame p95 MRE < {me.MRE_P95_FRAME_TO_FRAME_BUDGET_PX} px) "
            f"failed: p95={f2f_report.p95_px:.4f} over "
            f"{f2f_report.sample_count} samples"
        )
    if not xd_report.passes:
        failures.append(
            f"AC-4 (cross-domain p95 MRE < {me.MRE_P95_CROSS_DOMAIN_BUDGET_PX} px) "
            f"failed: p95={xd_report.p95_px:.4f} over "
            f"{xd_report.sample_count} samples"
        )
    assert not failures, "\n".join(failures)
|