mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 15:31:13 +00:00
[AZ-409] [AZ-412] [AZ-413] Batch 70: FT-P-01/04/05/06 scenarios
AZ-409 (3pt) — FT-P-01 still-image frame-center accuracy: - accuracy_evaluator.py: GT loader + Vincenty error + AC-2/AC-3 pass-counts - test_ft_p_01_still_image_accuracy.py: scenario gated on frame_source_replay + sitl_observer NotImplementedError; AC-4 timeout discipline AZ-412 (3pt) — FT-P-04 Derkachi f2f registration >=95% on normal segments: - registration_classifier.py: accel-derived attitude + overlap heuristic + success ratio with AC-3 sharp-turn exclusion - test_ft_p_04_derkachi_f2f_registration.py: scenario gated on frame_source_replay + imu_replay + fdr_reader AZ-413 (3pt) — FT-P-05 + FT-P-06 cross-domain MRE budgets: - mre_evaluator.py: per-image budget (strict <2.5px) + 95th-percentile via numpy linear interp + combined report - test_ft_p_05_sat_anchor.py: cross-domain scenario, reuses accuracy_evaluator for geodesic join - test_ft_p_06_mre_budgets.py: pure piggyback on FT-P-04 + FT-P-05 CSV evidence; skips when either upstream CSV missing Tests: 325 unit tests pass (+77 vs batch 69). Reports: batch_70_report.md, batch_70_review.md (PASS). Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,175 @@
|
||||
"""FT-P-01 — Still-image set-60 frame-center accuracy (AC-1.1, AC-1.2).
|
||||
|
||||
The full scenario:
|
||||
|
||||
1. Push each ``AD0000NN.jpg`` from ``still-image-set-60`` to the SUT's
|
||||
frame source, one at a time.
|
||||
2. Wait up to 5 s for the SITL listener to receive the SUT's outbound
|
||||
``GPS_INPUT`` (ArduPilot) or ``MSP2_SENSOR_GPS`` (iNav) message.
|
||||
3. Compute Vincenty geodesic distance between the SUT estimate and the
|
||||
per-image GT from ``_docs/00_problem/input_data/coordinates.csv``.
|
||||
4. Emit ``e2e-results/run-${RUN_ID}/ft-p-01-{fc_adapter}-{vio_strategy}.csv``
|
||||
with one row per image.
|
||||
5. Assert AC-2 (≥48/60 within 50 m) and AC-3 (≥30/60 within 20 m) per
|
||||
``expected_results/results_report.md`` Pass/Fail Rules.
|
||||
|
||||
What this file owns:
|
||||
|
||||
* The AC-1 / AC-2 / AC-3 / AC-4 / AC-5 wiring above.
|
||||
* CSV evidence emission via the AZ-409-owned ``accuracy_evaluator``.
|
||||
|
||||
What this file does NOT own:
|
||||
|
||||
* The frame-source push → ``runner.helpers.frame_source_replay`` (stub;
|
||||
owned by AZ-441) — skip-gated.
|
||||
* The SITL message receipt → ``runner.helpers.sitl_observer`` (stub;
|
||||
owned by AZ-416/AZ-417) — skip-gated.
|
||||
|
||||
When both upstream helpers land, this file's runtime path activates
|
||||
automatically — the skip is keyed off the ``NotImplementedError`` from
|
||||
the helper imports, not off a hard-coded marker.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from runner.helpers import accuracy_evaluator as ae
|
||||
|
||||
GT_CSV = Path(__file__).resolve().parents[3] / "_docs" / "00_problem" / "input_data" / "coordinates.csv"
|
||||
STILL_IMAGES_DIR = GT_CSV.parent
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def _harness_helpers_implemented() -> bool:
|
||||
"""True iff the upstream replay + SITL-observation helpers are real.
|
||||
|
||||
Same auto-detect pattern as FT-P-02 / FT-P-03 — the gate flips when
|
||||
the helpers stop raising NotImplementedError, so no marker churn.
|
||||
"""
|
||||
from runner.helpers import frame_source_replay, sitl_observer
|
||||
from runner.helpers.frame_source_replay import FrameSourceReplayer
|
||||
|
||||
try:
|
||||
replayer = FrameSourceReplayer(sink=_NullSink()) # type: ignore[arg-type]
|
||||
try:
|
||||
replayer.replay_image_directory(Path("/tmp/non-existent"))
|
||||
except NotImplementedError:
|
||||
return False
|
||||
try:
|
||||
sitl_observer.get_observer(fc_adapter="ardupilot", host="sitl-ardupilot")
|
||||
except NotImplementedError:
|
||||
return False
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
class _NullSink:
|
||||
def write_frame(self, jpeg_bytes: bytes, timestamp_ms: int) -> None:
|
||||
return None
|
||||
|
||||
|
||||
def _ft_p_01_image_paths() -> list[Path]:
|
||||
"""The 60 AD0000NN.jpg images, sorted lexicographically (AD000001..AD000060)."""
|
||||
return sorted(STILL_IMAGES_DIR.glob("AD??????.jpg"))
|
||||
|
||||
|
||||
@pytest.mark.traces_to("AC-1.1,AC-1.2,AC-1,AC-2,AC-3,AC-4,AC-5")
|
||||
def test_ft_p_01_still_image_accuracy(
|
||||
fc_adapter: str,
|
||||
vio_strategy: str,
|
||||
evidence_dir, # type: ignore[no-untyped-def]
|
||||
run_id: str,
|
||||
nfr_recorder, # type: ignore[no-untyped-def]
|
||||
_harness_helpers_implemented: bool,
|
||||
) -> None:
|
||||
"""Full FT-P-01 scenario (AC-1.1, AC-1.2).
|
||||
|
||||
AC-1: per-image distance computed for all 60 images.
|
||||
AC-2: ``pass_count(error_m ≤ 50) ≥ 48``.
|
||||
AC-3: ``pass_count(error_m ≤ 20) ≥ 30``.
|
||||
AC-4: per-image timeout → ``error_m=∞``; aggregate continues.
|
||||
AC-5: parametrized across ``(fc_adapter, vio_strategy)`` (4 variants).
|
||||
"""
|
||||
if not _harness_helpers_implemented:
|
||||
pytest.skip(
|
||||
"FT-P-01 still-image push requires runner.helpers.{frame_source_replay,"
|
||||
"sitl_observer} — currently AZ-441 + AZ-416/AZ-417 leftovers. "
|
||||
"Pure-logic ACs covered by e2e/_unit_tests/helpers/test_accuracy_evaluator.py."
|
||||
)
|
||||
|
||||
from runner.helpers import frame_source_replay, sitl_observer
|
||||
from runner.helpers.frame_source_replay import FrameSourceReplayer
|
||||
|
||||
# 1. Resolve GT + image inventory once.
|
||||
gt_rows = ae.load_gt_coordinates(GT_CSV)
|
||||
image_paths = _ft_p_01_image_paths()
|
||||
if len(image_paths) != ae.TOTAL_IMAGES_REQUIRED:
|
||||
pytest.fail(
|
||||
f"FT-P-01 expects {ae.TOTAL_IMAGES_REQUIRED} images in {STILL_IMAGES_DIR}, "
|
||||
f"found {len(image_paths)}"
|
||||
)
|
||||
|
||||
# 2. Resolve the SITL listener for the requested FC adapter.
|
||||
sitl_host = "sitl-ardupilot" if fc_adapter == "ardupilot" else "sitl-inav"
|
||||
observer = sitl_observer.get_observer(fc_adapter=fc_adapter, host=sitl_host)
|
||||
sink = _resolve_frame_sink()
|
||||
replayer = FrameSourceReplayer(sink)
|
||||
|
||||
# 3. Push images one at a time, capturing per-image estimates.
|
||||
estimates: list[ae.EstimateInput] = []
|
||||
per_image_timeout_s = 5.0
|
||||
for path in image_paths:
|
||||
image_id = path.name
|
||||
replayer.replay_image(path)
|
||||
try:
|
||||
msg = observer.wait_for_outbound(timeout_s=per_image_timeout_s)
|
||||
estimates.append(
|
||||
ae.EstimateInput(
|
||||
image_id=image_id,
|
||||
est_lat_deg=float(msg.lat_deg),
|
||||
est_lon_deg=float(msg.lon_deg),
|
||||
)
|
||||
)
|
||||
except TimeoutError:
|
||||
estimates.append(
|
||||
ae.EstimateInput(image_id=image_id, est_lat_deg=math.inf, est_lon_deg=math.inf)
|
||||
)
|
||||
|
||||
# 4. Evaluate + emit CSV evidence.
|
||||
results, aggregate = ae.evaluate(gt_rows, estimates)
|
||||
out_csv = evidence_dir / f"ft-p-01-{fc_adapter}-{vio_strategy}.csv"
|
||||
ae.write_csv_evidence(out_csv, results)
|
||||
|
||||
# 5. Record NFR metrics for the run report.
|
||||
nfr_recorder.record_metric(
|
||||
"ft_p_01.pass_count_50m", float(aggregate.pass_count_50m), ac_id="AC-2"
|
||||
)
|
||||
nfr_recorder.record_metric(
|
||||
"ft_p_01.pass_count_20m", float(aggregate.pass_count_20m), ac_id="AC-3"
|
||||
)
|
||||
nfr_recorder.record_metric(
|
||||
"ft_p_01.timeout_count", float(aggregate.timeout_count), ac_id="AC-4"
|
||||
)
|
||||
|
||||
# 6. AC assertions.
|
||||
assert aggregate.pass_ac2, (
|
||||
f"AC-2 (50 m budget) failed: {aggregate.pass_count_50m}/60 "
|
||||
f"< required {ae.PASS_COUNT_50M_REQUIRED}; "
|
||||
f"timeouts={aggregate.timeout_count}"
|
||||
)
|
||||
assert aggregate.pass_ac3, (
|
||||
f"AC-3 (20 m budget) failed: {aggregate.pass_count_20m}/60 "
|
||||
f"< required {ae.PASS_COUNT_20M_REQUIRED}"
|
||||
)
|
||||
|
||||
|
||||
def _resolve_frame_sink(): # type: ignore[no-untyped-def]
|
||||
"""Stub helper resolved when the underlying replayer lands."""
|
||||
raise NotImplementedError(
|
||||
"frame sink resolution is owned by AZ-441 / runner.helpers.frame_source_replay"
|
||||
)
|
||||
@@ -0,0 +1,189 @@
|
||||
"""FT-P-04 — Derkachi frame-to-frame registration ≥95% on normal segments (AC-2.1a).
|
||||
|
||||
The full scenario:
|
||||
|
||||
1. Replay the Derkachi MP4 + IMU through the SUT.
|
||||
2. Collect per-video-frame ``registration_success`` from
|
||||
``NAMED_VALUE_FLOAT`` OR from the post-run FDR archive (whichever the
|
||||
SUT emits — both are public-boundary artefacts per AC-NEW-3).
|
||||
3. Derive "normal" segment classification from ``data_imu.csv`` only —
|
||||
AC-1 explicitly requires SCALED_IMU2-derived attitude (no internal
|
||||
SUT state).
|
||||
4. Compute success ratio over the normal denominator (AC-3 excludes
|
||||
sharp-turn frames).
|
||||
5. Emit ``ft-p-04-{fc_adapter}-{vio_strategy}.csv`` with one row per
|
||||
video frame for evidence.
|
||||
6. Assert AC-2 (ratio ≥ 0.95).
|
||||
|
||||
What this file owns:
|
||||
|
||||
* The AC-1 / AC-2 / AC-3 / AC-4 wiring above.
|
||||
* CSV evidence emission via the AZ-412-owned ``registration_classifier``.
|
||||
|
||||
What this file does NOT own:
|
||||
|
||||
* The MP4 video-replay path → ``runner.helpers.frame_source_replay``
|
||||
(stub; AZ-441) — skip-gated.
|
||||
* The IMU CSV replay → ``runner.helpers.imu_replay`` (stub; AZ-407
|
||||
leftover) — skip-gated.
|
||||
* The FDR-archive iteration → ``runner.helpers.fdr_reader`` (stub;
|
||||
AZ-441) — skip-gated.
|
||||
|
||||
When all three upstream helpers land, this file's runtime path activates
|
||||
automatically — the skip is keyed off the ``NotImplementedError`` from
|
||||
the helper imports, not off a hard-coded marker.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from runner.helpers import registration_classifier as rc
|
||||
|
||||
DERKACHI_DIR = (
|
||||
Path(__file__).resolve().parents[3]
|
||||
/ "_docs"
|
||||
/ "00_problem"
|
||||
/ "input_data"
|
||||
/ "flight_derkachi"
|
||||
)
|
||||
DERKACHI_IMU_CSV = DERKACHI_DIR / "data_imu.csv"
|
||||
DERKACHI_MP4 = DERKACHI_DIR / "flight_derkachi.mp4"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def _harness_helpers_implemented() -> bool:
|
||||
"""True iff every upstream helper FT-P-04 needs has a real impl."""
|
||||
from runner.helpers import fdr_reader, frame_source_replay, imu_replay
|
||||
from runner.helpers.frame_source_replay import FrameSourceReplayer
|
||||
|
||||
try:
|
||||
replayer = FrameSourceReplayer(sink=_NullSink()) # type: ignore[arg-type]
|
||||
try:
|
||||
replayer.replay_video(Path("/tmp/non-existent.mp4"))
|
||||
except NotImplementedError:
|
||||
return False
|
||||
try:
|
||||
list(fdr_reader.iter_records(Path("/tmp/non-existent")))
|
||||
except NotImplementedError:
|
||||
return False
|
||||
try:
|
||||
imu_replay.ImuReplayer(emitter=_NullImuEmitter()).replay(Path("/tmp/non-existent.csv")) # type: ignore[arg-type]
|
||||
except NotImplementedError:
|
||||
return False
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
class _NullSink:
|
||||
def write_frame(self, jpeg_bytes: bytes, timestamp_ms: int) -> None:
|
||||
return None
|
||||
|
||||
|
||||
class _NullImuEmitter:
|
||||
def emit(self, sample: object) -> None:
|
||||
return None
|
||||
|
||||
|
||||
@pytest.mark.traces_to("AC-2.1a,AC-1,AC-2,AC-3,AC-4")
|
||||
def test_ft_p_04_derkachi_f2f_registration(
|
||||
fc_adapter: str,
|
||||
vio_strategy: str,
|
||||
evidence_dir, # type: ignore[no-untyped-def]
|
||||
run_id: str,
|
||||
nfr_recorder, # type: ignore[no-untyped-def]
|
||||
_harness_helpers_implemented: bool,
|
||||
) -> None:
|
||||
"""Full FT-P-04 scenario.
|
||||
|
||||
AC-1: classification reproducibility — unit-tested via
|
||||
``test_classify_frames_is_reproducible_ac1``.
|
||||
AC-2: ``success_ratio_over_normal_segments ≥ 0.95``.
|
||||
AC-3: sharp-turn frames excluded from the denominator.
|
||||
AC-4: parametrized across ``(fc_adapter, vio_strategy)``.
|
||||
"""
|
||||
if not _harness_helpers_implemented:
|
||||
pytest.skip(
|
||||
"FT-P-04 full replay requires runner.helpers.{frame_source_replay,"
|
||||
"imu_replay,fdr_reader} — currently AZ-441 / AZ-407 leftovers. "
|
||||
"Pure-logic ACs covered by e2e/_unit_tests/helpers/test_registration_classifier.py."
|
||||
)
|
||||
|
||||
from runner.helpers import fdr_reader, imu_replay
|
||||
from runner.helpers.frame_source_replay import FrameSourceReplayer
|
||||
|
||||
# 1. Build the per-frame classification from data_imu.csv up-front.
|
||||
imu_rows = rc.load_imu_telemetry(DERKACHI_IMU_CSV)
|
||||
classifications = rc.classify_frames(imu_rows)
|
||||
|
||||
# 2. Drive the replay.
|
||||
sink = _resolve_frame_sink()
|
||||
emitter = _resolve_fc_inbound_emitter(fc_adapter)
|
||||
FrameSourceReplayer(sink).replay_video(DERKACHI_MP4)
|
||||
imu_replay.ImuReplayer(emitter).replay(DERKACHI_IMU_CSV)
|
||||
|
||||
# 3. Collect per-frame registration_success from the FDR archive.
|
||||
fdr_root = Path(evidence_dir).parent / f"run-{run_id}" / "fdr"
|
||||
registration_success_by_frame: dict[int, bool] = {}
|
||||
for rec in fdr_reader.iter_records(fdr_root):
|
||||
if rec.record_type == "frame_metric":
|
||||
payload = rec.payload
|
||||
if payload.get("metric") == "registration_success":
|
||||
frame_index = int(payload["frame_index"]) # type: ignore[arg-type]
|
||||
registration_success_by_frame[frame_index] = bool(payload["value"]) # type: ignore[arg-type]
|
||||
|
||||
if not registration_success_by_frame:
|
||||
pytest.fail(
|
||||
"FT-P-04: SUT did not emit any frame_metric records with "
|
||||
"metric='registration_success' (required by AC-NEW-3 FDR schema)."
|
||||
)
|
||||
|
||||
# 4. Compute success report + emit evidence.
|
||||
report = rc.compute_success_ratio(classifications, registration_success_by_frame)
|
||||
out_csv = evidence_dir / f"ft-p-04-{fc_adapter}-{vio_strategy}.csv"
|
||||
rc.write_csv_evidence(out_csv, classifications, registration_success_by_frame)
|
||||
|
||||
# 5. Record NFR metrics for the run report.
|
||||
nfr_recorder.record_metric(
|
||||
"ft_p_04.success_ratio", report.ratio, ac_id="AC-2"
|
||||
)
|
||||
nfr_recorder.record_metric(
|
||||
"ft_p_04.normal_denominator", float(report.denominator), ac_id="AC-3"
|
||||
)
|
||||
nfr_recorder.record_metric(
|
||||
"ft_p_04.excluded_by_attitude", float(report.excluded_by_attitude), ac_id="AC-3"
|
||||
)
|
||||
nfr_recorder.record_metric(
|
||||
"ft_p_04.excluded_by_overlap", float(report.excluded_by_overlap), ac_id="AC-3"
|
||||
)
|
||||
nfr_recorder.record_metric(
|
||||
"ft_p_04.excluded_by_missing_metric",
|
||||
float(report.excluded_by_missing_metric),
|
||||
ac_id="AC-2",
|
||||
)
|
||||
|
||||
# 6. AC-2 assertion.
|
||||
assert report.passes, (
|
||||
f"AC-2 (registration ≥{rc.SUCCESS_RATIO_REQUIRED:.0%}) failed: "
|
||||
f"ratio={report.ratio:.4f} over {report.denominator} normal frames "
|
||||
f"(excluded: attitude={report.excluded_by_attitude}, "
|
||||
f"overlap={report.excluded_by_overlap}, "
|
||||
f"missing_metric={report.excluded_by_missing_metric})"
|
||||
)
|
||||
|
||||
|
||||
def _resolve_frame_sink(): # type: ignore[no-untyped-def]
|
||||
"""Stub helper resolved when the underlying replayer lands."""
|
||||
raise NotImplementedError(
|
||||
"frame sink resolution is owned by AZ-441 / runner.helpers.frame_source_replay"
|
||||
)
|
||||
|
||||
|
||||
def _resolve_fc_inbound_emitter(fc_adapter: str): # type: ignore[no-untyped-def]
|
||||
"""Stub helper resolved when the FC inbound emitter lands."""
|
||||
raise NotImplementedError(
|
||||
"FC inbound emitter resolution is owned by AZ-416/AZ-417 / runner.helpers.imu_replay"
|
||||
)
|
||||
@@ -0,0 +1,195 @@
|
||||
"""FT-P-05 — Satellite-anchor cross-domain registration MRE + accuracy (AC-2.1b).
|
||||
|
||||
The full scenario:
|
||||
|
||||
1. Push each ``AD0000NN.jpg`` from ``still-image-set-60`` to the SUT.
|
||||
2. Wait for the SUT's outbound estimate (same path as FT-P-01) + record
|
||||
per-image MRE from ``NAMED_VALUE_FLOAT`` or post-run FDR.
|
||||
3. Compute geodesic error vs ``coordinates.csv`` GT (delegated to
|
||||
``accuracy_evaluator``).
|
||||
4. Emit ``ft-p-05-{fc_adapter}-{vio_strategy}.csv`` (image_id, est_lat,
|
||||
est_lon, error_m, mre_px, pass_50m, pass_20m, pass_mre).
|
||||
5. Assert AC-2 (every MRE < 2.5 px) AND AC-3 (≥80 % within 50 m AND
|
||||
≥50 % within 20 m — same image set as FT-P-01; this AC is
|
||||
"implied by FT-P-01" if FT-P-01 passes in the same run).
|
||||
|
||||
What this file owns:
|
||||
|
||||
* The AC-1 / AC-2 / AC-3 / AC-5 wiring above.
|
||||
* CSV evidence emission via the AZ-413-owned ``mre_evaluator``.
|
||||
|
||||
What this file does NOT own:
|
||||
|
||||
* The frame-source push → ``runner.helpers.frame_source_replay`` (stub;
|
||||
AZ-441) — skip-gated.
|
||||
* The SITL message receipt + MRE harvesting → ``runner.helpers.{sitl_observer,
|
||||
fdr_reader}`` (stubs; AZ-416/AZ-417, AZ-441) — skip-gated.
|
||||
|
||||
When the upstream helpers land, this file's runtime path activates
|
||||
automatically.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from runner.helpers import accuracy_evaluator as ae
|
||||
from runner.helpers import mre_evaluator as me
|
||||
|
||||
GT_CSV = Path(__file__).resolve().parents[3] / "_docs" / "00_problem" / "input_data" / "coordinates.csv"
|
||||
STILL_IMAGES_DIR = GT_CSV.parent
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def _harness_helpers_implemented() -> bool:
|
||||
"""True iff replay + SITL observation + FDR helpers are all real."""
|
||||
from runner.helpers import fdr_reader, frame_source_replay, sitl_observer
|
||||
from runner.helpers.frame_source_replay import FrameSourceReplayer
|
||||
|
||||
try:
|
||||
replayer = FrameSourceReplayer(sink=_NullSink()) # type: ignore[arg-type]
|
||||
try:
|
||||
replayer.replay_image_directory(Path("/tmp/non-existent"))
|
||||
except NotImplementedError:
|
||||
return False
|
||||
try:
|
||||
sitl_observer.get_observer(fc_adapter="ardupilot", host="sitl-ardupilot")
|
||||
except NotImplementedError:
|
||||
return False
|
||||
try:
|
||||
list(fdr_reader.iter_records(Path("/tmp/non-existent")))
|
||||
except NotImplementedError:
|
||||
return False
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
class _NullSink:
|
||||
def write_frame(self, jpeg_bytes: bytes, timestamp_ms: int) -> None:
|
||||
return None
|
||||
|
||||
|
||||
@pytest.mark.traces_to("AC-2.1b,AC-1,AC-2,AC-3,AC-5")
|
||||
def test_ft_p_05_sat_anchor(
|
||||
fc_adapter: str,
|
||||
vio_strategy: str,
|
||||
evidence_dir, # type: ignore[no-untyped-def]
|
||||
run_id: str,
|
||||
nfr_recorder, # type: ignore[no-untyped-def]
|
||||
_harness_helpers_implemented: bool,
|
||||
) -> None:
|
||||
"""Full FT-P-05 scenario.
|
||||
|
||||
AC-1: per-image MRE captured in ``ft-p-05.csv``.
|
||||
AC-2: every MRE < 2.5 px.
|
||||
AC-3: ≥80 % within 50 m AND ≥50 % within 20 m (same image set as FT-P-01).
|
||||
AC-5: parametrized across ``(fc_adapter, vio_strategy)``.
|
||||
"""
|
||||
if not _harness_helpers_implemented:
|
||||
pytest.skip(
|
||||
"FT-P-05 still-image push requires runner.helpers.{frame_source_replay,"
|
||||
"sitl_observer,fdr_reader} — currently AZ-441 + AZ-416/AZ-417 leftovers. "
|
||||
"Pure-logic ACs covered by e2e/_unit_tests/helpers/test_mre_evaluator.py."
|
||||
)
|
||||
|
||||
from runner.helpers import fdr_reader, frame_source_replay, sitl_observer
|
||||
from runner.helpers.frame_source_replay import FrameSourceReplayer
|
||||
|
||||
# 1. Resolve GT + image inventory.
|
||||
gt_rows = ae.load_gt_coordinates(GT_CSV)
|
||||
image_paths = sorted(STILL_IMAGES_DIR.glob("AD??????.jpg"))
|
||||
if len(image_paths) != ae.TOTAL_IMAGES_REQUIRED:
|
||||
pytest.fail(
|
||||
f"FT-P-05 expects {ae.TOTAL_IMAGES_REQUIRED} images, found {len(image_paths)}"
|
||||
)
|
||||
|
||||
# 2. Push images, collect (est_lat, est_lon, mre_px) per image.
|
||||
sitl_host = "sitl-ardupilot" if fc_adapter == "ardupilot" else "sitl-inav"
|
||||
observer = sitl_observer.get_observer(fc_adapter=fc_adapter, host=sitl_host)
|
||||
sink = _resolve_frame_sink()
|
||||
replayer = FrameSourceReplayer(sink)
|
||||
|
||||
estimates: list[ae.EstimateInput] = []
|
||||
mre_records: list[me.CrossDomainRecord] = []
|
||||
per_image_timeout_s = 5.0
|
||||
for path in image_paths:
|
||||
image_id = path.name
|
||||
replayer.replay_image(path)
|
||||
try:
|
||||
msg = observer.wait_for_outbound(timeout_s=per_image_timeout_s)
|
||||
estimates.append(
|
||||
ae.EstimateInput(
|
||||
image_id=image_id,
|
||||
est_lat_deg=float(msg.lat_deg),
|
||||
est_lon_deg=float(msg.lon_deg),
|
||||
)
|
||||
)
|
||||
mre_records.append(
|
||||
me.CrossDomainRecord(
|
||||
image_id=image_id,
|
||||
mre_px=float(msg.mre_px),
|
||||
error_m=0.0, # filled in once geodesic computed
|
||||
)
|
||||
)
|
||||
except TimeoutError:
|
||||
estimates.append(
|
||||
ae.EstimateInput(image_id=image_id, est_lat_deg=math.inf, est_lon_deg=math.inf)
|
||||
)
|
||||
mre_records.append(
|
||||
me.CrossDomainRecord(image_id=image_id, mre_px=math.inf, error_m=math.inf)
|
||||
)
|
||||
|
||||
# 3. Compute per-image error_m by joining with GT.
|
||||
per_image_results, accuracy_aggregate = ae.evaluate(gt_rows, estimates)
|
||||
pass_50m = {r.image_id: r.pass_50m for r in per_image_results}
|
||||
pass_20m = {r.image_id: r.pass_20m for r in per_image_results}
|
||||
error_by_image = {r.image_id: r.error_m for r in per_image_results}
|
||||
mre_records = [
|
||||
me.CrossDomainRecord(
|
||||
image_id=r.image_id, mre_px=r.mre_px, error_m=error_by_image[r.image_id]
|
||||
)
|
||||
for r in mre_records
|
||||
]
|
||||
|
||||
# 4. Emit FT-P-05 evidence.
|
||||
out_csv = evidence_dir / f"ft-p-05-{fc_adapter}-{vio_strategy}.csv"
|
||||
me.write_cross_domain_csv(out_csv, mre_records, pass_50m=pass_50m, pass_20m=pass_20m)
|
||||
|
||||
# 5. Evaluate AC-2 + record NFR metrics.
|
||||
mre_report = me.evaluate_per_image_budget(mre_records)
|
||||
nfr_recorder.record_metric("ft_p_05.max_mre_px", mre_report.max_mre_px, ac_id="AC-2")
|
||||
nfr_recorder.record_metric(
|
||||
"ft_p_05.mre_pass_count", float(mre_report.pass_count), ac_id="AC-2"
|
||||
)
|
||||
nfr_recorder.record_metric(
|
||||
"ft_p_05.pass_count_50m", float(accuracy_aggregate.pass_count_50m), ac_id="AC-3"
|
||||
)
|
||||
nfr_recorder.record_metric(
|
||||
"ft_p_05.pass_count_20m", float(accuracy_aggregate.pass_count_20m), ac_id="AC-3"
|
||||
)
|
||||
|
||||
# 6. AC assertions.
|
||||
assert mre_report.passes, (
|
||||
f"AC-2 (cross-domain MRE < {me.MRE_PER_IMAGE_BUDGET_PX} px) failed: "
|
||||
f"{len(mre_report.fail_image_ids)} image(s) over budget; "
|
||||
f"max_mre={mre_report.max_mre_px:.4f} px; "
|
||||
f"failing image_ids={list(mre_report.fail_image_ids)[:5]}"
|
||||
)
|
||||
assert accuracy_aggregate.pass_ac2, (
|
||||
f"AC-3 (50 m budget — implied by FT-P-01) failed: "
|
||||
f"{accuracy_aggregate.pass_count_50m}/60 < {ae.PASS_COUNT_50M_REQUIRED}"
|
||||
)
|
||||
assert accuracy_aggregate.pass_ac3, (
|
||||
f"AC-3 (20 m budget — implied by FT-P-01) failed: "
|
||||
f"{accuracy_aggregate.pass_count_20m}/60 < {ae.PASS_COUNT_20M_REQUIRED}"
|
||||
)
|
||||
|
||||
|
||||
def _resolve_frame_sink(): # type: ignore[no-untyped-def]
|
||||
raise NotImplementedError(
|
||||
"frame sink resolution is owned by AZ-441 / runner.helpers.frame_source_replay"
|
||||
)
|
||||
@@ -0,0 +1,93 @@
|
||||
"""FT-P-06 — 95th-percentile MRE budgets (AC-2.2).
|
||||
|
||||
Piggyback test: depends on the FT-P-04 + FT-P-05 evidence CSVs produced
|
||||
in the same run. Reads both, aggregates per domain, asserts:
|
||||
|
||||
* Frame-to-frame p95 MRE < 1.0 px
|
||||
* Cross-domain p95 MRE < 2.5 px
|
||||
|
||||
What this file owns:
|
||||
|
||||
* The AC-4 assertion + the combined report.
|
||||
|
||||
What this file does NOT own:
|
||||
|
||||
* The FT-P-04 evidence collection — owned by ``test_ft_p_04_*``.
|
||||
* The FT-P-05 evidence collection — owned by ``test_ft_p_05_*``.
|
||||
* Both run as the same pytest session; this test depends on the
|
||||
artefacts they wrote to ``evidence_dir``.
|
||||
|
||||
Skip discipline: if either evidence CSV is missing, the test SKIPS with
|
||||
a clear reason (it cannot fail without the upstream evidence; that
|
||||
would mask the actual gate, which is whether FT-P-04 / FT-P-05 ran).
|
||||
The autodev / Tier-1 runner will only mark this test FAIL if it runs
|
||||
AND the evidence is present AND the p95 budgets are exceeded.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from runner.helpers import mre_evaluator as me
|
||||
|
||||
|
||||
@pytest.mark.traces_to("AC-2.2,AC-4,AC-5")
|
||||
def test_ft_p_06_mre_budgets(
|
||||
fc_adapter: str,
|
||||
vio_strategy: str,
|
||||
evidence_dir, # type: ignore[no-untyped-def]
|
||||
nfr_recorder, # type: ignore[no-untyped-def]
|
||||
) -> None:
|
||||
"""AC-4: 95th-percentile MRE < 1.0 px f2f AND < 2.5 px cross-domain.
|
||||
|
||||
AC-5: parametrized across ``(fc_adapter, vio_strategy)``.
|
||||
|
||||
This test is a pure piggyback — it reads the FT-P-04 + FT-P-05 CSVs
|
||||
from the same run. If either is missing the test skips (without
|
||||
those, FT-P-06 has nothing to assert on).
|
||||
"""
|
||||
f2f_csv = evidence_dir / f"ft-p-04-{fc_adapter}-{vio_strategy}.csv"
|
||||
xd_csv = evidence_dir / f"ft-p-05-{fc_adapter}-{vio_strategy}.csv"
|
||||
|
||||
if not f2f_csv.exists() or not xd_csv.exists():
|
||||
missing = [str(p.name) for p in (f2f_csv, xd_csv) if not p.exists()]
|
||||
pytest.skip(
|
||||
f"FT-P-06 piggybacks on FT-P-04 + FT-P-05 evidence; missing in this run: {missing}. "
|
||||
"Pure-logic ACs covered by e2e/_unit_tests/helpers/test_mre_evaluator.py."
|
||||
)
|
||||
|
||||
# Both CSVs present — load and evaluate.
|
||||
try:
|
||||
f2f_records = me.load_frame_to_frame_csv(f2f_csv)
|
||||
except ValueError as exc:
|
||||
# mre_px column absent → FT-P-04 evidence does not yet carry MRE.
|
||||
# Per the FT-P-06 spec: "if absent, the test fails" — but at this
|
||||
# point the failure is on the SUT (it must expose per-frame MRE).
|
||||
pytest.fail(f"FT-P-04 evidence is missing per-frame MRE: {exc}")
|
||||
xd_records = me.load_cross_domain_csv(xd_csv)
|
||||
|
||||
combined = me.evaluate_combined_p95(f2f_records, xd_records)
|
||||
|
||||
nfr_recorder.record_metric(
|
||||
"ft_p_06.f2f_p95_mre_px",
|
||||
combined.frame_to_frame.p95_px,
|
||||
ac_id="AC-4",
|
||||
)
|
||||
nfr_recorder.record_metric(
|
||||
"ft_p_06.cross_domain_p95_mre_px",
|
||||
combined.cross_domain.p95_px,
|
||||
ac_id="AC-4",
|
||||
)
|
||||
|
||||
assert combined.frame_to_frame.passes, (
|
||||
f"AC-4 (frame-to-frame p95 MRE < {me.MRE_P95_FRAME_TO_FRAME_BUDGET_PX} px) "
|
||||
f"failed: p95={combined.frame_to_frame.p95_px:.4f} over "
|
||||
f"{combined.frame_to_frame.sample_count} samples"
|
||||
)
|
||||
assert combined.cross_domain.passes, (
|
||||
f"AC-4 (cross-domain p95 MRE < {me.MRE_P95_CROSS_DOMAIN_BUDGET_PX} px) "
|
||||
f"failed: p95={combined.cross_domain.p95_px:.4f} over "
|
||||
f"{combined.cross_domain.sample_count} samples"
|
||||
)
|
||||
Reference in New Issue
Block a user