mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 15:41:12 +00:00
[AZ-409] [AZ-412] [AZ-413] Batch 70: FT-P-01/04/05/06 scenarios
AZ-409 (3pt) — FT-P-01 still-image frame-center accuracy:
- accuracy_evaluator.py: GT loader + Vincenty error + AC-2/AC-3 pass-counts
- test_ft_p_01_still_image_accuracy.py: scenario gated on frame_source_replay + sitl_observer NotImplementedError; AC-4 timeout discipline

AZ-412 (3pt) — FT-P-04 Derkachi f2f registration >=95% on normal segments:
- registration_classifier.py: accel-derived attitude + overlap heuristic + success ratio with AC-3 sharp-turn exclusion
- test_ft_p_04_derkachi_f2f_registration.py: scenario gated on frame_source_replay + imu_replay + fdr_reader

AZ-413 (3pt) — FT-P-05 + FT-P-06 cross-domain MRE budgets:
- mre_evaluator.py: per-image budget (strict <2.5px) + 95th-percentile via numpy linear interp + combined report
- test_ft_p_05_sat_anchor.py: cross-domain scenario, reuses accuracy_evaluator for geodesic join
- test_ft_p_06_mre_budgets.py: pure piggyback on FT-P-04 + FT-P-05 CSV evidence; skips when either upstream CSV is missing

Tests: 325 unit tests pass (+77 vs batch 69).
Reports: batch_70_report.md, batch_70_review.md (PASS).

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,360 @@
|
||||
"""Unit tests for ``runner.helpers.accuracy_evaluator`` (FT-P-01 / AZ-409).
|
||||
|
||||
Covers AC-1 (per-image evaluation), AC-2 (50 m pass-count threshold ≥48),
|
||||
AC-3 (20 m pass-count threshold ≥30), AC-4 (timeout discipline) and the
|
||||
CSV evidence shape.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import math
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from runner.helpers.accuracy_evaluator import (
|
||||
PASS_COUNT_20M_REQUIRED,
|
||||
PASS_COUNT_50M_REQUIRED,
|
||||
TOTAL_IMAGES_REQUIRED,
|
||||
AggregateReport,
|
||||
EstimateInput,
|
||||
GtCoordinate,
|
||||
PerImageResult,
|
||||
compute_per_image,
|
||||
evaluate,
|
||||
load_gt_coordinates,
|
||||
write_csv_evidence,
|
||||
)
|
||||
from runner.helpers.geo import distance_m, offset
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[3]
|
||||
GT_CSV = REPO_ROOT / "_docs" / "00_problem" / "input_data" / "coordinates.csv"
|
||||
|
||||
|
||||
def test_load_gt_coordinates_parses_repo_csv() -> None:
    """Load the shipped ``coordinates.csv`` and spot-check the parsed rows.

    The row count must equal ``TOTAL_IMAGES_REQUIRED``; the first row's id and
    coordinates and the last row's id are pinned to known values.
    """
    # Act
    gt_rows = load_gt_coordinates(GT_CSV)

    # Assert — size first, then the two ends of the list.
    assert len(gt_rows) == TOTAL_IMAGES_REQUIRED
    head = gt_rows[0]
    assert head.image_id == "AD000001.jpg"
    assert head.lat_deg == pytest.approx(48.275292, abs=1e-6)
    assert head.lon_deg == pytest.approx(37.385220, abs=1e-6)
    assert gt_rows[-1].image_id == "AD000060.jpg"
|
||||
|
||||
|
||||
def test_load_gt_coordinates_rejects_missing_file(tmp_path: Path) -> None:
    """A non-existent path must raise FileNotFoundError rather than return rows."""
    # Arrange
    absent_csv = tmp_path / "missing.csv"

    # Act / Assert
    with pytest.raises(FileNotFoundError):
        load_gt_coordinates(absent_csv)
|
||||
|
||||
|
||||
def test_load_gt_coordinates_rejects_wrong_header(tmp_path: Path) -> None:
    """A CSV whose header differs from the expected one raises ValueError."""
    # Arrange — header uses column names the loader does not accept.
    wrong_header_csv = tmp_path / "bad.csv"
    wrong_header_csv.write_text("img_name,latitude,longitude\nx,1,2\n")

    # Act / Assert
    with pytest.raises(ValueError, match="header mismatch"):
        load_gt_coordinates(wrong_header_csv)
|
||||
|
||||
|
||||
def test_compute_per_image_zero_error_for_exact_match() -> None:
    """An estimate identical to GT gives error_m ≈ 0 with both pass flags True."""
    # Arrange — same coordinates on both sides.
    truth = GtCoordinate("AD000001.jpg", 48.275292, 37.385220)
    guess = EstimateInput("AD000001.jpg", 48.275292, 37.385220)

    # Act
    outcome = compute_per_image(truth, guess)

    # Assert
    assert outcome.error_m == pytest.approx(0.0, abs=1e-6)
    assert outcome.pass_50m is True
    assert outcome.pass_20m is True
|
||||
|
||||
|
||||
def test_compute_per_image_15m_north_passes_both() -> None:
    """An estimate 15 m north of GT is inside both the 50 m and 20 m budgets."""
    # Arrange — shift the GT point 15 m along bearing 0°.
    truth = GtCoordinate("AD000001.jpg", 48.275292, 37.385220)
    shifted_lat, shifted_lon = offset(
        truth.lat_deg,
        truth.lon_deg,
        bearing_deg=0.0,
        distance_m=15.0,
    )
    guess = EstimateInput("AD000001.jpg", shifted_lat, shifted_lon)

    # Act
    outcome = compute_per_image(truth, guess)

    # Assert
    assert outcome.error_m == pytest.approx(15.0, abs=0.5)
    assert outcome.pass_50m is True
    assert outcome.pass_20m is True
|
||||
|
||||
|
||||
def test_compute_per_image_35m_east_passes_50_only() -> None:
    """An estimate 35 m east of GT passes the 50 m budget but not the 20 m one."""
    # Arrange — shift the GT point 35 m along bearing 90°.
    truth = GtCoordinate("AD000001.jpg", 48.275292, 37.385220)
    shifted_lat, shifted_lon = offset(
        truth.lat_deg,
        truth.lon_deg,
        bearing_deg=90.0,
        distance_m=35.0,
    )
    guess = EstimateInput("AD000001.jpg", shifted_lat, shifted_lon)

    # Act
    outcome = compute_per_image(truth, guess)

    # Assert
    assert outcome.error_m == pytest.approx(35.0, abs=0.5)
    assert outcome.pass_50m is True
    assert outcome.pass_20m is False
|
||||
|
||||
|
||||
def test_compute_per_image_120m_south_fails_both() -> None:
    """An estimate 120 m south of GT is outside both budgets."""
    # Arrange — shift the GT point 120 m along bearing 180°.
    truth = GtCoordinate("AD000001.jpg", 48.275292, 37.385220)
    shifted_lat, shifted_lon = offset(
        truth.lat_deg,
        truth.lon_deg,
        bearing_deg=180.0,
        distance_m=120.0,
    )
    guess = EstimateInput("AD000001.jpg", shifted_lat, shifted_lon)

    # Act
    outcome = compute_per_image(truth, guess)

    # Assert
    assert outcome.error_m == pytest.approx(120.0, abs=0.5)
    assert outcome.pass_50m is False
    assert outcome.pass_20m is False
|
||||
|
||||
|
||||
def test_compute_per_image_timeout_sets_inf_and_false_flags() -> None:
    """AC-4: an infinite estimate yields error_m = inf and both flags False."""
    # Arrange — inf/inf is the estimate used here to represent a timeout.
    truth = GtCoordinate("AD000001.jpg", 48.275292, 37.385220)
    timed_out = EstimateInput("AD000001.jpg", math.inf, math.inf)

    # Act — must not raise.
    outcome = compute_per_image(truth, timed_out)

    # Assert
    assert math.isinf(outcome.error_m)
    assert outcome.pass_50m is False
    assert outcome.pass_20m is False
|
||||
|
||||
|
||||
def test_compute_per_image_rejects_image_id_mismatch() -> None:
    """Pairing a GT row with an estimate for a different image raises ValueError."""
    # Arrange — identical coordinates, different image ids.
    truth = GtCoordinate("AD000001.jpg", 48.0, 37.0)
    other_image = EstimateInput("AD000002.jpg", 48.0, 37.0)

    # Act / Assert
    with pytest.raises(ValueError, match="image_id mismatch"):
        compute_per_image(truth, other_image)
|
||||
|
||||
|
||||
def _make_gt_with_offsets(offsets_m: list[float]) -> tuple[list[GtCoordinate], list[EstimateInput]]:
    """Build parallel GT / estimate lists, one pair per entry of ``offsets_m``.

    Image ids are ``AD000001.jpg``, ``AD000002.jpg``, … (1-based). Each GT point
    is stepped 1e-4° of latitude from the previous one, and the matching
    estimate is the GT point moved ``offsets_m[i]`` metres along bearing 0°.
    """
    origin_lat, origin_lon = 48.275, 37.385
    truth: list[GtCoordinate] = []
    guesses: list[EstimateInput] = []
    for seq, north_m in enumerate(offsets_m, start=1):
        name = f"AD{seq:06d}.jpg"
        lat = origin_lat + seq * 1e-4
        truth.append(GtCoordinate(name, lat, origin_lon))
        moved_lat, moved_lon = offset(lat, origin_lon, bearing_deg=0.0, distance_m=north_m)
        guesses.append(EstimateInput(name, moved_lat, moved_lon))
    return truth, guesses
|
||||
|
||||
|
||||
def test_evaluate_all_pass_yields_overall_pass() -> None:
    """Every image within 20 m: AC-2 and AC-3 both pass.

    The expected pass counts are derived from ``TOTAL_IMAGES_REQUIRED`` — the
    same constant that sizes the input — so arrange and assert cannot drift
    apart if the constant ever changes.
    """
    # Arrange — one 5 m offset per required image.
    offsets = [5.0] * TOTAL_IMAGES_REQUIRED
    gt_rows, estimates = _make_gt_with_offsets(offsets)

    # Act
    results, aggregate = evaluate(gt_rows, estimates)

    # Assert
    assert len(results) == TOTAL_IMAGES_REQUIRED
    assert aggregate.pass_count_50m == TOTAL_IMAGES_REQUIRED
    assert aggregate.pass_count_20m == TOTAL_IMAGES_REQUIRED
    assert aggregate.timeout_count == 0
    assert aggregate.overall_pass is True
|
||||
|
||||
|
||||
def test_evaluate_boundary_threshold_holds() -> None:
    """Exactly 48 within 50 m and 30 within 20 m still gives overall_pass True."""
    # Arrange — 30 images at 10 m (pass both), 18 at 35 m (pass 50 m only),
    # 12 at 120 m (fail both).
    within_20m = [10.0] * 30
    within_50m_only = [35.0] * 18
    outside_both = [120.0] * 12
    gt_rows, estimates = _make_gt_with_offsets(within_20m + within_50m_only + outside_both)

    # Act
    _results, summary = evaluate(gt_rows, estimates)

    # Assert
    assert summary.pass_count_50m == 48
    assert summary.pass_count_20m == 30
    assert summary.pass_ac2 is True
    assert summary.pass_ac3 is True
    assert summary.overall_pass is True
|
||||
|
||||
|
||||
def test_evaluate_below_50m_threshold_fails_overall() -> None:
    """47/60 within 50 m: AC-2 fails, so overall_pass is False."""
    # Arrange — 30 at 10 m plus 17 at 35 m (47 within 50 m), 13 at 120 m.
    within_20m = [10.0] * 30
    within_50m_only = [35.0] * 17
    outside_both = [120.0] * 13
    gt_rows, estimates = _make_gt_with_offsets(within_20m + within_50m_only + outside_both)

    # Act
    _results, summary = evaluate(gt_rows, estimates)

    # Assert
    assert summary.pass_count_50m == 47
    assert summary.pass_ac2 is False
    assert summary.overall_pass is False
|
||||
|
||||
|
||||
def test_evaluate_below_20m_threshold_fails_overall() -> None:
    """All 60 within 50 m but only 29 within 20 m: AC-3 fails."""
    # Arrange — 29 at 10 m, the remaining 31 at 35 m.
    within_20m = [10.0] * 29
    within_50m_only = [35.0] * 31
    gt_rows, estimates = _make_gt_with_offsets(within_20m + within_50m_only)

    # Act
    _results, summary = evaluate(gt_rows, estimates)

    # Assert
    assert summary.pass_count_50m == 60
    assert summary.pass_count_20m == 29
    assert summary.pass_ac3 is False
    assert summary.overall_pass is False
|
||||
|
||||
|
||||
def test_evaluate_missing_estimate_recorded_as_timeout() -> None:
    """A GT row with no estimate becomes a timeout row and is counted as one."""
    # Arrange
    gt_rows, estimates = _make_gt_with_offsets([5.0] * TOTAL_IMAGES_REQUIRED)
    # Remove the 7th estimate (index 6) to simulate a SITL timeout for
    # AD000007.jpg.
    dropped_index = 6
    estimates_with_gap = estimates[:dropped_index] + estimates[dropped_index + 1:]

    # Act
    results, aggregate = evaluate(gt_rows, estimates_with_gap)

    # Assert — the gap shows up at the same position in the results.
    assert len(results) == TOTAL_IMAGES_REQUIRED
    assert aggregate.timeout_count == 1
    gap_row = results[dropped_index]
    assert gap_row.image_id == "AD000007.jpg"
    assert math.isinf(gap_row.error_m)
    assert gap_row.pass_50m is False
|
||||
|
||||
|
||||
def test_evaluate_rejects_duplicate_estimate_image_id() -> None:
    """Two estimates carrying the same image_id make ``evaluate`` raise ValueError."""
    # Arrange — clone the first estimate and append the copy.
    gt_rows, estimates = _make_gt_with_offsets([5.0] * 2)
    first = estimates[0]
    estimates.append(EstimateInput(first.image_id, first.est_lat_deg, first.est_lon_deg))

    # Act / Assert
    with pytest.raises(ValueError, match="duplicate estimate image_ids"):
        evaluate(gt_rows, estimates)
|
||||
|
||||
|
||||
def test_evaluate_rejects_stranger_estimate_image_id() -> None:
    """An estimate whose image_id has no GT row makes ``evaluate`` raise ValueError."""
    # Arrange — two matching pairs plus one estimate for an unknown image.
    gt_rows, estimates = _make_gt_with_offsets([5.0] * 2)
    unknown = EstimateInput("AD999999.jpg", 48.0, 37.0)
    estimates.append(unknown)

    # Act / Assert
    with pytest.raises(ValueError, match="not in GT"):
        evaluate(gt_rows, estimates)
|
||||
|
||||
|
||||
def test_evaluate_full_timeout_run_produces_zero_pass_counts() -> None:
    """Every image timed out: pass counts are 0 and overall_pass is False.

    The GT set is sized from ``TOTAL_IMAGES_REQUIRED`` (as the sibling
    ``evaluate`` tests do) instead of a hard-coded 60, so the scenario stays a
    "full run" if the required image count changes.
    """
    # Arrange — a full GT set and no estimates at all.
    gt_rows = [
        GtCoordinate(f"AD{i:06d}.jpg", 48.275 + i * 1e-4, 37.385)
        for i in range(1, TOTAL_IMAGES_REQUIRED + 1)
    ]
    estimates: list[EstimateInput] = []

    # Act
    results, aggregate = evaluate(gt_rows, estimates)

    # Assert
    assert aggregate.timeout_count == TOTAL_IMAGES_REQUIRED
    assert aggregate.pass_count_50m == 0
    assert aggregate.pass_count_20m == 0
    assert aggregate.overall_pass is False
    assert all(math.isinf(r.error_m) for r in results)
|
||||
|
||||
|
||||
def test_aggregate_report_thresholds_match_results_report() -> None:
    """Pin the module constants to the values in results_report.md (48 / 30 / 60)."""
    # Assert — (50 m pass count, 20 m pass count, total images).
    actual = (PASS_COUNT_50M_REQUIRED, PASS_COUNT_20M_REQUIRED, TOTAL_IMAGES_REQUIRED)
    assert actual == (48, 30, 60)
|
||||
|
||||
|
||||
def test_write_csv_evidence_round_trip(tmp_path: Path) -> None:
    """Check row count, header and pass flags of the written evidence CSV.

    The file is read back inside a ``with`` block so the handle is closed
    deterministically (the previous ``out_path.open()`` was never closed), and
    it is opened with ``newline=""`` as the ``csv`` module recommends.
    """
    # Arrange
    offsets = [5.0, 35.0, 120.0]
    gt_rows, estimates = _make_gt_with_offsets(offsets)
    results, _ = evaluate(gt_rows, estimates)
    out_path = tmp_path / "ft-p-01.csv"

    # Act
    written = write_csv_evidence(out_path, results)

    # Assert
    assert written == out_path
    with out_path.open(newline="") as handle:
        rows = list(csv.reader(handle))
    assert rows[0] == [
        "image_id",
        "gt_lat",
        "gt_lon",
        "est_lat",
        "est_lon",
        "error_m",
        "pass_50m",
        "pass_20m",
    ]
    # One header row plus one data row per offset.
    assert len(rows) == 1 + len(offsets)
    # AD000003 had a 120 m offset → pass_50m=false, pass_20m=false
    far_row = rows[3]
    assert far_row[0] == "AD000003.jpg"
    assert far_row[6] == "false"
    assert far_row[7] == "false"
|
||||
|
||||
|
||||
def test_write_csv_evidence_serializes_timeout_as_inf(tmp_path: Path) -> None:
    """Timeout rows carry the literal 'inf' in est_lat, est_lon and error_m.

    The file is read back inside a ``with`` block so the handle is closed
    deterministically (the previous ``out_path.open()`` was never closed).
    """
    # Arrange — a hand-built result row with infinite estimate and error.
    gt = GtCoordinate("AD000001.jpg", 48.275, 37.385)
    timeout = PerImageResult(
        image_id="AD000001.jpg",
        gt_lat=gt.lat_deg,
        gt_lon=gt.lon_deg,
        est_lat=math.inf,
        est_lon=math.inf,
        error_m=math.inf,
        pass_50m=False,
        pass_20m=False,
    )
    out_path = tmp_path / "ft-p-01.csv"

    # Act
    write_csv_evidence(out_path, [timeout])

    # Assert — rows[0] is the header; columns 3..5 are est_lat, est_lon, error_m.
    with out_path.open(newline="") as handle:
        rows = list(csv.reader(handle))
    assert rows[1][3] == "inf"
    assert rows[1][4] == "inf"
    assert rows[1][5] == "inf"
|
||||
@@ -0,0 +1,320 @@
|
||||
"""Unit tests for ``runner.helpers.mre_evaluator`` (FT-P-05 + FT-P-06 / AZ-413).
|
||||
|
||||
Covers AC-2 of FT-P-05 (every cross-domain MRE < 2.5 px), AC-3 of FT-P-05
|
||||
(accuracy alongside MRE — delegated to ``accuracy_evaluator``), and AC-4
|
||||
of FT-P-06 (95th-percentile MRE budgets per domain).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import math
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from runner.helpers.mre_evaluator import (
|
||||
MRE_P95_CROSS_DOMAIN_BUDGET_PX,
|
||||
MRE_P95_FRAME_TO_FRAME_BUDGET_PX,
|
||||
MRE_PER_IMAGE_BUDGET_PX,
|
||||
CombinedP95Report,
|
||||
CrossDomainRecord,
|
||||
FrameToFrameRecord,
|
||||
PerImageBudgetReport,
|
||||
P95Report,
|
||||
evaluate_combined_p95,
|
||||
evaluate_p95,
|
||||
evaluate_per_image_budget,
|
||||
load_cross_domain_csv,
|
||||
load_frame_to_frame_csv,
|
||||
summarize_mre_distribution,
|
||||
write_cross_domain_csv,
|
||||
)
|
||||
|
||||
|
||||
def test_constants_match_spec() -> None:
    """Pin the three MRE budget constants to the values in the AC text."""
    # Assert — (per-image, frame-to-frame p95, cross-domain p95) in pixels.
    budgets = (
        MRE_PER_IMAGE_BUDGET_PX,
        MRE_P95_FRAME_TO_FRAME_BUDGET_PX,
        MRE_P95_CROSS_DOMAIN_BUDGET_PX,
    )
    assert budgets == (2.5, 1.0, 2.5)
|
||||
|
||||
|
||||
def test_evaluate_per_image_budget_all_pass() -> None:
    """Sixty records at 1.5 px, all under the 2.5 px budget: AC-2 passes."""
    # Arrange
    under_budget = [
        CrossDomainRecord(f"AD{idx:06d}.jpg", mre_px=1.5, error_m=10.0)
        for idx in range(60)
    ]

    # Act
    outcome = evaluate_per_image_budget(under_budget)

    # Assert
    assert outcome.total_images == 60
    assert outcome.pass_count == 60
    assert outcome.fail_image_ids == ()
    assert outcome.max_mre_px == 1.5
    assert outcome.passes is True
|
||||
|
||||
|
||||
def test_evaluate_per_image_budget_single_fail_fails_overall() -> None:
    """One MRE exactly at 2.5 px fails, because the comparison is strict (<)."""
    # Arrange — 59 passing records followed by one sitting on the boundary.
    passing = [
        CrossDomainRecord(f"AD{idx:06d}.jpg", mre_px=1.0, error_m=5.0)
        for idx in range(59)
    ]
    on_boundary = CrossDomainRecord("AD000060.jpg", mre_px=2.5, error_m=5.0)
    records = passing + [on_boundary]

    # Act
    outcome = evaluate_per_image_budget(records)

    # Assert
    assert outcome.pass_count == 59
    assert outcome.fail_image_ids == ("AD000060.jpg",)
    assert outcome.passes is False
|
||||
|
||||
|
||||
def test_evaluate_per_image_budget_above_boundary_fails() -> None:
    """A record strictly above 2.5 px is reported as the single failure."""
    # Arrange — "a" is under budget, "b" is over it.
    good = CrossDomainRecord("a", mre_px=1.0, error_m=5.0)
    over = CrossDomainRecord("b", mre_px=3.0, error_m=15.0)

    # Act
    outcome = evaluate_per_image_budget([good, over])

    # Assert
    assert outcome.fail_image_ids == ("b",)
    assert outcome.passes is False
    assert outcome.max_mre_px == 3.0
|
||||
|
||||
|
||||
def test_evaluate_per_image_budget_empty_list_does_not_pass() -> None:
    """No records means no positive evidence of compliance, so it must not pass."""
    # Arrange
    no_records = []

    # Act
    outcome = evaluate_per_image_budget(no_records)

    # Assert
    assert outcome.passes is False
|
||||
|
||||
|
||||
def test_evaluate_per_image_budget_rejects_zero_budget() -> None:
    """A budget of 0.0 px is rejected with ValueError."""
    # Act / Assert
    expected_error = pytest.raises(ValueError, match="budget_px must be > 0")
    with expected_error:
        evaluate_per_image_budget([], budget_px=0.0)
|
||||
|
||||
|
||||
def test_evaluate_p95_uses_numpy_linear_interpolation() -> None:
    """The reported p95 must equal numpy's default ``percentile`` result."""
    # Arrange — 20 samples stepping from 0.1 to 2.0.
    samples = [round(0.1 * step, 2) for step in range(1, 21)]
    as_array = np.asarray(samples, dtype=float)
    expected_p95 = float(np.percentile(as_array, 95))

    # Act
    outcome = evaluate_p95(samples, budget_px=2.5)

    # Assert
    assert outcome.sample_count == 20
    assert outcome.p95_px == pytest.approx(expected_p95)
    assert outcome.passes is True
|
||||
|
||||
|
||||
def test_evaluate_p95_passes_when_below_budget() -> None:
    """p95 < 1.0 → passes for the frame-to-frame budget.

    The expected percentile is pinned so the scenario documents itself: the
    original inline note claimed p95 = 0.5, but linear interpolation over
    these samples gives 0.52.
    """
    # Arrange — 95 samples at 0.5 px and 5 at 0.9 px. For 100 samples the 95th
    # percentile sits at virtual index 0.95 * 99 = 94.05, between sorted
    # element 94 (0.5) and element 95 (0.9): 0.5 + 0.05 * 0.4 = 0.52 px.
    samples = [0.5] * 95 + [0.9] * 5

    # Act
    report = evaluate_p95(samples, budget_px=MRE_P95_FRAME_TO_FRAME_BUDGET_PX)

    # Assert
    assert report.p95_px == pytest.approx(0.52, abs=1e-6)
    assert report.passes is True
|
||||
|
||||
|
||||
def test_evaluate_p95_fails_when_above_budget() -> None:
    """A p95 at or above the 1.0 px frame-to-frame budget fails."""
    # Arrange — 90 low samples and 10 high ones, so p95 ≈ 1.5.
    low = [0.5] * 90
    high = [1.5] * 10

    # Act
    outcome = evaluate_p95(low + high, budget_px=MRE_P95_FRAME_TO_FRAME_BUDGET_PX)

    # Assert
    assert outcome.passes is False
    assert outcome.p95_px == pytest.approx(1.5, abs=1e-6)
|
||||
|
||||
|
||||
def test_evaluate_p95_empty_input_does_not_pass() -> None:
    """With zero samples the p95 is NaN and the report does not pass."""
    # Arrange
    no_samples = []

    # Act
    outcome = evaluate_p95(no_samples, budget_px=2.5)

    # Assert
    assert outcome.sample_count == 0
    assert math.isnan(outcome.p95_px)
    assert outcome.passes is False
|
||||
|
||||
|
||||
def test_evaluate_p95_rejects_zero_budget() -> None:
    """A budget of 0.0 px is rejected with ValueError."""
    # Act / Assert
    expected_error = pytest.raises(ValueError, match="budget_px must be > 0")
    with expected_error:
        evaluate_p95([1.0], budget_px=0.0)
|
||||
|
||||
|
||||
def test_evaluate_combined_p95_both_pass() -> None:
    """Both domains are under their budgets, so the combined report passes."""
    # Arrange — 100 frame-to-frame records at 0.4 px, 60 cross-domain at 1.0 px.
    frame_pairs = [FrameToFrameRecord(frame_index=idx, mre_px=0.4) for idx in range(100)]
    cross_domain = [
        CrossDomainRecord(f"AD{idx:06d}.jpg", mre_px=1.0, error_m=5.0)
        for idx in range(60)
    ]

    # Act
    combined = evaluate_combined_p95(frame_pairs, cross_domain)

    # Assert
    assert combined.frame_to_frame.passes is True
    assert combined.cross_domain.passes is True
    assert combined.passes is True
|
||||
|
||||
|
||||
def test_evaluate_combined_p95_fails_when_frame_to_frame_fails() -> None:
    """A failing frame-to-frame p95 fails the combined report on its own."""
    # Arrange — frames 0..89 at 0.5 px and 90..99 at 1.5 px (f2f p95 ≈ 1.5);
    # cross-domain stays at 1.0 px.
    frame_pairs = [
        FrameToFrameRecord(frame_index=idx, mre_px=0.5 if idx < 90 else 1.5)
        for idx in range(100)
    ]
    cross_domain = [
        CrossDomainRecord(f"a{idx}", mre_px=1.0, error_m=5.0) for idx in range(60)
    ]

    # Act
    combined = evaluate_combined_p95(frame_pairs, cross_domain)

    # Assert
    assert combined.frame_to_frame.passes is False
    assert combined.cross_domain.passes is True
    assert combined.passes is False
|
||||
|
||||
|
||||
def test_evaluate_combined_p95_fails_when_cross_domain_fails() -> None:
    """A failing cross-domain p95 fails the combined report on its own."""
    # Arrange — every f2f record at 0.5 px; cross-domain has 54 records at
    # 1.0 px followed by 6 at 3.0 px.
    frame_pairs = [FrameToFrameRecord(frame_index=idx, mre_px=0.5) for idx in range(100)]
    in_budget = [CrossDomainRecord(f"a{idx}", mre_px=1.0, error_m=5.0) for idx in range(54)]
    over_budget = [CrossDomainRecord(f"b{idx}", mre_px=3.0, error_m=5.0) for idx in range(6)]

    # Act
    combined = evaluate_combined_p95(frame_pairs, in_budget + over_budget)

    # Assert
    assert combined.cross_domain.passes is False
    assert combined.passes is False
|
||||
|
||||
|
||||
def test_write_cross_domain_csv_round_trip(tmp_path: Path) -> None:
    """Records written with the writer come back from the loader."""
    # Arrange
    first = CrossDomainRecord("AD000001.jpg", mre_px=1.234, error_m=12.345)
    second = CrossDomainRecord("AD000002.jpg", mre_px=2.6, error_m=200.0)
    csv_path = tmp_path / "ft-p-05.csv"

    # Act
    write_cross_domain_csv(csv_path, [first, second])
    reloaded = load_cross_domain_csv(csv_path)

    # Assert
    assert len(reloaded) == 2
    assert reloaded[0].image_id == "AD000001.jpg"
    assert reloaded[0].mre_px == pytest.approx(1.234, abs=1e-3)
    assert reloaded[1].mre_px == pytest.approx(2.6, abs=1e-3)
|
||||
|
||||
|
||||
def test_write_cross_domain_csv_emits_pass_mre_column(tmp_path: Path) -> None:
    """Each row's pass_mre cell reflects the strict ``< 2.5`` comparison.

    The file is read back inside a ``with`` block so the handle is closed
    deterministically (the previous ``out.open()`` was never closed).
    """
    # Arrange — under budget, exactly on the boundary, just under the boundary.
    records = [
        CrossDomainRecord("a", mre_px=1.0, error_m=5.0),
        CrossDomainRecord("b", mre_px=2.5, error_m=5.0),
        CrossDomainRecord("c", mre_px=2.499, error_m=5.0),
    ]
    out = tmp_path / "ft-p-05.csv"

    # Act
    write_cross_domain_csv(out, records)
    with out.open(newline="") as handle:
        rows = list(csv.reader(handle))

    # Assert — rows[0] is the header; column 7 is checked as the pass_mre cell.
    assert rows[1][7] == "true"  # a (1.0 px)
    assert rows[2][7] == "false"  # b (2.5 px — strict <)
    assert rows[3][7] == "true"  # c (2.499 px)
|
||||
|
||||
|
||||
def test_load_cross_domain_csv_rejects_missing_file(tmp_path: Path) -> None:
    """Loading a non-existent cross-domain CSV raises FileNotFoundError."""
    # Arrange
    absent_csv = tmp_path / "missing.csv"

    # Act / Assert
    with pytest.raises(FileNotFoundError):
        load_cross_domain_csv(absent_csv)
|
||||
|
||||
|
||||
def test_load_cross_domain_csv_rejects_missing_columns(tmp_path: Path) -> None:
    """A cross-domain CSV with only image_id and mre_px columns raises ValueError."""
    # Arrange
    incomplete_csv = tmp_path / "bad.csv"
    incomplete_csv.write_text("image_id,mre_px\nx,1.0\n")

    # Act / Assert
    with pytest.raises(ValueError, match="missing columns"):
        load_cross_domain_csv(incomplete_csv)
|
||||
|
||||
|
||||
def test_load_frame_to_frame_csv_rejects_missing_mre_column(tmp_path: Path) -> None:
    """FT-P-04 evidence without an mre_px column must make the loader raise."""
    # Arrange — an FT-P-04-style header that has no mre_px column, plus one row.
    header = (
        "frame_index,imu_row_index,bank_deg,pitch_deg,translation_m,overlap_fraction,is_normal,excluded_reason,registration_success\n"
    )
    data_row = "0,0,0.0,0.0,0.0,1.0,true,,true\n"
    evidence_csv = tmp_path / "ft-p-04.csv"
    evidence_csv.write_text(header + data_row)

    # Act / Assert
    with pytest.raises(ValueError, match="mre_px"):
        load_frame_to_frame_csv(evidence_csv)
|
||||
|
||||
|
||||
def test_load_frame_to_frame_csv_round_trip(tmp_path: Path) -> None:
    """With an mre_px column present, rows parse into records."""
    # Arrange — frame 2 has an empty mre_px cell.
    evidence_csv = tmp_path / "ft-p-04.csv"
    evidence_csv.write_text("frame_index,mre_px\n0,0.5\n1,0.7\n2,\n3,1.1\n")

    # Act
    parsed = load_frame_to_frame_csv(evidence_csv)

    # Assert — blank mre_px rows are skipped.
    parsed_indices = [record.frame_index for record in parsed]
    assert parsed_indices == [0, 1, 3]
    assert parsed[0].mre_px == 0.5
|
||||
|
||||
|
||||
def test_summarize_mre_distribution_basic_stats() -> None:
    """Check count, median, max and p95 for MRE values 0.0 through 9.0."""
    # Arrange
    frame_pairs = [FrameToFrameRecord(frame_index=idx, mre_px=float(idx)) for idx in range(10)]
    reference_p95 = np.percentile(np.arange(10, dtype=float), 95)

    # Act
    stats = summarize_mre_distribution(frame_pairs)

    # Assert
    assert stats["count"] == 10
    assert stats["median"] == pytest.approx(4.5)
    assert stats["max"] == 9.0
    assert stats["p95"] == pytest.approx(reference_p95)
|
||||
|
||||
|
||||
def test_summarize_mre_distribution_empty_returns_nan() -> None:
    """An empty record list gives count 0 and NaN for median and p95."""
    # Arrange
    no_records = []

    # Act
    stats = summarize_mre_distribution(no_records)

    # Assert
    assert stats["count"] == 0
    assert math.isnan(stats["median"])
    assert math.isnan(stats["p95"])
|
||||
@@ -0,0 +1,411 @@
|
||||
"""Unit tests for ``runner.helpers.registration_classifier`` (FT-P-04 / AZ-412).
|
||||
|
||||
Covers AC-1 (normal-segment classification reproducibility), AC-2
|
||||
(success ratio ≥0.95), AC-3 (sharp-turn exclusion from denominator),
|
||||
and the CSV evidence shape.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import math
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from runner.helpers.registration_classifier import (
|
||||
ATTITUDE_LIMIT_DEG,
|
||||
DEFAULT_GROUND_FOOTPRINT_M,
|
||||
IMU_HZ,
|
||||
SUCCESS_RATIO_REQUIRED,
|
||||
TARGET_OVERLAP_FRACTION,
|
||||
VIDEO_FPS,
|
||||
VIDEO_FRAMES_PER_IMU_ROW,
|
||||
FrameAttitude,
|
||||
FrameClassification,
|
||||
ImuTelemetryRow,
|
||||
SuccessReport,
|
||||
classify_frames,
|
||||
compute_attitude,
|
||||
compute_overlap_fraction,
|
||||
compute_success_ratio,
|
||||
compute_translation_m,
|
||||
load_imu_telemetry,
|
||||
write_csv_evidence,
|
||||
)
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[3]
|
||||
DERKACHI_IMU_CSV = REPO_ROOT / "_docs" / "00_problem" / "input_data" / "flight_derkachi" / "data_imu.csv"
|
||||
|
||||
|
||||
def _level_row(time_s: float = 0.0) -> ImuTelemetryRow:
    """Return a level-cruise fixture row stamped at ``time_s``.

    Acceleration is entirely on z (-1000) and velocity is vx = 1000 cm/s with
    vy = vz = 0; ``timestamp_ms`` is ``time_s`` scaled by 1000.
    """
    stamp_ms = time_s * 1000.0
    level = ImuTelemetryRow(
        timestamp_ms=stamp_ms,
        time_s=time_s,
        xacc=0,
        yacc=0,
        zacc=-1000,
        vx_cms=1000.0,
        vy_cms=0.0,
        vz_cms=0.0,
    )
    return level
|
||||
|
||||
|
||||
def _rolled_row(time_s: float, roll_deg: float) -> ImuTelemetryRow:
    """Return a fixture row whose accelerometer reading encodes ``roll_deg``.

    A -1000 magnitude vector is split between y (sine of the angle) and z
    (cosine of the angle), each rounded to an int; x stays 0. Velocity matches
    the level-cruise fixture (vx = 1000 cm/s).
    """
    angle = math.radians(roll_deg)
    lateral = int(round(-1000.0 * math.sin(angle)))
    vertical = int(round(-1000.0 * math.cos(angle)))
    return ImuTelemetryRow(
        timestamp_ms=time_s * 1000.0,
        time_s=time_s,
        xacc=0,
        yacc=lateral,
        zacc=vertical,
        vx_cms=1000.0,
        vy_cms=0.0,
        vz_cms=0.0,
    )
|
||||
|
||||
|
||||
def _pitched_row(time_s: float, pitch_deg: float) -> ImuTelemetryRow:
    """Return a fixture row pitched by ``pitch_deg`` (positive = nose down).

    A -1000 magnitude vector is split between x (sine of the angle) and z
    (cosine of the angle), each rounded to an int; y stays 0. Velocity matches
    the level-cruise fixture (vx = 1000 cm/s).
    """
    angle = math.radians(pitch_deg)
    longitudinal = int(round(-1000.0 * math.sin(angle)))
    vertical = int(round(-1000.0 * math.cos(angle)))
    return ImuTelemetryRow(
        timestamp_ms=time_s * 1000.0,
        time_s=time_s,
        xacc=longitudinal,
        yacc=0,
        zacc=vertical,
        vx_cms=1000.0,
        vy_cms=0.0,
        vz_cms=0.0,
    )
|
||||
|
||||
|
||||
def test_load_imu_telemetry_parses_repo_csv() -> None:
    """Load the shipped ``data_imu.csv`` and pin its size and first row."""
    # Act
    imu_rows = load_imu_telemetry(DERKACHI_IMU_CSV)

    # Assert — results_report.md says "4,900 nonblank rows".
    assert len(imu_rows) == 4900
    head = imu_rows[0]
    assert head.time_s == pytest.approx(0.0, abs=1e-9)
    # Accelerometer components expected for the first data row of the file.
    assert head.xacc == 21
    assert head.yacc == -3
    assert head.zacc == -984
|
||||
|
||||
|
||||
def test_load_imu_telemetry_rejects_missing_file(tmp_path: Path) -> None:
    """Loading a non-existent IMU CSV raises FileNotFoundError."""
    # Arrange
    absent_csv = tmp_path / "missing.csv"

    # Act / Assert
    with pytest.raises(FileNotFoundError):
        load_imu_telemetry(absent_csv)
|
||||
|
||||
|
||||
def test_load_imu_telemetry_rejects_missing_columns(tmp_path: Path) -> None:
    """An IMU CSV with only the two time columns raises ValueError."""
    # Arrange
    incomplete_csv = tmp_path / "bad.csv"
    incomplete_csv.write_text("timestamp(ms),Time\n100,0.1\n")

    # Act / Assert
    with pytest.raises(ValueError, match="missing columns"):
        load_imu_telemetry(incomplete_csv)
|
||||
|
||||
|
||||
def test_compute_attitude_level_row_within_one_degree() -> None:
    """The level-cruise fixture row gives bank and pitch both inside ±1°."""
    # Arrange
    level = _level_row()

    # Act
    pose = compute_attitude(level)

    # Assert
    assert abs(pose.bank_deg) < 1.0
    assert abs(pose.pitch_deg) < 1.0
|
||||
|
||||
|
||||
def test_compute_attitude_right_roll_30_deg_round_trip() -> None:
    """A fixture row built with +30° roll is recovered as bank ≈ +30°."""
    # Arrange
    rolled_right = _rolled_row(time_s=0.1, roll_deg=30.0)

    # Act
    pose = compute_attitude(rolled_right)

    # Assert — a pure roll should leave pitch near zero.
    assert pose.bank_deg == pytest.approx(30.0, abs=0.5)
    assert abs(pose.pitch_deg) < 0.5
|
||||
|
||||
|
||||
def test_compute_attitude_left_roll_30_deg_round_trip() -> None:
    """A fixture row built with -30° roll is recovered as bank ≈ -30°."""
    # Arrange
    rolled_left = _rolled_row(time_s=0.1, roll_deg=-30.0)

    # Act
    pose = compute_attitude(rolled_left)

    # Assert
    assert pose.bank_deg == pytest.approx(-30.0, abs=0.5)
|
||||
|
||||
|
||||
def test_compute_attitude_pitch_down_15_deg_round_trip() -> None:
    """A fixture row pitched nose-down 15° is recovered as pitch ≈ +15°."""
    # Arrange
    nose_down = _pitched_row(time_s=0.1, pitch_deg=15.0)

    # Act
    pose = compute_attitude(nose_down)

    # Assert
    assert pose.pitch_deg == pytest.approx(15.0, abs=0.5)
|
||||
|
||||
|
||||
def test_compute_translation_m_uses_per_frame_dt() -> None:
    """Translation equals horizontal speed times one video-frame interval (1/30 s)."""
    # Arrange — vx = 1000 cm/s, i.e. a 10 m/s east cruise.
    cruise = ImuTelemetryRow(0.0, 0.0, 0, 0, -1000, vx_cms=1000.0, vy_cms=0.0, vz_cms=0.0)

    # Act
    moved_m = compute_translation_m(cruise, prev_row=None)

    # Assert — 10 m/s × (1/30 s) ≈ 0.333 m
    expected_m = 10.0 / 30.0
    assert moved_m == pytest.approx(expected_m, rel=1e-6)
|
||||
|
||||
|
||||
def test_compute_overlap_fraction_full_overlap_when_translation_zero() -> None:
    """Zero translation means the two footprints coincide: overlap 1.0."""
    # Act
    fraction = compute_overlap_fraction(translation_m=0.0, ground_footprint_m=147.0)

    # Assert
    assert fraction == pytest.approx(1.0)
|
||||
|
||||
|
||||
def test_compute_overlap_fraction_half_overlap_at_half_footprint() -> None:
    """Moving by half the footprint (73.5 of 147 m) leaves 50% overlap."""
    # Act
    fraction = compute_overlap_fraction(translation_m=73.5, ground_footprint_m=147.0)

    # Assert
    assert fraction == pytest.approx(0.5, abs=1e-6)
|
||||
|
||||
|
||||
def test_compute_overlap_fraction_clamped_at_zero() -> None:
    """A translation larger than the footprint yields exactly 0.0, never a negative value."""
    # Act — 300 m of travel against a 147 m footprint.
    fraction = compute_overlap_fraction(
        translation_m=300.0,
        ground_footprint_m=147.0,
    )

    # Assert
    assert fraction == 0.0
|
||||
|
||||
|
||||
def test_compute_overlap_fraction_rejects_zero_footprint() -> None:
    """A zero ground footprint is rejected with a ValueError naming the parameter."""
    # Arrange
    expected_message = "ground_footprint_m must be > 0"

    # Act / Assert
    with pytest.raises(ValueError, match=expected_message):
        compute_overlap_fraction(translation_m=1.0, ground_footprint_m=0.0)
|
||||
|
||||
|
||||
def test_classify_frames_expands_each_imu_row_to_three_video_frames() -> None:
    """Each IMU row fans out into VIDEO_FRAMES_PER_IMU_ROW (= 3) video frames."""
    # Arrange
    imu_rows = [_level_row(time_s=stamp) for stamp in (0.0, 0.1)]

    # Act
    frames = classify_frames(imu_rows)

    # Assert — two IMU rows produce six consecutively numbered frames.
    assert len(frames) == 2 * VIDEO_FRAMES_PER_IMU_ROW == 6
    frame_numbers = [frame.frame_index for frame in frames]
    source_rows = [frame.imu_row_index for frame in frames]
    assert frame_numbers == [0, 1, 2, 3, 4, 5]
    assert source_rows == [0, 0, 0, 1, 1, 1]
|
||||
|
||||
|
||||
def test_classify_frames_marks_level_cruise_as_normal() -> None:
    """Every frame derived from level-cruise rows is normal with no exclusion reason."""
    # Arrange — 10 rows of level cruise.
    cruise_rows = [_level_row(time_s=0.1 * step) for step in range(10)]

    # Act
    frames = classify_frames(cruise_rows)

    # Assert
    normal_flags = [frame.is_normal for frame in frames]
    reasons = [frame.excluded_reason for frame in frames]
    assert all(normal_flags)
    assert all(reason == "" for reason in reasons)
|
||||
|
||||
|
||||
def test_classify_frames_excludes_sharp_roll() -> None:
    """A single 25° roll row is excluded while the level rows on either side stay normal."""
    # Arrange — 3 level + 1 sharp roll + 3 level
    rows = [_level_row(time_s=0.1 * i) for i in range(3)]
    rows.append(_rolled_row(time_s=0.3, roll_deg=25.0))
    rows.extend(_level_row(time_s=0.1 * i) for i in range(4, 7))

    # Act
    frames = classify_frames(rows)

    # Assert — split frames by whether they come from the rolled row (index 3).
    sharp_frames = []
    other_frames = []
    for frame in frames:
        bucket = sharp_frames if frame.imu_row_index == 3 else other_frames
        bucket.append(frame)

    assert len(sharp_frames) == VIDEO_FRAMES_PER_IMU_ROW
    assert not any(frame.is_normal for frame in sharp_frames)
    assert all(
        frame.excluded_reason == "attitude_exceeds_limit" for frame in sharp_frames
    )
    assert all(frame.is_normal for frame in other_frames)
|
||||
|
||||
|
||||
def test_classify_frames_is_reproducible_ac1() -> None:
    """AC-1: classifying the same telemetry twice gives equal results."""
    # Arrange — first 100 rows loaded from the Derkachi IMU CSV.
    telemetry = load_imu_telemetry(DERKACHI_IMU_CSV)
    sample = telemetry[:100]

    # Act
    first_run = classify_frames(sample)
    second_run = classify_frames(sample)

    # Assert
    assert first_run == second_run
|
||||
|
||||
|
||||
def test_classify_frames_rejects_invalid_overlap_threshold() -> None:
    """A min_overlap_fraction of 1.5 raises a ValueError that names the parameter."""
    # Arrange
    single_row = [_level_row()]

    # Act / Assert
    with pytest.raises(ValueError, match="min_overlap_fraction"):
        classify_frames(single_row, min_overlap_fraction=1.5)
|
||||
|
||||
|
||||
def test_classify_frames_rejects_invalid_attitude_limit() -> None:
    """An attitude_limit_deg of 0.0 raises a ValueError that names the parameter."""
    # Arrange
    single_row = [_level_row()]

    # Act / Assert
    with pytest.raises(ValueError, match="attitude_limit_deg"):
        classify_frames(single_row, attitude_limit_deg=0.0)
|
||||
|
||||
|
||||
def test_compute_success_ratio_perfect_run_passes() -> None:
    """All frames normal and all registrations successful → ratio 1.0 and a pass."""
    # Arrange — 34 IMU rows × 3 = 102 frames, every one marked successful.
    cruise_rows = [_level_row(time_s=0.1 * i) for i in range(34)]
    frames = classify_frames(cruise_rows)
    all_succeeded = dict.fromkeys((frame.frame_index for frame in frames), True)

    # Act
    report = compute_success_ratio(frames, all_succeeded)

    # Assert
    frame_total = len(frames)
    assert report.denominator == frame_total
    assert report.success_count == frame_total
    assert report.ratio == 1.0
    assert report.passes is True
    assert report.excluded_count == 0
|
||||
|
||||
|
||||
def test_compute_success_ratio_at_95_pct_passes() -> None:
    """A success ratio of exactly 95% still satisfies AC-2."""
    # Arrange — 7 rows × 3 = 21 frames, trimmed to 20; only the first one fails → 19/20.
    cruise_rows = [_level_row(time_s=0.1 * i) for i in range(7)]
    frames = classify_frames(cruise_rows)[:20]
    outcomes = {
        frame.frame_index: position > 0 for position, frame in enumerate(frames)
    }

    # Act
    report = compute_success_ratio(frames, outcomes)

    # Assert
    assert report.denominator == 20
    assert report.success_count == 19
    assert report.ratio == pytest.approx(0.95)
    assert report.passes is True
|
||||
|
||||
|
||||
def test_compute_success_ratio_below_95_pct_fails() -> None:
    """A 94% success ratio does not satisfy AC-2."""
    # Arrange — 34 rows × 3 = 102 frames, trimmed to 100; the first 6 fail → 94/100.
    failing_frames = 6
    cruise_rows = [_level_row(time_s=0.1 * i) for i in range(34)]
    frames = classify_frames(cruise_rows)[:100]
    outcomes = {
        frame.frame_index: position >= failing_frames
        for position, frame in enumerate(frames)
    }

    # Act
    report = compute_success_ratio(frames, outcomes)

    # Assert
    assert report.denominator == 100
    assert report.ratio == pytest.approx(0.94)
    assert report.passes is False
|
||||
|
||||
|
||||
def test_compute_success_ratio_excludes_sharp_turn_from_denominator_ac3() -> None:
    """AC-3: frames from sharp-turn rows are left out of the denominator."""
    # Arrange — 5 normal + 5 sharp + 5 normal IMU rows = 45 frames total.
    rows = [_level_row(time_s=0.1 * i) for i in range(5)]
    rows.extend(_rolled_row(time_s=0.1 * (5 + i), roll_deg=30.0) for i in range(5))
    rows.extend(_level_row(time_s=0.1 * (10 + i)) for i in range(5))
    frames = classify_frames(rows)
    all_succeeded = {frame.frame_index: True for frame in frames}

    # Act
    report = compute_success_ratio(frames, all_succeeded)

    # Assert — 30 normal video frames; 15 excluded by attitude.
    assert report.denominator == 30
    assert report.excluded_by_attitude == 15
    assert report.excluded_by_overlap == 0
    assert report.excluded_by_missing_metric == 0
|
||||
|
||||
|
||||
def test_compute_success_ratio_handles_missing_metric_separately() -> None:
    """Normal frames with no success-map entry are counted as 'missing' and excluded."""
    # Arrange — the success map omits the first three frames.
    missing = 3
    cruise_rows = [_level_row(time_s=0.1 * i) for i in range(5)]
    frames = classify_frames(cruise_rows)
    partial_outcomes = {frame.frame_index: True for frame in frames[missing:]}

    # Act
    report = compute_success_ratio(frames, partial_outcomes)

    # Assert
    assert report.excluded_by_missing_metric == 3
    assert report.denominator == len(frames) - 3
|
||||
|
||||
|
||||
def test_constants_match_spec() -> None:
    """Module constants must carry the values stated in the acceptance criteria."""
    # Assert — exact-valued constants, checked pairwise in declaration order.
    expected_values = (
        (ATTITUDE_LIMIT_DEG, 10.0),
        (TARGET_OVERLAP_FRACTION, 0.40),
        (SUCCESS_RATIO_REQUIRED, 0.95),
        (VIDEO_FPS, 30),
        (IMU_HZ, 10),
        (VIDEO_FRAMES_PER_IMU_ROW, 3),
    )
    for actual, wanted in expected_values:
        assert actual == wanted

    # The footprint default only has to be positive.
    assert DEFAULT_GROUND_FOOTPRINT_M > 0
|
||||
|
||||
|
||||
def test_write_csv_evidence_round_trip(tmp_path: Path) -> None:
    """The evidence CSV has the specified header and one data row per classified frame.

    Also checks that a frame whose success-map entry is ``False`` is written
    with ``registration_success`` set to the string ``"false"``.
    """
    # Arrange
    rows = [_level_row(time_s=0.1 * i) for i in range(2)]
    classifications = classify_frames(rows)
    success_map = {0: True, 1: False, 2: True, 3: True, 4: True, 5: True}
    out_path = tmp_path / "ft-p-04.csv"

    # Act
    write_csv_evidence(out_path, classifications, success_map)

    # Assert
    # Read inside a context manager so the handle is closed deterministically
    # (no ResourceWarning); newline="" is what the csv module expects.
    with out_path.open(newline="") as handle:
        written = list(csv.reader(handle))

    header = written[0]
    assert header == [
        "frame_index",
        "imu_row_index",
        "bank_deg",
        "pitch_deg",
        "translation_m",
        "overlap_fraction",
        "is_normal",
        "excluded_reason",
        "registration_success",
    ]
    assert len(written) == 1 + len(classifications)

    # frame 1 must have registration_success=false written.
    # written[0] is the header, so frame 1 lives at written[2].
    success_col = header.index("registration_success")
    assert written[2][success_col] == "false"
|
||||
|
||||
|
||||
def test_write_csv_evidence_omits_metric_when_missing(tmp_path: Path) -> None:
    """Frames without a success-map entry emit an empty registration_success cell."""
    # Arrange
    rows = [_level_row(time_s=0.0)]
    classifications = classify_frames(rows)
    out_path = tmp_path / "ft-p-04-empty.csv"

    # Act
    write_csv_evidence(out_path, classifications, {})

    # Assert
    # Read inside a context manager so the handle is closed deterministically
    # (no ResourceWarning); newline="" is what the csv module expects.
    with out_path.open(newline="") as handle:
        written = list(csv.reader(handle))

    # Guard against a vacuous pass: the all() below would succeed on a file
    # with a header and no data rows.
    assert len(written) == 1 + len(classifications)
    assert all(row[8] == "" for row in written[1:])
|
||||
Reference in New Issue
Block a user