[AZ-408] [AZ-410] [AZ-411] Batch 69: synth injectors + FT-P-02/03/14

AZ-408 (3pt) — Replace AZ-406 injector scaffolds with concrete generators:
- outlier.py: deterministic stride + far-away tile replacement; AC-2 ≥350m offset
- blackout_spoof.py: paired video blackout + FC GPS spoof with ≤40ms alignment; AC-4 realistic fix_type/hdop; AC-NEW-8 200-500m inter-spoof deltas
- multi_segment.py: ≥3 disjoint windows, ≥30s gaps, ≤25% coverage
- fc_proxy.py: timed-splice runtime proxy with pre-activate RuntimeError guard
- _common.py: derive_rng + tile-manifest reader + tmpfs helpers
- injector_fixtures.py: pytest fixtures wired via runner conftest

AZ-410 (3pt) — FT-P-02 cumulative drift between satellite anchors:
- anchor_pair_detector.py: AC-1 detection, AC-2/3 pass-fraction, AC-4 monotonicity check, CSV evidence
- test_ft_p_02_derkachi_drift.py: scenario gated on upstream helper NotImplementedError (frame_source_replay / fdr_reader / imu_replay)

AZ-411 (2pt) — FT-P-03 + FT-P-14 schema + WGS84:
- estimate_schema.py: AC-1 schema completeness, AC-2 source-label set containment, AC-3 WGS84 range + int32 1e-7 decode
- test_ft_p_03_14_schema_wgs84.py: shared single-image-push scenario

Tests: 248 unit tests pass (+91 vs batch 68).
Reports: batch_69_report.md, batch_69_review.md (PASS), cumulative_review_batches_67-69_cycle1_report.md (PASS).

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-22 01:21:13 +00:00 · 2026-05-16 17:54:00 +03:00
parent ff1b00200c
commit 702a0c0ff3
27 changed files with 4619 additions and 58 deletions
@@ -0,0 +1,312 @@
+"""Unit tests for the AZ-410 anchor-pair detector (FT-P-02 logic).
+
+Validates AC-1 (anchor-pair detection), AC-2 (visual-only drift bound),
+AC-3 (IMU-fused drift bound), and AC-4 (monotonic distribution) using
+synthetic FdrEstimate streams. The full-replay scenario test
+(``test_ft_p_02_derkachi_drift.py``) imports this helper but is skipped
+until the docker harness helpers land — these tests are the AC coverage
+for the logic itself.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from runner.helpers.anchor_pair_detector import (
+    AnchorPair,
+    DEFAULT_AGE_BIN_EDGES_MS,
+    FdrEstimate,
+    aggregate,
+    bin_drifts,
+    check_monotonic,
+    compute_pass_fraction,
+    detect_anchor_pairs,
+    write_csv_evidence,
+)
+
+
+# ---------------------------------------------------------------------------
+# Stream builders
+# ---------------------------------------------------------------------------
+
+
+def _est(
+    t_ms: int,
+    lat: float,
+    lon: float,
+    label: str,
+    imu_fused: bool = False,
+    age_ms: int = 0,
+) -> FdrEstimate:
+    """Build one synthetic ``FdrEstimate`` sample for a test stream.
+
+    The short parameter names keep the stream literals in the tests compact;
+    each argument is forwarded unchanged to the matching ``FdrEstimate`` field.
+    """
+    sample = FdrEstimate(
+        source_label=label,  # type: ignore[arg-type]
+        monotonic_ms=t_ms,
+        lat_deg=lat,
+        lon_deg=lon,
+        last_satellite_anchor_age_ms=age_ms,
+        imu_fused=imu_fused,
+    )
+    return sample
+
+
+# Derkachi-ish base coords, passed as the lat/lon (degrees) of the synthetic
+# estimates and anchor pairs built by the helpers in this module.
+_BASE_LAT = 50.075
+_BASE_LON = 36.150
+
+
+# ---------------------------------------------------------------------------
+# AC-1: anchor-pair detection
+# ---------------------------------------------------------------------------
+
+
+def test_first_anchor_is_not_a_pair() -> None:
+    # Arrange — two back-to-back anchors with no propagated frame between them
+    first_anchor = _est(0, _BASE_LAT, _BASE_LON, "satellite_anchored", age_ms=0)
+    second_anchor = _est(100, _BASE_LAT, _BASE_LON, "satellite_anchored", age_ms=100)
+    # Act
+    detected = detect_anchor_pairs([first_anchor, second_anchor])
+    # Assert — neither anchor is preceded by a segment, so nothing is paired
+    assert detected == []
+
+
+def test_simple_visual_only_pair() -> None:
+    # Arrange — anchor, two visual frames, then the closing anchor that forms the pair.
+    estimates = [
+        _est(0, _BASE_LAT, _BASE_LON, "satellite_anchored"),
+        _est(100, _BASE_LAT + 0.0001, _BASE_LON, "visual_propagated"),
+        _est(200, _BASE_LAT + 0.0002, _BASE_LON, "visual_propagated"),
+        _est(300, _BASE_LAT - 0.0001, _BASE_LON, "satellite_anchored", age_ms=300),
+    ]
+    # Act
+    detected = detect_anchor_pairs(estimates)
+    # Assert — one pair, taken from the last propagated frame and the closing anchor
+    assert len(detected) == 1
+    pair = detected[0]
+    assert pair.propagated_centre_ms == 200
+    assert pair.anchor_ms == 300
+    assert pair.last_satellite_anchor_age_ms == 300
+    assert not pair.imu_fused_segment
+    assert pair.drift_m > 0
+
+
+def test_imu_fused_segment_classifies_pair() -> None:
+    # Arrange — only the first propagated frame carries imu_fused=True
+    estimates = [
+        _est(0, _BASE_LAT, _BASE_LON, "satellite_anchored"),
+        _est(100, _BASE_LAT + 0.0001, _BASE_LON, "visual_propagated", imu_fused=True),
+        _est(200, _BASE_LAT + 0.0002, _BASE_LON, "visual_propagated"),
+        _est(300, _BASE_LAT, _BASE_LON, "satellite_anchored", age_ms=300),
+    ]
+    # Act
+    detected = detect_anchor_pairs(estimates)
+    # Assert — a single fused frame is enough to flag the whole segment
+    leading_pair = detected[0]
+    assert leading_pair.imu_fused_segment is True
+
+
+def test_dead_reckoned_in_segment_still_pair() -> None:
+    # Arrange — the only frame between the two anchors is dead-reckoned
+    opening_anchor = _est(0, _BASE_LAT, _BASE_LON, "satellite_anchored")
+    dead_reckoned = _est(100, _BASE_LAT + 0.0001, _BASE_LON, "dead_reckoned")
+    closing_anchor = _est(200, _BASE_LAT, _BASE_LON, "satellite_anchored", age_ms=200)
+    # Act
+    detected = detect_anchor_pairs([opening_anchor, dead_reckoned, closing_anchor])
+    # Assert — a dead-reckoned segment still counts as one pair
+    assert len(detected) == 1
+
+
+def test_multiple_pairs_in_one_flight() -> None:
+    # Arrange — three anchors separated by one visual frame each
+    estimates = [
+        _est(0, _BASE_LAT, _BASE_LON, "satellite_anchored"),
+        _est(50, _BASE_LAT + 0.0001, _BASE_LON, "visual_propagated"),
+        _est(100, _BASE_LAT, _BASE_LON, "satellite_anchored", age_ms=100),
+        _est(150, _BASE_LAT + 0.0001, _BASE_LON, "visual_propagated"),
+        _est(200, _BASE_LAT, _BASE_LON, "satellite_anchored", age_ms=100),
+    ]
+    # Act
+    detected = detect_anchor_pairs(estimates)
+    # Assert — every anchor after the first closes one pair
+    assert len(detected) == 2
+
+
+# ---------------------------------------------------------------------------
+# Drift computation
+# ---------------------------------------------------------------------------
+
+
+def test_drift_is_geodesic_meters() -> None:
+    """A 1-degree latitude offset must be reported as roughly 111 km of drift, in metres."""
+    # Arrange — propagated frame sits 1 deg north of the closing anchor
+    estimates = [
+        _est(0, _BASE_LAT, _BASE_LON, "satellite_anchored"),
+        _est(100, _BASE_LAT + 1.0, _BASE_LON, "visual_propagated"),
+        _est(200, _BASE_LAT, _BASE_LON, "satellite_anchored", age_ms=200),
+    ]
+    # Act
+    detected = detect_anchor_pairs(estimates)
+    # Assert — the drift must land inside a 110–112 km bracket
+    drift_m = detected[0].drift_m
+    assert 110_000 < drift_m < 112_000
+
+
+# ---------------------------------------------------------------------------
+# AC-2 / AC-3: pass-fraction
+# ---------------------------------------------------------------------------
+
+
+def test_pass_fraction_empty_returns_zero() -> None:
+    # Arrange — no pairs at all
+    no_pairs: list[AnchorPair] = []
+    # Act
+    fraction = compute_pass_fraction(no_pairs, 100.0)
+    # Assert — an empty input is reported as a zero pass-fraction
+    assert fraction == 0.0
+
+
+def test_pass_fraction_all_pass() -> None:
+    # Arrange — ten pairs at 10 m drift, all well inside the 100 m bound
+    within_bound = [_make_pair(drift_m=10.0) for _ in range(10)]
+    # Act
+    fraction = compute_pass_fraction(within_bound, drift_bound_m=100.0)
+    # Assert — every pair passes
+    assert fraction == 1.0
+
+
+def test_pass_fraction_partial() -> None:
+    """Eight of ten pairs under the bound must yield a pass-fraction of 0.8."""
+    # Arrange — 8 pairs at 10 m (pass) plus 2 pairs at 200 m (fail), bound 100 m
+    passing = [_make_pair(drift_m=10.0) for _ in range(8)]
+    failing = [_make_pair(drift_m=200.0) for _ in range(2)]
+    # Act
+    f = compute_pass_fraction(passing + failing, drift_bound_m=100.0)
+    # Assert — compare with a tolerance: 0.8 is a computed ratio, and exact
+    # float equality would tie the test to one particular way of computing it.
+    assert f == pytest.approx(0.8)
+
+
+# ---------------------------------------------------------------------------
+# AC-4: bin medians + monotonicity
+# ---------------------------------------------------------------------------
+
+
+def test_bin_drifts_default_edges() -> None:
+    # Arrange — one (drift_m, age_ms) sample aimed at each default age bin
+    samples = [
+        (10.0, 500),      # <1s bin
+        (20.0, 2_000),    # 1-3s bin
+        (50.0, 5_000),    # 3-10s bin
+        (100.0, 20_000),  # 10-30s bin
+        (200.0, 60_000),  # >30s bin
+    ]
+    pairs = [_make_pair(drift_m=drift, age_ms=age) for drift, age in samples]
+    # Act
+    binned = bin_drifts(pairs)
+    # Assert — each bin holds exactly one entry and the medians are non-decreasing
+    per_bin_counts = [entry.count for entry in binned]
+    assert per_bin_counts == [1, 1, 1, 1, 1]
+    per_bin_medians = [entry.median_m for entry in binned]
+    assert per_bin_medians == sorted(per_bin_medians)
+
+
+def test_check_monotonic_passes_for_increasing_medians() -> None:
+    # Arrange — drift grows gently with anchor age (10 → 15 → 20 m)
+    samples = [(10.0, 500), (15.0, 2_000), (20.0, 5_000)]
+    binned = bin_drifts([_make_pair(drift_m=drift, age_ms=age) for drift, age in samples])
+    # Act
+    problems = check_monotonic(binned)
+    # Assert — nothing to report
+    assert problems == []
+
+
+def test_check_monotonic_flags_regression() -> None:
+    # Arrange — the older bin drifts less than the younger one (20 m → 10 m)
+    younger = _make_pair(drift_m=20.0, age_ms=500)
+    older = _make_pair(drift_m=10.0, age_ms=2_000)
+    binned = bin_drifts([younger, older])
+    # Act
+    problems = check_monotonic(binned)
+    # Assert — at least one message calls out the non-monotonic step
+    assert any("non-monotonic" in message for message in problems)
+
+
+def test_check_monotonic_flags_2x_jump() -> None:
+    # Arrange — adjacent bins jump from 100 m to 250 m, i.e. more than double
+    younger = _make_pair(drift_m=100.0, age_ms=500)
+    older = _make_pair(drift_m=250.0, age_ms=2_000)
+    binned = bin_drifts([younger, older])
+    # Act
+    problems = check_monotonic(binned)
+    # Assert — at least one message calls out the >2x step
+    assert any(">2x" in message for message in problems)
+
+
+# ---------------------------------------------------------------------------
+# aggregate() integration
+# ---------------------------------------------------------------------------
+
+
+def test_aggregate_round_trip() -> None:
+    # Arrange — first segment is visual-only, second contains an IMU-fused frame
+    estimates = [
+        _est(0, _BASE_LAT, _BASE_LON, "satellite_anchored"),
+        _est(100, _BASE_LAT + 0.0001, _BASE_LON, "visual_propagated"),
+        _est(200, _BASE_LAT, _BASE_LON, "satellite_anchored", age_ms=200),
+        _est(300, _BASE_LAT + 0.0001, _BASE_LON, "visual_propagated", imu_fused=True),
+        _est(400, _BASE_LAT, _BASE_LON, "satellite_anchored", age_ms=200),
+    ]
+    # Act
+    summary = aggregate(estimates)
+    # Assert — two pairs overall, split one-and-one between the two categories
+    assert len(summary.pairs) == 2
+    assert len(summary.visual_only_pairs) == 1
+    assert len(summary.imu_fused_pairs) == 1
+
+
+# ---------------------------------------------------------------------------
+# CSV evidence
+# ---------------------------------------------------------------------------
+
+
+def test_write_csv_evidence_round_trip(tmp_path: Path) -> None:
+    """``write_csv_evidence`` must emit a header row plus one row per pair in the report."""
+    # Arrange — build the report from a minimal anchor → visual → anchor stream
+    report = aggregate(
+        [
+            _est(0, _BASE_LAT, _BASE_LON, "satellite_anchored"),
+            _est(100, _BASE_LAT + 0.0001, _BASE_LON, "visual_propagated"),
+            _est(200, _BASE_LAT, _BASE_LON, "satellite_anchored", age_ms=200),
+        ]
+    )
+    csv_path = tmp_path / "ft-p-02.csv"
+    # Act
+    write_csv_evidence(report, csv_path)
+    lines = csv_path.read_text().splitlines()
+    # Assert — the header names the drift column; row count tracks report.pairs
+    assert "drift_m" in lines[0]
+    assert len(lines) == 1 + len(report.pairs)
+
+
+# ---------------------------------------------------------------------------
+# Helper
+# ---------------------------------------------------------------------------
+
+
+def _make_pair(drift_m: float = 0.0, age_ms: int = 0, imu_fused: bool = False) -> AnchorPair:
+    """Build an ``AnchorPair`` directly, bypassing detection.
+
+    Only the drift, anchor age and IMU-fused flag vary; timestamps are fixed
+    (0 / 100 / 200 ms) and both positions sit on the shared base coordinates.
+    """
+    pair = AnchorPair(
+        # Fixed timeline.
+        segment_first_ms=0,
+        propagated_centre_ms=100,
+        anchor_ms=200,
+        # Propagated position and anchor position coincide at the base point.
+        propagated_lat_deg=_BASE_LAT,
+        propagated_lon_deg=_BASE_LON,
+        anchor_lat_deg=_BASE_LAT,
+        anchor_lon_deg=_BASE_LON,
+        # Caller-controlled fields.
+        drift_m=drift_m,
+        last_satellite_anchor_age_ms=age_ms,
+        imu_fused_segment=imu_fused,
+    )
+    return pair
@@ -0,0 +1,196 @@
+"""Unit tests for the AZ-411 estimate-schema validators (FT-P-03, FT-P-14).
+
+Validates AC-1 (schema completeness), AC-2 (source-label set containment),
+AC-3 (WGS84 range), and the int32 1e-7 decoder. The full single-image
+push scenario in ``test_ft_p_03_14_schema_wgs84.py`` is skipped until
+the upstream replay/SITL helpers land — these tests are the AC coverage
+for the logic itself.
+"""
+
+from __future__ import annotations
+
+import math
+
+import pytest
+
+from runner.helpers.estimate_schema import (
+    ALLOWED_SOURCE_LABELS,
+    LAT_LON_SCALE,
+    REQUIRED_FIELDS,
+    aggregate_validations,
+    decode_lat_lon_int32,
+    validate_estimate_schema,
+    validate_source_label,
+    validate_wgs84_range,
+)
+
+
+# ---------------------------------------------------------------------------
+# AC-1: schema completeness
+# ---------------------------------------------------------------------------
+
+
+def _valid_record(**overrides: object) -> dict:
+    """Return a baseline record holding all four REQUIRED_FIELDS, with ``overrides`` applied on top."""
+    record: dict = {}
+    record["lat"] = 50.075
+    record["lon"] = 36.150
+    record["cov_semi_major_m"] = 4.5
+    record["last_satellite_anchor_age_ms"] = 1234
+    # Overrides replace baseline values in place and append any extra keys.
+    record.update(overrides)
+    return record
+
+
+def test_valid_record_passes_schema() -> None:
+    # Arrange
+    record = _valid_record()
+    # Act
+    outcome = validate_estimate_schema(record)
+    # Assert — clean bill of health: ok, nothing missing, nothing mistyped
+    assert outcome.ok is True
+    assert outcome.missing_fields == []
+    assert outcome.wrong_typed_fields == []
+
+
+def test_missing_field_caught() -> None:
+    # Arrange — drop one required key from an otherwise valid record
+    record = _valid_record()
+    record.pop("cov_semi_major_m")
+    # Act
+    outcome = validate_estimate_schema(record)
+    # Assert — validation fails and names the dropped key
+    assert not outcome.ok
+    assert "cov_semi_major_m" in outcome.missing_fields
+
+
+def test_int_typed_field_rejected_when_wrong_type() -> None:
+    # Arrange — give the integer-typed age field a float value
+    record = _valid_record(last_satellite_anchor_age_ms=1.5)
+    # Act
+    outcome = validate_estimate_schema(record)
+    # Assert — the field is reported as wrongly typed
+    assert not outcome.ok
+    assert "last_satellite_anchor_age_ms" in outcome.wrong_typed_fields
+
+
+def test_bool_does_not_silently_satisfy_int() -> None:
+    """``bool`` is a subclass of ``int`` in Python, so the validator has to reject it explicitly."""
+    # Arrange — a boolean where an integer is required
+    record = _valid_record(last_satellite_anchor_age_ms=True)
+    # Act
+    outcome = validate_estimate_schema(record)
+    # Assert — the bool is flagged as the wrong type
+    assert not outcome.ok
+    assert "last_satellite_anchor_age_ms" in outcome.wrong_typed_fields
+
+
+def test_required_fields_table_is_what_the_spec_says() -> None:
+    """Pin the REQUIRED_FIELDS names and order so the helper cannot drift from the AZ-411 spec."""
+    # Arrange — keep only the name half of each (name, type) entry
+    field_names = [field_name for field_name, _field_type in REQUIRED_FIELDS]
+    # Assert
+    expected = ["lat", "lon", "cov_semi_major_m", "last_satellite_anchor_age_ms"]
+    assert field_names == expected
+
+
+# ---------------------------------------------------------------------------
+# AC-2: source-label set containment
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("label", sorted(ALLOWED_SOURCE_LABELS))
+def test_each_allowed_label_passes(label: str) -> None:
+    # Arrange / Act — one case per allowed label, sorted for stable test ids
+    outcome = validate_source_label(label)
+    # Assert — accepted, and the label is echoed back unchanged
+    assert outcome.ok
+    assert outcome.observed == label
+
+
+def test_unknown_label_rejected() -> None:
+    # Arrange — a label this test expects to be outside the allowed set
+    unknown = "imu_only"
+    # Act
+    outcome = validate_source_label(unknown)
+    # Assert — rejected, with a reason that mentions set membership
+    assert not outcome.ok
+    assert "not in" in (outcome.reason or "")
+
+
+def test_non_string_label_rejected() -> None:
+    # Arrange — an int where a string label is required
+    not_a_label = 42
+    # Act
+    outcome = validate_source_label(not_a_label)
+    # Assert — rejected, with a reason that mentions the expected type
+    assert not outcome.ok
+    assert "expected str" in (outcome.reason or "")
+
+
+# ---------------------------------------------------------------------------
+# AC-3: WGS84 range + int32 decoding
+# ---------------------------------------------------------------------------
+
+
+def test_valid_wgs84_inside_range() -> None:
+    # Arrange — Derkachi-ish coordinates, comfortably inside the valid range
+    lat, lon = 50.075, 36.150
+    # Act
+    outcome = validate_wgs84_range(lat, lon)
+    # Assert
+    assert outcome.ok
+
+
+def test_lat_above_90_rejected() -> None:
+    # Arrange / Act — latitude one degree past the north pole
+    outcome = validate_wgs84_range(91.0, 0.0)
+    # Assert
+    assert not outcome.ok
+
+
+def test_lon_below_minus_180_rejected() -> None:
+    # Arrange / Act — longitude one degree past the -180 limit
+    outcome = validate_wgs84_range(0.0, -181.0)
+    # Assert
+    assert not outcome.ok
+
+
+def test_nan_rejected() -> None:
+    # Arrange / Act — NaN latitude paired with a valid longitude
+    outcome = validate_wgs84_range(math.nan, 0.0)
+    # Assert
+    assert not outcome.ok
+
+
+def test_decode_lat_lon_int32_round_trip() -> None:
+    # Arrange — Derkachi-ish coords pre-scaled to int32 1e-7 degrees
+    lat_e7, lon_e7 = 500_750_000, 361_500_000
+    # Act
+    decoded_lat, decoded_lon = decode_lat_lon_int32(lat_e7, lon_e7)
+    # Assert — within 1e-6 deg of the source values, and scaled by LAT_LON_SCALE exactly
+    assert abs(decoded_lat - 50.075) < 1e-6
+    assert abs(decoded_lon - 36.150) < 1e-6
+    assert decoded_lat == lat_e7 * LAT_LON_SCALE
+
+
+def test_decode_lat_lon_int32_rejects_out_of_int32_range() -> None:
+    # Arrange — the first values just outside the signed 32-bit range on each side
+    above_int32_max = 2 ** 31
+    below_int32_min = -(2 ** 31) - 1
+    # Act / Assert — the error message names whichever argument overflowed
+    with pytest.raises(ValueError, match="lat_e7"):
+        decode_lat_lon_int32(above_int32_max, 0)
+    with pytest.raises(ValueError, match="lon_e7"):
+        decode_lat_lon_int32(0, below_int32_min)
+
+
+# ---------------------------------------------------------------------------
+# aggregate_validations
+# ---------------------------------------------------------------------------
+
+
+def test_aggregate_validations_all_ok() -> None:
+    # Arrange — two valid records at different coordinates
+    baseline = _valid_record()
+    shifted = _valid_record(lat=49.9, lon=36.0)
+    # Act
+    schema_results, wgs84_results = aggregate_validations([baseline, shifted])
+    # Assert — every result in both lists is ok
+    assert all(entry.ok for entry in schema_results)
+    assert all(entry.ok for entry in wgs84_results)
+
+
+def test_aggregate_validations_surfaces_bad_record() -> None:
+    # Arrange — a valid record followed by one with lat removed
+    incomplete = _valid_record()
+    incomplete.pop("lat")
+    batch = [_valid_record(), incomplete]
+    # Act
+    schema_results, wgs84_results = aggregate_validations(batch)
+    # Assert — results line up with the input order: good first, bad second
+    assert schema_results[0].ok
+    assert not schema_results[1].ok
+    # The WGS84 result for the record without lat is expected to fail as well.
+    assert not wgs84_results[1].ok