[AZ-408] [AZ-410] [AZ-411] Batch 69: synth injectors + FT-P-02/03/14

AZ-408 (3pt) — Replace AZ-406 injector scaffolds with concrete generators:
- outlier.py: deterministic stride + far-away tile replacement; AC-2 ≥350m offset
- blackout_spoof.py: paired video blackout + FC GPS spoof with ≤40ms alignment;
  AC-4 realistic fix_type/hdop; AC-NEW-8 200-500m inter-spoof deltas
- multi_segment.py: ≥3 disjoint windows, ≥30s gaps, ≤25% coverage
- fc_proxy.py: timed-splice runtime proxy with pre-activate RuntimeError guard
- _common.py: derive_rng + tile-manifest reader + tmpfs helpers
- injector_fixtures.py: pytest fixtures wired via runner conftest

AZ-410 (3pt) — FT-P-02 cumulative drift between satellite anchors:
- anchor_pair_detector.py: AC-1 detection, AC-2/3 pass-fraction,
  AC-4 monotonicity check, CSV evidence
- test_ft_p_02_derkachi_drift.py: scenario gated on upstream helper
  NotImplementedError (frame_source_replay / fdr_reader / imu_replay)

AZ-411 (2pt) — FT-P-03 + FT-P-14 schema + WGS84:
- estimate_schema.py: AC-1 schema completeness, AC-2 source-label set
  containment, AC-3 WGS84 range + int32 1e-7 decode
- test_ft_p_03_14_schema_wgs84.py: shared single-image-push scenario

Tests: 248 unit tests pass (+91 vs batch 68).
Reports: batch_69_report.md, batch_69_review.md (PASS),
cumulative_review_batches_67-69_cycle1_report.md (PASS).

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-16 17:54:00 +03:00
parent ff1b00200c
commit 702a0c0ff3
27 changed files with 4619 additions and 58 deletions
@@ -0,0 +1,312 @@
"""Unit tests for the AZ-410 anchor-pair detector (FT-P-02 logic).
Validates AC-1 (anchor-pair detection), AC-2 (visual-only drift bound),
AC-3 (IMU-fused drift bound), and AC-4 (monotonic distribution) using
synthetic FdrEstimate streams. The full-replay scenario test
(``test_ft_p_02_derkachi_drift.py``) imports this helper but is skipped
until the docker harness helpers land — these tests are the AC coverage
for the logic itself.
"""
from __future__ import annotations
from pathlib import Path
import pytest
from runner.helpers.anchor_pair_detector import (
AnchorPair,
DEFAULT_AGE_BIN_EDGES_MS,
FdrEstimate,
aggregate,
bin_drifts,
check_monotonic,
compute_pass_fraction,
detect_anchor_pairs,
write_csv_evidence,
)
# ---------------------------------------------------------------------------
# Stream builders
# ---------------------------------------------------------------------------
def _est(
    t_ms: int, lat: float, lon: float, label: str,
    imu_fused: bool = False, age_ms: int = 0,
) -> FdrEstimate:
    """Build one synthetic ``FdrEstimate`` from short positional arguments.

    The short parameter names map onto the dataclass fields as follows:
    ``t_ms`` → ``monotonic_ms``, ``lat``/``lon`` → ``lat_deg``/``lon_deg``,
    ``label`` → ``source_label``, ``age_ms`` → ``last_satellite_anchor_age_ms``.
    """
    estimate = FdrEstimate(
        monotonic_ms=t_ms,
        lat_deg=lat,
        lon_deg=lon,
        # Plain ``str`` is passed on purpose; the checker complaint is silenced.
        source_label=label,  # type: ignore[arg-type]
        imu_fused=imu_fused,
        last_satellite_anchor_age_ms=age_ms,
    )
    return estimate
# Reference position shared by every synthetic stream (roughly Derkachi).
_BASE_LAT: float = 50.075
_BASE_LON: float = 36.150
# ---------------------------------------------------------------------------
# AC-1: anchor-pair detection
# ---------------------------------------------------------------------------
def test_first_anchor_is_not_a_pair() -> None:
    """Back-to-back anchors with no frames in between yield no pairs."""
    # Arrange — two consecutive anchors; each reports its own timestamp as age
    anchors_only = [
        _est(t_ms, _BASE_LAT, _BASE_LON, "satellite_anchored", age_ms=t_ms)
        for t_ms in (0, 100)
    ]
    # Act
    detected = detect_anchor_pairs(anchors_only)
    # Assert — no propagated segment precedes either anchor
    assert detected == []
def test_simple_visual_only_pair() -> None:
    """anchor → visual → visual → anchor produces exactly one visual-only pair."""
    # Arrange — the closing anchor is what turns the segment into a pair
    opening_anchor = _est(0, _BASE_LAT, _BASE_LON, "satellite_anchored")
    propagated = [
        _est(100, _BASE_LAT + 0.0001, _BASE_LON, "visual_propagated"),
        _est(200, _BASE_LAT + 0.0002, _BASE_LON, "visual_propagated"),
    ]
    closing_anchor = _est(
        300, _BASE_LAT - 0.0001, _BASE_LON, "satellite_anchored", age_ms=300
    )
    # Act
    detected = detect_anchor_pairs([opening_anchor, *propagated, closing_anchor])
    # Assert
    assert len(detected) == 1
    pair = detected[0]
    # The pair is centred on the last propagated frame and ends at the anchor.
    assert pair.propagated_centre_ms == 200
    assert pair.anchor_ms == 300
    assert pair.last_satellite_anchor_age_ms == 300
    assert not pair.imu_fused_segment
    assert pair.drift_m > 0
def test_imu_fused_segment_classifies_pair() -> None:
    """One IMU-fused frame inside the segment flags the whole pair as IMU-fused."""
    # Arrange — only the first propagated frame carries imu_fused=True
    fused_frame = _est(
        100, _BASE_LAT + 0.0001, _BASE_LON, "visual_propagated", imu_fused=True
    )
    plain_frame = _est(200, _BASE_LAT + 0.0002, _BASE_LON, "visual_propagated")
    estimates = [
        _est(0, _BASE_LAT, _BASE_LON, "satellite_anchored"),
        fused_frame,
        plain_frame,
        _est(300, _BASE_LAT, _BASE_LON, "satellite_anchored", age_ms=300),
    ]
    # Act
    detected = detect_anchor_pairs(estimates)
    # Assert
    first_pair = detected[0]
    assert first_pair.imu_fused_segment is True
def test_dead_reckoned_in_segment_still_pair() -> None:
    """A ``dead_reckoned`` frame between two anchors still forms a pair."""
    # Arrange
    dead_reckoned_frame = _est(100, _BASE_LAT + 0.0001, _BASE_LON, "dead_reckoned")
    estimates = [
        _est(0, _BASE_LAT, _BASE_LON, "satellite_anchored"),
        dead_reckoned_frame,
        _est(200, _BASE_LAT, _BASE_LON, "satellite_anchored", age_ms=200),
    ]
    # Act
    detected = detect_anchor_pairs(estimates)
    # Assert
    assert len(detected) == 1
def test_multiple_pairs_in_one_flight() -> None:
    """Three anchors separated by propagated frames give two pairs."""
    # Arrange — anchor, frame, anchor, frame, anchor
    first_leg = [
        _est(0, _BASE_LAT, _BASE_LON, "satellite_anchored"),
        _est(50, _BASE_LAT + 0.0001, _BASE_LON, "visual_propagated"),
        _est(100, _BASE_LAT, _BASE_LON, "satellite_anchored", age_ms=100),
    ]
    second_leg = [
        _est(150, _BASE_LAT + 0.0001, _BASE_LON, "visual_propagated"),
        _est(200, _BASE_LAT, _BASE_LON, "satellite_anchored", age_ms=100),
    ]
    # Act
    detected = detect_anchor_pairs(first_leg + second_leg)
    # Assert
    assert len(detected) == 2
# ---------------------------------------------------------------------------
# Drift computation
# ---------------------------------------------------------------------------
def test_drift_is_geodesic_meters() -> None:
    """Drift is a geodesic distance in metres: ~1 deg of latitude ≈ 111 km."""
    # Arrange — the propagated frame sits one full degree north of both anchors
    one_degree_north = _est(100, _BASE_LAT + 1.0, _BASE_LON, "visual_propagated")
    estimates = [
        _est(0, _BASE_LAT, _BASE_LON, "satellite_anchored"),
        one_degree_north,
        _est(200, _BASE_LAT, _BASE_LON, "satellite_anchored", age_ms=200),
    ]
    # Act
    detected = detect_anchor_pairs(estimates)
    # Assert — the drift must land strictly inside a 110–112 km bracket
    drift_m = detected[0].drift_m
    assert 110_000 < drift_m < 112_000
# ---------------------------------------------------------------------------
# AC-2 / AC-3: pass-fraction
# ---------------------------------------------------------------------------
def test_pass_fraction_empty_returns_zero() -> None:
    """With no pairs at all the pass fraction is reported as 0.0."""
    # Arrange / Act
    fraction = compute_pass_fraction([], 100.0)
    # Assert
    assert fraction == 0.0
def test_pass_fraction_all_pass() -> None:
    """Every pair under the bound gives a pass fraction of exactly 1.0."""
    # Arrange — ten pairs at 10 m drift, checked against a 100 m bound
    small_drift_pairs = [_make_pair(drift_m=10.0) for _ in range(10)]
    # Act
    fraction = compute_pass_fraction(small_drift_pairs, drift_bound_m=100.0)
    # Assert
    assert fraction == 1.0
def test_pass_fraction_partial() -> None:
    """Eight of ten pairs under the bound gives a pass fraction of 0.8."""
    # Arrange — 8 pairs at 10 m and 2 pairs at 200 m, bound 100 m
    within_bound = [_make_pair(drift_m=10.0) for _ in range(8)]
    beyond_bound = [_make_pair(drift_m=200.0) for _ in range(2)]
    pairs = within_bound + beyond_bound
    # Act
    f = compute_pass_fraction(pairs, drift_bound_m=100.0)
    # Assert — the fraction is a computed float, so compare with a tolerance
    # rather than relying on how the helper happens to round the division.
    assert f == pytest.approx(0.8)
# ---------------------------------------------------------------------------
# AC-4: bin medians + monotonicity
# ---------------------------------------------------------------------------
def test_bin_drifts_default_edges() -> None:
    """One pair per default age bin gives five single-entry bins, medians rising."""
    # Arrange — (drift_m, age_ms) samples, one aimed at each default bin
    samples = [
        (10.0, 500),       # <1s bin
        (20.0, 2_000),     # 1-3s bin
        (50.0, 5_000),     # 3-10s bin
        (100.0, 20_000),   # 10-30s bin
        (200.0, 60_000),   # >30s bin
    ]
    pairs = [_make_pair(drift_m=drift, age_ms=age) for drift, age in samples]
    # Act
    binned = bin_drifts(pairs)
    # Assert — every bin holds exactly one entry and medians never decrease
    assert [entry.count for entry in binned] == [1, 1, 1, 1, 1]
    bin_medians = [entry.median_m for entry in binned]
    assert bin_medians == sorted(bin_medians)
def test_check_monotonic_passes_for_increasing_medians() -> None:
    """Gently increasing drift with age produces no violations."""
    # Arrange — 10 m → 15 m → 20 m across increasing anchor ages
    samples = [(10.0, 500), (15.0, 2_000), (20.0, 5_000)]
    binned = bin_drifts(
        [_make_pair(drift_m=drift, age_ms=age) for drift, age in samples]
    )
    # Act
    problems = check_monotonic(binned)
    # Assert
    assert problems == []
def test_check_monotonic_flags_regression() -> None:
    """Drift that shrinks as anchor age grows is reported as non-monotonic."""
    # Arrange — the older sample has the smaller drift
    younger = _make_pair(drift_m=20.0, age_ms=500)
    older = _make_pair(drift_m=10.0, age_ms=2_000)
    binned = bin_drifts([younger, older])
    # Act
    problems = check_monotonic(binned)
    # Assert — at least one message names the regression
    matching = [message for message in problems if "non-monotonic" in message]
    assert matching
def test_check_monotonic_flags_2x_jump() -> None:
    """A median that more than doubles between bins is reported."""
    # Arrange — 100 m followed by 250 m, i.e. a 2.5x step
    younger = _make_pair(drift_m=100.0, age_ms=500)
    older = _make_pair(drift_m=250.0, age_ms=2_000)
    binned = bin_drifts([younger, older])
    # Act
    problems = check_monotonic(binned)
    # Assert — at least one message names the oversized jump
    matching = [message for message in problems if ">2x" in message]
    assert matching
# ---------------------------------------------------------------------------
# aggregate() integration
# ---------------------------------------------------------------------------
def test_aggregate_round_trip() -> None:
    """aggregate() splits detected pairs into visual-only and IMU-fused groups."""
    # Arrange — first segment is visual-only, second contains an IMU-fused frame
    visual_segment = [
        _est(0, _BASE_LAT, _BASE_LON, "satellite_anchored"),
        _est(100, _BASE_LAT + 0.0001, _BASE_LON, "visual_propagated"),
        _est(200, _BASE_LAT, _BASE_LON, "satellite_anchored", age_ms=200),
    ]
    fused_segment = [
        _est(300, _BASE_LAT + 0.0001, _BASE_LON, "visual_propagated", imu_fused=True),
        _est(400, _BASE_LAT, _BASE_LON, "satellite_anchored", age_ms=200),
    ]
    # Act
    summary = aggregate(visual_segment + fused_segment)
    # Assert — two pairs overall, one in each category
    assert len(summary.pairs) == 2
    assert len(summary.visual_only_pairs) == 1
    assert len(summary.imu_fused_pairs) == 1
# ---------------------------------------------------------------------------
# CSV evidence
# ---------------------------------------------------------------------------
def test_write_csv_evidence_round_trip(tmp_path: Path) -> None:
    """write_csv_evidence emits a header line plus one line per pair.

    Args:
        tmp_path: pytest-provided temporary directory that receives the CSV.
    """
    # Arrange — a single anchor → visual → anchor segment
    report = aggregate(
        [
            _est(0, _BASE_LAT, _BASE_LON, "satellite_anchored"),
            _est(100, _BASE_LAT + 0.0001, _BASE_LON, "visual_propagated"),
            _est(200, _BASE_LAT, _BASE_LON, "satellite_anchored", age_ms=200),
        ]
    )
    csv_path = tmp_path / "ft-p-02.csv"
    # Act
    write_csv_evidence(report, csv_path)
    lines = csv_path.read_text().splitlines()
    # Assert — the header names the drift column; the rest is one row per pair
    assert "drift_m" in lines[0]
    assert len(lines) == 1 + len(report.pairs)
# ---------------------------------------------------------------------------
# Helper
# ---------------------------------------------------------------------------
def _make_pair(
    drift_m: float = 0.0,
    age_ms: int = 0,
    imu_fused: bool = False,
) -> AnchorPair:
    """Build an ``AnchorPair`` directly, bypassing detection.

    Timestamps are fixed at 0 / 100 / 200 ms and both the propagated and the
    anchor position sit on the base coordinates. Only the drift, the anchor
    age and the IMU-fused flag vary, so ``drift_m`` is whatever the caller
    passes and is not derived from the positions.
    """
    pair = AnchorPair(
        segment_first_ms=0,
        propagated_centre_ms=100,
        anchor_ms=200,
        propagated_lat_deg=_BASE_LAT,
        propagated_lon_deg=_BASE_LON,
        anchor_lat_deg=_BASE_LAT,
        anchor_lon_deg=_BASE_LON,
        drift_m=drift_m,
        last_satellite_anchor_age_ms=age_ms,
        imu_fused_segment=imu_fused,
    )
    return pair
@@ -0,0 +1,196 @@
"""Unit tests for the AZ-411 estimate-schema validators (FT-P-03, FT-P-14).
Validates AC-1 (schema completeness), AC-2 (source-label set containment),
AC-3 (WGS84 range), and the int32 1e-7 decoder. The full single-image
push scenario in ``test_ft_p_03_14_schema_wgs84.py`` is skipped until
the upstream replay/SITL helpers land — these tests are the AC coverage
for the logic itself.
"""
from __future__ import annotations
import math
import pytest
from runner.helpers.estimate_schema import (
ALLOWED_SOURCE_LABELS,
LAT_LON_SCALE,
REQUIRED_FIELDS,
aggregate_validations,
decode_lat_lon_int32,
validate_estimate_schema,
validate_source_label,
validate_wgs84_range,
)
# ---------------------------------------------------------------------------
# AC-1: schema completeness
# ---------------------------------------------------------------------------
def _valid_record(**overrides: object) -> dict:
    """Return a baseline estimate record with all four required fields set.

    Keyword arguments replace the matching baseline entry, or are appended as
    extra keys when the name is not part of the baseline.
    """
    record: dict = dict(
        lat=50.075,
        lon=36.150,
        cov_semi_major_m=4.5,
        last_satellite_anchor_age_ms=1234,
    )
    record.update(overrides)
    return record
def test_valid_record_passes_schema() -> None:
    """The untouched baseline record validates with nothing reported."""
    # Arrange
    record = _valid_record()
    # Act
    outcome = validate_estimate_schema(record)
    # Assert — ok, with both problem lists empty
    assert outcome.ok is True
    assert outcome.missing_fields == []
    assert outcome.wrong_typed_fields == []
def test_missing_field_caught() -> None:
    """Dropping a required key is reported under ``missing_fields``."""
    # Arrange — baseline record without its covariance field
    incomplete = _valid_record()
    incomplete.pop("cov_semi_major_m")
    # Act
    outcome = validate_estimate_schema(incomplete)
    # Assert
    assert not outcome.ok
    assert "cov_semi_major_m" in outcome.missing_fields
def test_int_typed_field_rejected_when_wrong_type() -> None:
    """A float in the integer age field is reported as wrongly typed."""
    # Arrange — the anchor age is expected to be an int, so 1.5 must not pass
    float_age_record = _valid_record(last_satellite_anchor_age_ms=1.5)
    # Act
    outcome = validate_estimate_schema(float_age_record)
    # Assert
    assert not outcome.ok
    assert "last_satellite_anchor_age_ms" in outcome.wrong_typed_fields
def test_bool_does_not_silently_satisfy_int() -> None:
    """``bool`` subclasses ``int`` in Python, yet ``True`` must not pass as an int."""
    # Arrange — put a boolean where the integer anchor age belongs
    bool_age_record = _valid_record(last_satellite_anchor_age_ms=True)
    # Act
    outcome = validate_estimate_schema(bool_age_record)
    # Assert
    assert not outcome.ok
    assert "last_satellite_anchor_age_ms" in outcome.wrong_typed_fields
def test_required_fields_table_is_what_the_spec_says() -> None:
    """Pin the names and order in ``REQUIRED_FIELDS`` so edits are deliberate."""
    # Arrange — each table entry is a (name, <second element>) pair
    expected_names = [
        "lat",
        "lon",
        "cov_semi_major_m",
        "last_satellite_anchor_age_ms",
    ]
    # Act
    declared_names = [field_name for field_name, _ in REQUIRED_FIELDS]
    # Assert
    assert declared_names == expected_names
# ---------------------------------------------------------------------------
# AC-2: source-label set containment
# ---------------------------------------------------------------------------
@pytest.mark.parametrize("label", sorted(ALLOWED_SOURCE_LABELS))
def test_each_allowed_label_passes(label: str) -> None:
    """Every member of ``ALLOWED_SOURCE_LABELS`` validates and is echoed back."""
    # Arrange / Act
    outcome = validate_source_label(label)
    # Assert — accepted, and the observed value is the label that was passed
    assert outcome.ok
    assert outcome.observed == label
def test_unknown_label_rejected() -> None:
    """A string that is not an allowed label fails with a 'not in' reason."""
    # Arrange / Act
    outcome = validate_source_label("imu_only")
    # Assert — a missing reason is treated as an empty string
    reason_text = outcome.reason or ""
    assert not outcome.ok
    assert "not in" in reason_text
def test_non_string_label_rejected() -> None:
    """A non-string label fails with an 'expected str' reason."""
    # Arrange / Act
    outcome = validate_source_label(42)
    # Assert — a missing reason is treated as an empty string
    reason_text = outcome.reason or ""
    assert not outcome.ok
    assert "expected str" in reason_text
# ---------------------------------------------------------------------------
# AC-3: WGS84 range + int32 decoding
# ---------------------------------------------------------------------------
def test_valid_wgs84_inside_range() -> None:
    """An ordinary mid-latitude coordinate pair is accepted."""
    # Arrange
    lat_deg, lon_deg = 50.075, 36.150
    # Act
    outcome = validate_wgs84_range(lat_deg, lon_deg)
    # Assert
    assert outcome.ok
def test_lat_above_90_rejected() -> None:
    """A latitude of 91 degrees is rejected."""
    # Arrange / Act
    outcome = validate_wgs84_range(91.0, 0.0)
    # Assert
    assert not outcome.ok
def test_lon_below_minus_180_rejected() -> None:
    """A longitude of -181 degrees is rejected."""
    # Arrange / Act
    outcome = validate_wgs84_range(0.0, -181.0)
    # Assert
    assert not outcome.ok
def test_nan_rejected() -> None:
    """A NaN latitude is rejected."""
    # Arrange
    nan_lat = math.nan
    # Act
    outcome = validate_wgs84_range(nan_lat, 0.0)
    # Assert
    assert not outcome.ok
def test_decode_lat_lon_int32_round_trip() -> None:
    """Integers scaled by 1e7 decode back to the original degrees."""
    # Arrange — 50.075 and 36.150 degrees expressed as 1e-7-degree integers
    encoded_lat = 500_750_000
    encoded_lon = 361_500_000
    tolerance_deg = 1e-6
    # Act
    decoded_lat, decoded_lon = decode_lat_lon_int32(encoded_lat, encoded_lon)
    # Assert — close to the source values...
    assert abs(decoded_lat - 50.075) < tolerance_deg
    assert abs(decoded_lon - 36.150) < tolerance_deg
    # ...and latitude is exactly the integer multiplied by the published scale.
    assert decoded_lat == encoded_lat * LAT_LON_SCALE
def test_decode_lat_lon_int32_rejects_out_of_int32_range() -> None:
    """Values one step outside the signed 32-bit range raise ``ValueError``."""
    # Arrange — the first integers beyond either end of the int32 range
    above_int32_max = 2 ** 31
    below_int32_min = -(2 ** 31) - 1
    # Act / Assert — the message names whichever argument overflowed
    with pytest.raises(ValueError, match="lat_e7"):
        decode_lat_lon_int32(above_int32_max, 0)
    with pytest.raises(ValueError, match="lon_e7"):
        decode_lat_lon_int32(0, below_int32_min)
# ---------------------------------------------------------------------------
# aggregate_validations
# ---------------------------------------------------------------------------
def test_aggregate_validations_all_ok() -> None:
    """Two valid records produce only passing schema and WGS84 results."""
    # Arrange — the baseline plus a second record at a nearby position
    baseline = _valid_record()
    nearby = _valid_record(lat=49.9, lon=36.0)
    # Act
    schema_results, wgs84_results = aggregate_validations([baseline, nearby])
    # Assert
    assert all(result.ok for result in schema_results)
    assert all(result.ok for result in wgs84_results)
def test_aggregate_validations_surfaces_bad_record() -> None:
    """A record without ``lat`` fails both validators at its own index."""
    # Arrange — index 0 is valid, index 1 has no latitude
    without_lat = _valid_record()
    without_lat.pop("lat")
    batch = [_valid_record(), without_lat]
    # Act
    schema_results, wgs84_results = aggregate_validations(batch)
    # Assert — results line up with the input order
    assert schema_results[0].ok
    assert not schema_results[1].ok
    # The WGS84 check cannot pass either when there is no latitude to test.
    assert not wgs84_results[1].ok