[AZ-408] [AZ-410] [AZ-411] Batch 69: synth injectors + FT-P-02/03/14

AZ-408 (3pt) — Replace AZ-406 injector scaffolds with concrete generators:
- outlier.py: deterministic stride + far-away tile replacement; AC-2 ≥350m offset
- blackout_spoof.py: paired video blackout + FC GPS spoof with ≤40ms alignment;
  AC-4 realistic fix_type/hdop; AC-NEW-8 200-500m inter-spoof deltas
- multi_segment.py: ≥3 disjoint windows, ≥30s gaps, ≤25% coverage
- fc_proxy.py: timed-splice runtime proxy with pre-activate RuntimeError guard
- _common.py: derive_rng + tile-manifest reader + tmpfs helpers
- injector_fixtures.py: pytest fixtures wired via runner conftest

AZ-410 (3pt) — FT-P-02 cumulative drift between satellite anchors:
- anchor_pair_detector.py: AC-1 detection, AC-2/3 pass-fraction,
  AC-4 monotonicity check, CSV evidence
- test_ft_p_02_derkachi_drift.py: scenario gated on upstream helper
  NotImplementedError (frame_source_replay / fdr_reader / imu_replay)

AZ-411 (2pt) — FT-P-03 + FT-P-14 schema + WGS84:
- estimate_schema.py: AC-1 schema completeness, AC-2 source-label set
  containment, AC-3 WGS84 range + int32 1e-7 decode
- test_ft_p_03_14_schema_wgs84.py: shared single-image-push scenario

Tests: 248 unit tests pass (+91 vs batch 68).
Reports: batch_69_report.md, batch_69_review.md (PASS),
cumulative_review_batches_67-69_cycle1_report.md (PASS).

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-16 17:54:00 +03:00
parent ff1b00200c
commit 702a0c0ff3
27 changed files with 4619 additions and 58 deletions
@@ -0,0 +1,206 @@
"""FT-P-02 — Cumulative drift between satellite anchors on Derkachi (AC-1.3).
The full scenario:
1. Replay the Derkachi MP4 at 30 fps through the SUT's file-frame source.
2. Replay ``data_imu.csv`` at 10 Hz through the FC inbound (1 IMU per 3
video frames).
3. Observe the SUT's outbound estimate stream + the FDR archive.
4. Detect every (visual_propagated|dead_reckoned) → satellite_anchored
transition; compute drift = ||propagated_centre - new_anchor||.
5. Bin drifts by ``last_satellite_anchor_age_ms``; assert AC-2/AC-3/AC-4.
6. Emit ``e2e-results/run-${RUN_ID}/ft-p-02-<fc_adapter>-<vio_strategy>.csv``
(one file per parametrization) with one row per pair.
What this file owns:
* The AC-1.3 logic above, wired through the harness's ``fc_adapter`` /
``vio_strategy`` parametrize matrix (AC-5).
* CSV evidence emission via the AZ-410-owned ``anchor_pair_detector``.
What this file does NOT own:
* The MP4 video-replay path → ``runner.helpers.frame_source_replay``
(still a stub; AZ-408 was about the synthetic-injection injectors,
not the video replayer); the scenario is skipped at runtime via the
``_harness_helpers_implemented`` fixture until that helper lands.
* The FDR-archive iteration → ``runner.helpers.fdr_reader`` (owned by
AZ-441); same skip gate.
* The MAVLink ``GLOBAL_POSITION_INT`` GT replay → handled by the
``imu_replay`` helper which currently raises NotImplementedError
(owned by AZ-407 in spec, but the helper file was not touched by
the AZ-407 batch).
When all three upstream helpers land, this file's runtime path activates
automatically — the skip is keyed off the ``NotImplementedError`` raised
by the helper stubs when they are probed, not off a hard-coded marker.
"""
from __future__ import annotations
from pathlib import Path
import pytest
from runner.helpers import anchor_pair_detector as apd
@pytest.fixture(scope="module")
def _harness_helpers_implemented() -> bool:
    """Report whether every upstream helper FT-P-02 needs has a real impl.

    Used to gate the full-replay scenario. Each helper entry point is
    probed by calling it with a path that does not exist:

    * ``FrameSourceReplayer.replay_video``
    * ``fdr_reader.iter_records``
    * ``ImuReplayer.replay``

    ``NotImplementedError`` is the one and only "still a stub" signal. We
    don't hard-code a "deferred until task X" marker because a developer
    who lands the helper would then also have to remember to flip it.

    Any *other* exception (for example a real implementation rejecting the
    bogus path) means execution got past the stub, so the helper counts as
    implemented. Treating such errors as "not implemented" would keep the
    scenario skipped forever once the helpers land.

    Returns:
        ``False`` as soon as one probe raises ``NotImplementedError``,
        ``True`` when none of them does.
    """
    from runner.helpers import fdr_reader, imu_replay
    from runner.helpers.frame_source_replay import FrameSourceReplayer

    # Probes are evaluated lazily and in order, so the gate stops at the
    # first stub it finds. The paths are deliberately non-existent: the
    # probes must never replay real data.
    probes = (
        lambda: FrameSourceReplayer(sink=_NullSink()).replay_video(  # type: ignore[arg-type]
            Path("/tmp/non-existent.mp4")
        ),
        # list() forces evaluation in case iter_records is a lazy generator.
        lambda: list(fdr_reader.iter_records(Path("/tmp/non-existent"))),
        lambda: imu_replay.ImuReplayer(emitter=_NullImuEmitter()).replay(  # type: ignore[arg-type]
            Path("/tmp/non-existent.csv")
        ),
    )
    for probe in probes:
        try:
            probe()
        except NotImplementedError:
            return False
        except Exception:
            # The helper ran real code and objected to the probe input;
            # that is evidence of an implementation, not of a stub.
            continue
    return True
class _NullSink:
def write_frame(self, jpeg_bytes: bytes, timestamp_ms: int) -> None:
return None
class _NullImuEmitter:
def emit(self, sample: object) -> None:
return None
@pytest.mark.traces_to("AC-1.3,AC-1,AC-2,AC-3,AC-4,AC-5")
def test_ft_p_02_derkachi_drift(
    fc_adapter: str,
    vio_strategy: str,
    evidence_dir,  # type: ignore[no-untyped-def]
    run_id: str,
    nfr_recorder,  # type: ignore[no-untyped-def]
    _harness_helpers_implemented: bool,
) -> None:
    """Full FT-P-02 scenario (AC-1.3). See module docstring.

    AC-1: anchor-pair detection from FDR stream — covered by
          ``anchor_pair_detector.detect_anchor_pairs``; unit-tested in
          ``test_anchor_pair_detector.py``.
    AC-2: visual-only drift bound (≥95 % < 100 m) — covered by aggregate().
    AC-3: IMU-fused drift bound (≥95 % < 50 m) — covered by aggregate().
    AC-4: bin medians monotonic with age — covered by check_monotonic().
    AC-5: parametrized across (fc_adapter, vio_strategy).

    Args:
        fc_adapter: flight-controller adapter under test; ``"ardupilot"``
            selects the ``sitl-ardupilot`` host, anything else ``sitl-inav``.
        vio_strategy: VIO strategy under test; only used to name the CSV.
        evidence_dir: directory that receives the CSV evidence file.
        run_id: run identifier used to locate the FDR archive.
        nfr_recorder: recorder receiving the metrics and the AC-4 partial.
        _harness_helpers_implemented: gate fixture; the test is skipped
            when it is false.
    """
    if not _harness_helpers_implemented:
        pytest.skip(
            "FT-P-02 full replay requires runner.helpers.{frame_source_replay,"
            "fdr_reader,imu_replay} — currently AZ-441 / AZ-407 leftovers. "
            "Pure-logic ACs covered by e2e/_unit_tests/helpers/test_anchor_pair_detector.py."
        )

    # Once the helpers land, the body below activates. We keep it under
    # the gate rather than commenting it out so the wiring stays under
    # code review.
    from runner.helpers import fdr_reader, imu_replay
    from runner.helpers.frame_source_replay import FrameSourceReplayer

    # Normalise once so both the FDR lookup and the CSV path work no matter
    # whether the fixture hands over a str or a path-like object.
    evidence_root = Path(evidence_dir)

    # 1. Spin up the SUT through the boundary-driving fixtures
    #    (mock_suite_sat URL + sitl_observer for the requested fc_adapter +
    #    a frame-sink + a MAVLink emitter for the requested vio_strategy).
    #    The actual wiring lives in helpers; the scenario only orchestrates.
    sitl_host = "sitl-ardupilot" if fc_adapter == "ardupilot" else "sitl-inav"

    # 2. Replay video + IMU.
    sink = _resolve_frame_sink()
    emitter = _resolve_fc_inbound_emitter(fc_adapter, sitl_host)
    video_path = Path("/test-data/flight_derkachi/flight_derkachi.mp4")
    imu_csv = Path("/test-data/flight_derkachi/data_imu.csv")
    # NOTE(review): the two replays run back to back. If both helpers block
    # until done, the IMU stream is not interleaved with the video as the
    # module docstring describes — confirm the helper semantics.
    FrameSourceReplayer(sink=sink).replay_video(video_path)
    imu_replay.ImuReplayer(emitter=emitter).replay(imu_csv)

    # 3. Crawl the FDR archive for the outbound estimate stream.
    fdr_root = evidence_root.parent / f"run-{run_id}" / "fdr"
    estimates: list[apd.FdrEstimate] = [
        _fdr_estimate_from_record(rec)
        for rec in fdr_reader.iter_records(fdr_root)
        if rec.record_type == "estimate"
    ]
    # Without this guard an empty archive would skip every AC check below
    # and the scenario would pass without having verified anything.
    assert estimates, f"no 'estimate' records found in FDR archive {fdr_root}"

    # 4. Aggregate + AC checks.
    report = apd.aggregate(estimates)
    apd.write_csv_evidence(report, evidence_root / f"ft-p-02-{fc_adapter}-{vio_strategy}.csv")

    # 5. Record metrics for the NFR/csv reporter.
    nfr_recorder.record_metric(
        "ft_p_02.visual_only_pass_fraction", report.visual_only_pass_fraction, ac_id="AC-2"
    )
    nfr_recorder.record_metric(
        "ft_p_02.imu_fused_pass_fraction", report.imu_fused_pass_fraction, ac_id="AC-3"
    )
    nfr_recorder.record_metric("ft_p_02.total_pairs", float(len(report.pairs)), ac_id="AC-1")

    # 6. AC assertions. AC-2 / AC-3 only apply when the flight produced
    #    pairs of the corresponding kind.
    if len(report.visual_only_pairs) > 0:
        assert report.visual_only_pass_fraction >= 0.95, (
            f"AC-2 (visual-only drift <100 m) failed at "
            f"{report.visual_only_pass_fraction:.2%} over {len(report.visual_only_pairs)} pairs"
        )
    if len(report.imu_fused_pairs) > 0:
        assert report.imu_fused_pass_fraction >= 0.95, (
            f"AC-3 (IMU-fused drift <50 m) failed at "
            f"{report.imu_fused_pass_fraction:.2%} over {len(report.imu_fused_pairs)} pairs"
        )
    if len(report.pairs) >= 20:
        # AC-4 requires statistical power; small-N flights skip the
        # monotonicity check per the spec's "N<20 flagged" note.
        assert not report.monotonic_violations, (
            "AC-4 (monotonic drift vs anchor age) failed: "
            + "; ".join(report.monotonic_violations)
        )
    else:
        nfr_recorder.partial("AC-4", reason=f"N={len(report.pairs)} < 20 — statistical power flagged")


def _fdr_estimate_from_record(rec) -> apd.FdrEstimate:  # type: ignore[no-untyped-def]
    """Convert one FDR ``estimate`` record into an ``apd.FdrEstimate``.

    ``lat_deg``, ``lon_deg`` and ``source_label`` are required payload keys
    (a missing one raises ``KeyError``). The remaining fields fall back to
    ``False`` / ``0.0`` / ``0`` when the payload omits them.
    """
    payload = rec.payload
    return apd.FdrEstimate(
        monotonic_ms=int(rec.monotonic_ms),
        lat_deg=float(payload["lat_deg"]),  # type: ignore[arg-type]
        lon_deg=float(payload["lon_deg"]),  # type: ignore[arg-type]
        source_label=str(payload["source_label"]),  # type: ignore[arg-type]
        imu_fused=bool(payload.get("imu_fused", False)),
        cov_semi_major_m=float(payload.get("cov_semi_major_m", 0.0)),  # type: ignore[arg-type]
        last_satellite_anchor_age_ms=int(
            payload.get("last_satellite_anchor_age_ms", 0)  # type: ignore[arg-type]
        ),
    )
def _resolve_frame_sink(): # type: ignore[no-untyped-def]
"""Stub helper resolved when the underlying replayer lands."""
raise NotImplementedError(
"frame sink resolution is owned by AZ-441 / runner.helpers.frame_source_replay"
)
def _resolve_fc_inbound_emitter(fc_adapter: str, host: str): # type: ignore[no-untyped-def]
"""Stub helper resolved when the FC inbound emitter lands."""
raise NotImplementedError(
"FC inbound emitter resolution is owned by AZ-416/AZ-417 / runner.helpers.imu_replay"
)