Files
Oleksandr Bezdieniezhnykh 702a0c0ff3 [AZ-408] [AZ-410] [AZ-411] Batch 69: synth injectors + FT-P-02/03/14
AZ-408 (3pt) — Replace AZ-406 injector scaffolds with concrete generators:
- outlier.py: deterministic stride + far-away tile replacement; AC-2 ≥350m offset
- blackout_spoof.py: paired video blackout + FC GPS spoof with ≤40ms alignment;
  AC-4 realistic fix_type/hdop; AC-NEW-8 200-500m inter-spoof deltas
- multi_segment.py: ≥3 disjoint windows, ≥30s gaps, ≤25% coverage
- fc_proxy.py: timed-splice runtime proxy with pre-activate RuntimeError guard
- _common.py: derive_rng + tile-manifest reader + tmpfs helpers
- injector_fixtures.py: pytest fixtures wired via runner conftest

AZ-410 (3pt) — FT-P-02 cumulative drift between satellite anchors:
- anchor_pair_detector.py: AC-1 detection, AC-2/3 pass-fraction,
  AC-4 monotonicity check, CSV evidence
- test_ft_p_02_derkachi_drift.py: scenario gated on upstream helper
  NotImplementedError (frame_source_replay / fdr_reader / imu_replay)

AZ-411 (2pt) — FT-P-03 + FT-P-14 schema + WGS84:
- estimate_schema.py: AC-1 schema completeness, AC-2 source-label set
  containment, AC-3 WGS84 range + int32 1e-7 decode
- test_ft_p_03_14_schema_wgs84.py: shared single-image-push scenario

Tests: 248 unit tests pass (+91 vs batch 68).
Reports: batch_69_report.md, batch_69_review.md (PASS),
cumulative_review_batches_67-69_cycle1_report.md (PASS).

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-16 17:54:00 +03:00

405 lines
14 KiB
Python

"""Behavioural tests for the AZ-408 outlier injector.
Covers AC-1 (seed determinism), AC-2 (geodesic offset enforcement), and
AC-6 (tmpfs scratch isolation). Density-flag mapping is tested directly
against the ``_DENSITY_RATIO`` table.
"""
from __future__ import annotations
import csv
import io
import json
import math
from pathlib import Path
import pytest
from fixtures.injectors import outlier
from fixtures.injectors._common import (
derive_rng,
far_away_indices,
haversine_m,
iter_video_frame_indices,
read_tile_manifest,
)
# ---------------------------------------------------------------------------
# Fixture-builder helpers (synthetic tile cache + frames)
# ---------------------------------------------------------------------------
def _write_synthetic_frame(path: Path, color: tuple[int, int, int] = (40, 40, 40)) -> None:
    """Save a 256x256 solid-colour RGB JPEG at ``path``.

    Every encoder option is spelled out explicitly rather than left to
    Pillow's defaults.
    """
    from PIL import Image  # noqa: PLC0415

    encoder_options = {
        "format": "JPEG",
        "quality": 85,
        "optimize": False,
        "progressive": False,
        "subsampling": 2,
    }
    Image.new("RGB", (256, 256), color=color).save(path, **encoder_options)
def _build_synthetic_frames_dir(parent: Path, count: int = 100) -> Path:
"""Make a fake AD*.jpg directory under ``parent/frames``."""
frames_dir = parent / "frames"
frames_dir.mkdir(parents=True, exist_ok=True)
for i in range(count):
_write_synthetic_frame(frames_dir / f"AD{i + 1:06d}.jpg")
return frames_dir
def _build_synthetic_tile_cache(parent: Path, n_tiles: int = 16) -> Path:
"""Make a fake tile-cache tree under ``parent/tile-cache``.
The fake cache covers the same Derkachi bbox the real builder uses,
but with a smaller grid so the unit test stays fast. Tiles are
placed at zoom 18 with deterministic (tx, ty) offsets — the
far-away-tile check uses geodesic distance computed from the
(tx, ty) so any spread > 350 m at zoom 18 satisfies AC-2.
"""
cache_dir = parent / "tile-cache"
tiles_dir = cache_dir / "tiles" / "18"
tiles_dir.mkdir(parents=True, exist_ok=True)
rows = []
# Zoom-18 grid spread of ~10 tiles each axis covers ~1.5 km at the
# Derkachi latitude — easily > 350 m offset between corners.
base_tx = 1 << 17
base_ty = 1 << 17
for i in range(n_tiles):
tx = base_tx + (i % 4) * 4
ty = base_ty + (i // 4) * 4
tile_subdir = tiles_dir / str(tx)
tile_subdir.mkdir(parents=True, exist_ok=True)
_write_synthetic_frame(tile_subdir / f"{ty}.jpg", color=(i * 5, 90, 200 - i * 5))
rows.append(
{
"zoom_level": 18,
"tile_x": tx,
"tile_y": ty,
"capture_date": "2025-11-01",
"source": "stub",
"m_per_px": 0.5,
"jpeg_path": f"tiles/18/{tx}/{ty}.jpg",
"content_hash": "deadbeef",
"provenance": f"paired_gmaps:AD{i + 1:06d}" if i < 16 else "STUB",
}
)
manifest = cache_dir / "manifest.csv"
with manifest.open("w", newline="") as fp:
writer = csv.DictWriter(fp, fieldnames=list(rows[0].keys()), lineterminator="\n")
writer.writeheader()
writer.writerows(rows)
return cache_dir
# ---------------------------------------------------------------------------
# AC-1: density-flag determinism
# ---------------------------------------------------------------------------
@pytest.mark.parametrize(
    "density, expected_stride",
    [("light", 100), ("medium", 10), ("heavy", 3)],
)
def test_density_ratio_maps_to_correct_stride(density: outlier.Density, expected_stride: int) -> None:
    """Each density flag's ``_DENSITY_RATIO`` entry yields the documented stride."""
    # Arrange
    frame_total = 1000
    ratio = outlier._DENSITY_RATIO[density]
    # Act
    picked = list(iter_video_frame_indices(frame_total, ratio))
    # Assert
    assert picked[0] == 0
    # Gap between the first two picks is the stride.
    assert picked[1] - picked[0] == expected_stride
    # Number of multiples of the stride in [0, frame_total), i.e. ceil(total / stride).
    assert len(picked) == len(range(0, frame_total, expected_stride))
def test_build_is_seed_deterministic(tmp_path: Path) -> None:
    """AC-1: building one plan twice gives byte-identical manifests and replaced frames."""
    # Arrange
    plan = outlier.OutlierInjectionPlan(
        source_frames_dir=_build_synthetic_frames_dir(tmp_path, count=80),
        tile_cache_dir=_build_synthetic_tile_cache(tmp_path, n_tiles=16),
        density="medium",
        seed=42,
    )
    first_run = tmp_path / "run_a"
    second_run = tmp_path / "run_b"
    # Act
    for run_dir in (first_run, second_run):
        outlier.build(plan, run_dir)
    # Assert — manifests match byte for byte
    assert (first_run / "manifest.csv").read_bytes() == (second_run / "manifest.csv").read_bytes()
    # Every frame listed in the manifest matches byte for byte
    manifest_text = (first_run / "manifest.csv").read_text()
    replaced = list(csv.DictReader(io.StringIO(manifest_text)))
    assert replaced, "manifest should have at least one replaced frame"
    for entry in replaced:
        name = entry["src_jpeg_path"]
        bytes_first = (first_run / "frames" / name).read_bytes()
        bytes_second = (second_run / "frames" / name).read_bytes()
        assert bytes_first == bytes_second, f"replaced frame {name} differs across runs"
def test_different_seeds_produce_different_replacements(tmp_path: Path) -> None:
    """Sanity: two plans differing only in seed pick different replacement tiles."""
    # Arrange
    frames = _build_synthetic_frames_dir(tmp_path, count=40)
    cache = _build_synthetic_tile_cache(tmp_path, n_tiles=16)
    runs = [
        (
            outlier.OutlierInjectionPlan(
                source_frames_dir=frames, tile_cache_dir=cache, density="medium", seed=seed
            ),
            tmp_path / dirname,
        )
        for seed, dirname in ((1, "seed_a"), (2, "seed_b"))
    ]
    # Act
    for plan, out_dir in runs:
        outlier.build(plan, out_dir)
    # Assert — the ordered (tile_x, tile_y) picks differ between the two runs
    picks = []
    for _, out_dir in runs:
        manifest_text = (out_dir / "manifest.csv").read_text()
        picks.append(
            [
                (row["replacement_tile_x"], row["replacement_tile_y"])
                for row in csv.DictReader(io.StringIO(manifest_text))
            ]
        )
    pick_a, pick_b = picks
    assert pick_a and pick_b
    assert pick_a != pick_b, "different seeds should produce different replacement picks"
# ---------------------------------------------------------------------------
# AC-2: every replacement crop is ≥350 m from the original frame
# ---------------------------------------------------------------------------
def test_every_replacement_exceeds_min_offset(tmp_path: Path) -> None:
    """AC-2: every manifest row reports a geodesic offset of at least 350 m.

    The check is applied to all rows of the synthetic cache, and to the
    minimum recorded on the build report.
    """
    # Arrange
    out_root = tmp_path / "out"
    plan = outlier.OutlierInjectionPlan(
        source_frames_dir=_build_synthetic_frames_dir(tmp_path, count=60),
        tile_cache_dir=_build_synthetic_tile_cache(tmp_path, n_tiles=16),
        density="medium",
        seed=7,
        min_offset_m=350.0,
    )
    # Act
    report = outlier.build(plan, out_root)
    # Assert
    manifest_text = (out_root / "manifest.csv").read_text()
    entries = list(csv.DictReader(io.StringIO(manifest_text)))
    assert entries, "should have replaced at least one frame"
    offsets = [float(entry["geodesic_offset_m"]) for entry in entries]
    too_close = [value for value in offsets if not value >= 350.0]
    assert not too_close, f"min offset {min(offsets)} < 350 m"
    assert report.min_geodesic_offset_m >= 350.0
def test_far_away_indices_filters_by_distance() -> None:
    """``far_away_indices`` drops the nearby row and keeps only the distant one."""
    # Arrange
    from fixtures.injectors._common import TileGtRow

    # Last two TileGtRow fields, per row: the source, then two candidates.
    coords = [
        (50.0, 30.0),  # source row
        (50.001, 30.001),  # ~130 m from the source — below the threshold
        (50.02, 30.02),  # ~2.6 km from the source — above the threshold
    ]
    rows = [
        TileGtRow(18, tile_x, 0, "", "", 0.5, "", "", "", *pair)
        for tile_x, pair in enumerate(coords)
    ]
    # Act
    far = far_away_indices(rows, src_idx=0, min_offset_m=350.0)
    # Assert
    assert far == [2]
# ---------------------------------------------------------------------------
# AC-6: tmpfs scratch isolation + manifest schema
# ---------------------------------------------------------------------------
def test_build_writes_only_under_out_root(tmp_path: Path) -> None:
    """AC-6: the build creates exactly the expected entries and nothing escapes out_root.

    Two checks:

    * ``out_root`` holds exactly ``frames``, ``manifest.csv`` and
      ``summary.json``;
    * the set of paths under ``tmp_path`` but outside ``out_root`` (the
      source frames and tile cache) is the same before and after the build.

    The second check compares path sets only: it catches created or deleted
    files, not in-place content changes, and it cannot see writes outside
    ``tmp_path``.
    """
    # Arrange
    src_root = tmp_path / "src"
    frames = _build_synthetic_frames_dir(src_root, count=30)
    cache = _build_synthetic_tile_cache(src_root, n_tiles=16)
    plan = outlier.OutlierInjectionPlan(
        source_frames_dir=frames, tile_cache_dir=cache, density="heavy"
    )
    out_root = tmp_path / "out"

    def _paths_outside_out_root() -> set[Path]:
        # Everything under tmp_path except out_root itself and its descendants.
        return {
            p
            for p in tmp_path.rglob("*")
            if p != out_root and out_root not in p.parents
        }

    before = _paths_outside_out_root()
    # Act
    outlier.build(plan, out_root)
    # Assert — only expected entries present inside out_root
    expected = {
        "frames",
        "manifest.csv",
        "summary.json",
    }
    actual = {p.name for p in out_root.iterdir()}
    assert actual == expected
    # Assert — nothing created or removed outside out_root
    after = _paths_outside_out_root()
    changed = sorted(str(p) for p in before ^ after)
    assert not changed, f"build touched paths outside out_root: {changed}"
def test_build_overwrites_existing_out_root(tmp_path: Path) -> None:
    """A second build into the same out_root removes files left from the previous run."""
    # Arrange
    src_root = tmp_path / "src"
    plan = outlier.OutlierInjectionPlan(
        source_frames_dir=_build_synthetic_frames_dir(src_root, count=20),
        tile_cache_dir=_build_synthetic_tile_cache(src_root, n_tiles=16),
        density="medium",
    )
    out_root = tmp_path / "out"
    outlier.build(plan, out_root)
    # Leftover the rebuild is expected to wipe.
    leftover = out_root / "stale.txt"
    leftover.write_text("stale")
    # Act
    outlier.build(plan, out_root)
    # Assert
    assert not leftover.exists()
def test_summary_json_matches_report(tmp_path: Path) -> None:
    """``summary.json`` carries the scenario name, density and the report's frame counts."""
    # Arrange
    src_root = tmp_path / "src"
    plan = outlier.OutlierInjectionPlan(
        source_frames_dir=_build_synthetic_frames_dir(src_root, count=50),
        tile_cache_dir=_build_synthetic_tile_cache(src_root, n_tiles=16),
        density="light",
        seed=3,
    )
    out_root = tmp_path / "out"
    # Act
    report = outlier.build(plan, out_root)
    summary = json.loads((out_root / "summary.json").read_text())
    # Assert
    assert summary["scenario"] == "outlier-injection-derkachi"
    # Counters in the JSON mirror the attributes of the returned report.
    for counter in ("total_source_frames", "replaced_frame_count"):
        assert summary[counter] == getattr(report, counter)
    assert summary["density"] == "light"
# ---------------------------------------------------------------------------
# Error handling
# ---------------------------------------------------------------------------
def test_missing_source_frames_raises(tmp_path: Path) -> None:
    """A non-existent frames directory makes ``build`` raise ``FileNotFoundError``."""
    # Arrange
    absent_frames = tmp_path / "does-not-exist"
    plan = outlier.OutlierInjectionPlan(
        source_frames_dir=absent_frames,
        tile_cache_dir=_build_synthetic_tile_cache(tmp_path, n_tiles=16),
        density="medium",
    )
    out_root = tmp_path / "out"
    # Act / Assert
    with pytest.raises(FileNotFoundError, match="source frames"):
        outlier.build(plan, out_root)
def test_missing_tile_manifest_raises(tmp_path: Path) -> None:
    """A tile-cache directory that does not exist makes ``build`` raise ``FileNotFoundError``."""
    # Arrange
    absent_cache = tmp_path / "no-cache"
    plan = outlier.OutlierInjectionPlan(
        source_frames_dir=_build_synthetic_frames_dir(tmp_path, count=10),
        tile_cache_dir=absent_cache,
        density="medium",
    )
    out_root = tmp_path / "out"
    # Act / Assert
    with pytest.raises(FileNotFoundError, match="tile-cache manifest"):
        outlier.build(plan, out_root)
def test_read_tile_manifest_round_trips(tmp_path: Path) -> None:
    """Reading a synthetic manifest yields one row per tile with in-range centre coordinates."""
    # Arrange
    manifest_path = _build_synthetic_tile_cache(tmp_path, n_tiles=8) / "manifest.csv"
    # Act
    parsed = read_tile_manifest(manifest_path)
    # Assert
    assert len(parsed) == 8
    latitudes = [row.centre_lat_deg for row in parsed]
    longitudes = [row.centre_lon_deg for row in parsed]
    assert all(-90 <= lat <= 90 for lat in latitudes)
    assert all(-180 <= lon <= 180 for lon in longitudes)
def test_derive_rng_is_stable_across_calls() -> None:
    """Two generators derived from identical arguments give the same first draw."""
    # Arrange / Act
    first_draw, second_draw = (
        derive_rng("outlier", 42, "medium").integers(0, 1_000_000_000)
        for _ in range(2)
    )
    # Assert
    assert first_draw == second_draw
def test_derive_rng_differs_across_domains() -> None:
    """The same seed under two different domain labels gives different first draws."""
    # Arrange / Act
    first_draw = {
        domain: derive_rng(domain, 42).integers(0, 1_000_000_000)
        for domain in ("outlier", "blackout_spoof")
    }
    # Assert
    assert first_draw["outlier"] != first_draw["blackout_spoof"], (
        "different domains must produce independent streams"
    )
def test_haversine_known_distance() -> None:
    """One degree of latitude along a meridian should measure roughly 111 km."""
    # Arrange — two points on the 30°E meridian, one degree of latitude apart
    start = (50.0, 30.0)
    end = (51.0, 30.0)
    # Act
    distance_m = haversine_m(*start, *end)
    # Assert
    assert 111_000 < distance_m < 112_000
def test_iter_video_frame_indices_rejects_bad_ratio() -> None:
    """Ratios of 0.0 and 1.5 are rejected with ``ValueError`` once the iterator is consumed."""
    # Arrange / Act / Assert
    for bad_ratio in (0.0, 1.5):
        with pytest.raises(ValueError):
            list(iter_video_frame_indices(100, bad_ratio))
def test_cleanup_tmpfs_removes_scratch(tmp_path: Path) -> None:
    """AC-6: ``cleanup_tmpfs`` removes a scratch directory together with its nested content."""
    # Arrange
    from fixtures.injectors._common import cleanup_tmpfs

    scratch = tmp_path / "scratch"
    nested = scratch / "deep" / "nested"
    nested.mkdir(parents=True)
    (nested / "file.txt").write_text("x")
    # Act
    cleanup_tmpfs(scratch)
    # Assert
    assert not scratch.exists()
def test_cleanup_tmpfs_is_silent_for_missing_path(tmp_path: Path) -> None:
    """Calling ``cleanup_tmpfs`` on a path that was never created must not raise."""
    # Arrange
    from fixtures.injectors._common import cleanup_tmpfs

    absent = tmp_path / "never-existed"
    # Act / Assert — the test passes as long as no exception propagates
    cleanup_tmpfs(absent)
def test_replacement_density_meets_target(tmp_path: Path) -> None:
    """Sanity: with ``density="heavy"`` roughly a third of the source frames are replaced."""
    # Arrange
    src_root = tmp_path / "src"
    plan = outlier.OutlierInjectionPlan(
        source_frames_dir=_build_synthetic_frames_dir(src_root, count=300),
        tile_cache_dir=_build_synthetic_tile_cache(src_root, n_tiles=16),
        density="heavy",
    )
    # Act
    report = outlier.build(plan, tmp_path / "out")
    # Assert
    ratio = report.replaced_frame_count / report.total_source_frames
    assert 0.30 < ratio < 0.40, f"heavy density gave {ratio} (want ≈ 0.33)"