"""Behavioural tests for the AZ-408 outlier injector. Covers AC-1 (seed determinism), AC-2 (geodesic offset enforcement), and AC-6 (tmpfs scratch isolation). Density-flag mapping is tested directly against the ``_DENSITY_RATIO`` table. """ from __future__ import annotations import csv import io import json import math from pathlib import Path import pytest from fixtures.injectors import outlier from fixtures.injectors._common import ( derive_rng, far_away_indices, haversine_m, iter_video_frame_indices, read_tile_manifest, ) # --------------------------------------------------------------------------- # Fixture-builder helpers (synthetic tile cache + frames) # --------------------------------------------------------------------------- def _write_synthetic_frame(path: Path, color: tuple[int, int, int] = (40, 40, 40)) -> None: from PIL import Image # noqa: PLC0415 img = Image.new("RGB", (256, 256), color=color) img.save(path, format="JPEG", quality=85, optimize=False, progressive=False, subsampling=2) def _build_synthetic_frames_dir(parent: Path, count: int = 100) -> Path: """Make a fake AD*.jpg directory under ``parent/frames``.""" frames_dir = parent / "frames" frames_dir.mkdir(parents=True, exist_ok=True) for i in range(count): _write_synthetic_frame(frames_dir / f"AD{i + 1:06d}.jpg") return frames_dir def _build_synthetic_tile_cache(parent: Path, n_tiles: int = 16) -> Path: """Make a fake tile-cache tree under ``parent/tile-cache``. The fake cache covers the same Derkachi bbox the real builder uses, but with a smaller grid so the unit test stays fast. Tiles are placed at zoom 18 with deterministic (tx, ty) offsets — the far-away-tile check uses geodesic distance computed from the (tx, ty) so any spread > 350 m at zoom 18 satisfies AC-2. """ cache_dir = parent / "tile-cache" tiles_dir = cache_dir / "tiles" / "18" tiles_dir.mkdir(parents=True, exist_ok=True) rows = [] # Zoom-18 grid spread of ~10 tiles each axis covers ~1.5 km at the # Derkachi latitude — easily > 350 m offset between corners. base_tx = 1 << 17 base_ty = 1 << 17 for i in range(n_tiles): tx = base_tx + (i % 4) * 4 ty = base_ty + (i // 4) * 4 tile_subdir = tiles_dir / str(tx) tile_subdir.mkdir(parents=True, exist_ok=True) _write_synthetic_frame(tile_subdir / f"{ty}.jpg", color=(i * 5, 90, 200 - i * 5)) rows.append( { "zoom_level": 18, "tile_x": tx, "tile_y": ty, "capture_date": "2025-11-01", "source": "stub", "m_per_px": 0.5, "jpeg_path": f"tiles/18/{tx}/{ty}.jpg", "content_hash": "deadbeef", "provenance": f"paired_gmaps:AD{i + 1:06d}" if i < 16 else "STUB", } ) manifest = cache_dir / "manifest.csv" with manifest.open("w", newline="") as fp: writer = csv.DictWriter(fp, fieldnames=list(rows[0].keys()), lineterminator="\n") writer.writeheader() writer.writerows(rows) return cache_dir # --------------------------------------------------------------------------- # AC-1: density-flag determinism # --------------------------------------------------------------------------- @pytest.mark.parametrize( "density, expected_stride", [("light", 100), ("medium", 10), ("heavy", 3)], ) def test_density_ratio_maps_to_correct_stride(density: outlier.Density, expected_stride: int) -> None: # Arrange total = 1000 # Act indices = list(iter_video_frame_indices(total, outlier._DENSITY_RATIO[density])) # Assert assert indices[0] == 0 # Stride should match the documented ratio assert indices[1] - indices[0] == expected_stride expected_count = (total + expected_stride - 1) // expected_stride assert len(indices) == expected_count def test_build_is_seed_deterministic(tmp_path: Path) -> None: """AC-1: same seed → identical manifest + identical replaced bytes.""" # Arrange frames = _build_synthetic_frames_dir(tmp_path, count=80) cache = _build_synthetic_tile_cache(tmp_path, n_tiles=16) plan = outlier.OutlierInjectionPlan( source_frames_dir=frames, tile_cache_dir=cache, density="medium", seed=42, ) # Act out_a = tmp_path / "run_a" out_b = tmp_path / "run_b" outlier.build(plan, out_a) outlier.build(plan, out_b) # Assert — manifest bit-identical manifest_a = (out_a / "manifest.csv").read_bytes() manifest_b = (out_b / "manifest.csv").read_bytes() assert manifest_a == manifest_b # Replaced frames bit-identical rows = list(csv.DictReader(io.StringIO((out_a / "manifest.csv").read_text()))) assert rows, "manifest should have at least one replaced frame" for row in rows: name = row["src_jpeg_path"] assert (out_a / "frames" / name).read_bytes() == (out_b / "frames" / name).read_bytes(), ( f"replaced frame {name} differs across runs" ) def test_different_seeds_produce_different_replacements(tmp_path: Path) -> None: """Sanity: different seeds → different replacement-tile picks.""" # Arrange frames = _build_synthetic_frames_dir(tmp_path, count=40) cache = _build_synthetic_tile_cache(tmp_path, n_tiles=16) plan_a = outlier.OutlierInjectionPlan( source_frames_dir=frames, tile_cache_dir=cache, density="medium", seed=1 ) plan_b = outlier.OutlierInjectionPlan( source_frames_dir=frames, tile_cache_dir=cache, density="medium", seed=2 ) # Act out_a = tmp_path / "seed_a" out_b = tmp_path / "seed_b" outlier.build(plan_a, out_a) outlier.build(plan_b, out_b) # Assert — replacement-tile picks differ rows_a = list(csv.DictReader(io.StringIO((out_a / "manifest.csv").read_text()))) rows_b = list(csv.DictReader(io.StringIO((out_b / "manifest.csv").read_text()))) assert rows_a and rows_b pick_a = [(r["replacement_tile_x"], r["replacement_tile_y"]) for r in rows_a] pick_b = [(r["replacement_tile_x"], r["replacement_tile_y"]) for r in rows_b] assert pick_a != pick_b, "different seeds should produce different replacement picks" # --------------------------------------------------------------------------- # AC-2: every replacement crop is ≥350 m from the original frame # --------------------------------------------------------------------------- def test_every_replacement_exceeds_min_offset(tmp_path: Path) -> None: """AC-2: ≥99 % of crops are > 350 m from original; with synth cache, 100 %.""" # Arrange frames = _build_synthetic_frames_dir(tmp_path, count=60) cache = _build_synthetic_tile_cache(tmp_path, n_tiles=16) plan = outlier.OutlierInjectionPlan( source_frames_dir=frames, tile_cache_dir=cache, density="medium", seed=7, min_offset_m=350.0, ) # Act report = outlier.build(plan, tmp_path / "out") # Assert rows = list(csv.DictReader(io.StringIO((tmp_path / "out" / "manifest.csv").read_text()))) assert rows, "should have replaced at least one frame" offsets = [float(r["geodesic_offset_m"]) for r in rows] assert all(o >= 350.0 for o in offsets), f"min offset {min(offsets)} < 350 m" assert report.min_geodesic_offset_m >= 350.0 def test_far_away_indices_filters_by_distance() -> None: """Unit test the helper directly.""" # Arrange from fixtures.injectors._common import TileGtRow rows = [ TileGtRow(18, 0, 0, "", "", 0.5, "", "", "", 50.0, 30.0), TileGtRow(18, 1, 0, "", "", 0.5, "", "", "", 50.001, 30.001), # ~140 m away TileGtRow(18, 2, 0, "", "", 0.5, "", "", "", 50.02, 30.02), # ~2.8 km away ] # Act far = far_away_indices(rows, src_idx=0, min_offset_m=350.0) # Assert assert far == [2] # --------------------------------------------------------------------------- # AC-6: tmpfs scratch isolation + manifest schema # --------------------------------------------------------------------------- def test_build_writes_only_under_out_root(tmp_path: Path) -> None: """AC-6: nothing escapes the requested out_root.""" # Arrange frames = _build_synthetic_frames_dir(tmp_path / "src", count=30) cache = _build_synthetic_tile_cache(tmp_path / "src", n_tiles=16) plan = outlier.OutlierInjectionPlan( source_frames_dir=frames, tile_cache_dir=cache, density="heavy" ) out_root = tmp_path / "out" # Act outlier.build(plan, out_root) # Assert — only expected files present, nothing outside out_root expected = { "frames", "manifest.csv", "summary.json", } actual = {p.name for p in out_root.iterdir()} assert actual == expected def test_build_overwrites_existing_out_root(tmp_path: Path) -> None: """Re-running build wipes the previous run cleanly (no stale files).""" # Arrange frames = _build_synthetic_frames_dir(tmp_path / "src", count=20) cache = _build_synthetic_tile_cache(tmp_path / "src", n_tiles=16) plan = outlier.OutlierInjectionPlan( source_frames_dir=frames, tile_cache_dir=cache, density="medium" ) out_root = tmp_path / "out" outlier.build(plan, out_root) # Plant a stale file the next build should remove. (out_root / "stale.txt").write_text("stale") # Act outlier.build(plan, out_root) # Assert assert not (out_root / "stale.txt").exists() def test_summary_json_matches_report(tmp_path: Path) -> None: # Arrange frames = _build_synthetic_frames_dir(tmp_path / "src", count=50) cache = _build_synthetic_tile_cache(tmp_path / "src", n_tiles=16) plan = outlier.OutlierInjectionPlan( source_frames_dir=frames, tile_cache_dir=cache, density="light", seed=3 ) out_root = tmp_path / "out" # Act report = outlier.build(plan, out_root) payload = json.loads((out_root / "summary.json").read_text()) # Assert assert payload["scenario"] == "outlier-injection-derkachi" assert payload["total_source_frames"] == report.total_source_frames assert payload["replaced_frame_count"] == report.replaced_frame_count assert payload["density"] == "light" # --------------------------------------------------------------------------- # Error handling # --------------------------------------------------------------------------- def test_missing_source_frames_raises(tmp_path: Path) -> None: # Arrange cache = _build_synthetic_tile_cache(tmp_path, n_tiles=16) plan = outlier.OutlierInjectionPlan( source_frames_dir=tmp_path / "does-not-exist", tile_cache_dir=cache, density="medium", ) # Act / Assert with pytest.raises(FileNotFoundError, match="source frames"): outlier.build(plan, tmp_path / "out") def test_missing_tile_manifest_raises(tmp_path: Path) -> None: # Arrange frames = _build_synthetic_frames_dir(tmp_path, count=10) plan = outlier.OutlierInjectionPlan( source_frames_dir=frames, tile_cache_dir=tmp_path / "no-cache", density="medium", ) # Act / Assert with pytest.raises(FileNotFoundError, match="tile-cache manifest"): outlier.build(plan, tmp_path / "out") def test_read_tile_manifest_round_trips(tmp_path: Path) -> None: # Arrange cache = _build_synthetic_tile_cache(tmp_path, n_tiles=8) # Act rows = read_tile_manifest(cache / "manifest.csv") # Assert assert len(rows) == 8 assert all(-90 <= r.centre_lat_deg <= 90 for r in rows) assert all(-180 <= r.centre_lon_deg <= 180 for r in rows) def test_derive_rng_is_stable_across_calls() -> None: # Arrange / Act r1 = derive_rng("outlier", 42, "medium").integers(0, 1_000_000_000) r2 = derive_rng("outlier", 42, "medium").integers(0, 1_000_000_000) # Assert assert r1 == r2 def test_derive_rng_differs_across_domains() -> None: # Arrange / Act out = derive_rng("outlier", 42).integers(0, 1_000_000_000) bsp = derive_rng("blackout_spoof", 42).integers(0, 1_000_000_000) # Assert assert out != bsp, "different domains must produce independent streams" def test_haversine_known_distance() -> None: """Sanity-check the haversine helper against a known fixture.""" # Arrange # ~1 deg of latitude ≈ 111 km # Act d = haversine_m(50.0, 30.0, 51.0, 30.0) # Assert assert 111_000 < d < 112_000 def test_iter_video_frame_indices_rejects_bad_ratio() -> None: # Arrange / Act / Assert with pytest.raises(ValueError): list(iter_video_frame_indices(100, 0.0)) with pytest.raises(ValueError): list(iter_video_frame_indices(100, 1.5)) def test_cleanup_tmpfs_removes_scratch(tmp_path: Path) -> None: """AC-6: ``cleanup_tmpfs`` rm-trees the scratch dir; called from fixture teardown.""" # Arrange from fixtures.injectors._common import cleanup_tmpfs scratch = tmp_path / "scratch" (scratch / "deep" / "nested").mkdir(parents=True) (scratch / "deep" / "nested" / "file.txt").write_text("x") # Act cleanup_tmpfs(scratch) # Assert assert not scratch.exists() def test_cleanup_tmpfs_is_silent_for_missing_path(tmp_path: Path) -> None: """``cleanup_tmpfs`` must not raise for a non-existent path (idempotent).""" # Arrange from fixtures.injectors._common import cleanup_tmpfs # Act / Assert cleanup_tmpfs(tmp_path / "never-existed") def test_replacement_density_meets_target(tmp_path: Path) -> None: """Sanity: heavy density replaces ≈ 1/3 of frames.""" # Arrange frames = _build_synthetic_frames_dir(tmp_path / "src", count=300) cache = _build_synthetic_tile_cache(tmp_path / "src", n_tiles=16) plan = outlier.OutlierInjectionPlan( source_frames_dir=frames, tile_cache_dir=cache, density="heavy" ) # Act report = outlier.build(plan, tmp_path / "out") # Assert actual_ratio = report.replaced_frame_count / report.total_source_frames assert 0.30 < actual_ratio < 0.40, f"heavy density gave {actual_ratio} (want ≈ 0.33)"