[AZ-408] [AZ-410] [AZ-411] Batch 69: synth injectors + FT-P-02/03/14

AZ-408 (3pt) — Replace AZ-406 injector scaffolds with concrete generators: - outlier.py: deterministic stride + far-away tile replacement; AC-2 ≥350m offset - blackout_spoof.py: paired video blackout + FC GPS spoof with ≤40ms alignment; AC-4 realistic fix_type/hdop; AC-NEW-8 200-500m inter-spoof deltas - multi_segment.py: ≥3 disjoint windows, ≥30s gaps, ≤25% coverage - fc_proxy.py: timed-splice runtime proxy with pre-activate RuntimeError guard - _common.py: derive_rng + tile-manifest reader + tmpfs helpers - injector_fixtures.py: pytest fixtures wired via runner conftest AZ-410 (3pt) — FT-P-02 cumulative drift between satellite anchors: - anchor_pair_detector.py: AC-1 detection, AC-2/3 pass-fraction, AC-4 monotonicity check, CSV evidence - test_ft_p_02_derkachi_drift.py: scenario gated on upstream helper NotImplementedError (frame_source_replay / fdr_reader / imu_replay) AZ-411 (2pt) — FT-P-03 + FT-P-14 schema + WGS84: - estimate_schema.py: AC-1 schema completeness, AC-2 source-label set containment, AC-3 WGS84 range + int32 1e-7 decode - test_ft_p_03_14_schema_wgs84.py: shared single-image-push scenario Tests: 248 unit tests pass (+91 vs batch 68). Reports: batch_69_report.md, batch_69_review.md (PASS), cumulative_review_batches_67-69_cycle1_report.md (PASS). Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-21 21:01:12 +00:00 · 2026-05-16 17:54:00 +03:00
parent ff1b00200c
commit 702a0c0ff3
27 changed files with 4619 additions and 58 deletions
@@ -212,4 +212,5 @@ pytest_plugins = [
    "runner.reporting.csv_reporter",
    "runner.reporting.evidence_bundler",
    "runner.reporting.nfr_recorder",
+    "runner.helpers.injector_fixtures",
 ]
@@ -0,0 +1,287 @@
+"""Anchor-pair detection + drift binning for FT-P-02 (AC-1.3).
+
+Consumes a stream of FDR ``source_label`` transitions + position estimates
+and produces:
+
+* Anchor pairs: every (visual_propagated | dead_reckoned) → satellite_anchored
+  transition is one pair. The pair records the segment's propagated_centre
+  immediately before the new anchor, the anchor centre itself, and the
+  age of the previous satellite anchor at the moment of the new one.
+
+* Drift per pair = geodesic distance (Vincenty / WGS84) between the
+  propagated centre and the new anchor centre.
+
+* Drift bins by ``last_satellite_anchor_age_ms`` (defaults to the
+  spec's {<1 s, 1-3 s, 3-10 s, 10-30 s, >30 s} buckets).
+
+* Aggregate pass/fail per AC-1.3:
+  - AC-2: ≥95 % of visual-only pairs satisfy drift < 100 m.
+  - AC-3: ≥95 % of IMU-fused pairs satisfy drift < 50 m.
+  - AC-4: bin medians grow monotonically with age; no >2× jump.
+
+The classification (visual-only vs IMU-fused) is purely informational —
+the test code reads it out of the segment's FDR records (any frame with
+``imu_fused=True`` since the prior anchor makes the segment IMU-fused).
+
+The helper is **transport-agnostic**: it takes typed FdrEstimate records
+that the per-scenario test produces from the public FDR archive (no SUT
+import). Unit tests construct synthetic streams directly.
+
+Public-boundary discipline: this module does NOT import any
+``src/gps_denied_onboard`` symbol.
+"""
+
+from __future__ import annotations
+
+import statistics
+from dataclasses import dataclass, field
+from typing import Literal, Sequence
+
+from .geo import distance_m
+
+# Closed set of provenance labels carried by an FDR estimate. Only
+# "satellite_anchored" closes a segment in ``detect_anchor_pairs``; the
+# other two labels are accumulated as propagated estimates.
+SourceLabel = Literal["satellite_anchored", "visual_propagated", "dead_reckoned"]
+
+
+@dataclass(frozen=True)
+class FdrEstimate:
+    """One position estimate from the FDR archive (post-flight read).
+
+    The fields are the public-boundary contract — we never import the
+    SUT's ``FdrRecord`` dataclass; we materialise a parallel struct
+    from the FDR JSON payload.
+
+    Instances are immutable (``frozen=True``); the last three fields are
+    optional and default to "not fused" / zero.
+    """
+
+    # Timestamp of the estimate. Anchor ages are computed as differences of
+    # these values — presumably one monotonic clock in ms; TODO confirm.
+    monotonic_ms: int
+    # Estimated position, passed to ``distance_m`` as (lat, lon).
+    lat_deg: float
+    lon_deg: float
+    # "satellite_anchored" closes a segment; any other label extends it.
+    source_label: SourceLabel
+    # A single estimate with this flag set marks its whole segment IMU-fused.
+    imu_fused: bool = False
+    # NOTE(review): not read anywhere in this module — presumably the
+    # covariance semi-major axis in metres; confirm against the FDR schema.
+    cov_semi_major_m: float = 0.0
+    # NOTE(review): carried from the FDR payload but not read in this module;
+    # pair ages are recomputed from ``monotonic_ms`` instead.
+    last_satellite_anchor_age_ms: int = 0
+
+
+@dataclass(frozen=True)
+class AnchorPair:
+    """One (propagated_centre, new_anchor) pair.
+
+    Built by ``detect_anchor_pairs`` each time a satellite anchor closes a
+    non-empty propagated segment that itself follows an earlier anchor.
+    """
+
+    # Timestamp of the first propagated estimate in the segment.
+    segment_first_ms: int
+    propagated_centre_ms: int  # timestamp of last estimate before anchor
+    # Timestamp of the satellite anchor that closes the segment.
+    anchor_ms: int
+    # Position of the last propagated estimate before the anchor.
+    propagated_lat_deg: float
+    propagated_lon_deg: float
+    # Position reported by the closing anchor.
+    anchor_lat_deg: float
+    anchor_lon_deg: float
+    # ``distance_m`` between the propagated position and the anchor position.
+    drift_m: float
+    # Time between the previous anchor and the closing anchor (difference of
+    # their ``monotonic_ms`` values).
+    last_satellite_anchor_age_ms: int
+    # True when any estimate in the segment had ``imu_fused`` set.
+    imu_fused_segment: bool
+
+
+# Default bin edges per the spec: {<1 s, 1-3 s, 3-10 s, 10-30 s, >30 s}.
+# Four ascending edges in milliseconds delimit five bins; ``_bin_label``
+# treats each edge as the exclusive upper bound of the bin below it.
+DEFAULT_AGE_BIN_EDGES_MS: tuple[int, ...] = (1_000, 3_000, 10_000, 30_000)
+
+
+@dataclass
+class DriftBinStats:
+    """Aggregate statistics for one age-bin.
+
+    An empty bin keeps the defaults (count 0, zero median / p95, no drifts).
+    """
+
+    # Human-readable bin label such as "<1s", "1-3s" or ">30s".
+    label: str
+    # Number of anchor pairs that fell into the bin.
+    count: int = 0
+    # Median and 95th-percentile drift of the bin as computed by
+    # ``bin_drifts``; both stay 0.0 for an empty bin.
+    median_m: float = 0.0
+    p95_m: float = 0.0
+    # Raw drift samples of the bin, in the order the pairs were supplied.
+    drifts_m: list[float] = field(default_factory=list)
+
+
+@dataclass
+class FtP02Report:
+    """Aggregate report produced by the FT-P-02 scenario.
+
+    Assembled by ``aggregate``; ``write_csv_evidence`` reads ``pairs`` from it.
+    """
+
+    # Every detected anchor pair, in stream order.
+    pairs: list[AnchorPair]
+    # Partition of ``pairs`` by ``imu_fused_segment`` (False / True).
+    visual_only_pairs: list[AnchorPair]
+    imu_fused_pairs: list[AnchorPair]
+    # Fraction of each partition whose drift is below its bound; 0.0 when
+    # the partition is empty (see ``compute_pass_fraction``).
+    visual_only_pass_fraction: float
+    imu_fused_pass_fraction: float
+    # One entry per age bin, in display order, computed over all pairs.
+    bin_stats: list[DriftBinStats]
+    # Messages from ``check_monotonic``; empty when AC-4 holds.
+    monotonic_violations: list[str]
+
+
+def detect_anchor_pairs(stream: Sequence[FdrEstimate]) -> list[AnchorPair]:
+    """Collect one ``AnchorPair`` per propagated-segment → anchor transition.
+
+    The stream is walked once, in the order given. Estimates whose label is
+    not ``satellite_anchored`` accumulate into the current segment. When an
+    anchor arrives, the segment's last estimate is taken as the
+    **propagated centre** and compared against the anchor position.
+
+    No pair is emitted when the anchor is the first one in the stream (there
+    is no previous anchor to age against) or when no propagated estimate
+    separates it from the previous anchor.
+
+    Args:
+        stream: FDR estimates in the order they should be processed.
+
+    Returns:
+        The detected pairs, in stream order.
+    """
+    found: list[AnchorPair] = []
+    previous_anchor: FdrEstimate | None = None
+    segment: list[FdrEstimate] = []
+
+    for record in stream:
+        if record.source_label != "satellite_anchored":
+            segment.append(record)
+            continue
+
+        if previous_anchor is not None and segment:
+            tail = segment[-1]
+            offset_m = distance_m(
+                tail.lat_deg, tail.lon_deg,
+                record.lat_deg, record.lon_deg,
+            )
+            found.append(
+                AnchorPair(
+                    segment_first_ms=segment[0].monotonic_ms,
+                    propagated_centre_ms=tail.monotonic_ms,
+                    anchor_ms=record.monotonic_ms,
+                    propagated_lat_deg=tail.lat_deg,
+                    propagated_lon_deg=tail.lon_deg,
+                    anchor_lat_deg=record.lat_deg,
+                    anchor_lon_deg=record.lon_deg,
+                    drift_m=offset_m,
+                    last_satellite_anchor_age_ms=record.monotonic_ms - previous_anchor.monotonic_ms,
+                    # One fused estimate is enough to classify the segment.
+                    imu_fused_segment=any(item.imu_fused for item in segment),
+                )
+            )
+
+        # Every anchor starts a fresh segment, whether or not it made a pair.
+        previous_anchor = record
+        segment = []
+
+    return found
+
+
+def _bin_label(age_ms: int, edges: tuple[int, ...]) -> str:
+    """Return the display label of the age bin that contains ``age_ms``.
+
+    Each edge is the exclusive upper bound of the bin below it, so an age
+    equal to an edge belongs to the bin above. Labels are written in whole
+    seconds (edge // 1000).
+    """
+    lower = edges[0]
+    if age_ms < lower:
+        return f"<{lower // 1000}s"
+    for upper in edges[1:]:
+        if age_ms < upper:
+            return f"{lower // 1000}-{upper // 1000}s"
+        lower = upper
+    # At or past the last edge: open-ended top bin.
+    return f">{lower // 1000}s"
+
+
+def bin_drifts(
+    pairs: Sequence[AnchorPair],
+    edges: tuple[int, ...] = DEFAULT_AGE_BIN_EDGES_MS,
+) -> list[DriftBinStats]:
+    """Bin drifts by ``last_satellite_anchor_age_ms``; return per-bin stats.
+
+    Args:
+        pairs: Anchor pairs to bin.
+        edges: Ascending bin edges in milliseconds; ``len(edges) + 1`` bins
+            are produced.
+
+    Returns:
+        One ``DriftBinStats`` per bin in ascending-age display order. Empty
+        bins are included with ``count == 0`` so the output shape is stable.
+        ``p95_m`` is the nearest-rank 95th percentile: the smallest sample
+        such that at least 95 % of the bin's samples are <= it.
+    """
+    # Pre-create bins in display order so the output is stable.
+    labels = [_bin_label(0, edges)]
+    labels.extend(f"{edges[i] // 1000}-{edges[i + 1] // 1000}s" for i in range(len(edges) - 1))
+    labels.append(f">{edges[-1] // 1000}s")
+    bins: dict[str, list[float]] = {label: [] for label in labels}
+
+    for p in pairs:
+        bins[_bin_label(p.last_satellite_anchor_age_ms, edges)].append(p.drift_m)
+
+    stats: list[DriftBinStats] = []
+    for label in labels:
+        drifts = bins[label]
+        if not drifts:
+            stats.append(DriftBinStats(label=label, count=0, median_m=0.0, p95_m=0.0))
+            continue
+        sorted_drifts = sorted(drifts)
+        # Nearest-rank p95: rank = ceil(0.95 * n), computed in integers.
+        # ``round()`` rounds halves to even, which under-shoots for some n
+        # (n=30 gave rank 28, i.e. only 93.3 % coverage).
+        rank95 = (95 * len(sorted_drifts) + 99) // 100
+        stats.append(
+            DriftBinStats(
+                label=label,
+                count=len(drifts),
+                median_m=statistics.median(drifts),
+                p95_m=sorted_drifts[rank95 - 1],
+                drifts_m=drifts,
+            )
+        )
+    return stats
+
+
+def check_monotonic(bin_stats: Sequence[DriftBinStats]) -> list[str]:
+    """AC-4 check over the populated bins, taken in the order given.
+
+    Empty bins (``count == 0``) are dropped first. Each remaining bin is then
+    compared with its successor: a lower median is reported as non-monotonic,
+    otherwise a median more than twice the previous one is reported as a
+    >2x jump. The jump test is skipped when the previous median is not
+    positive.
+
+    Returns:
+        Violation messages in bin order; empty iff the AC holds.
+    """
+    populated = [entry for entry in bin_stats if entry.count > 0]
+    problems: list[str] = []
+    for earlier, later in zip(populated[:-1], populated[1:]):
+        if later.median_m < earlier.median_m:
+            problems.append(
+                f"non-monotonic median: bin {earlier.label} median {earlier.median_m:.2f} m > "
+                f"bin {later.label} median {later.median_m:.2f} m"
+            )
+            continue
+        if earlier.median_m > 0 and later.median_m > 2 * earlier.median_m:
+            problems.append(
+                f">2x median jump: bin {earlier.label} median {earlier.median_m:.2f} m → "
+                f"bin {later.label} median {later.median_m:.2f} m"
+            )
+    return problems
+
+
+def compute_pass_fraction(pairs: Sequence[AnchorPair], drift_bound_m: float) -> float:
+    """Return the share of ``pairs`` with drift strictly below ``drift_bound_m``.
+
+    An empty ``pairs`` yields 0.0 rather than raising.
+    """
+    if pairs:
+        within_bound = [p for p in pairs if p.drift_m < drift_bound_m]
+        return len(within_bound) / len(pairs)
+    return 0.0
+
+
+def aggregate(
+    stream: Sequence[FdrEstimate],
+    visual_only_bound_m: float = 100.0,
+    imu_fused_bound_m: float = 50.0,
+    edges: tuple[int, ...] = DEFAULT_AGE_BIN_EDGES_MS,
+) -> FtP02Report:
+    """Run the whole FT-P-02 pipeline over one estimate stream.
+
+    Pairs are detected, split by ``imu_fused_segment``, scored against the
+    per-class drift bound, binned by anchor age (all pairs together), and
+    the bin medians are checked for monotonicity.
+
+    Args:
+        stream: FDR estimates in processing order.
+        visual_only_bound_m: Drift bound applied to non-IMU-fused pairs.
+        imu_fused_bound_m: Drift bound applied to IMU-fused pairs.
+        edges: Age-bin edges in milliseconds.
+
+    Returns:
+        The assembled ``FtP02Report``.
+    """
+    all_pairs = detect_anchor_pairs(stream)
+
+    fused: list[AnchorPair] = []
+    unfused: list[AnchorPair] = []
+    for pair in all_pairs:
+        (fused if pair.imu_fused_segment else unfused).append(pair)
+
+    per_bin = bin_drifts(all_pairs, edges)
+    unfused_fraction = compute_pass_fraction(unfused, visual_only_bound_m)
+    fused_fraction = compute_pass_fraction(fused, imu_fused_bound_m)
+
+    return FtP02Report(
+        pairs=all_pairs,
+        visual_only_pairs=unfused,
+        imu_fused_pairs=fused,
+        visual_only_pass_fraction=unfused_fraction,
+        imu_fused_pass_fraction=fused_fraction,
+        bin_stats=per_bin,
+        monotonic_violations=check_monotonic(per_bin),
+    )
+
+
+def write_csv_evidence(report: FtP02Report, csv_path) -> None:  # type: ignore[no-untyped-def]
+    """Write the FT-P-02 evidence CSV: a header row, then one row per pair.
+
+    ``csv_path`` must expose ``.open(mode, newline=...)`` (e.g. a
+    ``pathlib.Path``); an existing file is overwritten. Coordinates are
+    written with 7 decimals, drift with 3, and the IMU flag as 0 / 1. Rows
+    are terminated with a bare ``\\n``.
+    """
+    import csv as _csv
+
+    header = (
+        "segment_first_ms",
+        "propagated_centre_ms",
+        "anchor_ms",
+        "propagated_lat_deg",
+        "propagated_lon_deg",
+        "anchor_lat_deg",
+        "anchor_lon_deg",
+        "drift_m",
+        "last_satellite_anchor_age_ms",
+        "imu_fused_segment",
+    )
+    with csv_path.open("w", newline="") as handle:
+        out = _csv.writer(handle, lineterminator="\n")
+        out.writerow(header)
+        out.writerows(
+            (
+                pair.segment_first_ms,
+                pair.propagated_centre_ms,
+                pair.anchor_ms,
+                f"{pair.propagated_lat_deg:.7f}",
+                f"{pair.propagated_lon_deg:.7f}",
+                f"{pair.anchor_lat_deg:.7f}",
+                f"{pair.anchor_lon_deg:.7f}",
+                f"{pair.drift_m:.3f}",
+                pair.last_satellite_anchor_age_ms,
+                int(pair.imu_fused_segment),
+            )
+            for pair in report.pairs
+        )
@@ -0,0 +1,188 @@
+"""SUT outbound-estimate schema + WGS84 validation (FT-P-03, FT-P-14).
+
+Three thin contract checks shared by AZ-411's scenario file:
+
+1. **Schema completeness** (AC-1 of FT-P-03):
+   the outbound estimate must carry the four documented fields
+   ``lat:float``, ``lon:float``, ``cov_semi_major_m:float``,
+   ``last_satellite_anchor_age_ms:int`` — either inside the
+   ``GPS_INPUT`` / ``MSP2_SENSOR_GPS`` payload, OR on a paired
+   side-channel (per AC-4.3).
+
+2. **Source-label set containment** (AC-2): the side-channel emission
+   is exactly one of ``{satellite_anchored, visual_propagated,
+   dead_reckoned}`` — anything else is a real defect.
+
+3. **WGS84 range** (AC-3 of FT-P-14): decoded ``lat`` ∈ [-90, 90],
+   ``lon`` ∈ [-180, 180]; scaling matches the protocol convention
+   (AP/iNav `lat/lon` are 1e-7 scaled int32).
+
+The helpers operate on pure Python dict-like records — the scenario
+test pulls them from the SITL observer / tlog reader and hands them in.
+That keeps these helpers unit-testable without any docker harness.
+
+Public-boundary discipline: this module does NOT import any
+``src/gps_denied_onboard`` symbol.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Iterable, Mapping
+
+# (field name, required Python type) for every field the outbound estimate
+# must carry; ``validate_estimate_schema`` walks this table in order.
+REQUIRED_FIELDS: tuple[tuple[str, type], ...] = (
+    ("lat", float),
+    ("lon", float),
+    ("cov_semi_major_m", float),
+    ("last_satellite_anchor_age_ms", int),
+)
+
+# The only source labels ``validate_source_label`` accepts.
+ALLOWED_SOURCE_LABELS: frozenset[str] = frozenset(
+    {"satellite_anchored", "visual_propagated", "dead_reckoned"}
+)
+
+# Protocol scaling factor: MAVLink GPS_INPUT and iNav MSP2_SENSOR_GPS carry
+# lat / lon as int32 values in units of 1e-7 degrees, so
+# ``degrees = raw_int32 * LAT_LON_SCALE``.
+LAT_LON_SCALE = 1e-7
+
+
+@dataclass(frozen=True)
+class SchemaValidationResult:
+    """Outcome of a single ``validate_estimate_schema`` call."""
+
+    # True only when both lists below are empty.
+    ok: bool
+    # Required field names absent from the record, in ``REQUIRED_FIELDS`` order.
+    missing_fields: list[str]
+    # Required field names present but holding a value of the wrong type.
+    wrong_typed_fields: list[str]
+
+
+@dataclass(frozen=True)
+class SourceLabelValidationResult:
+    """Outcome of a single ``validate_source_label`` call."""
+
+    ok: bool
+    # The label as received; None when the input was not a string.
+    observed: str | None
+    reason: str | None  # filled when not ok
+
+
+@dataclass(frozen=True)
+class Wgs84ValidationResult:
+    """Outcome of a WGS84 range check on one decoded lat/lon pair."""
+
+    ok: bool
+    # The values that were checked; both None when the input was missing or
+    # not numeric.
+    lat_deg: float | None
+    lon_deg: float | None
+    # Human-readable failure description; None when ``ok`` is True.
+    reason: str | None
+
+
+def validate_estimate_schema(record: Mapping[str, object]) -> SchemaValidationResult:
+    """AC-1: every entry of ``REQUIRED_FIELDS`` is present and correctly typed.
+
+    ``record`` may be the merged payload + side-channel dict the test
+    assembles (e.g. ``GPS_INPUT`` fields plus the paired ``STATUSTEXT`` /
+    ``NAMED_VALUE_FLOAT`` channel); this helper does not care where a field
+    came from. Extra keys are ignored.
+
+    Returns:
+        A result listing absent fields and mistyped fields separately;
+        ``ok`` is True only when both lists are empty.
+    """
+    absent: list[str] = []
+    mistyped: list[str] = []
+    for field_name, field_type in REQUIRED_FIELDS:
+        if field_name not in record:
+            absent.append(field_name)
+            continue
+        candidate = record[field_name]
+        # ``bool`` is a subclass of ``int``; a True/False must not pass as
+        # an integer field.
+        bool_posing_as_int = field_type is int and isinstance(candidate, bool)
+        if bool_posing_as_int or not isinstance(candidate, field_type):
+            mistyped.append(field_name)
+    return SchemaValidationResult(
+        ok=not (absent or mistyped),
+        missing_fields=absent,
+        wrong_typed_fields=mistyped,
+    )
+
+
+def validate_source_label(label: object) -> SourceLabelValidationResult:
+    """AC-2: accept ``label`` only if it is one of ``ALLOWED_SOURCE_LABELS``.
+
+    A non-string input fails with ``observed=None``; an unknown string fails
+    with the string echoed back in ``observed``.
+    """
+    if isinstance(label, str):
+        accepted = label in ALLOWED_SOURCE_LABELS
+        problem = (
+            None
+            if accepted
+            else f"label {label!r} not in {sorted(ALLOWED_SOURCE_LABELS)}"
+        )
+        return SourceLabelValidationResult(ok=accepted, observed=label, reason=problem)
+    return SourceLabelValidationResult(
+        ok=False, observed=None, reason=f"label is {type(label).__name__}, expected str"
+    )
+
+
+def validate_wgs84_range(
+    lat_decoded_deg: float, lon_decoded_deg: float
+) -> Wgs84ValidationResult:
+    """AC-3 of FT-P-14: lat ∈ [-90, 90], lon ∈ [-180, 180].
+
+    Checks run in order and the first failure is reported: non-numeric
+    input, NaN, latitude range, longitude range. Infinite values fail the
+    range checks.
+
+    Args:
+        lat_decoded_deg: Latitude already decoded to degrees.
+        lon_decoded_deg: Longitude already decoded to degrees.
+
+    Returns:
+        A result echoing the checked values, or with both set to None when
+        the input was not numeric.
+    """
+    for value in (lat_decoded_deg, lon_decoded_deg):
+        # ``bool`` is a subclass of ``int``; True/False must not pass as a
+        # coordinate, matching the schema check's treatment of bools.
+        if isinstance(value, bool) or not isinstance(value, (int, float)):
+            return Wgs84ValidationResult(
+                ok=False, lat_deg=None, lon_deg=None,
+                reason="lat/lon not numeric",
+            )
+    # NaN is the only value that compares unequal to itself.
+    if lat_decoded_deg != lat_decoded_deg or lon_decoded_deg != lon_decoded_deg:
+        return Wgs84ValidationResult(
+            ok=False, lat_deg=lat_decoded_deg, lon_deg=lon_decoded_deg,
+            reason="lat/lon is NaN",
+        )
+    if not -90.0 <= lat_decoded_deg <= 90.0:
+        return Wgs84ValidationResult(
+            ok=False, lat_deg=lat_decoded_deg, lon_deg=lon_decoded_deg,
+            reason=f"lat {lat_decoded_deg} out of [-90, 90]",
+        )
+    if not -180.0 <= lon_decoded_deg <= 180.0:
+        return Wgs84ValidationResult(
+            ok=False, lat_deg=lat_decoded_deg, lon_deg=lon_decoded_deg,
+            reason=f"lon {lon_decoded_deg} out of [-180, 180]",
+        )
+    return Wgs84ValidationResult(
+        ok=True, lat_deg=lat_decoded_deg, lon_deg=lon_decoded_deg, reason=None
+    )
+
+
+def decode_lat_lon_int32(lat_e7: int, lon_e7: int) -> tuple[float, float]:
+    """Convert 1e-7-scaled int32 wire values to degrees.
+
+    Only the int32 envelope is checked here; geographic range is a separate
+    concern (``validate_wgs84_range``). Latitude is checked before longitude.
+
+    Raises:
+        ValueError: if either input lies outside the int32 range — treated
+            as transport corruption rather than a bad coordinate.
+    """
+    int32_lo = -(2 ** 31)
+    int32_hi = (2 ** 31) - 1
+    for wire_name, raw in (("lat_e7", lat_e7), ("lon_e7", lon_e7)):
+        if not int32_lo <= raw <= int32_hi:
+            raise ValueError(f"{wire_name} {raw} outside int32 range")
+    return lat_e7 * LAT_LON_SCALE, lon_e7 * LAT_LON_SCALE
+
+
+def aggregate_validations(
+    records: Iterable[Mapping[str, object]],
+) -> tuple[list[SchemaValidationResult], list[Wgs84ValidationResult]]:
+    """Apply the schema check and the WGS84 range check to every record.
+
+    Used by FT-P-03 / FT-P-14 to assert that each record satisfies both
+    contracts — typically one record from a single-image push, but any
+    stream works.
+
+    Returns:
+        Two lists parallel to ``records``: schema results and WGS84 results.
+        A record whose ``lat`` / ``lon`` is absent or not int/float gets a
+        failed WGS84 result instead of a range check.
+    """
+    numeric = (int, float)
+    schema_results: list[SchemaValidationResult] = []
+    range_results: list[Wgs84ValidationResult] = []
+    for entry in records:
+        schema_results.append(validate_estimate_schema(entry))
+        lat, lon = entry.get("lat"), entry.get("lon")
+        if not (isinstance(lat, numeric) and isinstance(lon, numeric)):
+            range_results.append(
+                Wgs84ValidationResult(
+                    ok=False, lat_deg=None, lon_deg=None,
+                    reason="missing or non-numeric lat/lon for WGS84 check",
+                )
+            )
+            continue
+        range_results.append(validate_wgs84_range(float(lat), float(lon)))
+    return schema_results, range_results
@@ -0,0 +1,180 @@
+"""pytest fixtures wrapping the AZ-408 runtime synthetic-injection injectors.
+
+Per-scenario tests (FT-N-01, FT-N-04, FT-P-08, NFT-RES-04, NFT-PERF-04)
+opt into an injector by requesting one of the fixtures below. Each
+fixture:
+
+1. Builds the injector output under the pytest ``tmp_path_factory`` root
+   (so unit-test runs never touch ``/tmp``).
+2. Yields a typed handle the test asserts against (out_root, schedule,
+   summary).
+3. Tears down the scratch directory at fixture exit per AC-6 (≤2 s).
+
+The per-injector fixtures below are **function-scoped**: every requesting
+test builds its own injector tree (and pays the ~3 s build cost) and tears
+it down on exit. Only the source-data discovery fixtures are session-scoped.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Iterator
+from pathlib import Path
+
+import pytest
+
+from fixtures.injectors import blackout_spoof, multi_segment, outlier
+from fixtures.injectors._common import cleanup_tmpfs
+
+
+# ---------------------------------------------------------------------------
+# Source data discovery
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(scope="session")
+def derkachi_source_frames() -> Path:
+    """Locate the directory holding the Derkachi ``AD*.jpg`` source frames.
+
+    Candidates are tried in order: the repository's
+    ``_docs/00_problem/input_data/`` directory, then ``/test-data``. A
+    candidate is accepted when it contains ``AD000001.jpg``.
+
+    Raises:
+        FileNotFoundError: when no candidate contains the first frame.
+    """
+    # This file lives at e2e/runner/helpers/injector_fixtures.py, so
+    # ``parents[3]`` is the repository root.
+    this_file = Path(__file__).resolve()
+    search_dirs = [
+        this_file.parents[3] / "_docs/00_problem/input_data",
+        Path("/test-data"),  # docker-compose bind-mount target
+    ]
+    located = next(
+        (d for d in search_dirs if (d / "AD000001.jpg").is_file()),
+        None,
+    )
+    if located is None:
+        raise FileNotFoundError(
+            "Derkachi source frames not found in any of: "
+            + ", ".join(map(str, search_dirs))
+        )
+    return located
+
+
+@pytest.fixture(scope="session")
+def tile_cache_fixture(pytestconfig: pytest.Config) -> Path:
+    """Path to the AZ-407 tile-cache fixture tree.
+
+    Resolution order:
+
+    1. ``--tile-cache-fixture=<path>`` CLI flag, when it names an existing
+       directory.
+    2. Default Docker mount at ``/tile-cache`` inside the runner image.
+
+    If the flag is given but does not name a directory, a warning is
+    emitted and resolution falls through to the default mount, so a typo
+    in the flag no longer goes unnoticed.
+
+    Skips the consuming test when no cache is found — the injector unit
+    tests use a synthetic mini-cache (see ``test_outlier.py``) and don't
+    need this fixture.
+    """
+    import warnings
+
+    explicit = pytestconfig.getoption("--tile-cache-fixture", default=None)
+    if explicit is not None:
+        p = Path(str(explicit))
+        if p.is_dir():
+            return p
+        # Keep the historical fallback, but make the ignored flag visible.
+        warnings.warn(
+            f"--tile-cache-fixture={explicit} is not a directory; "
+            "falling back to /tile-cache",
+            stacklevel=2,
+        )
+    default = Path("/tile-cache")
+    if default.is_dir():
+        return default
+    pytest.skip("tile-cache fixture not available (build with `make fixtures`)")
+
+
+# ---------------------------------------------------------------------------
+# Per-injector fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def outlier_injection_derkachi(
+    request: pytest.FixtureRequest,
+    derkachi_source_frames: Path,
+    tile_cache_fixture: Path,
+    tmp_path_factory: pytest.TempPathFactory,
+) -> Iterator[outlier.OutlierInjectionReport]:
+    """Build the outlier-injection-derkachi fixture for a single test.
+
+    Without parametrization the build uses ``density="medium"`` and
+    ``seed=0``. To override, parametrize *this fixture* indirectly with
+    either a mapping or a bare density string::
+
+        @pytest.mark.parametrize(
+            "outlier_injection_derkachi",
+            [{"density": "medium", "seed": 7}],
+            indirect=True,
+        )
+
+        @pytest.mark.parametrize(
+            "outlier_injection_derkachi", ["medium"], indirect=True
+        )
+
+    The scratch directory is removed with ``cleanup_tmpfs`` at teardown.
+    """
+    raw_param = getattr(request, "param", None)
+    if raw_param is None:
+        params = {}
+    elif isinstance(raw_param, str):
+        # A bare string is shorthand for the density; previously this
+        # crashed on ``.get``.
+        params = {"density": raw_param}
+    else:
+        params = raw_param
+    density = params.get("density", "medium")
+    seed = params.get("seed", 0)
+    out_root = tmp_path_factory.mktemp(f"outlier-{density}-{seed}")
+    report = outlier.build(
+        outlier.OutlierInjectionPlan(
+            source_frames_dir=derkachi_source_frames,
+            tile_cache_dir=tile_cache_fixture,
+            density=density,
+            seed=seed,
+        ),
+        out_root,
+    )
+    yield report
+    cleanup_tmpfs(out_root)
+
+
+@pytest.fixture
+def blackout_spoof_derkachi(
+    request: pytest.FixtureRequest,
+    derkachi_source_frames: Path,
+    tmp_path_factory: pytest.TempPathFactory,
+) -> Iterator[blackout_spoof.BlackoutSpoofReport]:
+    """Yield a freshly built blackout-spoof report for one test.
+
+    Optional overrides come from ``request.param`` (a mapping):
+    ``window_seconds`` (default 15.0) and ``seed`` (default 0). The scratch
+    directory is removed with ``cleanup_tmpfs`` at teardown.
+    """
+    overrides = getattr(request, "param", {})
+    blackout_s = overrides.get("window_seconds", 15.0)
+    rng_seed = overrides.get("seed", 0)
+    scratch = tmp_path_factory.mktemp(f"blackout-spoof-{int(blackout_s)}s-{rng_seed}")
+    plan = blackout_spoof.BlackoutSpoofPlan(
+        source_frames_dir=derkachi_source_frames,
+        blackout_seconds=blackout_s,
+        seed=rng_seed,
+    )
+    built = blackout_spoof.build(plan, scratch)
+    yield built
+    cleanup_tmpfs(scratch)
+
+
+@pytest.fixture
+def multi_segment_derkachi(
+    request: pytest.FixtureRequest,
+    derkachi_source_frames: Path,
+    tmp_path_factory: pytest.TempPathFactory,
+) -> Iterator[multi_segment.MultiSegmentReport]:
+    """Yield a freshly built multi-segment report for one test.
+
+    Optional overrides come from ``request.param`` (a mapping):
+    ``n_segments`` (default 3) and ``segment_seconds`` (default 12.0). The
+    scratch directory is removed with ``cleanup_tmpfs`` at teardown.
+    """
+    overrides = getattr(request, "param", {})
+    segment_count = overrides.get("n_segments", 3)
+    segment_len_s = overrides.get("segment_seconds", 12.0)
+    scratch = tmp_path_factory.mktemp(
+        f"multi-segment-{segment_count}x{int(segment_len_s)}s"
+    )
+    plan = multi_segment.MultiSegmentPlan(
+        source_frames_dir=derkachi_source_frames,
+        n_segments=segment_count,
+        segment_seconds=segment_len_s,
+    )
+    built = multi_segment.build(plan, scratch)
+    yield built
+    cleanup_tmpfs(scratch)
+
+
+# ---------------------------------------------------------------------------
+# Tile-cache CLI flag registration
+# ---------------------------------------------------------------------------
+
+
+def pytest_addoption(parser: pytest.Parser) -> None:
+    """Add the ``--tile-cache-fixture`` option to the ``e2e-runner`` group.
+
+    This module is loaded through the runner ``conftest.py``'s
+    ``pytest_plugins`` list, so the hook registers the flag before any
+    fixture asks for it.
+    """
+    parser.getgroup("e2e-runner").addoption(
+        "--tile-cache-fixture",
+        action="store",
+        default=None,
+        help="Path to a pre-built tile-cache fixture tree. Default: /tile-cache (Docker mount).",
+    )