[AZ-422] Add FT-P-17 + FT-N-06 mid-flight tile blackbox tests

Implement the AC-8.4 and AC-NEW-6 blackbox scenarios for mid-flight tile generation, dedup, landing-time upload, and freshness gating. Helpers: - runner/helpers/mid_flight_tile_evaluator.py — pure-logic evaluators for tile generation rate, Mode B Fact #105 schema check, footprint + GSD dedup (via geo.distance_m), upload-audit reconciliation, and the AC-5/AC-6 capture_utc + freshness-gate checks. - runner/helpers/mock_suite_sat_audit.py — httpx wrapper for the mock-suite-sat-service /tiles/audit endpoint with strict response-shape validation. Scenarios: - tests/positive/test_ft_p_17_mid_flight_tiles.py - tests/negative/test_ft_n_06_mid_flight_freshness.py Both skip when sitl_replay_ready is false and fail loudly when fixture records are missing (tests-as-gates discipline). 52 new unit tests (41 evaluator + 11 audit client) cover every helper branch. Review: PASS_WITH_WARNINGS (2 Low — duplicate haversine carry-over, upstream production dependency surface). Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-22 05:11:13 +00:00 · 2026-05-17 15:28:39 +03:00
parent 1ee54b414b
commit 5def1a3eb3
11 changed files with 1782 additions and 2 deletions
@@ -0,0 +1,126 @@
+"""FT-N-06 — Mid-flight tile current-timestamp + fresh-treatment (AZ-422 / AC-NEW-6).
+
+The full scenario:
+
+1. Same 5 min Derkachi replay as FT-P-17; the SUT generates one
+   FDR ``mid-flight-tile-output`` record per tile.
+2. Inspect each tile's manifest entry:
+   * AC-5: ``|capture_utc - generated_at_monotonic_ms| ≤ 60 s``.
+   * AC-6: no FDR ``tile-load-rejected`` record with
+     ``reason == "stale"`` carries any of the generated tile IDs
+     (a fresh tile must not be misclassified by the freshness gate).
+
+Gated on:
+
+* ``sitl_replay_ready`` — full replay requires the SITL fixture.
+* ``runner.helpers.mid_flight_tile_evaluator`` — pure-logic
+  evaluator covered by
+  ``e2e/_unit_tests/helpers/test_mid_flight_tile_evaluator.py``.
+
+This is a "negative" test in the sense that it asserts a *non*-event:
+no stale rejection of a freshly generated tile. The test still skips
+cleanly when the SITL fixture is not prepared.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from runner.helpers import mid_flight_tile_evaluator as mfe
+
+
+@pytest.mark.traces_to("AC-NEW-6,AC-5,AC-6,AC-7")
+def test_ft_n_06_mid_flight_freshness(
+    fc_adapter: str,
+    vio_strategy: str,
+    evidence_dir,  # type: ignore[no-untyped-def]
+    run_id: str,
+    nfr_recorder,  # type: ignore[no-untyped-def]
+    sitl_replay_ready: bool,
+) -> None:
+    """Full FT-N-06 scenario (AC-NEW-6)."""
+    if not sitl_replay_ready:
+        pytest.skip(
+            "FT-N-06 requires `E2E_SITL_REPLAY_DIR` to point at a SITL replay "
+            "fixture exposing `mid-flight-tile-output` FDR records and any "
+            "`tile-load-rejected` events emitted by the freshness gate "
+            "(AZ-595 + AZ-422 fixture builder). Pure-logic AC-NEW-6 "
+            "coverage lives in "
+            "e2e/_unit_tests/helpers/test_mid_flight_tile_evaluator.py."
+        )
+
+    from runner.helpers import fdr_reader
+
+    fdr_root = Path(evidence_dir).parent / f"run-{run_id}" / "fdr"
+
+    tiles: list[mfe.TileSpec] = []
+    rejection_payloads: list[dict] = []
+    for rec in fdr_reader.iter_records(fdr_root):
+        if rec.record_type == mfe.MID_FLIGHT_TILE_FDR_KIND:
+            tile = _project_tile(rec)
+            if tile is not None:
+                tiles.append(tile)
+        elif rec.record_type == mfe.TILE_LOAD_REJECTED_FDR_KIND:
+            rejection_payloads.append(dict(rec.payload))
+
+    if not tiles:
+        pytest.fail(
+            f"FT-N-06: no `{mfe.MID_FLIGHT_TILE_FDR_KIND}` FDR records at "
+            f"{fdr_root}. The fixture builder must produce at least one "
+            "generated tile for the freshness/stale check to be meaningful."
+        )
+
+    capture_report = mfe.evaluate_capture_date_freshness(tiles)
+    freshness_report = mfe.evaluate_freshness_gate(tiles, rejection_payloads)
+
+    nfr_recorder.record_metric(
+        "ft_n_06.tile_count", float(len(tiles)), ac_id="AC-NEW-6"
+    )
+    nfr_recorder.record_metric(
+        "ft_n_06.capture_drift_failures",
+        float(len(capture_report.failing_entries)),
+        ac_id="AC-5",
+    )
+    nfr_recorder.record_metric(
+        "ft_n_06.stale_rejection_count",
+        float(len(freshness_report.stale_rejections)),
+        ac_id="AC-6",
+    )
+
+    assert capture_report.passes, (
+        f"AC-5 (|capture_utc - generated_at| ≤ {capture_report.tolerance_s} s) failed: "
+        f"failures={[(e.tile_id, e.drift_s) for e in capture_report.failing_entries]}"
+    )
+    assert freshness_report.passes, (
+        "AC-6 (no `tile-load-rejected: stale` for freshly generated tile) failed: "
+        f"stale_rejected_tile_ids={freshness_report.stale_rejections}"
+    )
+
+
+def _project_tile(rec) -> mfe.TileSpec | None:  # type: ignore[no-untyped-def]
+    """Project an FDR record onto a ``TileSpec``; ``None`` if malformed."""
+    p = rec.payload
+    try:
+        bbox = tuple(p["bbox_wgs84"])  # type: ignore[index]
+    except (KeyError, TypeError):
+        return None
+    if len(bbox) != 4:
+        return None
+    quality = p.get("quality") if isinstance(p.get("quality"), dict) else {}
+    capture_utc: str | None = None
+    if isinstance(quality, dict):
+        raw_capture = quality.get("capture_utc")
+        if isinstance(raw_capture, str):
+            capture_utc = raw_capture
+    return mfe.TileSpec(
+        tile_id=str(p.get("tile_id") or ""),
+        bbox_wgs84=(float(bbox[0]), float(bbox[1]), float(bbox[2]), float(bbox[3])),
+        zoom_level=int(p.get("zoom_level") or 0),
+        descriptor_sha256=str(p.get("descriptor_sha256") or ""),
+        payload_size_bytes=int(p.get("payload_size_bytes") or 0),
+        quality=dict(quality) if isinstance(quality, dict) else {},
+        generated_at_monotonic_ms=int(rec.monotonic_ms),
+        capture_utc_iso=capture_utc,
+    )
@@ -0,0 +1,182 @@
+"""FT-P-17 — Mid-flight tile generation + landing-time upload (AZ-422 / AC-8.4).
+
+The full scenario:
+
+1. The SUT cold-starts against an empty ``mid-flight-tile-output/``
+   FDR directory + the bind-mounted Derkachi fixture.
+2. Replay 5 min of Derkachi at the SUT's runtime cadence. While the
+   SUT generates orthorectified tiles it writes one FDR record per
+   tile under ``mid-flight-tile-output`` carrying every field the
+   mock-suite-sat-service ingest schema requires (Mode B Fact #105).
+3. After replay, the test simulates a landing event (mechanism is
+   public-input — ``simulate_landing()`` MAVLink command, owned by
+   AZ-595 fixture builder); the SUT then uploads every generated
+   tile to ``mock-suite-sat-service``.
+4. The test parses the FDR archive for generated tiles, fetches the
+   mock-service audit log, and asserts:
+   * AC-1: ≥ 1 tile per ~3 s of high-quality nav frames.
+   * AC-2: every tile has all Mode B Fact #105 fields populated.
+   * AC-3: no two tiles share footprint within ±1 m AND GSD within ±5 %.
+   * AC-4: every generated tile_id is in the audit log (HTTP 202).
+   * AC-7: parameterised across ``(fc_adapter, vio_strategy)``.
+
+FT-N-06 (AC-5/AC-6) is a separate file: ``test_ft_n_06_mid_flight_freshness.py``.
+
+Gated on:
+
+* ``sitl_replay_ready`` — full replay requires the SITL fixture.
+* ``runner.helpers.mid_flight_tile_evaluator`` — pure-logic evaluator
+  covered by ``e2e/_unit_tests/helpers/test_mid_flight_tile_evaluator.py``.
+* ``runner.helpers.mock_suite_sat_audit.fetch_audit`` — HTTP wrapper
+  covered by ``e2e/_unit_tests/helpers/test_mock_suite_sat_audit.py``.
+* ``FT_P_17_HIGH_QUALITY_WINDOW_S_ENV`` — the fixture builder records
+  the total wall-clock seconds of high-quality nav frames produced
+  by the replay (per AC-2.1a normal-segment criterion). Without this
+  env var the scenario can't compute the AC-1 denominator and skips.
+"""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+
+import pytest
+
+from runner.helpers import mid_flight_tile_evaluator as mfe
+from runner.helpers import mock_suite_sat_audit
+
+FT_P_17_HIGH_QUALITY_WINDOW_S_ENV = "FT_P_17_HIGH_QUALITY_WINDOW_S"
+
+
+@pytest.mark.traces_to("AC-8.4,AC-1,AC-2,AC-3,AC-4,AC-7")
+def test_ft_p_17_mid_flight_tiles(
+    fc_adapter: str,
+    vio_strategy: str,
+    evidence_dir,  # type: ignore[no-untyped-def]
+    run_id: str,
+    nfr_recorder,  # type: ignore[no-untyped-def]
+    sitl_replay_ready: bool,
+    mock_suite_sat_url: str,
+) -> None:
+    """Full FT-P-17 scenario (AC-8.4)."""
+    if not sitl_replay_ready:
+        pytest.skip(
+            "FT-P-17 requires `E2E_SITL_REPLAY_DIR` to point at a SITL replay "
+            "fixture exposing `mid-flight-tile-output` FDR records and the "
+            "post-landing audit population on mock-suite-sat-service "
+            "(AZ-595 + AZ-422 fixture builder). Pure-logic AC-8.4 coverage "
+            "lives in e2e/_unit_tests/helpers/test_mid_flight_tile_evaluator.py."
+        )
+
+    high_quality_window_s_str = os.environ.get(FT_P_17_HIGH_QUALITY_WINDOW_S_ENV)
+    if not high_quality_window_s_str:
+        pytest.skip(
+            f"FT-P-17 needs `{FT_P_17_HIGH_QUALITY_WINDOW_S_ENV}` env var "
+            "(total wall-clock seconds of high-quality nav frames per "
+            "AC-2.1a). The fixture builder records this from the replay's "
+            "segment-quality FDR records."
+        )
+    try:
+        high_quality_window_s = float(high_quality_window_s_str)
+    except ValueError as exc:
+        pytest.fail(
+            f"FT-P-17: `{FT_P_17_HIGH_QUALITY_WINDOW_S_ENV}` must parse as "
+            f"float; got {high_quality_window_s_str!r}: {exc}"
+        )
+
+    from runner.helpers import fdr_reader
+
+    fdr_root = Path(evidence_dir).parent / f"run-{run_id}" / "fdr"
+    tiles = list(_extract_tiles_from_fdr(fdr_reader, fdr_root))
+    if not tiles:
+        pytest.fail(
+            f"FT-P-17: no `{mfe.MID_FLIGHT_TILE_FDR_KIND}` FDR records under "
+            f"{fdr_root}. The SUT must generate at least one tile per AC-1."
+        )
+
+    audit_entries = mock_suite_sat_audit.fetch_audit(mock_suite_sat_url, run_id=run_id)
+
+    rate_report = mfe.evaluate_tile_generation_rate(tiles, high_quality_window_s)
+    quality_report = mfe.evaluate_tile_quality_metadata(tiles)
+    dedup_report = mfe.evaluate_dedup(tiles)
+    upload_report = mfe.evaluate_upload_acks(tiles, audit_entries)
+
+    nfr_recorder.record_metric(
+        "ft_p_17.tile_count", float(rate_report.tile_count), ac_id="AC-1"
+    )
+    nfr_recorder.record_metric(
+        "ft_p_17.observed_rate_per_3s", rate_report.observed_rate_per_3s, ac_id="AC-1"
+    )
+    nfr_recorder.record_metric(
+        "ft_p_17.high_quality_window_s", high_quality_window_s, ac_id="AC-1"
+    )
+    nfr_recorder.record_metric(
+        "ft_p_17.tile_quality_failures",
+        float(len(quality_report.failing_entries)),
+        ac_id="AC-2",
+    )
+    nfr_recorder.record_metric(
+        "ft_p_17.dedup_duplicate_pairs",
+        float(dedup_report.duplicate_count),
+        ac_id="AC-3",
+    )
+    nfr_recorder.record_metric(
+        "ft_p_17.audit_missing_count",
+        float(len(upload_report.missing_from_audit)),
+        ac_id="AC-4",
+    )
+
+    assert rate_report.passes, (
+        f"AC-1 (≥1 tile per {mfe.MIN_TILES_PER_HIGH_QUALITY_WINDOW_S} s) failed: "
+        f"{rate_report.tile_count} tiles over {high_quality_window_s} s "
+        f"high-quality window → rate={rate_report.observed_rate_per_3s:.3f}/3s"
+    )
+    assert quality_report.passes, (
+        "AC-2 (every tile has Mode B Fact #105 quality fields) failed: "
+        f"failures={[(e.tile_id, e.missing_top_level_fields, e.missing_quality_fields) for e in quality_report.failing_entries]}"
+    )
+    assert dedup_report.passes, (
+        "AC-3 (no duplicate footprint+GSD bins) failed: "
+        f"duplicate_pairs={dedup_report.duplicate_pairs}"
+    )
+    assert upload_report.passes, (
+        "AC-4 (landing-event upload accepted) failed: "
+        f"generated={len(upload_report.generated_tile_ids)}, "
+        f"audited={len(upload_report.audit_tile_ids)}, "
+        f"missing={upload_report.missing_from_audit}"
+    )
+
+
+def _extract_tiles_from_fdr(fdr_reader, fdr_root: Path):  # type: ignore[no-untyped-def]
+    """Yield ``TileSpec``s from every ``mid-flight-tile-output`` FDR record.
+
+    Each record's payload mirrors the mock-suite-sat-service TilePublishRequest
+    shape; the scenario only projects it onto a ``TileSpec`` and lets the
+    evaluators do the AC math.
+    """
+    for rec in fdr_reader.iter_records(fdr_root):
+        if rec.record_type != mfe.MID_FLIGHT_TILE_FDR_KIND:
+            continue
+        p = rec.payload
+        try:
+            bbox = tuple(p["bbox_wgs84"])  # type: ignore[index]
+        except (KeyError, TypeError):
+            continue
+        if len(bbox) != 4:
+            continue
+        quality = p.get("quality") if isinstance(p.get("quality"), dict) else {}
+        capture_utc: str | None = None
+        if isinstance(quality, dict):
+            raw_capture = quality.get("capture_utc")
+            if isinstance(raw_capture, str):
+                capture_utc = raw_capture
+        yield mfe.TileSpec(
+            tile_id=str(p.get("tile_id") or ""),
+            bbox_wgs84=(float(bbox[0]), float(bbox[1]), float(bbox[2]), float(bbox[3])),
+            zoom_level=int(p.get("zoom_level") or 0),
+            descriptor_sha256=str(p.get("descriptor_sha256") or ""),
+            payload_size_bytes=int(p.get("payload_size_bytes") or 0),
+            quality=dict(quality) if isinstance(quality, dict) else {},
+            generated_at_monotonic_ms=int(rec.monotonic_ms),
+            capture_utc_iso=capture_utc,
+        )