[AZ-422] Add FT-P-17 + FT-N-06 mid-flight tile blackbox tests

Implement the AC-8.4 and AC-NEW-6 blackbox scenarios for mid-flight
tile generation, dedup, landing-time upload, and freshness gating.

Helpers:
- runner/helpers/mid_flight_tile_evaluator.py — pure-logic evaluators
  for tile generation rate, Mode B Fact #105 schema check, footprint+
  GSD dedup (via geo.distance_m), upload-audit reconciliation, and
  the AC-5/AC-6 capture_utc + freshness-gate checks.
- runner/helpers/mock_suite_sat_audit.py — httpx wrapper for the
  mock-suite-sat-service /tiles/audit endpoint with strict response-
  shape validation.

Scenarios:
- tests/positive/test_ft_p_17_mid_flight_tiles.py
- tests/negative/test_ft_n_06_mid_flight_freshness.py

Both skip when sitl_replay_ready is false and fail loudly when fixture
records are missing (tests-as-gates discipline). 52 new unit tests
(41 evaluator + 11 audit client) cover every helper branch.

Review: PASS_WITH_WARNINGS (2 Low — duplicate haversine carry-over,
upstream production dependency surface).

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-17 15:28:39 +03:00
parent 1ee54b414b
commit 5def1a3eb3
11 changed files with 1782 additions and 2 deletions
@@ -0,0 +1,126 @@
"""FT-N-06 — Mid-flight tile current-timestamp + fresh-treatment (AZ-422 / AC-NEW-6).
The full scenario:
1. Same 5 min Derkachi replay as FT-P-17; the SUT generates one
FDR ``mid-flight-tile-output`` record per tile.
2. Inspect each tile's manifest entry:
* AC-5: ``|capture_utc - generated_at_monotonic_ms| ≤ 60 s``.
* AC-6: no FDR ``tile-load-rejected`` record with
``reason == "stale"`` carries any of the generated tile IDs
(a fresh tile must not be misclassified by the freshness gate).
Gated on:
* ``sitl_replay_ready`` — full replay requires the SITL fixture.
* ``runner.helpers.mid_flight_tile_evaluator`` — pure-logic
evaluator covered by
``e2e/_unit_tests/helpers/test_mid_flight_tile_evaluator.py``.
This is a "negative" test in the sense that it asserts a *non*-event:
no stale rejection of a freshly generated tile. The test still skips
cleanly when the SITL fixture is not prepared.
"""
from __future__ import annotations
from pathlib import Path
import pytest
from runner.helpers import mid_flight_tile_evaluator as mfe
@pytest.mark.traces_to("AC-NEW-6,AC-5,AC-6,AC-7")
def test_ft_n_06_mid_flight_freshness(
    fc_adapter: str,
    vio_strategy: str,
    evidence_dir,  # type: ignore[no-untyped-def]
    run_id: str,
    nfr_recorder,  # type: ignore[no-untyped-def]
    sitl_replay_ready: bool,
) -> None:
    """Run the FT-N-06 freshness scenario (AC-NEW-6) over recorded FDR data.

    Flow:
      1. Skip when ``sitl_replay_ready`` is falsy.
      2. Walk the FDR records under ``<evidence_dir parent>/run-<run_id>/fdr``,
         collecting projected mid-flight tiles and the payloads of
         tile-load-rejected records.
      3. Fail outright when no mid-flight tile could be collected.
      4. Record three metrics on ``nfr_recorder`` and assert that both the
         capture-date report (AC-5) and the freshness-gate report (AC-6) pass.

    ``fc_adapter`` and ``vio_strategy`` are requested as fixtures but are not
    referenced in the body.
    """
    if not sitl_replay_ready:
        pytest.skip(
            "FT-N-06 requires `E2E_SITL_REPLAY_DIR` to point at a SITL replay "
            "fixture exposing `mid-flight-tile-output` FDR records and any "
            "`tile-load-rejected` events emitted by the freshness gate "
            "(AZ-595 + AZ-422 fixture builder). Pure-logic AC-NEW-6 "
            "coverage lives in "
            "e2e/_unit_tests/helpers/test_mid_flight_tile_evaluator.py."
        )

    # Imported only once the skip guard has been passed.
    from runner.helpers import fdr_reader

    fdr_root = Path(evidence_dir).parent / f"run-{run_id}" / "fdr"

    tiles: list[mfe.TileSpec] = []
    rejection_payloads: list[dict] = []
    for record in fdr_reader.iter_records(fdr_root):
        if record.record_type == mfe.MID_FLIGHT_TILE_FDR_KIND:
            projected = _project_tile(record)
            if projected is None:
                # Malformed tile records are dropped rather than evaluated.
                continue
            tiles.append(projected)
        elif record.record_type == mfe.TILE_LOAD_REJECTED_FDR_KIND:
            # Copy the payload so later evaluation works on a plain dict.
            rejection_payloads.append(dict(record.payload))

    if not tiles:
        no_tiles_message = (
            f"FT-N-06: no `{mfe.MID_FLIGHT_TILE_FDR_KIND}` FDR records at "
            f"{fdr_root}. The fixture builder must produce at least one "
            "generated tile for the freshness/stale check to be meaningful."
        )
        pytest.fail(no_tiles_message)

    capture_report = mfe.evaluate_capture_date_freshness(tiles)
    freshness_report = mfe.evaluate_freshness_gate(tiles, rejection_payloads)

    # Metrics are recorded before asserting so they exist even on failure.
    tile_count = float(len(tiles))
    nfr_recorder.record_metric("ft_n_06.tile_count", tile_count, ac_id="AC-NEW-6")

    drift_failure_count = float(len(capture_report.failing_entries))
    nfr_recorder.record_metric(
        "ft_n_06.capture_drift_failures", drift_failure_count, ac_id="AC-5"
    )

    stale_rejection_count = float(len(freshness_report.stale_rejections))
    nfr_recorder.record_metric(
        "ft_n_06.stale_rejection_count", stale_rejection_count, ac_id="AC-6"
    )

    assert capture_report.passes, (
        f"AC-5 (|capture_utc - generated_at| ≤ {capture_report.tolerance_s} s) failed: "
        f"failures={[(e.tile_id, e.drift_s) for e in capture_report.failing_entries]}"
    )
    assert freshness_report.passes, (
        "AC-6 (no `tile-load-rejected: stale` for freshly generated tile) failed: "
        f"stale_rejected_tile_ids={freshness_report.stale_rejections}"
    )
def _project_tile(rec) -> mfe.TileSpec | None: # type: ignore[no-untyped-def]
"""Project an FDR record onto a ``TileSpec``; ``None`` if malformed."""
p = rec.payload
try:
bbox = tuple(p["bbox_wgs84"]) # type: ignore[index]
except (KeyError, TypeError):
return None
if len(bbox) != 4:
return None
quality = p.get("quality") if isinstance(p.get("quality"), dict) else {}
capture_utc: str | None = None
if isinstance(quality, dict):
raw_capture = quality.get("capture_utc")
if isinstance(raw_capture, str):
capture_utc = raw_capture
return mfe.TileSpec(
tile_id=str(p.get("tile_id") or ""),
bbox_wgs84=(float(bbox[0]), float(bbox[1]), float(bbox[2]), float(bbox[3])),
zoom_level=int(p.get("zoom_level") or 0),
descriptor_sha256=str(p.get("descriptor_sha256") or ""),
payload_size_bytes=int(p.get("payload_size_bytes") or 0),
quality=dict(quality) if isinstance(quality, dict) else {},
generated_at_monotonic_ms=int(rec.monotonic_ms),
capture_utc_iso=capture_utc,
)