[AZ-422] Add FT-P-17 + FT-N-06 mid-flight tile blackbox tests

Implement the AC-8.4 and AC-NEW-6 blackbox scenarios for mid-flight
tile generation, dedup, landing-time upload, and freshness gating.

Helpers:
- runner/helpers/mid_flight_tile_evaluator.py — pure-logic evaluators
  for tile generation rate, Mode B Fact #105 schema check, footprint+
  GSD dedup (via geo.distance_m), upload-audit reconciliation, and
  the AC-5/AC-6 capture_utc + freshness-gate checks.
- runner/helpers/mock_suite_sat_audit.py — httpx wrapper for the
  mock-suite-sat-service /tiles/audit endpoint with strict response-
  shape validation.

Scenarios:
- tests/positive/test_ft_p_17_mid_flight_tiles.py
- tests/negative/test_ft_n_06_mid_flight_freshness.py

Both skip when sitl_replay_ready is false and fail loudly when fixture
records are missing (tests-as-gates discipline). 52 new unit tests
(41 evaluator + 11 audit client) cover every helper branch.

Review: PASS_WITH_WARNINGS (2 Low — duplicate haversine carry-over,
upstream production dependency surface).

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-17 15:28:39 +03:00
parent 1ee54b414b
commit 5def1a3eb3
11 changed files with 1782 additions and 2 deletions
@@ -0,0 +1,486 @@
"""Unit tests for ``runner.helpers.mid_flight_tile_evaluator`` (AZ-422).
Pure-logic AC-8.4 / AC-NEW-6 coverage for FT-P-17 / FT-N-06.
The scenarios in ``e2e/tests/positive/test_ft_p_17_mid_flight_tiles.py``
and ``e2e/tests/negative/test_ft_n_06_mid_flight_freshness.py`` exercise
the same helpers end-to-end when the SITL fixture is prepared; this
file covers them in isolation.
"""
from __future__ import annotations
import pytest
from runner.helpers import mid_flight_tile_evaluator as mfe
def _full_quality(**overrides: object) -> dict[str, object]:
base: dict[str, object] = {
"capture_utc": "2026-05-17T11:30:00Z",
"source_provider": "operator-supplied",
"resolution_m_per_px": 0.4,
"cloud_coverage_pct": 5.0,
"geo_accuracy_m": 2.0,
}
base.update(overrides)
return base
def _tile(
    tile_id: str = "tile_001",
    *,
    bbox: tuple[float, float, float, float] = (36.20, 49.95, 36.21, 49.96),
    zoom: int = 18,
    sha: str = "a" * 64,
    payload_size: int = 1024,
    quality: dict[str, object] | None = None,
    generated_at_ms: int = 1_700_000_000_000,
    capture_utc: str | None = "2026-05-17T11:30:00Z",
) -> mfe.TileSpec:
    """Build a ``mfe.TileSpec`` with test defaults for every field.

    When ``quality`` is ``None`` a full quality dict is generated whose
    ``capture_utc`` entry mirrors the ``capture_utc`` argument, or the default
    timestamp when that argument is falsy. ``capture_utc`` itself is passed
    through unchanged as ``capture_utc_iso``.
    """
    if quality is None:
        resolved_quality = _full_quality(
            capture_utc=capture_utc or "2026-05-17T11:30:00Z"
        )
    else:
        resolved_quality = quality
    return mfe.TileSpec(
        tile_id=tile_id,
        bbox_wgs84=bbox,
        zoom_level=zoom,
        descriptor_sha256=sha,
        payload_size_bytes=payload_size,
        quality=resolved_quality,
        generated_at_monotonic_ms=generated_at_ms,
        capture_utc_iso=capture_utc,
    )
# ─────────────────────── bbox_centre ───────────────────────
def test_bbox_centre_returns_midpoint() -> None:
    """``bbox_centre`` returns the (lat, lon) midpoint of the bbox."""
    # Act
    centre = mfe.bbox_centre((36.0, 50.0, 36.2, 50.2))
    centre_lat, centre_lon = centre
    # Assert — the result unpacks as (lat, lon)
    assert centre_lat == pytest.approx(50.1)
    assert centre_lon == pytest.approx(36.1)
# ─────────────────────── evaluate_tile_generation_rate ───────────────────────
def test_evaluate_tile_generation_rate_one_per_3s_exact_pass() -> None:
    """Ten tiles in a 30 s window give a rate of 1.0 per 3 s and pass."""
    # Arrange — 10 tiles / 30 s == 1 tile / 3 s
    ten_tiles = [_tile(f"t_{index}") for index in range(10)]
    # Act
    result = mfe.evaluate_tile_generation_rate(
        ten_tiles, high_quality_window_s=30.0
    )
    # Assert
    assert result.passes
    assert result.observed_rate_per_3s == pytest.approx(1.0)
def test_evaluate_tile_generation_rate_under_min_fails() -> None:
    """A single tile in a 30 s window (0.1 tile / 3 s) does not pass."""
    # Act — one tile only
    result = mfe.evaluate_tile_generation_rate(
        [_tile("t_0")], high_quality_window_s=30.0
    )
    # Assert
    assert not result.passes
def test_evaluate_tile_generation_rate_zero_window_fails() -> None:
    """A zero-length high-quality window yields a failing report."""
    # Act
    result = mfe.evaluate_tile_generation_rate(
        [_tile()], high_quality_window_s=0
    )
    # Assert
    assert not result.passes
def test_evaluate_tile_generation_rate_invalid_window_per_tile_raises() -> None:
    """``window_s_per_tile=0`` raises ``ValueError`` naming the argument."""
    single_tile = [_tile()]
    with pytest.raises(ValueError, match="window_s_per_tile"):
        mfe.evaluate_tile_generation_rate(single_tile, 30.0, window_s_per_tile=0)
def test_evaluate_tile_generation_rate_empty_tiles_fails() -> None:
    """No generated tiles at all yields a failing report."""
    # Act
    result = mfe.evaluate_tile_generation_rate([], high_quality_window_s=30.0)
    # Assert
    assert not result.passes
# ─────────────────────── evaluate_tile_quality_metadata ───────────────────────
def test_evaluate_tile_quality_metadata_all_fields_present_passes() -> None:
    """A default tile (full quality dict) passes the metadata check."""
    # Arrange
    complete_tile = _tile()
    # Act
    result = mfe.evaluate_tile_quality_metadata([complete_tile])
    # Assert
    assert result.passes
def test_evaluate_tile_quality_metadata_missing_quality_field_fails() -> None:
    """Dropping ``resolution_m_per_px`` fails and reports exactly that field."""
    # Arrange
    quality = _full_quality()
    quality.pop("resolution_m_per_px")
    # Act
    result = mfe.evaluate_tile_quality_metadata([_tile(quality=quality)])
    # Assert
    assert not result.passes
    first_failure = result.failing_entries[0]
    assert first_failure.missing_quality_fields == ("resolution_m_per_px",)
def test_evaluate_tile_quality_metadata_partial_quality_field_drop_fails() -> None:
    """Dropping ``cloud_coverage_pct`` fails and lists it as missing."""
    # Arrange — drop one of the AC-2 Mode B Fact #105 quality fields
    quality = _full_quality()
    quality.pop("cloud_coverage_pct")
    # Act
    result = mfe.evaluate_tile_quality_metadata([_tile(quality=quality)])
    # Assert
    assert not result.passes
    reported_missing = result.failing_entries[0].missing_quality_fields
    assert "cloud_coverage_pct" in reported_missing
def test_evaluate_tile_quality_metadata_quality_not_dict_fails() -> None:
    """A tile whose ``quality`` is ``None`` reports every required field missing."""
    # Arrange — construct with a valid (empty) dict first, then swap it out.
    bad_tile = mfe.TileSpec(
        tile_id="bad",
        bbox_wgs84=(0, 0, 1, 1),
        zoom_level=18,
        descriptor_sha256="a" * 64,
        payload_size_bytes=1,
        quality={},
        generated_at_monotonic_ms=0,
    )
    # object.__setattr__ is used rather than plain attribute assignment to
    # put a non-dict value into the ``quality`` field.
    object.__setattr__(bad_tile, "quality", None)
    # Act
    result = mfe.evaluate_tile_quality_metadata([bad_tile])
    # Assert
    assert not result.passes
    reported_missing = set(result.failing_entries[0].missing_quality_fields)
    assert reported_missing == set(mfe.TILE_REQUIRED_QUALITY_FIELDS)
def test_evaluate_tile_quality_metadata_empty_list_fails() -> None:
    """An empty tile list yields a failing metadata report."""
    # Act
    result = mfe.evaluate_tile_quality_metadata([])
    # Assert
    assert not result.passes
def test_evaluate_tile_quality_metadata_null_quality_field_value_fails() -> None:
    """A quality field that is present but ``None`` fails the check."""
    # Arrange — key exists, value is null
    tile_with_null = _tile(quality=_full_quality(cloud_coverage_pct=None))
    # Act
    result = mfe.evaluate_tile_quality_metadata([tile_with_null])
    # Assert
    assert not result.passes
# ─────────────────────── evaluate_dedup ───────────────────────
def test_evaluate_dedup_two_tiles_same_centre_same_gsd_dupes() -> None:
    """Two tiles sharing a bbox and GSD are reported as the pair (a, b)."""
    # Arrange — same bbox + identical GSD
    shared_bbox = (36.20, 49.95, 36.21, 49.96)
    pair = [
        _tile(name, bbox=shared_bbox, quality=_full_quality(resolution_m_per_px=0.5))
        for name in ("a", "b")
    ]
    # Act
    result = mfe.evaluate_dedup(pair)
    # Assert
    assert not result.passes
    assert result.duplicate_pairs == (("a", "b"),)
def test_evaluate_dedup_far_apart_bboxes_pass() -> None:
    """Tiles whose bboxes are far apart are not flagged as duplicates."""
    # Arrange — bboxes offset by 0.1° of longitude at ~49.95°N, which is
    # roughly 7 km between centres (not 1 km: one degree of longitude is
    # about 71.6 km at this latitude).
    tiles = [
        _tile("a", bbox=(36.20, 49.95, 36.21, 49.96)),
        _tile("b", bbox=(36.30, 49.95, 36.31, 49.96)),
    ]
    # Act
    report = mfe.evaluate_dedup(tiles)
    # Assert
    assert report.passes
def test_evaluate_dedup_close_centres_different_gsd_pass() -> None:
    """Same bbox but GSD 0.5 vs 1.0 is not treated as a duplicate."""
    # Arrange — same bbox but very different GSD (0.5 vs 1.0 = 50% delta > 5%)
    shared_bbox = (36.20, 49.95, 36.21, 49.96)
    fine = _tile("a", bbox=shared_bbox, quality=_full_quality(resolution_m_per_px=0.5))
    coarse = _tile("b", bbox=shared_bbox, quality=_full_quality(resolution_m_per_px=1.0))
    # Act
    result = mfe.evaluate_dedup([fine, coarse])
    # Assert
    assert result.passes
def test_evaluate_dedup_close_centres_close_gsd_dupes() -> None:
    """Same bbox with GSD 0.50 vs 0.51 is flagged as a duplicate."""
    # Arrange — same bbox + GSD 0.50 vs 0.51 = 2% delta ≤ 5%
    shared_bbox = (36.20, 49.95, 36.21, 49.96)
    first = _tile("a", bbox=shared_bbox, quality=_full_quality(resolution_m_per_px=0.50))
    second = _tile("b", bbox=shared_bbox, quality=_full_quality(resolution_m_per_px=0.51))
    # Act
    result = mfe.evaluate_dedup([first, second])
    # Assert
    assert not result.passes
def test_evaluate_dedup_missing_gsd_skips_pair() -> None:
    """A pair where one tile lacks ``resolution_m_per_px`` is not a duplicate."""
    # Arrange — one tile missing resolution_m_per_px → cannot be a duplicate
    shared_bbox = (36.20, 49.95, 36.21, 49.96)
    quality_without_gsd = _full_quality()
    quality_without_gsd.pop("resolution_m_per_px")
    without_gsd = _tile("a", bbox=shared_bbox, quality=quality_without_gsd)
    with_gsd = _tile("b", bbox=shared_bbox)
    # Act
    result = mfe.evaluate_dedup([without_gsd, with_gsd])
    # Assert
    assert result.passes
def test_evaluate_dedup_empty_list_passes() -> None:
    """With no tiles, the dedup report passes."""
    # Act
    result = mfe.evaluate_dedup([])
    # Assert
    assert result.passes
def test_evaluate_dedup_invalid_tolerances_raise() -> None:
    """A negative value for either tolerance raises ``ValueError`` naming it."""
    # Checked in the same order as before: footprint first, then GSD.
    for tolerance_name in ("footprint_tolerance_m", "gsd_tolerance_fraction"):
        with pytest.raises(ValueError, match=tolerance_name):
            mfe.evaluate_dedup([_tile()], **{tolerance_name: -1})
def test_evaluate_dedup_three_tiles_two_pairs() -> None:
    """Among three tiles, only the co-located pair (a, b) is reported.

    NOTE(review): despite the "two_pairs" suffix in the name, exactly one
    duplicate pair is expected here — tile ``c`` is far from both others.
    """
    # Arrange — a, b are dupes; c is far away
    bbox_close = (36.20, 49.95, 36.21, 49.96)
    bbox_far = (36.40, 49.95, 36.41, 49.96)
    tiles = [
        _tile("a", bbox=bbox_close),
        _tile("b", bbox=bbox_close),
        _tile("c", bbox=bbox_far),
    ]
    # Act
    report = mfe.evaluate_dedup(tiles)
    # Assert — the verdict must fail as well as list the pair, matching the
    # two-tile duplicate test in this module.
    assert not report.passes
    assert report.duplicate_pairs == (("a", "b"),)
# ─────────────────────── evaluate_upload_acks ───────────────────────
def test_evaluate_upload_acks_all_acked_passes() -> None:
    """Every generated tile appearing in the audit passes with nothing missing."""
    # Arrange
    generated = [_tile("t_a"), _tile("t_b")]
    audit_entries = [{"tile_id": "t_a"}, {"tile_id": "t_b"}]
    # Act
    result = mfe.evaluate_upload_acks(generated, audit_entries)
    # Assert
    assert result.passes
    assert result.missing_from_audit == ()
def test_evaluate_upload_acks_missing_tile_fails() -> None:
    """A generated tile absent from the audit fails and is listed as missing."""
    # Arrange — t_b is never acknowledged
    generated = [_tile("t_a"), _tile("t_b")]
    audit_entries = [{"tile_id": "t_a"}]
    # Act
    result = mfe.evaluate_upload_acks(generated, audit_entries)
    # Assert
    assert not result.passes
    assert result.missing_from_audit == ("t_b",)
def test_evaluate_upload_acks_audit_extra_tiles_ok() -> None:
    """Audit entries for tiles that were not generated do not cause a failure."""
    # Arrange — audit may contain stale entries from earlier runs
    generated = [_tile("t_a")]
    audit_entries = [{"tile_id": "t_a"}, {"tile_id": "old_run_tile"}]
    # Act
    result = mfe.evaluate_upload_acks(generated, audit_entries)
    # Assert
    assert result.passes
def test_evaluate_upload_acks_empty_generated_fails() -> None:
    """With no generated tiles the report fails even if the audit has entries."""
    # Arrange
    audit_entries = [{"tile_id": "x"}]
    # Act
    result = mfe.evaluate_upload_acks([], audit_entries)
    # Assert
    assert not result.passes
def test_evaluate_upload_acks_audit_entry_missing_tile_id_skipped() -> None:
    """An audit entry without a ``tile_id`` key does not break reconciliation."""
    # Arrange — first entry has no tile_id key
    generated = [_tile("t_a")]
    audit_entries = [{"not_tile_id": "garbage"}, {"tile_id": "t_a"}]
    # Act
    result = mfe.evaluate_upload_acks(generated, audit_entries)
    # Assert
    assert result.passes
def test_evaluate_upload_acks_non_dict_audit_entries_skipped() -> None:
    """A non-dict audit entry does not break reconciliation."""
    # Arrange — first entry is a bare string
    generated = [_tile("t_a")]
    audit_entries = ["not a dict", {"tile_id": "t_a"}]  # type: ignore[list-item]
    # Act
    result = mfe.evaluate_upload_acks(generated, audit_entries)
    # Assert
    assert result.passes
# ─────────────────────── evaluate_capture_date_freshness ───────────────────────
def test_evaluate_capture_date_freshness_zero_drift_passes() -> None:
    """A capture timestamp equal to the generation instant passes."""
    # Arrange — 1_700_000_000_000 ms == 1_700_000_000 s == 2023-11-14T22:13:20Z
    generated_at_ms = 1_700_000_000_000
    same_instant_iso = "2023-11-14T22:13:20Z"
    fresh_tile = _tile(capture_utc=same_instant_iso, generated_at_ms=generated_at_ms)
    # Act
    result = mfe.evaluate_capture_date_freshness([fresh_tile])
    # Assert
    assert result.passes
def test_evaluate_capture_date_freshness_within_tolerance_passes() -> None:
    """A capture 30 s before generation passes with the default tolerance."""
    # Arrange — capture 30s before generation
    slightly_old_tile = _tile(
        capture_utc="2023-11-14T22:12:50Z",
        generated_at_ms=1_700_000_000_000,
    )
    # Act
    result = mfe.evaluate_capture_date_freshness([slightly_old_tile])
    # Assert
    assert result.passes
def test_evaluate_capture_date_freshness_over_tolerance_fails() -> None:
    """A capture 120 s before generation fails with the default tolerance."""
    # Arrange — capture 120s before generation
    too_old_tile = _tile(
        capture_utc="2023-11-14T22:11:20Z",
        generated_at_ms=1_700_000_000_000,
    )
    # Act
    result = mfe.evaluate_capture_date_freshness([too_old_tile])
    # Assert
    assert not result.passes
def test_evaluate_capture_date_freshness_unparseable_capture_fails() -> None:
    """An unparseable capture timestamp fails and leaves ``drift_s`` as ``None``."""
    # Arrange
    garbage_tile = _tile(capture_utc="not-a-timestamp")
    # Act
    result = mfe.evaluate_capture_date_freshness([garbage_tile])
    # Assert
    assert not result.passes
    first_entry = result.entries[0]
    assert first_entry.drift_s is None
def test_evaluate_capture_date_freshness_missing_capture_falls_back_to_quality_dict() -> None:
    """With ``capture_utc_iso`` unset, a drift is still computed and the tile fails."""
    # Arrange — capture_utc_iso None but quality dict carries the field
    tile_without_iso = _tile(capture_utc=None)
    # Act
    result = mfe.evaluate_capture_date_freshness([tile_without_iso])
    # Assert
    # The quality dict's "capture_utc" is 2026-05-17T11:30:00Z while the
    # generation time is 2023-11-14, so the drift is huge and must fail. A
    # non-None drift shows a timestamp was actually found and parsed.
    first_entry = result.entries[0]
    assert not result.passes
    assert first_entry.drift_s is not None
def test_evaluate_capture_date_freshness_custom_tolerance() -> None:
    """A 120 s drift passes once ``tolerance_s`` is widened to 200 s."""
    # Arrange — capture 120s before; widen tolerance to 200s
    old_tile = _tile(
        capture_utc="2023-11-14T22:11:20Z",
        generated_at_ms=1_700_000_000_000,
    )
    # Act
    result = mfe.evaluate_capture_date_freshness([old_tile], tolerance_s=200.0)
    # Assert
    assert result.passes
def test_evaluate_capture_date_freshness_invalid_tolerance_raises() -> None:
    """``tolerance_s=0`` raises ``ValueError`` naming the argument."""
    single_tile = [_tile()]
    with pytest.raises(ValueError, match="tolerance_s"):
        mfe.evaluate_capture_date_freshness(single_tile, tolerance_s=0)
def test_evaluate_capture_date_freshness_empty_list_fails() -> None:
    """An empty tile list yields a failing freshness report."""
    # Act
    result = mfe.evaluate_capture_date_freshness([])
    # Assert
    assert not result.passes
# ─────────────────────── evaluate_freshness_gate ───────────────────────
def test_evaluate_freshness_gate_no_rejections_passes() -> None:
    """With no rejection records, the freshness gate passes."""
    # Arrange
    fresh_tiles = [_tile("t_a"), _tile("t_b")]
    # Act
    result = mfe.evaluate_freshness_gate(fresh_tiles, [])
    # Assert
    assert result.passes
def test_evaluate_freshness_gate_unrelated_rejection_passes() -> None:
    """A stale rejection for a tile outside the fresh set does not fail the gate."""
    # Arrange — rejection for some other tile
    fresh_tiles = [_tile("t_a")]
    rejection_records = [{"id": "old_tile", "reason": "stale"}]
    # Act
    result = mfe.evaluate_freshness_gate(fresh_tiles, rejection_records)
    # Assert
    assert result.passes
def test_evaluate_freshness_gate_fresh_tile_rejected_stale_fails() -> None:
    """A fresh tile rejected as stale fails the gate and is reported by id."""
    # Arrange
    fresh_tiles = [_tile("t_a")]
    rejection_records = [{"id": "t_a", "reason": "stale"}]
    # Act
    result = mfe.evaluate_freshness_gate(fresh_tiles, rejection_records)
    # Assert
    assert not result.passes
    assert result.stale_rejections == ("t_a",)
def test_evaluate_freshness_gate_non_stale_reason_ignored() -> None:
    """A rejection with reason ``below_floor`` does not fail the gate."""
    # Arrange — the tile is rejected, but not for staleness
    fresh_tiles = [_tile("t_a")]
    rejection_records = [{"id": "t_a", "reason": "below_floor"}]
    # Act
    result = mfe.evaluate_freshness_gate(fresh_tiles, rejection_records)
    # Assert
    assert result.passes
def test_evaluate_freshness_gate_tile_id_key_variant() -> None:
    """A rejection keyed by ``tile_id`` instead of ``id`` still fails the gate."""
    # Arrange — some rejection records use "tile_id" instead of "id"
    fresh_tiles = [_tile("t_a")]
    rejection_records = [{"tile_id": "t_a", "reason": "stale"}]
    # Act
    result = mfe.evaluate_freshness_gate(fresh_tiles, rejection_records)
    # Assert
    assert not result.passes
def test_evaluate_freshness_gate_non_dict_payload_skipped() -> None:
    """A non-dict rejection record does not hide the stale rejection after it."""
    # Arrange — first record is a bare string
    fresh_tiles = [_tile("t_a")]
    rejection_records = ["not a dict", {"id": "t_a", "reason": "stale"}]  # type: ignore[list-item]
    # Act
    result = mfe.evaluate_freshness_gate(fresh_tiles, rejection_records)
    # Assert
    assert not result.passes
    assert result.stale_rejections == ("t_a",)
def test_evaluate_freshness_gate_custom_stale_reason() -> None:
    """``stale_reason`` selects which rejection reason counts as stale."""
    # Arrange
    custom_reason = "expired_freshness"
    fresh_tiles = [_tile("t_a")]
    rejection_records = [{"id": "t_a", "reason": custom_reason}]
    # Act
    result = mfe.evaluate_freshness_gate(
        fresh_tiles, rejection_records, stale_reason=custom_reason
    )
    # Assert
    assert not result.passes
@@ -0,0 +1,185 @@
"""Unit tests for ``runner.helpers.mock_suite_sat_audit`` (AZ-422)."""
from __future__ import annotations
from typing import Any
import httpx
import pytest
from runner.helpers import mock_suite_sat_audit
def _transport(handler) -> httpx.MockTransport:  # type: ignore[no-untyped-def]
    """Wrap a request handler callable in an ``httpx.MockTransport``."""
    mock_transport = httpx.MockTransport(handler)
    return mock_transport
# ─────────────────────── happy path ───────────────────────
def test_fetch_audit_returns_entries_list() -> None:
    """``fetch_audit`` returns the ``entries`` list and requests the audit URL."""
    # Arrange — record the URL the client actually requests
    seen: dict[str, Any] = {}

    def handler(request: httpx.Request) -> httpx.Response:
        seen["url"] = str(request.url)
        payload = {
            "run_id": "run_xyz",
            "entries": [
                {"tile_id": "t_a", "received_at": 1.0},
                {"tile_id": "t_b", "received_at": 2.0},
            ],
        }
        return httpx.Response(200, json=payload)

    # Act
    returned = mock_suite_sat_audit.fetch_audit(
        "http://mock-suite-sat-service:8080",
        run_id="run_xyz",
        transport=_transport(handler),
    )
    # Assert
    assert returned == [
        {"tile_id": "t_a", "received_at": 1.0},
        {"tile_id": "t_b", "received_at": 2.0},
    ]
    requested_url = seen["url"]
    assert "run_id=run_xyz" in requested_url
    assert "/tiles/audit" in requested_url
def test_fetch_audit_empty_entries_list_returned_verbatim() -> None:
    """An empty ``entries`` list is returned as an empty list, not an error."""
    # Arrange
    empty_audit = _transport(
        lambda _request: httpx.Response(
            200, json={"run_id": "run_xyz", "entries": []}
        )
    )
    # Act
    returned = mock_suite_sat_audit.fetch_audit(
        "http://service",
        run_id="run_xyz",
        transport=empty_audit,
    )
    # Assert
    assert returned == []
def test_fetch_audit_strips_trailing_slash_in_base_url() -> None:
    """A trailing slash on ``base_url`` does not yield a double slash in the path."""
    # Arrange — collect every requested URL
    requested_urls: list[str] = []

    def handler(request: httpx.Request) -> httpx.Response:
        requested_urls.append(str(request.url))
        return httpx.Response(200, json={"run_id": "run_xyz", "entries": []})

    # Act
    mock_suite_sat_audit.fetch_audit(
        "http://service/",
        run_id="run_xyz",
        transport=_transport(handler),
    )
    # Assert
    last_url = requested_urls[-1]
    assert "//tiles/audit" not in last_url
    assert "/tiles/audit?" in last_url
def test_fetch_audit_custom_audit_path() -> None:
    """``audit_path`` overrides the path used in the request URL."""
    # Arrange — collect every requested URL
    requested_urls: list[str] = []

    def handler(request: httpx.Request) -> httpx.Response:
        requested_urls.append(str(request.url))
        return httpx.Response(200, json={"run_id": "run_xyz", "entries": []})

    # Act
    mock_suite_sat_audit.fetch_audit(
        "http://service",
        run_id="run_xyz",
        audit_path="/mock/audit",
        transport=_transport(handler),
    )
    # Assert
    assert "/mock/audit?" in requested_urls[-1]
# ─────────────────────── error paths ───────────────────────
def test_fetch_audit_empty_base_url_raises() -> None:
    """An empty ``base_url`` raises ``RuntimeError`` mentioning ``base_url``."""
    empty_base_url = ""
    with pytest.raises(RuntimeError, match="base_url"):
        mock_suite_sat_audit.fetch_audit(empty_base_url, run_id="run_xyz")
def test_fetch_audit_empty_run_id_raises() -> None:
    """An empty ``run_id`` raises ``RuntimeError`` mentioning ``run_id``."""
    empty_run_id = ""
    with pytest.raises(RuntimeError, match="run_id"):
        mock_suite_sat_audit.fetch_audit("http://service", run_id=empty_run_id)
def test_fetch_audit_non_2xx_raises() -> None:
    """An HTTP 500 response raises ``RuntimeError`` mentioning the status."""
    # Arrange
    failing_service = _transport(
        lambda _request: httpx.Response(500, text="boom")
    )
    # Act / Assert
    with pytest.raises(RuntimeError, match="HTTP 500"):
        mock_suite_sat_audit.fetch_audit(
            "http://service",
            run_id="run_xyz",
            transport=failing_service,
        )
def test_fetch_audit_non_json_body_raises() -> None:
    """A 200 response whose body is not JSON raises ``RuntimeError``."""
    # Arrange
    non_json_service = _transport(
        lambda _request: httpx.Response(200, text="<<<not json>>>")
    )
    # Act / Assert
    with pytest.raises(RuntimeError, match="not valid JSON"):
        mock_suite_sat_audit.fetch_audit(
            "http://service",
            run_id="run_xyz",
            transport=non_json_service,
        )
def test_fetch_audit_body_not_object_raises() -> None:
    """A JSON array at the top level raises ``RuntimeError``."""
    # Arrange — valid JSON, but a list rather than an object
    array_service = _transport(
        lambda _request: httpx.Response(200, json=["not", "an", "object"])
    )
    # Act / Assert
    with pytest.raises(RuntimeError, match="not a JSON object"):
        mock_suite_sat_audit.fetch_audit(
            "http://service",
            run_id="run_xyz",
            transport=array_service,
        )
def test_fetch_audit_missing_entries_raises() -> None:
    """A JSON object without an ``entries`` key raises ``RuntimeError``."""
    # Arrange
    no_entries_service = _transport(
        lambda _request: httpx.Response(200, json={"run_id": "run_xyz"})
    )
    # Act / Assert
    with pytest.raises(RuntimeError, match="entries"):
        mock_suite_sat_audit.fetch_audit(
            "http://service",
            run_id="run_xyz",
            transport=no_entries_service,
        )
def test_fetch_audit_entries_not_list_raises() -> None:
    """An ``entries`` value that is not a list raises ``RuntimeError``."""
    # Arrange — entries is a string instead of a list
    bad_entries_service = _transport(
        lambda _request: httpx.Response(
            200, json={"run_id": "run_xyz", "entries": "stringly"}
        )
    )
    # Act / Assert
    with pytest.raises(RuntimeError, match="entries"):
        mock_suite_sat_audit.fetch_audit(
            "http://service",
            run_id="run_xyz",
            transport=bad_entries_service,
        )