mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 05:11:13 +00:00
[AZ-422] Add FT-P-17 + FT-N-06 mid-flight tile blackbox tests
Implement the AC-8.4 and AC-NEW-6 blackbox scenarios for mid-flight tile generation, dedup, landing-time upload, and freshness gating. Helpers: - runner/helpers/mid_flight_tile_evaluator.py — pure-logic evaluators for tile generation rate, Mode B Fact #105 schema check, footprint+GSD dedup (via geo.distance_m), upload-audit reconciliation, and the AC-5/AC-6 capture_utc + freshness-gate checks. - runner/helpers/mock_suite_sat_audit.py — httpx wrapper for the mock-suite-sat-service /tiles/audit endpoint with strict response-shape validation. Scenarios: - tests/positive/test_ft_p_17_mid_flight_tiles.py - tests/negative/test_ft_n_06_mid_flight_freshness.py Both skip when sitl_replay_ready is false and fail loudly when fixture records are missing (tests-as-gates discipline). 52 new unit tests (41 evaluator + 11 audit client) cover every helper branch. Review: PASS_WITH_WARNINGS (2 Low — duplicate haversine carry-over, upstream production dependency surface). Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,126 @@
|
||||
"""FT-N-06 — Mid-flight tile current-timestamp + fresh-treatment (AZ-422 / AC-NEW-6).
|
||||
|
||||
The full scenario:
|
||||
|
||||
1. Same 5 min Derkachi replay as FT-P-17; the SUT generates one
|
||||
FDR ``mid-flight-tile-output`` record per tile.
|
||||
2. Inspect each tile's manifest entry:
|
||||
* AC-5: ``|capture_utc - generated_at_monotonic_ms| ≤ 60 s``.
|
||||
* AC-6: no FDR ``tile-load-rejected`` record with
|
||||
``reason == "stale"`` carries any of the generated tile IDs
|
||||
(a fresh tile must not be misclassified by the freshness gate).
|
||||
|
||||
Gated on:
|
||||
|
||||
* ``sitl_replay_ready`` — full replay requires the SITL fixture.
|
||||
* ``runner.helpers.mid_flight_tile_evaluator`` — pure-logic
|
||||
evaluator covered by
|
||||
``e2e/_unit_tests/helpers/test_mid_flight_tile_evaluator.py``.
|
||||
|
||||
This is a "negative" test in the sense that it asserts a *non*-event:
|
||||
no stale rejection of a freshly generated tile. The test still skips
|
||||
cleanly when the SITL fixture is not prepared.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from runner.helpers import mid_flight_tile_evaluator as mfe
|
||||
|
||||
|
||||
@pytest.mark.traces_to("AC-NEW-6,AC-5,AC-6,AC-7")
def test_ft_n_06_mid_flight_freshness(
    fc_adapter: str,
    vio_strategy: str,
    evidence_dir,  # type: ignore[no-untyped-def]
    run_id: str,
    nfr_recorder,  # type: ignore[no-untyped-def]
    sitl_replay_ready: bool,
) -> None:
    """Check capture-time drift and stale rejections of generated tiles (AC-NEW-6).

    Skips unless ``sitl_replay_ready`` is true. Otherwise it walks the FDR
    records under ``<parent of evidence_dir>/run-<run_id>/fdr``, collecting
    the generated tiles and the payload of every record whose type equals
    ``mfe.TILE_LOAD_REJECTED_FDR_KIND``. It then records three metrics on
    ``nfr_recorder`` and asserts that both the capture-date report (AC-5) and
    the freshness-gate report (AC-6) pass. The test fails outright when no
    generated-tile record could be projected.

    ``fc_adapter`` and ``vio_strategy`` are not read in the body —
    presumably they only parametrise the run (AC-7); confirm in conftest.
    """
    if not sitl_replay_ready:
        skip_reason = (
            "FT-N-06 requires `E2E_SITL_REPLAY_DIR` to point at a SITL replay "
            "fixture exposing `mid-flight-tile-output` FDR records and any "
            "`tile-load-rejected` events emitted by the freshness gate "
            "(AZ-595 + AZ-422 fixture builder). Pure-logic AC-NEW-6 "
            "coverage lives in "
            "e2e/_unit_tests/helpers/test_mid_flight_tile_evaluator.py."
        )
        pytest.skip(skip_reason)

    # Import deferred until after the skip gate.
    from runner.helpers import fdr_reader

    fdr_dir = Path(evidence_dir).parent / f"run-{run_id}" / "fdr"

    # Single pass over the archive: sort records into generated tiles and
    # rejection payloads; every other record type is ignored.
    generated: list[mfe.TileSpec] = []
    rejections: list[dict] = []
    for record in fdr_reader.iter_records(fdr_dir):
        if record.record_type == mfe.MID_FLIGHT_TILE_FDR_KIND:
            projected = _project_tile(record)
            if projected is not None:
                generated.append(projected)
            continue
        if record.record_type == mfe.TILE_LOAD_REJECTED_FDR_KIND:
            rejections.append(dict(record.payload))

    if not generated:
        pytest.fail(
            f"FT-N-06: no `{mfe.MID_FLIGHT_TILE_FDR_KIND}` FDR records at "
            f"{fdr_dir}. The fixture builder must produce at least one "
            "generated tile for the freshness/stale check to be meaningful."
        )

    drift_report = mfe.evaluate_capture_date_freshness(generated)
    gate_report = mfe.evaluate_freshness_gate(generated, rejections)

    # Metrics are recorded before the assertions so they are captured even
    # when one of the acceptance criteria fails.
    tile_total = float(len(generated))
    nfr_recorder.record_metric("ft_n_06.tile_count", tile_total, ac_id="AC-NEW-6")

    drift_failures = float(len(drift_report.failing_entries))
    nfr_recorder.record_metric(
        "ft_n_06.capture_drift_failures", drift_failures, ac_id="AC-5"
    )

    stale_total = float(len(gate_report.stale_rejections))
    nfr_recorder.record_metric(
        "ft_n_06.stale_rejection_count", stale_total, ac_id="AC-6"
    )

    assert drift_report.passes, (
        f"AC-5 (|capture_utc - generated_at| ≤ {drift_report.tolerance_s} s) failed: "
        f"failures={[(entry.tile_id, entry.drift_s) for entry in drift_report.failing_entries]}"
    )
    assert gate_report.passes, (
        "AC-6 (no `tile-load-rejected: stale` for freshly generated tile) failed: "
        f"stale_rejected_tile_ids={gate_report.stale_rejections}"
    )
|
||||
|
||||
|
||||
def _project_tile(rec) -> mfe.TileSpec | None:  # type: ignore[no-untyped-def]
    """Project an FDR record onto a ``TileSpec``; ``None`` if malformed.

    A record counts as malformed when its payload has no ``bbox_wgs84`` entry,
    when that entry is not iterable, when it does not hold exactly four
    elements, or when any element cannot be converted to ``float``.

    Args:
        rec: FDR record exposing ``payload`` (a mapping) and ``monotonic_ms``.

    Returns:
        The projected ``mfe.TileSpec``, or ``None`` for a malformed record.

    Raises:
        ValueError, TypeError: still propagated when ``zoom_level``,
            ``payload_size_bytes`` or ``rec.monotonic_ms`` cannot be
            converted to ``int``; only the bbox is treated as a
            skip-on-malformed field.
    """
    payload = rec.payload
    try:
        raw_bbox = tuple(payload["bbox_wgs84"])  # type: ignore[index]
    except (KeyError, TypeError):
        return None
    if len(raw_bbox) != 4:
        return None
    try:
        # Convert inside the guard: a four-element bbox with non-numeric
        # members (e.g. a 4-character string, or a list containing None) is
        # malformed too and must not abort the whole scenario.
        bbox = (
            float(raw_bbox[0]),
            float(raw_bbox[1]),
            float(raw_bbox[2]),
            float(raw_bbox[3]),
        )
    except (TypeError, ValueError, OverflowError):
        return None

    # Anything other than a dict under "quality" is treated as "no quality".
    raw_quality = payload.get("quality")
    quality = dict(raw_quality) if isinstance(raw_quality, dict) else {}

    # capture_utc is only forwarded when it is a string; otherwise None.
    raw_capture = quality.get("capture_utc")
    capture_utc = raw_capture if isinstance(raw_capture, str) else None

    return mfe.TileSpec(
        tile_id=str(payload.get("tile_id") or ""),
        bbox_wgs84=bbox,
        zoom_level=int(payload.get("zoom_level") or 0),
        descriptor_sha256=str(payload.get("descriptor_sha256") or ""),
        payload_size_bytes=int(payload.get("payload_size_bytes") or 0),
        quality=quality,
        generated_at_monotonic_ms=int(rec.monotonic_ms),
        capture_utc_iso=capture_utc,
    )
|
||||
@@ -0,0 +1,182 @@
|
||||
"""FT-P-17 — Mid-flight tile generation + landing-time upload (AZ-422 / AC-8.4).
|
||||
|
||||
The full scenario:
|
||||
|
||||
1. The SUT cold-starts against an empty ``mid-flight-tile-output/``
|
||||
FDR directory + the bind-mounted Derkachi fixture.
|
||||
2. Replay 5 min of Derkachi at the SUT's runtime cadence. While the
|
||||
SUT generates orthorectified tiles it writes one FDR record per
|
||||
tile under ``mid-flight-tile-output`` carrying every field the
|
||||
mock-suite-sat-service ingest schema requires (Mode B Fact #105).
|
||||
3. After replay, the test simulates a landing event (mechanism is
|
||||
public-input — ``simulate_landing()`` MAVLink command, owned by
|
||||
AZ-595 fixture builder); the SUT then uploads every generated
|
||||
tile to ``mock-suite-sat-service``.
|
||||
4. The test parses the FDR archive for generated tiles, fetches the
|
||||
mock-service audit log, and asserts:
|
||||
* AC-1: ≥ 1 tile per ~3 s of high-quality nav frames.
|
||||
* AC-2: every tile has all Mode B Fact #105 fields populated.
|
||||
* AC-3: no two tiles share footprint within ±1 m AND GSD within ±5 %.
|
||||
* AC-4: every generated tile_id is in the audit log (HTTP 202).
|
||||
* AC-7: parameterised across ``(fc_adapter, vio_strategy)``.
|
||||
|
||||
FT-N-06 (AC-5/AC-6) is a separate file: ``test_ft_n_06_mid_flight_freshness.py``.
|
||||
|
||||
Gated on:
|
||||
|
||||
* ``sitl_replay_ready`` — full replay requires the SITL fixture.
|
||||
* ``runner.helpers.mid_flight_tile_evaluator`` — pure-logic evaluator
|
||||
covered by ``e2e/_unit_tests/helpers/test_mid_flight_tile_evaluator.py``.
|
||||
* ``runner.helpers.mock_suite_sat_audit.fetch_audit`` — HTTP wrapper
|
||||
covered by ``e2e/_unit_tests/helpers/test_mock_suite_sat_audit.py``.
|
||||
* ``FT_P_17_HIGH_QUALITY_WINDOW_S_ENV`` — the fixture builder records
|
||||
the total wall-clock seconds of high-quality nav frames produced
|
||||
by the replay (per AC-2.1a normal-segment criterion). Without this
|
||||
env var the scenario can't compute the AC-1 denominator and skips.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from runner.helpers import mid_flight_tile_evaluator as mfe
|
||||
from runner.helpers import mock_suite_sat_audit
|
||||
|
||||
# Name of the environment variable that carries the high-quality nav-frame
# window, in seconds. The FT-P-17 scenario parses its value as a float and
# skips when the variable is unset or empty.
FT_P_17_HIGH_QUALITY_WINDOW_S_ENV = "FT_P_17_HIGH_QUALITY_WINDOW_S"
|
||||
|
||||
|
||||
@pytest.mark.traces_to("AC-8.4,AC-1,AC-2,AC-3,AC-4,AC-7")
def test_ft_p_17_mid_flight_tiles(
    fc_adapter: str,
    vio_strategy: str,
    evidence_dir,  # type: ignore[no-untyped-def]
    run_id: str,
    nfr_recorder,  # type: ignore[no-untyped-def]
    sitl_replay_ready: bool,
    mock_suite_sat_url: str,
) -> None:
    """Check tile generation rate, metadata, dedup and upload audit (AC-8.4).

    Skips when ``sitl_replay_ready`` is false or when the environment
    variable named by ``FT_P_17_HIGH_QUALITY_WINDOW_S_ENV`` is unset/empty;
    fails when that variable does not parse as a float or when no tile
    record is found under ``<parent of evidence_dir>/run-<run_id>/fdr``.
    Otherwise it fetches the audit entries for ``run_id`` from
    ``mock_suite_sat_url``, runs the four ``mfe`` evaluators, records six
    metrics on ``nfr_recorder`` and asserts AC-1 through AC-4 in order.

    ``fc_adapter`` and ``vio_strategy`` are not read in the body —
    presumably they only parametrise the run (AC-7); confirm in conftest.
    """
    if not sitl_replay_ready:
        replay_skip_reason = (
            "FT-P-17 requires `E2E_SITL_REPLAY_DIR` to point at a SITL replay "
            "fixture exposing `mid-flight-tile-output` FDR records and the "
            "post-landing audit population on mock-suite-sat-service "
            "(AZ-595 + AZ-422 fixture builder). Pure-logic AC-8.4 coverage "
            "lives in e2e/_unit_tests/helpers/test_mid_flight_tile_evaluator.py."
        )
        pytest.skip(replay_skip_reason)

    raw_window = os.environ.get(FT_P_17_HIGH_QUALITY_WINDOW_S_ENV)
    if not raw_window:
        # Unset or empty: the AC-1 denominator is unknown, so skip.
        pytest.skip(
            f"FT-P-17 needs `{FT_P_17_HIGH_QUALITY_WINDOW_S_ENV}` env var "
            "(total wall-clock seconds of high-quality nav frames per "
            "AC-2.1a). The fixture builder records this from the replay's "
            "segment-quality FDR records."
        )
    try:
        window_s = float(raw_window)
    except ValueError as exc:
        # A set-but-unparseable value is a hard failure, not a skip.
        pytest.fail(
            f"FT-P-17: `{FT_P_17_HIGH_QUALITY_WINDOW_S_ENV}` must parse as "
            f"float; got {raw_window!r}: {exc}"
        )

    # Import deferred until after the skip gates.
    from runner.helpers import fdr_reader

    fdr_dir = Path(evidence_dir).parent / f"run-{run_id}" / "fdr"
    generated = [*_extract_tiles_from_fdr(fdr_reader, fdr_dir)]
    if not generated:
        pytest.fail(
            f"FT-P-17: no `{mfe.MID_FLIGHT_TILE_FDR_KIND}` FDR records under "
            f"{fdr_dir}. The SUT must generate at least one tile per AC-1."
        )

    audit_log = mock_suite_sat_audit.fetch_audit(mock_suite_sat_url, run_id=run_id)

    rate = mfe.evaluate_tile_generation_rate(generated, window_s)
    metadata = mfe.evaluate_tile_quality_metadata(generated)
    dedup = mfe.evaluate_dedup(generated)
    uploads = mfe.evaluate_upload_acks(generated, audit_log)

    # Metrics are recorded before the assertions so they are captured even
    # when one of the acceptance criteria fails.
    record = nfr_recorder.record_metric
    record("ft_p_17.tile_count", float(rate.tile_count), ac_id="AC-1")
    record("ft_p_17.observed_rate_per_3s", rate.observed_rate_per_3s, ac_id="AC-1")
    record("ft_p_17.high_quality_window_s", window_s, ac_id="AC-1")
    record(
        "ft_p_17.tile_quality_failures",
        float(len(metadata.failing_entries)),
        ac_id="AC-2",
    )
    record(
        "ft_p_17.dedup_duplicate_pairs",
        float(dedup.duplicate_count),
        ac_id="AC-3",
    )
    record(
        "ft_p_17.audit_missing_count",
        float(len(uploads.missing_from_audit)),
        ac_id="AC-4",
    )

    assert rate.passes, (
        f"AC-1 (≥1 tile per {mfe.MIN_TILES_PER_HIGH_QUALITY_WINDOW_S} s) failed: "
        f"{rate.tile_count} tiles over {window_s} s "
        f"high-quality window → rate={rate.observed_rate_per_3s:.3f}/3s"
    )
    assert metadata.passes, (
        "AC-2 (every tile has Mode B Fact #105 quality fields) failed: "
        f"failures={[(entry.tile_id, entry.missing_top_level_fields, entry.missing_quality_fields) for entry in metadata.failing_entries]}"
    )
    assert dedup.passes, (
        "AC-3 (no duplicate footprint+GSD bins) failed: "
        f"duplicate_pairs={dedup.duplicate_pairs}"
    )
    assert uploads.passes, (
        "AC-4 (landing-event upload accepted) failed: "
        f"generated={len(uploads.generated_tile_ids)}, "
        f"audited={len(uploads.audit_tile_ids)}, "
        f"missing={uploads.missing_from_audit}"
    )
|
||||
|
||||
|
||||
def _extract_tiles_from_fdr(fdr_reader, fdr_root: Path):  # type: ignore[no-untyped-def]
    """Yield ``TileSpec``s from every ``mid-flight-tile-output`` FDR record.

    Each record's payload mirrors the mock-suite-sat-service TilePublishRequest
    shape; the scenario only projects it onto a ``TileSpec`` and lets the
    evaluators do the AC math.

    Records of any other type are ignored. A tile record is skipped when its
    ``bbox_wgs84`` is missing, not iterable, does not hold exactly four
    elements, or holds an element that cannot be converted to ``float``.

    Args:
        fdr_reader: object exposing ``iter_records(fdr_root)``.
        fdr_root: directory handed to ``fdr_reader.iter_records``.

    Yields:
        One ``mfe.TileSpec`` per well-formed tile record, in reader order.

    Raises:
        ValueError, TypeError: still propagated when ``zoom_level``,
            ``payload_size_bytes`` or ``rec.monotonic_ms`` cannot be
            converted to ``int``; only the bbox is a skip-on-malformed field.
    """
    for rec in fdr_reader.iter_records(fdr_root):
        if rec.record_type != mfe.MID_FLIGHT_TILE_FDR_KIND:
            continue
        payload = rec.payload
        try:
            raw_bbox = tuple(payload["bbox_wgs84"])  # type: ignore[index]
        except (KeyError, TypeError):
            continue
        if len(raw_bbox) != 4:
            continue
        try:
            # Convert inside the guard: a non-numeric coordinate is just
            # another malformed bbox and must not abort the iteration.
            bbox = (
                float(raw_bbox[0]),
                float(raw_bbox[1]),
                float(raw_bbox[2]),
                float(raw_bbox[3]),
            )
        except (TypeError, ValueError, OverflowError):
            continue

        # Anything other than a dict under "quality" is treated as empty.
        raw_quality = payload.get("quality")
        quality = dict(raw_quality) if isinstance(raw_quality, dict) else {}

        # capture_utc is only forwarded when it is a string; otherwise None.
        raw_capture = quality.get("capture_utc")
        capture_utc = raw_capture if isinstance(raw_capture, str) else None

        yield mfe.TileSpec(
            tile_id=str(payload.get("tile_id") or ""),
            bbox_wgs84=bbox,
            zoom_level=int(payload.get("zoom_level") or 0),
            descriptor_sha256=str(payload.get("descriptor_sha256") or ""),
            payload_size_bytes=int(payload.get("payload_size_bytes") or 0),
            quality=quality,
            generated_at_monotonic_ms=int(rec.monotonic_ms),
            capture_utc_iso=capture_utc,
        )
|
||||
Reference in New Issue
Block a user