# gps-denied-onboard/e2e/tests/positive/test_ft_p_06_mre_budgets.py

"""FT-P-06 — 95th-percentile MRE budgets (AC-2.2).

Piggyback test: depends on the FT-P-04 + FT-P-05 evidence CSVs produced
in the same run. Reads both, aggregates per domain, asserts:

* Frame-to-frame p95 MRE < 1.0 px
* Cross-domain p95 MRE < 2.5 px

What this file owns:

* The AC-4 assertion + the combined report.

What this file does NOT own:

* The FT-P-04 evidence collection — owned by ``test_ft_p_04_*``.
* The FT-P-05 evidence collection — owned by ``test_ft_p_05_*``.
* Both run in the same pytest session as this test, which depends on
  the artefacts they wrote to ``evidence_dir``.

Skip discipline: if either evidence CSV is missing, the test SKIPS with
a clear reason (it cannot fail without the upstream evidence; that
would mask the actual gate, which is whether FT-P-04 / FT-P-05 ran).
The autodev / Tier-1 runner will only mark this test FAIL if it runs
AND the evidence is present AND either the FT-P-04 CSV cannot be loaded
(``ValueError``, e.g. no per-frame MRE column) or a p95 budget is
exceeded.
"""

from __future__ import annotations

from pathlib import Path

import pytest

from runner.helpers import mre_evaluator as me


@pytest.mark.traces_to("AC-2.2,AC-4,AC-5")
def test_ft_p_06_mre_budgets(
    fc_adapter: str,
    vio_strategy: str,
    evidence_dir,  # type: ignore[no-untyped-def]
    nfr_recorder,  # type: ignore[no-untyped-def]
) -> None:
    """Gate the p95 MRE of the FT-P-04 / FT-P-05 evidence (AC-4).

    The test produces no evidence of its own. It looks up the two CSVs
    named after the current ``(fc_adapter, vio_strategy)`` combination
    (AC-5) inside ``evidence_dir``, hands them to ``mre_evaluator`` and
    asserts on the ``passes`` flag of each domain (documented budgets:
    frame-to-frame < 1.0 px, cross-domain < 2.5 px).

    Outcomes:

    * SKIP — at least one of the two CSVs does not exist.
    * FAIL — loading the FT-P-04 CSV raises ``ValueError``, or either
      domain reports ``passes`` as false.

    Both p95 values are pushed to ``nfr_recorder`` under ``AC-4`` before
    the assertions run, so they are recorded even when a budget fails.
    """
    f2f_csv = evidence_dir / f"ft-p-04-{fc_adapter}-{vio_strategy}.csv"
    xd_csv = evidence_dir / f"ft-p-05-{fc_adapter}-{vio_strategy}.csv"

    # Collect the names of absent evidence files; any entry means there
    # is nothing to evaluate, so the test skips instead of failing.
    missing = [str(path.name) for path in (f2f_csv, xd_csv) if not path.exists()]
    if missing:
        pytest.skip(
            f"FT-P-06 piggybacks on FT-P-04 + FT-P-05 evidence; missing in this run: {missing}. "
            "Pure-logic ACs covered by e2e/_unit_tests/helpers/test_mre_evaluator.py."
        )

    # Evidence is on disk. A ValueError from the frame-to-frame loader
    # (e.g. the ``mre_px`` column is absent) is reported as an explicit
    # test failure: the upstream evidence must carry per-frame MRE.
    try:
        frame_rows = me.load_frame_to_frame_csv(f2f_csv)
    except ValueError as load_error:
        pytest.fail(f"FT-P-04 evidence is missing per-frame MRE: {load_error}")
    cross_rows = me.load_cross_domain_csv(xd_csv)

    verdict = me.evaluate_combined_p95(frame_rows, cross_rows)

    # Record each domain's p95 first, then gate on it.
    f2f_result = verdict.frame_to_frame
    nfr_recorder.record_metric(
        "ft_p_06.f2f_p95_mre_px", f2f_result.p95_px, ac_id="AC-4"
    )
    xd_result = verdict.cross_domain
    nfr_recorder.record_metric(
        "ft_p_06.cross_domain_p95_mre_px", xd_result.p95_px, ac_id="AC-4"
    )

    assert f2f_result.passes, (
        f"AC-4 (frame-to-frame p95 MRE < {me.MRE_P95_FRAME_TO_FRAME_BUDGET_PX} px) "
        f"failed: p95={f2f_result.p95_px:.4f} over "
        f"{f2f_result.sample_count} samples"
    )
    assert xd_result.passes, (
        f"AC-4 (cross-domain p95 MRE < {me.MRE_P95_CROSS_DOMAIN_BUDGET_PX} px) "
        f"failed: p95={xd_result.p95_px:.4f} over "
        f"{xd_result.sample_count} samples"
    )