"""FT-P-06 — 95th-percentile MRE budgets (AC-2.2).

Piggyback test: depends on the FT-P-04 + FT-P-05 evidence CSVs produced in
the same run. Reads both, aggregates per domain, asserts:

* Frame-to-frame p95 MRE < 1.0 px
* Cross-domain p95 MRE < 2.5 px

What this file owns:

* The AC-4 assertion + the combined report.

What this file does NOT own:

* The FT-P-04 evidence collection — owned by ``test_ft_p_04_*``.
* The FT-P-05 evidence collection — owned by ``test_ft_p_05_*``.
* Both run as the same pytest session; this test depends on the artefacts
  they wrote to ``evidence_dir``.

Skip discipline: if either evidence CSV is missing, the test SKIPS with a
clear reason (it cannot fail without the upstream evidence; that would mask
the actual gate, which is whether FT-P-04 / FT-P-05 ran). The autodev /
Tier-1 runner will only mark this test FAIL if it runs AND the evidence is
present AND the p95 budgets are exceeded.
"""

from __future__ import annotations

from pathlib import Path

import pytest

from runner.helpers import mre_evaluator as me


@pytest.mark.traces_to("AC-2.2,AC-4,AC-5")
def test_ft_p_06_mre_budgets(
    fc_adapter: str,
    vio_strategy: str,
    evidence_dir,  # type: ignore[no-untyped-def]
    nfr_recorder,  # type: ignore[no-untyped-def]
) -> None:
    """Check the p95 MRE budgets against the FT-P-04 / FT-P-05 evidence.

    AC-4: the frame-to-frame p95 MRE must be < 1.0 px AND the cross-domain
    p95 MRE must be < 2.5 px.
    AC-5: the test is parametrized across ``(fc_adapter, vio_strategy)``.

    No evidence is produced here. The test only reads the two CSVs that
    FT-P-04 and FT-P-05 wrote into ``evidence_dir`` during the same run:

    * either CSV missing -> the test is skipped, naming the missing file(s);
    * frame-to-frame CSV cannot be loaded (``ValueError``) -> the test fails;
    * otherwise both p95 values are recorded through ``nfr_recorder`` and
      then each budget is asserted in turn.
    """
    # Evidence file names are keyed by the same parametrization as this test.
    evidence_paths = (
        evidence_dir / f"ft-p-04-{fc_adapter}-{vio_strategy}.csv",
        evidence_dir / f"ft-p-05-{fc_adapter}-{vio_strategy}.csv",
    )
    frame_to_frame_path, cross_domain_path = evidence_paths

    # Without the upstream artefacts there is nothing to assert on, so skip
    # rather than fail.
    missing = [str(path.name) for path in evidence_paths if not path.exists()]
    if missing:
        pytest.skip(
            f"FT-P-06 piggybacks on FT-P-04 + FT-P-05 evidence; missing in this run: {missing}. "
            "Pure-logic ACs covered by e2e/_unit_tests/helpers/test_mre_evaluator.py."
        )

    # Both CSVs are present from here on — load and evaluate them.
    try:
        frame_to_frame_records = me.load_frame_to_frame_csv(frame_to_frame_path)
    except ValueError as exc:
        # mre_px column absent -> the FT-P-04 evidence does not yet carry MRE.
        # The FT-P-06 spec says "if absent, the test fails"; the failure is
        # attributed to the SUT, which must expose per-frame MRE.
        pytest.fail(f"FT-P-04 evidence is missing per-frame MRE: {exc}")

    cross_domain_records = me.load_cross_domain_csv(cross_domain_path)
    combined = me.evaluate_combined_p95(frame_to_frame_records, cross_domain_records)

    # Record both metrics before asserting, so they are captured even when a
    # budget assertion below fails.
    recorded_metrics = (
        ("ft_p_06.f2f_p95_mre_px", combined.frame_to_frame.p95_px),
        ("ft_p_06.cross_domain_p95_mre_px", combined.cross_domain.p95_px),
    )
    for metric_name, p95_value in recorded_metrics:
        nfr_recorder.record_metric(metric_name, p95_value, ac_id="AC-4")

    assert combined.frame_to_frame.passes, (
        f"AC-4 (frame-to-frame p95 MRE < {me.MRE_P95_FRAME_TO_FRAME_BUDGET_PX} px) "
        f"failed: p95={combined.frame_to_frame.p95_px:.4f} over "
        f"{combined.frame_to_frame.sample_count} samples"
    )
    assert combined.cross_domain.passes, (
        f"AC-4 (cross-domain p95 MRE < {me.MRE_P95_CROSS_DOMAIN_BUDGET_PX} px) "
        f"failed: p95={combined.cross_domain.p95_px:.4f} over "
        f"{combined.cross_domain.sample_count} samples"
    )