[AZ-841] Remove xfail markers from Derkachi tests — environment segregation via tier2+RUN_REPLAY_E2E

This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-06-10 05:35:01 +03:00
parent 201ec7cdd4
commit c1baef57be
4 changed files with 33 additions and 70 deletions
+8 -54
View File
@@ -7,12 +7,15 @@ E2E pattern the heavy tests are gated by ``RUN_REPLAY_E2E=1``; the
lightweight AC-4a (mode-agnosticism AST scan) and AC-7 (skip-gate
self-check) run unconditionally.
Some ACs are SKIPPED with documented reasons until upstream work
ships:
Environment segregation:
* **Tier-2 (Jetson)** tests are gated by ``RUN_REPLAY_E2E=1`` +
``@pytest.mark.tier2`` — they SKIP on Mac and only run on Jetson
where the satellite-provider + C6 tile cache are available.
* **Unconditional** tests (AC-4a, AC-7) run everywhere; AC-4b is not
environment-gated either, but is still skipped (see below).
Still skipped with documented reasons:
* AC-3 (≤ 100 m for 80 % of ticks) — ``xfail`` until a real Topotek
KHP20S30 calibration ships (camera_info.md notes the intrinsics
are unknown).
* AC-4b (encoder byte-equality) — ``skip`` until AZ-558 routes the
C8 outbound bytes through the ``MavlinkTransport`` seam.
* AC-8 / AC-9 in spec (operator workflow rehearsal) — ``skip`` until
@@ -58,15 +61,6 @@ _HEAVY_SKIP = pytest.mark.skipif(
@pytest.mark.tier2
@_HEAVY_SKIP
@pytest.mark.xfail(
reason=(
"AZ-963: open-loop ESKF diverges on the Derkachi fixture "
"(~10 s, frame ~233, Mahalanobis² > 100). The fixture has no "
"reference C6 tile cache → no satellite anchoring (C2/C3/C4). "
"Expected physics until AZ-777 lands a reference tile cache."
),
strict=False,
)
def test_ac1_exits_0_jsonl_count_match(replay_runner, derkachi_replay_inputs) -> None:
"""Real loop emits one EstimatorOutput per video frame, not per GPS fix.
@@ -153,19 +147,6 @@ def test_ac2_jsonl_schema_match(replay_runner) -> None:
@pytest.mark.tier2
@_HEAVY_SKIP
@pytest.mark.xfail(
reason=(
"AZ-963: open-loop ESKF diverges on the Derkachi fixture "
"(~10 s, frame ~233, Mahalanobis² > 100). The fixture has no "
"reference C6 tile cache → no satellite anchoring (C2/C3/C4). "
"Expected physics until AZ-777 lands a reference tile cache. "
"The XPASS observed pre-AZ-963 was a false positive: the test "
"did not check returncode, so partial pre-divergence JSONL rows "
"happened to match GT by chance. The returncode assertion added "
"below now makes the failure honest."
),
strict=False,
)
def test_ac3_within_100m_80pct_of_ticks(replay_runner, derkachi_replay_inputs) -> None:
# Act
result = replay_runner(pace="asap")
@@ -395,15 +376,6 @@ def test_ac4_encoder_byte_equality_via_transport_seam() -> None:
@pytest.mark.tier2
@_HEAVY_SKIP
@pytest.mark.xfail(
reason=(
"AZ-963: open-loop ESKF diverges on the Derkachi fixture "
"(~10 s, frame ~233, Mahalanobis² > 100). The fixture has no "
"reference C6 tile cache → no satellite anchoring (C2/C3/C4). "
"Expected physics until AZ-777 lands a reference tile cache."
),
strict=False,
)
def test_ac5_determinism_two_runs_diff(replay_runner) -> None:
# Act
r1 = replay_runner(pace="asap")
@@ -433,15 +405,6 @@ def test_ac5_determinism_two_runs_diff(replay_runner) -> None:
@pytest.mark.tier2
@_HEAVY_SKIP
@pytest.mark.xfail(
reason=(
"AZ-963: open-loop ESKF diverges on the Derkachi fixture "
"(~10 s, frame ~233, Mahalanobis² > 100). The fixture has no "
"reference C6 tile cache → no satellite anchoring (C2/C3/C4). "
"Expected physics until AZ-777 lands a reference tile cache."
),
strict=False,
)
def test_ac6_pace_realtime_60s_within_5pct(replay_runner) -> None:
# Act — cap to 60 s so a full 490-second flight doesn't pin the test
# to an 8-minute realtime run; the pacing correctness is validated
@@ -460,15 +423,6 @@ def test_ac6_pace_realtime_60s_within_5pct(replay_runner) -> None:
@pytest.mark.tier2
@_HEAVY_SKIP
@pytest.mark.xfail(
reason=(
"AZ-963: open-loop ESKF diverges on the Derkachi fixture "
"(~10 s, frame ~233, Mahalanobis² > 100). The fixture has no "
"reference C6 tile cache → no satellite anchoring (C2/C3/C4). "
"Expected physics until AZ-777 lands a reference tile cache."
),
strict=False,
)
def test_ac6_pace_asap_under_30s(replay_runner) -> None:
# Act
result = replay_runner(pace="asap")