[AZ-963] xfail divergent ESKF tests + honest returncode assertion on AC-3

This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-06-09 20:43:15 +03:00
parent 89606ccfdc
commit 201ec7cdd4
5 changed files with 132 additions and 23 deletions
+52 -8
View File
@@ -58,6 +58,15 @@ _HEAVY_SKIP = pytest.mark.skipif(
@pytest.mark.tier2
@_HEAVY_SKIP
@pytest.mark.xfail(
reason=(
"AZ-963: open-loop ESKF diverges on the Derkachi fixture "
"(~10 s, frame ~233, Mahalanobis² > 100). The fixture has no "
"reference C6 tile cache → no satellite anchoring (C2/C3/C4). "
"Expected physics until AZ-777 lands a reference tile cache."
),
strict=False,
)
def test_ac1_exits_0_jsonl_count_match(replay_runner, derkachi_replay_inputs) -> None:
"""Real loop emits one EstimatorOutput per video frame, not per GPS fix.
@@ -146,20 +155,28 @@ def test_ac2_jsonl_schema_match(replay_runner) -> None:
@_HEAVY_SKIP
@pytest.mark.xfail(
reason=(
"AC-3 requires the C1+C2+C3+C4+C5 satellite-re-anchoring "
"pipeline. Blocked by AZ-777: with AZ-776 landed, the "
"open-loop C1+C5(ESKF) composition now runs end-to-end but "
"with NO satellite anchoring (no C2/C3/C4) because the "
"Derkachi fixture has no reference C6 tile cache. ESKF "
"integrates open-loop, so position drifts unbounded over "
"the 8-min flight and the ≤100 m threshold cannot be met "
"by physics until the reference tile cache (AZ-777) lands."
"AZ-963: open-loop ESKF diverges on the Derkachi fixture "
"(~10 s, frame ~233, Mahalanobis² > 100). The fixture has no "
"reference C6 tile cache → no satellite anchoring (C2/C3/C4). "
"Expected physics until AZ-777 lands a reference tile cache. "
"The XPASS observed pre-AZ-963 was a false positive: the test "
"did not check returncode, so partial pre-divergence JSONL rows "
"happened to match GT by chance. The returncode assertion added "
"below now makes the failure honest."
),
strict=False,
)
def test_ac3_within_100m_80pct_of_ticks(replay_runner, derkachi_replay_inputs) -> None:
# Act
result = replay_runner(pace="asap")
# Assert — pipeline must complete cleanly (AZ-963: prevents XPASS
# on partial pre-divergence output).
assert result.returncode == 0, (
f"gps-denied-replay exited {result.returncode}\n"
f"stdout:\n{result.stdout}\nstderr:\n{result.stderr}"
)
rows = parse_jsonl(result.output_path)
# Assert
@@ -378,6 +395,15 @@ def test_ac4_encoder_byte_equality_via_transport_seam() -> None:
@pytest.mark.tier2
@_HEAVY_SKIP
@pytest.mark.xfail(
reason=(
"AZ-963: open-loop ESKF diverges on the Derkachi fixture "
"(~10 s, frame ~233, Mahalanobis² > 100). The fixture has no "
"reference C6 tile cache → no satellite anchoring (C2/C3/C4). "
"Expected physics until AZ-777 lands a reference tile cache."
),
strict=False,
)
def test_ac5_determinism_two_runs_diff(replay_runner) -> None:
# Act
r1 = replay_runner(pace="asap")
@@ -407,6 +433,15 @@ def test_ac5_determinism_two_runs_diff(replay_runner) -> None:
@pytest.mark.tier2
@_HEAVY_SKIP
@pytest.mark.xfail(
reason=(
"AZ-963: open-loop ESKF diverges on the Derkachi fixture "
"(~10 s, frame ~233, Mahalanobis² > 100). The fixture has no "
"reference C6 tile cache → no satellite anchoring (C2/C3/C4). "
"Expected physics until AZ-777 lands a reference tile cache."
),
strict=False,
)
def test_ac6_pace_realtime_60s_within_5pct(replay_runner) -> None:
# Act — cap to 60 s so a full 490-second flight doesn't pin the test
# to an 8-minute realtime run; the pacing correctness is validated
@@ -425,6 +460,15 @@ def test_ac6_pace_realtime_60s_within_5pct(replay_runner) -> None:
@pytest.mark.tier2
@_HEAVY_SKIP
@pytest.mark.xfail(
reason=(
"AZ-963: open-loop ESKF diverges on the Derkachi fixture "
"(~10 s, frame ~233, Mahalanobis² > 100). The fixture has no "
"reference C6 tile cache → no satellite anchoring (C2/C3/C4). "
"Expected physics until AZ-777 lands a reference tile cache."
),
strict=False,
)
def test_ac6_pace_asap_under_30s(replay_runner) -> None:
# Act
result = replay_runner(pace="asap")