[AZ-414] [AZ-415] [AZ-418] Test batch 71: sharp turn + multi-segment + smoothing

- AZ-414 (FT-P-07 + FT-N-02): sharp_turn_detector helper covering
  AC-1 (gyro_z run detection + synthetic-overlay fallback),
  AC-2/AC-3 (FT-N-02 during-turn label + monotonic covariance),
  AC-4/AC-5/AC-6 (FT-P-07 recovery lag/drift/heading); twin scenario
  files under positive/ and negative/.
- AZ-415 (FT-P-08): multi_segment_evaluator helper + scenario.
- AZ-418 (FT-P-10): smoothing_evaluator helper covering AC-1 (raw +
  smoothed pose pairing), AC-2 (improvement rate >= 0.80), AC-3
  (mean improvement >= 5 m); scenario file.
- All scenarios skip-gated on upstream frame_source_replay /
  imu_replay / fdr_reader stubs (auto-activate when AZ-441 + AZ-407
  leftovers land).
- +68 unit tests; full e2e unit suite: 393 passed.

See _docs/03_implementation/batch_71_report.md and
_docs/03_implementation/reviews/batch_71_review.md.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-17 07:12:24 +03:00
parent 29ac16cfcb
commit c6e6cba237
17 changed files with 3195 additions and 1 deletions
@@ -0,0 +1,176 @@
"""FT-N-02 — Sharp-turn legitimate failure (AZ-414 / AC-3.2).
The negative twin of FT-P-07. Same detection / fixture / replay path;
the assertion side checks behaviour DURING the turn (not recovery
after it):
* AC-2: source_label ∈ ``{visual_propagated, dead_reckoned}`` for every
inside-window frame (no ``satellite_anchored`` during the turn).
* AC-3: ``cov_semi_major_m`` is non-decreasing across consecutive
frames within the segment.
The recovery half (AC-4/5/6) is owned by FT-P-07
(``e2e/tests/positive/test_ft_p_07_sharp_turn_recovery.py``); this file
delegates the helper call but does not assert on the returned report.
Gated on the same upstream replay helpers as FT-P-07.
"""
from __future__ import annotations
from pathlib import Path
import pytest
from runner.helpers import sharp_turn_detector as std
DERKACHI_DIR = (
Path(__file__).resolve().parents[3]
/ "_docs"
/ "00_problem"
/ "input_data"
/ "flight_derkachi"
)
DERKACHI_IMU_CSV = DERKACHI_DIR / "data_imu.csv"
DERKACHI_MP4 = DERKACHI_DIR / "flight_derkachi.mp4"
@pytest.fixture(scope="module")
def _harness_helpers_implemented() -> bool:
from runner.helpers import fdr_reader, imu_replay
from runner.helpers.frame_source_replay import FrameSourceReplayer
try:
replayer = FrameSourceReplayer(sink=_NullSink()) # type: ignore[arg-type]
try:
replayer.replay_video(Path("/tmp/non-existent.mp4"))
except NotImplementedError:
return False
try:
list(fdr_reader.iter_records(Path("/tmp/non-existent")))
except NotImplementedError:
return False
try:
imu_replay.ImuReplayer(emitter=_NullImuEmitter()).replay(Path("/tmp/non-existent.csv")) # type: ignore[arg-type]
except NotImplementedError:
return False
return True
except Exception:
return False
class _NullSink:
def write_frame(self, jpeg_bytes: bytes, timestamp_ms: int) -> None:
return None
class _NullImuEmitter:
def emit(self, sample: object) -> None:
return None
@pytest.mark.traces_to("AC-3.2,AC-1,AC-2,AC-3,AC-7")
def test_ft_n_02_sharp_turn_failure(
fc_adapter: str,
vio_strategy: str,
evidence_dir, # type: ignore[no-untyped-def]
run_id: str,
nfr_recorder, # type: ignore[no-untyped-def]
_harness_helpers_implemented: bool,
) -> None:
if not _harness_helpers_implemented:
pytest.skip(
"FT-N-02 full replay requires runner.helpers.{frame_source_replay,"
"imu_replay,fdr_reader} — currently AZ-441 / AZ-407 leftovers. "
"AC-2/AC-3 helper logic covered by "
"e2e/_unit_tests/helpers/test_sharp_turn_detector.py."
)
from runner.helpers import fdr_reader
from runner.helpers.frame_source_replay import FrameSourceReplayer
# 1. AC-1 — identify or synthesise.
detection = std.detect_or_synthesize(DERKACHI_IMU_CSV)
assert detection.segments, "AC-1: at least one turn segment required"
# 2. Drive replay.
FrameSourceReplayer(_resolve_frame_sink()).replay_video(DERKACHI_MP4)
_drive_imu_replay(DERKACHI_IMU_CSV)
# 3. Collect samples.
fdr_root = Path(evidence_dir).parent / f"run-{run_id}" / "fdr"
samples: list[std.TurnFrameSample] = []
for rec in fdr_reader.iter_records(fdr_root):
if rec.record_type != "outbound_estimate":
continue
payload = rec.payload
samples.append(
std.TurnFrameSample(
monotonic_ms=int(rec.monotonic_ms),
lat_deg=float(payload["lat_deg"]), # type: ignore[arg-type]
lon_deg=float(payload["lon_deg"]), # type: ignore[arg-type]
source_label=str(payload["source_label"]), # type: ignore[arg-type]
cov_semi_major_m=float(payload["cov_semi_major_m"]), # type: ignore[arg-type]
)
)
if not samples:
pytest.fail("FT-N-02: no outbound_estimate records produced")
# 4. Evaluate per segment.
n02_reports = [
std.evaluate_ft_n_02(seg, idx, samples)
for idx, seg in enumerate(detection.segments)
]
p07_reports = [
std.evaluate_ft_p_07(seg, idx, samples)
for idx, seg in enumerate(detection.segments)
]
out_csv = evidence_dir / f"ft-n-02-{fc_adapter}-{vio_strategy}.csv"
std.write_csv_evidence(out_csv, detection, n02_reports, p07_reports)
# 5. NFR metrics + AC assertions (NEGATIVE twin assertions).
for r in n02_reports:
nfr_recorder.record_metric(
f"ft_n_02.seg_{r.segment_index}.samples_inside",
float(r.samples_inside),
ac_id="AC-2",
)
nfr_recorder.record_metric(
f"ft_n_02.seg_{r.segment_index}.label_violation_count",
float(len(r.label_violations)),
ac_id="AC-2",
)
nfr_recorder.record_metric(
f"ft_n_02.seg_{r.segment_index}.cov_non_decreasing",
1.0 if r.cov_non_decreasing else 0.0,
ac_id="AC-3",
)
nfr_recorder.record_metric(
"ft_n_02.synthetic_overlay",
1.0 if detection.synthetic_overlay else 0.0,
ac_id="AC-1",
)
for r in n02_reports:
assert r.passes_label, (
f"AC-2 (label ∈ {sorted(std.ALLOWED_DURING_TURN_LABELS)}) failed for segment "
f"{r.segment_index}: violations={r.label_violations}, inside={r.samples_inside}"
)
assert r.passes_cov, (
f"AC-3 (non-decreasing cov_semi_major_m) failed for segment "
f"{r.segment_index}: first_decreasing_at_ms={r.first_decreasing_at_ms}"
)
def _resolve_frame_sink(): # type: ignore[no-untyped-def]
raise NotImplementedError(
"frame sink resolution is owned by AZ-441 / runner.helpers.frame_source_replay"
)
def _drive_imu_replay(csv_path: Path) -> None:
raise NotImplementedError(
"IMU replay driver is owned by AZ-416/AZ-417 / runner.helpers.imu_replay"
)