[AZ-698] Tlog trim + mid-flight alignment for replay

Adds find_aligned_window cross-correlation (NCC, per-window unit norm) between IMU energy and video optical-flow magnitude. Returns AlignedWindow{tlog_start_ns, tlog_end_ns, offset_ms, confidence, used_fallback}, with fallback to head-takeoff on low confidence to preserve AZ-405 behavior. TlogReplayFcAdapter honors tlog_start_ns and skips pre-window messages. New --auto-trim CLI flag, mutex with --time-offset-ms. AC-1..AC-4 covered by unit tests; AC-5 skipped (no real flight_derkachi.mp4 in repo). 106 tests pass in regression slice. Zero new mypy --strict errors. Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-22 23:41:13 +00:00 · 2026-05-20 16:29:59 +03:00
parent 64d961f60c
commit 87fe98858f
13 changed files with 1360 additions and 7 deletions
@@ -37,16 +37,22 @@ from typing import TYPE_CHECKING, Any

 from gps_denied_onboard._types.fc import FcKind
 from gps_denied_onboard.replay_input.errors import ReplayInputAdapterError
-from gps_denied_onboard.replay_input.interface import AutoSyncConfig, AutoSyncDecision
+from gps_denied_onboard.replay_input.interface import (
+    AlignedWindow,
+    AutoSyncConfig,
+    AutoSyncDecision,
+)

 if TYPE_CHECKING:
    import numpy as np
+    import numpy.typing as npt

 __all__ = [
    "TlogSamples",
    "compute_offset",
    "detect_tlog_takeoff",
    "detect_video_motion_onset",
+    "find_aligned_window",
    "validate_offset_or_fail",
 ]

@@ -644,3 +650,363 @@ def _compute_flow_magnitudes(
 def _build_flag_on(name: str) -> bool:
    raw = os.environ.get(name, "")
    return raw.strip().lower() in {"on", "1", "true", "yes"}
+
+
+# ---------------------------------------------------------------------
+# AZ-698 — mid-flight cross-correlation aligner
+#
+# The AZ-405 head-takeoff detector only works when the video covers
+# the take-off moment. For mid-flight slices (e.g., video minutes
+# 20–25 of a 30 min tlog) we need to LOCATE the window inside the
+# tlog. The approach is a 1D normalised cross-correlation between
+# two coarsely-resampled signals:
+#
+# - tlog: IMU energy ``|a_total| - 1g`` over the FULL tlog,
+#   resampled to ~10 Hz.
+# - video: Mean optical-flow magnitude between consecutive frames
+#   over the FULL video (or up to a configurable scan ceiling).
+#
+# Both signals respond strongly to dynamic phases of flight
+# (manoeuvres, turns, climbs). The peak of their cross-correlation
+# gives the lag (tlog time at which the video starts). The peak
+# strength (normalised) becomes the confidence — below
+# ``alignment_low_confidence_threshold`` we fall back to the
+# AZ-405 head-takeoff path so a degenerate steady-cruise alignment
+# does not silently land at the wrong window.
+
+
+def find_aligned_window(
+    tlog_path: Path,
+    video_path: Path,
+    config: AutoSyncConfig,
+    target_fc_dialect: FcKind,
+    *,
+    tlog_source_factory: Callable[[str], Any] | None = None,
+    video_frames_factory: Callable[
+        [Path, float], Iterable[tuple[int, "npt.NDArray[np.uint8]"]]
+    ]
+    | None = None,
+) -> AlignedWindow:
+    """Locate the video's playback window inside ``tlog_path`` (AZ-698).
+
+    Args:
+        tlog_path: Binary ArduPilot tlog. The whole file is read up
+            to :attr:`AutoSyncConfig.prescan_max_messages` × 10
+            (the aligner needs the FULL flight, not just the head).
+        video_path: Mp4 / mkv input. The leading
+            :attr:`AutoSyncConfig.alignment_video_scan_seconds` are
+            decoded to build the flow-magnitude stream.
+        config: Operator-tunable thresholds.
+        target_fc_dialect: ``ARDUPILOT_PLANE`` or ``INAV`` — same
+            parity contract as :func:`detect_tlog_takeoff`.
+        tlog_source_factory: Test injection — replaces the
+            ``pymavlink`` open call.
+        video_frames_factory: Test injection — replaces
+            ``cv2.VideoCapture`` frame iteration.
+
+    Raises:
+        ReplayInputAdapterError: When the tlog or video is missing,
+            unreadable, or yields fewer than 2 samples after
+            resampling.
+
+    Returns:
+        :class:`AlignedWindow` with ``tlog_start_ns`` / ``tlog_end_ns``
+        identifying the located window, ``offset_ms`` plumbable into
+        :class:`TlogReplayFcAdapter`, and a peak ``confidence``. When
+        confidence falls below
+        :attr:`AutoSyncConfig.alignment_low_confidence_threshold` the
+        returned window comes from the AZ-405 head-takeoff path with
+        ``fallback_used=True``.
+    """
+    if target_fc_dialect not in (FcKind.ARDUPILOT_PLANE, FcKind.INAV):
+        raise ReplayInputAdapterError(
+            f"target_fc_dialect must be ARDUPILOT_PLANE or INAV; got {target_fc_dialect!r}"
+        )
+    if not tlog_path.is_file():
+        raise ReplayInputAdapterError(f"tlog file not found: {tlog_path}")
+    if not video_path.is_file():
+        raise ReplayInputAdapterError(f"video file not found: {video_path}")
+
+    tlog_energy = _load_tlog_imu_energy_stream(
+        tlog_path,
+        max_messages=config.prescan_max_messages * 10,
+        source_factory=tlog_source_factory,
+    )
+    if len(tlog_energy) < 2:
+        raise ReplayInputAdapterError(
+            f"tlog yielded {len(tlog_energy)} IMU sample(s); "
+            "need ≥ 2 for cross-correlation alignment"
+        )
+
+    if video_frames_factory is None:
+        frames = list(
+            _read_video_frames(video_path, config.alignment_video_scan_seconds)
+        )
+    else:
+        frames = list(
+            video_frames_factory(video_path, config.alignment_video_scan_seconds)
+        )
+    if len(frames) < 2:
+        raise ReplayInputAdapterError(
+            f"video yielded {len(frames)} frame(s); "
+            "need ≥ 2 for cross-correlation alignment"
+        )
+    flow_samples = _compute_flow_magnitudes(frames)
+    if len(flow_samples) < 2:
+        raise ReplayInputAdapterError(
+            f"video produced {len(flow_samples)} flow sample(s); "
+            "need ≥ 2 for cross-correlation alignment"
+        )
+
+    return _align_via_cross_correlation(
+        tlog_energy=tlog_energy,
+        flow_samples=flow_samples,
+        config=config,
+        target_fc_dialect=target_fc_dialect,
+        tlog_path=tlog_path,
+        tlog_source_factory=tlog_source_factory,
+    )
+
+
+def _align_via_cross_correlation(
+    *,
+    tlog_energy: tuple[tuple[int, float], ...],
+    flow_samples: tuple[tuple[int, float], ...],
+    config: AutoSyncConfig,
+    target_fc_dialect: FcKind,
+    tlog_path: Path,
+    tlog_source_factory: Callable[[str], Any] | None,
+) -> AlignedWindow:
+    """Pure compute kernel: turn pre-loaded streams into an :class:`AlignedWindow`.
+
+    Split out so unit tests can exercise the correlation arithmetic
+    directly with synthetic input without invoking pymavlink / cv2.
+    """
+    import numpy as _np
+
+    resample_hz = max(config.alignment_resample_hz, 1.0)
+    period_ns = int(1_000_000_000 / resample_hz)
+
+    tlog_origin_ns = tlog_energy[0][0]
+    tlog_resampled = _resample_uniform(tlog_energy, period_ns, tlog_origin_ns)
+    if len(tlog_resampled) < 2:
+        raise ReplayInputAdapterError(
+            "tlog resampled stream has < 2 samples; cannot cross-correlate"
+        )
+
+    video_origin_ns = flow_samples[0][0]
+    flow_resampled = _resample_uniform(flow_samples, period_ns, video_origin_ns)
+    if len(flow_resampled) < 2:
+        raise ReplayInputAdapterError(
+            "video flow stream has < 2 samples; cannot cross-correlate"
+        )
+    if len(flow_resampled) > len(tlog_resampled):
+        raise ReplayInputAdapterError(
+            "video flow stream is longer than the tlog energy stream; "
+            "auto-trim requires the video to be a slice of a longer tlog"
+        )
+
+    tlog_arr = _np.asarray(tlog_resampled, dtype=_np.float64)
+    flow_arr = _np.asarray(flow_resampled, dtype=_np.float64)
+    flow_centred = _zero_mean_normalise(flow_arr)
+    if _np.linalg.norm(flow_centred) == 0.0:
+        # Flat video → no information for correlation. Force the
+        # fallback path; confidence reported as 0.
+        peak_idx = 0
+        confidence = 0.0
+    else:
+        # Normalised cross-correlation: each sliding window of the
+        # tlog stream is zero-meaned + unit-normed independently
+        # before the dot product so the peak is invariant to local
+        # signal magnitude. Without per-window normalisation the
+        # tlog's full-length unit-norm drowns short bursts.
+        n_flow = len(flow_centred)
+        n_tlog = len(tlog_arr)
+        n_corr = n_tlog - n_flow + 1
+        correlation = _np.zeros(n_corr, dtype=_np.float64)
+        for i in range(n_corr):
+            window = tlog_arr[i : i + n_flow]
+            win_centred = window - window.mean()
+            win_norm = float(_np.linalg.norm(win_centred))
+            if win_norm > 0.0:
+                correlation[i] = float(_np.dot(win_centred / win_norm, flow_centred))
+        peak_idx = int(_np.argmax(correlation))
+        confidence = max(0.0, min(1.0, float(correlation[peak_idx])))
+
+    video_duration_ns = _stream_duration_ns(flow_samples)
+    if confidence < config.alignment_low_confidence_threshold:
+        return _fallback_to_head_takeoff(
+            tlog_path=tlog_path,
+            tlog_source_factory=tlog_source_factory,
+            target_fc_dialect=target_fc_dialect,
+            config=config,
+            tlog_energy=tlog_energy,
+            video_origin_ns=video_origin_ns,
+            video_flow_duration_ns=video_duration_ns,
+            confidence=confidence,
+        )
+
+    # Absolute tlog timeline value where video t=0 aligns. The
+    # adapter's seek check compares this against the raw pymavlink
+    # ``msg._timestamp`` so the value MUST be on the tlog timeline,
+    # NOT a delta.
+    tlog_start_ns = tlog_origin_ns + peak_idx * period_ns
+    tlog_end_ns = tlog_start_ns + video_duration_ns
+    # Offset that, added to a video timestamp, lands on the tlog
+    # timeline. Matches ``AutoSyncDecision.offset_ms`` semantics
+    # (``validate_offset_or_fail`` does ``vts + offset_ns``).
+    offset_ms = (tlog_start_ns - video_origin_ns) // 1_000_000
+    return AlignedWindow(
+        tlog_start_ns=tlog_start_ns,
+        tlog_end_ns=tlog_end_ns,
+        offset_ms=offset_ms,
+        confidence=confidence,
+        fallback_used=False,
+    )
+
+
+def _stream_duration_ns(
+    samples: tuple[tuple[int, float], ...],
+) -> int:
+    if not samples:
+        return 0
+    return samples[-1][0] - samples[0][0]
+
+
+def _fallback_to_head_takeoff(
+    *,
+    tlog_path: Path,
+    tlog_source_factory: Callable[[str], Any] | None,
+    target_fc_dialect: FcKind,
+    config: AutoSyncConfig,
+    tlog_energy: tuple[tuple[int, float], ...],
+    video_origin_ns: int,
+    video_flow_duration_ns: int,
+    confidence: float,
+) -> AlignedWindow:
+    """Low-confidence path: use AZ-405 head-takeoff detector.
+
+    Returns an :class:`AlignedWindow` whose ``offset_ms`` and
+    ``tlog_start_ns`` come from the takeoff onset; ``fallback_used``
+    is ``True`` so callers + FDR audit can record the divergence.
+    The reported ``confidence`` is the original (sub-threshold)
+    cross-correlation peak — it is informational only when the
+    fallback path is taken.
+    """
+    takeoff = detect_tlog_takeoff(
+        tlog_path,
+        target_fc_dialect,
+        config,
+        source_factory=tlog_source_factory,
+    )
+    if takeoff.confidence > 0.0:
+        tlog_start_ns = takeoff.onset_ns
+    elif tlog_energy:
+        tlog_start_ns = tlog_energy[0][0]
+    else:
+        tlog_start_ns = 0
+    tlog_end_ns = tlog_start_ns + video_flow_duration_ns
+    offset_ms = (tlog_start_ns - video_origin_ns) // 1_000_000
+    return AlignedWindow(
+        tlog_start_ns=tlog_start_ns,
+        tlog_end_ns=tlog_end_ns,
+        offset_ms=offset_ms,
+        confidence=confidence,
+        fallback_used=True,
+    )
+
+
+def _resample_uniform(
+    samples: tuple[tuple[int, float], ...],
+    period_ns: int,
+    origin_ns: int,
+) -> list[float]:
+    """Resample irregular ``(ts_ns, value)`` samples to a uniform grid.
+
+    Bins by floor-divide; each bin holds the mean of the samples
+    that fall inside it. Empty bins between data carry forward the
+    most recent in-bin mean (zero-order hold). Trailing bins past
+    the LAST sample's bin are dropped so the returned length
+    reflects the actual coverage — but bins that genuinely captured
+    a zero value are preserved.
+    """
+    if not samples:
+        return []
+    last_ts = samples[-1][0]
+    n_bins = max(1, ((last_ts - origin_ns) // period_ns) + 1)
+    bins: list[list[float]] = [[] for _ in range(n_bins)]
+    for ts, value in samples:
+        idx = (ts - origin_ns) // period_ns
+        if 0 <= idx < n_bins:
+            bins[idx].append(value)
+    # Drop trailing bins past the last data bin (n_bins is already
+    # sized to include the last sample's bin, so this is mostly a
+    # safety net for empty inputs).
+    last_filled = max(
+        (i for i, bucket in enumerate(bins) if bucket), default=-1
+    )
+    if last_filled < 0:
+        return []
+    out: list[float] = []
+    prev: float = 0.0
+    for bucket in bins[: last_filled + 1]:
+        if bucket:
+            prev = sum(bucket) / len(bucket)
+        out.append(prev)
+    return out
+
+
+def _zero_mean_normalise(
+    arr: "npt.NDArray[np.float64]",
+) -> "npt.NDArray[np.float64]":
+    import numpy as _np
+
+    centred: "npt.NDArray[np.float64]" = arr - arr.mean()
+    norm = float(_np.linalg.norm(centred))
+    if norm == 0.0:
+        return centred
+    result: "npt.NDArray[np.float64]" = centred / norm
+    return result
+
+
+def _load_tlog_imu_energy_stream(
+    tlog_path: Path,
+    *,
+    max_messages: int,
+    source_factory: Callable[[str], Any] | None,
+) -> tuple[tuple[int, float], ...]:
+    """Walk the WHOLE tlog (up to ``max_messages``) for IMU energy samples.
+
+    Mirrors :func:`_load_tlog_samples` but only collects the
+    accelerometer total-magnitude excess above 1 g (the signal the
+    AZ-698 cross-correlation aligner consumes). The ATTITUDE channel
+    is not needed here.
+    """
+    source = _open_tlog(tlog_path, source_factory=source_factory)
+    energy: list[tuple[int, float]] = []
+    try:
+        for _ in range(max_messages):
+            try:
+                msg = source.recv_match(
+                    type=["RAW_IMU", "SCALED_IMU2"],
+                    blocking=False,
+                )
+            except Exception as exc:  # pragma: no cover — defensive.
+                raise ReplayInputAdapterError(
+                    f"tlog scan failed on {tlog_path}: {exc!r}"
+                ) from exc
+            if msg is None:
+                break
+            ts_ns = _msg_timestamp_ns(msg)
+            xa = float(getattr(msg, "xacc", 0.0)) / _MG_PER_G
+            ya = float(getattr(msg, "yacc", 0.0)) / _MG_PER_G
+            za = float(getattr(msg, "zacc", 0.0)) / _MG_PER_G
+            total_g = math.sqrt(xa * xa + ya * ya + za * za)
+            energy.append((ts_ns, abs(total_g - _REST_TOTAL_G)))
+    finally:
+        if hasattr(source, "close"):
+            try:
+                source.close()
+            except Exception:  # pragma: no cover — defensive.
+                pass
+    return tuple(energy)