[AZ-698] Tlog trim + mid-flight alignment for replay

Adds find_aligned_window cross-correlation (NCC, per-window unit norm)
between IMU energy and video optical-flow magnitude. Returns
AlignedWindow{tlog_start_ns, tlog_end_ns, offset_ms, confidence,
fallback_used}, with fallback to head-takeoff on low confidence to
preserve AZ-405 behavior. TlogReplayFcAdapter honors tlog_start_ns and
skips pre-window messages. New --auto-trim CLI flag, mutex with
--time-offset-ms. AC-1..AC-4 covered by unit tests; AC-5 skipped (no
real flight_derkachi.mp4 in repo). 106 tests pass in regression slice.
Zero new mypy --strict errors.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-20 16:29:59 +03:00
parent 64d961f60c
commit 87fe98858f
13 changed files with 1360 additions and 7 deletions
@@ -37,16 +37,22 @@ from typing import TYPE_CHECKING, Any
from gps_denied_onboard._types.fc import FcKind
from gps_denied_onboard.replay_input.errors import ReplayInputAdapterError
from gps_denied_onboard.replay_input.interface import AutoSyncConfig, AutoSyncDecision
from gps_denied_onboard.replay_input.interface import (
AlignedWindow,
AutoSyncConfig,
AutoSyncDecision,
)
if TYPE_CHECKING:
import numpy as np
import numpy.typing as npt
__all__ = [
"TlogSamples",
"compute_offset",
"detect_tlog_takeoff",
"detect_video_motion_onset",
"find_aligned_window",
"validate_offset_or_fail",
]
@@ -644,3 +650,363 @@ def _compute_flow_magnitudes(
def _build_flag_on(name: str) -> bool:
    """Report whether the environment variable ``name`` holds a truthy flag.

    The value is stripped of surrounding whitespace and lower-cased
    before comparison; ``on``, ``1``, ``true`` and ``yes`` count as
    enabled. An unset variable reads as the empty string and is
    therefore treated as disabled.
    """
    enabled_spellings = {"on", "1", "true", "yes"}
    setting = os.environ.get(name, "").strip().lower()
    return setting in enabled_spellings
# ---------------------------------------------------------------------
# AZ-698 — mid-flight cross-correlation aligner
#
# The AZ-405 head-takeoff detector only works when the video covers
# the take-off moment. For mid-flight slices (e.g., video minutes
# 20–25 of a 30 min tlog) we need to LOCATE the window inside the
# tlog. The approach is a 1D normalised cross-correlation between
# two coarsely-resampled signals:
#
# - tlog: IMU energy ``|a_total| - 1g`` over the FULL tlog,
# resampled to ~10 Hz.
# - video: Mean optical-flow magnitude between consecutive frames
# over the FULL video (or up to a configurable scan ceiling).
#
# Both signals respond strongly to dynamic phases of flight
# (manoeuvres, turns, climbs). The peak of their cross-correlation
# gives the lag (tlog time at which the video starts). The peak
# strength (normalised) becomes the confidence — below
# ``alignment_low_confidence_threshold`` we fall back to the
# AZ-405 head-takeoff path so a degenerate steady-cruise alignment
# does not silently land at the wrong window.
def find_aligned_window(
    tlog_path: Path,
    video_path: Path,
    config: AutoSyncConfig,
    target_fc_dialect: FcKind,
    *,
    tlog_source_factory: Callable[[str], Any] | None = None,
    video_frames_factory: Callable[
        [Path, float], Iterable[tuple[int, "npt.NDArray[np.uint8]"]]
    ]
    | None = None,
) -> AlignedWindow:
    """Find where the video's footage sits on the ``tlog_path`` timeline (AZ-698).

    Loads the tlog IMU-energy stream and the video optical-flow
    stream, rejects inputs too short to correlate, and hands both to
    :func:`_align_via_cross_correlation`.

    Args:
        tlog_path: Binary ArduPilot tlog. Scanned for at most
            :attr:`AutoSyncConfig.prescan_max_messages` × 10 IMU
            messages (the aligner needs the FULL flight, not just
            the head).
        video_path: Mp4 / mkv input. Frames are requested with
            :attr:`AutoSyncConfig.alignment_video_scan_seconds` as
            the scan limit and converted to a flow-magnitude stream.
        config: Operator-tunable thresholds.
        target_fc_dialect: ``ARDUPILOT_PLANE`` or ``INAV``; every
            other value is rejected up front.
        tlog_source_factory: Test injection — stands in for the
            ``pymavlink`` open call.
        video_frames_factory: Test injection — stands in for
            ``cv2.VideoCapture`` frame iteration.

    Returns:
        :class:`AlignedWindow` whose ``tlog_start_ns`` /
        ``tlog_end_ns`` bound the located window, with an
        ``offset_ms`` and a peak ``confidence``. When the confidence
        is below
        :attr:`AutoSyncConfig.alignment_low_confidence_threshold`
        the window comes from the AZ-405 head-takeoff path and
        carries ``fallback_used=True``.

    Raises:
        ReplayInputAdapterError: On an unsupported dialect, a missing
            tlog or video file, or when the tlog, the decoded frames
            or the derived flow stream hold fewer than 2 entries.
    """
    supported_dialects = (FcKind.ARDUPILOT_PLANE, FcKind.INAV)
    if target_fc_dialect not in supported_dialects:
        raise ReplayInputAdapterError(
            f"target_fc_dialect must be ARDUPILOT_PLANE or INAV; got {target_fc_dialect!r}"
        )
    if not tlog_path.is_file():
        raise ReplayInputAdapterError(f"tlog file not found: {tlog_path}")
    if not video_path.is_file():
        raise ReplayInputAdapterError(f"video file not found: {video_path}")

    # The scan budget is ten times the head pre-scan budget because the
    # matching window may sit anywhere in the flight.
    message_budget = config.prescan_max_messages * 10
    tlog_energy = _load_tlog_imu_energy_stream(
        tlog_path,
        max_messages=message_budget,
        source_factory=tlog_source_factory,
    )
    energy_count = len(tlog_energy)
    if energy_count < 2:
        raise ReplayInputAdapterError(
            f"tlog yielded {energy_count} IMU sample(s); "
            "need ≥ 2 for cross-correlation alignment"
        )

    scan_seconds = config.alignment_video_scan_seconds
    if video_frames_factory is not None:
        frames = list(video_frames_factory(video_path, scan_seconds))
    else:
        frames = list(_read_video_frames(video_path, scan_seconds))
    frame_count = len(frames)
    if frame_count < 2:
        raise ReplayInputAdapterError(
            f"video yielded {frame_count} frame(s); "
            "need ≥ 2 for cross-correlation alignment"
        )

    flow_samples = _compute_flow_magnitudes(frames)
    flow_count = len(flow_samples)
    if flow_count < 2:
        raise ReplayInputAdapterError(
            f"video produced {flow_count} flow sample(s); "
            "need ≥ 2 for cross-correlation alignment"
        )

    return _align_via_cross_correlation(
        tlog_energy=tlog_energy,
        flow_samples=flow_samples,
        config=config,
        target_fc_dialect=target_fc_dialect,
        tlog_path=tlog_path,
        tlog_source_factory=tlog_source_factory,
    )
def _align_via_cross_correlation(
    *,
    tlog_energy: tuple[tuple[int, float], ...],
    flow_samples: tuple[tuple[int, float], ...],
    config: AutoSyncConfig,
    target_fc_dialect: FcKind,
    tlog_path: Path,
    tlog_source_factory: Callable[[str], Any] | None,
) -> AlignedWindow:
    """Pure compute kernel: turn pre-loaded streams into an :class:`AlignedWindow`.

    Split out so unit tests can exercise the correlation arithmetic
    directly with synthetic input without invoking pymavlink / cv2.

    Args:
        tlog_energy: ``(ts_ns, value)`` IMU-energy samples on the
            tlog timeline; must be non-empty.
        flow_samples: ``(ts_ns, value)`` optical-flow samples on the
            video timeline; must be non-empty.
        config: Supplies ``alignment_resample_hz`` (clamped to at
            least 1 Hz) and ``alignment_low_confidence_threshold``.
        target_fc_dialect: Forwarded to the head-takeoff fallback.
        tlog_path: Forwarded to the head-takeoff fallback.
        tlog_source_factory: Forwarded to the head-takeoff fallback.

    Returns:
        The window at the correlation peak with
        ``fallback_used=False``, or the result of
        :func:`_fallback_to_head_takeoff` when the peak confidence is
        below ``config.alignment_low_confidence_threshold``.

    Raises:
        ReplayInputAdapterError: When either resampled stream has
            fewer than 2 samples, or the resampled video stream is
            longer than the resampled tlog stream.
    """
    import numpy as _np

    resample_hz = max(config.alignment_resample_hz, 1.0)
    period_ns = int(1_000_000_000 / resample_hz)

    tlog_origin_ns = tlog_energy[0][0]
    tlog_resampled = _resample_uniform(tlog_energy, period_ns, tlog_origin_ns)
    if len(tlog_resampled) < 2:
        raise ReplayInputAdapterError(
            "tlog resampled stream has < 2 samples; cannot cross-correlate"
        )
    video_origin_ns = flow_samples[0][0]
    flow_resampled = _resample_uniform(flow_samples, period_ns, video_origin_ns)
    if len(flow_resampled) < 2:
        raise ReplayInputAdapterError(
            "video flow stream has < 2 samples; cannot cross-correlate"
        )
    if len(flow_resampled) > len(tlog_resampled):
        raise ReplayInputAdapterError(
            "video flow stream is longer than the tlog energy stream; "
            "auto-trim requires the video to be a slice of a longer tlog"
        )

    tlog_arr = _np.asarray(tlog_resampled, dtype=_np.float64)
    flow_arr = _np.asarray(flow_resampled, dtype=_np.float64)
    flow_centred = _zero_mean_normalise(flow_arr)
    flow_norm = float(_np.linalg.norm(flow_centred))

    # Default outcome: no usable correlation. Covers a flat video
    # (norm == 0) AND a video stream poisoned by NaN / inf (norm not
    # finite). The latter matters because ``min(1.0, nan)`` evaluates
    # to 1.0, so a NaN peak would otherwise be reported as FULL
    # confidence and skip the fallback path.
    peak_idx = 0
    confidence = 0.0
    if _np.isfinite(flow_norm) and flow_norm > 0.0:
        # Normalised cross-correlation: each sliding window of the
        # tlog stream is zero-meaned + unit-normed independently
        # before the dot product so the peak is invariant to local
        # signal magnitude. Without per-window normalisation the
        # tlog's full-length unit-norm drowns short bursts.
        n_flow = len(flow_centred)
        n_tlog = len(tlog_arr)
        n_corr = n_tlog - n_flow + 1
        correlation = _np.zeros(n_corr, dtype=_np.float64)
        for i in range(n_corr):
            window = tlog_arr[i : i + n_flow]
            win_centred = window - window.mean()
            win_norm = float(_np.linalg.norm(win_centred))
            if win_norm > 0.0:
                score = float(_np.dot(win_centred / win_norm, flow_centred))
                # A non-finite score carries no alignment evidence;
                # leave the lag at 0 so ``argmax`` cannot select it
                # (``argmax`` treats NaN as the maximum).
                if _np.isfinite(score):
                    correlation[i] = score
        peak_idx = int(_np.argmax(correlation))
        confidence = max(0.0, min(1.0, float(correlation[peak_idx])))

    video_duration_ns = _stream_duration_ns(flow_samples)
    if confidence < config.alignment_low_confidence_threshold:
        return _fallback_to_head_takeoff(
            tlog_path=tlog_path,
            tlog_source_factory=tlog_source_factory,
            target_fc_dialect=target_fc_dialect,
            config=config,
            tlog_energy=tlog_energy,
            video_origin_ns=video_origin_ns,
            video_flow_duration_ns=video_duration_ns,
            confidence=confidence,
        )

    # Absolute tlog timeline value where video t=0 aligns. The
    # adapter's seek check compares this against the raw pymavlink
    # ``msg._timestamp`` so the value MUST be on the tlog timeline,
    # NOT a delta.
    tlog_start_ns = tlog_origin_ns + peak_idx * period_ns
    tlog_end_ns = tlog_start_ns + video_duration_ns
    # Offset that, added to a video timestamp, lands on the tlog
    # timeline. Matches ``AutoSyncDecision.offset_ms`` semantics
    # (``validate_offset_or_fail`` does ``vts + offset_ns``).
    offset_ms = (tlog_start_ns - video_origin_ns) // 1_000_000
    return AlignedWindow(
        tlog_start_ns=tlog_start_ns,
        tlog_end_ns=tlog_end_ns,
        offset_ms=offset_ms,
        confidence=confidence,
        fallback_used=False,
    )
def _stream_duration_ns(
    samples: tuple[tuple[int, float], ...],
) -> int:
    """Return the timestamp span between the first and last sample.

    The result is ``last_ts - first_ts`` taken positionally (no
    sorting is performed), and ``0`` for an empty stream.
    """
    return samples[-1][0] - samples[0][0] if samples else 0
def _fallback_to_head_takeoff(
    *,
    tlog_path: Path,
    tlog_source_factory: Callable[[str], Any] | None,
    target_fc_dialect: FcKind,
    config: AutoSyncConfig,
    tlog_energy: tuple[tuple[int, float], ...],
    video_origin_ns: int,
    video_flow_duration_ns: int,
    confidence: float,
) -> AlignedWindow:
    """Low-confidence path: anchor the window with the AZ-405 takeoff detector.

    The window start is chosen in this order: the detected takeoff
    onset when the detector reports a positive confidence, otherwise
    the timestamp of the first ``tlog_energy`` sample, otherwise ``0``.
    The window end is the start plus ``video_flow_duration_ns``.

    The returned :class:`AlignedWindow` always has
    ``fallback_used=True`` so callers can tell the cross-correlation
    result was not used. Its ``confidence`` is the sub-threshold
    cross-correlation peak passed in by the caller, not the takeoff
    detector's confidence — it is informational only on this path.
    """
    takeoff = detect_tlog_takeoff(
        tlog_path,
        target_fc_dialect,
        config,
        source_factory=tlog_source_factory,
    )

    if takeoff.confidence > 0.0:
        anchor_ns = takeoff.onset_ns
    else:
        # No usable takeoff: start at the head of the tlog, or at 0
        # when there are no energy samples at all.
        anchor_ns = tlog_energy[0][0] if tlog_energy else 0

    return AlignedWindow(
        tlog_start_ns=anchor_ns,
        tlog_end_ns=anchor_ns + video_flow_duration_ns,
        offset_ms=(anchor_ns - video_origin_ns) // 1_000_000,
        confidence=confidence,
        fallback_used=True,
    )
def _resample_uniform(
    samples: tuple[tuple[int, float], ...],
    period_ns: int,
    origin_ns: int,
) -> list[float]:
    """Resample irregular ``(ts_ns, value)`` samples onto a uniform grid.

    A sample lands in bin ``(ts - origin_ns) // period_ns``; samples
    whose bin is negative or beyond the bin of the LAST sample in
    ``samples`` are ignored. Each populated bin yields the mean of
    its samples. An empty bin repeats the most recent populated
    bin's mean (zero-order hold), or ``0.0`` when no populated bin
    precedes it. Output stops at the last populated bin, so a bin
    that genuinely captured a zero value is kept.

    Returns an empty list when ``samples`` is empty or no sample
    falls into a valid bin.
    """
    if not samples:
        return []

    final_ts = samples[-1][0]
    bin_count = max(1, ((final_ts - origin_ns) // period_ns) + 1)

    # Only populated bins are stored; gaps are filled while emitting.
    grouped: dict[int, list[float]] = {}
    for ts, value in samples:
        slot = (ts - origin_ns) // period_ns
        if 0 <= slot < bin_count:
            grouped.setdefault(slot, []).append(value)
    if not grouped:
        return []

    resampled: list[float] = []
    held: float = 0.0
    for slot in range(max(grouped) + 1):
        members = grouped.get(slot)
        if members:
            held = sum(members) / len(members)
        resampled.append(held)
    return resampled
def _zero_mean_normalise(
    arr: "npt.NDArray[np.float64]",
) -> "npt.NDArray[np.float64]":
    """Subtract the mean from ``arr`` and scale the result to unit L2 norm.

    When the centred array has zero norm (a constant signal) it is
    returned unscaled, avoiding a division by zero.
    """
    import numpy as _np

    shifted: "npt.NDArray[np.float64]" = arr - arr.mean()
    magnitude = float(_np.linalg.norm(shifted))
    if magnitude != 0.0:
        scaled: "npt.NDArray[np.float64]" = shifted / magnitude
        return scaled
    return shifted
def _load_tlog_imu_energy_stream(
    tlog_path: Path,
    *,
    max_messages: int,
    source_factory: Callable[[str], Any] | None,
) -> tuple[tuple[int, float], ...]:
    """Collect IMU energy samples from the tlog, up to ``max_messages`` of them.

    Requests ``RAW_IMU`` / ``SCALED_IMU2`` messages one at a time and
    stops at the first ``None`` reply or once ``max_messages``
    messages have been read. For each message the three accelerometer
    axes (a missing axis counts as ``0.0``) are divided by
    ``_MG_PER_G``, combined into a total magnitude, and stored as the
    absolute deviation from ``_REST_TOTAL_G`` — the signal consumed
    by the AZ-698 cross-correlation aligner. The ATTITUDE channel is
    not read here.

    Returns:
        ``(ts_ns, energy)`` pairs in the order the messages arrived.

    Raises:
        ReplayInputAdapterError: When fetching a message from the
            source raises.
    """
    source = _open_tlog(tlog_path, source_factory=source_factory)
    collected: list[tuple[int, float]] = []
    try:
        for _ in range(max_messages):
            try:
                msg = source.recv_match(
                    type=["RAW_IMU", "SCALED_IMU2"],
                    blocking=False,
                )
            except Exception as exc:  # pragma: no cover — defensive.
                raise ReplayInputAdapterError(
                    f"tlog scan failed on {tlog_path}: {exc!r}"
                ) from exc
            if msg is None:
                # Source exhausted before the message budget ran out.
                break
            stamp_ns = _msg_timestamp_ns(msg)
            ax, ay, az = (
                float(getattr(msg, axis, 0.0)) / _MG_PER_G
                for axis in ("xacc", "yacc", "zacc")
            )
            magnitude_g = math.sqrt(ax * ax + ay * ay + az * az)
            collected.append((stamp_ns, abs(magnitude_g - _REST_TOTAL_G)))
    finally:
        # Best-effort close: a failing close must not mask the scan
        # result or the scan error.
        if hasattr(source, "close"):
            try:
                source.close()
            except Exception:  # pragma: no cover — defensive.
                pass
    return tuple(collected)