Files
gps-denied-onboard/src/gps_denied_onboard/replay_input/auto_sync.py
T
Oleksandr Bezdieniezhnykh 87fe98858f [AZ-698] Tlog trim + mid-flight alignment for replay
Adds find_aligned_window cross-correlation (NCC, per-window unit norm)
between IMU energy and video optical-flow magnitude. Returns
AlignedWindow{tlog_start_ns, tlog_end_ns, offset_ms, confidence,
used_fallback}, with fallback to head-takeoff on low confidence to
preserve AZ-405 behavior. TlogReplayFcAdapter honors tlog_start_ns and
skips pre-window messages. New --auto-trim CLI flag, mutex with
--time-offset-ms. AC-1..AC-4 covered by unit tests; AC-5 skipped (no
real flight_derkachi.mp4 in repo). 106 tests pass in regression slice.
Zero new mypy --strict errors.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-20 16:29:59 +03:00

1013 lines
37 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Auto-sync detectors + offset compute + AC-9 validator (AZ-405).
Three concerns:
1. **Tlog take-off detector** — walks the head of the tlog, looks for
a sustained vertical-acceleration excess + sustained attitude-rate
excess, returns ``(takeoff_ns, confidence)``.
2. **Video motion-onset detector** — runs OpenCV Farneback dense
optical flow (3 pyramid levels) over the leading seconds of the
video, returns ``(motion_onset_ns, confidence)``.
3. **AC-9 frame-window match validator** — given a candidate offset
and the tlog/video timestamp series, returns 0 if ≥ 95 % of
video frames have an IMU sample within ± 100 ms after the offset
is applied; 2 otherwise.
The detector functions are split into a thin path-reading wrapper
(``detect_tlog_takeoff`` / ``detect_video_motion_onset``) and a pure
sample-driven core (``_compute_tlog_takeoff_from_samples`` /
``_compute_video_onset_from_samples``). Tests exercise the pure cores
directly with synthetic fixtures; production calls the wrappers,
which read the tlog via ``pymavlink`` and the video via ``cv2``.
Both wrappers accept an optional ``source_factory`` (tlog) /
``frames_factory`` (video) injection point so unit tests can swap in
fakes without touching the filesystem (mirrors AZ-399's pattern).
"""
from __future__ import annotations
import bisect
import math
import os
from collections.abc import Callable, Iterable
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING, Any
from gps_denied_onboard._types.fc import FcKind
from gps_denied_onboard.replay_input.errors import ReplayInputAdapterError
from gps_denied_onboard.replay_input.interface import (
AlignedWindow,
AutoSyncConfig,
AutoSyncDecision,
)
if TYPE_CHECKING:
import numpy as np
import numpy.typing as npt
# Public API of this module. Only non-underscore names are exported;
# the underscore-prefixed compute kernels and disk-reading helpers
# defined below are deliberately left out of the star-import surface.
__all__ = [
    "TlogSamples",
    "compute_offset",
    "detect_tlog_takeoff",
    "detect_video_motion_onset",
    "find_aligned_window",
    "validate_offset_or_fail",
]
# Conversion: MAVLink RAW_IMU / SCALED_IMU2 publish accelerometer
# components in mG (milli-G); 1 g ≡ 9.80665 m/s² by ISO 80000-3.
_MG_PER_G: float = 1000.0
# Per the AZ-405 spec, the vertical-accel signal of interest is the
# magnitude excess above gravity (i.e., body acceleration regardless
# of frame orientation). At rest |a| ≈ 1 g; during upward thrust |a|
# > 1 g; during free-fall |a| ≈ 0 g. The take-off pattern is expected
# to be a sustained positive excess (upward thrust). NOTE: the
# criterion actually computed in this module is the UNSIGNED deviation
# ``|total_g - 1.0|``, so the sign is not retained — a sustained
# departure from rest in either direction (thrust or free-fall)
# registers as excess.
_REST_TOTAL_G: float = 1.0
# ---------------------------------------------------------------------
# DTOs (internal — public API surfaces results via AutoSyncDecision)
@dataclass(frozen=True, slots=True)
class _DetectorResult:
"""Outcome of a single detector pass.
``onset_ns`` is the best-guess event start (ns); ``confidence``
is in [0, 1] and reflects how sustained the signal was relative
to the configured threshold + sustained-time requirement.
"""
onset_ns: int
confidence: float
@dataclass(frozen=True, slots=True)
class TlogSamples:
"""Pre-loaded tlog samples extracted by the take-off detector.
Used as the input shape for :func:`_compute_tlog_takeoff_from_samples`
so unit tests can build a deterministic fixture without parsing a
real ``.tlog`` file.
Attributes:
accel: Sequence of ``(ts_ns, total_accel_g)`` pairs sourced
from ``RAW_IMU`` / ``SCALED_IMU2`` messages.
attitude: Sequence of ``(ts_ns, roll_rad, pitch_rad, yaw_rad)``
tuples sourced from ``ATTITUDE`` messages.
imu_count_by_type: Map of message-type-name → count, used for
the ``"tlog missing required message types: [...]"``
error path (R-DEMO-3).
"""
accel: tuple[tuple[int, float], ...]
attitude: tuple[tuple[int, float, float, float], ...]
imu_count_by_type: dict[str, int]
# ---------------------------------------------------------------------
# Public entrypoints
def detect_tlog_takeoff(
    tlog_path: Path,
    target_fc_dialect: FcKind,
    config: AutoSyncConfig,
    *,
    source_factory: Callable[[str], Any] | None = None,
) -> _DetectorResult:
    """Scan the head of a tlog and report the detected take-off.

    Thin path-reading wrapper: validates the arguments, loads up to
    ``config.prescan_max_messages`` messages and hands the samples to
    :func:`_compute_tlog_takeoff_from_samples`.

    Args:
        tlog_path: Path to the tlog file; it must exist as a regular
            file (checked before anything is opened, including when a
            ``source_factory`` is supplied).
        target_fc_dialect: ``ARDUPILOT_PLANE`` or ``INAV``. Both speak
            ``ardupilotmega`` MAVLink on the GCS telemetry channel
            (the iNav-side native MSP traffic is irrelevant here);
            the parameter exists for parity with the rest of the
            replay surface and is validated on entry.
        config: Operator-tunable thresholds (see
            :class:`AutoSyncConfig`).
        source_factory: Test-only injection. When given, it is called
            with ``str(tlog_path)`` instead of opening the file via
            pymavlink; the returned object must offer ``recv_match``
            / ``close`` like ``mavutil.mavlink_connection``.

    Returns:
        The take-off onset and its confidence.

    Raises:
        ReplayInputAdapterError: If the dialect is unsupported, the
            file does not exist, or the tlog carries no ``RAW_IMU`` /
            ``SCALED_IMU2`` or no ``ATTITUDE`` samples (the R-DEMO-3
            fail-fast path, surfaced BEFORE any video read in the
            coordinator's ``open()`` flow).
    """
    supported_dialects = (FcKind.ARDUPILOT_PLANE, FcKind.INAV)
    if target_fc_dialect not in supported_dialects:
        raise ReplayInputAdapterError(
            f"target_fc_dialect must be ARDUPILOT_PLANE or INAV; got {target_fc_dialect!r}"
        )
    if not tlog_path.is_file():
        raise ReplayInputAdapterError(f"tlog file not found: {tlog_path}")

    head_samples = _load_tlog_samples(
        tlog_path,
        config.prescan_max_messages,
        source_factory=source_factory,
    )
    return _compute_tlog_takeoff_from_samples(head_samples, config)
def detect_video_motion_onset(
    video_path: Path,
    config: AutoSyncConfig,
    *,
    frames_factory: Callable[[Path, float], Iterable[tuple[int, "np.ndarray"]]]
    | None = None,
) -> _DetectorResult:
    """Scan the leading segment of a video and report the motion onset.

    Decodes ``config.video_motion_scan_seconds`` worth of frames,
    reduces them to pairwise optical-flow magnitudes and passes those
    to :func:`_compute_video_onset_from_samples`.

    Args:
        video_path: Path to an MP4 / MKV / AVI file; it must exist as
            a regular file (checked even when ``frames_factory`` is
            supplied).
        config: Operator-tunable thresholds (see
            :class:`AutoSyncConfig`).
        frames_factory: Test-only injection. When given, it is called
            with ``(video_path, scan_seconds)`` and must return an
            iterable of ``(monotonic_ns, frame_bgr)`` tuples holding
            at least 2 frames so a pairwise flow can be computed.

    Returns:
        The motion onset and its confidence.

    Raises:
        ReplayInputAdapterError: If the video file is missing or
            unreadable, or fewer than 2 frames were decoded.
    """
    if not video_path.is_file():
        raise ReplayInputAdapterError(f"video file not found: {video_path}")

    # Production reads through cv2; tests may substitute a fake reader.
    frame_reader = _read_video_frames if frames_factory is None else frames_factory
    frames = list(frame_reader(video_path, config.video_motion_scan_seconds))

    if len(frames) < 2:
        raise ReplayInputAdapterError(
            f"video file unreadable or too short: {video_path} "
            f"(decoded {len(frames)} frame(s); need ≥ 2)"
        )
    return _compute_video_onset_from_samples(
        _compute_flow_magnitudes(frames), config
    )
def compute_offset(
    tlog_result: _DetectorResult,
    video_result: _DetectorResult,
) -> AutoSyncDecision:
    """Merge the tlog and video detector results into an :class:`AutoSyncDecision`.

    The offset is ``tlog_takeoff_ns - video_motion_onset_ns`` (positive
    means the video starts before the take-off recorded in the tlog),
    floor-divided down to whole milliseconds.

    The combined confidence is the minimum of the two detector
    confidences: the weaker signal dominates, so the downstream
    WARN-and-proceed path (AC-6) fires whenever either side is
    unreliable.

    Args:
        tlog_result: Outcome of the tlog take-off detector.
        video_result: Outcome of the video motion-onset detector.

    Returns:
        The decision carrying the offset, both onsets, both individual
        confidences and the combined confidence.
    """
    tlog_ns = tlog_result.onset_ns
    video_ns = video_result.onset_ns
    tlog_conf = tlog_result.confidence
    video_conf = video_result.confidence
    return AutoSyncDecision(
        offset_ms=(tlog_ns - video_ns) // 1_000_000,
        tlog_takeoff_ns=tlog_ns,
        video_motion_onset_ns=video_ns,
        tlog_confidence=tlog_conf,
        video_confidence=video_conf,
        combined_confidence=min(tlog_conf, video_conf),
    )
def validate_offset_or_fail(
    offset_ms: int,
    tlog_imu_timestamps_ns: Iterable[int],
    video_frame_timestamps_ns: Iterable[int],
    threshold_pct: float,
    *,
    window_ms: int = 100,
) -> int:
    """Check a candidate offset against the AC-9 frame-window criterion.

    Every video timestamp is shifted by the offset onto the tlog
    timeline; a frame counts as matched when the nearest IMU timestamp
    lies within ± ``window_ms`` of the shifted value.

    Args:
        offset_ms: Candidate offset, added to each video timestamp.
        tlog_imu_timestamps_ns: IMU timestamps (ns); need not be
            sorted, they are sorted here.
        video_frame_timestamps_ns: Video frame timestamps (ns).
        threshold_pct: Minimum percentage of matched frames for a pass.
        window_ms: Half-width of the matching window in milliseconds.

    Returns:
        ``0`` when at least ``threshold_pct`` % of frames match, ``2``
        otherwise (CLI exit code for the AC-8 hard-fail). Empty video
        or IMU input also yields ``2`` so a degenerate call can never
        be mistaken for a PASS.
    """
    frame_ts = list(video_frame_timestamps_ns)
    if not frame_ts:
        # No frames to match. The replay binary rejects empty videos
        # earlier, so getting here indicates a bug; report hard-fail.
        return 2
    imu_ts = sorted(tlog_imu_timestamps_ns)
    if not imu_ts:
        return 2

    shift_ns = int(offset_ms) * 1_000_000
    tolerance_ns = int(window_ms) * 1_000_000

    hits = 0
    for frame_ns in frame_ts:
        wanted_ns = frame_ns + shift_ns
        pos = bisect.bisect_left(imu_ts, wanted_ns)
        # Only the two neighbours around the insertion point can be
        # nearest; the slice clips whichever one falls off the array.
        neighbours = imu_ts[max(pos - 1, 0) : pos + 1]
        if min(abs(ts - wanted_ns) for ts in neighbours) <= tolerance_ns:
            hits += 1

    matched_pct = (hits / len(frame_ts)) * 100.0
    if matched_pct >= threshold_pct:
        return 0
    return 2
# ---------------------------------------------------------------------
# Pure compute kernels (testable without disk IO)
def _compute_tlog_takeoff_from_samples(
    samples: TlogSamples,
    config: AutoSyncConfig,
) -> _DetectorResult:
    """Pure detector: turn pre-loaded tlog samples into a take-off result.

    Algorithm: look for the sustained window in which (a) the
    unsigned accel deviation from 1 g exceeds
    ``config.takeoff_accel_threshold_g`` and (b) the attitude-rate
    magnitude exceeds ``config.takeoff_attitude_rate_threshold_rad_s``,
    each over ``config.sustained_seconds``.

    Outcomes:
        * both signals fire — onset is the EARLIER of the two,
          confidence is the weaker of the two ratios;
        * only one fires — its onset is used and its confidence is
          discounted by 0.6;
        * neither fires — onset is the first accel timestamp with
          confidence ``0.0``.

    Args:
        samples: Accelerometer and attitude samples from the tlog.
        config: Operator-tunable thresholds.

    Returns:
        The detected onset and its confidence.

    Raises:
        ReplayInputAdapterError: When the tlog had no IMU samples or
            no ATTITUDE samples (R-DEMO-3 fail-fast).
    """
    if not samples.accel:
        missing = ["RAW_IMU", "SCALED_IMU2"]
        raise ReplayInputAdapterError(
            f"tlog missing required message types: {missing}"
        )
    if not samples.attitude:
        raise ReplayInputAdapterError(
            "tlog missing required message types: ['ATTITUDE']"
        )
    sustained_ns = int(config.sustained_seconds * 1_000_000_000)

    # Pair-wise attitude rates (rad/s magnitude vector) — emitted at
    # the timestamp of the LATER sample so the rate aligns with when
    # it is observable downstream.
    attitude_rates: list[tuple[int, float]] = []
    for previous, current in zip(samples.attitude, samples.attitude[1:]):
        ts_prev, roll_prev, pitch_prev, yaw_prev = previous
        ts_curr, roll_curr, pitch_curr, yaw_curr = current
        dt_s = (ts_curr - ts_prev) / 1_000_000_000.0
        if dt_s <= 0.0:
            # Duplicate or out-of-order timestamps give no usable rate.
            continue
        dr = roll_curr - roll_prev
        # Roll, like yaw, is reported on a circular range; a roll
        # through inverted shows up as a ~2π jump between consecutive
        # samples and would otherwise read as a huge spurious rate.
        # Only jumps larger than π are wrapped so ordinary deltas keep
        # their exact value.
        if abs(dr) > math.pi:
            dr = _wrap_pi(dr)
        dp = pitch_curr - pitch_prev
        dy = _wrap_pi(yaw_curr - yaw_prev)
        rate_mag = math.sqrt((dr / dt_s) ** 2 + (dp / dt_s) ** 2 + (dy / dt_s) ** 2)
        attitude_rates.append((ts_curr, rate_mag))

    # Unsigned deviation from rest: thrust and free-fall both count.
    accel_excess = tuple(
        (ts, abs(total_g - _REST_TOTAL_G)) for ts, total_g in samples.accel
    )
    accel_event = _find_sustained_event(
        accel_excess,
        threshold=config.takeoff_accel_threshold_g,
        sustained_ns=sustained_ns,
    )
    attitude_event = _find_sustained_event(
        tuple(attitude_rates),
        threshold=config.takeoff_attitude_rate_threshold_rad_s,
        sustained_ns=sustained_ns,
    )

    if accel_event is None and attitude_event is None:
        # Neither signal crossed; best we can do is flag "no clear
        # take-off" so the coordinator can WARN and continue with the
        # tlog start as a fallback origin.
        first_ns = samples.accel[0][0]
        return _DetectorResult(onset_ns=first_ns, confidence=0.0)

    if accel_event is not None and attitude_event is not None:
        # Both signals fired — they should both point at the same
        # event. We adopt the EARLIER of the two onsets so the offset
        # is referenced against the moment thrust began (the attitude
        # body-rate spike usually trails the thrust by a few hundred
        # ms during a vertical climb).
        onset_ns = min(accel_event[0], attitude_event[0])
        # Confidence is the weakest of the two signals.
        confidence = min(accel_event[1], attitude_event[1])
    elif accel_event is not None:
        # Only the accel signal — discount confidence so the
        # combined offset eventually trips the WARN-and-proceed
        # threshold (combined_confidence < 0.80 → AC-6).
        onset_ns, raw_conf = accel_event
        confidence = raw_conf * 0.6
    else:
        # Only attitude rate — same rationale as above. The assert
        # exists purely to narrow the Optional for the type checker.
        assert attitude_event is not None
        onset_ns, raw_conf = attitude_event
        confidence = raw_conf * 0.6
    return _DetectorResult(onset_ns=onset_ns, confidence=confidence)
def _compute_video_onset_from_samples(
    flow_samples: tuple[tuple[int, float], ...],
    config: AutoSyncConfig,
) -> _DetectorResult:
    """Pure detector: derive the motion onset from optical-flow magnitudes.

    Searches for a sustained window (``config.sustained_seconds``
    long) in which the flow magnitude exceeds
    ``config.video_motion_threshold``; the confidence is that
    window's above-threshold ratio.

    Args:
        flow_samples: ``(ts_ns, mean_flow_magnitude)`` pairs.
        config: Operator-tunable thresholds.

    Returns:
        The onset and confidence of the qualifying window. With no
        samples the result is ``(0, 0.0)``; with samples but no
        qualifying window it is the first sample's timestamp with
        confidence ``0.0``.
    """
    if not flow_samples:
        return _DetectorResult(onset_ns=0, confidence=0.0)

    sustained_ns = int(config.sustained_seconds * 1_000_000_000)
    found = _find_sustained_event(
        flow_samples,
        threshold=config.video_motion_threshold,
        sustained_ns=sustained_ns,
    )
    if found is not None:
        return _DetectorResult(onset_ns=found[0], confidence=found[1])
    return _DetectorResult(onset_ns=flow_samples[0][0], confidence=0.0)
def _find_sustained_event(
    samples: tuple[tuple[int, float], ...] | list[tuple[int, float]],
    *,
    threshold: float,
    sustained_ns: int,
) -> tuple[int, float] | None:
    """Locate the sustained window with the highest above-threshold ratio.

    A window is opened at every sample and spans all following samples
    whose timestamp is ≤ ``start + sustained_ns``. A window qualifies
    only if it holds at least 2 samples and its first-to-last sample
    span covers at least 80 % of ``sustained_ns`` (guards against
    truncated windows at the tail).

    Args:
        samples: ``(ts_ns, value)`` pairs in scan order.
        threshold: A sample counts as "above" when ``value > threshold``.
        sustained_ns: Nominal window length in nanoseconds.

    Returns:
        ``(start_ns, ratio)`` of the earliest window achieving the
        maximum ratio, or ``None`` when there are fewer than 2
        samples, no window qualifies, or the best ratio is below 0.5
        (signal-vs-noise floor).
    """
    points = list(samples)
    total = len(points)
    if total < 2:
        return None

    shortest_span_ns = int(sustained_ns * 0.8)
    winner_ns: int | None = None
    winner_ratio = 0.0

    for first in range(total):
        opened_ns = points[first][0]
        closes_ns = opened_ns + sustained_ns

        # Advance ``stop`` to the first index lying outside the window.
        hits = 0
        for stop in range(first, total):
            if points[stop][0] > closes_ns:
                break
            if points[stop][1] > threshold:
                hits += 1
        else:
            stop = total

        count = stop - first
        if count < 2:
            continue
        if points[stop - 1][0] - opened_ns < shortest_span_ns:
            continue

        ratio = hits / count
        # Strict comparison keeps the EARLIEST window on ties.
        if ratio > winner_ratio:
            winner_ratio = ratio
            winner_ns = opened_ns

    if winner_ns is None or winner_ratio < 0.5:
        return None
    return (winner_ns, winner_ratio)
def _wrap_pi(angle_rad: float) -> float:
    """Fold an angle delta into ``(-π, π]`` so yaw wrap-around is handled.

    Args:
        angle_rad: Angle difference in radians.

    Returns:
        The equivalent angle in ``(-π, π]``.
    """
    full_turn = 2.0 * math.pi
    # Python's modulo lands in [0, 2π); shift the upper half down.
    wrapped = angle_rad % full_turn
    return wrapped - full_turn if wrapped > math.pi else wrapped
# ---------------------------------------------------------------------
# Disk-reading wrappers (production paths)
# Message types requested from the tlog when loading take-off detector
# samples: the two IMU messages supply accelerometer components, and
# ATTITUDE supplies roll / pitch / yaw.
_REQUIRED_TLOG_TYPES: tuple[str, ...] = (
    "RAW_IMU",
    "SCALED_IMU2",
    "ATTITUDE",
)
def _load_tlog_samples(
    tlog_path: Path,
    max_messages: int,
    *,
    source_factory: Callable[[str], Any] | None,
) -> TlogSamples:
    """Stream the tlog head and collect IMU + ATTITUDE samples.

    Mirrors the AZ-399 source-factory test pattern: production builds
    use ``pymavlink`` lazily; tests pass an in-memory fake.

    Args:
        tlog_path: Path of the tlog to open.
        max_messages: Upper bound on the number of ``recv_match``
            calls; reading also stops at the first ``None`` return.
        source_factory: Optional replacement for the pymavlink open
            call (see :func:`_open_tlog`).

    Returns:
        The collected accel magnitudes (in g), attitude tuples and
        per-type message counts.

    Raises:
        ReplayInputAdapterError: If ``recv_match`` raises, or a
            message lacks the ``_timestamp`` attribute.
    """
    source = _open_tlog(tlog_path, source_factory=source_factory)
    accel_rows: list[tuple[int, float]] = []
    attitude_rows: list[tuple[int, float, float, float]] = []
    seen: dict[str, int] = {}
    try:
        for _ in range(max_messages):
            try:
                msg = source.recv_match(
                    type=list(_REQUIRED_TLOG_TYPES),
                    blocking=False,
                )
            except Exception as exc:  # pragma: no cover — defensive.
                raise ReplayInputAdapterError(
                    f"tlog scan failed on {tlog_path}: {exc!r}"
                ) from exc
            if msg is None:
                # Non-blocking read returned nothing: end of the log.
                break
            kind = _safe_msg_type(msg)
            if not kind:
                continue
            seen[kind] = seen.get(kind, 0) + 1
            stamp_ns = _msg_timestamp_ns(msg)
            if kind == "ATTITUDE":
                roll, pitch, yaw = (
                    float(getattr(msg, field, 0.0))
                    for field in ("roll", "pitch", "yaw")
                )
                attitude_rows.append((stamp_ns, roll, pitch, yaw))
            elif kind in ("RAW_IMU", "SCALED_IMU2"):
                # Components arrive in milli-g; convert to g before
                # taking the vector magnitude.
                gx, gy, gz = (
                    float(getattr(msg, axis, 0.0)) / _MG_PER_G
                    for axis in ("xacc", "yacc", "zacc")
                )
                accel_rows.append(
                    (stamp_ns, math.sqrt(gx * gx + gy * gy + gz * gz))
                )
    finally:
        # Best-effort close: a failing close must not mask the result
        # or the exception already in flight.
        if hasattr(source, "close"):
            try:
                source.close()
            except Exception:  # pragma: no cover — defensive.
                pass
    return TlogSamples(
        accel=tuple(accel_rows),
        attitude=tuple(attitude_rows),
        imu_count_by_type=seen,
    )
def _open_tlog(
    tlog_path: Path,
    *,
    source_factory: Callable[[str], Any] | None,
) -> Any:
    """Open a tlog message source.

    Args:
        tlog_path: Path of the tlog; passed on as ``str(tlog_path)``.
        source_factory: When not ``None``, called with the path string
            and its return value is used as the source; pymavlink is
            then never imported.

    Returns:
        The factory's object, or the pymavlink connection opened with
        the ``ardupilotmega`` dialect.

    Raises:
        ReplayInputAdapterError: If no factory was given and pymavlink
            cannot be imported.
    """
    device = str(tlog_path)
    if source_factory is None:
        # Imported lazily so that only the production path needs
        # pymavlink to be installed.
        try:
            from pymavlink import mavutil  # type: ignore[import-not-found]
        except ImportError as exc:
            raise ReplayInputAdapterError(
                "pymavlink is required for replay auto-sync but is not "
                "importable in this binary"
            ) from exc
        return mavutil.mavlink_connection(
            device,
            dialect="ardupilotmega",
            mavlink_version="2.0",
        )
    return source_factory(device)
def _safe_msg_type(msg: Any) -> str:
    """Return the message's type name without ever raising.

    Args:
        msg: A message object, normally exposing ``get_type()``.

    Returns:
        ``str(msg.get_type())`` when the method exists; the Python
        class name when it does not; ``""`` when looking up or calling
        ``get_type`` raises.
    """
    try:
        if not hasattr(msg, "get_type"):
            return type(msg).__name__
        return str(msg.get_type())
    except Exception:
        return ""
def _msg_timestamp_ns(msg: Any) -> int:
    """Convert a message's ``_timestamp`` (seconds) to integer nanoseconds.

    Args:
        msg: Message object expected to carry a ``_timestamp``
            attribute holding seconds.

    Returns:
        ``_timestamp`` × 1e9, truncated to ``int``.

    Raises:
        ReplayInputAdapterError: If ``_timestamp`` is absent or ``None``.
    """
    stamp_s = getattr(msg, "_timestamp", None)
    if stamp_s is not None:
        return int(float(stamp_s) * 1_000_000_000)
    raise ReplayInputAdapterError(
        "tlog message missing _timestamp attribute; pymavlink "
        "mavlogfile should populate it on every recv_match() return"
    )
def _read_video_frames(
    video_path: Path,
    scan_seconds: float,
) -> Iterable[tuple[int, "np.ndarray"]]:
    """Lazily decode the leading ``scan_seconds`` of a video.

    Yields ``(monotonic_ns, frame_bgr)`` tuples where ``monotonic_ns``
    is the capture's ``CAP_PROP_POS_MSEC × 1e6`` read after each
    frame, so the returned timestamps align with what
    :class:`VideoFileFrameSource` will report later. The Python
    ``time.monotonic_ns()`` is NOT used — the auto-sync result has to
    be deterministic across runs (AC-10) and tied to the video
    timeline.

    Being a generator, nothing is imported or opened until the first
    item is requested; the capture is released when iteration ends or
    the generator is closed.

    Args:
        video_path: Video file to open with ``cv2.VideoCapture``.
        scan_seconds: Decoding stops at the first frame whose position
            exceeds this many seconds.

    Raises:
        ReplayInputAdapterError: If OpenCV cannot be imported or the
            capture fails to open.
    """
    try:
        import cv2 as _cv2  # type: ignore[import-not-found]
    except ImportError as exc:
        raise ReplayInputAdapterError(
            "opencv-python is required for replay auto-sync but is "
            "not importable in this binary"
        ) from exc

    capture = _cv2.VideoCapture(str(video_path))
    if not capture.isOpened():
        capture.release()
        raise ReplayInputAdapterError(
            f"video file unreadable / unsupported codec: {video_path}"
        )

    try:
        horizon_ms = scan_seconds * 1000.0
        while True:
            grabbed, image = capture.read()
            if not (grabbed and image is not None):
                # End of stream or decode failure.
                break
            position_ms = float(capture.get(_cv2.CAP_PROP_POS_MSEC))
            if position_ms > horizon_ms:
                break
            yield int(position_ms * 1_000_000), image
    finally:
        capture.release()
def _compute_flow_magnitudes(
    frames: Iterable[tuple[int, "np.ndarray"]],
) -> tuple[tuple[int, float], ...]:
    """Pairwise mean optical-flow magnitude between consecutive frames.

    Uses Farneback dense flow (``cv2.calcOpticalFlowFarneback``)
    rather than pyramidal LK because Farneback returns a flow field
    over the whole image with no per-frame feature-tracking state, so
    the result is deterministic given the same input frames (AC-10).

    Frames are consumed one at a time and only the previous grayscale
    image is retained, so the function does not build a grayscale copy
    of every frame and works with any iterable (a list as before, or
    a lazy generator).

    Args:
        frames: ``(ts_ns, frame_bgr)`` tuples in playback order.

    Returns:
        ``((ts_ns_of_second_frame, mean_magnitude_px), ...)`` — one
        entry per consecutive frame pair; ``()`` when fewer than 2
        frames are supplied.

    Raises:
        ReplayInputAdapterError: If OpenCV or NumPy cannot be imported.
    """
    try:
        import cv2 as _cv2  # type: ignore[import-not-found]
        import numpy as _np  # type: ignore[import-not-found]
    except ImportError as exc:  # pragma: no cover — guarded at call sites.
        raise ReplayInputAdapterError(
            "opencv-python + numpy are required for replay auto-sync"
        ) from exc

    frame_iter = iter(frames)
    try:
        _, first_frame = next(frame_iter)
    except StopIteration:
        return ()

    out: list[tuple[int, float]] = []
    prev_gray = None
    for curr_ts, frame in frame_iter:
        if prev_gray is None:
            # Converted lazily so a lone frame is never touched,
            # matching the "fewer than 2 frames → ()" contract.
            prev_gray = _cv2.cvtColor(first_frame, _cv2.COLOR_BGR2GRAY)
        curr_gray = _cv2.cvtColor(frame, _cv2.COLOR_BGR2GRAY)
        flow = _cv2.calcOpticalFlowFarneback(
            prev_gray,
            curr_gray,
            None,
            pyr_scale=0.5,
            levels=3,
            winsize=15,
            iterations=3,
            poly_n=5,
            poly_sigma=1.2,
            flags=0,
        )
        # ``flow`` shape: (H, W, 2) — dx + dy per pixel.
        magnitudes = _np.sqrt(flow[..., 0] ** 2 + flow[..., 1] ** 2)
        out.append((curr_ts, float(magnitudes.mean())))
        # Each frame is converted once and reused as the next "previous".
        prev_gray = curr_gray
    return tuple(out)
# BUILD-flag check, kept here for symmetry with other replay modules.
def _build_flag_on(name: str) -> bool:
    """Report whether the environment variable ``name`` is switched on.

    Args:
        name: Environment variable to inspect.

    Returns:
        ``True`` when the value, stripped and lower-cased, is one of
        ``on`` / ``1`` / ``true`` / ``yes``; ``False`` otherwise,
        including when the variable is unset.
    """
    truthy = {"on", "1", "true", "yes"}
    value = os.environ.get(name, "")
    normalised = value.strip().lower()
    return normalised in truthy
# ---------------------------------------------------------------------
# AZ-698 — mid-flight cross-correlation aligner
#
# The AZ-405 head-takeoff detector only works when the video covers
# the take-off moment. For mid-flight slices (e.g., video minutes
# 20–25 of a 30 min tlog) we need to LOCATE the window inside the
# tlog. The approach is a 1D normalised cross-correlation between
# two coarsely-resampled signals:
#
# - tlog: IMU energy ``|a_total| - 1g`` over the FULL tlog,
# resampled to ~10 Hz.
# - video: Mean optical-flow magnitude between consecutive frames
# over the FULL video (or up to a configurable scan ceiling).
#
# Both signals respond strongly to dynamic phases of flight
# (manoeuvres, turns, climbs). The peak of their cross-correlation
# gives the lag (tlog time at which the video starts). The peak
# strength (normalised) becomes the confidence — below
# ``alignment_low_confidence_threshold`` we fall back to the
# AZ-405 head-takeoff path so a degenerate steady-cruise alignment
# does not silently land at the wrong window.
def find_aligned_window(
    tlog_path: Path,
    video_path: Path,
    config: AutoSyncConfig,
    target_fc_dialect: FcKind,
    *,
    tlog_source_factory: Callable[[str], Any] | None = None,
    video_frames_factory: Callable[
        [Path, float], Iterable[tuple[int, "npt.NDArray[np.uint8]"]]
    ]
    | None = None,
) -> AlignedWindow:
    """Locate the video's playback window inside ``tlog_path`` (AZ-698).

    Loads the tlog IMU-energy stream and the video optical-flow
    stream, then delegates to :func:`_align_via_cross_correlation`.

    Args:
        tlog_path: Binary ArduPilot tlog. Up to
            :attr:`AutoSyncConfig.prescan_max_messages` × 10 messages
            are read (the aligner needs the FULL flight, not just the
            head).
        video_path: Mp4 / mkv input. The leading
            :attr:`AutoSyncConfig.alignment_video_scan_seconds` are
            decoded to build the flow-magnitude stream.
        config: Operator-tunable thresholds.
        target_fc_dialect: ``ARDUPILOT_PLANE`` or ``INAV`` — same
            parity contract as :func:`detect_tlog_takeoff`.
        tlog_source_factory: Test injection — replaces the
            ``pymavlink`` open call.
        video_frames_factory: Test injection — replaces
            ``cv2.VideoCapture`` frame iteration.

    Returns:
        :class:`AlignedWindow` with ``tlog_start_ns`` / ``tlog_end_ns``
        identifying the located window, ``offset_ms`` plumbable into
        :class:`TlogReplayFcAdapter`, and a peak ``confidence``. When
        confidence falls below
        :attr:`AutoSyncConfig.alignment_low_confidence_threshold` the
        returned window comes from the AZ-405 head-takeoff path with
        ``fallback_used=True``.

    Raises:
        ReplayInputAdapterError: When the dialect is unsupported, the
            tlog or video is missing or unreadable, either stream
            yields fewer than 2 samples, or the resampled video stream
            is longer than the resampled tlog stream.
    """
    if target_fc_dialect not in (FcKind.ARDUPILOT_PLANE, FcKind.INAV):
        raise ReplayInputAdapterError(
            f"target_fc_dialect must be ARDUPILOT_PLANE or INAV; got {target_fc_dialect!r}"
        )
    if not tlog_path.is_file():
        raise ReplayInputAdapterError(f"tlog file not found: {tlog_path}")
    if not video_path.is_file():
        raise ReplayInputAdapterError(f"video file not found: {video_path}")

    # --- tlog side -------------------------------------------------
    imu_energy = _load_tlog_imu_energy_stream(
        tlog_path,
        max_messages=config.prescan_max_messages * 10,
        source_factory=tlog_source_factory,
    )
    if len(imu_energy) < 2:
        raise ReplayInputAdapterError(
            f"tlog yielded {len(imu_energy)} IMU sample(s); "
            "need ≥ 2 for cross-correlation alignment"
        )

    # --- video side ------------------------------------------------
    frame_reader = (
        _read_video_frames if video_frames_factory is None else video_frames_factory
    )
    frames = list(frame_reader(video_path, config.alignment_video_scan_seconds))
    if len(frames) < 2:
        raise ReplayInputAdapterError(
            f"video yielded {len(frames)} frame(s); "
            "need ≥ 2 for cross-correlation alignment"
        )
    flow = _compute_flow_magnitudes(frames)
    if len(flow) < 2:
        raise ReplayInputAdapterError(
            f"video produced {len(flow)} flow sample(s); "
            "need ≥ 2 for cross-correlation alignment"
        )

    return _align_via_cross_correlation(
        tlog_energy=imu_energy,
        flow_samples=flow,
        config=config,
        target_fc_dialect=target_fc_dialect,
        tlog_path=tlog_path,
        tlog_source_factory=tlog_source_factory,
    )
def _align_via_cross_correlation(
    *,
    tlog_energy: tuple[tuple[int, float], ...],
    flow_samples: tuple[tuple[int, float], ...],
    config: AutoSyncConfig,
    target_fc_dialect: FcKind,
    tlog_path: Path,
    tlog_source_factory: Callable[[str], Any] | None,
) -> AlignedWindow:
    """Compute kernel: turn pre-loaded streams into an :class:`AlignedWindow`.

    Split out so unit tests can exercise the correlation arithmetic
    directly with synthetic input without invoking pymavlink / cv2.
    The correlation itself touches no files; only the low-confidence
    branch hands ``tlog_path`` / ``tlog_source_factory`` to
    :func:`_fallback_to_head_takeoff`.

    Both streams are resampled onto a uniform grid at
    ``max(config.alignment_resample_hz, 1.0)`` Hz, then the
    zero-mean / unit-norm flow stream is slid across the tlog stream.

    Args:
        tlog_energy: ``(ts_ns, energy)`` samples from the tlog; must be
            non-empty.
        flow_samples: ``(ts_ns, flow_magnitude)`` samples from the
            video; must be non-empty.
        config: Operator-tunable thresholds.
        target_fc_dialect: Forwarded to the fallback path.
        tlog_path: Forwarded to the fallback path.
        tlog_source_factory: Forwarded to the fallback path.

    Returns:
        The located window with ``fallback_used=False``, or the
        fallback window when the peak confidence is below
        ``config.alignment_low_confidence_threshold``.

    Raises:
        ReplayInputAdapterError: If either resampled stream has fewer
            than 2 samples, or the video stream is longer than the
            tlog stream.
    """
    import numpy as _np

    period_ns = int(1_000_000_000 / max(config.alignment_resample_hz, 1.0))

    tlog_origin_ns = tlog_energy[0][0]
    tlog_grid = _resample_uniform(tlog_energy, period_ns, tlog_origin_ns)
    if len(tlog_grid) < 2:
        raise ReplayInputAdapterError(
            "tlog resampled stream has < 2 samples; cannot cross-correlate"
        )

    video_origin_ns = flow_samples[0][0]
    flow_grid = _resample_uniform(flow_samples, period_ns, video_origin_ns)
    if len(flow_grid) < 2:
        raise ReplayInputAdapterError(
            "video flow stream has < 2 samples; cannot cross-correlate"
        )
    if len(flow_grid) > len(tlog_grid):
        raise ReplayInputAdapterError(
            "video flow stream is longer than the tlog energy stream; "
            "auto-trim requires the video to be a slice of a longer tlog"
        )

    tlog_arr = _np.asarray(tlog_grid, dtype=_np.float64)
    template = _zero_mean_normalise(_np.asarray(flow_grid, dtype=_np.float64))

    # Defaults cover the flat-video case: no information to correlate,
    # so confidence stays 0 and the fallback path is forced below.
    peak_idx = 0
    confidence = 0.0
    if _np.linalg.norm(template) != 0.0:
        # Normalised cross-correlation: each sliding window of the
        # tlog stream is zero-meaned + unit-normed independently
        # before the dot product so the peak is invariant to local
        # signal magnitude. Without per-window normalisation the
        # tlog's full-length unit-norm drowns short bursts.
        width = len(template)
        lag_count = len(tlog_arr) - width + 1
        scores = _np.zeros(lag_count, dtype=_np.float64)
        for lag in range(lag_count):
            segment = tlog_arr[lag : lag + width]
            centred = segment - segment.mean()
            spread = float(_np.linalg.norm(centred))
            # A flat tlog segment keeps its zero score.
            if spread > 0.0:
                scores[lag] = float(_np.dot(centred / spread, template))
        peak_idx = int(_np.argmax(scores))
        confidence = max(0.0, min(1.0, float(scores[peak_idx])))

    video_duration_ns = _stream_duration_ns(flow_samples)

    if confidence < config.alignment_low_confidence_threshold:
        return _fallback_to_head_takeoff(
            tlog_path=tlog_path,
            tlog_source_factory=tlog_source_factory,
            target_fc_dialect=target_fc_dialect,
            config=config,
            tlog_energy=tlog_energy,
            video_origin_ns=video_origin_ns,
            video_flow_duration_ns=video_duration_ns,
            confidence=confidence,
        )

    # Absolute tlog timeline value where the first flow sample aligns.
    # The adapter's seek check compares this against the raw pymavlink
    # ``msg._timestamp`` so the value MUST be on the tlog timeline,
    # NOT a delta.
    window_start_ns = tlog_origin_ns + peak_idx * period_ns
    # Offset that, added to a video timestamp, lands on the tlog
    # timeline. Matches ``AutoSyncDecision.offset_ms`` semantics
    # (``validate_offset_or_fail`` does ``vts + offset_ns``).
    return AlignedWindow(
        tlog_start_ns=window_start_ns,
        tlog_end_ns=window_start_ns + video_duration_ns,
        offset_ms=(window_start_ns - video_origin_ns) // 1_000_000,
        confidence=confidence,
        fallback_used=False,
    )
def _stream_duration_ns(
    samples: tuple[tuple[int, float], ...],
) -> int:
    """Return the time span between the first and last sample.

    Args:
        samples: ``(ts_ns, value)`` pairs.

    Returns:
        Last timestamp minus first timestamp, or ``0`` for an empty
        stream.
    """
    return samples[-1][0] - samples[0][0] if samples else 0
def _fallback_to_head_takeoff(
    *,
    tlog_path: Path,
    tlog_source_factory: Callable[[str], Any] | None,
    target_fc_dialect: FcKind,
    config: AutoSyncConfig,
    tlog_energy: tuple[tuple[int, float], ...],
    video_origin_ns: int,
    video_flow_duration_ns: int,
    confidence: float,
) -> AlignedWindow:
    """Low-confidence path: anchor the window with the AZ-405 take-off detector.

    The window start is chosen as follows:
        * the take-off onset, when the detector reports a confidence
          above 0;
        * otherwise the first ``tlog_energy`` timestamp, if any;
        * otherwise 0.

    Args:
        tlog_path: Tlog handed to :func:`detect_tlog_takeoff`.
        tlog_source_factory: Optional tlog source injection.
        target_fc_dialect: Dialect handed to the detector.
        config: Operator-tunable thresholds.
        tlog_energy: Energy stream used only for its first timestamp.
        video_origin_ns: Video timestamp the window start maps to.
        video_flow_duration_ns: Length of the window in nanoseconds.
        confidence: The original (sub-threshold) cross-correlation
            peak; passed through for information only.

    Returns:
        An :class:`AlignedWindow` with ``fallback_used=True`` so
        callers + FDR audit can record the divergence.

    Raises:
        ReplayInputAdapterError: Propagated from
            :func:`detect_tlog_takeoff`.
    """
    takeoff = detect_tlog_takeoff(
        tlog_path,
        target_fc_dialect,
        config,
        source_factory=tlog_source_factory,
    )

    if takeoff.confidence > 0.0:
        start_ns = takeoff.onset_ns
    else:
        # Detector saw nothing usable: anchor at the head of the log.
        start_ns = tlog_energy[0][0] if tlog_energy else 0

    return AlignedWindow(
        tlog_start_ns=start_ns,
        tlog_end_ns=start_ns + video_flow_duration_ns,
        offset_ms=(start_ns - video_origin_ns) // 1_000_000,
        confidence=confidence,
        fallback_used=True,
    )
def _resample_uniform(
    samples: tuple[tuple[int, float], ...],
    period_ns: int,
    origin_ns: int,
) -> list[float]:
    """Resample irregular ``(ts_ns, value)`` samples onto a uniform grid.

    Each sample is assigned to bin ``(ts - origin_ns) // period_ns``
    and a bin's value is the mean of its samples. A bin with no
    samples repeats the most recent filled bin's mean (zero-order
    hold), or ``0.0`` if no bin has been filled yet. The grid ends at
    the last bin that actually received a sample, so the returned
    length reflects real coverage.

    Args:
        samples: Samples whose LAST element determines the grid
            extent; samples landing before ``origin_ns`` or beyond
            that extent are ignored.
        period_ns: Bin width in nanoseconds.
        origin_ns: Timestamp of the left edge of bin 0.

    Returns:
        One value per bin, or ``[]`` when there are no samples or none
        fall inside the grid.
    """
    if not samples:
        return []

    bin_count = max(1, ((samples[-1][0] - origin_ns) // period_ns) + 1)

    # Sparse bin → values map; only bins that receive data exist.
    grouped: dict[int, list[float]] = {}
    for ts_ns, value in samples:
        slot = (ts_ns - origin_ns) // period_ns
        if 0 <= slot < bin_count:
            grouped.setdefault(slot, []).append(value)
    if not grouped:
        return []

    uniform: list[float] = []
    held = 0.0
    for slot in range(max(grouped) + 1):
        bucket = grouped.get(slot)
        if bucket:
            held = sum(bucket) / len(bucket)
        # Empty bins fall through and repeat the held value.
        uniform.append(held)
    return uniform
def _zero_mean_normalise(
    arr: "npt.NDArray[np.float64]",
) -> "npt.NDArray[np.float64]":
    """Subtract the mean and scale to unit Euclidean norm.

    Args:
        arr: Input array.

    Returns:
        The centred array divided by its norm; when the centred norm
        is exactly 0 (constant input) the centred array is returned
        unscaled to avoid dividing by zero.
    """
    import numpy as _np

    centred: "npt.NDArray[np.float64]" = arr - arr.mean()
    magnitude = float(_np.linalg.norm(centred))
    if magnitude != 0.0:
        scaled: "npt.NDArray[np.float64]" = centred / magnitude
        return scaled
    return centred
def _load_tlog_imu_energy_stream(
    tlog_path: Path,
    *,
    max_messages: int,
    source_factory: Callable[[str], Any] | None,
) -> tuple[tuple[int, float], ...]:
    """Walk the tlog (up to ``max_messages``) collecting IMU energy samples.

    Mirrors :func:`_load_tlog_samples` but keeps only the unsigned
    deviation of the accelerometer magnitude from 1 g — the signal the
    AZ-698 cross-correlation aligner consumes. The ATTITUDE channel
    is not needed here.

    Args:
        tlog_path: Path of the tlog to open.
        max_messages: Upper bound on the number of ``recv_match``
            calls; reading also stops at the first ``None`` return.
        source_factory: Optional replacement for the pymavlink open
            call (see :func:`_open_tlog`).

    Returns:
        ``(ts_ns, |total_g - 1|)`` pairs in read order.

    Raises:
        ReplayInputAdapterError: If ``recv_match`` raises, or a
            message lacks the ``_timestamp`` attribute.
    """
    source = _open_tlog(tlog_path, source_factory=source_factory)
    stream: list[tuple[int, float]] = []
    try:
        for _ in range(max_messages):
            try:
                msg = source.recv_match(
                    type=["RAW_IMU", "SCALED_IMU2"],
                    blocking=False,
                )
            except Exception as exc:  # pragma: no cover — defensive.
                raise ReplayInputAdapterError(
                    f"tlog scan failed on {tlog_path}: {exc!r}"
                ) from exc
            if msg is None:
                # Non-blocking read returned nothing: end of the log.
                break
            stamp_ns = _msg_timestamp_ns(msg)
            # Components arrive in milli-g; convert to g before taking
            # the vector magnitude.
            gx, gy, gz = (
                float(getattr(msg, axis, 0.0)) / _MG_PER_G
                for axis in ("xacc", "yacc", "zacc")
            )
            magnitude_g = math.sqrt(gx * gx + gy * gy + gz * gz)
            stream.append((stamp_ns, abs(magnitude_g - _REST_TOTAL_G)))
    finally:
        # Best-effort close: a failing close must not mask the result
        # or the exception already in flight.
        if hasattr(source, "close"):
            try:
                source.close()
            except Exception:  # pragma: no cover — defensive.
                pass
    return tuple(stream)