"""Auto-sync detectors + offset compute + AC-9 validator (AZ-405). Three concerns: 1. **Tlog take-off detector** — walks the head of the tlog, looks for a sustained vertical-acceleration excess + sustained attitude-rate excess, returns ``(takeoff_ns, confidence)``. 2. **Video motion-onset detector** — runs OpenCV pyramidal optical flow over the leading seconds of the video, returns ``(motion_onset_ns, confidence)``. 3. **AC-9 frame-window match validator** — given a candidate offset and the tlog/video timestamp series, returns 0 if ≥ 95 % of video frames have an IMU sample within ± 100 ms after the offset is applied; 2 otherwise. The detector functions are split into a thin path-reading wrapper (``detect_tlog_takeoff`` / ``detect_video_motion_onset``) and a pure sample-driven core (``_compute_tlog_takeoff_from_samples`` / ``_compute_video_onset_from_samples``). Tests exercise the pure cores directly with synthetic fixtures; production calls the wrappers, which read the tlog via ``pymavlink`` and the video via ``cv2``. Both wrappers accept an optional ``source_factory`` (tlog) / ``frames_factory`` (video) injection point so unit tests can swap in fakes without touching the filesystem (mirrors AZ-399's pattern). 
""" from __future__ import annotations import bisect import math import os from collections.abc import Callable, Iterable from dataclasses import dataclass from pathlib import Path from typing import TYPE_CHECKING, Any from gps_denied_onboard._types.fc import FcKind from gps_denied_onboard.replay_input.errors import ReplayInputAdapterError from gps_denied_onboard.replay_input.interface import ( AlignedWindow, AutoSyncConfig, AutoSyncDecision, ) if TYPE_CHECKING: import numpy as np import numpy.typing as npt __all__ = [ "TlogSamples", "compute_offset", "detect_tlog_takeoff", "detect_video_motion_onset", "find_aligned_window", "validate_offset_or_fail", ] # Conversion: MAVLink RAW_IMU / SCALED_IMU2 publish accelerometer # components in mG (milli-G); 1 g ≡ 9.80665 m/s² by ISO 80000-3. _MG_PER_G: float = 1000.0 # Per the AZ-405 spec, the vertical-accel signal of interest is the # magnitude excess above gravity (i.e., body acceleration regardless # of frame orientation). At rest |a| ≈ 1 g; during upward thrust |a| # > 1 g; during free-fall |a| ≈ 0 g. The take-off pattern is a # sustained excess with positive sign (upward thrust), so we use # ``|total_g - 1.0|`` as the criterion. _REST_TOTAL_G: float = 1.0 # --------------------------------------------------------------------- # DTOs (internal — public API surfaces results via AutoSyncDecision) @dataclass(frozen=True, slots=True) class _DetectorResult: """Outcome of a single detector pass. ``onset_ns`` is the best-guess event start (ns); ``confidence`` is in [0, 1] and reflects how sustained the signal was relative to the configured threshold + sustained-time requirement. """ onset_ns: int confidence: float @dataclass(frozen=True, slots=True) class TlogSamples: """Pre-loaded tlog samples extracted by the take-off detector. Used as the input shape for :func:`_compute_tlog_takeoff_from_samples` so unit tests can build a deterministic fixture without parsing a real ``.tlog`` file. 
Attributes: accel: Sequence of ``(ts_ns, total_accel_g)`` pairs sourced from ``RAW_IMU`` / ``SCALED_IMU2`` messages. attitude: Sequence of ``(ts_ns, roll_rad, pitch_rad, yaw_rad)`` tuples sourced from ``ATTITUDE`` messages. imu_count_by_type: Map of message-type-name → count, used for the ``"tlog missing required message types: [...]"`` error path (R-DEMO-3). """ accel: tuple[tuple[int, float], ...] attitude: tuple[tuple[int, float, float, float], ...] imu_count_by_type: dict[str, int] # --------------------------------------------------------------------- # Public entrypoints def detect_tlog_takeoff( tlog_path: Path, target_fc_dialect: FcKind, config: AutoSyncConfig, *, source_factory: Callable[[str], Any] | None = None, ) -> _DetectorResult: """Walk the tlog head, detect the take-off pattern, return result. Args: tlog_path: Path to the tlog file. Existence is checked at entry. target_fc_dialect: ``ARDUPILOT_PLANE`` or ``INAV``. Both speak ``ardupilotmega`` MAVLink on the GCS telemetry channel (the iNav-side native MSP traffic is irrelevant here); this parameter is accepted for parity with the rest of the replay surface and is also used in the missing- messages error to name the dialect explicitly. config: Operator-tunable thresholds (see :class:`AutoSyncConfig`). source_factory: Test-only injection — when provided, replaces the pymavlink open call with the factory's return value. The factory must yield an object with ``recv_match`` / ``close`` semantics matching pymavlink's ``mavutil.mavlink_connection``. Raises: ReplayInputAdapterError: When the tlog is missing ``RAW_IMU`` / ``SCALED_IMU2`` (no IMU samples) or ``ATTITUDE`` (no attitude samples). This is the R-DEMO-3 fail-fast path — it surfaces BEFORE any video read in the coordinator's ``open()`` flow. 
""" if target_fc_dialect not in (FcKind.ARDUPILOT_PLANE, FcKind.INAV): raise ReplayInputAdapterError( f"target_fc_dialect must be ARDUPILOT_PLANE or INAV; got {target_fc_dialect!r}" ) if not tlog_path.is_file(): raise ReplayInputAdapterError(f"tlog file not found: {tlog_path}") samples = _load_tlog_samples( tlog_path, config.prescan_max_messages, source_factory=source_factory, ) return _compute_tlog_takeoff_from_samples(samples, config) def detect_video_motion_onset( video_path: Path, config: AutoSyncConfig, *, frames_factory: Callable[[Path, float], Iterable[tuple[int, "np.ndarray"]]] | None = None, ) -> _DetectorResult: """Scan the leading video segment, detect motion onset, return result. Args: video_path: Path to an MP4 / MKV / AVI file. config: Operator-tunable thresholds (see :class:`AutoSyncConfig`). frames_factory: Test-only injection — when provided, returns a synthetic iterable of ``(monotonic_ns, frame_bgr)`` tuples. Must yield at least 2 frames for the pairwise optical-flow magnitudes to compute. Raises: ReplayInputAdapterError: When the video file is missing or unreadable, or fewer than 2 frames are decoded. """ if not video_path.is_file(): raise ReplayInputAdapterError(f"video file not found: {video_path}") if frames_factory is None: frames = list(_read_video_frames(video_path, config.video_motion_scan_seconds)) else: frames = list(frames_factory(video_path, config.video_motion_scan_seconds)) if len(frames) < 2: raise ReplayInputAdapterError( f"video file unreadable or too short: {video_path} " f"(decoded {len(frames)} frame(s); need ≥ 2)" ) flow_samples = _compute_flow_magnitudes(frames) return _compute_video_onset_from_samples(flow_samples, config) def compute_offset( tlog_result: _DetectorResult, video_result: _DetectorResult, ) -> AutoSyncDecision: """Combine tlog + video detector outputs into an :class:`AutoSyncDecision`. 
Offset semantics (positive = video starts before take-off recorded in tlog): ``offset_ns = tlog_takeoff_ns - video_motion_onset_ns``. Combined confidence = ``min(tlog_confidence, video_confidence)`` — the weakest signal dominates so downstream WARN-and-proceed (AC-6) fires whenever either side is unreliable. """ offset_ns = tlog_result.onset_ns - video_result.onset_ns combined = min(tlog_result.confidence, video_result.confidence) return AutoSyncDecision( offset_ms=offset_ns // 1_000_000, tlog_takeoff_ns=tlog_result.onset_ns, video_motion_onset_ns=video_result.onset_ns, tlog_confidence=tlog_result.confidence, video_confidence=video_result.confidence, combined_confidence=combined, ) def validate_offset_or_fail( offset_ms: int, tlog_imu_timestamps_ns: Iterable[int], video_frame_timestamps_ns: Iterable[int], threshold_pct: float, *, window_ms: int = 100, ) -> int: """AC-9 frame-window match validator. Returns ``0`` when ≥ ``threshold_pct`` % of video frames have an IMU sample within ± ``window_ms`` after the offset is applied; returns ``2`` otherwise (CLI exit code for AC-8 hard-fail). The check is symmetric in offset sign — the offset is added to each video timestamp and the nearest tlog IMU timestamp is then looked up by binary search. """ video_list = list(video_frame_timestamps_ns) if not video_list: # Degenerate input — no frames to match. The replay binary # rejects empty videos earlier, so reaching this branch # would be a bug; return 2 so the operator sees the hard-fail # rather than a false PASS. return 2 tlog_sorted = sorted(tlog_imu_timestamps_ns) if not tlog_sorted: return 2 offset_ns = int(offset_ms) * 1_000_000 window_ns = int(window_ms) * 1_000_000 matched = 0 for vts in video_list: target_ns = vts + offset_ns idx = bisect.bisect_left(tlog_sorted, target_ns) # The nearest IMU sample is whichever of the immediate # neighbours of `target_ns` is closer. Either may be out of # range at the ends of the array. 
nearest: int | None = None for j in (idx - 1, idx): if 0 <= j < len(tlog_sorted): cand = tlog_sorted[j] if nearest is None or abs(cand - target_ns) < abs(nearest - target_ns): nearest = cand if nearest is not None and abs(nearest - target_ns) <= window_ns: matched += 1 match_pct = (matched / len(video_list)) * 100.0 return 0 if match_pct >= threshold_pct else 2 # --------------------------------------------------------------------- # Pure compute kernels (testable without disk IO) def _compute_tlog_takeoff_from_samples( samples: TlogSamples, config: AutoSyncConfig, ) -> _DetectorResult: """Pure detector: turn pre-loaded tlog samples into a result. Algorithm: find the first sustained-window where (a) accel magnitude excess above 1 g exceeds the threshold for at least ``sustained_seconds``, and (b) attitude-rate magnitude exceeds its threshold sustained over the same duration. Combined confidence = ``min(accel_ratio, attitude_ratio)`` — both signals must agree for a high-confidence take-off. Raises: ReplayInputAdapterError: When the tlog had no IMU samples or no ATTITUDE samples (R-DEMO-3 fail-fast). """ if not samples.accel: missing = ["RAW_IMU", "SCALED_IMU2"] raise ReplayInputAdapterError( f"tlog missing required message types: {missing}" ) if not samples.attitude: raise ReplayInputAdapterError( "tlog missing required message types: ['ATTITUDE']" ) sustained_ns = int(config.sustained_seconds * 1_000_000_000) # Pair-wise attitude rates (rad/s magnitude vector) — emitted at # the timestamp of the LATER sample so the rate aligns with when # it is observable downstream. 
attitude_rates: list[tuple[int, float]] = [] for i in range(1, len(samples.attitude)): ts_prev, roll_prev, pitch_prev, yaw_prev = samples.attitude[i - 1] ts_curr, roll_curr, pitch_curr, yaw_curr = samples.attitude[i] dt_s = (ts_curr - ts_prev) / 1_000_000_000.0 if dt_s <= 0.0: continue dr = roll_curr - roll_prev dp = pitch_curr - pitch_prev dy = _wrap_pi(yaw_curr - yaw_prev) rate_mag = math.sqrt((dr / dt_s) ** 2 + (dp / dt_s) ** 2 + (dy / dt_s) ** 2) attitude_rates.append((ts_curr, rate_mag)) accel_excess = tuple( (ts, abs(total_g - _REST_TOTAL_G)) for ts, total_g in samples.accel ) accel_event = _find_sustained_event( accel_excess, threshold=config.takeoff_accel_threshold_g, sustained_ns=sustained_ns, ) attitude_event = _find_sustained_event( tuple(attitude_rates), threshold=config.takeoff_attitude_rate_threshold_rad_s, sustained_ns=sustained_ns, ) if accel_event is None and attitude_event is None: # Neither signal crossed; best we can do is flag "no clear # take-off" so the coordinator can WARN and continue with the # tlog start as a fallback origin. first_ns = samples.accel[0][0] return _DetectorResult(onset_ns=first_ns, confidence=0.0) if accel_event is not None and attitude_event is not None: # Both signals fired — they should both point at the same # event. We adopt the EARLIER of the two onsets so the offset # is referenced against the moment thrust began (the attitude # body-rate spike usually trails the thrust by a few hundred # ms during a vertical climb). onset_ns = min(accel_event[0], attitude_event[0]) # Confidence is the weakest of the two signals, scaled by # how cleanly they agree. We keep it simple: min(). confidence = min(accel_event[1], attitude_event[1]) elif accel_event is not None: # Only the accel signal — discount confidence so the # combined offset eventually trips the WARN-and-proceed # threshold (combined_confidence < 0.80 → AC-6). 
onset_ns, raw_conf = accel_event confidence = raw_conf * 0.6 else: # Only attitude rate — same rationale as above. The # mypy-narrowing else covers attitude_event is not None. assert attitude_event is not None onset_ns, raw_conf = attitude_event confidence = raw_conf * 0.6 return _DetectorResult(onset_ns=onset_ns, confidence=confidence) def _compute_video_onset_from_samples( flow_samples: tuple[tuple[int, float], ...], config: AutoSyncConfig, ) -> _DetectorResult: """Pure detector: turn pre-computed optical-flow magnitudes into a result. Algorithm: find the first sustained window where the flow magnitude exceeds the configured threshold for at least ``sustained_seconds``. Confidence = sustained ratio. """ if not flow_samples: return _DetectorResult(onset_ns=0, confidence=0.0) sustained_ns = int(config.sustained_seconds * 1_000_000_000) event = _find_sustained_event( flow_samples, threshold=config.video_motion_threshold, sustained_ns=sustained_ns, ) if event is None: return _DetectorResult(onset_ns=flow_samples[0][0], confidence=0.0) onset_ns, confidence = event return _DetectorResult(onset_ns=onset_ns, confidence=confidence) def _find_sustained_event( samples: tuple[tuple[int, float], ...] | list[tuple[int, float]], *, threshold: float, sustained_ns: int, ) -> tuple[int, float] | None: """Sliding-window scan: return ``(start_ns, ratio)`` of the earliest window where the fraction of samples above ``threshold`` is maximised, provided that fraction is ≥ 0.5 (signal-vs-noise floor) and the window covers at least 80 % of ``sustained_ns`` (guards against truncated windows at the tail). Returns ``None`` when no qualifying window exists. """ seq = list(samples) n = len(seq) if n < 2: return None best_start_ns: int | None = None best_ratio = 0.0 min_window_ns = int(sustained_ns * 0.8) for i in range(n): start_ns = seq[i][0] end_ns = start_ns + sustained_ns # Walk j forward while still inside the window. 
j = i above = 0 while j < n and seq[j][0] <= end_ns: if seq[j][1] > threshold: above += 1 j += 1 window_size = j - i if window_size < 2: continue window_dur_ns = seq[j - 1][0] - start_ns if window_dur_ns < min_window_ns: continue ratio = above / window_size if ratio > best_ratio: best_ratio = ratio best_start_ns = start_ns if best_start_ns is None or best_ratio < 0.5: return None return (best_start_ns, best_ratio) def _wrap_pi(angle_rad: float) -> float: """Wrap an angle delta into ``(-π, π]`` to handle yaw wrap-around.""" twopi = 2.0 * math.pi a = angle_rad % twopi if a > math.pi: a -= twopi return a # --------------------------------------------------------------------- # Disk-reading wrappers (production paths) _REQUIRED_TLOG_TYPES: tuple[str, ...] = ( "RAW_IMU", "SCALED_IMU2", "ATTITUDE", ) def _load_tlog_samples( tlog_path: Path, max_messages: int, *, source_factory: Callable[[str], Any] | None, ) -> TlogSamples: """Stream the tlog head, capture IMU + ATTITUDE samples. Mirrors the AZ-399 source-factory test pattern: production builds use ``pymavlink`` lazily; tests pass an in-memory fake. """ source = _open_tlog(tlog_path, source_factory=source_factory) accel: list[tuple[int, float]] = [] attitude: list[tuple[int, float, float, float]] = [] counts: dict[str, int] = {} try: for _ in range(max_messages): try: msg = source.recv_match( type=list(_REQUIRED_TLOG_TYPES), blocking=False, ) except Exception as exc: # pragma: no cover — defensive. 
raise ReplayInputAdapterError( f"tlog scan failed on {tlog_path}: {exc!r}" ) from exc if msg is None: break msg_type = _safe_msg_type(msg) if not msg_type: continue counts[msg_type] = counts.get(msg_type, 0) + 1 ts_ns = _msg_timestamp_ns(msg) if msg_type in ("RAW_IMU", "SCALED_IMU2"): xa = float(getattr(msg, "xacc", 0.0)) / _MG_PER_G ya = float(getattr(msg, "yacc", 0.0)) / _MG_PER_G za = float(getattr(msg, "zacc", 0.0)) / _MG_PER_G total_g = math.sqrt(xa * xa + ya * ya + za * za) accel.append((ts_ns, total_g)) elif msg_type == "ATTITUDE": roll = float(getattr(msg, "roll", 0.0)) pitch = float(getattr(msg, "pitch", 0.0)) yaw = float(getattr(msg, "yaw", 0.0)) attitude.append((ts_ns, roll, pitch, yaw)) finally: if hasattr(source, "close"): try: source.close() except Exception: # pragma: no cover — defensive. pass return TlogSamples( accel=tuple(accel), attitude=tuple(attitude), imu_count_by_type=counts, ) def _open_tlog( tlog_path: Path, *, source_factory: Callable[[str], Any] | None, ) -> Any: if source_factory is not None: return source_factory(str(tlog_path)) try: from pymavlink import mavutil # type: ignore[import-not-found] except ImportError as exc: raise ReplayInputAdapterError( "pymavlink is required for replay auto-sync but is not " "importable in this binary" ) from exc return mavutil.mavlink_connection( str(tlog_path), dialect="ardupilotmega", mavlink_version="2.0", ) def _safe_msg_type(msg: Any) -> str: try: if hasattr(msg, "get_type"): return str(msg.get_type()) except Exception: return "" return type(msg).__name__ def _msg_timestamp_ns(msg: Any) -> int: raw = getattr(msg, "_timestamp", None) if raw is None: raise ReplayInputAdapterError( "tlog message missing _timestamp attribute; pymavlink " "mavlogfile should populate it on every recv_match() return" ) return int(float(raw) * 1_000_000_000) def _read_video_frames( video_path: Path, scan_seconds: float, ) -> Iterable[tuple[int, "np.ndarray"]]: """Decode the leading ``scan_seconds`` of the video. 
Yields ``(monotonic_ns, frame_bgr)`` tuples where ``monotonic_ns`` is the file's per-frame ``CAP_PROP_POS_MSEC × 1e6`` so the returned timestamps align with what :class:`VideoFileFrameSource` will report later. The Python ``time.monotonic_ns()`` is NOT used — the auto-sync result has to be deterministic across runs (AC-10) and tied to the video timeline. """ try: import cv2 as _cv2 # type: ignore[import-not-found] except ImportError as exc: raise ReplayInputAdapterError( "opencv-python is required for replay auto-sync but is " "not importable in this binary" ) from exc capture = _cv2.VideoCapture(str(video_path)) if not capture.isOpened(): capture.release() raise ReplayInputAdapterError( f"video file unreadable / unsupported codec: {video_path}" ) try: max_pos_ms = scan_seconds * 1000.0 while True: ok, frame = capture.read() if not ok or frame is None: break pos_ms = float(capture.get(_cv2.CAP_PROP_POS_MSEC)) if pos_ms > max_pos_ms: break ts_ns = int(pos_ms * 1_000_000) yield ts_ns, frame finally: capture.release() def _compute_flow_magnitudes( frames: list[tuple[int, "np.ndarray"]], ) -> tuple[tuple[int, float], ...]: """Pairwise mean optical-flow magnitude between consecutive frames. Uses Farneback dense flow (``cv2.calcOpticalFlowFarneback``) rather than pyramidal LK because Farneback returns a flow field over the whole image with no per-frame feature-tracking state, so the result is deterministic given the same input frames (AC-10). Returns ``((ts_ns_of_second_frame, mean_magnitude_px), ...)``. """ try: import cv2 as _cv2 # type: ignore[import-not-found] import numpy as _np # type: ignore[import-not-found] except ImportError as exc: # pragma: no cover — guarded at call sites. raise ReplayInputAdapterError( "opencv-python + numpy are required for replay auto-sync" ) from exc if len(frames) < 2: return () # Convert all frames to grayscale once up-front so the per-pair # cost is dominated by the optical-flow computation itself. 
gray_frames = [] for ts_ns, frame in frames: gray = _cv2.cvtColor(frame, _cv2.COLOR_BGR2GRAY) gray_frames.append((ts_ns, gray)) out: list[tuple[int, float]] = [] for i in range(1, len(gray_frames)): prev_ts, prev = gray_frames[i - 1] curr_ts, curr = gray_frames[i] flow = _cv2.calcOpticalFlowFarneback( prev, curr, None, pyr_scale=0.5, levels=3, winsize=15, iterations=3, poly_n=5, poly_sigma=1.2, flags=0, ) # ``flow`` shape: (H, W, 2) — dx + dy per pixel. magnitudes = _np.sqrt(flow[..., 0] ** 2 + flow[..., 1] ** 2) mean_mag = float(magnitudes.mean()) out.append((curr_ts, mean_mag)) return tuple(out) # Re-export the BUILD-flag check for symmetry with other replay modules. def _build_flag_on(name: str) -> bool: raw = os.environ.get(name, "") return raw.strip().lower() in {"on", "1", "true", "yes"} # --------------------------------------------------------------------- # AZ-698 — mid-flight cross-correlation aligner # # The AZ-405 head-takeoff detector only works when the video covers # the take-off moment. For mid-flight slices (e.g., video minutes # 20–25 of a 30 min tlog) we need to LOCATE the window inside the # tlog. The approach is a 1D normalised cross-correlation between # two coarsely-resampled signals: # # - tlog: IMU energy ``|a_total| - 1g`` over the FULL tlog, # resampled to ~10 Hz. # - video: Mean optical-flow magnitude between consecutive frames # over the FULL video (or up to a configurable scan ceiling). # # Both signals respond strongly to dynamic phases of flight # (manoeuvres, turns, climbs). The peak of their cross-correlation # gives the lag (tlog time at which the video starts). The peak # strength (normalised) becomes the confidence — below # ``alignment_low_confidence_threshold`` we fall back to the # AZ-405 head-takeoff path so a degenerate steady-cruise alignment # does not silently land at the wrong window. 
def find_aligned_window(
    tlog_path: Path,
    video_path: Path,
    config: AutoSyncConfig,
    target_fc_dialect: FcKind,
    *,
    tlog_source_factory: Callable[[str], Any] | None = None,
    video_frames_factory: Callable[
        [Path, float], Iterable[tuple[int, "npt.NDArray[np.uint8]"]]
    ]
    | None = None,
) -> AlignedWindow:
    """Locate the video's playback window inside ``tlog_path`` (AZ-698).

    Args:
        tlog_path: Binary ArduPilot tlog. The whole file is read up to
            :attr:`AutoSyncConfig.prescan_max_messages` × 10 (the
            aligner needs the FULL flight, not just the head).
        video_path: Mp4 / mkv input. The leading
            :attr:`AutoSyncConfig.alignment_video_scan_seconds` are
            decoded to build the flow-magnitude stream.
        config: Operator-tunable thresholds.
        target_fc_dialect: ``ARDUPILOT_PLANE`` or ``INAV`` — same
            parity contract as :func:`detect_tlog_takeoff`.
        tlog_source_factory: Test injection — replaces the
            ``pymavlink`` open call. On the low-confidence fallback
            path the factory is invoked a second time.
        video_frames_factory: Test injection — replaces
            ``cv2.VideoCapture`` frame iteration.

    Raises:
        ReplayInputAdapterError: When ``target_fc_dialect`` is not
            supported, when the tlog or video is missing, unreadable,
            or yields fewer than 2 samples after resampling, when the
            video flow stream is longer than the tlog stream, or when
            the fallback take-off detector itself fails.

    Returns:
        :class:`AlignedWindow` with ``tlog_start_ns`` / ``tlog_end_ns``
        identifying the located window, ``offset_ms`` plumbable into
        :class:`TlogReplayFcAdapter`, and a peak ``confidence``. When
        confidence falls below
        :attr:`AutoSyncConfig.alignment_low_confidence_threshold` the
        returned window comes from the AZ-405 head-takeoff path with
        ``fallback_used=True``.
    """
    if target_fc_dialect not in (FcKind.ARDUPILOT_PLANE, FcKind.INAV):
        raise ReplayInputAdapterError(
            f"target_fc_dialect must be ARDUPILOT_PLANE or INAV; got {target_fc_dialect!r}"
        )
    if not tlog_path.is_file():
        raise ReplayInputAdapterError(f"tlog file not found: {tlog_path}")
    if not video_path.is_file():
        raise ReplayInputAdapterError(f"video file not found: {video_path}")

    tlog_energy = _load_tlog_imu_energy_stream(
        tlog_path,
        max_messages=config.prescan_max_messages * 10,
        source_factory=tlog_source_factory,
    )
    if len(tlog_energy) < 2:
        raise ReplayInputAdapterError(
            f"tlog yielded {len(tlog_energy)} IMU sample(s); "
            "need ≥ 2 for cross-correlation alignment"
        )

    if video_frames_factory is None:
        frames = list(
            _read_video_frames(video_path, config.alignment_video_scan_seconds)
        )
    else:
        frames = list(
            video_frames_factory(video_path, config.alignment_video_scan_seconds)
        )
    if len(frames) < 2:
        raise ReplayInputAdapterError(
            f"video yielded {len(frames)} frame(s); "
            "need ≥ 2 for cross-correlation alignment"
        )
    flow_samples = _compute_flow_magnitudes(frames)
    if len(flow_samples) < 2:
        raise ReplayInputAdapterError(
            f"video produced {len(flow_samples)} flow sample(s); "
            "need ≥ 2 for cross-correlation alignment"
        )

    return _align_via_cross_correlation(
        tlog_energy=tlog_energy,
        flow_samples=flow_samples,
        config=config,
        target_fc_dialect=target_fc_dialect,
        tlog_path=tlog_path,
        tlog_source_factory=tlog_source_factory,
    )


def _align_via_cross_correlation(
    *,
    tlog_energy: tuple[tuple[int, float], ...],
    flow_samples: tuple[tuple[int, float], ...],
    config: AutoSyncConfig,
    target_fc_dialect: FcKind,
    tlog_path: Path,
    tlog_source_factory: Callable[[str], Any] | None,
) -> AlignedWindow:
    """Compute kernel: turn pre-loaded streams into an :class:`AlignedWindow`.

    Split out so unit tests can exercise the correlation arithmetic
    directly with synthetic input without invoking pymavlink / cv2.
    Only the low-confidence fallback touches ``tlog_path`` /
    ``tlog_source_factory`` (it re-runs the head take-off detector).

    Both streams must be non-empty; the first sample of each defines
    the origin of its resampling grid.

    Raises:
        ReplayInputAdapterError: When either resampled stream has
            fewer than 2 samples, or the video stream is longer than
            the tlog stream.
    """
    import numpy as _np

    # Clamp to ≥ 1 Hz so the grid period is always a positive integer.
    resample_hz = max(config.alignment_resample_hz, 1.0)
    period_ns = int(1_000_000_000 / resample_hz)

    tlog_origin_ns = tlog_energy[0][0]
    tlog_resampled = _resample_uniform(tlog_energy, period_ns, tlog_origin_ns)
    if len(tlog_resampled) < 2:
        raise ReplayInputAdapterError(
            "tlog resampled stream has < 2 samples; cannot cross-correlate"
        )

    video_origin_ns = flow_samples[0][0]
    flow_resampled = _resample_uniform(flow_samples, period_ns, video_origin_ns)
    if len(flow_resampled) < 2:
        raise ReplayInputAdapterError(
            "video flow stream has < 2 samples; cannot cross-correlate"
        )
    if len(flow_resampled) > len(tlog_resampled):
        raise ReplayInputAdapterError(
            "video flow stream is longer than the tlog energy stream; "
            "auto-trim requires the video to be a slice of a longer tlog"
        )

    tlog_arr = _np.asarray(tlog_resampled, dtype=_np.float64)
    flow_arr = _np.asarray(flow_resampled, dtype=_np.float64)

    flow_centred = _zero_mean_normalise(flow_arr)
    flat_video = float(_np.linalg.norm(flow_centred)) == 0.0
    if flat_video:
        # Flat video → no information for correlation. The fallback
        # path is forced below; confidence reported as 0.
        peak_idx = 0
        confidence = 0.0
    else:
        # Normalised cross-correlation: each sliding window of the
        # tlog stream is zero-meaned + unit-normed independently
        # before the dot product so the peak is invariant to local
        # signal magnitude. Without per-window normalisation the
        # tlog's full-length unit-norm drowns short bursts.
        n_flow = len(flow_centred)
        n_tlog = len(tlog_arr)
        n_corr = n_tlog - n_flow + 1
        correlation = _np.zeros(n_corr, dtype=_np.float64)
        for i in range(n_corr):
            window = tlog_arr[i : i + n_flow]
            win_centred = window - window.mean()
            win_norm = float(_np.linalg.norm(win_centred))
            # A flat tlog window carries no shape; leave its score at 0.
            if win_norm > 0.0:
                correlation[i] = float(_np.dot(win_centred / win_norm, flow_centred))
        peak_idx = int(_np.argmax(correlation))
        confidence = max(0.0, min(1.0, float(correlation[peak_idx])))

    video_duration_ns = _stream_duration_ns(flow_samples)

    # ``flat_video`` is tested explicitly: relying on ``0.0 < threshold``
    # alone would let a zero-information alignment through whenever the
    # configured threshold is ≤ 0.
    if flat_video or confidence < config.alignment_low_confidence_threshold:
        return _fallback_to_head_takeoff(
            tlog_path=tlog_path,
            tlog_source_factory=tlog_source_factory,
            target_fc_dialect=target_fc_dialect,
            config=config,
            tlog_energy=tlog_energy,
            video_origin_ns=video_origin_ns,
            video_flow_duration_ns=video_duration_ns,
            confidence=confidence,
        )

    # Absolute tlog timeline value that the first flow sample
    # (``video_origin_ns``) aligns to. The adapter's seek check
    # compares this against the raw pymavlink ``msg._timestamp`` so the
    # value MUST be on the tlog timeline, NOT a delta.
    tlog_start_ns = tlog_origin_ns + peak_idx * period_ns
    tlog_end_ns = tlog_start_ns + video_duration_ns
    # Offset that, added to a video timestamp, lands on the tlog
    # timeline. Matches ``AutoSyncDecision.offset_ms`` semantics
    # (``validate_offset_or_fail`` does ``vts + offset_ns``).
    offset_ms = (tlog_start_ns - video_origin_ns) // 1_000_000

    return AlignedWindow(
        tlog_start_ns=tlog_start_ns,
        tlog_end_ns=tlog_end_ns,
        offset_ms=offset_ms,
        confidence=confidence,
        fallback_used=False,
    )


def _stream_duration_ns(
    samples: tuple[tuple[int, float], ...],
) -> int:
    """Return last-minus-first timestamp of ``samples`` (0 when empty)."""
    if not samples:
        return 0
    return samples[-1][0] - samples[0][0]


def _fallback_to_head_takeoff(
    *,
    tlog_path: Path,
    tlog_source_factory: Callable[[str], Any] | None,
    target_fc_dialect: FcKind,
    config: AutoSyncConfig,
    tlog_energy: tuple[tuple[int, float], ...],
    video_origin_ns: int,
    video_flow_duration_ns: int,
    confidence: float,
) -> AlignedWindow:
    """Low-confidence path: use AZ-405 head-takeoff detector.

    Returns an :class:`AlignedWindow` whose ``offset_ms`` and
    ``tlog_start_ns`` come from the takeoff onset; ``fallback_used``
    is ``True`` so callers + FDR audit can record the divergence.
    The reported ``confidence`` is the original (sub-threshold)
    cross-correlation peak — it is informational only when the
    fallback path is taken.

    When the take-off detector reports zero confidence, the window
    starts at the first tlog energy sample instead (or at 0 when
    ``tlog_energy`` is empty).

    Raises:
        ReplayInputAdapterError: Propagated from
            :func:`detect_tlog_takeoff`, which re-reads the tlog.
    """
    takeoff = detect_tlog_takeoff(
        tlog_path,
        target_fc_dialect,
        config,
        source_factory=tlog_source_factory,
    )
    if takeoff.confidence > 0.0:
        tlog_start_ns = takeoff.onset_ns
    elif tlog_energy:
        tlog_start_ns = tlog_energy[0][0]
    else:
        tlog_start_ns = 0
    tlog_end_ns = tlog_start_ns + video_flow_duration_ns
    offset_ms = (tlog_start_ns - video_origin_ns) // 1_000_000
    return AlignedWindow(
        tlog_start_ns=tlog_start_ns,
        tlog_end_ns=tlog_end_ns,
        offset_ms=offset_ms,
        confidence=confidence,
        fallback_used=True,
    )


def _resample_uniform(
    samples: tuple[tuple[int, float], ...],
    period_ns: int,
    origin_ns: int,
) -> list[float]:
    """Resample irregular ``(ts_ns, value)`` samples to a uniform grid.

    Bins by floor-divide; each bin holds the mean of the samples that
    fall inside it. Empty bins between data carry forward the most
    recent in-bin mean (zero-order hold); leading empty bins hold 0.0.
    Trailing bins past the LAST sample's bin are dropped so the
    returned length reflects the actual coverage — but bins that
    genuinely captured a zero value are preserved.

    The grid length is derived from the LAST sample's timestamp, so
    samples are expected in ascending order; samples falling outside
    ``[origin_ns, last bin]`` are ignored. ``period_ns`` must be
    positive.
    """
    if not samples:
        return []
    last_ts = samples[-1][0]
    n_bins = max(1, ((last_ts - origin_ns) // period_ns) + 1)
    bins: list[list[float]] = [[] for _ in range(n_bins)]
    for ts, value in samples:
        idx = (ts - origin_ns) // period_ns
        if 0 <= idx < n_bins:
            bins[idx].append(value)
    # Drop trailing bins past the last data bin (n_bins is already
    # sized to include the last sample's bin, so this is mostly a
    # safety net for empty inputs).
    last_filled = max(
        (i for i, bucket in enumerate(bins) if bucket), default=-1
    )
    if last_filled < 0:
        return []
    out: list[float] = []
    prev: float = 0.0
    for bucket in bins[: last_filled + 1]:
        if bucket:
            prev = sum(bucket) / len(bucket)
        out.append(prev)
    return out


def _zero_mean_normalise(
    arr: "npt.NDArray[np.float64]",
) -> "npt.NDArray[np.float64]":
    """Subtract the mean and scale to unit L2 norm.

    A constant input has zero norm after centring; it is returned
    centred (all zeros) rather than divided by zero.
    """
    import numpy as _np

    centred: "npt.NDArray[np.float64]" = arr - arr.mean()
    norm = float(_np.linalg.norm(centred))
    if norm == 0.0:
        return centred
    result: "npt.NDArray[np.float64]" = centred / norm
    return result


def _load_tlog_imu_energy_stream(
    tlog_path: Path,
    *,
    max_messages: int,
    source_factory: Callable[[str], Any] | None,
) -> tuple[tuple[int, float], ...]:
    """Walk the WHOLE tlog (up to ``max_messages``) for IMU energy samples.

    Mirrors :func:`_load_tlog_samples` but only collects the
    accelerometer total-magnitude absolute deviation from 1 g (the
    signal the AZ-698 cross-correlation aligner consumes). The
    ATTITUDE channel is not needed here.

    Returns ``((ts_ns, abs(total_g - 1.0)), ...)`` in read order. The
    source is closed on every exit path; errors raised by ``close()``
    are ignored.

    Raises:
        ReplayInputAdapterError: When ``recv_match`` raises, or a
            message lacks the ``_timestamp`` attribute.
    """
    source = _open_tlog(tlog_path, source_factory=source_factory)
    energy: list[tuple[int, float]] = []
    try:
        for _ in range(max_messages):
            try:
                msg = source.recv_match(
                    type=["RAW_IMU", "SCALED_IMU2"],
                    blocking=False,
                )
            except Exception as exc:  # pragma: no cover — defensive.
                raise ReplayInputAdapterError(
                    f"tlog scan failed on {tlog_path}: {exc!r}"
                ) from exc
            if msg is None:
                break
            ts_ns = _msg_timestamp_ns(msg)
            # Accelerometer components arrive in mG; convert to g.
            xa = float(getattr(msg, "xacc", 0.0)) / _MG_PER_G
            ya = float(getattr(msg, "yacc", 0.0)) / _MG_PER_G
            za = float(getattr(msg, "zacc", 0.0)) / _MG_PER_G
            total_g = math.sqrt(xa * xa + ya * ya + za * za)
            energy.append((ts_ns, abs(total_g - _REST_TOTAL_G)))
    finally:
        if hasattr(source, "close"):
            # Best-effort cleanup: a failing close() must not mask the
            # scan result or the original exception.
            try:
                source.close()
            except Exception:  # pragma: no cover — defensive.
                pass
    return tuple(energy)