mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 18:41:13 +00:00
87fe98858f
Adds find_aligned_window cross-correlation (NCC, per-window unit norm)
between IMU energy and video optical-flow magnitude. Returns
AlignedWindow{tlog_start_ns, tlog_end_ns, offset_ms, confidence,
fallback_used}, with fallback to head-takeoff on low confidence to
preserve AZ-405 behavior. TlogReplayFcAdapter honors tlog_start_ns and
skips pre-window messages. New --auto-trim CLI flag, mutex with
--time-offset-ms. AC-1..AC-4 covered by unit tests; AC-5 skipped (no
real flight_derkachi.mp4 in repo). 106 tests pass in regression slice.
Zero new mypy --strict errors.
Co-authored-by: Cursor <cursoragent@cursor.com>
1013 lines
37 KiB
Python
1013 lines
37 KiB
Python
"""Auto-sync detectors + offset compute + AC-9 validator (AZ-405).
|
||
|
||
Three concerns:
|
||
|
||
1. **Tlog take-off detector** — walks the head of the tlog, looks for
|
||
a sustained vertical-acceleration excess + sustained attitude-rate
|
||
excess, returns ``(takeoff_ns, confidence)``.
|
||
2. **Video motion-onset detector** — runs OpenCV pyramidal optical
|
||
flow over the leading seconds of the video, returns
|
||
``(motion_onset_ns, confidence)``.
|
||
3. **AC-9 frame-window match validator** — given a candidate offset
|
||
and the tlog/video timestamp series, returns 0 if ≥ 95 % of
|
||
video frames have an IMU sample within ± 100 ms after the offset
|
||
is applied; 2 otherwise.
|
||
|
||
The detector functions are split into a thin path-reading wrapper
|
||
(``detect_tlog_takeoff`` / ``detect_video_motion_onset``) and a pure
|
||
sample-driven core (``_compute_tlog_takeoff_from_samples`` /
|
||
``_compute_video_onset_from_samples``). Tests exercise the pure cores
|
||
directly with synthetic fixtures; production calls the wrappers,
|
||
which read the tlog via ``pymavlink`` and the video via ``cv2``.
|
||
|
||
Both wrappers accept an optional ``source_factory`` (tlog) /
|
||
``frames_factory`` (video) injection point so unit tests can swap in
|
||
fakes without touching the filesystem (mirrors AZ-399's pattern).
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import bisect
|
||
import math
|
||
import os
|
||
from collections.abc import Callable, Iterable
|
||
from dataclasses import dataclass
|
||
from pathlib import Path
|
||
from typing import TYPE_CHECKING, Any
|
||
|
||
from gps_denied_onboard._types.fc import FcKind
|
||
from gps_denied_onboard.replay_input.errors import ReplayInputAdapterError
|
||
from gps_denied_onboard.replay_input.interface import (
|
||
AlignedWindow,
|
||
AutoSyncConfig,
|
||
AutoSyncDecision,
|
||
)
|
||
|
||
if TYPE_CHECKING:
|
||
import numpy as np
|
||
import numpy.typing as npt
|
||
|
||
__all__ = [
|
||
"TlogSamples",
|
||
"compute_offset",
|
||
"detect_tlog_takeoff",
|
||
"detect_video_motion_onset",
|
||
"find_aligned_window",
|
||
"validate_offset_or_fail",
|
||
]
|
||
|
||
|
||
# Divisor that turns the milli-g accelerometer components published by
# MAVLink RAW_IMU / SCALED_IMU2 into g (1 g ≡ 9.80665 m/s² by ISO 80000-3).
_MG_PER_G: float = 1000.0
# Total acceleration magnitude expected at rest, in g. The take-off
# detector measures ``abs(total_g - _REST_TOTAL_G)``, i.e. the deviation of
# the acceleration magnitude from 1 g irrespective of body orientation;
# because of the ``abs`` a deviation on either side of 1 g counts as excess.
_REST_TOTAL_G: float = 1.0
|
||
|
||
|
||
# ---------------------------------------------------------------------
|
||
# DTOs (internal — public API surfaces results via AutoSyncDecision)
|
||
|
||
|
||
@dataclass(frozen=True, slots=True)
|
||
class _DetectorResult:
|
||
"""Outcome of a single detector pass.
|
||
|
||
``onset_ns`` is the best-guess event start (ns); ``confidence``
|
||
is in [0, 1] and reflects how sustained the signal was relative
|
||
to the configured threshold + sustained-time requirement.
|
||
"""
|
||
|
||
onset_ns: int
|
||
confidence: float
|
||
|
||
|
||
@dataclass(frozen=True, slots=True)
|
||
class TlogSamples:
|
||
"""Pre-loaded tlog samples extracted by the take-off detector.
|
||
|
||
Used as the input shape for :func:`_compute_tlog_takeoff_from_samples`
|
||
so unit tests can build a deterministic fixture without parsing a
|
||
real ``.tlog`` file.
|
||
|
||
Attributes:
|
||
accel: Sequence of ``(ts_ns, total_accel_g)`` pairs sourced
|
||
from ``RAW_IMU`` / ``SCALED_IMU2`` messages.
|
||
attitude: Sequence of ``(ts_ns, roll_rad, pitch_rad, yaw_rad)``
|
||
tuples sourced from ``ATTITUDE`` messages.
|
||
imu_count_by_type: Map of message-type-name → count, used for
|
||
the ``"tlog missing required message types: [...]"``
|
||
error path (R-DEMO-3).
|
||
"""
|
||
|
||
accel: tuple[tuple[int, float], ...]
|
||
attitude: tuple[tuple[int, float, float, float], ...]
|
||
imu_count_by_type: dict[str, int]
|
||
|
||
|
||
# ---------------------------------------------------------------------
|
||
# Public entrypoints
|
||
|
||
|
||
def detect_tlog_takeoff(
    tlog_path: Path,
    target_fc_dialect: FcKind,
    config: AutoSyncConfig,
    *,
    source_factory: Callable[[str], Any] | None = None,
) -> _DetectorResult:
    """Scan the head of a tlog and locate the take-off pattern.

    This is the path-reading wrapper: it validates its inputs, loads at
    most ``config.prescan_max_messages`` matching messages through
    :func:`_load_tlog_samples`, and hands them to the pure kernel
    :func:`_compute_tlog_takeoff_from_samples`.

    Args:
        tlog_path: Location of the tlog file; it must exist as a regular
            file.
        target_fc_dialect: ``ARDUPILOT_PLANE`` or ``INAV``. In this
            function the value is only validated — it does not change
            how the tlog is read.
        config: Operator-tunable thresholds (see
            :class:`AutoSyncConfig`).
        source_factory: Test-only injection. When given, it is used in
            place of the pymavlink open call; the object it returns must
            offer ``recv_match`` (and optionally ``close``) like
            pymavlink's ``mavutil.mavlink_connection``.

    Returns:
        The detected take-off onset and its confidence.

    Raises:
        ReplayInputAdapterError: When ``target_fc_dialect`` is not one of
            the two supported kinds, when ``tlog_path`` is not a file, or
            when the loaded samples contain no ``RAW_IMU`` /
            ``SCALED_IMU2`` or no ``ATTITUDE`` data (the R-DEMO-3
            fail-fast path raised by the compute kernel).
    """
    supported_dialects = (FcKind.ARDUPILOT_PLANE, FcKind.INAV)
    if target_fc_dialect not in supported_dialects:
        raise ReplayInputAdapterError(
            f"target_fc_dialect must be ARDUPILOT_PLANE or INAV; got {target_fc_dialect!r}"
        )
    if not tlog_path.is_file():
        raise ReplayInputAdapterError(f"tlog file not found: {tlog_path}")

    head_samples = _load_tlog_samples(
        tlog_path,
        config.prescan_max_messages,
        source_factory=source_factory,
    )
    return _compute_tlog_takeoff_from_samples(head_samples, config)
|
||
|
||
|
||
def detect_video_motion_onset(
    video_path: Path,
    config: AutoSyncConfig,
    *,
    frames_factory: Callable[[Path, float], Iterable[tuple[int, "np.ndarray"]]]
    | None = None,
) -> _DetectorResult:
    """Detect where motion begins in the leading part of a video.

    The leading ``config.video_motion_scan_seconds`` of the video are
    decoded, turned into pairwise optical-flow magnitudes, and passed to
    the pure kernel :func:`_compute_video_onset_from_samples`.

    Args:
        video_path: Video file to scan; it must exist as a regular file
            even when ``frames_factory`` is supplied.
        config: Operator-tunable thresholds (see
            :class:`AutoSyncConfig`).
        frames_factory: Test-only injection. When given, it is called
            with ``(video_path, scan_seconds)`` instead of the cv2-based
            reader and must produce ``(monotonic_ns, frame_bgr)`` tuples.
            At least two frames are needed for a pairwise flow value.

    Returns:
        The detected motion onset and its confidence.

    Raises:
        ReplayInputAdapterError: When the video file does not exist, or
            when fewer than 2 frames were obtained.
    """
    if not video_path.is_file():
        raise ReplayInputAdapterError(f"video file not found: {video_path}")

    # Production reads through cv2; tests may substitute their own source.
    read_frames = _read_video_frames if frames_factory is None else frames_factory
    frames = list(read_frames(video_path, config.video_motion_scan_seconds))

    decoded = len(frames)
    if decoded < 2:
        raise ReplayInputAdapterError(
            f"video file unreadable or too short: {video_path} "
            f"(decoded {decoded} frame(s); need ≥ 2)"
        )
    return _compute_video_onset_from_samples(_compute_flow_magnitudes(frames), config)
|
||
|
||
|
||
def compute_offset(
    tlog_result: _DetectorResult,
    video_result: _DetectorResult,
) -> AutoSyncDecision:
    """Merge the tlog and video detector results into an :class:`AutoSyncDecision`.

    The offset is ``tlog_result.onset_ns - video_result.onset_ns`` and is
    reported in whole milliseconds via floor division, so a negative
    difference rounds towards minus infinity.

    The combined confidence is the smaller of the two input confidences:
    the weaker signal decides, so a downstream low-confidence check
    (WARN-and-proceed, AC-6) triggers when either side is unreliable.

    Args:
        tlog_result: Outcome of the tlog take-off detector.
        video_result: Outcome of the video motion-onset detector.

    Returns:
        A decision carrying the offset, both onsets, both individual
        confidences, and the combined confidence.
    """
    takeoff_ns = tlog_result.onset_ns
    motion_ns = video_result.onset_ns
    weakest = min(tlog_result.confidence, video_result.confidence)
    return AutoSyncDecision(
        offset_ms=(takeoff_ns - motion_ns) // 1_000_000,
        tlog_takeoff_ns=takeoff_ns,
        video_motion_onset_ns=motion_ns,
        tlog_confidence=tlog_result.confidence,
        video_confidence=video_result.confidence,
        combined_confidence=weakest,
    )
|
||
|
||
|
||
def validate_offset_or_fail(
    offset_ms: int,
    tlog_imu_timestamps_ns: Iterable[int],
    video_frame_timestamps_ns: Iterable[int],
    threshold_pct: float,
    *,
    window_ms: int = 100,
) -> int:
    """AC-9 frame-window match validator.

    Each video timestamp is shifted by ``offset_ms`` and compared with
    the closest tlog IMU timestamp (found by binary search over the
    sorted IMU timestamps). A frame counts as matched when that closest
    sample lies within ± ``window_ms`` (inclusive).

    Args:
        offset_ms: Offset added to every video timestamp, in ms.
        tlog_imu_timestamps_ns: IMU timestamps in ns; any order.
        video_frame_timestamps_ns: Video frame timestamps in ns.
        threshold_pct: Minimum percentage of matched frames for a pass.
        window_ms: Half-width of the matching window, in ms.

    Returns:
        ``0`` when at least ``threshold_pct`` % of the frames matched,
        ``2`` otherwise (the CLI exit code for the AC-8 hard-fail).
        Empty video or empty IMU input also yields ``2`` so a degenerate
        input is never reported as a pass.
    """
    frame_ts = list(video_frame_timestamps_ns)
    if not frame_ts:
        # Nothing to match; report the hard-fail instead of a false PASS.
        return 2
    imu_ts = sorted(tlog_imu_timestamps_ns)
    if not imu_ts:
        return 2

    shift_ns = int(offset_ms) * 1_000_000
    tolerance_ns = int(window_ms) * 1_000_000
    imu_count = len(imu_ts)

    def _has_imu_within_window(frame_ns: int) -> bool:
        shifted = frame_ns + shift_ns
        pos = bisect.bisect_left(imu_ts, shifted)
        # Only the two neighbours of the insertion point can be nearest;
        # at either end of the list one of them does not exist.
        gaps = [
            abs(imu_ts[k] - shifted)
            for k in (pos - 1, pos)
            if 0 <= k < imu_count
        ]
        return min(gaps) <= tolerance_ns

    hits = sum(1 for ts in frame_ts if _has_imu_within_window(ts))
    hit_pct = (hits / len(frame_ts)) * 100.0
    return 0 if hit_pct >= threshold_pct else 2
|
||
|
||
|
||
# ---------------------------------------------------------------------
|
||
# Pure compute kernels (testable without disk IO)
|
||
|
||
|
||
def _compute_tlog_takeoff_from_samples(
    samples: TlogSamples,
    config: AutoSyncConfig,
) -> _DetectorResult:
    """Pure detector: derive the take-off result from pre-loaded samples.

    Two signals are scanned with :func:`_find_sustained_event` over a
    window of ``config.sustained_seconds``:

    * the deviation of the acceleration magnitude from 1 g, against
      ``config.takeoff_accel_threshold_g``;
    * the attitude-rate magnitude (rad/s) between consecutive ATTITUDE
      samples, against ``config.takeoff_attitude_rate_threshold_rad_s``.

    Outcome by which signals fired:

    * both — the earlier onset and the smaller confidence;
    * exactly one — that onset, with its confidence scaled by 0.6 so the
      combined value can fall under the WARN-and-proceed threshold (AC-6);
    * neither — the first accel timestamp with confidence ``0.0``, so the
      caller can fall back to the tlog start.

    Raises:
        ReplayInputAdapterError: When ``samples`` holds no accel samples
            or no attitude samples (R-DEMO-3 fail-fast).
    """
    if not samples.accel:
        missing = ["RAW_IMU", "SCALED_IMU2"]
        raise ReplayInputAdapterError(
            f"tlog missing required message types: {missing}"
        )
    if not samples.attitude:
        raise ReplayInputAdapterError(
            "tlog missing required message types: ['ATTITUDE']"
        )

    window_ns = int(config.sustained_seconds * 1_000_000_000)

    # Attitude rate between each consecutive pair, stamped with the LATER
    # sample's time. Pairs whose timestamps do not advance are skipped.
    rate_series: list[tuple[int, float]] = []
    for earlier, later in zip(samples.attitude, samples.attitude[1:]):
        t0_ns, roll0, pitch0, yaw0 = earlier
        t1_ns, roll1, pitch1, yaw1 = later
        elapsed_s = (t1_ns - t0_ns) / 1_000_000_000.0
        if elapsed_s <= 0.0:
            continue
        roll_rate = (roll1 - roll0) / elapsed_s
        pitch_rate = (pitch1 - pitch0) / elapsed_s
        # Only the yaw delta is wrapped, to survive the ±π wrap-around.
        yaw_rate = _wrap_pi(yaw1 - yaw0) / elapsed_s
        rate_series.append(
            (t1_ns, math.sqrt(roll_rate ** 2 + pitch_rate ** 2 + yaw_rate ** 2))
        )

    excess_series = tuple(
        (ts_ns, abs(total_g - _REST_TOTAL_G)) for ts_ns, total_g in samples.accel
    )

    accel_hit = _find_sustained_event(
        excess_series,
        threshold=config.takeoff_accel_threshold_g,
        sustained_ns=window_ns,
    )
    attitude_hit = _find_sustained_event(
        tuple(rate_series),
        threshold=config.takeoff_attitude_rate_threshold_rad_s,
        sustained_ns=window_ns,
    )

    if accel_hit is not None and attitude_hit is not None:
        # Both signals fired: reference the earlier onset and let the
        # weaker signal set the confidence.
        return _DetectorResult(
            onset_ns=min(accel_hit[0], attitude_hit[0]),
            confidence=min(accel_hit[1], attitude_hit[1]),
        )

    lone_hit = accel_hit if accel_hit is not None else attitude_hit
    if lone_hit is None:
        # Neither signal crossed its threshold: flag "no clear take-off".
        return _DetectorResult(onset_ns=samples.accel[0][0], confidence=0.0)

    # A single corroborating signal is discounted.
    onset_ns, raw_conf = lone_hit
    return _DetectorResult(onset_ns=onset_ns, confidence=raw_conf * 0.6)
|
||
|
||
|
||
def _compute_video_onset_from_samples(
    flow_samples: tuple[tuple[int, float], ...],
    config: AutoSyncConfig,
) -> _DetectorResult:
    """Pure detector: derive the motion-onset result from flow magnitudes.

    The ``(ts_ns, magnitude)`` samples are scanned with
    :func:`_find_sustained_event` using ``config.video_motion_threshold``
    and a window of ``config.sustained_seconds``.

    Returns:
        The event's start and ratio when one is found. Otherwise a
        zero-confidence result whose onset is the first sample's
        timestamp, or ``0`` when ``flow_samples`` is empty.
    """
    if not flow_samples:
        return _DetectorResult(onset_ns=0, confidence=0.0)

    window_ns = int(config.sustained_seconds * 1_000_000_000)
    hit = _find_sustained_event(
        flow_samples,
        threshold=config.video_motion_threshold,
        sustained_ns=window_ns,
    )
    if hit is None:
        return _DetectorResult(onset_ns=flow_samples[0][0], confidence=0.0)
    return _DetectorResult(onset_ns=hit[0], confidence=hit[1])
|
||
|
||
|
||
def _find_sustained_event(
|
||
samples: tuple[tuple[int, float], ...] | list[tuple[int, float]],
|
||
*,
|
||
threshold: float,
|
||
sustained_ns: int,
|
||
) -> tuple[int, float] | None:
|
||
"""Sliding-window scan: return ``(start_ns, ratio)`` of the
|
||
earliest window where the fraction of samples above
|
||
``threshold`` is maximised, provided that fraction is ≥ 0.5
|
||
(signal-vs-noise floor) and the window covers at least 80 % of
|
||
``sustained_ns`` (guards against truncated windows at the tail).
|
||
|
||
Returns ``None`` when no qualifying window exists.
|
||
"""
|
||
seq = list(samples)
|
||
n = len(seq)
|
||
if n < 2:
|
||
return None
|
||
best_start_ns: int | None = None
|
||
best_ratio = 0.0
|
||
min_window_ns = int(sustained_ns * 0.8)
|
||
for i in range(n):
|
||
start_ns = seq[i][0]
|
||
end_ns = start_ns + sustained_ns
|
||
# Walk j forward while still inside the window.
|
||
j = i
|
||
above = 0
|
||
while j < n and seq[j][0] <= end_ns:
|
||
if seq[j][1] > threshold:
|
||
above += 1
|
||
j += 1
|
||
window_size = j - i
|
||
if window_size < 2:
|
||
continue
|
||
window_dur_ns = seq[j - 1][0] - start_ns
|
||
if window_dur_ns < min_window_ns:
|
||
continue
|
||
ratio = above / window_size
|
||
if ratio > best_ratio:
|
||
best_ratio = ratio
|
||
best_start_ns = start_ns
|
||
if best_start_ns is None or best_ratio < 0.5:
|
||
return None
|
||
return (best_start_ns, best_ratio)
|
||
|
||
|
||
def _wrap_pi(angle_rad: float) -> float:
    """Fold an angle delta into ``(-π, π]`` so yaw wrap-around is handled."""
    full_turn = 2.0 * math.pi
    # Python's ``%`` with a positive modulus lands in [0, 2π); the upper
    # half of that range is then shifted down by one full turn.
    wrapped = angle_rad % full_turn
    return wrapped - full_turn if wrapped > math.pi else wrapped
|
||
|
||
|
||
# ---------------------------------------------------------------------
|
||
# Disk-reading wrappers (production paths)
|
||
|
||
|
||
# MAVLink message types requested from the tlog by the head scan in
# ``_load_tlog_samples``: the two IMU variants plus ATTITUDE.
_REQUIRED_TLOG_TYPES: tuple[str, ...] = ("RAW_IMU", "SCALED_IMU2", "ATTITUDE")
|
||
|
||
|
||
def _load_tlog_samples(
    tlog_path: Path,
    max_messages: int,
    *,
    source_factory: Callable[[str], Any] | None,
) -> TlogSamples:
    """Read IMU and ATTITUDE samples from the head of a tlog.

    The source is opened through :func:`_open_tlog` (pymavlink in
    production, an injected fake in tests, mirroring the AZ-399
    source-factory pattern). Up to ``max_messages`` messages of the
    types in ``_REQUIRED_TLOG_TYPES`` are pulled; reading stops early
    when ``recv_match`` returns ``None``.

    Args:
        tlog_path: Tlog file to read.
        max_messages: Upper bound on the number of ``recv_match`` calls.
        source_factory: Optional replacement for the pymavlink open call.

    Returns:
        The collected accel samples (total acceleration magnitude in g),
        attitude samples, and a per-type message count.

    Raises:
        ReplayInputAdapterError: When ``recv_match`` raises, or when a
            message lacks the ``_timestamp`` attribute.
    """
    source = _open_tlog(tlog_path, source_factory=source_factory)
    accel_rows: list[tuple[int, float]] = []
    attitude_rows: list[tuple[int, float, float, float]] = []
    seen_by_type: dict[str, int] = {}
    try:
        for _ in range(max_messages):
            try:
                msg = source.recv_match(
                    type=list(_REQUIRED_TLOG_TYPES),
                    blocking=False,
                )
            except Exception as exc:  # pragma: no cover — defensive.
                raise ReplayInputAdapterError(
                    f"tlog scan failed on {tlog_path}: {exc!r}"
                ) from exc
            if msg is None:
                break

            kind = _safe_msg_type(msg)
            if not kind:
                continue
            seen_by_type[kind] = seen_by_type.get(kind, 0) + 1
            stamp_ns = _msg_timestamp_ns(msg)

            if kind in ("RAW_IMU", "SCALED_IMU2"):
                # Components arrive in milli-g; a missing field reads as 0.
                gx, gy, gz = (
                    float(getattr(msg, axis, 0.0)) / _MG_PER_G
                    for axis in ("xacc", "yacc", "zacc")
                )
                accel_rows.append((stamp_ns, math.sqrt(gx * gx + gy * gy + gz * gz)))
            elif kind == "ATTITUDE":
                roll, pitch, yaw = (
                    float(getattr(msg, angle, 0.0))
                    for angle in ("roll", "pitch", "yaw")
                )
                attitude_rows.append((stamp_ns, roll, pitch, yaw))
    finally:
        # Best-effort close: a failing close() must not mask the result
        # (or the error) of the scan itself.
        if hasattr(source, "close"):
            try:
                source.close()
            except Exception:  # pragma: no cover — defensive.
                pass
    return TlogSamples(
        accel=tuple(accel_rows),
        attitude=tuple(attitude_rows),
        imu_count_by_type=seen_by_type,
    )
|
||
|
||
|
||
def _open_tlog(
|
||
tlog_path: Path,
|
||
*,
|
||
source_factory: Callable[[str], Any] | None,
|
||
) -> Any:
|
||
if source_factory is not None:
|
||
return source_factory(str(tlog_path))
|
||
try:
|
||
from pymavlink import mavutil # type: ignore[import-not-found]
|
||
except ImportError as exc:
|
||
raise ReplayInputAdapterError(
|
||
"pymavlink is required for replay auto-sync but is not "
|
||
"importable in this binary"
|
||
) from exc
|
||
return mavutil.mavlink_connection(
|
||
str(tlog_path),
|
||
dialect="ardupilotmega",
|
||
mavlink_version="2.0",
|
||
)
|
||
|
||
|
||
def _safe_msg_type(msg: Any) -> str:
    """Return the message's type name without ever raising.

    Uses ``msg.get_type()`` when the object has that attribute, and the
    Python class name otherwise. Any exception raised while probing or
    calling ``get_type`` results in an empty string.
    """
    try:
        reported = str(msg.get_type()) if hasattr(msg, "get_type") else None
    except Exception:
        return ""
    if reported is None:
        return type(msg).__name__
    return reported
|
||
|
||
|
||
def _msg_timestamp_ns(msg: Any) -> int:
    """Convert a message's ``_timestamp`` (seconds) to integer nanoseconds.

    The value is passed through ``float`` and multiplied by 1e9; the
    result is truncated towards zero by ``int``.

    Raises:
        ReplayInputAdapterError: When the message has no ``_timestamp``
            attribute, or the attribute is ``None``.
    """
    seconds = getattr(msg, "_timestamp", None)
    if seconds is not None:
        return int(float(seconds) * 1_000_000_000)
    raise ReplayInputAdapterError(
        "tlog message missing _timestamp attribute; pymavlink "
        "mavlogfile should populate it on every recv_match() return"
    )
|
||
|
||
|
||
def _read_video_frames(
    video_path: Path,
    scan_seconds: float,
) -> Iterable[tuple[int, "np.ndarray"]]:
    """Lazily decode the leading ``scan_seconds`` of a video.

    Yields ``(ts_ns, frame)`` tuples. ``ts_ns`` is the capture's
    ``CAP_PROP_POS_MSEC`` reading taken right after each frame is read,
    converted to nanoseconds. The timestamps therefore follow the video
    file's own timeline rather than the wall clock
    (``time.monotonic_ns()`` is deliberately not used), which keeps the
    auto-sync result deterministic across runs (AC-10).

    Decoding ends at the first failed read or at the first frame whose
    position exceeds ``scan_seconds``. Because this is a generator, the
    capture is opened — and the errors below are raised — only once
    iteration starts; the capture is released when iteration ends.

    Raises:
        ReplayInputAdapterError: When cv2 cannot be imported or the
            capture cannot be opened.
    """
    try:
        import cv2 as _cv2  # type: ignore[import-not-found]
    except ImportError as exc:
        raise ReplayInputAdapterError(
            "opencv-python is required for replay auto-sync but is "
            "not importable in this binary"
        ) from exc

    capture = _cv2.VideoCapture(str(video_path))
    if not capture.isOpened():
        capture.release()
        raise ReplayInputAdapterError(
            f"video file unreadable / unsupported codec: {video_path}"
        )
    try:
        scan_limit_ms = scan_seconds * 1000.0
        while True:
            grabbed, image = capture.read()
            if not grabbed or image is None:
                return
            position_ms = float(capture.get(_cv2.CAP_PROP_POS_MSEC))
            if position_ms > scan_limit_ms:
                return
            yield int(position_ms * 1_000_000), image
    finally:
        capture.release()
|
||
|
||
|
||
def _compute_flow_magnitudes(
    frames: list[tuple[int, "np.ndarray"]],
) -> tuple[tuple[int, float], ...]:
    """Mean optical-flow magnitude between each pair of consecutive frames.

    Farneback dense flow (``cv2.calcOpticalFlowFarneback``) is used
    rather than pyramidal LK: it produces a flow field over the whole
    image and carries no feature-tracking state from frame to frame, so
    identical input frames give an identical result (AC-10).

    Each frame is converted to grayscale exactly once, and only the
    previous and current grayscale images are held at any time, so peak
    memory does not grow with the number of frames.

    Args:
        frames: ``(ts_ns, frame)`` tuples in playback order. Frames are
            converted with ``COLOR_BGR2GRAY``, so they are assumed to be
            BGR images.

    Returns:
        ``((ts_ns_of_second_frame, mean_magnitude_px), ...)`` — one entry
        per consecutive pair; empty when fewer than 2 frames are given.

    Raises:
        ReplayInputAdapterError: When cv2 or numpy cannot be imported.
    """
    try:
        import cv2 as _cv2  # type: ignore[import-not-found]
        import numpy as _np  # type: ignore[import-not-found]
    except ImportError as exc:  # pragma: no cover — guarded at call sites.
        raise ReplayInputAdapterError(
            "opencv-python + numpy are required for replay auto-sync"
        ) from exc
    if len(frames) < 2:
        return ()

    frame_iter = iter(frames)
    _, first_frame = next(frame_iter)
    prev_gray = _cv2.cvtColor(first_frame, _cv2.COLOR_BGR2GRAY)

    out: list[tuple[int, float]] = []
    for curr_ts, frame in frame_iter:
        curr_gray = _cv2.cvtColor(frame, _cv2.COLOR_BGR2GRAY)
        flow = _cv2.calcOpticalFlowFarneback(
            prev_gray,
            curr_gray,
            None,
            pyr_scale=0.5,
            levels=3,
            winsize=15,
            iterations=3,
            poly_n=5,
            poly_sigma=1.2,
            flags=0,
        )
        # ``flow`` shape: (H, W, 2) — dx + dy per pixel.
        magnitudes = _np.sqrt(flow[..., 0] ** 2 + flow[..., 1] ** 2)
        out.append((curr_ts, float(magnitudes.mean())))
        # The current frame becomes the reference for the next pair.
        prev_gray = curr_gray
    return tuple(out)
|
||
|
||
|
||
# BUILD-flag check kept local to this module (presumably mirroring the
# equivalent helper in other replay modules — verify against them).
def _build_flag_on(name: str) -> bool:
    """Report whether environment variable ``name`` holds a truthy value.

    Surrounding whitespace and letter case are ignored; the accepted
    values are ``on``, ``1``, ``true`` and ``yes``. An unset variable
    counts as off.
    """
    value = os.environ.get(name, "").strip().lower()
    return value in {"on", "1", "true", "yes"}
|
||
|
||
|
||
# ---------------------------------------------------------------------
|
||
# AZ-698 — mid-flight cross-correlation aligner
|
||
#
|
||
# The AZ-405 head-takeoff detector only works when the video covers
|
||
# the take-off moment. For mid-flight slices (e.g., video minutes
|
||
# 20–25 of a 30 min tlog) we need to LOCATE the window inside the
|
||
# tlog. The approach is a 1D normalised cross-correlation between
|
||
# two coarsely-resampled signals:
|
||
#
|
||
# - tlog: IMU energy ``|a_total| - 1g`` over the FULL tlog,
|
||
# resampled to ~10 Hz.
|
||
# - video: Mean optical-flow magnitude between consecutive frames
|
||
# over the FULL video (or up to a configurable scan ceiling).
|
||
#
|
||
# Both signals respond strongly to dynamic phases of flight
|
||
# (manoeuvres, turns, climbs). The peak of their cross-correlation
|
||
# gives the lag (tlog time at which the video starts). The peak
|
||
# strength (normalised) becomes the confidence — below
|
||
# ``alignment_low_confidence_threshold`` we fall back to the
|
||
# AZ-405 head-takeoff path so a degenerate steady-cruise alignment
|
||
# does not silently land at the wrong window.
|
||
|
||
|
||
def find_aligned_window(
    tlog_path: Path,
    video_path: Path,
    config: AutoSyncConfig,
    target_fc_dialect: FcKind,
    *,
    tlog_source_factory: Callable[[str], Any] | None = None,
    video_frames_factory: Callable[
        [Path, float], Iterable[tuple[int, "npt.NDArray[np.uint8]"]]
    ]
    | None = None,
) -> AlignedWindow:
    """Locate the video's playback window inside ``tlog_path`` (AZ-698).

    Args:
        tlog_path: Tlog file. The IMU energy stream is loaded with a
            message budget of
            :attr:`AutoSyncConfig.prescan_max_messages` × 10, because the
            aligner needs far more than the head of the flight.
        video_path: Video file. The leading
            :attr:`AutoSyncConfig.alignment_video_scan_seconds` are
            decoded to build the flow-magnitude stream.
        config: Operator-tunable thresholds.
        target_fc_dialect: ``ARDUPILOT_PLANE`` or ``INAV`` — validated
            the same way as in :func:`detect_tlog_takeoff`.
        tlog_source_factory: Test injection — stands in for the
            ``pymavlink`` open call.
        video_frames_factory: Test injection — stands in for the
            ``cv2.VideoCapture`` frame iteration.

    Returns:
        An :class:`AlignedWindow` whose ``tlog_start_ns`` /
        ``tlog_end_ns`` bound the located window, with the matching
        ``offset_ms`` and the correlation-peak ``confidence``. When the
        confidence is below
        :attr:`AutoSyncConfig.alignment_low_confidence_threshold`, the
        window comes from the AZ-405 head-takeoff path instead and
        carries ``fallback_used=True``.

    Raises:
        ReplayInputAdapterError: When the dialect is unsupported, the
            tlog or video file is missing, or fewer than 2 IMU samples,
            frames, or flow samples are available; errors raised by the
            correlation kernel propagate as well.
    """
    if target_fc_dialect not in (FcKind.ARDUPILOT_PLANE, FcKind.INAV):
        raise ReplayInputAdapterError(
            f"target_fc_dialect must be ARDUPILOT_PLANE or INAV; got {target_fc_dialect!r}"
        )
    if not tlog_path.is_file():
        raise ReplayInputAdapterError(f"tlog file not found: {tlog_path}")
    if not video_path.is_file():
        raise ReplayInputAdapterError(f"video file not found: {video_path}")

    tlog_energy = _load_tlog_imu_energy_stream(
        tlog_path,
        max_messages=config.prescan_max_messages * 10,
        source_factory=tlog_source_factory,
    )
    energy_count = len(tlog_energy)
    if energy_count < 2:
        raise ReplayInputAdapterError(
            f"tlog yielded {energy_count} IMU sample(s); "
            "need ≥ 2 for cross-correlation alignment"
        )

    # Production reads through cv2; tests may substitute their own source.
    read_frames = (
        _read_video_frames if video_frames_factory is None else video_frames_factory
    )
    frames = list(read_frames(video_path, config.alignment_video_scan_seconds))
    frame_count = len(frames)
    if frame_count < 2:
        raise ReplayInputAdapterError(
            f"video yielded {frame_count} frame(s); "
            "need ≥ 2 for cross-correlation alignment"
        )

    flow_samples = _compute_flow_magnitudes(frames)
    flow_count = len(flow_samples)
    if flow_count < 2:
        raise ReplayInputAdapterError(
            f"video produced {flow_count} flow sample(s); "
            "need ≥ 2 for cross-correlation alignment"
        )

    return _align_via_cross_correlation(
        tlog_energy=tlog_energy,
        flow_samples=flow_samples,
        config=config,
        target_fc_dialect=target_fc_dialect,
        tlog_path=tlog_path,
        tlog_source_factory=tlog_source_factory,
    )
|
||
|
||
|
||
def _align_via_cross_correlation(
    *,
    tlog_energy: tuple[tuple[int, float], ...],
    flow_samples: tuple[tuple[int, float], ...],
    config: AutoSyncConfig,
    target_fc_dialect: FcKind,
    tlog_path: Path,
    tlog_source_factory: Callable[[str], Any] | None,
) -> AlignedWindow:
    """Compute kernel: turn the two pre-loaded streams into an :class:`AlignedWindow`.

    Both streams are resampled onto a uniform grid (at least 1 Hz, per
    ``config.alignment_resample_hz``). The video flow stream is then
    slid along the tlog energy stream and scored with a normalised
    cross-correlation. The best lag gives the window start and the peak
    score, clamped to [0, 1], gives the confidence.

    Kept separate from :func:`find_aligned_window` so tests can drive
    the correlation arithmetic with synthetic streams. Note that the
    low-confidence branch still goes through
    :func:`_fallback_to_head_takeoff`, which uses ``tlog_path`` /
    ``tlog_source_factory``.

    Raises:
        ReplayInputAdapterError: When either resampled stream has fewer
            than 2 samples, or the video stream is longer than the tlog
            stream.
    """
    import numpy as _np

    grid_hz = max(config.alignment_resample_hz, 1.0)
    period_ns = int(1_000_000_000 / grid_hz)

    tlog_origin_ns = tlog_energy[0][0]
    tlog_grid = _resample_uniform(tlog_energy, period_ns, tlog_origin_ns)
    if len(tlog_grid) < 2:
        raise ReplayInputAdapterError(
            "tlog resampled stream has < 2 samples; cannot cross-correlate"
        )

    video_origin_ns = flow_samples[0][0]
    flow_grid = _resample_uniform(flow_samples, period_ns, video_origin_ns)
    if len(flow_grid) < 2:
        raise ReplayInputAdapterError(
            "video flow stream has < 2 samples; cannot cross-correlate"
        )
    if len(flow_grid) > len(tlog_grid):
        raise ReplayInputAdapterError(
            "video flow stream is longer than the tlog energy stream; "
            "auto-trim requires the video to be a slice of a longer tlog"
        )

    tlog_arr = _np.asarray(tlog_grid, dtype=_np.float64)
    flow_arr = _np.asarray(flow_grid, dtype=_np.float64)
    template = _zero_mean_normalise(flow_arr)

    # Defaults describe a flat video: no information to correlate, so the
    # zero confidence routes the result to the fallback path below.
    peak_idx = 0
    confidence = 0.0
    if _np.linalg.norm(template) != 0.0:
        # Every tlog window is zero-meaned and unit-normed on its own
        # before the dot product, so the peak does not depend on the
        # local signal magnitude (a single full-length normalisation
        # would drown short bursts).
        span = len(template)
        lag_count = len(tlog_arr) - span + 1
        scores = _np.zeros(lag_count, dtype=_np.float64)
        for lag in range(lag_count):
            segment = tlog_arr[lag : lag + span]
            centred = segment - segment.mean()
            magnitude = float(_np.linalg.norm(centred))
            if magnitude > 0.0:
                scores[lag] = float(_np.dot(centred / magnitude, template))
        peak_idx = int(_np.argmax(scores))
        confidence = max(0.0, min(1.0, float(scores[peak_idx])))

    video_duration_ns = _stream_duration_ns(flow_samples)
    if confidence < config.alignment_low_confidence_threshold:
        return _fallback_to_head_takeoff(
            tlog_path=tlog_path,
            tlog_source_factory=tlog_source_factory,
            target_fc_dialect=target_fc_dialect,
            config=config,
            tlog_energy=tlog_energy,
            video_origin_ns=video_origin_ns,
            video_flow_duration_ns=video_duration_ns,
            confidence=confidence,
        )

    # Absolute position on the tlog timeline where the video starts. It
    # must be a timeline value, NOT a delta, because the adapter's seek
    # check compares it with the raw pymavlink ``msg._timestamp``.
    tlog_start_ns = tlog_origin_ns + peak_idx * period_ns
    # ``offset_ms`` is what must be added to a video timestamp to land on
    # the tlog timeline — the same convention ``validate_offset_or_fail``
    # applies (``vts + offset_ns``).
    return AlignedWindow(
        tlog_start_ns=tlog_start_ns,
        tlog_end_ns=tlog_start_ns + video_duration_ns,
        offset_ms=(tlog_start_ns - video_origin_ns) // 1_000_000,
        confidence=confidence,
        fallback_used=False,
    )
|
||
|
||
|
||
def _stream_duration_ns(
|
||
samples: tuple[tuple[int, float], ...],
|
||
) -> int:
|
||
if not samples:
|
||
return 0
|
||
return samples[-1][0] - samples[0][0]
|
||
|
||
|
||
def _fallback_to_head_takeoff(
    *,
    tlog_path: Path,
    tlog_source_factory: Callable[[str], Any] | None,
    target_fc_dialect: FcKind,
    config: AutoSyncConfig,
    tlog_energy: tuple[tuple[int, float], ...],
    video_origin_ns: int,
    video_flow_duration_ns: int,
    confidence: float,
) -> AlignedWindow:
    """Build the low-confidence result from the head-takeoff detector.

    Runs :func:`detect_tlog_takeoff` and picks the window start in
    this order of preference:

    1. the detector's ``onset_ns`` when its confidence is above zero;
    2. the timestamp of the first ``tlog_energy`` sample, if any;
    3. ``0`` when neither is available.

    The window end is the start plus ``video_flow_duration_ns`` and
    ``offset_ms`` is the floor-divided millisecond difference between
    the start and ``video_origin_ns``. ``fallback_used`` is always
    ``True`` on this path. The ``confidence`` argument is passed
    through unchanged — it is the caller's value, not the takeoff
    detector's.
    """
    head_takeoff = detect_tlog_takeoff(
        tlog_path,
        target_fc_dialect,
        config,
        source_factory=tlog_source_factory,
    )
    if head_takeoff.confidence > 0.0:
        window_start_ns = head_takeoff.onset_ns
    else:
        # No usable takeoff: anchor on the first energy sample, or on
        # zero when the energy stream is empty as well.
        window_start_ns = tlog_energy[0][0] if tlog_energy else 0
    return AlignedWindow(
        tlog_start_ns=window_start_ns,
        tlog_end_ns=window_start_ns + video_flow_duration_ns,
        offset_ms=(window_start_ns - video_origin_ns) // 1_000_000,
        confidence=confidence,
        fallback_used=True,
    )
|
||
|
||
|
||
def _resample_uniform(
|
||
samples: tuple[tuple[int, float], ...],
|
||
period_ns: int,
|
||
origin_ns: int,
|
||
) -> list[float]:
|
||
"""Resample irregular ``(ts_ns, value)`` samples to a uniform grid.
|
||
|
||
Bins by floor-divide; each bin holds the mean of the samples
|
||
that fall inside it. Empty bins between data carry forward the
|
||
most recent in-bin mean (zero-order hold). Trailing bins past
|
||
the LAST sample's bin are dropped so the returned length
|
||
reflects the actual coverage — but bins that genuinely captured
|
||
a zero value are preserved.
|
||
"""
|
||
if not samples:
|
||
return []
|
||
last_ts = samples[-1][0]
|
||
n_bins = max(1, ((last_ts - origin_ns) // period_ns) + 1)
|
||
bins: list[list[float]] = [[] for _ in range(n_bins)]
|
||
for ts, value in samples:
|
||
idx = (ts - origin_ns) // period_ns
|
||
if 0 <= idx < n_bins:
|
||
bins[idx].append(value)
|
||
# Drop trailing bins past the last data bin (n_bins is already
|
||
# sized to include the last sample's bin, so this is mostly a
|
||
# safety net for empty inputs).
|
||
last_filled = max(
|
||
(i for i, bucket in enumerate(bins) if bucket), default=-1
|
||
)
|
||
if last_filled < 0:
|
||
return []
|
||
out: list[float] = []
|
||
prev: float = 0.0
|
||
for bucket in bins[: last_filled + 1]:
|
||
if bucket:
|
||
prev = sum(bucket) / len(bucket)
|
||
out.append(prev)
|
||
return out
|
||
|
||
|
||
def _zero_mean_normalise(
|
||
arr: "npt.NDArray[np.float64]",
|
||
) -> "npt.NDArray[np.float64]":
|
||
import numpy as _np
|
||
|
||
centred: "npt.NDArray[np.float64]" = arr - arr.mean()
|
||
norm = float(_np.linalg.norm(centred))
|
||
if norm == 0.0:
|
||
return centred
|
||
result: "npt.NDArray[np.float64]" = centred / norm
|
||
return result
|
||
|
||
|
||
def _load_tlog_imu_energy_stream(
    tlog_path: Path,
    *,
    max_messages: int,
    source_factory: Callable[[str], Any] | None,
) -> tuple[tuple[int, float], ...]:
    """Collect ``(ts_ns, energy)`` samples from the tlog's IMU messages.

    Opens the tlog through :func:`_open_tlog` and pulls at most
    ``max_messages`` ``RAW_IMU`` / ``SCALED_IMU2`` messages, stopping
    early when the source returns ``None``. For each message the three
    accelerometer axes (``xacc``, ``yacc``, ``zacc``; a missing
    attribute counts as ``0.0``) are divided by ``_MG_PER_G``, combined
    into a total magnitude, and the absolute deviation of that
    magnitude from ``_REST_TOTAL_G`` becomes the energy value.

    Raises:
        ReplayInputAdapterError: if ``recv_match`` raises while
            scanning.

    The source is closed on the way out when it exposes ``close``;
    errors from closing are deliberately ignored (best-effort cleanup).
    """
    source = _open_tlog(tlog_path, source_factory=source_factory)
    collected: list[tuple[int, float]] = []
    try:
        for _ in range(max_messages):
            try:
                msg = source.recv_match(
                    type=["RAW_IMU", "SCALED_IMU2"],
                    blocking=False,
                )
            except Exception as exc:  # pragma: no cover — defensive.
                raise ReplayInputAdapterError(
                    f"tlog scan failed on {tlog_path}: {exc!r}"
                ) from exc
            if msg is None:
                # Source exhausted before hitting the message cap.
                break
            stamp_ns = _msg_timestamp_ns(msg)
            # presumably raw axes are in milli-g and this converts to g — TODO confirm
            gx, gy, gz = [
                float(getattr(msg, axis, 0.0)) / _MG_PER_G
                for axis in ("xacc", "yacc", "zacc")
            ]
            magnitude_g = math.sqrt(gx * gx + gy * gy + gz * gz)
            deviation = abs(magnitude_g - _REST_TOTAL_G)
            collected.append((stamp_ns, deviation))
    finally:
        if hasattr(source, "close"):
            try:
                source.close()
            except Exception:  # pragma: no cover — defensive.
                pass
    return tuple(collected)
|