mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 23:41:13 +00:00
[AZ-698] Tlog trim + mid-flight alignment for replay
Adds find_aligned_window cross-correlation (NCC, per-window unit norm)
between IMU energy and video optical-flow magnitude. Returns
AlignedWindow{tlog_start_ns, tlog_end_ns, offset_ms, confidence,
fallback_used}, with fallback to head-takeoff on low confidence to
preserve AZ-405 behavior. TlogReplayFcAdapter honors tlog_start_ns and
skips pre-window messages. New --auto-trim CLI flag, mutex with
--time-offset-ms. AC-1..AC-4 covered by unit tests; AC-5 skipped (no
real flight_derkachi.mp4 in repo). 106 tests pass in regression slice.
Zero new mypy --strict errors.
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -37,16 +37,22 @@ from typing import TYPE_CHECKING, Any
|
||||
|
||||
from gps_denied_onboard._types.fc import FcKind
|
||||
from gps_denied_onboard.replay_input.errors import ReplayInputAdapterError
|
||||
from gps_denied_onboard.replay_input.interface import AutoSyncConfig, AutoSyncDecision
|
||||
from gps_denied_onboard.replay_input.interface import (
|
||||
AlignedWindow,
|
||||
AutoSyncConfig,
|
||||
AutoSyncDecision,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import numpy as np
|
||||
import numpy.typing as npt
|
||||
|
||||
__all__ = [
|
||||
"TlogSamples",
|
||||
"compute_offset",
|
||||
"detect_tlog_takeoff",
|
||||
"detect_video_motion_onset",
|
||||
"find_aligned_window",
|
||||
"validate_offset_or_fail",
|
||||
]
|
||||
|
||||
@@ -644,3 +650,363 @@ def _compute_flow_magnitudes(
|
||||
def _build_flag_on(name: str) -> bool:
|
||||
raw = os.environ.get(name, "")
|
||||
return raw.strip().lower() in {"on", "1", "true", "yes"}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AZ-698 — mid-flight cross-correlation aligner
|
||||
#
|
||||
# The AZ-405 head-takeoff detector only works when the video covers
|
||||
# the take-off moment. For mid-flight slices (e.g., video minutes
|
||||
# 20–25 of a 30 min tlog) we need to LOCATE the window inside the
|
||||
# tlog. The approach is a 1D normalised cross-correlation between
|
||||
# two coarsely-resampled signals:
|
||||
#
|
||||
# - tlog: IMU energy ``|a_total| - 1g`` over the FULL tlog,
|
||||
# resampled to ~10 Hz.
|
||||
# - video: Mean optical-flow magnitude between consecutive frames
|
||||
# over the FULL video (or up to a configurable scan ceiling).
|
||||
#
|
||||
# Both signals respond strongly to dynamic phases of flight
|
||||
# (manoeuvres, turns, climbs). The peak of their cross-correlation
|
||||
# gives the lag (tlog time at which the video starts). The peak
|
||||
# strength (normalised) becomes the confidence — below
|
||||
# ``alignment_low_confidence_threshold`` we fall back to the
|
||||
# AZ-405 head-takeoff path so a degenerate steady-cruise alignment
|
||||
# does not silently land at the wrong window.
|
||||
|
||||
|
||||
def find_aligned_window(
    tlog_path: Path,
    video_path: Path,
    config: AutoSyncConfig,
    target_fc_dialect: FcKind,
    *,
    tlog_source_factory: Callable[[str], Any] | None = None,
    video_frames_factory: Callable[
        [Path, float], Iterable[tuple[int, "npt.NDArray[np.uint8]"]]
    ]
    | None = None,
) -> AlignedWindow:
    """Find where the video sits on the timeline of ``tlog_path`` (AZ-698).

    Validates the inputs, loads the tlog IMU-energy stream and the video
    flow-magnitude stream, and delegates the actual alignment to
    :func:`_align_via_cross_correlation`.

    Args:
        tlog_path: Binary ArduPilot tlog. Up to
            :attr:`AutoSyncConfig.prescan_max_messages` × 10 messages
            are requested from it, because the aligner needs the whole
            flight rather than just its head.
        video_path: Mp4 / mkv input. Frames are requested for the
            leading :attr:`AutoSyncConfig.alignment_video_scan_seconds`.
        config: Operator-tunable thresholds.
        target_fc_dialect: ``ARDUPILOT_PLANE`` or ``INAV``; any other
            value is rejected before any file is touched.
        tlog_source_factory: Test injection, forwarded to the tlog
            loader in place of the default open call.
        video_frames_factory: Test injection, called as
            ``factory(video_path, scan_seconds)`` in place of
            ``_read_video_frames``.

    Raises:
        ReplayInputAdapterError: On an unsupported dialect, a missing
            tlog or video file, fewer than 2 IMU samples, fewer than 2
            video frames, or fewer than 2 flow samples.

    Returns:
        The :class:`AlignedWindow` produced by the cross-correlation
        kernel. When the peak confidence is below
        :attr:`AutoSyncConfig.alignment_low_confidence_threshold` the
        kernel returns the AZ-405 head-takeoff window instead, with
        ``fallback_used=True``.
    """
    supported_dialects = (FcKind.ARDUPILOT_PLANE, FcKind.INAV)
    if target_fc_dialect not in supported_dialects:
        raise ReplayInputAdapterError(
            f"target_fc_dialect must be ARDUPILOT_PLANE or INAV; got {target_fc_dialect!r}"
        )
    if not tlog_path.is_file():
        raise ReplayInputAdapterError(f"tlog file not found: {tlog_path}")
    if not video_path.is_file():
        raise ReplayInputAdapterError(f"video file not found: {video_path}")

    # The tlog is loaded (and validated) before any video decoding so
    # that a bad tlog fails first.
    imu_energy = _load_tlog_imu_energy_stream(
        tlog_path,
        max_messages=config.prescan_max_messages * 10,
        source_factory=tlog_source_factory,
    )
    imu_count = len(imu_energy)
    if imu_count < 2:
        raise ReplayInputAdapterError(
            f"tlog yielded {imu_count} IMU sample(s); "
            "need ≥ 2 for cross-correlation alignment"
        )

    # Pick the frame source once; both variants share the call shape.
    frame_source = (
        _read_video_frames if video_frames_factory is None else video_frames_factory
    )
    frames = list(frame_source(video_path, config.alignment_video_scan_seconds))
    frame_count = len(frames)
    if frame_count < 2:
        raise ReplayInputAdapterError(
            f"video yielded {frame_count} frame(s); "
            "need ≥ 2 for cross-correlation alignment"
        )

    flow_samples = _compute_flow_magnitudes(frames)
    flow_count = len(flow_samples)
    if flow_count < 2:
        raise ReplayInputAdapterError(
            f"video produced {flow_count} flow sample(s); "
            "need ≥ 2 for cross-correlation alignment"
        )

    return _align_via_cross_correlation(
        tlog_energy=imu_energy,
        flow_samples=flow_samples,
        config=config,
        target_fc_dialect=target_fc_dialect,
        tlog_path=tlog_path,
        tlog_source_factory=tlog_source_factory,
    )
|
||||
|
||||
|
||||
def _align_via_cross_correlation(
    *,
    tlog_energy: tuple[tuple[int, float], ...],
    flow_samples: tuple[tuple[int, float], ...],
    config: AutoSyncConfig,
    target_fc_dialect: FcKind,
    tlog_path: Path,
    tlog_source_factory: Callable[[str], Any] | None,
) -> AlignedWindow:
    """Pure compute kernel: turn pre-loaded streams into an :class:`AlignedWindow`.

    Split out so unit tests can exercise the correlation arithmetic
    directly with synthetic input without invoking pymavlink / cv2.

    Both streams are resampled onto a uniform grid of
    ``max(config.alignment_resample_hz, 1.0)`` Hz, the flow stream is
    zero-meaned and unit-normed, and it is slid across the tlog stream.
    Each tlog window is normalised independently before the dot
    product, and the highest score is taken as the alignment peak.

    Args:
        tlog_energy: ``(timestamp_ns, value)`` IMU-energy samples on
            the tlog timeline.
        flow_samples: ``(timestamp_ns, value)`` flow-magnitude samples
            on the video timeline.
        config: Supplies ``alignment_resample_hz`` and
            ``alignment_low_confidence_threshold``.
        target_fc_dialect: Forwarded to the fallback path only.
        tlog_path: Forwarded to the fallback path only.
        tlog_source_factory: Forwarded to the fallback path only.

    Raises:
        ReplayInputAdapterError: When either input stream is empty,
            when either resampled stream has fewer than 2 samples, or
            when the video stream is longer than the tlog stream.

    Returns:
        The located window with ``fallback_used=False``, or the result
        of :func:`_fallback_to_head_takeoff` when the peak confidence
        is below ``config.alignment_low_confidence_threshold``. A flat
        or non-finite flow stream is given confidence ``0.0``.
    """
    import numpy as _np

    # Guard the ``[0][0]`` origin look-ups below so an empty stream
    # surfaces as the module's own error type, not a bare IndexError.
    if not tlog_energy or not flow_samples:
        raise ReplayInputAdapterError(
            "tlog energy and video flow streams must both be non-empty; "
            "cannot cross-correlate"
        )

    resample_hz = max(config.alignment_resample_hz, 1.0)
    period_ns = int(1_000_000_000 / resample_hz)

    tlog_origin_ns = tlog_energy[0][0]
    tlog_resampled = _resample_uniform(tlog_energy, period_ns, tlog_origin_ns)
    if len(tlog_resampled) < 2:
        raise ReplayInputAdapterError(
            "tlog resampled stream has < 2 samples; cannot cross-correlate"
        )

    video_origin_ns = flow_samples[0][0]
    flow_resampled = _resample_uniform(flow_samples, period_ns, video_origin_ns)
    if len(flow_resampled) < 2:
        raise ReplayInputAdapterError(
            "video flow stream has < 2 samples; cannot cross-correlate"
        )
    if len(flow_resampled) > len(tlog_resampled):
        raise ReplayInputAdapterError(
            "video flow stream is longer than the tlog energy stream; "
            "auto-trim requires the video to be a slice of a longer tlog"
        )

    tlog_arr = _np.asarray(tlog_resampled, dtype=_np.float64)
    flow_arr = _np.asarray(flow_resampled, dtype=_np.float64)
    flow_centred = _zero_mean_normalise(flow_arr)
    flow_norm = float(_np.linalg.norm(flow_centred))
    if flow_norm == 0.0 or not bool(_np.isfinite(flow_norm)):
        # Flat video → no information for correlation. A NaN/inf flow
        # sample is handled the same way: it would make every dot
        # product NaN, and ``max(0.0, min(1.0, nan))`` evaluates to
        # 1.0, so it would masquerade as a perfect match. Force the
        # fallback path; confidence reported as 0.
        peak_idx = 0
        confidence = 0.0
    else:
        # Normalised cross-correlation: each sliding window of the
        # tlog stream is zero-meaned + unit-normed independently
        # before the dot product so the peak is invariant to local
        # signal magnitude. Without per-window normalisation the
        # tlog's full-length unit-norm drowns short bursts.
        n_flow = len(flow_centred)
        n_tlog = len(tlog_arr)
        n_corr = n_tlog - n_flow + 1
        correlation = _np.zeros(n_corr, dtype=_np.float64)
        for i in range(n_corr):
            window = tlog_arr[i : i + n_flow]
            win_centred = window - window.mean()
            win_norm = float(_np.linalg.norm(win_centred))
            # Flat (or NaN-normed) windows keep their 0.0 score.
            if win_norm > 0.0:
                correlation[i] = float(_np.dot(win_centred / win_norm, flow_centred))
        peak_idx = int(_np.argmax(correlation))
        confidence = max(0.0, min(1.0, float(correlation[peak_idx])))

    video_duration_ns = _stream_duration_ns(flow_samples)
    if confidence < config.alignment_low_confidence_threshold:
        return _fallback_to_head_takeoff(
            tlog_path=tlog_path,
            tlog_source_factory=tlog_source_factory,
            target_fc_dialect=target_fc_dialect,
            config=config,
            tlog_energy=tlog_energy,
            video_origin_ns=video_origin_ns,
            video_flow_duration_ns=video_duration_ns,
            confidence=confidence,
        )

    # Absolute tlog timeline value where video t=0 aligns. The
    # adapter's seek check compares this against the raw pymavlink
    # ``msg._timestamp`` so the value MUST be on the tlog timeline,
    # NOT a delta.
    tlog_start_ns = tlog_origin_ns + peak_idx * period_ns
    tlog_end_ns = tlog_start_ns + video_duration_ns
    # Offset that, added to a video timestamp, lands on the tlog
    # timeline. Matches ``AutoSyncDecision.offset_ms`` semantics
    # (``validate_offset_or_fail`` does ``vts + offset_ns``).
    offset_ms = (tlog_start_ns - video_origin_ns) // 1_000_000
    return AlignedWindow(
        tlog_start_ns=tlog_start_ns,
        tlog_end_ns=tlog_end_ns,
        offset_ms=offset_ms,
        confidence=confidence,
        fallback_used=False,
    )
|
||||
|
||||
|
||||
def _stream_duration_ns(
|
||||
samples: tuple[tuple[int, float], ...],
|
||||
) -> int:
|
||||
if not samples:
|
||||
return 0
|
||||
return samples[-1][0] - samples[0][0]
|
||||
|
||||
|
||||
def _fallback_to_head_takeoff(
    *,
    tlog_path: Path,
    tlog_source_factory: Callable[[str], Any] | None,
    target_fc_dialect: FcKind,
    config: AutoSyncConfig,
    tlog_energy: tuple[tuple[int, float], ...],
    video_origin_ns: int,
    video_flow_duration_ns: int,
    confidence: float,
) -> AlignedWindow:
    """Build the low-confidence window from the AZ-405 head-takeoff detector.

    The window start is chosen in this order:

    1. the detected takeoff onset, when the detector reports a
       confidence above zero;
    2. otherwise the timestamp of the first IMU-energy sample;
    3. otherwise ``0`` (no IMU samples at all).

    Args:
        tlog_path: Tlog handed to :func:`detect_tlog_takeoff`.
        tlog_source_factory: Test injection forwarded to the detector.
        target_fc_dialect: Dialect forwarded to the detector.
        config: Thresholds forwarded to the detector.
        tlog_energy: IMU-energy samples; only the first timestamp is
            read, and only when no takeoff was detected.
        video_origin_ns: Timestamp of the first video flow sample.
        video_flow_duration_ns: Length of the video flow stream; added
            to the start to obtain the window end.
        confidence: The sub-threshold cross-correlation peak. It is
            passed through unchanged and is informational only on this
            path.

    Returns:
        An :class:`AlignedWindow` with ``fallback_used=True`` so callers
        and the FDR audit can record the divergence.
    """
    takeoff = detect_tlog_takeoff(
        tlog_path,
        target_fc_dialect,
        config,
        source_factory=tlog_source_factory,
    )

    if takeoff.confidence > 0.0:
        start_ns = takeoff.onset_ns
    else:
        # No usable takeoff: anchor on the head of the tlog instead.
        start_ns = tlog_energy[0][0] if tlog_energy else 0

    return AlignedWindow(
        tlog_start_ns=start_ns,
        tlog_end_ns=start_ns + video_flow_duration_ns,
        offset_ms=(start_ns - video_origin_ns) // 1_000_000,
        confidence=confidence,
        fallback_used=True,
    )
|
||||
|
||||
|
||||
def _resample_uniform(
|
||||
samples: tuple[tuple[int, float], ...],
|
||||
period_ns: int,
|
||||
origin_ns: int,
|
||||
) -> list[float]:
|
||||
"""Resample irregular ``(ts_ns, value)`` samples to a uniform grid.
|
||||
|
||||
Bins by floor-divide; each bin holds the mean of the samples
|
||||
that fall inside it. Empty bins between data carry forward the
|
||||
most recent in-bin mean (zero-order hold). Trailing bins past
|
||||
the LAST sample's bin are dropped so the returned length
|
||||
reflects the actual coverage — but bins that genuinely captured
|
||||
a zero value are preserved.
|
||||
"""
|
||||
if not samples:
|
||||
return []
|
||||
last_ts = samples[-1][0]
|
||||
n_bins = max(1, ((last_ts - origin_ns) // period_ns) + 1)
|
||||
bins: list[list[float]] = [[] for _ in range(n_bins)]
|
||||
for ts, value in samples:
|
||||
idx = (ts - origin_ns) // period_ns
|
||||
if 0 <= idx < n_bins:
|
||||
bins[idx].append(value)
|
||||
# Drop trailing bins past the last data bin (n_bins is already
|
||||
# sized to include the last sample's bin, so this is mostly a
|
||||
# safety net for empty inputs).
|
||||
last_filled = max(
|
||||
(i for i, bucket in enumerate(bins) if bucket), default=-1
|
||||
)
|
||||
if last_filled < 0:
|
||||
return []
|
||||
out: list[float] = []
|
||||
prev: float = 0.0
|
||||
for bucket in bins[: last_filled + 1]:
|
||||
if bucket:
|
||||
prev = sum(bucket) / len(bucket)
|
||||
out.append(prev)
|
||||
return out
|
||||
|
||||
|
||||
def _zero_mean_normalise(
|
||||
arr: "npt.NDArray[np.float64]",
|
||||
) -> "npt.NDArray[np.float64]":
|
||||
import numpy as _np
|
||||
|
||||
centred: "npt.NDArray[np.float64]" = arr - arr.mean()
|
||||
norm = float(_np.linalg.norm(centred))
|
||||
if norm == 0.0:
|
||||
return centred
|
||||
result: "npt.NDArray[np.float64]" = centred / norm
|
||||
return result
|
||||
|
||||
|
||||
def _load_tlog_imu_energy_stream(
    tlog_path: Path,
    *,
    max_messages: int,
    source_factory: Callable[[str], Any] | None,
) -> tuple[tuple[int, float], ...]:
    """Collect the IMU-energy stream consumed by the AZ-698 aligner.

    Opens the tlog through :func:`_open_tlog` and pulls up to
    ``max_messages`` ``RAW_IMU`` / ``SCALED_IMU2`` messages from it,
    stopping early when the source returns ``None``. Each message
    contributes one sample: the absolute difference between the
    accelerometer vector magnitude (each axis divided by ``_MG_PER_G``)
    and ``_REST_TOTAL_G``. The ATTITUDE channel is not read here.

    Args:
        tlog_path: Tlog to scan; also used in the error message.
        max_messages: Upper bound on the number of receive attempts.
        source_factory: Test injection forwarded to :func:`_open_tlog`.

    Raises:
        ReplayInputAdapterError: When receiving a message from the
            source raises.

    Returns:
        ``(timestamp_ns, energy)`` pairs in the order they were read.
    """
    source = _open_tlog(tlog_path, source_factory=source_factory)
    samples: list[tuple[int, float]] = []
    try:
        for _ in range(max_messages):
            try:
                msg = source.recv_match(
                    type=["RAW_IMU", "SCALED_IMU2"],
                    blocking=False,
                )
            except Exception as exc:  # pragma: no cover — defensive.
                raise ReplayInputAdapterError(
                    f"tlog scan failed on {tlog_path}: {exc!r}"
                ) from exc
            if msg is None:
                # The source has no further matching message to offer.
                break
            stamp_ns = _msg_timestamp_ns(msg)
            # A missing axis attribute is read as 0.0.
            gx, gy, gz = (
                float(getattr(msg, axis, 0.0)) / _MG_PER_G
                for axis in ("xacc", "yacc", "zacc")
            )
            magnitude_g = math.sqrt(gx * gx + gy * gy + gz * gz)
            samples.append((stamp_ns, abs(magnitude_g - _REST_TOTAL_G)))
    finally:
        # Best-effort close: a failing close() must not mask the result
        # or the original error.
        if hasattr(source, "close"):
            try:
                source.close()
            except Exception:  # pragma: no cover — defensive.
                pass
    return tuple(samples)
|
||||
|
||||
Reference in New Issue
Block a user