"""Auto-sync detectors + offset compute + AC-9 validator (AZ-405).

Three concerns:

1. **Tlog take-off detector** — walks the head of the tlog, looks for
   a sustained vertical-acceleration excess + sustained attitude-rate
   excess, returns ``(takeoff_ns, confidence)``.
2. **Video motion-onset detector** — runs OpenCV Farneback dense
   optical flow over the leading seconds of the video, returns
   ``(motion_onset_ns, confidence)``.
3. **AC-9 frame-window match validator** — given a candidate offset
   and the tlog/video timestamp series, returns 0 if ≥ 95 % of
   video frames have an IMU sample within ± 100 ms after the offset
   is applied; 2 otherwise.

The detector functions are split into a thin path-reading wrapper
(``detect_tlog_takeoff`` / ``detect_video_motion_onset``) and a pure
sample-driven core (``_compute_tlog_takeoff_from_samples`` /
``_compute_video_onset_from_samples``). Tests exercise the pure cores
directly with synthetic fixtures; production calls the wrappers,
which read the tlog via ``pymavlink`` and the video via ``cv2``.

Both wrappers accept an optional ``source_factory`` (tlog) /
``frames_factory`` (video) injection point so unit tests can swap in
fakes without touching the filesystem (mirrors AZ-399's pattern).
"""

from __future__ import annotations

import bisect
import math
import os
from collections.abc import Callable, Iterable
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING, Any

from gps_denied_onboard._types.fc import FcKind
from gps_denied_onboard.replay_input.errors import ReplayInputAdapterError
from gps_denied_onboard.replay_input.interface import (
    AlignedWindow,
    AutoSyncConfig,
    AutoSyncDecision,
)

if TYPE_CHECKING:
    import numpy as np
    import numpy.typing as npt

__all__ = [
    "TlogSamples",
    "compute_offset",
    "detect_tlog_takeoff",
    "detect_video_motion_onset",
    "find_aligned_window",
    "validate_offset_or_fail",
]


# Conversion: MAVLink RAW_IMU / SCALED_IMU2 publish accelerometer
# components in mG (milli-G); 1 g ≡ 9.80665 m/s² by ISO 80000-3.
# Dividing a raw component by this constant yields a value in g.
_MG_PER_G: float = 1000.0
# Per the AZ-405 spec, the vertical-accel signal of interest is the
# deviation of the acceleration magnitude from gravity (i.e., body
# acceleration regardless of frame orientation). At rest |a| ≈ 1 g;
# during upward thrust |a| > 1 g; during free-fall |a| ≈ 0 g. The
# take-off detector compares ``|total_g - 1.0|`` against its threshold.
# NOTE: because the absolute value is taken, a sustained deficit below
# 1 g satisfies the criterion just like a sustained excess above it.
_REST_TOTAL_G: float = 1.0


# ---------------------------------------------------------------------
# DTOs (internal — public API surfaces results via AutoSyncDecision)


@dataclass(frozen=True, slots=True)
class _DetectorResult:
    """Outcome of a single detector pass.

    ``onset_ns`` is the best-guess event start (ns); ``confidence``
    is in [0, 1] and reflects how sustained the signal was relative
    to the configured threshold + sustained-time requirement.
    """

    onset_ns: int
    confidence: float


@dataclass(frozen=True, slots=True)
class TlogSamples:
    """Pre-loaded tlog samples extracted by the take-off detector.

    Used as the input shape for :func:`_compute_tlog_takeoff_from_samples`
    so unit tests can build a deterministic fixture without parsing a
    real ``.tlog`` file.

    Attributes:
        accel: Sequence of ``(ts_ns, total_accel_g)`` pairs sourced
            from ``RAW_IMU`` / ``SCALED_IMU2`` messages.
        attitude: Sequence of ``(ts_ns, roll_rad, pitch_rad, yaw_rad)``
            tuples sourced from ``ATTITUDE`` messages.
        imu_count_by_type: Map of message-type-name → count, used for
            the ``"tlog missing required message types: [...]"``
            error path (R-DEMO-3).
    """

    accel: tuple[tuple[int, float], ...]
    attitude: tuple[tuple[int, float, float, float], ...]
    imu_count_by_type: dict[str, int]


# ---------------------------------------------------------------------
# Public entrypoints


def detect_tlog_takeoff(
    tlog_path: Path,
    target_fc_dialect: FcKind,
    config: AutoSyncConfig,
    *,
    source_factory: Callable[[str], Any] | None = None,
) -> _DetectorResult:
    """Load the head of a tlog and run the take-off detector on it.

    Thin path-reading wrapper: validates the inputs, loads up to
    ``config.prescan_max_messages`` IMU / ATTITUDE messages, then hands
    them to :func:`_compute_tlog_takeoff_from_samples`.

    Args:
        tlog_path: Path to the tlog file; must exist as a regular file
            (checked here even when ``source_factory`` is supplied).
        target_fc_dialect: ``ARDUPILOT_PLANE`` or ``INAV``. In this
            function the value is only validated; it is accepted for
            parity with the rest of the replay surface.
        config: Operator-tunable thresholds (see
            :class:`AutoSyncConfig`).
        source_factory: Test-only injection — when provided, its return
            value replaces the pymavlink connection. The object must
            offer ``recv_match`` / ``close`` semantics matching
            pymavlink's ``mavutil.mavlink_connection``.

    Returns:
        The take-off onset timestamp and its confidence.

    Raises:
        ReplayInputAdapterError: When the dialect is not one of the two
            supported kinds, when ``tlog_path`` is not a file, or when
            the loaded samples contain no ``RAW_IMU`` / ``SCALED_IMU2``
            or no ``ATTITUDE`` messages (the R-DEMO-3 fail-fast path).
    """
    supported_dialects = (FcKind.ARDUPILOT_PLANE, FcKind.INAV)
    if target_fc_dialect not in supported_dialects:
        raise ReplayInputAdapterError(
            f"target_fc_dialect must be ARDUPILOT_PLANE or INAV; got {target_fc_dialect!r}"
        )
    if not tlog_path.is_file():
        raise ReplayInputAdapterError(f"tlog file not found: {tlog_path}")

    head_samples = _load_tlog_samples(
        tlog_path,
        config.prescan_max_messages,
        source_factory=source_factory,
    )
    return _compute_tlog_takeoff_from_samples(head_samples, config)


def detect_video_motion_onset(
    video_path: Path,
    config: AutoSyncConfig,
    *,
    frames_factory: Callable[[Path, float], Iterable[tuple[int, "np.ndarray"]]]
    | None = None,
) -> _DetectorResult:
    """Decode the leading video segment and locate the motion onset.

    The first ``config.video_motion_scan_seconds`` of the video are
    materialised as a list of frames, converted into pairwise
    optical-flow magnitudes, and scanned for a sustained excess.

    Args:
        video_path: Path to an MP4 / MKV / AVI file; must exist as a
            regular file (checked even when ``frames_factory`` is given).
        config: Operator-tunable thresholds (see
            :class:`AutoSyncConfig`).
        frames_factory: Test-only injection — called with
            ``(video_path, scan_seconds)`` and expected to return an
            iterable of ``(monotonic_ns, frame_bgr)`` tuples. At least
            two frames are needed so a flow magnitude can be computed.

    Returns:
        The motion-onset timestamp and its confidence.

    Raises:
        ReplayInputAdapterError: When the video file is missing or
            fewer than 2 frames are produced.
    """
    if not video_path.is_file():
        raise ReplayInputAdapterError(f"video file not found: {video_path}")

    # Production decodes with cv2; tests substitute their own frame source.
    open_frames = _read_video_frames if frames_factory is None else frames_factory
    frames = list(open_frames(video_path, config.video_motion_scan_seconds))

    if len(frames) < 2:
        raise ReplayInputAdapterError(
            f"video file unreadable or too short: {video_path} "
            f"(decoded {len(frames)} frame(s); need ≥ 2)"
        )
    return _compute_video_onset_from_samples(_compute_flow_magnitudes(frames), config)


def compute_offset(
    tlog_result: _DetectorResult,
    video_result: _DetectorResult,
) -> AutoSyncDecision:
    """Merge the tlog and video detector outputs into an :class:`AutoSyncDecision`.

    The offset is ``tlog_takeoff_ns - video_motion_onset_ns`` converted
    to whole milliseconds with floor division, so adding it to a video
    timestamp lands on the tlog timeline. The combined confidence is the
    smaller of the two detector confidences — the weakest signal
    dominates so the downstream WARN-and-proceed path (AC-6) fires
    whenever either side is unreliable.

    Args:
        tlog_result: Output of the tlog take-off detector.
        video_result: Output of the video motion-onset detector.

    Returns:
        The decision record carrying both onsets, both confidences, the
        millisecond offset and the combined confidence.
    """
    takeoff_ns = tlog_result.onset_ns
    motion_ns = video_result.onset_ns
    tlog_conf = tlog_result.confidence
    video_conf = video_result.confidence
    return AutoSyncDecision(
        offset_ms=(takeoff_ns - motion_ns) // 1_000_000,
        tlog_takeoff_ns=takeoff_ns,
        video_motion_onset_ns=motion_ns,
        tlog_confidence=tlog_conf,
        video_confidence=video_conf,
        combined_confidence=min(tlog_conf, video_conf),
    )


def validate_offset_or_fail(
    offset_ms: int,
    tlog_imu_timestamps_ns: Iterable[int],
    video_frame_timestamps_ns: Iterable[int],
    threshold_pct: float,
    *,
    window_ms: int = 100,
) -> int:
    """Check a candidate offset against the AC-9 frame-window criterion.

    Every video timestamp is shifted by the offset and compared with the
    closest tlog IMU timestamp (found by binary search over the sorted
    IMU series). A frame counts as matched when that distance is at most
    ``window_ms``.

    Args:
        offset_ms: Candidate offset, added to each video timestamp.
        tlog_imu_timestamps_ns: IMU timestamps in ns; need not be sorted.
        video_frame_timestamps_ns: Video frame timestamps in ns.
        threshold_pct: Minimum percentage of matched frames for a pass.
        window_ms: Half-width of the acceptance window in milliseconds.

    Returns:
        ``0`` when the matched percentage is ≥ ``threshold_pct``;
        ``2`` otherwise (CLI exit code for the AC-8 hard-fail). Empty
        video or IMU input also yields ``2`` so a degenerate call can
        never report a false PASS.
    """
    frame_stamps = list(video_frame_timestamps_ns)
    if not frame_stamps:
        # The replay binary rejects empty videos earlier; landing here
        # indicates a bug, so surface it as a hard-fail.
        return 2
    imu_stamps = sorted(tlog_imu_timestamps_ns)
    if not imu_stamps:
        return 2

    shift_ns = int(offset_ms) * 1_000_000
    tolerance_ns = int(window_ms) * 1_000_000
    last_index = len(imu_stamps) - 1

    hits = 0
    for frame_ns in frame_stamps:
        wanted_ns = frame_ns + shift_ns
        insert_at = bisect.bisect_left(imu_stamps, wanted_ns)
        # Only the two samples straddling the insertion point can be the
        # nearest one; at either end of the series just one of them exists.
        gaps = [
            abs(imu_stamps[k] - wanted_ns)
            for k in (insert_at - 1, insert_at)
            if 0 <= k <= last_index
        ]
        if min(gaps) <= tolerance_ns:
            hits += 1

    match_pct = (hits / len(frame_stamps)) * 100.0
    if match_pct >= threshold_pct:
        return 0
    return 2


# ---------------------------------------------------------------------
# Pure compute kernels (testable without disk IO)


def _compute_tlog_takeoff_from_samples(
    samples: TlogSamples,
    config: AutoSyncConfig,
) -> _DetectorResult:
    """Pure detector: turn pre-loaded tlog samples into a result.

    Two signals are scanned independently with
    :func:`_find_sustained_event`, both over a window of
    ``config.sustained_seconds``:

    * the absolute deviation of the acceleration magnitude from 1 g,
      against ``config.takeoff_accel_threshold_g``;
    * the attitude-rate magnitude derived from consecutive ``ATTITUDE``
      samples, against ``config.takeoff_attitude_rate_threshold_rad_s``.

    Outcome:

    * both fire — onset is the earlier of the two, confidence is the
      smaller of the two ratios;
    * exactly one fires — its onset is used and its ratio is scaled by
      0.6 so the combined offset trips the WARN-and-proceed threshold
      (combined_confidence < 0.80 → AC-6);
    * neither fires — the first accel timestamp is returned with
      confidence ``0.0`` so the coordinator can WARN and continue with
      the tlog start as a fallback origin.

    Args:
        samples: Accel and attitude samples extracted from the tlog.
        config: Operator-tunable thresholds.

    Returns:
        The take-off onset (ns) and its confidence.

    Raises:
        ReplayInputAdapterError: When the tlog had no IMU samples or
            no ATTITUDE samples (R-DEMO-3 fail-fast).
    """
    if not samples.accel:
        missing = ["RAW_IMU", "SCALED_IMU2"]
        raise ReplayInputAdapterError(
            f"tlog missing required message types: {missing}"
        )
    if not samples.attitude:
        raise ReplayInputAdapterError(
            "tlog missing required message types: ['ATTITUDE']"
        )

    sustained_ns = int(config.sustained_seconds * 1_000_000_000)
    # Confidence multiplier applied when only one of the two signals fired.
    single_signal_discount = 0.6

    # Pair-wise attitude rates (rad/s magnitude) — emitted at the
    # timestamp of the LATER sample so the rate aligns with when it is
    # observable downstream.
    attitude_rates: list[tuple[int, float]] = []
    for earlier, later in zip(samples.attitude, samples.attitude[1:]):
        ts_prev, roll_prev, pitch_prev, yaw_prev = earlier
        ts_curr, roll_curr, pitch_curr, yaw_curr = later
        dt_s = (ts_curr - ts_prev) / 1_000_000_000.0
        if dt_s <= 0.0:
            # Duplicate or out-of-order timestamps carry no usable rate.
            continue
        # Every Euler-angle delta is wrapped, not only yaw: an angle
        # reported on a ±π range that crosses the seam would otherwise
        # show up as a ~2π jump and fake a huge body rate.
        d_roll = _wrap_pi(roll_curr - roll_prev)
        d_pitch = _wrap_pi(pitch_curr - pitch_prev)
        d_yaw = _wrap_pi(yaw_curr - yaw_prev)
        rate_mag = math.sqrt(
            (d_roll / dt_s) ** 2 + (d_pitch / dt_s) ** 2 + (d_yaw / dt_s) ** 2
        )
        attitude_rates.append((ts_curr, rate_mag))

    accel_excess = tuple(
        (ts, abs(total_g - _REST_TOTAL_G)) for ts, total_g in samples.accel
    )

    accel_event = _find_sustained_event(
        accel_excess,
        threshold=config.takeoff_accel_threshold_g,
        sustained_ns=sustained_ns,
    )
    attitude_event = _find_sustained_event(
        tuple(attitude_rates),
        threshold=config.takeoff_attitude_rate_threshold_rad_s,
        sustained_ns=sustained_ns,
    )

    if accel_event is not None and attitude_event is not None:
        # Both signals fired — adopt the EARLIER onset so the offset is
        # referenced against the moment thrust began (the body-rate
        # spike usually trails the thrust during a vertical climb).
        onset_ns = min(accel_event[0], attitude_event[0])
        confidence = min(accel_event[1], attitude_event[1])
    elif accel_event is not None:
        onset_ns = accel_event[0]
        confidence = accel_event[1] * single_signal_discount
    elif attitude_event is not None:
        onset_ns = attitude_event[0]
        confidence = attitude_event[1] * single_signal_discount
    else:
        # Neither signal crossed: flag "no clear take-off" by returning
        # the first accel timestamp with zero confidence.
        onset_ns = samples.accel[0][0]
        confidence = 0.0

    return _DetectorResult(onset_ns=onset_ns, confidence=confidence)


def _compute_video_onset_from_samples(
    flow_samples: tuple[tuple[int, float], ...],
    config: AutoSyncConfig,
) -> _DetectorResult:
    """Pure detector: locate the motion onset in optical-flow magnitudes.

    Delegates to :func:`_find_sustained_event` with
    ``config.video_motion_threshold`` over a window of
    ``config.sustained_seconds``; the returned ratio is the confidence.

    Args:
        flow_samples: ``(ts_ns, mean_flow_magnitude)`` pairs.
        config: Operator-tunable thresholds.

    Returns:
        The detected onset and ratio. With no qualifying window the
        first sample's timestamp is returned with confidence ``0.0``;
        with no samples at all the onset is ``0``.
    """
    if not flow_samples:
        return _DetectorResult(onset_ns=0, confidence=0.0)

    window_ns = int(config.sustained_seconds * 1_000_000_000)
    hit = _find_sustained_event(
        flow_samples,
        threshold=config.video_motion_threshold,
        sustained_ns=window_ns,
    )
    if hit is not None:
        return _DetectorResult(onset_ns=hit[0], confidence=hit[1])
    return _DetectorResult(onset_ns=flow_samples[0][0], confidence=0.0)


def _find_sustained_event(
    samples: tuple[tuple[int, float], ...] | list[tuple[int, float]],
    *,
    threshold: float,
    sustained_ns: int,
) -> tuple[int, float] | None:
    """Sliding-window scan: return ``(start_ns, ratio)`` of the
    earliest window where the fraction of samples above
    ``threshold`` is maximised, provided that fraction is ≥ 0.5
    (signal-vs-noise floor) and the window covers at least 80 % of
    ``sustained_ns`` (guards against truncated windows at the tail).

    Returns ``None`` when no qualifying window exists.
    """
    seq = list(samples)
    n = len(seq)
    if n < 2:
        return None
    best_start_ns: int | None = None
    best_ratio = 0.0
    min_window_ns = int(sustained_ns * 0.8)
    for i in range(n):
        start_ns = seq[i][0]
        end_ns = start_ns + sustained_ns
        # Walk j forward while still inside the window.
        j = i
        above = 0
        while j < n and seq[j][0] <= end_ns:
            if seq[j][1] > threshold:
                above += 1
            j += 1
        window_size = j - i
        if window_size < 2:
            continue
        window_dur_ns = seq[j - 1][0] - start_ns
        if window_dur_ns < min_window_ns:
            continue
        ratio = above / window_size
        if ratio > best_ratio:
            best_ratio = ratio
            best_start_ns = start_ns
    if best_start_ns is None or best_ratio < 0.5:
        return None
    return (best_start_ns, best_ratio)


def _wrap_pi(angle_rad: float) -> float:
    """Wrap an angle delta into ``(-π, π]`` to handle yaw wrap-around."""
    twopi = 2.0 * math.pi
    a = angle_rad % twopi
    if a > math.pi:
        a -= twopi
    return a


# ---------------------------------------------------------------------
# Disk-reading wrappers (production paths)


# MAVLink message types the take-off pre-scan asks the tlog source for:
# the two accelerometer-bearing IMU messages plus ATTITUDE. Passed as
# the ``type=`` filter to ``recv_match`` in ``_load_tlog_samples``.
_REQUIRED_TLOG_TYPES: tuple[str, ...] = (
    "RAW_IMU",
    "SCALED_IMU2",
    "ATTITUDE",
)


def _load_tlog_samples(
    tlog_path: Path,
    max_messages: int,
    *,
    source_factory: Callable[[str], Any] | None,
) -> TlogSamples:
    """Read IMU and ATTITUDE messages from the start of a tlog.

    At most ``max_messages`` ``recv_match`` calls are made; reading
    stops early when the source returns ``None``. The source is closed
    on the way out (best effort) whether or not the scan succeeded.

    Mirrors the AZ-399 source-factory test pattern: production builds
    use ``pymavlink`` lazily; tests pass an in-memory fake.

    Args:
        tlog_path: Tlog file handed to :func:`_open_tlog`.
        max_messages: Upper bound on the number of messages requested.
        source_factory: Optional replacement for the pymavlink open call.

    Returns:
        The captured accel magnitudes (in g), attitude tuples and a
        per-message-type counter.

    Raises:
        ReplayInputAdapterError: When ``recv_match`` raises, or when a
            returned message carries no ``_timestamp``.
    """
    source = _open_tlog(tlog_path, source_factory=source_factory)
    accel_rows: list[tuple[int, float]] = []
    attitude_rows: list[tuple[int, float, float, float]] = []
    seen_by_type: dict[str, int] = {}
    try:
        for _ in range(max_messages):
            try:
                msg = source.recv_match(
                    type=[*_REQUIRED_TLOG_TYPES],
                    blocking=False,
                )
            except Exception as exc:  # pragma: no cover — defensive.
                raise ReplayInputAdapterError(
                    f"tlog scan failed on {tlog_path}: {exc!r}"
                ) from exc
            if msg is None:
                # Source has nothing more to offer.
                break

            kind = _safe_msg_type(msg)
            if not kind:
                continue
            seen_by_type[kind] = seen_by_type.get(kind, 0) + 1
            stamp_ns = _msg_timestamp_ns(msg)

            if kind == "ATTITUDE":
                roll = float(getattr(msg, "roll", 0.0))
                pitch = float(getattr(msg, "pitch", 0.0))
                yaw = float(getattr(msg, "yaw", 0.0))
                attitude_rows.append((stamp_ns, roll, pitch, yaw))
            elif kind in ("RAW_IMU", "SCALED_IMU2"):
                # Components arrive in mG; convert to g before taking
                # the magnitude.
                gx, gy, gz = (
                    float(getattr(msg, axis, 0.0)) / _MG_PER_G
                    for axis in ("xacc", "yacc", "zacc")
                )
                accel_rows.append((stamp_ns, math.sqrt(gx * gx + gy * gy + gz * gz)))
    finally:
        # Closing is best-effort: a failure here must not mask the scan
        # result or the exception already in flight.
        if hasattr(source, "close"):
            try:
                source.close()
            except Exception:  # pragma: no cover — defensive.
                pass
    return TlogSamples(
        accel=tuple(accel_rows),
        attitude=tuple(attitude_rows),
        imu_count_by_type=seen_by_type,
    )


def _open_tlog(
    tlog_path: Path,
    *,
    source_factory: Callable[[str], Any] | None,
) -> Any:
    if source_factory is not None:
        return source_factory(str(tlog_path))
    try:
        from pymavlink import mavutil  # type: ignore[import-not-found]
    except ImportError as exc:
        raise ReplayInputAdapterError(
            "pymavlink is required for replay auto-sync but is not "
            "importable in this binary"
        ) from exc
    return mavutil.mavlink_connection(
        str(tlog_path),
        dialect="ardupilotmega",
        mavlink_version="2.0",
    )


def _safe_msg_type(msg: Any) -> str:
    try:
        if hasattr(msg, "get_type"):
            return str(msg.get_type())
    except Exception:
        return ""
    return type(msg).__name__


def _msg_timestamp_ns(msg: Any) -> int:
    raw = getattr(msg, "_timestamp", None)
    if raw is None:
        raise ReplayInputAdapterError(
            "tlog message missing _timestamp attribute; pymavlink "
            "mavlogfile should populate it on every recv_match() return"
        )
    return int(float(raw) * 1_000_000_000)


def _read_video_frames(
    video_path: Path,
    scan_seconds: float,
) -> Iterable[tuple[int, "np.ndarray"]]:
    """Lazily decode the first ``scan_seconds`` of a video.

    This is a generator: nothing (including the ``cv2`` import) happens
    until the first item is requested. Each item is
    ``(monotonic_ns, frame_bgr)`` where ``monotonic_ns`` is the
    capture's ``CAP_PROP_POS_MSEC × 1e6`` read right after the frame is
    decoded, so timestamps follow the video timeline and align with
    what :class:`VideoFileFrameSource` will report later. The Python
    ``time.monotonic_ns()`` is NOT used — the auto-sync result has to
    be deterministic across runs (AC-10).

    Iteration ends at the first failed read or the first frame whose
    position exceeds ``scan_seconds``; the capture is released in every
    case.

    Raises:
        ReplayInputAdapterError: When ``cv2`` is not importable or the
            capture cannot be opened.
    """
    try:
        import cv2 as _cv2  # type: ignore[import-not-found]
    except ImportError as exc:
        raise ReplayInputAdapterError(
            "opencv-python is required for replay auto-sync but is "
            "not importable in this binary"
        ) from exc

    capture = _cv2.VideoCapture(str(video_path))
    if not capture.isOpened():
        capture.release()
        raise ReplayInputAdapterError(
            f"video file unreadable / unsupported codec: {video_path}"
        )

    try:
        cutoff_ms = scan_seconds * 1000.0
        while True:
            grabbed, image = capture.read()
            if not (grabbed and image is not None):
                break
            position_ms = float(capture.get(_cv2.CAP_PROP_POS_MSEC))
            if position_ms > cutoff_ms:
                break
            yield int(position_ms * 1_000_000), image
    finally:
        capture.release()


def _compute_flow_magnitudes(
    frames: list[tuple[int, "np.ndarray"]],
) -> tuple[tuple[int, float], ...]:
    """Pairwise mean optical-flow magnitude between consecutive frames.

    Uses Farneback dense flow (``cv2.calcOpticalFlowFarneback``)
    rather than pyramidal LK because Farneback returns a flow field
    over the whole image with no per-frame feature-tracking state, so
    the result is deterministic given the same input frames (AC-10).

    Frames are processed as a rolling pair: each frame is converted to
    grayscale exactly once and only the previous grayscale image is
    retained, so peak memory does not grow by one extra grayscale copy
    per input frame.

    Args:
        frames: ``(ts_ns, frame_bgr)`` tuples in playback order.

    Returns:
        ``((ts_ns_of_second_frame, mean_magnitude_px), ...)`` — one
        entry per consecutive pair; empty when fewer than two frames
        are supplied.

    Raises:
        ReplayInputAdapterError: When ``cv2`` or ``numpy`` is not
            importable.
    """
    try:
        import cv2 as _cv2  # type: ignore[import-not-found]
        import numpy as _np  # type: ignore[import-not-found]
    except ImportError as exc:  # pragma: no cover — guarded at call sites.
        raise ReplayInputAdapterError(
            "opencv-python + numpy are required for replay auto-sync"
        ) from exc
    if len(frames) < 2:
        return ()

    remaining = iter(frames)
    _, first_frame = next(remaining)
    prev_gray = _cv2.cvtColor(first_frame, _cv2.COLOR_BGR2GRAY)

    out: list[tuple[int, float]] = []
    for curr_ts, frame in remaining:
        curr_gray = _cv2.cvtColor(frame, _cv2.COLOR_BGR2GRAY)
        flow = _cv2.calcOpticalFlowFarneback(
            prev_gray,
            curr_gray,
            None,
            pyr_scale=0.5,
            levels=3,
            winsize=15,
            iterations=3,
            poly_n=5,
            poly_sigma=1.2,
            flags=0,
        )
        # ``flow`` shape: (H, W, 2) — dx + dy per pixel.
        magnitudes = _np.sqrt(flow[..., 0] ** 2 + flow[..., 1] ** 2)
        out.append((curr_ts, float(magnitudes.mean())))
        # Reuse this frame's grayscale as the next pair's reference so
        # no frame is converted twice.
        prev_gray = curr_gray
    return tuple(out)


# Local BUILD-flag check, kept for symmetry with other replay modules.
def _build_flag_on(name: str) -> bool:
    raw = os.environ.get(name, "")
    return raw.strip().lower() in {"on", "1", "true", "yes"}


# ---------------------------------------------------------------------
# AZ-698 — mid-flight cross-correlation aligner
#
# The AZ-405 head-takeoff detector only works when the video covers
# the take-off moment. For mid-flight slices (e.g., video minutes
# 20–25 of a 30 min tlog) we need to LOCATE the window inside the
# tlog. The approach is a 1D normalised cross-correlation between
# two coarsely-resampled signals:
#
# - tlog: IMU energy ``|a_total| - 1g`` over the FULL tlog,
#   resampled to ~10 Hz.
# - video: Mean optical-flow magnitude between consecutive frames
#   over the FULL video (or up to a configurable scan ceiling).
#
# Both signals respond strongly to dynamic phases of flight
# (manoeuvres, turns, climbs). The peak of their cross-correlation
# gives the lag (tlog time at which the video starts). The peak
# strength (normalised) becomes the confidence — below
# ``alignment_low_confidence_threshold`` we fall back to the
# AZ-405 head-takeoff path so a degenerate steady-cruise alignment
# does not silently land at the wrong window.


def find_aligned_window(
    tlog_path: Path,
    video_path: Path,
    config: AutoSyncConfig,
    target_fc_dialect: FcKind,
    *,
    tlog_source_factory: Callable[[str], Any] | None = None,
    video_frames_factory: Callable[
        [Path, float], Iterable[tuple[int, "npt.NDArray[np.uint8]"]]
    ]
    | None = None,
) -> AlignedWindow:
    """Find where the video plays inside the flight recorded in ``tlog_path`` (AZ-698).

    Loads the tlog IMU-energy stream and the video optical-flow stream,
    then passes both to :func:`_align_via_cross_correlation`.

    Args:
        tlog_path: Binary ArduPilot tlog. Up to
            :attr:`AutoSyncConfig.prescan_max_messages` × 10 messages
            are requested (the aligner needs the FULL flight, not just
            the head).
        video_path: Mp4 / mkv input. The leading
            :attr:`AutoSyncConfig.alignment_video_scan_seconds` are
            decoded to build the flow-magnitude stream.
        config: Operator-tunable thresholds.
        target_fc_dialect: ``ARDUPILOT_PLANE`` or ``INAV`` — same
            parity contract as :func:`detect_tlog_takeoff`.
        tlog_source_factory: Test injection — replaces the
            ``pymavlink`` open call.
        video_frames_factory: Test injection — replaces
            ``cv2.VideoCapture`` frame iteration.

    Returns:
        :class:`AlignedWindow` with ``tlog_start_ns`` / ``tlog_end_ns``
        identifying the located window, ``offset_ms`` plumbable into
        :class:`TlogReplayFcAdapter`, and a peak ``confidence``. When
        the confidence is below
        :attr:`AutoSyncConfig.alignment_low_confidence_threshold` the
        window comes from the AZ-405 head-takeoff path with
        ``fallback_used=True``.

    Raises:
        ReplayInputAdapterError: When the dialect is unsupported, when
            either file is missing, or when the tlog yields fewer than
            2 IMU samples, the video fewer than 2 frames, or the frames
            fewer than 2 flow samples.
    """
    if target_fc_dialect not in (FcKind.ARDUPILOT_PLANE, FcKind.INAV):
        raise ReplayInputAdapterError(
            f"target_fc_dialect must be ARDUPILOT_PLANE or INAV; got {target_fc_dialect!r}"
        )
    if not tlog_path.is_file():
        raise ReplayInputAdapterError(f"tlog file not found: {tlog_path}")
    if not video_path.is_file():
        raise ReplayInputAdapterError(f"video file not found: {video_path}")

    # --- tlog side: IMU energy over (up to) the whole flight ---------
    energy_stream = _load_tlog_imu_energy_stream(
        tlog_path,
        max_messages=config.prescan_max_messages * 10,
        source_factory=tlog_source_factory,
    )
    if len(energy_stream) < 2:
        raise ReplayInputAdapterError(
            f"tlog yielded {len(energy_stream)} IMU sample(s); "
            "need ≥ 2 for cross-correlation alignment"
        )

    # --- video side: frames → pairwise flow magnitudes ---------------
    open_frames = (
        _read_video_frames if video_frames_factory is None else video_frames_factory
    )
    frames = list(open_frames(video_path, config.alignment_video_scan_seconds))
    if len(frames) < 2:
        raise ReplayInputAdapterError(
            f"video yielded {len(frames)} frame(s); "
            "need ≥ 2 for cross-correlation alignment"
        )
    flow_stream = _compute_flow_magnitudes(frames)
    if len(flow_stream) < 2:
        raise ReplayInputAdapterError(
            f"video produced {len(flow_stream)} flow sample(s); "
            "need ≥ 2 for cross-correlation alignment"
        )

    return _align_via_cross_correlation(
        tlog_energy=energy_stream,
        flow_samples=flow_stream,
        config=config,
        target_fc_dialect=target_fc_dialect,
        tlog_path=tlog_path,
        tlog_source_factory=tlog_source_factory,
    )


def _align_via_cross_correlation(
    *,
    tlog_energy: tuple[tuple[int, float], ...],
    flow_samples: tuple[tuple[int, float], ...],
    config: AutoSyncConfig,
    target_fc_dialect: FcKind,
    tlog_path: Path,
    tlog_source_factory: Callable[[str], Any] | None,
) -> AlignedWindow:
    """Compute kernel: turn pre-loaded streams into an :class:`AlignedWindow`.

    Both streams are resampled onto a common uniform grid
    (``config.alignment_resample_hz``, floored at 1 Hz). The normalised
    video flow is then slid across the tlog energy; the lag with the
    highest normalised correlation gives the window start, and the peak
    value clamped to [0, 1] is the confidence.

    Split out so unit tests can exercise the correlation arithmetic
    with synthetic input. Note that the low-confidence branch hands
    ``tlog_path`` / ``tlog_source_factory`` to
    :func:`_fallback_to_head_takeoff`, which re-runs the head-takeoff
    detector on the tlog.

    Raises:
        ReplayInputAdapterError: When either resampled stream has fewer
            than 2 samples, or the video stream is longer than the tlog
            stream.
    """
    import numpy as _np

    grid_hz = max(config.alignment_resample_hz, 1.0)
    period_ns = int(1_000_000_000 / grid_hz)

    tlog_origin_ns = tlog_energy[0][0]
    tlog_grid = _resample_uniform(tlog_energy, period_ns, tlog_origin_ns)
    if len(tlog_grid) < 2:
        raise ReplayInputAdapterError(
            "tlog resampled stream has < 2 samples; cannot cross-correlate"
        )

    video_origin_ns = flow_samples[0][0]
    flow_grid = _resample_uniform(flow_samples, period_ns, video_origin_ns)
    if len(flow_grid) < 2:
        raise ReplayInputAdapterError(
            "video flow stream has < 2 samples; cannot cross-correlate"
        )
    if len(flow_grid) > len(tlog_grid):
        raise ReplayInputAdapterError(
            "video flow stream is longer than the tlog energy stream; "
            "auto-trim requires the video to be a slice of a longer tlog"
        )

    tlog_arr = _np.asarray(tlog_grid, dtype=_np.float64)
    flow_unit = _zero_mean_normalise(_np.asarray(flow_grid, dtype=_np.float64))

    # Defaults cover the flat-video case: no information to correlate,
    # so confidence stays at 0 and the peak index at the tlog origin.
    peak_idx = 0
    confidence = 0.0
    if _np.linalg.norm(flow_unit) != 0.0:
        # Normalised cross-correlation: every tlog segment is
        # zero-meaned and unit-normed on its own before the dot product,
        # so the peak does not depend on local signal magnitude.
        width = len(flow_unit)
        lag_count = len(tlog_arr) - width + 1
        scores = _np.zeros(lag_count, dtype=_np.float64)
        for lag in range(lag_count):
            segment = tlog_arr[lag : lag + width]
            centred = segment - segment.mean()
            scale = float(_np.linalg.norm(centred))
            if scale > 0.0:
                scores[lag] = float(_np.dot(centred / scale, flow_unit))
        peak_idx = int(_np.argmax(scores))
        confidence = max(0.0, min(1.0, float(scores[peak_idx])))

    video_duration_ns = _stream_duration_ns(flow_samples)
    if confidence < config.alignment_low_confidence_threshold:
        return _fallback_to_head_takeoff(
            tlog_path=tlog_path,
            tlog_source_factory=tlog_source_factory,
            target_fc_dialect=target_fc_dialect,
            config=config,
            tlog_energy=tlog_energy,
            video_origin_ns=video_origin_ns,
            video_flow_duration_ns=video_duration_ns,
            confidence=confidence,
        )

    # Absolute tlog-timeline value where the video's first flow sample
    # aligns. The adapter's seek check compares this against the raw
    # pymavlink ``msg._timestamp``, so it MUST be on the tlog timeline,
    # NOT a delta.
    window_start_ns = tlog_origin_ns + peak_idx * period_ns
    window_end_ns = window_start_ns + video_duration_ns
    # Offset that, added to a video timestamp, lands on the tlog
    # timeline — the same convention ``validate_offset_or_fail`` uses
    # (``vts + offset_ns``).
    return AlignedWindow(
        tlog_start_ns=window_start_ns,
        tlog_end_ns=window_end_ns,
        offset_ms=(window_start_ns - video_origin_ns) // 1_000_000,
        confidence=confidence,
        fallback_used=False,
    )


def _stream_duration_ns(
    samples: tuple[tuple[int, float], ...],
) -> int:
    if not samples:
        return 0
    return samples[-1][0] - samples[0][0]


def _fallback_to_head_takeoff(
    *,
    tlog_path: Path,
    tlog_source_factory: Callable[[str], Any] | None,
    target_fc_dialect: FcKind,
    config: AutoSyncConfig,
    tlog_energy: tuple[tuple[int, float], ...],
    video_origin_ns: int,
    video_flow_duration_ns: int,
    confidence: float,
) -> AlignedWindow:
    """Build the low-confidence :class:`AlignedWindow` from head take-off.

    Runs :func:`detect_tlog_takeoff` (the AZ-405 head-takeoff
    detector) and anchors the window start as follows:

    * take-off confidence above zero — the detected ``onset_ns``;
    * otherwise, if ``tlog_energy`` is non-empty — the timestamp of
      its first sample;
    * otherwise — ``0``.

    The window end is the start plus ``video_flow_duration_ns`` and
    ``offset_ms`` is ``(start - video_origin_ns)`` floor-divided to
    milliseconds. ``fallback_used`` is always ``True`` on this path.
    The ``confidence`` argument is passed through unchanged: it is the
    caller's original (sub-threshold) cross-correlation peak and is
    informational only here.
    """
    head_takeoff = detect_tlog_takeoff(
        tlog_path,
        target_fc_dialect,
        config,
        source_factory=tlog_source_factory,
    )

    # Pick the anchor in decreasing order of trust: detected onset,
    # then the first energy sample, then the zero sentinel.
    if head_takeoff.confidence > 0.0:
        anchor_ns = head_takeoff.onset_ns
    else:
        anchor_ns = tlog_energy[0][0] if tlog_energy else 0

    return AlignedWindow(
        tlog_start_ns=anchor_ns,
        tlog_end_ns=anchor_ns + video_flow_duration_ns,
        offset_ms=(anchor_ns - video_origin_ns) // 1_000_000,
        confidence=confidence,
        fallback_used=True,
    )


def _resample_uniform(
    samples: tuple[tuple[int, float], ...],
    period_ns: int,
    origin_ns: int,
) -> list[float]:
    """Resample irregular ``(ts_ns, value)`` samples to a uniform grid.

    Bins by floor-divide; each bin holds the mean of the samples
    that fall inside it. Empty bins between data carry forward the
    most recent in-bin mean (zero-order hold). Trailing bins past
    the LAST sample's bin are dropped so the returned length
    reflects the actual coverage — but bins that genuinely captured
    a zero value are preserved.
    """
    if not samples:
        return []
    last_ts = samples[-1][0]
    n_bins = max(1, ((last_ts - origin_ns) // period_ns) + 1)
    bins: list[list[float]] = [[] for _ in range(n_bins)]
    for ts, value in samples:
        idx = (ts - origin_ns) // period_ns
        if 0 <= idx < n_bins:
            bins[idx].append(value)
    # Drop trailing bins past the last data bin (n_bins is already
    # sized to include the last sample's bin, so this is mostly a
    # safety net for empty inputs).
    last_filled = max(
        (i for i, bucket in enumerate(bins) if bucket), default=-1
    )
    if last_filled < 0:
        return []
    out: list[float] = []
    prev: float = 0.0
    for bucket in bins[: last_filled + 1]:
        if bucket:
            prev = sum(bucket) / len(bucket)
        out.append(prev)
    return out


def _zero_mean_normalise(
    arr: "npt.NDArray[np.float64]",
) -> "npt.NDArray[np.float64]":
    import numpy as _np

    centred: "npt.NDArray[np.float64]" = arr - arr.mean()
    norm = float(_np.linalg.norm(centred))
    if norm == 0.0:
        return centred
    result: "npt.NDArray[np.float64]" = centred / norm
    return result


def _load_tlog_imu_energy_stream(
    tlog_path: Path,
    *,
    max_messages: int,
    source_factory: Callable[[str], Any] | None,
) -> tuple[tuple[int, float], ...]:
    """Collect IMU energy samples from a tlog, reading up to ``max_messages``.

    Opens the tlog via :func:`_open_tlog` and polls it for ``RAW_IMU``
    / ``SCALED_IMU2`` messages with ``blocking=False``. Reading stops
    at the first ``None`` result or after ``max_messages`` polls. Each
    message contributes one ``(ts_ns, energy)`` pair, where ``energy``
    is the absolute difference between the total accelerometer
    magnitude (each axis divided by ``_MG_PER_G``) and
    ``_REST_TOTAL_G``. Missing axis attributes are read as ``0.0``.
    This is the signal the AZ-698 cross-correlation aligner consumes;
    no attitude data is collected.

    Raises:
        ReplayInputAdapterError: if ``recv_match`` raises while
            scanning; the original exception is chained as the cause.

    The source is closed on a best-effort basis on every exit path;
    errors raised by ``close`` are ignored.
    """
    reader = _open_tlog(tlog_path, source_factory=source_factory)
    collected: list[tuple[int, float]] = []
    try:
        for _ in range(max_messages):
            try:
                imu_msg = reader.recv_match(
                    type=["RAW_IMU", "SCALED_IMU2"],
                    blocking=False,
                )
            except Exception as exc:  # pragma: no cover — defensive.
                raise ReplayInputAdapterError(
                    f"tlog scan failed on {tlog_path}: {exc!r}"
                ) from exc
            if imu_msg is None:
                # Nothing more to read from the source.
                break
            stamp_ns = _msg_timestamp_ns(imu_msg)
            gx, gy, gz = (
                float(getattr(imu_msg, axis, 0.0)) / _MG_PER_G
                for axis in ("xacc", "yacc", "zacc")
            )
            magnitude_g = math.sqrt(gx * gx + gy * gy + gz * gz)
            collected.append((stamp_ns, abs(magnitude_g - _REST_TOTAL_G)))
    finally:
        # Injected fakes may not expose ``close``; a failing close must
        # never mask the scan result or a scan error.
        if hasattr(reader, "close"):
            try:
                reader.close()
            except Exception:  # pragma: no cover — defensive.
                pass
    return tuple(collected)