"""Helpers shared by the AZ-404 E2E replay tests.

* :func:`parse_jsonl` — read the ``JsonlReplaySink`` output into a list
  of dicts with one entry per emit.
* :func:`l2_horizontal_m` — great-circle (haversine) horizontal distance
  in metres between two ``(lat, lon)`` pairs given in degrees, computed
  on a sphere with the WGS84 mean radius.
* :func:`match_percentage` — share of estimator emissions whose
  horizontal distance to the nearest-in-time ground-truth row is within
  a threshold.
* :class:`CapturingMavlinkTransport` — test-only ``MavlinkTransport``
  impl that records every ``write`` so AC-4b can compare the byte
  streams produced by ``compose_root(config_live)`` vs.
  ``compose_root(config_replay)``.
* :func:`load_ground_truth_csv` — the IMU CSV's ``GLOBAL_POSITION_INT``
  columns ARE the AC-3 reference (the original tlog's GPS rows
  exported to CSV); this helper materialises them.

All functions are pure / deterministic and stay safely importable on
dev macOS without ``RUN_REPLAY_E2E``; the regular regression suite
calls them via the unit-level helper test in this module's sibling
``test_helpers.py``.
"""

from __future__ import annotations

import csv
import json
import math
from dataclasses import dataclass
from pathlib import Path
from typing import Any

# Public names exported by ``from ... import *``. The underscore-prefixed
# ``_EARTH_RADIUS_M`` and ``_bisect_left`` are intentionally left out.
__all__ = [
    "CapturingMavlinkTransport",
    "GroundTruthRow",
    "l2_horizontal_m",
    "load_ground_truth_csv",
    "match_percentage",
    "parse_jsonl",
]


# WGS84 mean Earth radius in metres, i.e. (2a + b) / 3 of the ellipsoid
# rounded to 0.1 m. Matches the value used by
# `helpers/wgs_converter.py` (AZ-279) so the e2e check is consistent
# with the production converter.
_EARTH_RADIUS_M: float = 6_371_008.8


@dataclass(frozen=True)
class GroundTruthRow:
    """One row from the Derkachi data_imu.csv ground-truth slice.

    Immutable (``frozen=True``) record built by
    :func:`load_ground_truth_csv` and consumed by
    :func:`match_percentage`.
    """

    # Value of the CSV ``Time`` column as a float. ``match_percentage``
    # compares it against ``emitted_at / 1e9``, i.e. treats it as seconds.
    t_s: float
    # Latitude in degrees (CSV ``GLOBAL_POSITION_INT.lat`` / 1e7).
    lat_deg: float
    # Longitude in degrees (CSV ``GLOBAL_POSITION_INT.lon`` / 1e7).
    lon_deg: float
    # Altitude in metres (CSV ``GLOBAL_POSITION_INT.alt`` / 1e3).
    alt_m: float


def parse_jsonl(path: Path) -> list[dict[str, Any]]:
    """Return one decoded record per line of a JsonlReplaySink output file.

    Blank lines are tolerated only at the very end of the file (the sink
    terminates every record with ``\\n``, so a stray trailing blank line
    is harmless). A blank line that is followed by further content means
    the file is corrupt; it is reported exactly like any other line that
    fails to decode.

    Args:
        path: Location of the UTF-8 encoded JSONL file.

    Returns:
        The decoded JSON value of every line, in file order.

    Raises:
        AssertionError: If a line is not valid JSON, including a blank
            line in the middle of the file. The message carries the
            1-based line number and the path; the underlying
            ``json.JSONDecodeError`` is chained as ``__cause__``.
    """
    with path.open(encoding="utf-8") as fp:
        lines = [line.rstrip("\n") for line in fp]

    # Only *trailing* blank lines are benign. Dropping them here (and
    # nothing else) keeps the remaining indices equal to the on-disk line
    # numbers and lets an interior blank line fail in json.loads below.
    while lines and not lines[-1]:
        lines.pop()

    records: list[dict[str, Any]] = []
    for lineno, text in enumerate(lines, start=1):
        try:
            records.append(json.loads(text))
        except json.JSONDecodeError as exc:
            raise AssertionError(
                f"line {lineno} in {path} is not valid JSON: {exc.msg!r}"
            ) from exc
    return records


def l2_horizontal_m(
    lat1_deg: float, lon1_deg: float, lat2_deg: float, lon2_deg: float
) -> float:
    """Return the great-circle distance in metres between two points.

    The points are given as latitude / longitude in degrees. The
    distance is computed with the haversine formula on a sphere of
    radius ``_EARTH_RADIUS_M`` (the C5/AZ-279 mean Earth radius).

    The spherical model is adequate for the AC-3 ≤ 100 m threshold: at
    the Derkachi latitude band it deviates from the WGS84 ellipsoid by
    < 0.5 %, i.e. sub-metre at that range.

    Args:
        lat1_deg: Latitude of the first point, degrees.
        lon1_deg: Longitude of the first point, degrees.
        lat2_deg: Latitude of the second point, degrees.
        lon2_deg: Longitude of the second point, degrees.

    Returns:
        Horizontal distance along the sphere's surface, in metres.
    """
    lat_a = math.radians(lat1_deg)
    lat_b = math.radians(lat2_deg)
    half_dlat = (lat_b - lat_a) / 2.0
    half_dlon = math.radians(lon2_deg - lon1_deg) / 2.0

    # Haversine of the central angle between the two points.
    hav = (
        math.sin(half_dlat) ** 2
        + math.cos(lat_a) * math.cos(lat_b) * math.sin(half_dlon) ** 2
    )
    # Clamp to 1.0 so rounding error near antipodal points cannot push
    # the asin argument out of its domain.
    central_angle = 2.0 * math.asin(min(1.0, math.sqrt(hav)))
    return _EARTH_RADIUS_M * central_angle


def load_ground_truth_csv(csv_path: Path) -> list[GroundTruthRow]:
    """Load the Derkachi IMU CSV's GPS rows as ground truth.

    The original ``flight_derkachi.tlog``'s ``GLOBAL_POSITION_INT``
    messages were exported to ``data_imu.csv``; the ``lat / lon /
    alt`` columns are degrees * 1e7 / metres * 1e3 (mavlink integer
    encoding), so we divide accordingly.

    Args:
        csv_path: Path to the CSV file. It must have a header row with
            the columns ``Time``, ``GLOBAL_POSITION_INT.lat``,
            ``GLOBAL_POSITION_INT.lon`` and ``GLOBAL_POSITION_INT.alt``.

    Returns:
        One :class:`GroundTruthRow` per data row, in file order.

    Raises:
        KeyError: If one of the required columns is absent.
        ValueError: If a required cell is not a valid number.
        TypeError: If a row is shorter than the header, so that a
            required cell is missing altogether.
    """
    # Decode explicitly instead of relying on the platform's locale
    # encoding. ``utf-8-sig`` reads plain UTF-8 unchanged and additionally
    # strips a leading BOM, which would otherwise end up glued to the
    # first header name and break the ``"Time"`` lookup.
    # ``newline=""`` is the csv module's documented way to open a file.
    with csv_path.open(newline="", encoding="utf-8-sig") as fp:
        return [
            GroundTruthRow(
                t_s=float(record["Time"]),
                lat_deg=float(record["GLOBAL_POSITION_INT.lat"]) / 1e7,
                lon_deg=float(record["GLOBAL_POSITION_INT.lon"]) / 1e7,
                alt_m=float(record["GLOBAL_POSITION_INT.alt"]) / 1e3,
            )
            for record in csv.DictReader(fp)
        ]


def match_percentage(
    emissions: list[dict[str, Any]],
    ground_truth: list[GroundTruthRow],
    *,
    threshold_m: float,
) -> float:
    """Return the fraction of emissions that land near the ground truth.

    Every emitted ``EstimatorOutput`` JSONL record is paired with the
    ground-truth row whose timestamp is nearest to the record's
    ``emitted_at`` (nanoseconds, converted to seconds). The record is a
    hit when the horizontal distance between its ``position_wgs84`` and
    that row is ≤ ``threshold_m``.

    Pairing by time alone is sufficient because the IMU CSV's 10 Hz
    cadence (matching the C5 emit rate) means the paired row is
    typically < 50 ms away from the emit timestamp — negligible against
    the AC-3 100 m budget.

    Args:
        emissions: Decoded JSONL records; each needs ``emitted_at`` and
            a ``position_wgs84`` mapping with ``lat_deg`` / ``lon_deg``.
        ground_truth: Reference rows, in any order.
        threshold_m: Largest distance in metres that still counts as a
            hit (inclusive).

    Returns:
        Hit ratio in [0.0, 1.0]; ``0.0`` when ``emissions`` is empty.

    Raises:
        AssertionError: If there are emissions but no ground truth.
    """
    if not emissions:
        return 0.0
    if not ground_truth:
        raise AssertionError("ground_truth must be non-empty")

    timeline = sorted(ground_truth, key=lambda row: row.t_s)
    stamps = [row.t_s for row in timeline]

    def _is_hit(record: dict[str, Any]) -> bool:
        """Tell whether one emission is within the threshold."""
        when_s = int(record["emitted_at"]) / 1e9
        pos = _bisect_left(stamps, when_s)
        # Only the rows just before and just at/after the insertion
        # point can be nearest in time; the slice drops whichever of
        # the two falls off an end of the timeline.
        neighbours = timeline[max(pos - 1, 0) : pos + 1]
        closest = min(neighbours, key=lambda row: abs(row.t_s - when_s))
        fix = record["position_wgs84"]
        offset_m = l2_horizontal_m(
            fix["lat_deg"],
            fix["lon_deg"],
            closest.lat_deg,
            closest.lon_deg,
        )
        return offset_m <= threshold_m

    hit_count = sum(1 for record in emissions if _is_hit(record))
    return hit_count / len(emissions)


def _bisect_left(seq: list[float], target: float) -> int:
    """Return the leftmost insertion index of ``target`` in sorted ``seq``.

    Same contract as the stdlib ``bisect.bisect_left``; kept inline so
    this module's import surface stays narrow. Every element before the
    returned index is strictly less than ``target``.
    """
    low = 0
    high = len(seq)
    while high > low:
        pivot = (low + high) // 2
        if not seq[pivot] < target:
            # seq[pivot] is a valid insertion point; look further left.
            high = pivot
            continue
        low = pivot + 1
    return low


class CapturingMavlinkTransport:
    """Test-only :class:`MavlinkTransport` that records every write.

    Used by AZ-404 AC-4b: capture the byte streams produced by
    ``compose_root(config_live).c8.emit_external_position(out)`` and
    ``compose_root(config_replay).c8.emit_external_position(out)`` to
    assert byte-identity per replay protocol Invariant 5.

    NOTE: AC-4b is currently SKIPPED (blocked on AZ-558 — the C8
    encoders still bypass the ``MavlinkTransport`` seam by calling
    ``mav.*_send`` directly). This class is in place so the test
    fixture is ready the moment AZ-558 lands.

    Nothing is sent anywhere: payloads are only kept in memory, in
    write order, until the object is discarded.
    """

    def __init__(self) -> None:
        # Immutable snapshots of every payload, in write order.
        self._chunks: list[bytes] = []
        self._closed = False

    def write(self, payload: bytes) -> int:
        """Record ``payload`` and return the number of bytes captured.

        The payload is copied into an immutable ``bytes`` object, so a
        caller that later mutates its buffer does not alter the capture.

        Raises:
            RuntimeError: If the transport has already been closed.
        """
        if self._closed:
            raise RuntimeError("CapturingMavlinkTransport.write after close")
        chunk = bytes(payload)
        self._chunks.append(chunk)
        # Report the size of what was actually stored. ``len(payload)``
        # counts items, not bytes, for buffers with multi-byte elements,
        # which would disagree with ``bytes_written``.
        return len(chunk)

    def bytes_written(self) -> int:
        """Return the total number of bytes captured so far."""
        return sum(len(c) for c in self._chunks)

    def close(self) -> None:
        """Reject further writes; captured data stays readable.

        Calling this more than once is harmless.
        """
        self._closed = True

    @property
    def captured_payloads(self) -> tuple[bytes, ...]:
        """Tuple of every payload passed to :meth:`write`, in order."""
        return tuple(self._chunks)

    @property
    def captured_concat(self) -> bytes:
        """All captured payloads concatenated — the wire-byte stream."""
        return b"".join(self._chunks)