Files
gps-denied-onboard/tests/unit/replay_input/test_csv_ground_truth.py
T
Oleksandr Bezdieniezhnykh 6be207cef3 [AZ-894] [AZ-896] Add CSV-driven replay adapter + format docs
Replaces the tlog two-clock replay surface with a single-clock path
driven by the Derkachi-schema CSV. --imu is the new required CLI arg;
--tlog stays as a deprecated alias (warned + ignored when --imu set)
until AZ-895 deletes it.

* csv_ground_truth.py parses the 15-column schema, fails fast at
  startup on every documented schema fault (AC-5).
* CsvReplayFcAdapter slots into ReplayInputBundle.fc_adapter alongside
  the tlog sibling; mirrors Invariant-5 outbound wiring; inbound bus is
  intentionally a no-op since the loop reads CSV directly.
* _run_replay_loop branches on imu_csv_path, stamps
  VioOutput.emitted_at_ns from the CSV-derived frame_end_ns (AC-4),
  closing the AZ-848 two-clock surface for the new path.
* AZ-896 ships the operator-facing format spec at
  _docs/02_document/contracts/replay/csv_replay_format.md plus a
  20-row example CSV (AC-3 regression-locked).

Tests: 11 + 12 new unit tests, plus updates to AZ-401 import-boundary
and AZ-402 CLI suites. Full unit suite 2,327 passed / 86 skipped.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-26 18:40:29 +03:00

288 lines
8.5 KiB
Python

"""AZ-894 — CSV-driven IMU + GPS ground-truth extractor.
Covers AC-1 (parses 4,899 IMU + 4,899 GPS samples on a single monotonic
clock) and AC-5 (clear ``ReplayInputAdapterError`` at startup for schema
faults) of ``_docs/02_tasks/todo/AZ-894_csv_driven_replay_adapter.md``.
The happy-path test is gated on the committed Derkachi fixture
(``_docs/00_problem/input_data/flight_derkachi/data_imu.csv``, 4,900
rows + header; the AZ-894 spec text says 4,899, but the AC-1 test pins
the real row count). Schema-fault tests use synthetic CSV strings
written to ``tmp_path`` so they remain deterministic and do not depend
on the fixture being present.
Style: every test follows the Arrange / Act / Assert pattern.
"""
from __future__ import annotations
from pathlib import Path
import pytest
from gps_denied_onboard.replay_input.csv_ground_truth import (
CSV_SOURCE_LABEL,
REQUIRED_COLUMNS,
load_csv_ground_truth,
)
from gps_denied_onboard.replay_input.errors import ReplayInputAdapterError
# Committed Derkachi flight log consumed by the AC-1 happy-path test.
# parents[3] is three directories above this file's own directory (the
# project root when the file sits in tests/unit/replay_input/).
_DERKACHI_CSV: Path = Path(__file__).resolve().parents[3].joinpath(
    "_docs",
    "00_problem",
    "input_data",
    "flight_derkachi",
    "data_imu.csv",
)
# Example CSV shipped next to the AZ-896 format documentation.
# parents[3] is three directories above this file's own directory (the
# project root when the file sits in tests/unit/replay_input/).
_EXAMPLE_CSV: Path = Path(__file__).resolve().parents[3].joinpath(
    "_docs",
    "02_document",
    "contracts",
    "replay",
    "example_data_imu.csv",
)
# ---------------------------------------------------------------------
# Header + minimal-row helpers
def _write_csv(path: Path, header: str, rows: list[str]) -> Path:
    """Write ``header`` followed by ``rows`` to ``path`` as UTF-8 text.

    Every line, including the last one, is newline-terminated. An empty
    ``rows`` list produces a header-only file instead of a header
    followed by a stray blank line.

    Args:
        path: Destination file; overwritten if it already exists.
        header: The header line, without a trailing newline.
        rows: Data lines, each without a trailing newline.

    Returns:
        ``path`` itself, so the call can be used inline in an Arrange step.
    """
    # Joining header and rows in one pass keeps exactly one "\n" per line,
    # whatever the number of rows.
    path.write_text("\n".join([header, *rows]) + "\n", encoding="utf-8")
    return path
def _full_header() -> str:
    """Return the CSV header line: every required column, comma-separated."""
    separator = ","
    return separator.join(REQUIRED_COLUMNS)
def _row(time_s: float, *, prefix_ms: float = 0.0) -> str:
    """Build one 15-field CSV data row for the schema-fault tests.

    Args:
        time_s: Value written into the second field.
        prefix_ms: Value written into the first field.

    Returns:
        The comma-joined row, without a trailing newline.
    """
    # The remaining 13 fields are constants laid out in REQUIRED_COLUMNS
    # order. They only need to be valid floats: the exact magnitudes do
    # not matter here (the loader only validates parseability + range).
    constant_tail = (
        "10",
        "-3",
        "-980",
        "50",
        "30",
        "-5",
        "50.0809634",
        "36.1115442",
        "141290",
        "-4",
        "-6",
        "-88",
        "35041",
    )
    return ",".join((str(prefix_ms), str(time_s), *constant_tail))
# ---------------------------------------------------------------------
# AC-1: happy path on the real Derkachi CSV
@pytest.mark.skipif(
    not _DERKACHI_CSV.is_file(),
    reason="Derkachi fixture data_imu.csv not present",
)
def test_ac1_loads_derkachi_csv_emits_paired_samples() -> None:
    """AC-1: the Derkachi fixture loads as paired GPS/IMU samples on one clock."""
    # Arrange — committed fixture path; nothing to set up.
    # Note: AZ-894 spec mentions "4,899 samples"; the actual fixture
    # spans Time=0.0..489.9 s in 0.1 s steps → 4,900 rows. The concrete
    # count is pinned so truncation is caught, and the first/last
    # timestamps are pinned so the covered span is checked as well.
    expected_count = 4900
    # 489.9 s expressed in nanoseconds. Written as an exact integer so
    # the expectation does not depend on float multiplication followed
    # by int() truncation.
    expected_last_ts_ns = 489_900_000_000

    # Act
    gt = load_csv_ground_truth(_DERKACHI_CSV)

    # Assert
    assert gt.source == CSV_SOURCE_LABEL
    assert len(gt.records) == expected_count
    assert len(gt.imu_samples) == expected_count
    # First row of the fixture has Time=0; last is 489.9 s (10 Hz).
    assert gt.records[0].ts_ns == 0
    assert gt.records[-1].ts_ns == expected_last_ts_ns
    # IMU samples share the same canonical clock as the GPS records.
    for gps, imu in zip(gt.records, gt.imu_samples, strict=True):
        assert gps.ts_ns == imu.ts_ns
# ---------------------------------------------------------------------
# AZ-896 AC-3: the shipped example CSV stays parser-clean
@pytest.mark.skipif(
    not _EXAMPLE_CSV.is_file(),
    reason="AZ-896 example_data_imu.csv not present",
)
def test_az896_example_csv_loads_clean() -> None:
    """AZ-896 AC-3: the shipped example CSV loads without raising."""
    # Arrange — the committed AZ-896 example is used as-is.
    example_path = _EXAMPLE_CSV

    # Act
    loaded = load_csv_ground_truth(example_path)

    # Assert
    assert loaded.source == CSV_SOURCE_LABEL
    record_count = len(loaded.records)
    assert record_count >= 10
    assert record_count == len(loaded.imu_samples)
    # The example starts its clock at zero.
    assert loaded.records[0].ts_ns == 0
# ---------------------------------------------------------------------
# AC-1 (small fixture): paired-sample invariants
def test_paired_imu_and_gps_share_clock(tmp_path: Path) -> None:
    """AC-1 (small fixture): GPS records and IMU samples carry identical timestamps."""
    # Arrange — three rows, given as (time_s, prefix_ms) pairs.
    samples = [
        (0.0, 4551116.348),
        (0.1, 4551216.348),
        (0.2, 4551316.348),
    ]
    data_rows = [_row(time_s, prefix_ms=ms) for time_s, ms in samples]
    csv_path = _write_csv(tmp_path / "ok.csv", _full_header(), data_rows)

    # Act
    loaded = load_csv_ground_truth(csv_path)

    # Assert
    assert len(loaded.records) == 3
    assert len(loaded.imu_samples) == 3
    expected_ns = [0, 100_000_000, 200_000_000]
    gps_clock = [record.ts_ns for record in loaded.records]
    imu_clock = [sample.ts_ns for sample in loaded.imu_samples]
    assert gps_clock == expected_ns
    assert imu_clock == expected_ns
def test_gps_unit_conversion(tmp_path: Path) -> None:
    """The loaded GPS fix exposes degrees, metres and m/s."""
    # Arrange — values exercise the deg/mm/cm-s/cdeg conversions.
    clock_fields = ("0.0", "0.0")
    imu_fields = ("10", "-3", "-980", "50", "30", "-5")  # IMU stays raw
    gps_fields = (
        "50.0809634",  # lat already in degrees
        "36.1115442",  # lon already in degrees
        "141290",  # alt in mm → 141.290 m
        "-400",  # vx in cm/s → -4.0 m/s
        "600",  # vy in cm/s → 6.0 m/s
        "-88",  # vz in cm/s → -0.88 m/s
        "35041",  # hdg in cdeg → 350.41 deg
    )
    data_row = ",".join(clock_fields + imu_fields + gps_fields)
    csv_path = _write_csv(tmp_path / "units.csv", _full_header(), [data_row])

    # Act
    loaded = load_csv_ground_truth(csv_path)

    # Assert — each attribute of the first fix against its converted value.
    fix = loaded.records[0]
    expected_by_attr = {
        "lat_deg": 50.0809634,
        "lon_deg": 36.1115442,
        "alt_m": 141.290,
        "vx_m_s": -4.0,
        "vy_m_s": 6.0,
        "vz_m_s": -0.88,
        "hdg_deg": 350.41,
    }
    for attr, expected in expected_by_attr.items():
        assert getattr(fix, attr) == pytest.approx(expected), attr
# ---------------------------------------------------------------------
# AC-5: schema faults raise ReplayInputAdapterError at startup
def test_ac5_file_not_found_raises(tmp_path: Path) -> None:
    """AC-5: pointing the loader at a nonexistent file raises the adapter error."""
    # Arrange — a path inside tmp_path that is never created.
    absent_path = tmp_path.joinpath("absent.csv")

    # Act + Assert
    with pytest.raises(ReplayInputAdapterError, match="CSV file not found"):
        load_csv_ground_truth(absent_path)
def test_ac5_missing_required_column_raises(tmp_path: Path) -> None:
    """AC-5: a header lacking one required column raises the adapter error."""
    # Arrange — drop one required column from the header.
    dropped_column = "SCALED_IMU2.xacc"
    kept_columns = [col for col in REQUIRED_COLUMNS if col != dropped_column]
    # 14 values, one fewer than the full header has columns.
    short_row = "0,0,-3,-980,50,30,-5,50.0,36.0,141290,-4,-6,-88,35041"
    csv_path = _write_csv(
        tmp_path / "missing_col.csv",
        ",".join(kept_columns),
        [short_row],
    )

    # Act + Assert
    with pytest.raises(ReplayInputAdapterError, match="missing required columns"):
        load_csv_ground_truth(csv_path)
def test_ac5_nan_in_time_raises(tmp_path: Path) -> None:
    """AC-5: a NaN in the Time column raises the adapter error."""
    # Arrange — a valid first row followed by a row whose time is NaN.
    row_times = (0.0, float("nan"))
    csv_path = _write_csv(
        tmp_path / "nan_time.csv",
        _full_header(),
        [_row(time_s) for time_s in row_times],
    )

    # Act + Assert
    with pytest.raises(ReplayInputAdapterError, match="Time=.*is NaN/Inf"):
        load_csv_ground_truth(csv_path)
def test_ac5_non_monotonic_time_raises(tmp_path: Path) -> None:
    """AC-5: a Time value that goes backwards raises the adapter error."""
    # Arrange — the second row is earlier than the first.
    decreasing_times = (0.1, 0.0)
    csv_path = _write_csv(
        tmp_path / "non_monotonic.csv",
        _full_header(),
        [_row(time_s) for time_s in decreasing_times],
    )

    # Act + Assert
    with pytest.raises(ReplayInputAdapterError, match="non-monotonic Time"):
        load_csv_ground_truth(csv_path)
def test_ac5_repeated_time_also_non_monotonic(tmp_path: Path) -> None:
    """AC-5: two rows with the same Time are rejected as non-monotonic."""
    # Arrange — equal timestamps still violate strict monotonicity so
    # the preintegrator never gets fed a zero-delta window.
    duplicated_time = 0.0
    data_rows = [_row(duplicated_time), _row(duplicated_time)]
    csv_path = _write_csv(tmp_path / "repeated.csv", _full_header(), data_rows)

    # Act + Assert
    with pytest.raises(ReplayInputAdapterError, match="non-monotonic Time"):
        load_csv_ground_truth(csv_path)
def test_ac5_non_numeric_imu_value_raises(tmp_path: Path) -> None:
    """AC-5: an unparseable token in an IMU column raises the adapter error."""
    # Arrange — substitute a non-parseable token in the IMU column.
    bad_token = "not-a-number"  # third field: SCALED_IMU2.xacc
    data_row = (
        f"0.0,0.0,{bad_token},-3,-980,50,30,-5,"
        "50.0,36.0,141290,-4,-6,-88,35041"
    )
    csv_path = _write_csv(tmp_path / "bad_imu.csv", _full_header(), [data_row])

    # Act + Assert
    expected_message = r"SCALED_IMU2\.xacc=.*is not a number"
    with pytest.raises(ReplayInputAdapterError, match=expected_message):
        load_csv_ground_truth(csv_path)
def test_ac5_header_only_raises(tmp_path: Path) -> None:
    """AC-5: a CSV with a header but no data rows raises the adapter error."""
    # Arrange — header but no data rows; the file is written directly so
    # it holds exactly one newline-terminated line.
    csv_path = tmp_path / "header_only.csv"
    csv_path.write_text(f"{_full_header()}\n", encoding="utf-8")

    # Act + Assert
    with pytest.raises(ReplayInputAdapterError, match="no data rows"):
        load_csv_ground_truth(csv_path)