[AZ-894] [AZ-896] Add CSV-driven replay adapter + format docs

Replaces the tlog two-clock replay surface with a single-clock path
driven by the Derkachi-schema CSV. --imu is the new required CLI arg;
--tlog stays as a deprecated alias (warned + ignored when --imu set)
until AZ-895 deletes it.

* csv_ground_truth.py parses the 15-column schema, fails fast at
  startup on every documented schema fault (AC-5).
* CsvReplayFcAdapter slots into ReplayInputBundle.fc_adapter alongside
  the tlog sibling; mirrors Invariant-5 outbound wiring; inbound bus is
  intentionally a no-op since the loop reads CSV directly.
* _run_replay_loop branches on imu_csv_path, stamps
  VioOutput.emitted_at_ns from the CSV-derived frame_end_ns (AC-4),
  closing the AZ-848 two-clock surface for the new path.
* AZ-896 ships the operator-facing format spec at
  _docs/02_document/contracts/replay/csv_replay_format.md plus a
  20-row example CSV (AC-3 regression-locked).

Tests: 11 + 12 new unit tests, plus updates to AZ-401 import-boundary
and AZ-402 CLI suites. Full unit suite 2,327 passed / 86 skipped.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-26 18:40:29 +03:00
parent 3020779404
commit 6be207cef3
19 changed files with 1833 additions and 93 deletions
+10 -2
View File
@@ -69,6 +69,7 @@ class DerkachiReplayInputs:
video_path: Path
tlog_path: Path
imu_csv_path: Path
calibration_path: Path
config_path: Path
signing_key_path: Path
@@ -170,6 +171,7 @@ def derkachi_replay_inputs(tmp_path_factory: pytest.TempPathFactory) -> Derkachi
return DerkachiReplayInputs(
video_path=video_path,
tlog_path=tlog_path,
imu_csv_path=csv_path,
calibration_path=_calibration_path(),
config_path=config_path,
signing_key_path=signing_key_path,
@@ -241,8 +243,8 @@ def replay_runner(derkachi_replay_inputs: DerkachiReplayInputs) -> Any:
binary,
"--video",
str(derkachi_replay_inputs.video_path),
"--tlog",
str(derkachi_replay_inputs.tlog_path),
"--imu",
str(derkachi_replay_inputs.imu_csv_path),
"--output",
str(out_path),
"--camera-calibration",
@@ -254,6 +256,12 @@ def replay_runner(derkachi_replay_inputs: DerkachiReplayInputs) -> Any:
"--pace",
pace,
]
# --tlog is deprecated under AZ-894 but we still forward it
# when the synth tlog exists, so the legacy-path e2e tests
# (test_derkachi_real_tlog.py) keep exercising the deprecation
# warning until AZ-895 deletes the path entirely.
if derkachi_replay_inputs.tlog_path.is_file():
argv.extend(["--tlog", str(derkachi_replay_inputs.tlog_path)])
if time_offset_ms is not None:
argv.extend(["--time-offset-ms", str(time_offset_ms)])
if skip_auto_sync:
@@ -0,0 +1,249 @@
"""AZ-894 — ``CsvReplayFcAdapter`` unit tests.
Focused contract coverage for the thin :class:`FcAdapter` sibling that
backs the CSV-driven replay input. The functional inbound/outbound
plumbing the runtime loop relies on (CSV parsing, frame-stamped IMU
draining, ESKF cold-start origin) is exercised in
``tests/unit/replay_input/test_csv_ground_truth.py`` and the AZ-404
e2e harness; here we pin the Protocol surface (open/close idempotency,
build-flag refusal, source-set refusal, transport-less emit refusal)
so a refactor of the adapter cannot silently regress Invariant 5 or
the FcAdapter Protocol parity that the composition root depends on.
Style: every test follows the Arrange / Act / Assert pattern.
"""
from __future__ import annotations
from pathlib import Path
import pytest
from gps_denied_onboard._types.fc import FcKind, FlightState, Severity
from gps_denied_onboard.components.c8_fc_adapter.csv_replay_adapter import (
CsvReplayFcAdapter,
)
from gps_denied_onboard.components.c8_fc_adapter.errors import (
FcAdapterConfigError,
FcEmitError,
FcOpenError,
SourceSetSwitchNotSupportedError,
)
from gps_denied_onboard.components.c8_fc_adapter.tlog_replay_adapter import (
ReplayPace,
)
@pytest.fixture(autouse=True)
def _build_flag_on(monkeypatch: pytest.MonkeyPatch) -> None:
    """Set ``BUILD_CSV_REPLAY_ADAPTER=ON`` in the environment for each test.

    Declared ``autouse`` so tests do not have to request it; a test that
    needs the flag off overrides the variable itself.
    """
    flag_name, flag_value = "BUILD_CSV_REPLAY_ADAPTER", "ON"
    monkeypatch.setenv(flag_name, flag_value)
class _FakeClock:
    """Clock test double: each monotonic reading is one larger than the last."""

    def __init__(self) -> None:
        # Number of readings handed out so far; the first reading is 1.
        self._ticks = 0

    def monotonic_ns(self) -> int:
        """Advance the internal counter by one and return its new value."""
        reading = self._ticks + 1
        self._ticks = reading
        return reading

    def sleep_until_ns(self, _: int) -> None:  # pragma: no cover — unused.
        """Accept a deadline and return immediately without sleeping."""
        return None
class _FakeFdr:
    """FDR client test double that discards every record.

    (CsvReplayFcAdapter does not emit FDR yet, so nothing is captured.)
    """

    def enqueue(self, _record: object) -> None:  # pragma: no cover — unused.
        """Ignore ``_record`` and return ``None``."""
        return None
@pytest.fixture()
def csv_file(tmp_path: Path) -> Path:
    """Create a placeholder ``data_imu.csv`` under ``tmp_path`` and return it."""
    # The adapter only checks that the file exists at open() time, so
    # the content does not need to be a parseable CSV here (parsing
    # lives in csv_ground_truth.load_csv_ground_truth, tested separately).
    placeholder = tmp_path / "data_imu.csv"
    placeholder.write_text("placeholder", encoding="utf-8")
    return placeholder
def _make_adapter(csv_path: Path) -> CsvReplayFcAdapter:
    """Build a ``CsvReplayFcAdapter`` for ``csv_path`` wired to this module's fakes.

    Uses the ArduPilot Plane dialect, a fresh ``_FakeClock`` / ``_FakeFdr``
    pair and ASAP pacing; no transport is passed.
    """
    fake_clock = _FakeClock()
    fake_fdr = _FakeFdr()
    return CsvReplayFcAdapter(
        csv_path=csv_path,
        target_fc_dialect=FcKind.ARDUPILOT_PLANE,
        clock=fake_clock,
        fdr_client=fake_fdr,
        pace=ReplayPace.ASAP,
    )
# ----------------------------------------------------------------------
# Build flag
def test_construction_refused_when_build_flag_off(
    monkeypatch: pytest.MonkeyPatch, csv_file: Path
) -> None:
    """Constructing the adapter with the build flag ``OFF`` raises a config error."""
    # Arrange — replace the "ON" value installed by the autouse fixture.
    monkeypatch.setenv("BUILD_CSV_REPLAY_ADAPTER", "OFF")
    expected_failure = pytest.raises(
        FcAdapterConfigError, match="BUILD_CSV_REPLAY_ADAPTER"
    )

    # Act + Assert
    with expected_failure:
        _make_adapter(csv_file)
def test_construction_rejects_non_path(csv_file: Path) -> None:
    """A ``str`` handed over as ``csv_path`` is rejected with a config error."""
    # Arrange — deliberately a str rather than a pathlib.Path.
    path_as_text = str(csv_file)

    # Act + Assert
    with pytest.raises(FcAdapterConfigError, match="csv_path must be a pathlib.Path"):
        CsvReplayFcAdapter(
            csv_path=path_as_text,  # type: ignore[arg-type]
            target_fc_dialect=FcKind.ARDUPILOT_PLANE,
            clock=_FakeClock(),
            fdr_client=_FakeFdr(),
            pace=ReplayPace.ASAP,
        )
def test_construction_rejects_unknown_dialect(csv_file: Path) -> None:
    """``FcKind.GCS_QGC`` as ``target_fc_dialect`` is rejected with a config error."""
    # Arrange
    rejected_dialect = FcKind.GCS_QGC

    # Act + Assert
    with pytest.raises(FcAdapterConfigError, match="target_fc_dialect"):
        CsvReplayFcAdapter(
            csv_path=csv_file,
            target_fc_dialect=rejected_dialect,
            clock=_FakeClock(),
            fdr_client=_FakeFdr(),
            pace=ReplayPace.ASAP,
        )
# ----------------------------------------------------------------------
# open / close
def test_open_refused_when_csv_missing(tmp_path: Path) -> None:
    """``open()`` raises ``FcOpenError`` when the CSV path does not exist."""
    # Arrange — nothing is ever written at this path.
    missing_csv = tmp_path / "absent.csv"
    adapter = _make_adapter(missing_csv)

    # Act + Assert
    with pytest.raises(FcOpenError, match="CSV file not found"):
        adapter.open()
def test_double_open_raises(csv_file: Path) -> None:
    """A second ``open()`` on an already-open adapter raises ``FcOpenError``."""
    # Arrange — the first open() succeeds against the placeholder file.
    opened_adapter = _make_adapter(csv_file)
    opened_adapter.open()
    expected_failure = pytest.raises(FcOpenError, match="already opened")

    # Act + Assert
    with expected_failure:
        opened_adapter.open()
def test_close_is_idempotent_before_open(csv_file: Path) -> None:
    """Calling ``close()`` twice on a never-opened adapter does not raise."""
    # Arrange
    never_opened = _make_adapter(csv_file)

    # Act — close() before open() is a documented no-op (parity with tlog).
    for _ in range(2):
        never_opened.close()

    # Assert — reaching this line means neither call raised.
    assert True
def test_close_is_idempotent_after_open(csv_file: Path) -> None:
    """Calling ``close()`` twice on an opened adapter does not raise."""
    # Arrange
    opened_adapter = _make_adapter(csv_file)
    opened_adapter.open()

    # Act
    for _ in range(2):
        opened_adapter.close()

    # Assert — reaching this line means neither call raised.
    assert True
# ----------------------------------------------------------------------
# Protocol parity
def test_subscribe_returns_real_subscription_handle(csv_file: Path) -> None:
    """``subscribe_telemetry`` returns a handle that offers ``cancel()``."""
    # Arrange
    adapter = _make_adapter(csv_file)
    adapter.open()

    def _ignore_frame(_frame: object) -> None:
        return None

    # Act
    handle = adapter.subscribe_telemetry(_ignore_frame)

    # Assert — the handle exposes cancel() even though the bus is
    # intentionally never fed (the replay loop reads the CSV directly).
    assert hasattr(handle, "cancel")
    handle.cancel()
def test_source_set_switch_unsupported(csv_file: Path) -> None:
    """``request_source_set_switch()`` raises on an unopened CSV adapter."""
    # Arrange — the adapter is deliberately not opened.
    adapter = _make_adapter(csv_file)
    expected_failure = pytest.raises(SourceSetSwitchNotSupportedError)

    # Act + Assert
    with expected_failure:
        adapter.request_source_set_switch()
def test_current_flight_state_returns_init_signal(csv_file: Path) -> None:
    """``current_flight_state()`` reports INIT with no GPS hint or age."""
    # Arrange — the CSV carries no MAVLink HEARTBEAT, so there is nothing
    # for the adapter to latch; the contract is an INIT-state signal.
    adapter = _make_adapter(csv_file)

    # Act
    flight_signal = adapter.current_flight_state()

    # Assert
    assert flight_signal.state is FlightState.INIT
    assert flight_signal.last_valid_gps_hint_wgs84 is None
    assert flight_signal.last_valid_gps_age_ms is None
# ----------------------------------------------------------------------
# Outbound (Invariant 5)
def test_emit_external_position_raises_without_transport(csv_file: Path) -> None:
    """``emit_external_position`` raises ``FcEmitError`` when no transport is set."""
    # Arrange — no MavlinkTransport is injected, so the adapter falls back
    # to the AZ-399 raise-on-emit contract, mirroring TlogReplayFcAdapter.
    adapter = _make_adapter(csv_file)
    adapter.open()
    stand_in_output = _dummy_estimator_output()

    # Act + Assert
    with pytest.raises(FcEmitError, match="does not emit"):
        adapter.emit_external_position(stand_in_output)
def test_emit_status_text_raises_without_transport(csv_file: Path) -> None:
    """``emit_status_text`` raises ``FcEmitError`` when no transport is set."""
    # Arrange
    adapter = _make_adapter(csv_file)
    adapter.open()
    expected_failure = pytest.raises(FcEmitError, match="does not emit")

    # Act + Assert
    with expected_failure:
        adapter.emit_status_text("hello", severity=Severity.INFO)
# ----------------------------------------------------------------------
# Helpers
def _dummy_estimator_output() -> object:
    """Return an empty ``SimpleNamespace`` standing in for an estimator output."""
    # The transport-less emit path raises FcEmitError before it reads any
    # field, so an attribute-free stand-in suffices. Building a complete
    # EstimatorOutput (UUID frame_id, 6x6 covariance, etc.) would only
    # obscure the contract under test.
    import types

    return types.SimpleNamespace()
@@ -0,0 +1,287 @@
"""AZ-894 — CSV-driven IMU + GPS ground-truth extractor.
Covers AC-1 (parses 4,899 IMU + 4,899 GPS samples on a single monotonic
clock) and AC-5 (clear ``ReplayInputAdapterError`` at startup for schema
faults) of ``_docs/02_tasks/todo/AZ-894_csv_driven_replay_adapter.md``.
The happy-path test is gated on the committed Derkachi fixture
(``_docs/00_problem/input_data/flight_derkachi/data_imu.csv``, 4,900
rows + header). Schema-fault tests use synthetic CSV strings written
to ``tmp_path`` so they remain deterministic and do not depend on the
fixture being present.
Style: every test follows the Arrange / Act / Assert pattern.
"""
from __future__ import annotations
from pathlib import Path
import pytest
from gps_denied_onboard.replay_input.csv_ground_truth import (
CSV_SOURCE_LABEL,
REQUIRED_COLUMNS,
load_csv_ground_truth,
)
from gps_denied_onboard.replay_input.errors import ReplayInputAdapterError
_DERKACHI_CSV: Path = (
Path(__file__).resolve().parents[3]
/ "_docs"
/ "00_problem"
/ "input_data"
/ "flight_derkachi"
/ "data_imu.csv"
)
_EXAMPLE_CSV: Path = (
Path(__file__).resolve().parents[3]
/ "_docs"
/ "02_document"
/ "contracts"
/ "replay"
/ "example_data_imu.csv"
)
# ---------------------------------------------------------------------
# Header + minimal-row helpers
def _write_csv(path: Path, header: str, rows: list[str]) -> Path:
    """Write ``header`` plus ``rows`` to ``path`` as UTF-8 text and return ``path``.

    Each line is newline-terminated. The header and rows are joined in a
    single pass so that an empty ``rows`` list yields exactly
    ``header + "\\n"`` rather than a header followed by a stray blank line.
    Output for a non-empty ``rows`` is unchanged.

    Args:
        path: Destination file; overwritten if it already exists.
        header: The CSV header line, without a trailing newline.
        rows: Data lines, each without a trailing newline.

    Returns:
        The same ``path``, so the call can be used inline.
    """
    lines = [header, *rows]
    path.write_text("\n".join(lines) + "\n", encoding="utf-8")
    return path
def _full_header() -> str:
    """Return the CSV header line: every ``REQUIRED_COLUMNS`` name, comma-joined."""
    column_names = REQUIRED_COLUMNS
    return ",".join(column_names)
def _row(time_s: float, *, prefix_ms: float = 0.0) -> str:
    """Render one 15-field CSV data row as a comma-joined string.

    The first field is ``str(prefix_ms)`` and the second is ``str(time_s)``;
    the remaining thirteen fields are fixed literals. The field order is
    meant to match ``REQUIRED_COLUMNS``. The literals are valid floats and
    their exact magnitudes do not matter for these tests (the loader only
    validates parseability + range).
    """
    clock_fields = (str(prefix_ms), str(time_s))
    fixed_fields = (
        "10",
        "-3",
        "-980",
        "50",
        "30",
        "-5",
        "50.0809634",
        "36.1115442",
        "141290",
        "-4",
        "-6",
        "-88",
        "35041",
    )
    return ",".join(clock_fields + fixed_fields)
# ---------------------------------------------------------------------
# AC-1: happy path on the real Derkachi CSV
@pytest.mark.skipif(
    not _DERKACHI_CSV.is_file(),
    reason="Derkachi fixture data_imu.csv not present",
)
def test_ac1_loads_derkachi_csv_emits_paired_samples() -> None:
    """The Derkachi fixture loads into equally many GPS and IMU samples on one clock."""
    # Arrange — the fixture is committed, so there is nothing to build.
    # The AZ-894 spec mentions "4,899 samples", but the fixture spans
    # Time=0.0..489.9 s in 0.1 s steps, which is 4,900 rows. The exact
    # count is pinned to catch truncation; the first/last timestamps are
    # pinned as the span-derived invariant.
    expected_count = 4900
    first_ts_ns = 0
    last_ts_ns = int(489.9 * 1e9)

    # Act
    ground_truth = load_csv_ground_truth(_DERKACHI_CSV)

    # Assert
    assert ground_truth.source == CSV_SOURCE_LABEL
    assert len(ground_truth.records) == expected_count
    assert len(ground_truth.imu_samples) == expected_count
    # The fixture's first row has Time=0 and its last is 489.9 s (10 Hz).
    assert ground_truth.records[0].ts_ns == first_ts_ns
    assert ground_truth.records[-1].ts_ns == last_ts_ns
    # Every IMU sample carries the same timestamp as its GPS record.
    assert all(
        gps.ts_ns == imu.ts_ns
        for gps, imu in zip(
            ground_truth.records, ground_truth.imu_samples, strict=True
        )
    )
# ---------------------------------------------------------------------
# AZ-896 AC-3: the shipped example CSV stays parser-clean
@pytest.mark.skipif(
    not _EXAMPLE_CSV.is_file(),
    reason="AZ-896 example_data_imu.csv not present",
)
def test_az896_example_csv_loads_clean() -> None:
    """The shipped AZ-896 example CSV loads without a schema error."""
    # Arrange — the example is committed, so there is nothing to build.
    minimum_rows = 10

    # Act
    ground_truth = load_csv_ground_truth(_EXAMPLE_CSV)

    # Assert
    gps_count = len(ground_truth.records)
    assert ground_truth.source == CSV_SOURCE_LABEL
    assert gps_count >= minimum_rows
    assert gps_count == len(ground_truth.imu_samples)
    assert ground_truth.records[0].ts_ns == 0
# ---------------------------------------------------------------------
# AC-1 (small fixture): paired-sample invariants
def test_paired_imu_and_gps_share_clock(tmp_path: Path) -> None:
    """Three rows 0.1 s apart yield GPS and IMU samples with identical ``ts_ns``."""
    # Arrange — a large prefix_ms in the first column, Time in 0.1 s steps.
    data_rows = [
        _row(0.0, prefix_ms=4551116.348),
        _row(0.1, prefix_ms=4551216.348),
        _row(0.2, prefix_ms=4551316.348),
    ]
    csv = _write_csv(tmp_path / "ok.csv", _full_header(), data_rows)
    expected_ns = [0, 100_000_000, 200_000_000]

    # Act
    ground_truth = load_csv_ground_truth(csv)

    # Assert
    assert len(ground_truth.records) == 3
    assert len(ground_truth.imu_samples) == 3
    gps_clock = [record.ts_ns for record in ground_truth.records]
    assert gps_clock == expected_ns
    imu_clock = [sample.ts_ns for sample in ground_truth.imu_samples]
    assert imu_clock == expected_ns
def test_gps_unit_conversion(tmp_path: Path) -> None:
    """GPS columns come back converted to degrees, metres and metres per second."""
    # Arrange — the values exercise the deg/mm/cm-s/cdeg conversions.
    clock_fields = ["0.0", "0.0"]
    imu_fields = ["10", "-3", "-980", "50", "30", "-5"]  # IMU stays raw
    gps_fields = [
        "50.0809634",  # lat already in degrees
        "36.1115442",  # lon already in degrees
        "141290",  # alt in mm → 141.290 m
        "-400",  # vx in cm/s → -4.0 m/s
        "600",  # vy in cm/s → 6.0 m/s
        "-88",  # vz in cm/s → -0.88 m/s
        "35041",  # hdg in cdeg → 350.41 deg
    ]
    data_row = ",".join(clock_fields + imu_fields + gps_fields)
    csv = _write_csv(tmp_path / "units.csv", _full_header(), [data_row])

    # Act
    ground_truth = load_csv_ground_truth(csv)

    # Assert
    first_fix = ground_truth.records[0]
    assert first_fix.lat_deg == pytest.approx(50.0809634)
    assert first_fix.lon_deg == pytest.approx(36.1115442)
    assert first_fix.alt_m == pytest.approx(141.290)
    assert first_fix.vx_m_s == pytest.approx(-4.0)
    assert first_fix.vy_m_s == pytest.approx(6.0)
    assert first_fix.vz_m_s == pytest.approx(-0.88)
    assert first_fix.hdg_deg == pytest.approx(350.41)
# ---------------------------------------------------------------------
# AC-5: schema faults raise ReplayInputAdapterError at startup
def test_ac5_file_not_found_raises(tmp_path: Path) -> None:
    """Loading a path that does not exist raises ``ReplayInputAdapterError``."""
    # Arrange — nothing is ever written at this path.
    absent_csv = tmp_path / "absent.csv"
    expected_failure = pytest.raises(
        ReplayInputAdapterError, match="CSV file not found"
    )

    # Act + Assert
    with expected_failure:
        load_csv_ground_truth(absent_csv)
def test_ac5_missing_required_column_raises(tmp_path: Path) -> None:
    """A header lacking ``SCALED_IMU2.xacc`` is rejected as missing a required column."""
    # Arrange — remove one required column from the header; the data row
    # carries one field fewer to match.
    dropped_column = "SCALED_IMU2.xacc"
    kept_columns = [name for name in REQUIRED_COLUMNS if name != dropped_column]
    bad_header = ",".join(kept_columns)
    data_rows = ["0,0,-3,-980,50,30,-5,50.0,36.0,141290,-4,-6,-88,35041"]
    csv = _write_csv(tmp_path / "missing_col.csv", bad_header, data_rows)

    # Act + Assert
    with pytest.raises(ReplayInputAdapterError, match="missing required columns"):
        load_csv_ground_truth(csv)
def test_ac5_nan_in_time_raises(tmp_path: Path) -> None:
    """A NaN in the Time column of the second row is rejected at load time."""
    # Arrange — a valid first row followed by a row whose Time is NaN.
    data_rows = [_row(0.0), _row(float("nan"))]
    csv = _write_csv(tmp_path / "nan_time.csv", _full_header(), data_rows)

    # Act + Assert
    with pytest.raises(ReplayInputAdapterError, match="Time=.*is NaN/Inf"):
        load_csv_ground_truth(csv)
def test_ac5_non_monotonic_time_raises(tmp_path: Path) -> None:
    """A Time value that goes backwards between rows is rejected at load time."""
    # Arrange — the second row's Time (0.0) precedes the first (0.1).
    data_rows = [_row(0.1), _row(0.0)]
    csv = _write_csv(tmp_path / "non_monotonic.csv", _full_header(), data_rows)

    # Act + Assert
    with pytest.raises(ReplayInputAdapterError, match="non-monotonic Time"):
        load_csv_ground_truth(csv)
def test_ac5_repeated_time_also_non_monotonic(tmp_path: Path) -> None:
    """Two rows sharing the same Time are rejected as non-monotonic."""
    # Arrange — equal timestamps still break strict monotonicity, so the
    # preintegrator is never handed a zero-delta window.
    data_rows = [_row(0.0), _row(0.0)]
    csv = _write_csv(tmp_path / "repeated.csv", _full_header(), data_rows)

    # Act + Assert
    with pytest.raises(ReplayInputAdapterError, match="non-monotonic Time"):
        load_csv_ground_truth(csv)
def test_ac5_non_numeric_imu_value_raises(tmp_path: Path) -> None:
    """A non-numeric token in ``SCALED_IMU2.xacc`` is rejected at load time."""
    # Arrange — put an unparseable token in the third (IMU xacc) field.
    clock_fields = ["0.0", "0.0"]
    imu_fields = ["not-a-number", "-3", "-980", "50", "30", "-5"]
    gps_fields = ["50.0", "36.0", "141290", "-4", "-6", "-88", "35041"]
    data_row = ",".join(clock_fields + imu_fields + gps_fields)
    csv = _write_csv(tmp_path / "bad_imu.csv", _full_header(), [data_row])
    expected_failure = pytest.raises(
        ReplayInputAdapterError,
        match=r"SCALED_IMU2\.xacc=.*is not a number",
    )

    # Act + Assert
    with expected_failure:
        load_csv_ground_truth(csv)
def test_ac5_header_only_raises(tmp_path: Path) -> None:
    """A CSV holding only the header line is rejected for having no data rows."""
    # Arrange — write the header line and nothing after it.
    csv = tmp_path / "header_only.csv"
    header_line = _full_header() + "\n"
    csv.write_text(header_line, encoding="utf-8")

    # Act + Assert
    with pytest.raises(ReplayInputAdapterError, match="no data rows"):
        load_csv_ground_truth(csv)
+13 -5
View File
@@ -491,12 +491,14 @@ def test_ac8_replay_branch_imports_only_public_apis() -> None:
tree = ast.parse(text)
# Allowed deep imports: into the c8_fc_adapter component (the
# noop transport + the JSONL sink) and into the `replay_input`
# cross-cutting coordinator (Layer-4). Both are documented in
# module-layout.md as the replay strategy homes.
# noop transport + the JSONL sink + the AZ-894 CSV replay adapter)
# and into the `replay_input` cross-cutting coordinator (Layer-4).
# All of these are documented in module-layout.md as the replay
# strategy homes.
allowed_deep_prefixes = (
"gps_denied_onboard.components.c8_fc_adapter.noop_mavlink_transport",
"gps_denied_onboard.components.c8_fc_adapter.replay_sink",
"gps_denied_onboard.components.c8_fc_adapter.csv_replay_adapter",
"gps_denied_onboard.replay_input.tlog_video_adapter",
)
@@ -632,9 +634,14 @@ def test_replay_branch_rejects_empty_video_path(
build_replay_components(config)
def test_replay_branch_rejects_empty_tlog_path(
def test_replay_branch_rejects_both_inputs_empty(
_airborne_replay_env: Path,
) -> None:
# AZ-894: the validation gate now accepts either imu_csv_path
# (canonical) or tlog_path (legacy) — rejecting only when both
# are empty. Keeping the historical name pattern (test_*_rejects_*)
# for grep parity but renamed to reflect the new semantics.
# Arrange
runtime_cfg = RuntimeConfig(camera_calibration_path=str(_airborne_replay_env))
config = Config(
@@ -642,6 +649,7 @@ def test_replay_branch_rejects_empty_tlog_path(
replay=ReplayConfig(
video_path="/dev/null/fake.mp4",
tlog_path="",
imu_csv_path="",
output_path="/tmp/out.jsonl",
pace="asap",
target_fc_dialect="ardupilot_plane",
@@ -650,7 +658,7 @@ def test_replay_branch_rejects_empty_tlog_path(
)
# Act / Assert
with pytest.raises(CompositionError, match="tlog_path is empty"):
with pytest.raises(CompositionError, match="imu_csv_path is empty"):
build_replay_components(config)
+7
View File
@@ -57,6 +57,10 @@ def _required_files(tmp_path: Path, _calib_payload: dict[str, Any]) -> dict[str,
video.write_bytes(b"\x00\x00\x00\x18ftypmp42") # placeholder
tlog = tmp_path / "flight.tlog"
tlog.write_bytes(b"\x00")
imu_csv = tmp_path / "data_imu.csv"
# Minimal placeholder — the CLI only validates existence, parsing
# happens later inside the runtime loop.
imu_csv.write_text("placeholder", encoding="utf-8")
output = tmp_path / "out.jsonl"
calib = tmp_path / "calib.json"
calib.write_text(json.dumps(_calib_payload))
@@ -67,6 +71,7 @@ def _required_files(tmp_path: Path, _calib_payload: dict[str, Any]) -> dict[str,
return {
"video": video,
"tlog": tlog,
"imu": imu_csv,
"output": output,
"camera_calibration": calib,
"config": config_yaml,
@@ -95,6 +100,7 @@ def _argv(files: dict[str, Path], **overrides: Any) -> list[str]:
"""Build a CLI argv from the required-files fixture + overrides."""
base = {
"--video": str(files["video"]),
"--imu": str(files["imu"]),
"--tlog": str(files["tlog"]),
"--output": str(files["output"]),
"--camera-calibration": str(files["camera_calibration"]),
@@ -477,6 +483,7 @@ def test_ac10_console_script_runs_help() -> None:
# Required-arg surface check
for arg in (
"--video",
"--imu",
"--tlog",
"--output",
"--camera-calibration",