[AZ-959] replay_api: POST /replay (video,csv) + /static/example-csv

Extend the AZ-701 replay_api POST /replay endpoint so AZ-897 (now
in ../ui repo) can drive the AZ-894 CSV-replay path. The endpoint
keeps full back-compat for tlog clients and adds:

- (video, tlog) OR (video, csv) multipart with strict XOR enforced
  at the API boundary (AC-2 / AC-3 → 400 multipart_missing_field)
- validate_csv_kind: rejects malformed CSV schema at boundary by
  scanning the header line for AZ-896 required tokens; messages
  point at csv_replay_format.md (AC-4)
- ReplayInputs DTO: tlog_path / csv_path are now Path | None with
  XOR re-enforced in __post_init__ for internal callers
- JobStorage reserves both input.tlog and input.csv paths; handler
  writes exactly one
- SubprocessReplayRunner.run dispatches --imu vs --tlog argv (AC-1)
- _maybe_render_report dispatches load_csv_ground_truth vs
  load_tlog_ground_truth; CsvGpsFix and TlogGpsFix have
  field-compatible shapes for the GroundTruthRow adapter (AC-6)
- GET /static/example-csv serves the AZ-896 reference CSV; honours
  REPLAY_API_EXAMPLE_CSV_PATH env, falls back to source-checkout
  layout, returns 503 with example_csv_unavailable when neither
  resolves to a readable file. No auth required (AC-5)

Tests: 27/27 unit tests green:
- 18 pre-existing tlog-path tests unchanged (AC-7)
- 9 new tests covering ACs 1-6 + validate_csv_kind isolation

Deferred (NOT silently fixed; reported to user as end-of-turn
notes for scope discipline):

- gps-denied-render-map only consumes binary tlog truth today, so
  CSV-path jobs return map_html_url=None. Extending render-map to
  dispatch on truth-file extension is AZ-700 follow-up territory.
- ReportContext.tlog_path field is now overloaded as the
  "ground-truth source path"; the rendered report still labels
  the line "Tlog: <csv_path>" which is cosmetically misleading
  for CSV runs. Field rename + label fix is AZ-699 follow-up.

Bookkeeping: AZ-959 spec moved from todo/ to done/; the fifth bump
of the dep-table preamble documents what landed and what is
deferred; state.md records batch 5 as complete and what comes next.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-29 12:45:25 +03:00
parent 05fcacffa3
commit 1d18e25cf4
8 changed files with 476 additions and 17 deletions
@@ -38,6 +38,7 @@ from gps_denied_onboard.replay_api import (
create_app,
)
from gps_denied_onboard.replay_api.handlers import (
validate_csv_kind,
validate_tlog_kind,
validate_video_kind,
)
@@ -107,6 +108,30 @@ def _valid_calibration_bytes() -> bytes:
return b'{"focal_length": 1, "acquisition_method": "factory-sheet"}'
def _valid_csv_bytes() -> bytes:
"""Minimal AZ-896-schema CSV with 2 data rows.
Header tokens match
``_docs/02_document/contracts/replay/csv_replay_format.md``.
Values are minimal-but-valid; the API-boundary validator only
checks the header, the per-row checks live in
``csv_ground_truth.py`` and aren't exercised by the multipart
handler.
"""
header = (
"timestamp(ms),Time,"
"SCALED_IMU2.xacc,SCALED_IMU2.yacc,SCALED_IMU2.zacc,"
"SCALED_IMU2.xgyro,SCALED_IMU2.ygyro,SCALED_IMU2.zgyro,"
"GLOBAL_POSITION_INT.lat,GLOBAL_POSITION_INT.lon,"
"GLOBAL_POSITION_INT.alt,GLOBAL_POSITION_INT.vx,"
"GLOBAL_POSITION_INT.vy,GLOBAL_POSITION_INT.vz,"
"GLOBAL_POSITION_INT.hdg"
)
row1 = "0,0.0,21,-3,-984,52,32,-5,50.0809634,36.1115442,141290,0,0,0,35041"
row2 = "100,0.1,-68,-9,-995,58,-17,1,50.0809634,36.1115441,141360,0,0,0,35042"
return f"{header}\n{row1}\n{row2}\n".encode("utf-8")
@pytest.fixture(autouse=True)
def _disable_auth_by_default(monkeypatch: pytest.MonkeyPatch) -> Iterator[None]:
monkeypatch.setenv("REPLAY_API_AUTH_REQUIRED", "false")
@@ -630,6 +655,254 @@ def test_post_replay_rejects_misnamed_zip_as_video(
assert response.json()["error_code"] == "unsupported_file_kind"
# ---------------------------------------------------------------------
# AZ-959 — CSV-path multipart + XOR validation + /static/example-csv
def test_validate_csv_kind_accepts_az896_header() -> None:
    """The canonical AZ-896 header passes ``validate_csv_kind`` without raising."""
    # Arrange — only the leading 512 bytes of the payload are handed over
    leading_bytes = _valid_csv_bytes()[:512]

    # Act / Assert — returning normally is the success condition
    validate_csv_kind(leading_bytes)
def test_validate_csv_kind_rejects_header_missing_time_column() -> None:
    """A header without the ``Time`` column is rejected with a helpful message.

    The error text must name the missing ``Time`` token and point the
    reader at ``csv_replay_format.md``.
    """
    # Arrange — header line with no Time column, followed by one data row
    header_line = (
        b"timestamp(ms),"
        b"SCALED_IMU2.xacc,SCALED_IMU2.yacc,SCALED_IMU2.zacc,"
        b"SCALED_IMU2.xgyro,SCALED_IMU2.ygyro,SCALED_IMU2.zgyro,"
        b"GLOBAL_POSITION_INT.lat,GLOBAL_POSITION_INT.lon"
    )
    data_row = b"0,0,0,0,0,0,0,0,0"
    payload = b"\n".join((header_line, data_row, b""))

    # Act
    # NOTE(review): ``Exception`` is broad — narrow it once the concrete
    # type raised by validate_csv_kind is confirmed.
    with pytest.raises(Exception) as raised:
        validate_csv_kind(payload)

    # Assert
    message = str(raised.value)
    assert "Time" in message
    assert "csv_replay_format.md" in message
def test_post_replay_csv_path_returns_200_and_dispatches_imu_flag(
    fake_runner: _FakeRunner,
    make_app: Any,
) -> None:
    """AC-1: a (video, csv) upload succeeds and the runner receives ``csv_path``."""
    # Arrange
    client = TestClient(make_app(fake_runner))
    csv_payload = _valid_csv_bytes()
    multipart = {
        "csv": ("data_imu.csv", csv_payload, "text/csv"),
        "video": ("derkachi.mp4", _valid_mp4_bytes(), "video/mp4"),
        "calibration": (
            "k.json",
            _valid_calibration_bytes(),
            "application/json",
        ),
    }

    # Act
    response = client.post("/replay", files=multipart, data={"pace": "asap"})

    # Assert — HTTP envelope
    assert response.status_code == 200, response.text
    body = response.json()
    assert body["state"] == JobState.DONE.value
    assert body["sync"] is True

    # Assert — the runner was invoked once, on the CSV branch
    # (tlog_path stays None for csv jobs) with the uploaded bytes on disk
    assert len(fake_runner.calls) == 1
    (inputs,) = fake_runner.calls
    assert inputs.tlog_path is None
    assert inputs.csv_path is not None
    assert inputs.csv_path.is_file()
    assert inputs.csv_path.read_bytes() == _valid_csv_bytes()
def test_post_replay_rejects_both_tlog_and_csv(
    fake_runner: _FakeRunner,
    make_app: Any,
) -> None:
    """AC-2: sending both ``tlog`` and ``csv`` parts yields a 400."""
    # Arrange
    app = make_app(fake_runner)
    client = TestClient(app)
    multipart = {
        "tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
        "csv": ("d.csv", _valid_csv_bytes(), "text/csv"),
        "video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
        "calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
    }

    # Act
    response = client.post("/replay", files=multipart)

    # Assert
    assert response.status_code == 400
    payload = response.json()
    assert payload["error_code"] == "multipart_missing_field"
    assert "exactly one" in payload["message"].lower()
def test_post_replay_rejects_neither_tlog_nor_csv(
    fake_runner: _FakeRunner,
    make_app: Any,
) -> None:
    """AC-3: omitting both ``tlog`` and ``csv`` parts yields a 400."""
    # Arrange — only the video and calibration parts are supplied
    app = make_app(fake_runner)
    client = TestClient(app)
    multipart = {
        "video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
        "calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
    }

    # Act
    response = client.post("/replay", files=multipart)

    # Assert
    assert response.status_code == 400
    payload = response.json()
    assert payload["error_code"] == "multipart_missing_field"
    assert "exactly one" in payload["message"].lower()
def test_post_replay_rejects_malformed_csv_at_api_boundary(
    fake_runner: _FakeRunner,
    make_app: Any,
) -> None:
    """AC-4: a CSV whose header lacks ``Time`` is refused with a 400.

    The response must carry ``unsupported_file_kind`` and a message
    that mentions ``csv_replay_format.md``.
    """
    # Arrange — header line with no Time column, followed by one data row
    header_line = (
        b"timestamp(ms),"
        b"SCALED_IMU2.xacc,SCALED_IMU2.yacc,SCALED_IMU2.zacc,"
        b"SCALED_IMU2.xgyro,SCALED_IMU2.ygyro,SCALED_IMU2.zgyro,"
        b"GLOBAL_POSITION_INT.lat,GLOBAL_POSITION_INT.lon"
    )
    malformed = b"\n".join((header_line, b"0,0,0,0,0,0,0,0,0", b""))
    app = make_app(fake_runner)
    client = TestClient(app)
    multipart = {
        "csv": ("bad.csv", malformed, "text/csv"),
        "video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
        "calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
    }

    # Act
    response = client.post("/replay", files=multipart)

    # Assert
    assert response.status_code == 400
    payload = response.json()
    assert payload["error_code"] == "unsupported_file_kind"
    assert "csv_replay_format.md" in payload["message"]
def test_static_example_csv_serves_canonical_doc_file(
    fake_runner: _FakeRunner,
    make_app: Any,
) -> None:
    """AC-5: ``GET /static/example-csv`` returns the resolved example file.

    Skipped when ``_example_csv_path`` resolves to ``None``.
    """
    # Arrange
    from gps_denied_onboard.replay_api.app import _example_csv_path

    expected_file = _example_csv_path()
    if expected_file is None:
        pytest.skip(
            "example CSV not on disk — running outside a source checkout"
        )
    app = make_app(fake_runner)
    client = TestClient(app)

    # Act
    response = client.get("/static/example-csv")

    # Assert — status, media type with charset, and byte-exact body
    assert response.status_code == 200
    content_type = response.headers["content-type"]
    assert content_type.startswith("text/csv")
    assert "charset=utf-8" in content_type
    assert response.content == expected_file.read_bytes()
def test_static_example_csv_returns_503_when_path_misconfigured(
    monkeypatch: pytest.MonkeyPatch,
    fake_runner: _FakeRunner,
    make_app: Any,
    tmp_path: Path,
) -> None:
    """A bogus ``REPLAY_API_EXAMPLE_CSV_PATH`` makes the endpoint answer 503."""
    # Arrange — aim the env var at a file that does not exist. The test
    # relies on the path helper taking the env-var branch and returning
    # None there, rather than falling back to the source-checkout copy
    # of the canonical file.
    missing_file = tmp_path / "nonexistent.csv"
    monkeypatch.setenv("REPLAY_API_EXAMPLE_CSV_PATH", str(missing_file))
    app = make_app(fake_runner)
    client = TestClient(app)

    # Act
    response = client.get("/static/example-csv")

    # Assert
    assert response.status_code == 503
    payload = response.json()
    assert payload["error_code"] == "example_csv_unavailable"
def test_subprocess_runner_renders_report_for_csv_ground_truth(
    tmp_path: Path,
) -> None:
    """AC-6: ``_maybe_render_report`` produces a report for a CSV-truth job.

    ``_maybe_render_report`` is called directly so the subprocess
    invocation itself does not have to run: the branch under test is
    the ground-truth loader, not the gps-denied-replay binary. Skipped
    when ``_example_csv_path`` resolves to ``None``.
    """
    # Arrange
    from gps_denied_onboard.replay_api.app import (
        SubprocessReplayRunner,
        _example_csv_path,
    )

    csv_path = _example_csv_path()
    if csv_path is None:
        pytest.skip(
            "example CSV not on disk — running outside a source checkout"
        )
    runner = SubprocessReplayRunner()
    output_dir = tmp_path / "output"
    output_dir.mkdir()

    # Both binary fixtures are written byte-for-byte; no decode/encode
    # round-trip through the locale's text encoding.
    calibration_path = tmp_path / "calib.json"
    calibration_path.write_bytes(_valid_calibration_bytes())
    video_path = tmp_path / "video.mp4"
    video_path.write_bytes(_valid_mp4_bytes())

    # A single emission record, stored as one JSON line.
    emission = {
        "frame_id": 0,
        "position_wgs84": {
            "lat_deg": 50.0809634,
            "lon_deg": 36.1115442,
            "alt_m": 141.290,
        },
        "emitted_at": 0,
    }
    emissions_path = output_dir / "emissions.jsonl"
    emissions_path.write_text(json.dumps(emission) + "\n")

    inputs = ReplayInputs(
        csv_path=csv_path,
        video_path=video_path,
        calibration_path=calibration_path,
    )

    # Act
    report_path = runner._maybe_render_report(
        inputs, emissions_path, output_dir
    )

    # Assert
    assert report_path is not None
    assert report_path.is_file()
    # A case-insensitive search already covers the capitalised "Verdict".
    assert "verdict" in report_path.read_text().lower()
# ---------------------------------------------------------------------
# Helpers