[AZ-959] replay_api: POST /replay (video,csv) + /static/example-csv

Extend the AZ-701 replay_api POST /replay endpoint so AZ-897 (now
in ../ui repo) can drive the AZ-894 CSV-replay path. The endpoint
keeps full back-compat for tlog clients and adds:

- (video, tlog) OR (video, csv) multipart with strict XOR enforced
  at the API boundary (AC-2 / AC-3 → 400 multipart_missing_field)
- validate_csv_kind: rejects malformed CSV schema at boundary by
  scanning the header line for AZ-896 required tokens; messages
  point at csv_replay_format.md (AC-4)
- ReplayInputs DTO: tlog_path / csv_path are now Path | None with
  XOR re-enforced in __post_init__ for internal callers
- JobStorage reserves both input.tlog and input.csv paths; handler
  writes exactly one
- SubprocessReplayRunner.run dispatches --imu vs --tlog argv (AC-1)
- _maybe_render_report dispatches load_csv_ground_truth vs
  load_tlog_ground_truth; CsvGpsFix and TlogGpsFix have
  field-compatible shapes for the GroundTruthRow adapter (AC-6)
- GET /static/example-csv serves the AZ-896 reference CSV; honours
  REPLAY_API_EXAMPLE_CSV_PATH env, falls back to source-checkout
  layout, returns 503 with example_csv_unavailable when neither
  resolves to a readable file. No auth required (AC-5)

Tests: 27/27 unit tests green:
- 18 pre-existing tlog-path tests unchanged (AC-7)
- 9 new tests covering ACs 1-6 + validate_csv_kind isolation

Deferred (NOT silently fixed; reported to user as end-of-turn
notes for scope discipline):

- gps-denied-render-map only consumes binary tlog truth today, so
  CSV-path jobs return map_html_url=None. Extending render-map to
  dispatch on truth-file extension is AZ-700 follow-up territory.
- ReportContext.tlog_path field is now overloaded as the
  "ground-truth source path"; the rendered report still labels
  the line "Tlog: <csv_path>" which is cosmetically misleading
  for CSV runs. Field rename + label fix is AZ-699 follow-up.

Bookkeeping: AZ-959 spec moved todo/ → done/, dep-table preamble
fifth bump documents what landed + what's deferred, state.md
records batch 5 complete and what comes next.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-29 12:45:25 +03:00
parent 05fcacffa3
commit 1d18e25cf4
8 changed files with 476 additions and 17 deletions
+124 -11
View File
@@ -37,12 +37,14 @@ from gps_denied_onboard.replay_api.errors import (
UnsupportedFileKindError,
)
from gps_denied_onboard.replay_api.handlers import (
MIN_CSV_PROBE_BYTES,
MIN_TLOG_PROBE_BYTES,
MIN_VIDEO_PROBE_BYTES,
auth_required,
expected_bearer_token,
extract_bearer_token,
validate_calibration_kind,
validate_csv_kind,
validate_tlog_kind,
validate_upload_size,
validate_video_kind,
@@ -63,7 +65,9 @@ __all__ = ["SubprocessReplayRunner", "build_runner_from_env", "create_app"]
_LOGGER = logging.getLogger("gps_denied_onboard.replay_api")
_PROBE_BYTES_MAX: int = max(MIN_TLOG_PROBE_BYTES, MIN_VIDEO_PROBE_BYTES, 64)
_PROBE_BYTES_MAX: int = max(
MIN_TLOG_PROBE_BYTES, MIN_VIDEO_PROBE_BYTES, MIN_CSV_PROBE_BYTES, 64
)
# ---------------------------------------------------------------------
@@ -107,12 +111,16 @@ class SubprocessReplayRunner:
signing_key_path.write_bytes(b"\x00" * 32)
emissions_path = output_dir / "emissions.jsonl"
if inputs.csv_path is not None:
input_flag_pair = ["--imu", str(inputs.csv_path)]
else:
assert inputs.tlog_path is not None
input_flag_pair = ["--tlog", str(inputs.tlog_path)]
argv = [
self._replay_binary,
"--video",
str(inputs.video_path),
"--tlog",
str(inputs.tlog_path),
*input_flag_pair,
"--output",
str(emissions_path),
"--camera-calibration",
@@ -175,6 +183,7 @@ class SubprocessReplayRunner:
horizontal_error_distribution,
)
from gps_denied_onboard.replay_input import (
load_csv_ground_truth,
load_tlog_ground_truth,
)
except Exception as exc:
@@ -191,7 +200,13 @@ class SubprocessReplayRunner:
if not emissions:
return None
gt_series = load_tlog_ground_truth(inputs.tlog_path).records
if inputs.csv_path is not None:
gt_series = load_csv_ground_truth(inputs.csv_path).records
gt_source_path = inputs.csv_path
else:
assert inputs.tlog_path is not None
gt_series = load_tlog_ground_truth(inputs.tlog_path).records
gt_source_path = inputs.tlog_path
if not gt_series:
return None
@@ -222,7 +237,11 @@ class SubprocessReplayRunner:
)
context = ReportContext(
run_date_utc=datetime.utcnow().date().isoformat(),
tlog_path=inputs.tlog_path,
# tlog_path is widened to "ground-truth source" in cycle-4
# (tlog or csv depending on which input drove the run);
# ReportContext field rename deferred to AZ-699 follow-up
# to keep AZ-959 scope minimal.
tlog_path=gt_source_path,
video_path=inputs.video_path,
calibration_acquisition_method=calibration_method,
clip_duration_s=clip_duration_s,
@@ -243,6 +262,16 @@ class SubprocessReplayRunner:
output_dir: Path,
report_path: Path | None,
) -> Path | None:
# gps-denied-render-map only understands binary tlog truth
# today; CSV-truth dispatch is an AZ-700 follow-up. For now,
# CSV-path runs ship without a map (report + emissions still
# render, see _maybe_render_report).
if inputs.tlog_path is None:
_LOGGER.info(
"skipping map render — CSV-path runs do not yet support "
"the gps-denied-render-map CLI (AZ-700 follow-up)"
)
return None
if not shutil.which(self._render_binary):
venv_bin = Path(sys.executable).parent / self._render_binary
if not venv_bin.exists():
@@ -433,8 +462,9 @@ def create_app(
@app.post("/replay")
async def post_replay(
tlog: Annotated[UploadFile, File()],
video: Annotated[UploadFile, File()],
tlog: Annotated[UploadFile | None, File()] = None,
csv: Annotated[UploadFile | None, File()] = None,
calibration: Annotated[UploadFile | None, File()] = None,
pace: Annotated[str, Form()] = "asap",
auto_trim: Annotated[bool, Form()] = True,
@@ -442,9 +472,28 @@ def create_app(
) -> Response:
_check_auth(authorization)
tlog_bytes = await tlog.read()
validate_upload_size(len(tlog_bytes), limit=max_upload_bytes)
validate_tlog_kind(tlog_bytes[:_PROBE_BYTES_MAX])
# AC-2 / AC-3: exactly one of (tlog, csv) must be present.
if (tlog is None) == (csv is None):
raise MultipartMissingFieldError(
"POST /replay requires exactly one of `tlog` or `csv` "
"multipart fields (got "
f"tlog={'present' if tlog else 'absent'}, "
f"csv={'present' if csv else 'absent'}). See "
"_docs/02_document/contracts/replay/csv_replay_format.md "
"for the CSV schema."
)
tlog_bytes: bytes | None = None
csv_bytes: bytes | None = None
if tlog is not None:
tlog_bytes = await tlog.read()
validate_upload_size(len(tlog_bytes), limit=max_upload_bytes)
validate_tlog_kind(tlog_bytes[:_PROBE_BYTES_MAX])
else:
assert csv is not None
csv_bytes = await csv.read()
validate_upload_size(len(csv_bytes), limit=max_upload_bytes)
validate_csv_kind(csv_bytes[:_PROBE_BYTES_MAX])
video_bytes = await video.read()
validate_upload_size(len(video_bytes), limit=max_upload_bytes)
@@ -461,7 +510,10 @@ def create_app(
# Allocate per-job storage and write the uploads.
job_id = _new_job_id()
job_storage = storage.allocate_job(job_id)
job_storage.tlog_path.write_bytes(tlog_bytes)
if tlog_bytes is not None:
job_storage.tlog_path.write_bytes(tlog_bytes)
if csv_bytes is not None:
job_storage.csv_path.write_bytes(csv_bytes)
job_storage.video_path.write_bytes(video_bytes)
if calibration_bytes is not None:
job_storage.calibration_path.write_bytes(calibration_bytes)
@@ -477,7 +529,12 @@ def create_app(
)
inputs = ReplayInputs(
tlog_path=job_storage.tlog_path,
tlog_path=(
job_storage.tlog_path if tlog_bytes is not None else None
),
csv_path=(
job_storage.csv_path if csv_bytes is not None else None
),
video_path=job_storage.video_path,
calibration_path=job_storage.calibration_path,
pace=pace,
@@ -572,6 +629,36 @@ def create_app(
filename="map.html",
)
@app.get("/static/example-csv")
async def get_example_csv() -> Response:
"""Serve the AZ-896 reference CSV for the AZ-897 UI workflow.
No auth required — the example file is a public reference
document. Returns 503 when the file cannot be located, which
per the AZ-959 spec is treated as a deploy-misconfiguration
signal (file exists in the source tree).
"""
path = _example_csv_path()
if path is None:
return JSONResponse(
status_code=503,
content={
"error_code": "example_csv_unavailable",
"message": (
"example CSV not located — set "
"REPLAY_API_EXAMPLE_CSV_PATH or run from a "
"source checkout that contains "
"_docs/02_document/contracts/replay/"
"example_data_imu.csv"
),
},
)
return FileResponse(
path=path,
media_type="text/csv; charset=utf-8",
filename="example_data_imu.csv",
)
@app.get("/jobs/{job_id}/report")
async def get_report(
job_id: str,
@@ -626,6 +713,32 @@ def _default_calibration_path() -> Path | None:
return None
def _example_csv_path() -> Path | None:
"""Locate the AZ-896 reference CSV.
First honours ``REPLAY_API_EXAMPLE_CSV_PATH``. As a dev / source-
checkout fallback walks up from this module looking for the
canonical doc location. Returns ``None`` when neither path
yields a readable file — the handler then returns 503.
"""
raw = os.environ.get("REPLAY_API_EXAMPLE_CSV_PATH")
if raw:
configured = Path(raw)
return configured if configured.is_file() else None
for parent in Path(__file__).resolve().parents:
candidate = (
parent
/ "_docs"
/ "02_document"
/ "contracts"
/ "replay"
/ "example_data_imu.csv"
)
if candidate.is_file():
return candidate
return None
def _await_terminal(registry: JobRegistry, job_id: str) -> JobSnapshot:
"""Block until ``job_id`` reaches a terminal state.
@@ -18,12 +18,14 @@ from gps_denied_onboard.replay_api.errors import (
)
__all__ = [
"MIN_CSV_PROBE_BYTES",
"MIN_TLOG_PROBE_BYTES",
"MIN_VIDEO_PROBE_BYTES",
"auth_required",
"expected_bearer_token",
"extract_bearer_token",
"validate_calibration_kind",
"validate_csv_kind",
"validate_tlog_kind",
"validate_upload_size",
"validate_video_kind",
@@ -48,6 +50,26 @@ _MP4_FTYP_MARKER: bytes = b"ftyp"
MIN_VIDEO_PROBE_BYTES: int = 12
# CSV header line for the AZ-896 replay format is ~410 chars; probe
# generously so we can read the full header regardless of OS line
# endings or operator whitespace. The validator only checks the
# headline column tokens; the parser in ``csv_ground_truth`` does
# the strict per-row validation downstream.
MIN_CSV_PROBE_BYTES: int = 512
_CSV_REQUIRED_HEADER_TOKENS: tuple[str, ...] = (
"timestamp(ms)",
"Time",
"SCALED_IMU2.xacc",
"SCALED_IMU2.xgyro",
"GLOBAL_POSITION_INT.lat",
"GLOBAL_POSITION_INT.lon",
)
_CSV_FORMAT_DOC_PATH: str = (
"_docs/02_document/contracts/replay/csv_replay_format.md"
)
def validate_tlog_kind(probe_bytes: bytes) -> None:
"""Reject anything that doesn't open with a MAVLink magic byte.
@@ -90,6 +112,34 @@ def validate_video_kind(probe_bytes: bytes) -> None:
)
def validate_csv_kind(probe_bytes: bytes) -> None:
"""Reject anything that doesn't open with the AZ-896 CSV header.
The strict per-row schema lives in ``csv_ground_truth.py``; this
boundary check just confirms the first line looks like the AZ-896
header so we fail fast at the API before the subprocess hands the
error back through an opaque non-zero exit code.
"""
if len(probe_bytes) < 1:
raise UnsupportedFileKindError("csv upload is empty")
header_end = probe_bytes.find(b"\n")
header_bytes = probe_bytes if header_end < 0 else probe_bytes[:header_end]
try:
header = header_bytes.decode("utf-8").strip()
except UnicodeDecodeError as exc:
raise UnsupportedFileKindError(
"csv header is not valid UTF-8 (see "
f"{_CSV_FORMAT_DOC_PATH})"
) from exc
columns = {col.strip() for col in header.split(",")}
missing = [token for token in _CSV_REQUIRED_HEADER_TOKENS if token not in columns]
if missing:
raise UnsupportedFileKindError(
"csv header is missing required columns "
f"{missing} (see {_CSV_FORMAT_DOC_PATH})"
)
def validate_calibration_kind(probe_bytes: bytes) -> None:
"""Light JSON-shape check; the renderer is the strict validator."""
if not probe_bytes:
+18 -3
View File
@@ -38,21 +38,36 @@ class JobState(str, Enum):
@dataclass(frozen=True, slots=True)
class ReplayInputs:
"""The (tlog + video + calibration) bundle a runner consumes.
"""The (tlog|csv + video + calibration) bundle a runner consumes.
Storage paths are absolute. The handler builds these from a
per-job temp directory (see ``storage.py``).
Exactly one of ``tlog_path`` / ``csv_path`` must be set — the
handler validates this at the multipart boundary and the DTO
re-enforces it in ``__post_init__`` so any internal call site
that violates the contract fails fast.
``pace`` and ``auto_trim`` mirror the ``gps-denied-replay`` CLI
flags; the runner is responsible for translating them into argv.
flags; the runner is responsible for translating them into argv
(``--imu`` for the csv path, ``--tlog`` for the tlog path).
"""
tlog_path: Path
video_path: Path
calibration_path: Path
tlog_path: Path | None = None
csv_path: Path | None = None
pace: str = "asap"
auto_trim: bool = True
def __post_init__(self) -> None:
if (self.tlog_path is None) == (self.csv_path is None):
raise ValueError(
"ReplayInputs requires exactly one of tlog_path or "
"csv_path to be set (got "
f"tlog_path={self.tlog_path!r}, csv_path={self.csv_path!r})"
)
@dataclass(frozen=True, slots=True)
class ReplayJobResult:
+9 -1
View File
@@ -27,10 +27,17 @@ _LOGGER = logging.getLogger("gps_denied_onboard.replay_api.storage")
@dataclass(frozen=True, slots=True)
class JobStorage:
"""The per-job paths the handler hands to the runner."""
"""The per-job paths the handler hands to the runner.
Both ``tlog_path`` and ``csv_path`` are reserved on disk; the
handler writes to exactly one and leaves the other unused. The
``ReplayInputs`` DTO carries ``None`` for the branch that wasn't
written so downstream consumers know which clock source applies.
"""
root: Path
tlog_path: Path
csv_path: Path
video_path: Path
calibration_path: Path
output_dir: Path
@@ -60,6 +67,7 @@ class StorageRoot:
return JobStorage(
root=job_root,
tlog_path=job_root / "input.tlog",
csv_path=job_root / "input.csv",
video_path=job_root / "input.mp4",
calibration_path=job_root / "calibration.json",
output_dir=output_dir,