mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 19:31:15 +00:00
1d18e25cf4
Extend the AZ-701 replay_api POST /replay endpoint so AZ-897 (now in ../ui repo) can drive the AZ-894 CSV-replay path. The endpoint keeps full back-compat for tlog clients and adds: - (video, tlog) OR (video, csv) multipart with strict XOR enforced at the API boundary (AC-2 / AC-3 → 400 multipart_missing_field) - validate_csv_kind: rejects malformed CSV schema at boundary by scanning the header line for AZ-896 required tokens; messages point at csv_replay_format.md (AC-4) - ReplayInputs DTO: tlog_path / csv_path are now Path | None with XOR re-enforced in __post_init__ for internal callers - JobStorage reserves both input.tlog and input.csv paths; handler writes exactly one - SubprocessReplayRunner.run dispatches --imu vs --tlog argv (AC-1) - _maybe_render_report dispatches load_csv_ground_truth vs load_tlog_ground_truth; CsvGpsFix and TlogGpsFix have field-compatible shapes for the GroundTruthRow adapter (AC-6) - GET /static/example-csv serves the AZ-896 reference CSV; honours REPLAY_API_EXAMPLE_CSV_PATH env, falls back to source-checkout layout, returns 503 with example_csv_unavailable when neither resolves to a readable file. No auth required (AC-5) Tests: 27/27 unit tests green: - 18 pre-existing tlog-path tests unchanged (AC-7) - 9 new tests covering ACs 1-6 + validate_csv_kind isolation Deferred (NOT silently fixed; reported to user as end-of-turn notes for scope discipline): - gps-denied-render-map only consumes binary tlog truth today, so CSV-path jobs return map_html_url=None. Extending render-map to dispatch on truth-file extension is AZ-700 follow-up territory. - ReportContext.tlog_path field is now overloaded as the "ground-truth source path"; the rendered report still labels the line "Tlog: <csv_path>" which is cosmetically misleading for CSV runs. Field rename + label fix is AZ-699 follow-up. 
Bookkeeping: AZ-959 spec moved todo/ → done/, dep-table preamble fifth bump documents what landed + what's deferred, state.md records batch 5 complete and what comes next. Co-authored-by: Cursor <cursoragent@cursor.com>
203 lines
6.8 KiB
Python
203 lines
6.8 KiB
Python
"""AZ-701 — multipart upload + magic-byte validation + auth helpers.
|
|
|
|
The functions here are deliberately framework-light: they take raw
bytes / header strings and either return a validated value or raise
a typed error from ``replay_api.errors``. ``app.py`` wires them into
FastAPI dependencies; unit tests call them directly.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
|
|
from gps_denied_onboard.replay_api.errors import (
|
|
MultipartMissingFieldError,
|
|
PayloadTooLargeError,
|
|
UnauthorizedError,
|
|
UnsupportedFileKindError,
|
|
)
|
|
|
|
# Public surface of this module, kept in alphabetical order.
__all__ = [
    "MIN_CSV_PROBE_BYTES",
    "MIN_TLOG_PROBE_BYTES",
    "MIN_VIDEO_PROBE_BYTES",
    "auth_required",
    "expected_bearer_token",
    "extract_bearer_token",
    "validate_calibration_kind",
    "validate_csv_kind",
    "validate_tlog_kind",
    "validate_upload_size",
    "validate_video_kind",
]
|
|
|
|
|
|
# Module-level logger. The logger name is spelled out explicitly rather
# than derived from ``__name__``.
_LOGGER = logging.getLogger("gps_denied_onboard.replay_api.handlers")
|
|
|
|
|
|
# MAVLink magic bytes — pymavlink uses 0xFD for v2.0 and 0xFE for
# v1.0. The Derkachi tlog is v2.0; we accept both because some
# operators ship v1.0 captures from older autopilots.
_MAVLINK_MAGIC_V2: int = 0xFD
_MAVLINK_MAGIC_V1: int = 0xFE
# Fewest leading bytes a caller must supply so that index 8 (where the
# tlog validator looks for the magic byte) exists.
MIN_TLOG_PROBE_BYTES: int = 9
|
|
|
|
|
|
# mp4 boxes start with a 4-byte size, then 4 ASCII bytes for the
# box type. The first box in every valid mp4 is ``ftyp`` (per
# ISO/IEC 14496-12). ``"ftyp"`` lives at offset 4.
_MP4_FTYP_MARKER: bytes = b"ftyp"
# Fewest leading bytes the video validator accepts as a probe.
MIN_VIDEO_PROBE_BYTES: int = 12
|
|
|
|
|
|
# CSV header line for the AZ-896 replay format is ~410 chars; probe
# generously so we can read the full header regardless of OS line
# endings or operator whitespace. The validator only checks the
# headline column tokens; the parser in ``csv_ground_truth`` does
# the strict per-row validation downstream.
MIN_CSV_PROBE_BYTES: int = 512

# Column names that must all be present in the header line for an
# upload to be accepted as an AZ-896 CSV.
_CSV_REQUIRED_HEADER_TOKENS: tuple[str, ...] = (
    "timestamp(ms)",
    "Time",
    "SCALED_IMU2.xacc",
    "SCALED_IMU2.xgyro",
    "GLOBAL_POSITION_INT.lat",
    "GLOBAL_POSITION_INT.lon",
)

# Format document quoted in CSV rejection messages.
_CSV_FORMAT_DOC_PATH: str = (
    "_docs/02_document/contracts/replay/csv_replay_format.md"
)
|
|
|
|
|
|
def validate_tlog_kind(probe_bytes: bytes) -> None:
    """Reject anything that doesn't carry a MAVLink magic byte at offset 8.

    pymavlink's tlog format prefixes each record with an 8-byte
    big-endian microsecond timestamp followed by the raw MAVLink
    frame, which always starts with the magic byte. So byte 8 of
    any well-formed tlog is the MAVLink magic.

    Args:
        probe_bytes: Leading bytes of the upload; at least
            ``MIN_TLOG_PROBE_BYTES`` of them are required.

    Raises:
        UnsupportedFileKindError: If the probe is too short, or byte 8
            is neither the v2 nor the v1 MAVLink magic.
    """
    if len(probe_bytes) < MIN_TLOG_PROBE_BYTES:
        raise UnsupportedFileKindError(
            f"tlog probe too small (need ≥ {MIN_TLOG_PROBE_BYTES} bytes "
            f"to validate magic; got {len(probe_bytes)})"
        )

    # Indexing ``bytes`` yields an int, so it compares directly with
    # the integer magic constants.
    magic = probe_bytes[8]
    if magic not in (_MAVLINK_MAGIC_V2, _MAVLINK_MAGIC_V1):
        raise UnsupportedFileKindError(
            f"tlog magic byte 0x{magic:02X} at offset 8 is not "
            f"MAVLink (expected 0x{_MAVLINK_MAGIC_V2:02X} or "
            f"0x{_MAVLINK_MAGIC_V1:02X})"
        )
|
|
|
|
|
|
def validate_video_kind(probe_bytes: bytes) -> None:
    """Reject anything that doesn't have an ``ftyp`` box at offset 4.

    The size prefix at bytes 0-3 varies; the marker is the
    discriminator. This catches the common "operator renamed
    `.zip` to `.mp4`" attack — the AC-9 case.

    Args:
        probe_bytes: Leading bytes of the upload; at least
            ``MIN_VIDEO_PROBE_BYTES`` of them are required.

    Raises:
        UnsupportedFileKindError: If the probe is too short, or bytes
            4-7 are not the ASCII marker ``ftyp``.
    """
    if len(probe_bytes) < MIN_VIDEO_PROBE_BYTES:
        raise UnsupportedFileKindError(
            f"video probe too small (need ≥ {MIN_VIDEO_PROBE_BYTES} "
            f"bytes to validate ftyp; got {len(probe_bytes)})"
        )

    marker = probe_bytes[4:8]
    if marker != _MP4_FTYP_MARKER:
        raise UnsupportedFileKindError(
            "video does not begin with an mp4 'ftyp' box at offset 4 "
            f"(saw {marker!r})"
        )
|
|
|
|
|
|
def validate_csv_kind(probe_bytes: bytes) -> None:
    """Reject anything that doesn't open with the AZ-896 CSV header.

    The strict per-row schema lives in ``csv_ground_truth.py``; this
    boundary check just confirms the first line looks like the AZ-896
    header so we fail fast at the API before the subprocess hands the
    error back through an opaque non-zero exit code.

    Only the first line of the probe is inspected. It is decoded as
    UTF-8, split on commas, and every entry of
    ``_CSV_REQUIRED_HEADER_TOKENS`` must appear as a whitespace-trimmed
    column name. Column order and extra columns are not checked.

    Args:
        probe_bytes: Leading bytes of the upload. If they contain no
            newline, the whole probe is treated as the header line.

    Raises:
        UnsupportedFileKindError: If the probe is empty, the header is
            not valid UTF-8, or a required column is missing.
    """
    if not probe_bytes:
        raise UnsupportedFileKindError("csv upload is empty")

    # ``partition`` returns the whole input as the head when no newline
    # is present, which is the "header fills the probe" case.
    header_bytes, _, _ = probe_bytes.partition(b"\n")

    try:
        # ``strip`` also removes the trailing "\r" of CRLF files.
        header = header_bytes.decode("utf-8").strip()
    except UnicodeDecodeError as exc:
        raise UnsupportedFileKindError(
            "csv header is not valid UTF-8 (see "
            f"{_CSV_FORMAT_DOC_PATH})"
        ) from exc

    # NOTE(review): a leading UTF-8 BOM decodes to U+FEFF, which
    # ``str.strip`` does not remove, so a BOM-prefixed file is reported
    # as missing its first column. Confirm whether that is intended
    # before switching to "utf-8-sig" — the downstream parser must agree.
    columns = {col.strip() for col in header.split(",")}
    missing = [
        token for token in _CSV_REQUIRED_HEADER_TOKENS if token not in columns
    ]
    if missing:
        raise UnsupportedFileKindError(
            "csv header is missing required columns "
            f"{missing} (see {_CSV_FORMAT_DOC_PATH})"
        )
|
|
|
|
|
|
def validate_calibration_kind(probe_bytes: bytes) -> None:
    """Light JSON-shape check; the renderer is the strict validator.

    Accepts any non-empty payload whose first non-whitespace byte is
    ``{``. Nothing beyond that byte is parsed here.

    Args:
        probe_bytes: Leading bytes of the calibration upload.

    Raises:
        UnsupportedFileKindError: If the payload is empty or does not
            start (after leading whitespace) with ``{``.
    """
    if not probe_bytes:
        raise UnsupportedFileKindError("calibration upload is empty")

    stripped = probe_bytes.lstrip()
    if not stripped.startswith(b"{"):
        raise UnsupportedFileKindError(
            "calibration must be a JSON object (first non-whitespace "
            "byte should be '{')"
        )
|
|
|
|
|
|
def validate_upload_size(num_bytes: int, *, limit: int) -> None:
    """Reject an upload whose size exceeds ``limit``.

    A size exactly equal to ``limit`` is accepted.

    Args:
        num_bytes: Size of the upload in bytes.
        limit: Maximum accepted size in bytes (keyword-only). The
            error message refers to it as
            ``REPLAY_API_MAX_UPLOAD_BYTES``.

    Raises:
        PayloadTooLargeError: If ``num_bytes`` is greater than ``limit``.
    """
    if num_bytes > limit:
        raise PayloadTooLargeError(
            f"upload size {num_bytes} exceeds REPLAY_API_MAX_UPLOAD_BYTES "
            f"({limit})"
        )
|
|
|
|
|
|
def expected_bearer_token() -> str | None:
    """Read the configured bearer token at request time.

    Returning ``None`` means auth is disabled
    (``REPLAY_API_AUTH_REQUIRED=false``); the caller is expected to
    have logged the WARN once at service start.

    Returns:
        The value of ``REPLAY_API_BEARER_TOKEN``, or ``None`` when
        ``auth_required()`` is false.

    Raises:
        UnauthorizedError: If auth is required but
            ``REPLAY_API_BEARER_TOKEN`` is unset or empty.
    """
    if not auth_required():
        return None

    token = os.environ.get("REPLAY_API_BEARER_TOKEN")
    if not token:
        # Covers both "variable unset" and "variable set to empty".
        raise UnauthorizedError(
            "REPLAY_API_BEARER_TOKEN is not configured but auth is required"
        )
    return token
|
|
|
|
|
|
def auth_required() -> bool:
    """Report whether bearer-token auth is enabled.

    Reads ``REPLAY_API_AUTH_REQUIRED`` from the environment on every
    call. Auth is on by default; only the values ``0``, ``false``,
    ``no`` and ``off`` (compared case-insensitively) turn it off.
    Any other value, including an empty string, leaves it on.
    """
    disabling_values = {"0", "false", "no", "off"}
    value = os.environ.get("REPLAY_API_AUTH_REQUIRED", "true").lower()
    return value not in disabling_values
|
|
|
|
|
|
def extract_bearer_token(header_value: str | None) -> str:
    """Parse ``Authorization: Bearer <token>`` strictly.

    The header must be a scheme and a credential separated by a
    space. The scheme is compared case-insensitively; surrounding
    whitespace is trimmed from the token.

    Args:
        header_value: Raw ``Authorization`` header value, or ``None``
            when the header was absent.

    Returns:
        The token with surrounding whitespace removed.

    Raises:
        UnauthorizedError: If the header is missing or empty, is not
            of the form ``Bearer <token>``, or carries an empty token.
    """
    if not header_value:
        raise UnauthorizedError("missing Authorization header")

    # Split on the first space only, so the token itself may contain
    # spaces. An empty separator means the header held no space at all.
    scheme, separator, credential = header_value.partition(" ")
    if not separator or scheme.strip().lower() != "bearer":
        raise UnauthorizedError(
            "Authorization header must be 'Bearer <token>'"
        )

    token = credential.strip()
    if not token:
        raise UnauthorizedError("Authorization bearer token is empty")
    return token
|
|
|
|
|
|
def _ensure_field(name: str, value: object) -> None:
    """Raise if a multipart field is absent.

    Only ``None`` counts as absent; falsy values such as ``""`` or
    ``0`` pass.

    Args:
        name: Field name, used in the error message.
        value: The parsed field value, or ``None`` if it was not sent.

    Raises:
        MultipartMissingFieldError: If ``value`` is ``None``.
    """
    if value is None:
        raise MultipartMissingFieldError(f"missing multipart field: {name}")
|