"""AZ-701 — multipart upload + magic-byte validation + auth helpers. The functions here are deliberately framework-light: they take raw bytes / streams and return validated artefacts. ``app.py`` wires them into FastAPI dependencies; unit tests call them directly. """ from __future__ import annotations import logging import os from gps_denied_onboard.replay_api.errors import ( MultipartMissingFieldError, PayloadTooLargeError, UnauthorizedError, UnsupportedFileKindError, ) __all__ = [ "MIN_CSV_PROBE_BYTES", "MIN_TLOG_PROBE_BYTES", "MIN_VIDEO_PROBE_BYTES", "auth_required", "expected_bearer_token", "extract_bearer_token", "validate_calibration_kind", "validate_csv_kind", "validate_tlog_kind", "validate_upload_size", "validate_video_kind", ] _LOGGER = logging.getLogger("gps_denied_onboard.replay_api.handlers") # MAVLink magic bytes — pymavlink uses 0xFD for v2.0 and 0xFE for # v1.0. The Derkachi tlog is v2.0; we accept both because some # operators ship v1.0 captures from older autopilots. _MAVLINK_MAGIC_V2: int = 0xFD _MAVLINK_MAGIC_V1: int = 0xFE MIN_TLOG_PROBE_BYTES: int = 9 # mp4 boxes start with a 4-byte size, then 4 ASCII bytes for the # box type. The first box in every valid mp4 is ``ftyp`` (per # ISO/IEC 14496-12). ``"ftyp"`` lives at offset 4. _MP4_FTYP_MARKER: bytes = b"ftyp" MIN_VIDEO_PROBE_BYTES: int = 12 # CSV header line for the AZ-896 replay format is ~410 chars; probe # generously so we can read the full header regardless of OS line # endings or operator whitespace. The validator only checks the # headline column tokens; the parser in ``csv_ground_truth`` does # the strict per-row validation downstream. MIN_CSV_PROBE_BYTES: int = 512 _CSV_REQUIRED_HEADER_TOKENS: tuple[str, ...] = ( "timestamp(ms)", "Time", "SCALED_IMU2.xacc", "SCALED_IMU2.xgyro", "GLOBAL_POSITION_INT.lat", "GLOBAL_POSITION_INT.lon", ) _CSV_FORMAT_DOC_PATH: str = ( "_docs/02_document/contracts/replay/csv_replay_format.md" ) def validate_tlog_kind(probe_bytes: bytes) -> None: """Reject anything that doesn't open with a MAVLink magic byte. pymavlink's tlog format prefixes each record with an 8-byte big-endian microsecond timestamp followed by the raw MAVLink frame, which always starts with the magic byte. So byte 8 of any well-formed tlog is the MAVLink magic. """ if len(probe_bytes) < MIN_TLOG_PROBE_BYTES: raise UnsupportedFileKindError( f"tlog probe too small (need ≥ {MIN_TLOG_PROBE_BYTES} bytes " f"to validate magic; got {len(probe_bytes)})" ) magic = probe_bytes[8] if magic not in (_MAVLINK_MAGIC_V2, _MAVLINK_MAGIC_V1): raise UnsupportedFileKindError( f"tlog magic byte 0x{magic:02X} at offset 8 is not " f"MAVLink (expected 0x{_MAVLINK_MAGIC_V2:02X} or " f"0x{_MAVLINK_MAGIC_V1:02X})" ) def validate_video_kind(probe_bytes: bytes) -> None: """Reject anything that doesn't have an ``ftyp`` box at offset 4. The size prefix at bytes 0-3 varies; the marker is the discriminator. This catches the common "operator renamed `.zip` to `.mp4`" attack — the AC-9 case. """ if len(probe_bytes) < MIN_VIDEO_PROBE_BYTES: raise UnsupportedFileKindError( f"video probe too small (need ≥ {MIN_VIDEO_PROBE_BYTES} " f"bytes to validate ftyp; got {len(probe_bytes)})" ) marker = probe_bytes[4:8] if marker != _MP4_FTYP_MARKER: raise UnsupportedFileKindError( "video does not begin with an mp4 'ftyp' box at offset 4 " f"(saw {marker!r})" ) def validate_csv_kind(probe_bytes: bytes) -> None: """Reject anything that doesn't open with the AZ-896 CSV header. The strict per-row schema lives in ``csv_ground_truth.py``; this boundary check just confirms the first line looks like the AZ-896 header so we fail fast at the API before the subprocess hands the error back through an opaque non-zero exit code. """ if len(probe_bytes) < 1: raise UnsupportedFileKindError("csv upload is empty") header_end = probe_bytes.find(b"\n") header_bytes = probe_bytes if header_end < 0 else probe_bytes[:header_end] try: header = header_bytes.decode("utf-8").strip() except UnicodeDecodeError as exc: raise UnsupportedFileKindError( "csv header is not valid UTF-8 (see " f"{_CSV_FORMAT_DOC_PATH})" ) from exc columns = {col.strip() for col in header.split(",")} missing = [token for token in _CSV_REQUIRED_HEADER_TOKENS if token not in columns] if missing: raise UnsupportedFileKindError( "csv header is missing required columns " f"{missing} (see {_CSV_FORMAT_DOC_PATH})" ) def validate_calibration_kind(probe_bytes: bytes) -> None: """Light JSON-shape check; the renderer is the strict validator.""" if not probe_bytes: raise UnsupportedFileKindError("calibration upload is empty") stripped = probe_bytes.lstrip() if not stripped.startswith(b"{"): raise UnsupportedFileKindError( "calibration must be a JSON object (first non-whitespace " "byte should be '{')" ) def validate_upload_size(num_bytes: int, *, limit: int) -> None: if num_bytes > limit: raise PayloadTooLargeError( f"upload size {num_bytes} exceeds REPLAY_API_MAX_UPLOAD_BYTES " f"({limit})" ) def expected_bearer_token() -> str | None: """Read the configured bearer token at request time. Returning ``None`` means auth is disabled ( ``REPLAY_API_AUTH_REQUIRED=false``); the caller is expected to have logged the WARN once at service start. """ if not auth_required(): return None token = os.environ.get("REPLAY_API_BEARER_TOKEN") if not token: raise UnauthorizedError( "REPLAY_API_BEARER_TOKEN is not configured but auth is required" ) return token def auth_required() -> bool: value = os.environ.get("REPLAY_API_AUTH_REQUIRED", "true").lower() return value not in {"0", "false", "no", "off"} def extract_bearer_token(header_value: str | None) -> str: """Parse ``Authorization: Bearer `` strictly.""" if not header_value: raise UnauthorizedError("missing Authorization header") parts = header_value.split(" ", 1) if len(parts) != 2 or parts[0].strip().lower() != "bearer": raise UnauthorizedError( "Authorization header must be 'Bearer '" ) token = parts[1].strip() if not token: raise UnauthorizedError("Authorization bearer token is empty") return token def _ensure_field(name: str, value: object) -> None: if value is None: raise MultipartMissingFieldError(f"missing multipart field: {name}")