[AZ-701] HTTP replay API service (FastAPI + magic-byte upload validation)

New replay_api component: FastAPI service wrapping the offline gps-denied-replay pipeline. POST tlog+video (multipart) → either sync 200 with result/map/report URLs, or async 202 + job id with /jobs/{id} polling. Magic-byte validation, bearer auth, in-memory JobRegistry with concurrency + queue caps (429 on overflow). Helper accuracy_report.py promoted from tests/ to src/ because the API needs the Markdown report writer at runtime; all AZ-699 imports re-pointed. OpenAPI spec exported to docs. 18/18 unit tests pass (AC-1 sync, AC-2 async, AC-3 state machine, AC-5 auth, AC-6 health, AC-8 concurrency, AC-9 magic-byte). Full unit suite: 2251 pass, 86 skip, 1 pre-existing C12 cold-start flake (unchanged). mypy --strict clean on the new surface. Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-21 10:31:13 +00:00 · 2026-05-20 17:30:26 +03:00
parent b66b68ff76
commit 7d53cef0cf
22 changed files with 2854 additions and 13 deletions
@@ -1,188 +0,0 @@
-"""AZ-699 Markdown accuracy-report writer (test helper).
-
-Renders a :class:`HorizontalErrorDistribution` (the production
-helper in ``gps_denied_onboard.helpers.gps_compare``) plus run
-context (calibration acquisition method, clip duration, fixture
-paths) into the canonical Markdown layout consumed by
-``_docs/06_metrics/real_flight_validation_{date}.md``.
-
-This module lives under ``tests/`` (NOT production) — the report
-is an artefact of running the AZ-699 e2e test. Promoting the
-writer to ``src/`` would invite production code to import a test
-helper, so the file ownership rule keeps it here.
-
-Style: every function is pure; the side effect (writing the file)
-is the caller's. Tests in ``tests/unit/test_az699_report_writer.py``
-exercise both the rendering and the threshold-gate verdict logic.
-"""
-
-from __future__ import annotations
-
-from dataclasses import dataclass
-from pathlib import Path
-
-from gps_denied_onboard.helpers.gps_compare import HorizontalErrorDistribution
-
-__all__ = [
-    "AC3_GATE_PCT",
-    "AC3_GATE_THRESHOLD_M",
-    "ReportContext",
-    "format_failure_message",
-    "render_report",
-    "verdict_passes_ac3",
-]
-
-
-# AZ-696 epic AC-3 threshold + minimum-share gate. Keeping these
-# named constants here (rather than inlined into the test) so the
-# unit tests for the failure-message template can pin them.
-AC3_GATE_THRESHOLD_M: float = 100.0
-AC3_GATE_PCT: float = 80.0
-
-
-@dataclass(frozen=True)
-class ReportContext:
-    """Run context surfaced in the report header.
-
-    Attributes:
-        run_date_utc: ISO-8601 date (YYYY-MM-DD) at which the run
-            executed — drives the report filename.
-        tlog_path: Real tlog the runner consumed.
-        video_path: Video clip the runner consumed.
-        calibration_acquisition_method: Provenance of the camera
-            calibration (e.g. ``"factory-sheet"`` for AZ-702 or
-            ``"placeholder"`` for the adti26 fallback). Surfaced in
-            the failure message per AZ-699 AC-3.
-        clip_duration_s: Duration of the analysed clip in seconds.
-        emissions_count: Total estimator-output records consumed
-            from the JSONL (may differ from
-            ``distribution.count`` when some emissions land
-            outside the GT window).
-    """
-
-    run_date_utc: str
-    tlog_path: Path
-    video_path: Path
-    calibration_acquisition_method: str
-    clip_duration_s: float
-    emissions_count: int
-
-
-def verdict_passes_ac3(distribution: HorizontalErrorDistribution) -> bool:
-    """Return ``True`` when the run meets AZ-696 epic AC-3."""
-    if distribution.count == 0:
-        return False
-    share = distribution.threshold_hit_share.get(AC3_GATE_THRESHOLD_M)
-    if share is None:
-        return False
-    return share * 100.0 >= AC3_GATE_PCT
-
-
-def format_failure_message(
-    distribution: HorizontalErrorDistribution,
-    context: ReportContext,
-) -> str:
-    """Build the honest failure message for AZ-699 AC-3.
-
-    The message references the calibration acquisition method
-    (factory-sheet for AZ-702 or placeholder otherwise) and the
-    measured residual budget, so the operator can attribute a
-    failure to its likely root cause (calibration uncertainty,
-    drift, anchor scarcity) without re-reading the source.
-    """
-    share = distribution.threshold_hit_share.get(AC3_GATE_THRESHOLD_M, 0.0)
-    pct = share * 100.0
-    return (
-        f"AZ-699 AC-3: only {pct:.1f} % of {distribution.count} "
-        f"emissions within {AC3_GATE_THRESHOLD_M:.0f} m of ground "
-        f"truth; epic threshold is {AC3_GATE_PCT:.0f} %. "
-        f"Residual: mean={distribution.horizontal_error_mean_m:.1f} m, "
-        f"p50={distribution.horizontal_error_p50_m:.1f} m, "
-        f"p95={distribution.horizontal_error_p95_m:.1f} m, "
-        f"p99={distribution.horizontal_error_p99_m:.1f} m. "
-        f"Calibration: {context.calibration_acquisition_method}. "
-        "See _docs/06_metrics/real_flight_validation_"
-        f"{context.run_date_utc}.md for the full distribution."
-    )
-
-
-def render_report(
-    distribution: HorizontalErrorDistribution,
-    context: ReportContext,
-    *,
-    passed: bool,
-) -> str:
-    """Render the full Markdown report body.
-
-    The output layout (header + horizontal-error stats + threshold
-    table + vertical-error stats + verdict) is the schema referenced
-    by ``_docs/02_document/tests/blackbox-tests.md``.
-    """
-    verdict = "PASS" if passed else "FAIL"
-    horiz_rows = [
-        ("Mean", distribution.horizontal_error_mean_m),
-        ("p50", distribution.horizontal_error_p50_m),
-        ("p95", distribution.horizontal_error_p95_m),
-        ("p99", distribution.horizontal_error_p99_m),
-    ]
-    threshold_rows = [
-        (t, share)
-        for t, share in sorted(distribution.threshold_hit_share.items())
-    ]
-
-    lines: list[str] = []
-    lines.append(f"# Real-flight validation — {context.run_date_utc}")
-    lines.append("")
-    lines.append(f"**Verdict**: {verdict} (AC-3 gate: "
-                 f"≥ {AC3_GATE_PCT:.0f} % within "
-                 f"{AC3_GATE_THRESHOLD_M:.0f} m)")
-    lines.append("")
-    lines.append("## Run context")
-    lines.append("")
-    lines.append(f"- Tlog: `{context.tlog_path}`")
-    lines.append(f"- Video: `{context.video_path}`")
-    lines.append(
-        f"- Calibration acquisition method: {context.calibration_acquisition_method}"
-    )
-    lines.append(f"- Clip duration: {context.clip_duration_s:.1f} s")
-    lines.append(f"- Emissions consumed: {context.emissions_count}")
-    lines.append(f"- Ground-truth pairings: {distribution.count}")
-    lines.append("")
-    lines.append("## Horizontal error (metres)")
-    lines.append("")
-    lines.append("| Statistic | Value |")
-    lines.append("| --------- | ----- |")
-    for name, value in horiz_rows:
-        lines.append(f"| {name} | {value:.2f} |")
-    lines.append("")
-    lines.append("## Threshold-hit share")
-    lines.append("")
-    lines.append("| Threshold (m) | Hit share (%) |")
-    lines.append("| ------------- | ------------- |")
-    for threshold, share in threshold_rows:
-        lines.append(f"| {threshold:g} | {share * 100.0:.1f} |")
-    lines.append("")
-    if distribution.vertical_count > 0:
-        lines.append("## Vertical error (metres)")
-        lines.append("")
-        lines.append("| Statistic | Value |")
-        lines.append("| --------- | ----- |")
-        lines.append(
-            f"| Mean | {distribution.vertical_error_mean_m:.2f} |"
-        )
-        lines.append(
-            f"| p50 | {distribution.vertical_error_p50_m:.2f} |"
-        )
-        lines.append(
-            f"| p95 | {distribution.vertical_error_p95_m:.2f} |"
-        )
-        lines.append(
-            f"| Samples | {distribution.vertical_count} |"
-        )
-        lines.append("")
-    else:
-        lines.append("## Vertical error")
-        lines.append("")
-        lines.append("_No emissions carried a comparable altitude — vertical stats skipped._")
-        lines.append("")
-    return "\n".join(lines) + "\n"
@@ -46,7 +46,7 @@ from gps_denied_onboard.helpers.gps_compare import (
    horizontal_error_distribution,
 )
 from gps_denied_onboard.replay_input import load_tlog_ground_truth
-from tests.e2e.replay._report_writer import (
+from gps_denied_onboard.helpers.accuracy_report import (
    AC3_GATE_PCT,
    AC3_GATE_THRESHOLD_M,
    ReportContext,
@@ -0,0 +1,663 @@
+"""AZ-701 — replay_api unit tests.
+
+Covers the AC matrix without invoking the real `gps-denied-replay`
+subprocess. A fake `ReplayRunner` writes deterministic emissions
+into the per-job output dir; everything downstream (job state,
+HTTP handlers, magic-byte validation, auth, concurrency) is then
+exercised against real FastAPI routing via `fastapi.testclient.TestClient`.
+
+FastAPI, httpx and python-multipart are optional deps (FastAPI is
+operator-only) — the whole module skips cleanly when any is missing.
+
+Style: every test follows Arrange / Act / Assert.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import threading
+import time
+from collections.abc import Iterator
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+fastapi = pytest.importorskip(
+    "fastapi",
+    reason="FastAPI is an operator-only dep; install gps-denied-onboard[operator-tools]",
+)
+pytest.importorskip("httpx", reason="httpx required for the FastAPI TestClient")
+pytest.importorskip("multipart", reason="python-multipart required by FastAPI")
+
+from fastapi.testclient import TestClient
+
+from gps_denied_onboard.replay_api import (
+    JobState,
+    create_app,
+)
+from gps_denied_onboard.replay_api.handlers import (
+    validate_tlog_kind,
+    validate_video_kind,
+)
+from gps_denied_onboard.replay_api.interface import (
+    ReplayInputs,
+    ReplayJobResult,
+)
+from gps_denied_onboard.replay_api.jobs import JobRegistry
+from gps_denied_onboard.replay_api.storage import StorageRoot
+
+
+# ---------------------------------------------------------------------
+# Fixtures + fakes
+
+
+class _FakeRunner:
+    """Deterministic runner that writes one emissions row plus stub report/map files."""
+
+    def __init__(self, *, delay_s: float = 0.0, fail: bool = False) -> None:
+        self.delay_s = delay_s
+        self.fail = fail
+        self.calls: list[ReplayInputs] = []
+
+    def run(self, inputs: ReplayInputs, *, output_dir: Path) -> ReplayJobResult:
+        self.calls.append(inputs)
+        if self.delay_s:
+            time.sleep(self.delay_s)
+        if self.fail:
+            raise RuntimeError("fake runner forced failure")
+        emissions = output_dir / "emissions.jsonl"
+        emissions.write_text(
+            json.dumps(
+                {
+                    "frame_id": 0,
+                    "position_wgs84": {
+                        "lat_deg": 50.0,
+                        "lon_deg": 30.0,
+                        "alt_m": 100.0,
+                    },
+                    "emitted_at": 0,
+                }
+            )
+            + "\n"
+        )
+        report = output_dir / "accuracy_report.md"
+        report.write_text("# Fake report\n\n**Verdict**: PASS\n")
+        map_html = output_dir / "map.html"
+        map_html.write_text("<!DOCTYPE html><html><body>fake map</body></html>")
+        return ReplayJobResult(
+            emissions_jsonl_path=emissions,
+            accuracy_report_md_path=report,
+            map_html_path=map_html,
+        )
+
+
+def _valid_tlog_bytes() -> bytes:
+    """First 8 bytes are a microsecond timestamp; byte 8 = MAVLink magic."""
+    return b"\x00\x00\x00\x00\x00\x00\x00\x00\xfd" + b"\x00" * 32
+
+
+def _valid_mp4_bytes() -> bytes:
+    """ISO mp4: any size prefix + 'ftyp' marker at offset 4."""
+    return b"\x00\x00\x00\x20ftypisom\x00\x00\x02\x00mp42" + b"\x00" * 16
+
+
+def _valid_calibration_bytes() -> bytes:
+    return b'{"focal_length": 1, "acquisition_method": "factory-sheet"}'
+
+
+@pytest.fixture(autouse=True)
+def _disable_auth_by_default(monkeypatch: pytest.MonkeyPatch) -> Iterator[None]:
+    monkeypatch.setenv("REPLAY_API_AUTH_REQUIRED", "false")
+    monkeypatch.delenv("REPLAY_API_BEARER_TOKEN", raising=False)
+    yield
+
+
+@pytest.fixture
+def storage(tmp_path: Path) -> StorageRoot:
+    return StorageRoot(tmp_path / "replay_api")
+
+
+@pytest.fixture
+def fake_runner() -> _FakeRunner:
+    return _FakeRunner()
+
+
+@pytest.fixture
+def make_app(
+    storage: StorageRoot,
+) -> Any:
+    def _factory(
+        runner: Any,
+        *,
+        max_concurrent: int = 1,
+        max_queued: int = 8,
+        sync_max_bytes: int = 10_000_000,
+    ) -> Any:
+        registry = JobRegistry(
+            runner=runner,
+            storage=storage,
+            max_concurrent=max_concurrent,
+            max_queued=max_queued,
+        )
+        return create_app(
+            runner=runner,
+            storage=storage,
+            registry=registry,
+            sync_max_bytes=sync_max_bytes,
+        )
+
+    return _factory
+
+
+# ---------------------------------------------------------------------
+# Magic-byte validation (AC-9)
+
+
+def test_validate_tlog_kind_accepts_mavlink_v2_magic() -> None:
+    # Act / Assert — must not raise
+    validate_tlog_kind(_valid_tlog_bytes())
+
+
+def test_validate_tlog_kind_rejects_zip_renamed_to_tlog() -> None:
+    # Arrange — bytes 0..7 are the (forged) timestamp; byte 8, where the
+    # MAVLink magic is expected, holds the ZIP magic `PK\x03\x04` instead.
+    bogus = b"\x00\x00\x00\x00\x00\x00\x00\x00PK\x03\x04rest_of_zip_header"
+
+    # Act / Assert
+    with pytest.raises(Exception) as exc:
+        validate_tlog_kind(bogus)
+    assert "MAVLink" in str(exc.value)
+
+
+def test_validate_video_kind_accepts_mp4_ftyp() -> None:
+    validate_video_kind(_valid_mp4_bytes())
+
+
+def test_validate_video_kind_rejects_arbitrary_bytes() -> None:
+    with pytest.raises(Exception) as exc:
+        validate_video_kind(b"\x00" * 64)
+    assert "ftyp" in str(exc.value)
+
+
+# ---------------------------------------------------------------------
+# AC-1 — sync POST → 200 + JSONL
+
+
+def test_post_replay_sync_returns_200_with_result_urls(
+    fake_runner: _FakeRunner,
+    make_app: Any,
+) -> None:
+    # Arrange
+    app = make_app(fake_runner)
+    client = TestClient(app)
+
+    # Act
+    response = client.post(
+        "/replay",
+        files={
+            "tlog": ("derkachi.tlog", _valid_tlog_bytes(), "application/octet-stream"),
+            "video": ("derkachi.mp4", _valid_mp4_bytes(), "video/mp4"),
+            "calibration": (
+                "khp20s30.json",
+                _valid_calibration_bytes(),
+                "application/json",
+            ),
+        },
+        data={"pace": "asap"},
+    )
+
+    # Assert
+    assert response.status_code == 200, response.text
+    body = response.json()
+    assert body["state"] == JobState.DONE.value
+    assert body["sync"] is True
+    assert body["emissions_jsonl_url"].endswith("/result")
+    assert body["map_html_url"].endswith("/map")
+    assert body["accuracy_report_md_url"].endswith("/report")
+    # Runner saw exactly one job with the expected pace + auto-trim default.
+    assert len(fake_runner.calls) == 1
+    assert fake_runner.calls[0].pace == "asap"
+    assert fake_runner.calls[0].auto_trim is True
+
+
+def test_post_replay_serves_jsonl_and_map_for_done_job(
+    fake_runner: _FakeRunner,
+    make_app: Any,
+) -> None:
+    # Arrange
+    app = make_app(fake_runner)
+    client = TestClient(app)
+    response = client.post(
+        "/replay",
+        files={
+            "tlog": ("derkachi.tlog", _valid_tlog_bytes(), "application/octet-stream"),
+            "video": ("derkachi.mp4", _valid_mp4_bytes(), "video/mp4"),
+            "calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
+        },
+    )
+    body = response.json()
+    job_id = body["job_id"]
+
+    # Act
+    jsonl_resp = client.get(f"/jobs/{job_id}/result")
+    map_resp = client.get(f"/jobs/{job_id}/map")
+    report_resp = client.get(f"/jobs/{job_id}/report")
+
+    # Assert
+    assert jsonl_resp.status_code == 200
+    assert "lat_deg" in jsonl_resp.text
+    assert map_resp.status_code == 200
+    assert "fake map" in map_resp.text
+    assert report_resp.status_code == 200
+    assert "**Verdict**: PASS" in report_resp.text
+
+
+# ---------------------------------------------------------------------
+# AC-2 — async POST → 202 + job id
+
+
+def test_post_replay_async_returns_202_when_video_exceeds_sync_bytes(
+    storage: StorageRoot,
+) -> None:
+    # Arrange — runner sleeps so we observe the queued/running state.
+    runner = _FakeRunner(delay_s=0.2)
+    registry = JobRegistry(runner=runner, storage=storage, max_concurrent=1)
+    app = create_app(
+        runner=runner,
+        storage=storage,
+        registry=registry,
+        sync_max_bytes=10,  # any non-trivial video exceeds this
+    )
+    client = TestClient(app)
+
+    # Act
+    response = client.post(
+        "/replay",
+        files={
+            "tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
+            "video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
+            "calibration": (
+                "k.json",
+                _valid_calibration_bytes(),
+                "application/json",
+            ),
+        },
+    )
+
+    # Assert
+    assert response.status_code == 202, response.text
+    body = response.json()
+    assert body["state"] in {JobState.QUEUED.value, JobState.RUNNING.value}
+    assert "Location" in response.headers
+    assert response.headers["Location"] == f"/jobs/{body['job_id']}"
+    _wait_done(client, body["job_id"])
+
+
+# ---------------------------------------------------------------------
+# AC-3 — job state transitions queued → running → done
+
+
+def test_job_state_transitions_observable_via_polling(
+    storage: StorageRoot,
+) -> None:
+    # Arrange
+    runner = _FakeRunner(delay_s=0.3)
+    registry = JobRegistry(runner=runner, storage=storage, max_concurrent=1)
+    app = create_app(
+        runner=runner,
+        storage=storage,
+        registry=registry,
+        sync_max_bytes=10,
+    )
+    client = TestClient(app)
+    response = client.post(
+        "/replay",
+        files={
+            "tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
+            "video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
+            "calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
+        },
+    )
+    job_id = response.json()["job_id"]
+
+    # Act + Assert — poll until done; record the unique states seen.
+    seen: set[str] = set()
+    deadline = time.monotonic() + 10.0
+    while time.monotonic() < deadline:
+        snap = client.get(f"/jobs/{job_id}").json()
+        seen.add(snap["state"])
+        if snap["state"] == JobState.DONE.value:
+            break
+        time.sleep(0.05)
+    assert JobState.DONE.value in seen
+    # We expect to have seen at least one of queued/running before done.
+    assert seen & {JobState.QUEUED.value, JobState.RUNNING.value}
+
+
+def test_failed_runner_marks_job_failed(
+    storage: StorageRoot,
+) -> None:
+    # Arrange
+    runner = _FakeRunner(fail=True)
+    registry = JobRegistry(runner=runner, storage=storage)
+    app = create_app(
+        runner=runner, storage=storage, registry=registry, sync_max_bytes=10
+    )
+    client = TestClient(app)
+
+    # Act
+    response = client.post(
+        "/replay",
+        files={
+            "tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
+            "video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
+            "calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
+        },
+    )
+    job_id = response.json()["job_id"]
+    snap = _wait_terminal(client, job_id)
+
+    # Assert
+    assert snap["state"] == JobState.FAILED.value
+    assert "fake runner forced failure" in (snap["error"] or "")
+
+
+# ---------------------------------------------------------------------
+# AC-4 — result + map served from job id (happy path covered above; 409 when not done)
+
+
+def test_result_endpoints_409_when_job_not_done(
+    storage: StorageRoot,
+) -> None:
+    # Arrange — slow runner so job stays running long enough to probe.
+    runner = _FakeRunner(delay_s=0.5)
+    registry = JobRegistry(runner=runner, storage=storage)
+    app = create_app(
+        runner=runner, storage=storage, registry=registry, sync_max_bytes=10
+    )
+    client = TestClient(app)
+    job_id = client.post(
+        "/replay",
+        files={
+            "tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
+            "video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
+            "calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
+        },
+    ).json()["job_id"]
+
+    # Act — race the runner; we want to hit the not-done branch.
+    res = client.get(f"/jobs/{job_id}/result")
+
+    # Assert
+    if res.status_code == 200:
+        pytest.skip("runner finished before we could probe the 409 path")
+    assert res.status_code == 409
+    body = res.json()
+    assert body["error_code"] == "job_not_complete"
+    _wait_done(client, job_id)
+
+
+# ---------------------------------------------------------------------
+# AC-5 — auth enforced when configured
+
+
+def test_post_replay_returns_401_without_bearer_when_required(
+    monkeypatch: pytest.MonkeyPatch,
+    storage: StorageRoot,
+    fake_runner: _FakeRunner,
+) -> None:
+    # Arrange
+    monkeypatch.setenv("REPLAY_API_AUTH_REQUIRED", "true")
+    monkeypatch.setenv("REPLAY_API_BEARER_TOKEN", "shibboleth")
+    registry = JobRegistry(runner=fake_runner, storage=storage)
+    app = create_app(
+        runner=fake_runner,
+        storage=storage,
+        registry=registry,
+        sync_max_bytes=10_000_000,
+    )
+    client = TestClient(app)
+
+    # Act
+    response = client.post(
+        "/replay",
+        files={
+            "tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
+            "video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
+            "calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
+        },
+    )
+
+    # Assert
+    assert response.status_code == 401
+    assert response.json()["error_code"] == "unauthorized"
+
+
+def test_post_replay_accepts_correct_bearer(
+    monkeypatch: pytest.MonkeyPatch,
+    storage: StorageRoot,
+    fake_runner: _FakeRunner,
+) -> None:
+    # Arrange
+    monkeypatch.setenv("REPLAY_API_AUTH_REQUIRED", "true")
+    monkeypatch.setenv("REPLAY_API_BEARER_TOKEN", "shibboleth")
+    registry = JobRegistry(runner=fake_runner, storage=storage)
+    app = create_app(
+        runner=fake_runner,
+        storage=storage,
+        registry=registry,
+        sync_max_bytes=10_000_000,
+    )
+    client = TestClient(app)
+
+    # Act
+    response = client.post(
+        "/replay",
+        files={
+            "tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
+            "video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
+            "calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
+        },
+        headers={"Authorization": "Bearer shibboleth"},
+    )
+
+    # Assert
+    assert response.status_code == 200, response.text
+
+
+# ---------------------------------------------------------------------
+# AC-6 — health endpoints
+
+
+def test_healthz_always_returns_200(fake_runner: _FakeRunner, make_app: Any) -> None:
+    # Arrange
+    client = TestClient(make_app(fake_runner))
+
+    # Act / Assert
+    assert client.get("/healthz").status_code == 200
+
+
+def test_readyz_returns_503_when_binary_missing(
+    monkeypatch: pytest.MonkeyPatch,
+    fake_runner: _FakeRunner,
+    make_app: Any,
+) -> None:
+    # Arrange — point readyz at a binary we know doesn't exist.
+    monkeypatch.setenv("REPLAY_API_REPLAY_BINARY", "definitely-not-a-binary-az701")
+    client = TestClient(make_app(fake_runner))
+
+    # Act
+    response = client.get("/readyz")
+
+    # Assert
+    assert response.status_code == 503
+    assert "not on PATH" in response.json()["reason"]
+
+
+# ---------------------------------------------------------------------
+# AC-8 — concurrency limit enforced
+
+
+def test_concurrency_limit_queues_excess_jobs(storage: StorageRoot) -> None:
+    # Arrange
+    runner = _FakeRunner(delay_s=0.5)
+    registry = JobRegistry(
+        runner=runner, storage=storage, max_concurrent=1, max_queued=8
+    )
+    app = create_app(
+        runner=runner, storage=storage, registry=registry, sync_max_bytes=10
+    )
+    client = TestClient(app)
+    job_ids: list[str] = []
+
+    # Act — submit 3 in quick succession; sync_max_bytes=10 forces async mode.
+    for _ in range(3):
+        resp = client.post(
+            "/replay",
+            files={
+                "tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
+                "video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
+                "calibration": (
+                    "k.json",
+                    _valid_calibration_bytes(),
+                    "application/json",
+                ),
+            },
+        )
+        assert resp.status_code == 202, resp.text
+        job_ids.append(resp.json()["job_id"])
+
+    # Sample states right away — expect ≤ 1 running and ≥ 1 queued (or ≥ 2 already done).
+    states = [
+        client.get(f"/jobs/{jid}").json()["state"] for jid in job_ids
+    ]
+    assert states.count(JobState.RUNNING.value) <= 1, (
+        f"more than one running at once: {states}"
+    )
+    assert (
+        states.count(JobState.QUEUED.value) >= 1
+        or states.count(JobState.DONE.value) >= 2
+    ), f"no queued state observed; states={states}"
+
+    # Wait for everything to finish so the test exits cleanly.
+    for jid in job_ids:
+        _wait_done(client, jid)
+
+
+def test_queue_full_returns_429(storage: StorageRoot) -> None:
+    # Arrange — max_queued=0 forces the 429 path on the second submit.
+    runner = _FakeRunner(delay_s=0.5)
+    registry = JobRegistry(
+        runner=runner, storage=storage, max_concurrent=1, max_queued=0
+    )
+    app = create_app(
+        runner=runner, storage=storage, registry=registry, sync_max_bytes=10
+    )
+    client = TestClient(app)
+
+    # Act
+    first = client.post(
+        "/replay",
+        files={
+            "tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
+            "video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
+            "calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
+        },
+    )
+    second = client.post(
+        "/replay",
+        files={
+            "tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
+            "video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
+            "calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
+        },
+    )
+
+    # Assert
+    assert first.status_code == 202
+    assert second.status_code == 429
+    assert second.json()["error_code"] == "concurrency_limit_reached"
+    _wait_done(client, first.json()["job_id"])
+
+
+# ---------------------------------------------------------------------
+# AC-9 — magic-byte upload validation (HTTP path)
+
+
+def test_post_replay_rejects_misnamed_zip_as_tlog(
+    fake_runner: _FakeRunner, make_app: Any
+) -> None:
+    # Arrange
+    bogus_tlog = b"\x00\x00\x00\x00\x00\x00\x00\x00PK\x03\x04bogus"
+    client = TestClient(make_app(fake_runner))
+
+    # Act
+    response = client.post(
+        "/replay",
+        files={
+            "tlog": ("d.tlog", bogus_tlog, "application/octet-stream"),
+            "video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
+            "calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
+        },
+    )
+
+    # Assert
+    assert response.status_code == 400
+    assert response.json()["error_code"] == "unsupported_file_kind"
+
+
+def test_post_replay_rejects_misnamed_zip_as_video(
+    fake_runner: _FakeRunner, make_app: Any
+) -> None:
+    # Arrange
+    bogus_video = b"\x00\x00\x00\x20notftyp..." + b"\x00" * 64
+    client = TestClient(make_app(fake_runner))
+
+    # Act
+    response = client.post(
+        "/replay",
+        files={
+            "tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
+            "video": ("d.mp4", bogus_video, "video/mp4"),
+            "calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
+        },
+    )
+
+    # Assert
+    assert response.status_code == 400
+    assert response.json()["error_code"] == "unsupported_file_kind"
+
+
+# ---------------------------------------------------------------------
+# Helpers
+
+
+def _wait_done(client: TestClient, job_id: str, timeout_s: float = 10.0) -> None:
+    """Block until ``job_id`` is ``done``; raise AssertionError on ``failed`` or timeout."""
+    deadline = time.monotonic() + timeout_s
+    while time.monotonic() < deadline:
+        snap = client.get(f"/jobs/{job_id}").json()
+        if snap["state"] == JobState.DONE.value:
+            return
+        if snap["state"] == JobState.FAILED.value:
+            raise AssertionError(f"job {job_id} unexpectedly failed: {snap}")
+        time.sleep(0.05)
+    raise AssertionError(f"job {job_id} did not reach DONE within {timeout_s}s")
+
+
+def _wait_terminal(
+    client: TestClient, job_id: str, timeout_s: float = 10.0
+) -> dict[str, Any]:
+    deadline = time.monotonic() + timeout_s
+    while time.monotonic() < deadline:
+        snap = client.get(f"/jobs/{job_id}").json()
+        if snap["state"] in {JobState.DONE.value, JobState.FAILED.value}:
+            return snap
+        time.sleep(0.05)
+    raise AssertionError(f"job {job_id} did not reach terminal state")
+
+
+# Reference otherwise-unused imports so linters do not flag them.
+_ = (os, threading, fastapi)
@@ -20,7 +20,7 @@ from gps_denied_onboard.helpers.gps_compare import (
    horizontal_error_distribution,
    percentile_sorted,
 )
-from tests.e2e.replay._report_writer import (
+from gps_denied_onboard.helpers.accuracy_report import (
    AC3_GATE_PCT,
    AC3_GATE_THRESHOLD_M,
    ReportContext,