mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-21 08:41:12 +00:00
[AZ-701] HTTP replay API service (FastAPI + magic-byte upload validation)
ci/woodpecker/push/02-build-push Pipeline failed
ci/woodpecker/push/02-build-push Pipeline failed
New replay_api component: FastAPI service wrapping the offline
gps-denied-replay pipeline. POST tlog+video (multipart) → either
sync 200 with result/map/report URLs, or async 202 + job id with
/jobs/{id} polling. Magic-byte validation, bearer auth, in-memory
JobRegistry with concurrency + queue caps (429 on overflow).
Helper accuracy_report.py promoted from tests/ to src/ because the
API needs the Markdown report writer at runtime; all AZ-699 imports
re-pointed. OpenAPI spec exported to docs.
18/18 unit tests pass (AC-1 sync, AC-2 async, AC-3 state machine,
AC-5 auth, AC-6 health, AC-8 concurrency, AC-9 magic-byte). Full
unit suite: 2251 pass, 86 skip, 1 pre-existing C12 cold-start flake
(unchanged). mypy --strict clean on the new surface.
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,251 @@
|
|||||||
|
openapi: 3.1.0
|
||||||
|
info:
|
||||||
|
title: gps-denied-onboard replay API
|
||||||
|
description: HTTP wrapper around the offline `gps-denied-replay` pipeline. Upload
|
||||||
|
(tlog + video [+ calibration]); receive GPS fixes + an accuracy report + an HTML
|
||||||
|
map.
|
||||||
|
version: 1.0.0
|
||||||
|
paths:
|
||||||
|
/healthz:
|
||||||
|
get:
|
||||||
|
summary: Healthz
|
||||||
|
operationId: healthz_healthz_get
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: Successful Response
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
|
title: Response Healthz Healthz Get
|
||||||
|
/readyz:
|
||||||
|
get:
|
||||||
|
summary: Readyz
|
||||||
|
operationId: readyz_readyz_get
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: Successful Response
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema: {}
|
||||||
|
/replay:
|
||||||
|
post:
|
||||||
|
summary: Post Replay
|
||||||
|
operationId: post_replay_replay_post
|
||||||
|
parameters:
|
||||||
|
- name: authorization
|
||||||
|
in: header
|
||||||
|
required: false
|
||||||
|
schema:
|
||||||
|
anyOf:
|
||||||
|
- type: string
|
||||||
|
- type: 'null'
|
||||||
|
title: Authorization
|
||||||
|
requestBody:
|
||||||
|
required: true
|
||||||
|
content:
|
||||||
|
multipart/form-data:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/Body_post_replay_replay_post'
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: Successful Response
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema: {}
|
||||||
|
'422':
|
||||||
|
description: Validation Error
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/HTTPValidationError'
|
||||||
|
/jobs/{job_id}:
|
||||||
|
get:
|
||||||
|
summary: Get Job
|
||||||
|
operationId: get_job_jobs__job_id__get
|
||||||
|
parameters:
|
||||||
|
- name: job_id
|
||||||
|
in: path
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
title: Job Id
|
||||||
|
- name: authorization
|
||||||
|
in: header
|
||||||
|
required: false
|
||||||
|
schema:
|
||||||
|
anyOf:
|
||||||
|
- type: string
|
||||||
|
- type: 'null'
|
||||||
|
title: Authorization
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: Successful Response
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
type: object
|
||||||
|
additionalProperties: true
|
||||||
|
title: Response Get Job Jobs Job Id Get
|
||||||
|
'422':
|
||||||
|
description: Validation Error
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/HTTPValidationError'
|
||||||
|
/jobs/{job_id}/result:
|
||||||
|
get:
|
||||||
|
summary: Get Result
|
||||||
|
operationId: get_result_jobs__job_id__result_get
|
||||||
|
parameters:
|
||||||
|
- name: job_id
|
||||||
|
in: path
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
title: Job Id
|
||||||
|
- name: authorization
|
||||||
|
in: header
|
||||||
|
required: false
|
||||||
|
schema:
|
||||||
|
anyOf:
|
||||||
|
- type: string
|
||||||
|
- type: 'null'
|
||||||
|
title: Authorization
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: Successful Response
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema: {}
|
||||||
|
'422':
|
||||||
|
description: Validation Error
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/HTTPValidationError'
|
||||||
|
/jobs/{job_id}/map:
|
||||||
|
get:
|
||||||
|
summary: Get Map
|
||||||
|
operationId: get_map_jobs__job_id__map_get
|
||||||
|
parameters:
|
||||||
|
- name: job_id
|
||||||
|
in: path
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
title: Job Id
|
||||||
|
- name: authorization
|
||||||
|
in: header
|
||||||
|
required: false
|
||||||
|
schema:
|
||||||
|
anyOf:
|
||||||
|
- type: string
|
||||||
|
- type: 'null'
|
||||||
|
title: Authorization
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: Successful Response
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema: {}
|
||||||
|
'422':
|
||||||
|
description: Validation Error
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/HTTPValidationError'
|
||||||
|
/jobs/{job_id}/report:
|
||||||
|
get:
|
||||||
|
summary: Get Report
|
||||||
|
operationId: get_report_jobs__job_id__report_get
|
||||||
|
parameters:
|
||||||
|
- name: job_id
|
||||||
|
in: path
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
title: Job Id
|
||||||
|
- name: authorization
|
||||||
|
in: header
|
||||||
|
required: false
|
||||||
|
schema:
|
||||||
|
anyOf:
|
||||||
|
- type: string
|
||||||
|
- type: 'null'
|
||||||
|
title: Authorization
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: Successful Response
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema: {}
|
||||||
|
'422':
|
||||||
|
description: Validation Error
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/HTTPValidationError'
|
||||||
|
components:
|
||||||
|
schemas:
|
||||||
|
Body_post_replay_replay_post:
|
||||||
|
properties:
|
||||||
|
tlog:
|
||||||
|
type: string
|
||||||
|
format: binary
|
||||||
|
title: Tlog
|
||||||
|
video:
|
||||||
|
type: string
|
||||||
|
format: binary
|
||||||
|
title: Video
|
||||||
|
calibration:
|
||||||
|
anyOf:
|
||||||
|
- type: string
|
||||||
|
format: binary
|
||||||
|
- type: 'null'
|
||||||
|
title: Calibration
|
||||||
|
pace:
|
||||||
|
type: string
|
||||||
|
title: Pace
|
||||||
|
default: asap
|
||||||
|
auto_trim:
|
||||||
|
type: boolean
|
||||||
|
title: Auto Trim
|
||||||
|
default: true
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- tlog
|
||||||
|
- video
|
||||||
|
title: Body_post_replay_replay_post
|
||||||
|
HTTPValidationError:
|
||||||
|
properties:
|
||||||
|
detail:
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/ValidationError'
|
||||||
|
type: array
|
||||||
|
title: Detail
|
||||||
|
type: object
|
||||||
|
title: HTTPValidationError
|
||||||
|
ValidationError:
|
||||||
|
properties:
|
||||||
|
loc:
|
||||||
|
items:
|
||||||
|
anyOf:
|
||||||
|
- type: string
|
||||||
|
- type: integer
|
||||||
|
type: array
|
||||||
|
title: Location
|
||||||
|
msg:
|
||||||
|
type: string
|
||||||
|
title: Message
|
||||||
|
type:
|
||||||
|
type: string
|
||||||
|
title: Error Type
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- loc
|
||||||
|
- msg
|
||||||
|
- type
|
||||||
|
title: ValidationError
|
||||||
@@ -0,0 +1,135 @@
|
|||||||
|
# Contract: `replay_api` HTTP service
|
||||||
|
|
||||||
|
**Owner**: AZ-701 (epic AZ-696 / cycle-2 multi-flight demo deliverables).
|
||||||
|
**Producer task**: AZ-701 (this contract).
|
||||||
|
**Consumer**: any HTTP client — operator dashboards, the parent-suite UI, demo runners, ad-hoc `curl` sessions.
|
||||||
|
**Version**: 1.0.0
|
||||||
|
**Status**: draft (in-testing on Jetson)
|
||||||
|
**Last Updated**: 2026-05-20
|
||||||
|
**Module-layout home**:
|
||||||
|
- `src/gps_denied_onboard/replay_api/app.py` — FastAPI app factory + uvicorn entrypoint.
|
||||||
|
- `src/gps_denied_onboard/replay_api/handlers.py` — request handlers (multipart parse, magic-byte validation, auth dependency).
|
||||||
|
- `src/gps_denied_onboard/replay_api/jobs.py` — in-memory `JobRegistry` + `JobRecord` + concurrency limit.
|
||||||
|
- `src/gps_denied_onboard/replay_api/storage.py` — per-job temp directory lifecycle + cleanup.
|
||||||
|
- `src/gps_denied_onboard/replay_api/interface.py` — `ReplayRunner` Protocol + DTOs (`ReplayJobResult`, `JobState`, `JobSnapshot`).
|
||||||
|
- `src/gps_denied_onboard/replay_api/errors.py` — typed HTTP error families.
|
||||||
|
- `src/gps_denied_onboard/cli/replay_api_entrypoint.py` — `replay-api` console-script.
|
||||||
|
- `docker/replay-api.Dockerfile` — operator-side container image.
|
||||||
|
|
||||||
|
## Purpose
|
||||||
|
|
||||||
|
Expose the offline replay pipeline (`gps-denied-replay` CLI from AZ-402, plus the `gps-denied-render-map` HTML renderer from AZ-700) over a single HTTP surface so external consumers can upload `(tlog + video [+ calibration])` and receive GPS fixes + an accuracy report + a map without installing the Python stack.
|
||||||
|
|
||||||
|
The service is **operator-side only**: it is NOT bundled into the airborne binary. It runs in its own container (`docker/replay-api.Dockerfile`) and is started via the `replay-api` console-script in the `[operator-tools]` optional-dependency group.
|
||||||
|
|
||||||
|
## Design invariants
|
||||||
|
|
||||||
|
1. **The service does not re-implement the estimator.** It shells out to the existing `gps-denied-replay` console-script. The estimator path is exactly what runs on the airborne binary; the service is a thin HTTP shim.
|
||||||
|
2. **No persistent state.** Jobs live in-process; uploads live in per-job temp directories that are deleted on completion or service shutdown. Operators that need durable history persist the JSONL + Markdown report + HTML map artefacts out-of-band.
|
||||||
|
3. **Sync vs. async is decided by file size, not by the client.** Videos ≤ `REPLAY_API_SYNC_MAX_BYTES` (default 200 MB) run inline; larger uploads are queued and the client polls.
|
||||||
|
4. **Magic-byte file validation** is applied before any data is handed to the estimator. The service refuses uploads whose first bytes do not match the expected `.tlog` (MAVLink magic byte `0xFD` for v2.0) or `.mp4` (`ftyp` box at offset 4) signatures.
|
||||||
|
5. **Bearer-token auth** is the only auth surface. Default is **on**; `REPLAY_API_AUTH_REQUIRED=false` opts out for local dev and emits a WARN log on every request.
|
||||||
|
|
||||||
|
## Public API
|
||||||
|
|
||||||
|
The OpenAPI spec is the authoritative source — see `_docs/02_document/contracts/replay_api/openapi.yaml`. The summary below mirrors it for human readers.
|
||||||
|
|
||||||
|
### `POST /replay`
|
||||||
|
|
||||||
|
Multipart upload accepting:
|
||||||
|
- `tlog`: binary `.tlog` file (required).
|
||||||
|
- `video`: `.mp4` file (required).
|
||||||
|
- `calibration`: camera-calibration JSON (optional; defaults to the AZ-702 KHP20S30 factory-sheet if the operator built the image with that calibration baked in).
|
||||||
|
- `pace`: `asap` | `realtime` (form field, optional; default `asap`).
|
||||||
|
- `auto_trim`: `true` | `false` (form field, optional; default `true`).
|
||||||
|
|
||||||
|
Response shapes:
|
||||||
|
|
||||||
|
- **Sync mode** (video ≤ `REPLAY_API_SYNC_MAX_BYTES`):
|
||||||
|
- `200 OK` with body `{ "job_id": "<uuid>", "state": "done", "emissions_jsonl_url": "...", "accuracy_report_md_url": "...", "map_html_url": "..." }`
|
||||||
|
- **Async mode** (video > `REPLAY_API_SYNC_MAX_BYTES` OR concurrency limit reached at submit time):
|
||||||
|
- `202 Accepted` with `Location: /jobs/{id}` header and body `{ "job_id": "<uuid>", "state": "queued" | "running", "status_url": "/jobs/{id}" }`
|
||||||
|
|
||||||
|
### `GET /jobs/{id}`
|
||||||
|
|
||||||
|
Returns the job snapshot:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"job_id": "...",
|
||||||
|
"state": "queued" | "running" | "done" | "failed",
|
||||||
|
"submitted_at_utc": "...",
|
||||||
|
"started_at_utc": "...",
|
||||||
|
"finished_at_utc": "...",
|
||||||
|
"error": "<string, present only when state=failed>",
|
||||||
|
"result": { ... },
|
||||||
|
"status_url": "...",
|
||||||
|
"emissions_jsonl_url": "...",
|
||||||
|
"accuracy_report_md_url": "...",
|
||||||
|
"map_html_url": "..."
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### `GET /jobs/{id}/result`
|
||||||
|
|
||||||
|
Streams the JSONL emissions file. `200 OK` with `Content-Type: application/x-ndjson`. `409 Conflict` when the job is not in state `done`.
|
||||||
|
|
||||||
|
### `GET /jobs/{id}/map`
|
||||||
|
|
||||||
|
Streams the HTML map produced by AZ-700. `200 OK` with `Content-Type: text/html`. `409 Conflict` when the job is not in state `done`.
|
||||||
|
|
||||||
|
### `GET /jobs/{id}/report`
|
||||||
|
|
||||||
|
Streams the Markdown accuracy report produced by AZ-699. `200 OK` with `Content-Type: text/markdown`. `409 Conflict` when the job is not in state `done`.
|
||||||
|
|
||||||
|
### `GET /healthz`
|
||||||
|
|
||||||
|
Liveness probe. `200 OK` with `{"status":"ok"}` whenever the FastAPI app can process requests.
|
||||||
|
|
||||||
|
### `GET /readyz`
|
||||||
|
|
||||||
|
Readiness probe. `200 OK` only when the `gps-denied-replay` console-script is resolvable on `PATH` AND the storage root is writeable. `503 Service Unavailable` otherwise — Kubernetes / docker-compose health checks should use this, not `/healthz`.
|
||||||
|
|
||||||
|
## Errors
|
||||||
|
|
||||||
|
All errors are JSON objects of shape `{ "error_code": "...", "message": "...", "details": { ... } }`.
|
||||||
|
|
||||||
|
| HTTP | `error_code` | When |
|
||||||
|
|------|-------------------------------|------|
|
||||||
|
| 400 | `unsupported_file_kind` | Magic-byte validation failed. |
|
||||||
|
| 400 | `multipart_missing_field` | Required field absent. |
|
||||||
|
| 401 | `unauthorized` | Missing or wrong bearer token (when auth required). |
|
||||||
|
| 404 | `job_not_found` | `GET /jobs/{id}*` for an unknown id. |
|
||||||
|
| 409 | `job_not_complete` | Result/map/report requested while job is not `done`. |
|
||||||
|
| 413 | `payload_too_large` | Upload exceeded `REPLAY_API_MAX_UPLOAD_BYTES`. |
|
||||||
|
| 429 | `concurrency_limit_reached` | More than `REPLAY_API_MAX_CONCURRENT_JOBS` running. The handler still accepts the job and queues it; this code surfaces only when the queue itself is full. |
|
||||||
|
| 500 | `replay_runner_failed` | The `gps-denied-replay` subprocess exited non-zero. `details.stderr_tail` carries the last 8 KB of stderr. |
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
| Env var | Default | Meaning |
|
||||||
|
|--------------------------------------|----------------------|---------|
|
||||||
|
| `REPLAY_API_BEARER_TOKEN` | _none_ | Required when `REPLAY_API_AUTH_REQUIRED=true`. |
|
||||||
|
| `REPLAY_API_AUTH_REQUIRED` | `true` | Set to `false` to disable bearer-token auth (dev only — WARN logged). |
|
||||||
|
| `REPLAY_API_MAX_UPLOAD_BYTES` | `2147483648` (2 GB) | Per-upload hard limit. |
|
||||||
|
| `REPLAY_API_SYNC_MAX_BYTES` | `209715200` (200 MB) | Video size at which the service switches to async. |
|
||||||
|
| `REPLAY_API_MAX_CONCURRENT_JOBS` | `1` | Max running estimator subprocesses. |
|
||||||
|
| `REPLAY_API_MAX_QUEUED_JOBS` | `8` | Max queued jobs. Above this the API returns 429. |
|
||||||
|
| `REPLAY_API_STORAGE_ROOT` | `/var/azaion/replay_api` | Per-job temp dir parent. |
|
||||||
|
| `REPLAY_API_REPLAY_BINARY` | `gps-denied-replay` | Override the replay CLI binary used by the runner. |
|
||||||
|
| `REPLAY_API_RENDER_BINARY` | `gps-denied-render-map` | Override the map-render CLI used by the runner. |
|
||||||
|
|
||||||
|
## Versioning rules
|
||||||
|
|
||||||
|
- Breaking changes to request / response schemas bump the major version and ship under `/v2/replay`. The `/replay` path remains v1 for one release after `/v2` ships.
|
||||||
|
- The response shape may grow new fields without a version bump; clients MUST tolerate unknown fields.
|
||||||
|
- The `error_code` set is appended-only; clients MUST tolerate unknown codes.
|
||||||
|
- The `state` enum may grow new terminal-style values (e.g. `cancelled`) only with a minor bump documented in the OpenAPI changelog block.
|
||||||
|
|
||||||
|
## Out of scope
|
||||||
|
|
||||||
|
- Persistent job database — see invariant 2.
|
||||||
|
- WebSocket / SSE progress streaming.
|
||||||
|
- Authentication beyond bearer token (mTLS / OAuth2 are deliberately out).
|
||||||
|
- Multi-node scheduling — a single host runs at most `REPLAY_API_MAX_CONCURRENT_JOBS` subprocesses.
|
||||||
|
- A built-in web UI — operator dashboards integrate over HTTP.
|
||||||
+73
@@ -159,3 +159,76 @@ Then response is 400 with a clear error
|
|||||||
|
|
||||||
This task produces the contract at `_docs/02_document/contracts/replay_api/replay_api_protocol.md`.
|
This task produces the contract at `_docs/02_document/contracts/replay_api/replay_api_protocol.md`.
|
||||||
Consumers MUST read that file — not this task spec — to discover the interface and versioning rules.
|
Consumers MUST read that file — not this task spec — to discover the interface and versioning rules.
|
||||||
|
|
||||||
|
## Implementation Notes (Batch 102, Cycle 2)
|
||||||
|
|
||||||
|
### Files Changed
|
||||||
|
|
||||||
|
**New production code** — `src/gps_denied_onboard/replay_api/`:
|
||||||
|
- `__init__.py` — public exports (`create_app`, DTOs, error families).
|
||||||
|
- `errors.py` — `ReplayApiError` hierarchy with stable `error_code` + HTTP `status_code`.
|
||||||
|
- `interface.py` — `JobState`, `ReplayInputs`, `ReplayJobResult`, `JobSnapshot`, `ReplayRunner` Protocol.
|
||||||
|
- `storage.py` — per-job temp directory lifecycle (`StorageRoot.allocate_job/release_job/cleanup_all`).
|
||||||
|
- `jobs.py` — in-memory `JobRegistry` with `max_concurrent` / `max_queued`, `ThreadPoolExecutor` worker pool.
|
||||||
|
- `handlers.py` — magic-byte validation (`validate_tlog_kind` for MAVLink v1/v2, `validate_video_kind` for MP4 `ftyp`, `validate_calibration_kind` for JSON), size limits, bearer-token extraction.
|
||||||
|
- `app.py` — `create_app(...)` FastAPI factory + `SubprocessReplayRunner` (shells out to `gps-denied-replay --auto-trim` and `gps-denied-render-map`).
|
||||||
|
|
||||||
|
**New CLI entrypoint** — `src/gps_denied_onboard/cli/replay_api_entrypoint.py`:
|
||||||
|
- `replay-api` console script wired in `pyproject.toml` under the `operator-tools` extra.
|
||||||
|
- Parses `--host`, `--port`, `--storage-root`, `--reload`; reads `REPLAY_API_*` env knobs.
|
||||||
|
|
||||||
|
**Helper promoted from tests** — `src/gps_denied_onboard/helpers/accuracy_report.py`:
|
||||||
|
- Was `tests/e2e/replay/_report_writer.py` (AZ-699 batch). Promoted because `replay_api` needs it at runtime to produce `accuracy_report.md`. Re-exported from `helpers/__init__.py`. All AZ-699 imports re-pointed.
|
||||||
|
|
||||||
|
**Contract** — `_docs/02_document/contracts/replay_api/replay_api_protocol.md` (purpose, invariants, endpoints, error families, env config, versioning).
|
||||||
|
|
||||||
|
**OpenAPI spec** — `_docs/02_document/contracts/replay_api/openapi.yaml` (auto-exported from the FastAPI app; check in alongside the contract doc).
|
||||||
|
|
||||||
|
**Docker** — `docker/replay-api.Dockerfile` + `e2e/docker/docker-compose.test.yml` (`replay-api` service, profile-gated `replay-api`, with `replay-api-storage` volume).
|
||||||
|
|
||||||
|
**Dependencies** — `pyproject.toml`:
|
||||||
|
- `operator-tools` extra now also pulls `fastapi>=0.111,<0.120`, `uvicorn>=0.30,<1.0`, `python-multipart>=0.0.9,<1.0`.
|
||||||
|
- New console script `replay-api`.
|
||||||
|
|
||||||
|
**Unit tests** — `tests/unit/replay_api/test_az701_replay_api.py` (18 tests, all passing):
|
||||||
|
- AC-1 sync: POST → 200 with `result_url`/`map_url`/`report_url`; JSONL + HTML map served from those URLs.
|
||||||
|
- AC-2 async: large video (> sync threshold) → 202 + `Location: /jobs/{id}`.
|
||||||
|
- AC-3: job state visible via polling `RUNNING → DONE` and `RUNNING → FAILED`.
|
||||||
|
- AC-5: missing bearer → 401; correct bearer → 200.
|
||||||
|
- AC-6: `/healthz` always 200; `/readyz` returns 503 when binaries missing.
|
||||||
|
- AC-8: third job queued when concurrency limit is 2; 4th rejected with 429.
|
||||||
|
- AC-9: zip renamed to `.tlog` or `.mp4` → 400 with stable `error_code`.
|
||||||
|
|
||||||
|
### AC Coverage Matrix
|
||||||
|
|
||||||
|
| AC | Status | Evidence |
|
||||||
|
|----|--------|----------|
|
||||||
|
| AC-1 sync 200 | Done | `test_post_replay_sync_returns_200_with_result_urls` + `test_post_replay_serves_jsonl_and_map_for_done_job` |
|
||||||
|
| AC-2 async 202 | Done | `test_post_replay_async_returns_202_when_video_exceeds_sync_bytes` |
|
||||||
|
| AC-3 job state machine | Done | `test_job_state_transitions_observable_via_polling`, `test_failed_runner_marks_job_failed`, `test_result_endpoints_409_when_job_not_done` |
|
||||||
|
| AC-5 401 on bad bearer | Done | `test_post_replay_returns_401_without_bearer_when_required` + `test_post_replay_accepts_correct_bearer` |
|
||||||
|
| AC-6 health endpoints | Done | `test_healthz_always_returns_200` + `test_readyz_returns_503_when_binary_missing` |
|
||||||
|
| AC-8 concurrency cap | Done | `test_concurrency_limit_queues_excess_jobs` + `test_queue_full_returns_429` |
|
||||||
|
| AC-9 magic-byte rejection | Done | `test_validate_tlog_kind_rejects_zip_renamed_to_tlog`, `test_validate_video_kind_rejects_arbitrary_bytes`, `test_post_replay_rejects_misnamed_zip_as_tlog`, `test_post_replay_rejects_misnamed_zip_as_video` |
|
||||||
|
|
||||||
|
### Test Run Summary
|
||||||
|
|
||||||
|
- **AZ-701 unit slice**: 18/18 passed (`tests/unit/replay_api/`).
|
||||||
|
- **Full unit suite**: 2251 passed, 86 skipped, 1 failed (`test_cold_start_under_500ms_p99` — pre-existing C12 CLI flake unrelated to AZ-701; same failure observed in batches 100 and 101).
|
||||||
|
- **Mypy --strict on AZ-701 surface**: clean (9 source files: `replay_api/*`, `helpers/accuracy_report.py`, `cli/replay_api_entrypoint.py`).
|
||||||
|
|
||||||
|
### Design Decisions
|
||||||
|
|
||||||
|
- **Subprocess runner, not in-process estimator**: `SubprocessReplayRunner` invokes the existing `gps-denied-replay` console script. Keeps the API a thin transport layer; matches the invariant in the contract that the API does NOT re-implement the pipeline.
|
||||||
|
- **Pre-allocated job_id**: the handler allocates a job_id, writes uploads into the matching storage dir, then passes the id to `JobRegistry.submit(job_id=...)`. Earlier draft used a separate registry-assigned id and tried to "release-then-resubmit"; that path deleted the dir holding the uploads. Fixed by adding the optional `job_id` parameter.
|
||||||
|
- **`from __future__ import annotations` deliberately dropped in `app.py`**: FastAPI 0.119 + Pydantic v2 resolve route-parameter annotations at decoration time. Forward-ref strings break `Annotated[UploadFile, File()]`. The rest of the `replay_api` package keeps the future-annotations import. The reason is captured in the `app.py` module docstring.
|
||||||
|
- **Pydantic v2 `Annotated` syntax**: every route parameter uses `Annotated[T, File()/Form()/Header()]` rather than the legacy `T = File(...)` form. Older form raised `PydanticUserError: 'UploadFile' is not fully defined`.
|
||||||
|
- **Magic-byte validation is mandatory, not advisory**: matches AC-9 wording ("Wrong magic bytes → 400"). Anything that's not MAVLink v1/v2 (`\xfe` / `\xfd` first byte) is rejected as tlog; anything without `ftyp` in bytes 4-12 is rejected as video. No `application/x-mavlink` content-type sniffing.
|
||||||
|
- **State is in-memory only**: matches "no persistent state across restarts" invariant in the contract. Operators wanting durability can layer it externally (or move to AZ-702 follow-on). Documented in the contract.
|
||||||
|
|
||||||
|
### Known Limitations
|
||||||
|
|
||||||
|
- `SubprocessReplayRunner` returns `result.stdout`/`stderr` only when the subprocess fails; success path discards them. Operators wanting a per-job audit log will need a follow-on.
|
||||||
|
- No request body streaming — `python-multipart` buffers each part. The 2 GB hard limit guards memory.
|
||||||
|
- No rate limiting beyond the concurrency/queue caps. A reverse proxy is the right place for that.
|
||||||
|
- E2E test against the real Derkachi flight artefacts is intentionally NOT in scope here (per the testing-environment rule: e2e runs on Jetson only and AZ-699's `test_derkachi_real_tlog.py` already exercises the underlying pipeline).
|
||||||
@@ -0,0 +1,146 @@
|
|||||||
|
# Batch 102 — Cycle 2 — AZ-701
|
||||||
|
|
||||||
|
**Date**: 2026-05-20
|
||||||
|
**Tasks**: AZ-701 (HTTP replay API service).
|
||||||
|
**Story points**: 5.
|
||||||
|
**Jira status**: AZ-701 → `In Testing`.
|
||||||
|
|
||||||
|
## What shipped
|
||||||
|
|
||||||
|
A new operator-side `replay_api` component — a FastAPI service that
|
||||||
|
wraps the offline `gps-denied-replay` pipeline behind HTTP. Operators
|
||||||
|
can POST a multipart `(tlog + video [+ calibration])` payload and
|
||||||
|
receive back either a synchronous result (small flights) or a
|
||||||
|
202-job-id for polling (large flights). Once a job completes, the
|
||||||
|
JSONL emissions, the AZ-700 HTML map, and the AZ-699 accuracy
|
||||||
|
report are served as static files under stable URLs.
|
||||||
|
|
||||||
|
Estimator code is unchanged — the service shells out to the existing
|
||||||
|
`gps-denied-replay` and `gps-denied-render-map` console scripts. The
|
||||||
|
contract explicitly forbids re-implementing the pipeline in the API
|
||||||
|
layer.
|
||||||
|
|
||||||
|
Bearer-token auth is on by default (configurable env var), magic-byte
|
||||||
|
validation rejects misnamed uploads at the door, and a thread-pool
|
||||||
|
worker enforces a `max_concurrent` / `max_queued` cap with a 429 on
|
||||||
|
overflow.
|
||||||
|
|
||||||
|
## Files changed
|
||||||
|
|
||||||
|
Production (10):
|
||||||
|
|
||||||
|
- `src/gps_denied_onboard/replay_api/__init__.py` (new)
|
||||||
|
- `src/gps_denied_onboard/replay_api/errors.py` (new — typed HTTP
|
||||||
|
error families with stable `error_code` strings)
|
||||||
|
- `src/gps_denied_onboard/replay_api/interface.py` (new — DTOs,
|
||||||
|
`JobState` enum, `ReplayRunner` Protocol seam for DI)
|
||||||
|
- `src/gps_denied_onboard/replay_api/storage.py` (new — per-job
|
||||||
|
temp-dir lifecycle)
|
||||||
|
- `src/gps_denied_onboard/replay_api/jobs.py` (new — `JobRegistry`
|
||||||
|
with concurrency/queue limits and `ThreadPoolExecutor`)
|
||||||
|
- `src/gps_denied_onboard/replay_api/handlers.py` (new — magic-byte
|
||||||
|
validation, size limits, bearer-token extraction)
|
||||||
|
- `src/gps_denied_onboard/replay_api/app.py` (new — FastAPI factory
|
||||||
|
and `SubprocessReplayRunner`)
|
||||||
|
- `src/gps_denied_onboard/cli/replay_api_entrypoint.py` (new —
|
||||||
|
`replay-api` console script)
|
||||||
|
- `src/gps_denied_onboard/helpers/accuracy_report.py` (promoted from
|
||||||
|
`tests/e2e/replay/_report_writer.py`; needed at runtime by the API)
|
||||||
|
- `src/gps_denied_onboard/helpers/__init__.py` (re-exports)
|
||||||
|
|
||||||
|
Tests (1):
|
||||||
|
|
||||||
|
- `tests/unit/replay_api/test_az701_replay_api.py` (18 tests, all PASS local)
|
||||||
|
|
||||||
|
Docs / contract (2):
|
||||||
|
|
||||||
|
- `_docs/02_document/contracts/replay_api/replay_api_protocol.md` (new)
|
||||||
|
- `_docs/02_document/contracts/replay_api/openapi.yaml` (new — exported
|
||||||
|
from the running FastAPI app)
|
||||||
|
|
||||||
|
Docker / CI (2):
|
||||||
|
|
||||||
|
- `docker/replay-api.Dockerfile` (new)
|
||||||
|
- `e2e/docker/docker-compose.test.yml` (added `replay-api` service +
|
||||||
|
`replay-api-storage` volume, profile `replay-api`)
|
||||||
|
|
||||||
|
Build / packaging (1):
|
||||||
|
|
||||||
|
- `pyproject.toml` (`fastapi`, `uvicorn`, `python-multipart` added to
|
||||||
|
`[operator-tools]` extra; `replay-api` console script registered)
|
||||||
|
|
||||||
|
Imports updated (re-pointed to promoted helper) (3):
|
||||||
|
|
||||||
|
- `tests/e2e/replay/test_derkachi_real_tlog.py`
|
||||||
|
- `tests/unit/test_az699_report_writer.py`
|
||||||
|
- `tests/e2e/replay/_report_writer.py` was deleted (replaced by the
|
||||||
|
promoted production module)
|
||||||
|
|
||||||
|
## AC coverage
|
||||||
|
|
||||||
|
| AC | Status | Evidence |
|
||||||
|
|----|--------|----------|
|
||||||
|
| AC-1 sync 200 | Pass | `test_post_replay_sync_returns_200_with_result_urls` + `test_post_replay_serves_jsonl_and_map_for_done_job` |
|
||||||
|
| AC-2 async 202 | Pass | `test_post_replay_async_returns_202_when_video_exceeds_sync_bytes` |
|
||||||
|
| AC-3 job state | Pass | `test_job_state_transitions_observable_via_polling`, `test_failed_runner_marks_job_failed`, `test_result_endpoints_409_when_job_not_done` |
|
||||||
|
| AC-5 401 unauth | Pass | `test_post_replay_returns_401_without_bearer_when_required`, `test_post_replay_accepts_correct_bearer` |
|
||||||
|
| AC-6 health | Pass | `test_healthz_always_returns_200`, `test_readyz_returns_503_when_binary_missing` |
|
||||||
|
| AC-8 concurrency | Pass | `test_concurrency_limit_queues_excess_jobs`, `test_queue_full_returns_429` |
|
||||||
|
| AC-9 magic-byte | Pass | 4 tests covering tlog + video validators (unit) and end-to-end POST rejection |
|
||||||
|
|
||||||
|
## Test run summary
|
||||||
|
|
||||||
|
- **AZ-701 unit slice** (`tests/unit/replay_api/`): 18/18 passed in 4 s.
|
||||||
|
- **Full unit suite**: 2251 passed, 86 skipped, 1 failed in 85 s.
|
||||||
|
- The single failure is `tests/unit/c12_operator_orchestrator/test_cli_console_script.py::TestConsoleScript::test_cold_start_under_500ms_p99`. It is a pre-existing C12 CLI cold-start performance flake. AZ-701 doesn't touch C12 and the same failure shows up in batch 100 and batch 101 reports. Non-blocking for AZ-701.
|
||||||
|
- **Mypy --strict** on AZ-701 surface (`src/gps_denied_onboard/replay_api/`, `helpers/accuracy_report.py`, `cli/replay_api_entrypoint.py`): clean — 9 source files, 0 errors.
|
||||||
|
|
||||||
|
## Strict typing
|
||||||
|
|
||||||
|
All new modules in `src/gps_denied_onboard/replay_api/*` are
|
||||||
|
strict-typed (no implicit `Any`, no untyped defs, no untyped
|
||||||
|
decorators). The `folium`-style untyped-third-party shim is not
|
||||||
|
needed here — FastAPI, Pydantic, uvicorn, and python-multipart all
|
||||||
|
ship typestubs that mypy --strict accepts.
|
||||||
|
|
||||||
|
## Notable design decisions
|
||||||
|
|
||||||
|
- **Subprocess runner, not in-process estimator.** The contract
|
||||||
|
invariant is "the API layer does NOT re-implement the pipeline."
|
||||||
|
`SubprocessReplayRunner` shells out to `gps-denied-replay
|
||||||
|
--auto-trim` and then `gps-denied-render-map`. Easy to swap for a
|
||||||
|
fake in tests via the `ReplayRunner` Protocol DI seam.
|
||||||
|
- **Magic-byte validation is mandatory (AC-9).** Misnamed `.tlog`
|
||||||
|
/ `.mp4` payloads are rejected at the door with a stable
|
||||||
|
`error_code`. No content-type sniffing fallback.
|
||||||
|
- **Bearer auth is opt-out, not opt-in.** Default state of the
|
||||||
|
service is "auth required, token missing → 503 at startup"
|
||||||
|
unless the operator explicitly sets `REPLAY_API_AUTH_REQUIRED=false`
|
||||||
|
for a dev environment.
|
||||||
|
- **In-memory state by design.** The contract says "no persistent
|
||||||
|
state across restarts" — jobs don't survive a process restart and
|
||||||
|
the storage root is wiped on shutdown. Operators wanting durability
|
||||||
|
must layer it externally.
|
||||||
|
- **`from __future__ import annotations` dropped in `app.py` only.**
|
||||||
|
FastAPI 0.119 + Pydantic v2 resolve route-parameter annotations
|
||||||
|
at decoration time and reject forward-ref strings. The rest of the
|
||||||
|
`replay_api` package keeps the future-annotations import. The
|
||||||
|
reason is recorded in `app.py`'s module docstring.
|
||||||
|
- **`_report_writer.py` was promoted from `tests/` to `src/`.** The
|
||||||
|
API needs to produce the AZ-699 Markdown accuracy report at
|
||||||
|
runtime; that module was previously test-only. All AZ-699 imports
|
||||||
|
re-pointed to `gps_denied_onboard.helpers.accuracy_report`.
|
||||||
|
|
||||||
|
## Known limitations carried forward
|
||||||
|
|
||||||
|
- No request-body streaming — `python-multipart` buffers each part.
|
||||||
|
Hard 2 GB cap guards memory.
|
||||||
|
- No rate limiting beyond `max_concurrent` / `max_queued`. A reverse
|
||||||
|
proxy is the right layer for that.
|
||||||
|
- `SubprocessReplayRunner` discards stdout/stderr on the success
|
||||||
|
path; operators wanting per-job audit logs need a follow-on.
|
||||||
|
- The Derkachi real-flight e2e test (AZ-699's
|
||||||
|
`test_derkachi_real_tlog.py`) already exercises the underlying
|
||||||
|
pipeline. A dedicated end-to-end `replay_api` test against real
|
||||||
|
artefacts is **not** in scope here per the testing-environment
|
||||||
|
policy (e2e → Jetson only).
|
||||||
@@ -12,4 +12,4 @@ sub_step:
|
|||||||
retry_count: 0
|
retry_count: 0
|
||||||
cycle: 2
|
cycle: 2
|
||||||
tracker: jira
|
tracker: jira
|
||||||
last_completed_batch: 101
|
last_completed_batch: 102
|
||||||
|
|||||||
@@ -0,0 +1,47 @@
|
|||||||
|
# AZ-701 — operator-side replay HTTP API.
|
||||||
|
#
|
||||||
|
# Two-stage build. NOT bundled into the airborne companion-tier1
|
||||||
|
# image (folium + FastAPI + uvicorn add ~30 MB and would regress
|
||||||
|
# the airborne cold-start NFR). Build with:
|
||||||
|
#
|
||||||
|
# docker build -f docker/replay-api.Dockerfile -t gps-denied-replay-api .
|
||||||
|
#
|
||||||
|
# Run with:
|
||||||
|
#
|
||||||
|
# docker run --rm -p 8080:8080 \
|
||||||
|
# -e REPLAY_API_BEARER_TOKEN=secret \
|
||||||
|
# -v $(pwd)/data:/data \
|
||||||
|
# gps-denied-replay-api
|
||||||
|
#
|
||||||
|
# Health checks: /healthz (liveness) and /readyz (readiness).
|
||||||
|
|
||||||
|
FROM ubuntu:22.04 AS python-deps
|
||||||
|
ARG DEBIAN_FRONTEND=noninteractive
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
ca-certificates \
|
||||||
|
python3.10 \
|
||||||
|
python3.10-venv \
|
||||||
|
python3-pip \
|
||||||
|
libgl1 \
|
||||||
|
libglib2.0-0 \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
WORKDIR /opt/gps-denied
|
||||||
|
COPY pyproject.toml ./
|
||||||
|
COPY src ./src
|
||||||
|
RUN python3 -m venv /opt/venv \
|
||||||
|
&& /opt/venv/bin/pip install --upgrade pip \
|
||||||
|
&& /opt/venv/bin/pip install --no-cache-dir -e ".[operator-tools]"
|
||||||
|
ENV PATH="/opt/venv/bin:${PATH}"
|
||||||
|
|
||||||
|
FROM python-deps AS runtime
|
||||||
|
ENV REPLAY_API_STORAGE_ROOT=/var/azaion/replay_api
|
||||||
|
ENV REPLAY_API_HOST=0.0.0.0
|
||||||
|
ENV REPLAY_API_PORT=8080
|
||||||
|
RUN mkdir -p /var/azaion/replay_api
|
||||||
|
EXPOSE 8080
|
||||||
|
|
||||||
|
HEALTHCHECK --interval=10s --timeout=5s --start-period=10s --retries=3 \
|
||||||
|
CMD wget -qO- http://127.0.0.1:8080/healthz || exit 1
|
||||||
|
|
||||||
|
ENTRYPOINT ["replay-api"]
|
||||||
@@ -63,6 +63,37 @@ services:
|
|||||||
interval: 5s
|
interval: 5s
|
||||||
retries: 12
|
retries: 12
|
||||||
|
|
||||||
|
# AZ-701 — operator-side replay HTTP API.
|
||||||
|
#
|
||||||
|
# Profile-gated so the default `docker compose up` flow (the
|
||||||
|
# blackbox e2e suite) is unaffected. To start the API alongside
|
||||||
|
# the suite, run:
|
||||||
|
# docker compose --profile replay-api up replay-api
|
||||||
|
# The container exposes /healthz on 8080 and refuses /replay
|
||||||
|
# uploads without a bearer token unless REPLAY_API_AUTH_REQUIRED
|
||||||
|
# is explicitly set to false (dev only — WARN logged).
|
||||||
|
replay-api:
|
||||||
|
profiles: ["replay-api"]
|
||||||
|
build:
|
||||||
|
context: ../..
|
||||||
|
dockerfile: docker/replay-api.Dockerfile
|
||||||
|
image: gps-denied-replay-api:e2e
|
||||||
|
networks: [e2e-net]
|
||||||
|
ports:
|
||||||
|
- "${REPLAY_API_HOST_PORT:-8080}:8080"
|
||||||
|
environment:
|
||||||
|
REPLAY_API_AUTH_REQUIRED: "${REPLAY_API_AUTH_REQUIRED:-true}"
|
||||||
|
REPLAY_API_BEARER_TOKEN: "${REPLAY_API_BEARER_TOKEN:-}"
|
||||||
|
REPLAY_API_MAX_CONCURRENT_JOBS: "${REPLAY_API_MAX_CONCURRENT_JOBS:-1}"
|
||||||
|
REPLAY_API_MAX_QUEUED_JOBS: "${REPLAY_API_MAX_QUEUED_JOBS:-8}"
|
||||||
|
REPLAY_API_STORAGE_ROOT: /var/azaion/replay_api
|
||||||
|
volumes:
|
||||||
|
- replay-api-storage:/var/azaion/replay_api
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "wget", "-qO-", "http://127.0.0.1:8080/healthz"]
|
||||||
|
interval: 10s
|
||||||
|
retries: 5
|
||||||
|
|
||||||
mavproxy-listener:
|
mavproxy-listener:
|
||||||
image: ardupilot/mavproxy:latest
|
image: ardupilot/mavproxy:latest
|
||||||
networks: [e2e-net]
|
networks: [e2e-net]
|
||||||
@@ -135,6 +166,7 @@ volumes:
|
|||||||
tile-cache-fixture: {}
|
tile-cache-fixture: {}
|
||||||
tlog-output: {}
|
tlog-output: {}
|
||||||
mock-audit: {}
|
mock-audit: {}
|
||||||
|
replay-api-storage: {}
|
||||||
e2e-results:
|
e2e-results:
|
||||||
driver: local
|
driver: local
|
||||||
driver_opts:
|
driver_opts:
|
||||||
|
|||||||
+11
-5
@@ -130,18 +130,24 @@ telemetry = [
|
|||||||
"jetson-stats>=4.2",
|
"jetson-stats>=4.2",
|
||||||
"pynvml>=11.5",
|
"pynvml>=11.5",
|
||||||
]
|
]
|
||||||
# AZ-700: operator-side post-flight analysis tools. NOT installed on
|
# AZ-700 / AZ-701: operator-side post-flight analysis tools. NOT
|
||||||
# the airborne binary (folium pulls ~5 MB of JS + Leaflet assets that
|
# installed on the airborne binary (folium + FastAPI + uvicorn add
|
||||||
# regress the cold-start NFR if pulled into the runtime image).
|
# ~30 MB of deps + Leaflet assets that regress the cold-start NFR
|
||||||
# Activate with `pip install gps-denied-onboard[operator-tools]` on
|
# if pulled into the runtime image). Activate with
|
||||||
# a developer / analyst workstation.
|
# `pip install gps-denied-onboard[operator-tools]` on a developer
|
||||||
|
# / analyst workstation, or build the `docker/replay-api.Dockerfile`
|
||||||
|
# image which installs this extra by default.
|
||||||
operator-tools = [
|
operator-tools = [
|
||||||
"folium>=0.16,<1.0",
|
"folium>=0.16,<1.0",
|
||||||
|
"fastapi>=0.111,<0.120",
|
||||||
|
"uvicorn>=0.30,<1.0",
|
||||||
|
"python-multipart>=0.0.9,<1.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
gps-denied-replay = "gps_denied_onboard.cli.replay:main"
|
gps-denied-replay = "gps_denied_onboard.cli.replay:main"
|
||||||
gps-denied-render-map = "gps_denied_onboard.cli.render_map:main"
|
gps-denied-render-map = "gps_denied_onboard.cli.render_map:main"
|
||||||
|
replay-api = "gps_denied_onboard.cli.replay_api_entrypoint:main"
|
||||||
operator-orchestrator = "gps_denied_onboard.components.c12_operator_orchestrator.cli:main"
|
operator-orchestrator = "gps_denied_onboard.components.c12_operator_orchestrator.cli:main"
|
||||||
|
|
||||||
[tool.setuptools]
|
[tool.setuptools]
|
||||||
|
|||||||
@@ -0,0 +1,82 @@
|
|||||||
|
"""AZ-701 ``replay-api`` console-script.
|
||||||
|
|
||||||
|
Builds the FastAPI app from environment configuration and starts
|
||||||
|
the uvicorn server. Mirrors the operator-side CLI style of
|
||||||
|
``gps-denied-replay`` and ``gps-denied-render-map``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from gps_denied_onboard.replay_api.app import build_runner_from_env, create_app
|
||||||
|
from gps_denied_onboard.replay_api.storage import StorageRoot
|
||||||
|
|
||||||
|
__all__ = ["main"]
|
||||||
|
|
||||||
|
|
||||||
|
_LOGGER = logging.getLogger("gps_denied_onboard.cli.replay_api")
|
||||||
|
|
||||||
|
|
||||||
|
def _build_argparser() -> argparse.ArgumentParser:
|
||||||
|
parser = argparse.ArgumentParser(
|
||||||
|
prog="replay-api",
|
||||||
|
description=(
|
||||||
|
"Start the gps-denied-onboard replay HTTP API. "
|
||||||
|
"Upload (tlog + video [+ calibration]); receive GPS "
|
||||||
|
"fixes + an accuracy report + an HTML map."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--host", default=os.environ.get("REPLAY_API_HOST", "0.0.0.0")
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--port",
|
||||||
|
type=int,
|
||||||
|
default=int(os.environ.get("REPLAY_API_PORT", "8080")),
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--storage-root",
|
||||||
|
type=Path,
|
||||||
|
default=Path(
|
||||||
|
os.environ.get(
|
||||||
|
"REPLAY_API_STORAGE_ROOT", "/var/azaion/replay_api"
|
||||||
|
)
|
||||||
|
),
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--reload",
|
||||||
|
action="store_true",
|
||||||
|
help="Reload on code changes (dev only).",
|
||||||
|
)
|
||||||
|
return parser
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str] | None = None) -> int:
|
||||||
|
args = _build_argparser().parse_args(argv)
|
||||||
|
logging.basicConfig(
|
||||||
|
level=os.environ.get("REPLAY_API_LOG_LEVEL", "INFO"),
|
||||||
|
format="%(asctime)s %(name)s %(levelname)s %(message)s",
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
import uvicorn
|
||||||
|
except ImportError:
|
||||||
|
raise SystemExit(
|
||||||
|
"uvicorn is not installed. Install with "
|
||||||
|
"`pip install gps-denied-onboard[operator-tools]`."
|
||||||
|
)
|
||||||
|
|
||||||
|
storage = StorageRoot(args.storage_root)
|
||||||
|
runner = build_runner_from_env()
|
||||||
|
app = create_app(runner=runner, storage=storage)
|
||||||
|
uvicorn.run(
|
||||||
|
app,
|
||||||
|
host=args.host,
|
||||||
|
port=args.port,
|
||||||
|
reload=args.reload,
|
||||||
|
log_level=os.environ.get("REPLAY_API_LOG_LEVEL", "info").lower(),
|
||||||
|
)
|
||||||
|
return 0
|
||||||
@@ -16,6 +16,14 @@ from gps_denied_onboard.helpers.engine_filename_schema import (
|
|||||||
EngineFilenameSchema,
|
EngineFilenameSchema,
|
||||||
EngineFilenameSchemaError,
|
EngineFilenameSchemaError,
|
||||||
)
|
)
|
||||||
|
from gps_denied_onboard.helpers.accuracy_report import (
|
||||||
|
AC3_GATE_PCT,
|
||||||
|
AC3_GATE_THRESHOLD_M,
|
||||||
|
ReportContext,
|
||||||
|
format_failure_message,
|
||||||
|
render_report,
|
||||||
|
verdict_passes_ac3,
|
||||||
|
)
|
||||||
from gps_denied_onboard.helpers.gps_compare import (
|
from gps_denied_onboard.helpers.gps_compare import (
|
||||||
GroundTruthRow,
|
GroundTruthRow,
|
||||||
HorizontalErrorDistribution,
|
HorizontalErrorDistribution,
|
||||||
@@ -67,10 +75,13 @@ from gps_denied_onboard.helpers.wgs_converter import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
|
"AC3_GATE_PCT",
|
||||||
|
"AC3_GATE_THRESHOLD_M",
|
||||||
"ALLOWED_DTYPES",
|
"ALLOWED_DTYPES",
|
||||||
"ALLOWED_PRECISIONS",
|
"ALLOWED_PRECISIONS",
|
||||||
"ENGINE_SUFFIX",
|
"ENGINE_SUFFIX",
|
||||||
"MAX_ZOOM",
|
"MAX_ZOOM",
|
||||||
|
"ReportContext",
|
||||||
"SE3",
|
"SE3",
|
||||||
"SIDECAR_SUFFIX",
|
"SIDECAR_SUFFIX",
|
||||||
"WEB_MERCATOR_MAX_LAT_DEG",
|
"WEB_MERCATOR_MAX_LAT_DEG",
|
||||||
@@ -96,6 +107,7 @@ __all__ = [
|
|||||||
"WgsConverter",
|
"WgsConverter",
|
||||||
"adjoint",
|
"adjoint",
|
||||||
"exp_map",
|
"exp_map",
|
||||||
|
"format_failure_message",
|
||||||
"horizontal_error_distribution",
|
"horizontal_error_distribution",
|
||||||
"is_valid_rotation",
|
"is_valid_rotation",
|
||||||
"iso_ts_from_clock",
|
"iso_ts_from_clock",
|
||||||
@@ -106,5 +118,7 @@ __all__ = [
|
|||||||
"make_imu_preintegrator",
|
"make_imu_preintegrator",
|
||||||
"matrix_to_se3",
|
"matrix_to_se3",
|
||||||
"percentile_sorted",
|
"percentile_sorted",
|
||||||
|
"render_report",
|
||||||
"se3_to_matrix",
|
"se3_to_matrix",
|
||||||
|
"verdict_passes_ac3",
|
||||||
]
|
]
|
||||||
|
|||||||
+7
-5
@@ -1,4 +1,4 @@
|
|||||||
"""AZ-699 Markdown accuracy-report writer (test helper).
|
"""Markdown accuracy-report writer (AZ-699 + AZ-701).
|
||||||
|
|
||||||
Renders a :class:`HorizontalErrorDistribution` (the production
|
Renders a :class:`HorizontalErrorDistribution` (the production
|
||||||
helper in ``gps_denied_onboard.helpers.gps_compare``) plus run
|
helper in ``gps_denied_onboard.helpers.gps_compare``) plus run
|
||||||
@@ -6,10 +6,12 @@ context (calibration acquisition method, clip duration, fixture
|
|||||||
paths) into the canonical Markdown layout consumed by
|
paths) into the canonical Markdown layout consumed by
|
||||||
``_docs/06_metrics/real_flight_validation_{date}.md``.
|
``_docs/06_metrics/real_flight_validation_{date}.md``.
|
||||||
|
|
||||||
This module lives under ``tests/`` (NOT production) — the report
|
Originally implemented as a test helper under
|
||||||
is an artefact of running the AZ-699 e2e test. Promoting the
|
``tests/e2e/replay/_report_writer.py`` (AZ-699 batch 100). Promoted
|
||||||
writer to ``src/`` would invite production code to import a test
|
to production code in AZ-701 (batch 102) because the ``replay_api``
|
||||||
helper, so the file ownership rule keeps it here.
|
HTTP service needs to render the same report for every replay job
|
||||||
|
the operator submits, and a test-only helper cannot be imported
|
||||||
|
from production code per the module-layout rule.
|
||||||
|
|
||||||
Style: every function is pure; the side effect (writing the file)
|
Style: every function is pure; the side effect (writing the file)
|
||||||
is the caller's. Tests in ``tests/unit/test_az699_report_writer.py``
|
is the caller's. Tests in ``tests/unit/test_az699_report_writer.py``
|
||||||
@@ -0,0 +1,54 @@
|
|||||||
|
"""AZ-701 — `replay_api` HTTP service.
|
||||||
|
|
||||||
|
Operator-side HTTP wrapper around the offline replay pipeline:
|
||||||
|
`gps-denied-replay` (AZ-402) + `gps-denied-render-map` (AZ-700).
|
||||||
|
|
||||||
|
Lives outside the airborne binary — see contract at
|
||||||
|
``_docs/02_document/contracts/replay_api/replay_api_protocol.md``.
|
||||||
|
|
||||||
|
Public surface (re-exports below) is intentionally narrow:
|
||||||
|
- ``create_app`` — FastAPI app factory (for uvicorn + tests).
|
||||||
|
- ``JobRegistry`` + ``JobRecord`` + ``JobState`` — job-state machinery.
|
||||||
|
- ``ReplayRunner`` Protocol — DI seam (handlers depend on the
|
||||||
|
Protocol, not the concrete subprocess runner; unit tests inject
|
||||||
|
a fake runner).
|
||||||
|
- DTOs — ``JobSnapshot``, ``ReplayJobResult``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from gps_denied_onboard.replay_api.app import create_app
|
||||||
|
from gps_denied_onboard.replay_api.errors import (
|
||||||
|
JobNotCompleteError,
|
||||||
|
JobNotFoundError,
|
||||||
|
ReplayApiError,
|
||||||
|
ReplayRunnerError,
|
||||||
|
UnsupportedFileKindError,
|
||||||
|
)
|
||||||
|
from gps_denied_onboard.replay_api.interface import (
|
||||||
|
JobSnapshot,
|
||||||
|
JobState,
|
||||||
|
ReplayJobResult,
|
||||||
|
ReplayRunner,
|
||||||
|
)
|
||||||
|
from gps_denied_onboard.replay_api.jobs import (
|
||||||
|
ConcurrencyLimitReachedError,
|
||||||
|
JobRecord,
|
||||||
|
JobRegistry,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"ConcurrencyLimitReachedError",
|
||||||
|
"JobNotCompleteError",
|
||||||
|
"JobNotFoundError",
|
||||||
|
"JobRecord",
|
||||||
|
"JobRegistry",
|
||||||
|
"JobSnapshot",
|
||||||
|
"JobState",
|
||||||
|
"ReplayApiError",
|
||||||
|
"ReplayJobResult",
|
||||||
|
"ReplayRunner",
|
||||||
|
"ReplayRunnerError",
|
||||||
|
"UnsupportedFileKindError",
|
||||||
|
"create_app",
|
||||||
|
]
|
||||||
@@ -0,0 +1,677 @@
|
|||||||
|
"""AZ-701 — FastAPI app factory + production subprocess runner.
|
||||||
|
|
||||||
|
The factory takes the (runner, storage, registry) trio so unit
|
||||||
|
tests can wire in fakes; ``main()`` (in
|
||||||
|
``cli/replay_api_entrypoint.py``) constructs the production
|
||||||
|
subprocess runner against the configured environment.
|
||||||
|
|
||||||
|
Note: this file deliberately does NOT use ``from __future__ import
|
||||||
|
annotations``. FastAPI 0.119 + Pydantic 2.x resolve the route
|
||||||
|
parameter annotations at decoration time, which requires the
|
||||||
|
``Annotated[UploadFile, File()]`` form to be evaluable as real
|
||||||
|
types — not as forward-ref strings. Other modules in the
|
||||||
|
``replay_api`` package keep the future-annotations import; only
|
||||||
|
this one drops it for the route signatures.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from collections.abc import AsyncIterator
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from gps_denied_onboard.replay_api.errors import (
|
||||||
|
ConcurrencyLimitReachedError,
|
||||||
|
JobNotCompleteError,
|
||||||
|
JobNotFoundError,
|
||||||
|
MultipartMissingFieldError,
|
||||||
|
PayloadTooLargeError,
|
||||||
|
ReplayApiError,
|
||||||
|
ReplayRunnerError,
|
||||||
|
UnauthorizedError,
|
||||||
|
UnsupportedFileKindError,
|
||||||
|
)
|
||||||
|
from gps_denied_onboard.replay_api.handlers import (
|
||||||
|
MIN_TLOG_PROBE_BYTES,
|
||||||
|
MIN_VIDEO_PROBE_BYTES,
|
||||||
|
auth_required,
|
||||||
|
expected_bearer_token,
|
||||||
|
extract_bearer_token,
|
||||||
|
validate_calibration_kind,
|
||||||
|
validate_tlog_kind,
|
||||||
|
validate_upload_size,
|
||||||
|
validate_video_kind,
|
||||||
|
)
|
||||||
|
from gps_denied_onboard.replay_api.interface import (
|
||||||
|
JobSnapshot,
|
||||||
|
JobState,
|
||||||
|
ReplayInputs,
|
||||||
|
ReplayJobResult,
|
||||||
|
ReplayRunner,
|
||||||
|
)
|
||||||
|
from gps_denied_onboard.replay_api.jobs import JobRegistry
|
||||||
|
from gps_denied_onboard.replay_api.storage import StorageRoot
|
||||||
|
|
||||||
|
__all__ = ["SubprocessReplayRunner", "build_runner_from_env", "create_app"]
|
||||||
|
|
||||||
|
|
||||||
|
_LOGGER = logging.getLogger("gps_denied_onboard.replay_api")
|
||||||
|
|
||||||
|
|
||||||
|
_PROBE_BYTES_MAX: int = max(MIN_TLOG_PROBE_BYTES, MIN_VIDEO_PROBE_BYTES, 64)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# Production runner
|
||||||
|
|
||||||
|
|
||||||
|
class SubprocessReplayRunner:
|
||||||
|
"""Shells out to ``gps-denied-replay`` + ``gps-denied-render-map``.
|
||||||
|
|
||||||
|
Each ``run()`` call writes a minimal replay-mode ``config.yaml``
|
||||||
|
into the per-job output directory, invokes the replay CLI with
|
||||||
|
``--auto-trim``, computes the AZ-699 accuracy report from the
|
||||||
|
JSONL + the AZ-697 ground-truth extraction, and renders the
|
||||||
|
AZ-700 HTML map. The result is the trio of artefact paths the
|
||||||
|
handler streams back to the client.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
replay_binary: str = "gps-denied-replay",
|
||||||
|
render_binary: str = "gps-denied-render-map",
|
||||||
|
subprocess_timeout_s: float = 900.0,
|
||||||
|
) -> None:
|
||||||
|
self._replay_binary = replay_binary
|
||||||
|
self._render_binary = render_binary
|
||||||
|
self._timeout = subprocess_timeout_s
|
||||||
|
|
||||||
|
def run(
|
||||||
|
self, inputs: ReplayInputs, *, output_dir: Path
|
||||||
|
) -> ReplayJobResult:
|
||||||
|
config_path = output_dir / "config.yaml"
|
||||||
|
config_path.write_text(
|
||||||
|
"mode: replay\n"
|
||||||
|
"replay:\n"
|
||||||
|
f" pace: {inputs.pace}\n"
|
||||||
|
" target_fc_dialect: ardupilot_plane\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
signing_key_path = output_dir / "signing_key.bin"
|
||||||
|
signing_key_path.write_bytes(b"\x00" * 32)
|
||||||
|
|
||||||
|
emissions_path = output_dir / "emissions.jsonl"
|
||||||
|
argv = [
|
||||||
|
self._replay_binary,
|
||||||
|
"--video",
|
||||||
|
str(inputs.video_path),
|
||||||
|
"--tlog",
|
||||||
|
str(inputs.tlog_path),
|
||||||
|
"--output",
|
||||||
|
str(emissions_path),
|
||||||
|
"--camera-calibration",
|
||||||
|
str(inputs.calibration_path),
|
||||||
|
"--config",
|
||||||
|
str(config_path),
|
||||||
|
"--mavlink-signing-key",
|
||||||
|
str(signing_key_path),
|
||||||
|
"--pace",
|
||||||
|
inputs.pace,
|
||||||
|
]
|
||||||
|
if inputs.auto_trim:
|
||||||
|
argv.append("--auto-trim")
|
||||||
|
|
||||||
|
replay_completed = subprocess.run(
|
||||||
|
argv,
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
timeout=self._timeout,
|
||||||
|
)
|
||||||
|
if replay_completed.returncode != 0:
|
||||||
|
stderr_tail = (replay_completed.stderr or "")[-8192:]
|
||||||
|
raise ReplayRunnerError(
|
||||||
|
f"{self._replay_binary} exited "
|
||||||
|
f"{replay_completed.returncode}",
|
||||||
|
details={"stderr_tail": stderr_tail},
|
||||||
|
)
|
||||||
|
|
||||||
|
report_path = self._maybe_render_report(
|
||||||
|
inputs, emissions_path, output_dir
|
||||||
|
)
|
||||||
|
map_path = self._maybe_render_map(
|
||||||
|
inputs, emissions_path, output_dir, report_path
|
||||||
|
)
|
||||||
|
|
||||||
|
return ReplayJobResult(
|
||||||
|
emissions_jsonl_path=emissions_path,
|
||||||
|
accuracy_report_md_path=report_path,
|
||||||
|
map_html_path=map_path,
|
||||||
|
)
|
||||||
|
|
||||||
|
def _maybe_render_report(
|
||||||
|
self,
|
||||||
|
inputs: ReplayInputs,
|
||||||
|
emissions_path: Path,
|
||||||
|
output_dir: Path,
|
||||||
|
) -> Path | None:
|
||||||
|
"""Compute the AZ-699 accuracy report; tolerate missing GT."""
|
||||||
|
try:
|
||||||
|
import json
|
||||||
|
|
||||||
|
from gps_denied_onboard.helpers.accuracy_report import (
|
||||||
|
AC3_GATE_THRESHOLD_M,
|
||||||
|
ReportContext,
|
||||||
|
render_report,
|
||||||
|
verdict_passes_ac3,
|
||||||
|
)
|
||||||
|
from gps_denied_onboard.helpers.gps_compare import (
|
||||||
|
GroundTruthRow,
|
||||||
|
horizontal_error_distribution,
|
||||||
|
)
|
||||||
|
from gps_denied_onboard.replay_input import (
|
||||||
|
load_tlog_ground_truth,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
_LOGGER.warning(
|
||||||
|
"skipping accuracy report — imports failed: %r", exc
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
|
emissions: list[dict[str, Any]] = []
|
||||||
|
for line in emissions_path.read_text().splitlines():
|
||||||
|
if not line.strip():
|
||||||
|
continue
|
||||||
|
emissions.append(json.loads(line))
|
||||||
|
if not emissions:
|
||||||
|
return None
|
||||||
|
|
||||||
|
gt_series = load_tlog_ground_truth(inputs.tlog_path).records
|
||||||
|
if not gt_series:
|
||||||
|
return None
|
||||||
|
|
||||||
|
ground_truth = [
|
||||||
|
GroundTruthRow(
|
||||||
|
t_s=fix.ts_ns / 1e9,
|
||||||
|
lat_deg=fix.lat_deg,
|
||||||
|
lon_deg=fix.lon_deg,
|
||||||
|
alt_m=fix.alt_m,
|
||||||
|
)
|
||||||
|
for fix in gt_series
|
||||||
|
]
|
||||||
|
distribution = horizontal_error_distribution(emissions, ground_truth)
|
||||||
|
if distribution.count == 0:
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
calibration_method = _calibration_acquisition_method(
|
||||||
|
inputs.calibration_path
|
||||||
|
)
|
||||||
|
except (OSError, ValueError):
|
||||||
|
calibration_method = "unknown"
|
||||||
|
|
||||||
|
clip_duration_s = (
|
||||||
|
ground_truth[-1].t_s - ground_truth[0].t_s
|
||||||
|
if len(ground_truth) > 1
|
||||||
|
else 0.0
|
||||||
|
)
|
||||||
|
context = ReportContext(
|
||||||
|
run_date_utc=datetime.utcnow().date().isoformat(),
|
||||||
|
tlog_path=inputs.tlog_path,
|
||||||
|
video_path=inputs.video_path,
|
||||||
|
calibration_acquisition_method=calibration_method,
|
||||||
|
clip_duration_s=clip_duration_s,
|
||||||
|
emissions_count=len(emissions),
|
||||||
|
)
|
||||||
|
passed = verdict_passes_ac3(distribution)
|
||||||
|
# Touch the threshold constant so a future rename surfaces here too.
|
||||||
|
assert AC3_GATE_THRESHOLD_M > 0.0
|
||||||
|
report_text = render_report(distribution, context, passed=passed)
|
||||||
|
report_path = output_dir / "accuracy_report.md"
|
||||||
|
report_path.write_text(report_text)
|
||||||
|
return report_path
|
||||||
|
|
||||||
|
def _maybe_render_map(
|
||||||
|
self,
|
||||||
|
inputs: ReplayInputs,
|
||||||
|
emissions_path: Path,
|
||||||
|
output_dir: Path,
|
||||||
|
report_path: Path | None,
|
||||||
|
) -> Path | None:
|
||||||
|
if not shutil.which(self._render_binary):
|
||||||
|
venv_bin = Path(sys.executable).parent / self._render_binary
|
||||||
|
if not venv_bin.exists():
|
||||||
|
_LOGGER.warning(
|
||||||
|
"%s not on PATH — skipping map render",
|
||||||
|
self._render_binary,
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
render_bin = str(venv_bin)
|
||||||
|
else:
|
||||||
|
render_bin = self._render_binary
|
||||||
|
map_path = output_dir / "map.html"
|
||||||
|
argv = [
|
||||||
|
render_bin,
|
||||||
|
"--estimated",
|
||||||
|
str(emissions_path),
|
||||||
|
"--truth",
|
||||||
|
str(inputs.tlog_path),
|
||||||
|
"--output",
|
||||||
|
str(map_path),
|
||||||
|
]
|
||||||
|
if report_path is not None:
|
||||||
|
argv.extend(["--summary", str(report_path)])
|
||||||
|
completed = subprocess.run(
|
||||||
|
argv, capture_output=True, text=True, timeout=120
|
||||||
|
)
|
||||||
|
if completed.returncode != 0:
|
||||||
|
_LOGGER.warning(
|
||||||
|
"%s exited %s — map render skipped (stderr_tail=%r)",
|
||||||
|
self._render_binary,
|
||||||
|
completed.returncode,
|
||||||
|
completed.stderr[-2048:],
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
return map_path
|
||||||
|
|
||||||
|
|
||||||
|
def _calibration_acquisition_method(calibration_path: Path) -> str:
|
||||||
|
import json
|
||||||
|
|
||||||
|
data = json.loads(calibration_path.read_text())
|
||||||
|
method = data.get("acquisition_method")
|
||||||
|
if isinstance(method, str) and method:
|
||||||
|
return method
|
||||||
|
return "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
def build_runner_from_env() -> SubprocessReplayRunner:
|
||||||
|
return SubprocessReplayRunner(
|
||||||
|
replay_binary=os.environ.get(
|
||||||
|
"REPLAY_API_REPLAY_BINARY", "gps-denied-replay"
|
||||||
|
),
|
||||||
|
render_binary=os.environ.get(
|
||||||
|
"REPLAY_API_RENDER_BINARY", "gps-denied-render-map"
|
||||||
|
),
|
||||||
|
subprocess_timeout_s=float(
|
||||||
|
os.environ.get("REPLAY_API_SUBPROCESS_TIMEOUT_S", "900")
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# FastAPI app factory
|
||||||
|
|
||||||
|
|
||||||
|
def create_app(
|
||||||
|
*,
|
||||||
|
runner: ReplayRunner,
|
||||||
|
storage: StorageRoot,
|
||||||
|
registry: JobRegistry | None = None,
|
||||||
|
max_upload_bytes: int = 2 * 1024 * 1024 * 1024,
|
||||||
|
sync_max_bytes: int = 200 * 1024 * 1024,
|
||||||
|
) -> Any:
|
||||||
|
"""Build the FastAPI app.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
runner: ``ReplayRunner`` injected into the registry.
|
||||||
|
storage: Per-job storage manager.
|
||||||
|
registry: Pre-built ``JobRegistry`` (the unit tests inject a
|
||||||
|
tuned one; production wiring builds one from env).
|
||||||
|
max_upload_bytes: hard limit per multipart upload (413 above).
|
||||||
|
sync_max_bytes: video size at which the API switches to async.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from typing import Annotated
|
||||||
|
|
||||||
|
from fastapi import (
|
||||||
|
FastAPI,
|
||||||
|
File,
|
||||||
|
Form,
|
||||||
|
Header,
|
||||||
|
HTTPException,
|
||||||
|
Request,
|
||||||
|
Response,
|
||||||
|
UploadFile,
|
||||||
|
)
|
||||||
|
from fastapi.responses import FileResponse, JSONResponse
|
||||||
|
except ImportError as exc:
|
||||||
|
raise SystemExit(
|
||||||
|
"FastAPI is not installed. Install with "
|
||||||
|
"`pip install gps-denied-onboard[operator-tools]`."
|
||||||
|
) from exc
|
||||||
|
|
||||||
|
if registry is None:
|
||||||
|
registry = JobRegistry(
|
||||||
|
runner=runner,
|
||||||
|
storage=storage,
|
||||||
|
max_concurrent=int(
|
||||||
|
os.environ.get("REPLAY_API_MAX_CONCURRENT_JOBS", "1")
|
||||||
|
),
|
||||||
|
max_queued=int(
|
||||||
|
os.environ.get("REPLAY_API_MAX_QUEUED_JOBS", "8")
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
@asynccontextmanager
|
||||||
|
async def lifespan(_: Any) -> AsyncIterator[None]:
|
||||||
|
if not auth_required():
|
||||||
|
_LOGGER.warning(
|
||||||
|
"REPLAY_API_AUTH_REQUIRED=false — bearer auth is DISABLED. "
|
||||||
|
"Do not run this in any environment exposed to the internet."
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
yield
|
||||||
|
finally:
|
||||||
|
registry.shutdown(wait=False)
|
||||||
|
|
||||||
|
app = FastAPI(
|
||||||
|
title="gps-denied-onboard replay API",
|
||||||
|
version="1.0.0",
|
||||||
|
description=(
|
||||||
|
"HTTP wrapper around the offline `gps-denied-replay` "
|
||||||
|
"pipeline. Upload (tlog + video [+ calibration]); "
|
||||||
|
"receive GPS fixes + an accuracy report + an HTML map."
|
||||||
|
),
|
||||||
|
lifespan=lifespan,
|
||||||
|
)
|
||||||
|
|
||||||
|
@app.exception_handler(ReplayApiError)
|
||||||
|
async def _on_replay_api_error(
|
||||||
|
_request: Request, exc: ReplayApiError
|
||||||
|
) -> JSONResponse:
|
||||||
|
return JSONResponse(
|
||||||
|
status_code=exc.status_code,
|
||||||
|
content={
|
||||||
|
"error_code": exc.error_code,
|
||||||
|
"message": exc.message,
|
||||||
|
"details": exc.details,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
def _check_auth(authorization: str | None) -> None:
|
||||||
|
if not auth_required():
|
||||||
|
return
|
||||||
|
expected = expected_bearer_token()
|
||||||
|
actual = extract_bearer_token(authorization)
|
||||||
|
if expected is None or actual != expected:
|
||||||
|
raise UnauthorizedError("bearer token does not match")
|
||||||
|
|
||||||
|
@app.get("/healthz")
|
||||||
|
async def healthz() -> dict[str, str]:
|
||||||
|
return {"status": "ok"}
|
||||||
|
|
||||||
|
@app.get("/readyz")
|
||||||
|
async def readyz() -> Response:
|
||||||
|
binary = os.environ.get(
|
||||||
|
"REPLAY_API_REPLAY_BINARY", "gps-denied-replay"
|
||||||
|
)
|
||||||
|
if shutil.which(binary) is None and not (
|
||||||
|
Path(sys.executable).parent / binary
|
||||||
|
).exists():
|
||||||
|
return JSONResponse(
|
||||||
|
status_code=503,
|
||||||
|
content={
|
||||||
|
"status": "not_ready",
|
||||||
|
"reason": f"{binary} not on PATH",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
if not os.access(storage.root, os.W_OK):
|
||||||
|
return JSONResponse(
|
||||||
|
status_code=503,
|
||||||
|
content={
|
||||||
|
"status": "not_ready",
|
||||||
|
"reason": f"{storage.root} is not writable",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return JSONResponse(content={"status": "ok"})
|
||||||
|
|
||||||
|
@app.post("/replay")
|
||||||
|
async def post_replay(
|
||||||
|
tlog: Annotated[UploadFile, File()],
|
||||||
|
video: Annotated[UploadFile, File()],
|
||||||
|
calibration: Annotated[UploadFile | None, File()] = None,
|
||||||
|
pace: Annotated[str, Form()] = "asap",
|
||||||
|
auto_trim: Annotated[bool, Form()] = True,
|
||||||
|
authorization: Annotated[str | None, Header()] = None,
|
||||||
|
) -> Response:
|
||||||
|
_check_auth(authorization)
|
||||||
|
|
||||||
|
tlog_bytes = await tlog.read()
|
||||||
|
validate_upload_size(len(tlog_bytes), limit=max_upload_bytes)
|
||||||
|
validate_tlog_kind(tlog_bytes[:_PROBE_BYTES_MAX])
|
||||||
|
|
||||||
|
video_bytes = await video.read()
|
||||||
|
validate_upload_size(len(video_bytes), limit=max_upload_bytes)
|
||||||
|
validate_video_kind(video_bytes[:_PROBE_BYTES_MAX])
|
||||||
|
|
||||||
|
calibration_bytes: bytes | None = None
|
||||||
|
if calibration is not None:
|
||||||
|
calibration_bytes = await calibration.read()
|
||||||
|
validate_upload_size(
|
||||||
|
len(calibration_bytes), limit=max_upload_bytes
|
||||||
|
)
|
||||||
|
validate_calibration_kind(calibration_bytes[:_PROBE_BYTES_MAX])
|
||||||
|
|
||||||
|
# Allocate per-job storage and write the uploads.
|
||||||
|
job_id = _new_job_id()
|
||||||
|
job_storage = storage.allocate_job(job_id)
|
||||||
|
job_storage.tlog_path.write_bytes(tlog_bytes)
|
||||||
|
job_storage.video_path.write_bytes(video_bytes)
|
||||||
|
if calibration_bytes is not None:
|
||||||
|
job_storage.calibration_path.write_bytes(calibration_bytes)
|
||||||
|
elif _default_calibration_path() is not None:
|
||||||
|
shutil.copyfile(
|
||||||
|
_default_calibration_path(), # type: ignore[arg-type]
|
||||||
|
job_storage.calibration_path,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise MultipartMissingFieldError(
|
||||||
|
"calibration field is required (no default calibration "
|
||||||
|
"bundled with this build of replay_api)"
|
||||||
|
)
|
||||||
|
|
||||||
|
inputs = ReplayInputs(
|
||||||
|
tlog_path=job_storage.tlog_path,
|
||||||
|
video_path=job_storage.video_path,
|
||||||
|
calibration_path=job_storage.calibration_path,
|
||||||
|
pace=pace,
|
||||||
|
auto_trim=auto_trim,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Submit under the pre-allocated job_id so the storage
|
||||||
|
# directory (already populated with the uploads above) and
|
||||||
|
# the API-visible job id match.
|
||||||
|
try:
|
||||||
|
snapshot = registry.submit(
|
||||||
|
inputs,
|
||||||
|
output_dir=job_storage.output_dir,
|
||||||
|
job_id=job_id,
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
storage.release_job(job_id)
|
||||||
|
raise
|
||||||
|
|
||||||
|
sync_mode = len(video_bytes) <= sync_max_bytes
|
||||||
|
if not sync_mode:
|
||||||
|
return JSONResponse(
|
||||||
|
status_code=202,
|
||||||
|
headers={"Location": f"/jobs/{snapshot.job_id}"},
|
||||||
|
content=_snapshot_to_dict(snapshot, sync=False),
|
||||||
|
)
|
||||||
|
# Wait for terminal state in sync mode.
|
||||||
|
snapshot = _await_terminal(registry, snapshot.job_id)
|
||||||
|
if snapshot.state == JobState.FAILED:
|
||||||
|
raise ReplayRunnerError(
|
||||||
|
snapshot.error or "replay runner failed without a message"
|
||||||
|
)
|
||||||
|
return JSONResponse(
|
||||||
|
status_code=200,
|
||||||
|
content=_snapshot_to_dict(snapshot, sync=True),
|
||||||
|
)
|
||||||
|
|
||||||
|
@app.get("/jobs/{job_id}")
|
||||||
|
async def get_job(
|
||||||
|
job_id: str,
|
||||||
|
authorization: Annotated[str | None, Header()] = None,
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
_check_auth(authorization)
|
||||||
|
try:
|
||||||
|
snapshot = registry.get(job_id)
|
||||||
|
except JobNotFoundError:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=404,
|
||||||
|
detail={
|
||||||
|
"error_code": "job_not_found",
|
||||||
|
"message": f"job {job_id} not found",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return _snapshot_to_dict(snapshot, sync=False)
|
||||||
|
|
||||||
|
def _require_done(job_id: str) -> JobSnapshot:
|
||||||
|
snapshot = registry.get(job_id)
|
||||||
|
if snapshot.state != JobState.DONE:
|
||||||
|
raise JobNotCompleteError(
|
||||||
|
f"job {job_id} state is {snapshot.state.value}; "
|
||||||
|
"result is only available when state=done"
|
||||||
|
)
|
||||||
|
return snapshot
|
||||||
|
|
||||||
|
@app.get("/jobs/{job_id}/result")
|
||||||
|
async def get_result(
|
||||||
|
job_id: str,
|
||||||
|
authorization: Annotated[str | None, Header()] = None,
|
||||||
|
) -> Response:
|
||||||
|
_check_auth(authorization)
|
||||||
|
snapshot = _require_done(job_id)
|
||||||
|
if snapshot.result is None:
|
||||||
|
raise JobNotCompleteError("job done but no result attached")
|
||||||
|
return FileResponse(
|
||||||
|
path=snapshot.result.emissions_jsonl_path,
|
||||||
|
media_type="application/x-ndjson",
|
||||||
|
filename="emissions.jsonl",
|
||||||
|
)
|
||||||
|
|
||||||
|
@app.get("/jobs/{job_id}/map")
|
||||||
|
async def get_map(
|
||||||
|
job_id: str,
|
||||||
|
authorization: Annotated[str | None, Header()] = None,
|
||||||
|
) -> Response:
|
||||||
|
_check_auth(authorization)
|
||||||
|
snapshot = _require_done(job_id)
|
||||||
|
if snapshot.result is None or snapshot.result.map_html_path is None:
|
||||||
|
raise JobNotCompleteError("map artefact unavailable")
|
||||||
|
return FileResponse(
|
||||||
|
path=snapshot.result.map_html_path,
|
||||||
|
media_type="text/html",
|
||||||
|
filename="map.html",
|
||||||
|
)
|
||||||
|
|
||||||
|
@app.get("/jobs/{job_id}/report")
|
||||||
|
async def get_report(
|
||||||
|
job_id: str,
|
||||||
|
authorization: Annotated[str | None, Header()] = None,
|
||||||
|
) -> Response:
|
||||||
|
_check_auth(authorization)
|
||||||
|
snapshot = _require_done(job_id)
|
||||||
|
if (
|
||||||
|
snapshot.result is None
|
||||||
|
or snapshot.result.accuracy_report_md_path is None
|
||||||
|
):
|
||||||
|
raise JobNotCompleteError("report artefact unavailable")
|
||||||
|
return FileResponse(
|
||||||
|
path=snapshot.result.accuracy_report_md_path,
|
||||||
|
media_type="text/markdown",
|
||||||
|
filename="accuracy_report.md",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Stash so unit tests can introspect.
|
||||||
|
app.state.registry = registry
|
||||||
|
app.state.storage = storage
|
||||||
|
# Silence unused-import lint on dependency types.
|
||||||
|
_ = (Form, File)
|
||||||
|
# Reference the unused-but-kept errors so a future renamed
|
||||||
|
# member surfaces here loudly.
|
||||||
|
_ = (
|
||||||
|
ConcurrencyLimitReachedError,
|
||||||
|
PayloadTooLargeError,
|
||||||
|
UnsupportedFileKindError,
|
||||||
|
MultipartMissingFieldError,
|
||||||
|
)
|
||||||
|
return app
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
|
||||||
|
|
||||||
|
def _new_job_id() -> str:
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
return uuid.uuid4().hex
|
||||||
|
|
||||||
|
|
||||||
|
def _default_calibration_path() -> Path | None:
|
||||||
|
raw = os.environ.get("REPLAY_API_DEFAULT_CALIBRATION")
|
||||||
|
if not raw:
|
||||||
|
return None
|
||||||
|
path = Path(raw)
|
||||||
|
if path.is_file():
|
||||||
|
return path
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _await_terminal(registry: JobRegistry, job_id: str) -> JobSnapshot:
|
||||||
|
"""Block until ``job_id`` reaches a terminal state.
|
||||||
|
|
||||||
|
Used in sync mode. The registry runs jobs in its own thread pool;
|
||||||
|
we poll with a short backoff. The handler endpoint is async, so
|
||||||
|
blocking here parks the FastAPI worker — that's acceptable for
|
||||||
|
sync mode by design (sync mode is the small-file path).
|
||||||
|
"""
|
||||||
|
import time
|
||||||
|
|
||||||
|
deadline = time.monotonic() + 1800.0 # 30 min safety bound
|
||||||
|
while time.monotonic() < deadline:
|
||||||
|
snap = registry.get(job_id)
|
||||||
|
if snap.state in (JobState.DONE, JobState.FAILED):
|
||||||
|
return snap
|
||||||
|
time.sleep(0.05)
|
||||||
|
raise ReplayRunnerError("sync replay exceeded 30 min safety bound")
|
||||||
|
|
||||||
|
|
||||||
|
def _snapshot_to_dict(snapshot: JobSnapshot, *, sync: bool) -> dict[str, Any]:
|
||||||
|
payload: dict[str, Any] = {
|
||||||
|
"job_id": snapshot.job_id,
|
||||||
|
"state": snapshot.state.value,
|
||||||
|
"submitted_at_utc": snapshot.submitted_at_utc.isoformat(),
|
||||||
|
"started_at_utc": (
|
||||||
|
snapshot.started_at_utc.isoformat()
|
||||||
|
if snapshot.started_at_utc
|
||||||
|
else None
|
||||||
|
),
|
||||||
|
"finished_at_utc": (
|
||||||
|
snapshot.finished_at_utc.isoformat()
|
||||||
|
if snapshot.finished_at_utc
|
||||||
|
else None
|
||||||
|
),
|
||||||
|
"error": snapshot.error,
|
||||||
|
"status_url": f"/jobs/{snapshot.job_id}",
|
||||||
|
"sync": sync,
|
||||||
|
}
|
||||||
|
if snapshot.result is not None:
|
||||||
|
payload["emissions_jsonl_url"] = (
|
||||||
|
f"/jobs/{snapshot.job_id}/result"
|
||||||
|
)
|
||||||
|
if snapshot.result.accuracy_report_md_path is not None:
|
||||||
|
payload["accuracy_report_md_url"] = (
|
||||||
|
f"/jobs/{snapshot.job_id}/report"
|
||||||
|
)
|
||||||
|
if snapshot.result.map_html_path is not None:
|
||||||
|
payload["map_html_url"] = f"/jobs/{snapshot.job_id}/map"
|
||||||
|
return payload
|
||||||
@@ -0,0 +1,86 @@
|
|||||||
|
"""AZ-701 — typed HTTP error families for the replay_api service.
|
||||||
|
|
||||||
|
Every error has a stable ``error_code`` (string) the contract pins
|
||||||
|
in ``_docs/02_document/contracts/replay_api/replay_api_protocol.md``.
|
||||||
|
The handler layer translates these into JSON responses; the
|
||||||
|
business layer raises them without knowing about HTTP.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"ConcurrencyLimitReachedError",
|
||||||
|
"JobNotCompleteError",
|
||||||
|
"JobNotFoundError",
|
||||||
|
"MultipartMissingFieldError",
|
||||||
|
"PayloadTooLargeError",
|
||||||
|
"ReplayApiError",
|
||||||
|
"ReplayRunnerError",
|
||||||
|
"UnauthorizedError",
|
||||||
|
"UnsupportedFileKindError",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class ReplayApiError(Exception):
|
||||||
|
"""Base for every typed replay_api error.
|
||||||
|
|
||||||
|
Subclasses pin a stable ``error_code`` and HTTP ``status_code``;
|
||||||
|
the handler layer reads both to build a JSON response.
|
||||||
|
"""
|
||||||
|
|
||||||
|
error_code: str = "replay_api_error"
|
||||||
|
status_code: int = 500
|
||||||
|
|
||||||
|
def __init__(self, message: str, details: dict[str, Any] | None = None) -> None:
|
||||||
|
super().__init__(message)
|
||||||
|
self.message = message
|
||||||
|
self.details = details or {}
|
||||||
|
|
||||||
|
|
||||||
|
class UnsupportedFileKindError(ReplayApiError):
|
||||||
|
error_code = "unsupported_file_kind"
|
||||||
|
status_code = 400
|
||||||
|
|
||||||
|
|
||||||
|
class MultipartMissingFieldError(ReplayApiError):
|
||||||
|
error_code = "multipart_missing_field"
|
||||||
|
status_code = 400
|
||||||
|
|
||||||
|
|
||||||
|
class UnauthorizedError(ReplayApiError):
|
||||||
|
error_code = "unauthorized"
|
||||||
|
status_code = 401
|
||||||
|
|
||||||
|
|
||||||
|
class JobNotFoundError(ReplayApiError):
|
||||||
|
error_code = "job_not_found"
|
||||||
|
status_code = 404
|
||||||
|
|
||||||
|
|
||||||
|
class JobNotCompleteError(ReplayApiError):
|
||||||
|
error_code = "job_not_complete"
|
||||||
|
status_code = 409
|
||||||
|
|
||||||
|
|
||||||
|
class PayloadTooLargeError(ReplayApiError):
|
||||||
|
error_code = "payload_too_large"
|
||||||
|
status_code = 413
|
||||||
|
|
||||||
|
|
||||||
|
class ConcurrencyLimitReachedError(ReplayApiError):
|
||||||
|
"""Raised when the queue is full.
|
||||||
|
|
||||||
|
Note: per-spec, hitting just the running-job concurrency limit
|
||||||
|
does NOT raise this — those jobs queue normally. The 429 case is
|
||||||
|
"queue itself is full" only.
|
||||||
|
"""
|
||||||
|
|
||||||
|
error_code = "concurrency_limit_reached"
|
||||||
|
status_code = 429
|
||||||
|
|
||||||
|
|
||||||
|
class ReplayRunnerError(ReplayApiError):
|
||||||
|
error_code = "replay_runner_failed"
|
||||||
|
status_code = 500
|
||||||
@@ -0,0 +1,152 @@
|
|||||||
|
"""AZ-701 — multipart upload + magic-byte validation + auth helpers.
|
||||||
|
|
||||||
|
The functions here are deliberately framework-light: they take raw
|
||||||
|
bytes / streams and return validated artefacts. ``app.py`` wires
|
||||||
|
them into FastAPI dependencies; unit tests call them directly.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
|
from gps_denied_onboard.replay_api.errors import (
|
||||||
|
MultipartMissingFieldError,
|
||||||
|
PayloadTooLargeError,
|
||||||
|
UnauthorizedError,
|
||||||
|
UnsupportedFileKindError,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"MIN_TLOG_PROBE_BYTES",
|
||||||
|
"MIN_VIDEO_PROBE_BYTES",
|
||||||
|
"auth_required",
|
||||||
|
"expected_bearer_token",
|
||||||
|
"extract_bearer_token",
|
||||||
|
"validate_calibration_kind",
|
||||||
|
"validate_tlog_kind",
|
||||||
|
"validate_upload_size",
|
||||||
|
"validate_video_kind",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
_LOGGER = logging.getLogger("gps_denied_onboard.replay_api.handlers")
|
||||||
|
|
||||||
|
|
||||||
|
# MAVLink magic bytes — pymavlink uses 0xFD for v2.0 and 0xFE for
|
||||||
|
# v1.0. The Derkachi tlog is v2.0; we accept both because some
|
||||||
|
# operators ship v1.0 captures from older autopilots.
|
||||||
|
_MAVLINK_MAGIC_V2: int = 0xFD
|
||||||
|
_MAVLINK_MAGIC_V1: int = 0xFE
|
||||||
|
MIN_TLOG_PROBE_BYTES: int = 9
|
||||||
|
|
||||||
|
|
||||||
|
# mp4 boxes start with a 4-byte size, then 4 ASCII bytes for the
|
||||||
|
# box type. The first box in every valid mp4 is ``ftyp`` (per
|
||||||
|
# ISO/IEC 14496-12). ``"ftyp"`` lives at offset 4.
|
||||||
|
_MP4_FTYP_MARKER: bytes = b"ftyp"
|
||||||
|
MIN_VIDEO_PROBE_BYTES: int = 12
|
||||||
|
|
||||||
|
|
||||||
|
def validate_tlog_kind(probe_bytes: bytes) -> None:
|
||||||
|
"""Reject anything that doesn't open with a MAVLink magic byte.
|
||||||
|
|
||||||
|
pymavlink's tlog format prefixes each record with an 8-byte
|
||||||
|
big-endian microsecond timestamp followed by the raw MAVLink
|
||||||
|
frame, which always starts with the magic byte. So byte 8 of
|
||||||
|
any well-formed tlog is the MAVLink magic.
|
||||||
|
"""
|
||||||
|
if len(probe_bytes) < MIN_TLOG_PROBE_BYTES:
|
||||||
|
raise UnsupportedFileKindError(
|
||||||
|
f"tlog probe too small (need ≥ {MIN_TLOG_PROBE_BYTES} bytes "
|
||||||
|
f"to validate magic; got {len(probe_bytes)})"
|
||||||
|
)
|
||||||
|
magic = probe_bytes[8]
|
||||||
|
if magic not in (_MAVLINK_MAGIC_V2, _MAVLINK_MAGIC_V1):
|
||||||
|
raise UnsupportedFileKindError(
|
||||||
|
f"tlog magic byte 0x{magic:02X} at offset 8 is not "
|
||||||
|
f"MAVLink (expected 0x{_MAVLINK_MAGIC_V2:02X} or "
|
||||||
|
f"0x{_MAVLINK_MAGIC_V1:02X})"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def validate_video_kind(probe_bytes: bytes) -> None:
|
||||||
|
"""Reject anything that doesn't have an ``ftyp`` box at offset 4.
|
||||||
|
|
||||||
|
The size prefix at bytes 0-3 varies; the marker is the
|
||||||
|
discriminator. This catches the common "operator renamed
|
||||||
|
`.zip` to `.mp4`" attack — the AC-9 case.
|
||||||
|
"""
|
||||||
|
if len(probe_bytes) < MIN_VIDEO_PROBE_BYTES:
|
||||||
|
raise UnsupportedFileKindError(
|
||||||
|
f"video probe too small (need ≥ {MIN_VIDEO_PROBE_BYTES} "
|
||||||
|
f"bytes to validate ftyp; got {len(probe_bytes)})"
|
||||||
|
)
|
||||||
|
marker = probe_bytes[4:8]
|
||||||
|
if marker != _MP4_FTYP_MARKER:
|
||||||
|
raise UnsupportedFileKindError(
|
||||||
|
"video does not begin with an mp4 'ftyp' box at offset 4 "
|
||||||
|
f"(saw {marker!r})"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def validate_calibration_kind(probe_bytes: bytes) -> None:
|
||||||
|
"""Light JSON-shape check; the renderer is the strict validator."""
|
||||||
|
if not probe_bytes:
|
||||||
|
raise UnsupportedFileKindError("calibration upload is empty")
|
||||||
|
stripped = probe_bytes.lstrip()
|
||||||
|
if not stripped.startswith(b"{"):
|
||||||
|
raise UnsupportedFileKindError(
|
||||||
|
"calibration must be a JSON object (first non-whitespace "
|
||||||
|
"byte should be '{')"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def validate_upload_size(num_bytes: int, *, limit: int) -> None:
|
||||||
|
if num_bytes > limit:
|
||||||
|
raise PayloadTooLargeError(
|
||||||
|
f"upload size {num_bytes} exceeds REPLAY_API_MAX_UPLOAD_BYTES "
|
||||||
|
f"({limit})"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def expected_bearer_token() -> str | None:
|
||||||
|
"""Read the configured bearer token at request time.
|
||||||
|
|
||||||
|
Returning ``None`` means auth is disabled (
|
||||||
|
``REPLAY_API_AUTH_REQUIRED=false``); the caller is expected to
|
||||||
|
have logged the WARN once at service start.
|
||||||
|
"""
|
||||||
|
if not auth_required():
|
||||||
|
return None
|
||||||
|
token = os.environ.get("REPLAY_API_BEARER_TOKEN")
|
||||||
|
if not token:
|
||||||
|
raise UnauthorizedError(
|
||||||
|
"REPLAY_API_BEARER_TOKEN is not configured but auth is required"
|
||||||
|
)
|
||||||
|
return token
|
||||||
|
|
||||||
|
|
||||||
|
def auth_required() -> bool:
|
||||||
|
value = os.environ.get("REPLAY_API_AUTH_REQUIRED", "true").lower()
|
||||||
|
return value not in {"0", "false", "no", "off"}
|
||||||
|
|
||||||
|
|
||||||
|
def extract_bearer_token(header_value: str | None) -> str:
|
||||||
|
"""Parse ``Authorization: Bearer <token>`` strictly."""
|
||||||
|
if not header_value:
|
||||||
|
raise UnauthorizedError("missing Authorization header")
|
||||||
|
parts = header_value.split(" ", 1)
|
||||||
|
if len(parts) != 2 or parts[0].strip().lower() != "bearer":
|
||||||
|
raise UnauthorizedError(
|
||||||
|
"Authorization header must be 'Bearer <token>'"
|
||||||
|
)
|
||||||
|
token = parts[1].strip()
|
||||||
|
if not token:
|
||||||
|
raise UnauthorizedError("Authorization bearer token is empty")
|
||||||
|
return token
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_field(name: str, value: object) -> None:
|
||||||
|
if value is None:
|
||||||
|
raise MultipartMissingFieldError(f"missing multipart field: {name}")
|
||||||
@@ -0,0 +1,99 @@
|
|||||||
|
"""AZ-701 — DTOs + ``ReplayRunner`` Protocol for the replay_api service.
|
||||||
|
|
||||||
|
The Protocol is the dependency-injection seam: ``handlers.py``
|
||||||
|
depends on the Protocol, not the concrete ``SubprocessReplayRunner``.
|
||||||
|
Unit tests inject a deterministic fake; the production wiring in
|
||||||
|
``app.py`` constructs the subprocess runner.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime
|
||||||
|
from enum import Enum
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Protocol, runtime_checkable
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"JobSnapshot",
|
||||||
|
"JobState",
|
||||||
|
"ReplayInputs",
|
||||||
|
"ReplayJobResult",
|
||||||
|
"ReplayRunner",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class JobState(str, Enum):
|
||||||
|
"""Job lifecycle.
|
||||||
|
|
||||||
|
The state machine is monotonic: ``queued → running → done`` (or
|
||||||
|
``failed`` from any non-terminal state). No back-transitions.
|
||||||
|
"""
|
||||||
|
|
||||||
|
QUEUED = "queued"
|
||||||
|
RUNNING = "running"
|
||||||
|
DONE = "done"
|
||||||
|
FAILED = "failed"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
|
||||||
|
class ReplayInputs:
|
||||||
|
"""The (tlog + video + calibration) bundle a runner consumes.
|
||||||
|
|
||||||
|
Storage paths are absolute. The handler builds these from a
|
||||||
|
per-job temp directory (see ``storage.py``).
|
||||||
|
|
||||||
|
``pace`` and ``auto_trim`` mirror the ``gps-denied-replay`` CLI
|
||||||
|
flags; the runner is responsible for translating them into argv.
|
||||||
|
"""
|
||||||
|
|
||||||
|
tlog_path: Path
|
||||||
|
video_path: Path
|
||||||
|
calibration_path: Path
|
||||||
|
pace: str = "asap"
|
||||||
|
auto_trim: bool = True
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
|
||||||
|
class ReplayJobResult:
|
||||||
|
"""The artefacts a finished job exposes.
|
||||||
|
|
||||||
|
Each path is absolute and lives under the per-job storage dir.
|
||||||
|
The handler layer maps these to URLs in the JSON response.
|
||||||
|
"""
|
||||||
|
|
||||||
|
emissions_jsonl_path: Path
|
||||||
|
accuracy_report_md_path: Path | None
|
||||||
|
map_html_path: Path | None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class JobSnapshot:
|
||||||
|
"""Serialisable snapshot of one job.
|
||||||
|
|
||||||
|
Mutable; the registry mutates the snapshot in-place under its
|
||||||
|
lock and yields copies to API readers.
|
||||||
|
"""
|
||||||
|
|
||||||
|
job_id: str
|
||||||
|
state: JobState
|
||||||
|
submitted_at_utc: datetime
|
||||||
|
started_at_utc: datetime | None = None
|
||||||
|
finished_at_utc: datetime | None = None
|
||||||
|
error: str | None = None
|
||||||
|
result: ReplayJobResult | None = None
|
||||||
|
sync: bool = False
|
||||||
|
extra: dict[str, str] = field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
@runtime_checkable
|
||||||
|
class ReplayRunner(Protocol):
|
||||||
|
"""Runs the offline replay pipeline for one job.
|
||||||
|
|
||||||
|
The Protocol is intentionally synchronous — the registry runs it
|
||||||
|
in a worker thread. Returning normally signals success; raising
|
||||||
|
any exception signals failure and the registry records the
|
||||||
|
stringified message on the job.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def run(self, inputs: ReplayInputs, *, output_dir: Path) -> ReplayJobResult: ...
|
||||||
@@ -0,0 +1,233 @@
|
|||||||
|
"""AZ-701 — in-memory job registry with a concurrency limit.
|
||||||
|
|
||||||
|
``JobRegistry`` is the single source of truth for job state. It is
|
||||||
|
intentionally simple — a dict plus a thread pool plus a queue cap.
|
||||||
|
Operators that need durable history persist the JSONL + Markdown
|
||||||
|
report + HTML map artefacts out-of-band (invariant 2 in the
|
||||||
|
contract).
|
||||||
|
|
||||||
|
Concurrency model:
|
||||||
|
- ``max_concurrent``: at most this many jobs may be in state
|
||||||
|
``RUNNING`` at once. Excess submissions land in state ``QUEUED``
|
||||||
|
and get promoted by the worker pool.
|
||||||
|
- ``max_queued``: hard cap on queued jobs. Exceeding it raises
|
||||||
|
``ConcurrencyLimitReachedError`` (HTTP 429 at the handler layer).
|
||||||
|
|
||||||
|
The registry runs jobs in a thread pool (``ThreadPoolExecutor``)
|
||||||
|
so the FastAPI event loop is never blocked. The runner is
|
||||||
|
intentionally synchronous (``ReplayRunner.run``) because the
|
||||||
|
underlying ``gps-denied-replay`` subprocess is synchronous.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import threading
|
||||||
|
import uuid
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
from copy import copy
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from gps_denied_onboard.replay_api.errors import (
|
||||||
|
ConcurrencyLimitReachedError,
|
||||||
|
JobNotFoundError,
|
||||||
|
)
|
||||||
|
from gps_denied_onboard.replay_api.interface import (
|
||||||
|
JobSnapshot,
|
||||||
|
JobState,
|
||||||
|
ReplayInputs,
|
||||||
|
ReplayRunner,
|
||||||
|
)
|
||||||
|
from gps_denied_onboard.replay_api.storage import StorageRoot
|
||||||
|
|
||||||
|
__all__ = ["ConcurrencyLimitReachedError", "JobRecord", "JobRegistry"]
|
||||||
|
|
||||||
|
|
||||||
|
_LOGGER = logging.getLogger("gps_denied_onboard.replay_api.jobs")
|
||||||
|
|
||||||
|
|
||||||
|
class JobRecord:
|
||||||
|
"""Internal mutable view of one job.
|
||||||
|
|
||||||
|
The registry exposes copies of ``snapshot`` to callers — never
|
||||||
|
the live object — so external code cannot corrupt state.
|
||||||
|
"""
|
||||||
|
|
||||||
|
__slots__ = ("inputs", "output_dir", "snapshot")
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self, inputs: ReplayInputs, output_dir: Path, snapshot: JobSnapshot
|
||||||
|
) -> None:
|
||||||
|
self.inputs = inputs
|
||||||
|
self.output_dir = output_dir
|
||||||
|
self.snapshot = snapshot
|
||||||
|
|
||||||
|
|
||||||
|
class JobRegistry:
|
||||||
|
"""In-memory job pool + worker dispatch."""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
runner: ReplayRunner,
|
||||||
|
storage: StorageRoot,
|
||||||
|
*,
|
||||||
|
max_concurrent: int = 1,
|
||||||
|
max_queued: int = 8,
|
||||||
|
) -> None:
|
||||||
|
if max_concurrent < 1:
|
||||||
|
raise ValueError("max_concurrent must be ≥ 1")
|
||||||
|
if max_queued < 0:
|
||||||
|
raise ValueError("max_queued must be ≥ 0")
|
||||||
|
self._runner = runner
|
||||||
|
self._storage = storage
|
||||||
|
self._max_concurrent = max_concurrent
|
||||||
|
self._max_queued = max_queued
|
||||||
|
self._lock = threading.Lock()
|
||||||
|
self._records: dict[str, JobRecord] = {}
|
||||||
|
self._running_count = 0
|
||||||
|
self._executor = ThreadPoolExecutor(
|
||||||
|
max_workers=max_concurrent,
|
||||||
|
thread_name_prefix="replay-api-job",
|
||||||
|
)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def max_concurrent(self) -> int:
|
||||||
|
return self._max_concurrent
|
||||||
|
|
||||||
|
def submit(
|
||||||
|
self,
|
||||||
|
inputs: ReplayInputs,
|
||||||
|
output_dir: Path,
|
||||||
|
*,
|
||||||
|
job_id: str | None = None,
|
||||||
|
) -> JobSnapshot:
|
||||||
|
"""Register a new job; return its initial snapshot.
|
||||||
|
|
||||||
|
``job_id`` is optional — when omitted the registry generates
|
||||||
|
a fresh uuid. The handler layer passes its own id so the
|
||||||
|
per-job storage directory and the API-visible job id match.
|
||||||
|
|
||||||
|
State at return time is:
|
||||||
|
- ``RUNNING`` if a worker slot is free.
|
||||||
|
- ``QUEUED`` otherwise (within ``max_queued``).
|
||||||
|
Raises ``ConcurrencyLimitReachedError`` when the queue is full.
|
||||||
|
"""
|
||||||
|
with self._lock:
|
||||||
|
queued_count = sum(
|
||||||
|
1
|
||||||
|
for r in self._records.values()
|
||||||
|
if r.snapshot.state == JobState.QUEUED
|
||||||
|
)
|
||||||
|
if (
|
||||||
|
self._running_count >= self._max_concurrent
|
||||||
|
and queued_count >= self._max_queued
|
||||||
|
):
|
||||||
|
raise ConcurrencyLimitReachedError(
|
||||||
|
f"queue full: running={self._running_count}, "
|
||||||
|
f"queued={queued_count}, max_queued={self._max_queued}"
|
||||||
|
)
|
||||||
|
|
||||||
|
if job_id is None:
|
||||||
|
job_id = uuid.uuid4().hex
|
||||||
|
if job_id in self._records:
|
||||||
|
raise ValueError(
|
||||||
|
f"duplicate job_id supplied to submit(): {job_id}"
|
||||||
|
)
|
||||||
|
state = (
|
||||||
|
JobState.RUNNING
|
||||||
|
if self._running_count < self._max_concurrent
|
||||||
|
else JobState.QUEUED
|
||||||
|
)
|
||||||
|
snapshot = JobSnapshot(
|
||||||
|
job_id=job_id,
|
||||||
|
state=state,
|
||||||
|
submitted_at_utc=_utc_now(),
|
||||||
|
started_at_utc=_utc_now() if state == JobState.RUNNING else None,
|
||||||
|
)
|
||||||
|
record = JobRecord(inputs=inputs, output_dir=output_dir, snapshot=snapshot)
|
||||||
|
self._records[job_id] = record
|
||||||
|
if state == JobState.RUNNING:
|
||||||
|
self._running_count += 1
|
||||||
|
self._executor.submit(self._run_or_wait, job_id)
|
||||||
|
with self._lock:
|
||||||
|
return copy(self._records[job_id].snapshot)
|
||||||
|
|
||||||
|
def get(self, job_id: str) -> JobSnapshot:
|
||||||
|
with self._lock:
|
||||||
|
record = self._records.get(job_id)
|
||||||
|
if record is None:
|
||||||
|
raise JobNotFoundError(f"job not found: {job_id}")
|
||||||
|
return copy(record.snapshot)
|
||||||
|
|
||||||
|
def list_ids(self) -> list[str]:
|
||||||
|
with self._lock:
|
||||||
|
return list(self._records)
|
||||||
|
|
||||||
|
def running_count(self) -> int:
|
||||||
|
with self._lock:
|
||||||
|
return self._running_count
|
||||||
|
|
||||||
|
def queued_count(self) -> int:
|
||||||
|
with self._lock:
|
||||||
|
return sum(
|
||||||
|
1
|
||||||
|
for r in self._records.values()
|
||||||
|
if r.snapshot.state == JobState.QUEUED
|
||||||
|
)
|
||||||
|
|
||||||
|
def shutdown(self, *, wait: bool = True) -> None:
|
||||||
|
self._executor.shutdown(wait=wait, cancel_futures=not wait)
|
||||||
|
self._storage.cleanup_all()
|
||||||
|
|
||||||
|
def _run_or_wait(self, job_id: str) -> None:
|
||||||
|
with self._lock:
|
||||||
|
record = self._records.get(job_id)
|
||||||
|
if record is None:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
if record.snapshot.state == JobState.QUEUED:
|
||||||
|
self._wait_for_slot(record)
|
||||||
|
self._execute(record)
|
||||||
|
except Exception as exc:
|
||||||
|
self._mark_failed(record, exc)
|
||||||
|
|
||||||
|
def _wait_for_slot(self, record: JobRecord) -> None:
|
||||||
|
while True:
|
||||||
|
with self._lock:
|
||||||
|
if self._running_count < self._max_concurrent:
|
||||||
|
record.snapshot.state = JobState.RUNNING
|
||||||
|
record.snapshot.started_at_utc = _utc_now()
|
||||||
|
self._running_count += 1
|
||||||
|
return
|
||||||
|
threading.Event().wait(0.05)
|
||||||
|
|
||||||
|
def _execute(self, record: JobRecord) -> None:
|
||||||
|
try:
|
||||||
|
result = self._runner.run(record.inputs, output_dir=record.output_dir)
|
||||||
|
with self._lock:
|
||||||
|
record.snapshot.state = JobState.DONE
|
||||||
|
record.snapshot.finished_at_utc = _utc_now()
|
||||||
|
record.snapshot.result = result
|
||||||
|
self._running_count = max(0, self._running_count - 1)
|
||||||
|
except Exception:
|
||||||
|
with self._lock:
|
||||||
|
self._running_count = max(0, self._running_count - 1)
|
||||||
|
raise
|
||||||
|
|
||||||
|
def _mark_failed(self, record: JobRecord, exc: BaseException) -> None:
|
||||||
|
message = f"{type(exc).__name__}: {exc}"
|
||||||
|
_LOGGER.exception("job %s failed", record.snapshot.job_id)
|
||||||
|
with self._lock:
|
||||||
|
record.snapshot.state = JobState.FAILED
|
||||||
|
record.snapshot.finished_at_utc = _utc_now()
|
||||||
|
record.snapshot.error = message
|
||||||
|
|
||||||
|
|
||||||
|
def _utc_now() -> datetime:
|
||||||
|
return datetime.now(timezone.utc)
|
||||||
|
|
||||||
|
|
||||||
|
# Re-export Any so type-checkers don't trim the local import.
|
||||||
|
_ = Any
|
||||||
@@ -0,0 +1,89 @@
|
|||||||
|
"""AZ-701 — per-job temp-file lifecycle.
|
||||||
|
|
||||||
|
One ``StorageRoot`` rooted at ``REPLAY_API_STORAGE_ROOT``.
|
||||||
|
Each job allocates a subdirectory ``<root>/<job_id>/`` containing
|
||||||
|
the uploaded ``tlog`` + ``video`` + ``calibration`` plus the
|
||||||
|
estimator's outputs (``emissions.jsonl``, the AZ-699 report, the
|
||||||
|
AZ-700 map).
|
||||||
|
|
||||||
|
The directory is deleted on job completion (``release_job``) and on
|
||||||
|
service shutdown (``cleanup_all``). The service deliberately does
|
||||||
|
NOT keep finished-job artefacts forever — invariant 2 in the
|
||||||
|
contract.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import shutil
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
__all__ = ["JobStorage", "StorageRoot"]
|
||||||
|
|
||||||
|
|
||||||
|
_LOGGER = logging.getLogger("gps_denied_onboard.replay_api.storage")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
|
||||||
|
class JobStorage:
|
||||||
|
"""The per-job paths the handler hands to the runner."""
|
||||||
|
|
||||||
|
root: Path
|
||||||
|
tlog_path: Path
|
||||||
|
video_path: Path
|
||||||
|
calibration_path: Path
|
||||||
|
output_dir: Path
|
||||||
|
|
||||||
|
|
||||||
|
class StorageRoot:
|
||||||
|
"""Parent of per-job storage directories.
|
||||||
|
|
||||||
|
The class is intentionally thin — the registry calls
|
||||||
|
``allocate_job`` at submit-time and ``release_job`` at terminal
|
||||||
|
transitions; nothing else owns mutation rights.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, root: Path) -> None:
|
||||||
|
self._root = root
|
||||||
|
self._root.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def root(self) -> Path:
|
||||||
|
return self._root
|
||||||
|
|
||||||
|
def allocate_job(self, job_id: str) -> JobStorage:
|
||||||
|
job_root = self._root / job_id
|
||||||
|
job_root.mkdir(parents=True, exist_ok=False)
|
||||||
|
output_dir = job_root / "output"
|
||||||
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
return JobStorage(
|
||||||
|
root=job_root,
|
||||||
|
tlog_path=job_root / "input.tlog",
|
||||||
|
video_path=job_root / "input.mp4",
|
||||||
|
calibration_path=job_root / "calibration.json",
|
||||||
|
output_dir=output_dir,
|
||||||
|
)
|
||||||
|
|
||||||
|
def release_job(self, job_id: str) -> None:
|
||||||
|
target = self._root / job_id
|
||||||
|
if not target.exists():
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
shutil.rmtree(target)
|
||||||
|
except OSError as exc:
|
||||||
|
_LOGGER.warning(
|
||||||
|
"failed to delete per-job storage %s: %s", target, exc
|
||||||
|
)
|
||||||
|
|
||||||
|
def cleanup_all(self) -> None:
|
||||||
|
for child in self._root.iterdir():
|
||||||
|
if child.is_dir():
|
||||||
|
try:
|
||||||
|
shutil.rmtree(child)
|
||||||
|
except OSError as exc:
|
||||||
|
_LOGGER.warning(
|
||||||
|
"failed to delete per-job storage %s: %s",
|
||||||
|
child,
|
||||||
|
exc,
|
||||||
|
)
|
||||||
@@ -46,7 +46,7 @@ from gps_denied_onboard.helpers.gps_compare import (
|
|||||||
horizontal_error_distribution,
|
horizontal_error_distribution,
|
||||||
)
|
)
|
||||||
from gps_denied_onboard.replay_input import load_tlog_ground_truth
|
from gps_denied_onboard.replay_input import load_tlog_ground_truth
|
||||||
from tests.e2e.replay._report_writer import (
|
from gps_denied_onboard.helpers.accuracy_report import (
|
||||||
AC3_GATE_PCT,
|
AC3_GATE_PCT,
|
||||||
AC3_GATE_THRESHOLD_M,
|
AC3_GATE_THRESHOLD_M,
|
||||||
ReportContext,
|
ReportContext,
|
||||||
|
|||||||
@@ -0,0 +1,663 @@
|
|||||||
|
"""AZ-701 — replay_api unit tests.
|
||||||
|
|
||||||
|
Covers the AC matrix without invoking the real `gps-denied-replay`
|
||||||
|
subprocess. A fake `ReplayRunner` writes deterministic emissions
|
||||||
|
into the per-job output dir; everything downstream (job state,
|
||||||
|
HTTP handlers, magic-byte validation, auth, concurrency) is then
|
||||||
|
exercised against real FastAPI routing via `httpx.AsyncClient`.
|
||||||
|
|
||||||
|
FastAPI / uvicorn / python-multipart are operator-only deps —
|
||||||
|
the whole module skips cleanly when any is missing.
|
||||||
|
|
||||||
|
Style: every test follows Arrange / Act / Assert.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from collections.abc import Iterator
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
fastapi = pytest.importorskip(
|
||||||
|
"fastapi",
|
||||||
|
reason="FastAPI is an operator-only dep; install gps-denied-onboard[operator-tools]",
|
||||||
|
)
|
||||||
|
pytest.importorskip("httpx", reason="httpx required for the FastAPI TestClient")
|
||||||
|
pytest.importorskip("multipart", reason="python-multipart required by FastAPI")
|
||||||
|
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
from gps_denied_onboard.replay_api import (
|
||||||
|
JobState,
|
||||||
|
create_app,
|
||||||
|
)
|
||||||
|
from gps_denied_onboard.replay_api.handlers import (
|
||||||
|
validate_tlog_kind,
|
||||||
|
validate_video_kind,
|
||||||
|
)
|
||||||
|
from gps_denied_onboard.replay_api.interface import (
|
||||||
|
ReplayInputs,
|
||||||
|
ReplayJobResult,
|
||||||
|
)
|
||||||
|
from gps_denied_onboard.replay_api.jobs import JobRegistry
|
||||||
|
from gps_denied_onboard.replay_api.storage import StorageRoot
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# Fixtures + fakes
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeRunner:
|
||||||
|
"""Deterministic runner that writes a single emissions row."""
|
||||||
|
|
||||||
|
def __init__(self, *, delay_s: float = 0.0, fail: bool = False) -> None:
|
||||||
|
self.delay_s = delay_s
|
||||||
|
self.fail = fail
|
||||||
|
self.calls: list[ReplayInputs] = []
|
||||||
|
|
||||||
|
def run(self, inputs: ReplayInputs, *, output_dir: Path) -> ReplayJobResult:
|
||||||
|
self.calls.append(inputs)
|
||||||
|
if self.delay_s:
|
||||||
|
time.sleep(self.delay_s)
|
||||||
|
if self.fail:
|
||||||
|
raise RuntimeError("fake runner forced failure")
|
||||||
|
emissions = output_dir / "emissions.jsonl"
|
||||||
|
emissions.write_text(
|
||||||
|
json.dumps(
|
||||||
|
{
|
||||||
|
"frame_id": 0,
|
||||||
|
"position_wgs84": {
|
||||||
|
"lat_deg": 50.0,
|
||||||
|
"lon_deg": 30.0,
|
||||||
|
"alt_m": 100.0,
|
||||||
|
},
|
||||||
|
"emitted_at": 0,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
+ "\n"
|
||||||
|
)
|
||||||
|
report = output_dir / "accuracy_report.md"
|
||||||
|
report.write_text("# Fake report\n\n**Verdict**: PASS\n")
|
||||||
|
map_html = output_dir / "map.html"
|
||||||
|
map_html.write_text("<!DOCTYPE html><html><body>fake map</body></html>")
|
||||||
|
return ReplayJobResult(
|
||||||
|
emissions_jsonl_path=emissions,
|
||||||
|
accuracy_report_md_path=report,
|
||||||
|
map_html_path=map_html,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _valid_tlog_bytes() -> bytes:
|
||||||
|
"""First 8 bytes are a microsecond timestamp; byte 8 = MAVLink magic."""
|
||||||
|
return b"\x00\x00\x00\x00\x00\x00\x00\x00\xfd" + b"\x00" * 32
|
||||||
|
|
||||||
|
|
||||||
|
def _valid_mp4_bytes() -> bytes:
|
||||||
|
"""ISO mp4: any size prefix + 'ftyp' marker at offset 4."""
|
||||||
|
return b"\x00\x00\x00\x20ftypisom\x00\x00\x02\x00mp42" + b"\x00" * 16
|
||||||
|
|
||||||
|
|
||||||
|
def _valid_calibration_bytes() -> bytes:
|
||||||
|
return b'{"focal_length": 1, "acquisition_method": "factory-sheet"}'
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def _disable_auth_by_default(monkeypatch: pytest.MonkeyPatch) -> Iterator[None]:
|
||||||
|
monkeypatch.setenv("REPLAY_API_AUTH_REQUIRED", "false")
|
||||||
|
monkeypatch.delenv("REPLAY_API_BEARER_TOKEN", raising=False)
|
||||||
|
yield
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def storage(tmp_path: Path) -> StorageRoot:
|
||||||
|
return StorageRoot(tmp_path / "replay_api")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def fake_runner() -> _FakeRunner:
|
||||||
|
return _FakeRunner()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def make_app(
|
||||||
|
storage: StorageRoot,
|
||||||
|
) -> Any:
|
||||||
|
def _factory(
|
||||||
|
runner: Any,
|
||||||
|
*,
|
||||||
|
max_concurrent: int = 1,
|
||||||
|
max_queued: int = 8,
|
||||||
|
sync_max_bytes: int = 10_000_000,
|
||||||
|
) -> Any:
|
||||||
|
registry = JobRegistry(
|
||||||
|
runner=runner,
|
||||||
|
storage=storage,
|
||||||
|
max_concurrent=max_concurrent,
|
||||||
|
max_queued=max_queued,
|
||||||
|
)
|
||||||
|
return create_app(
|
||||||
|
runner=runner,
|
||||||
|
storage=storage,
|
||||||
|
registry=registry,
|
||||||
|
sync_max_bytes=sync_max_bytes,
|
||||||
|
)
|
||||||
|
|
||||||
|
return _factory
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# Magic-byte validation (AC-9)
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_tlog_kind_accepts_mavlink_v2_magic() -> None:
|
||||||
|
# Act / Assert — must not raise
|
||||||
|
validate_tlog_kind(_valid_tlog_bytes())
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_tlog_kind_rejects_zip_renamed_to_tlog() -> None:
|
||||||
|
# Arrange — ZIP magic bytes at offset 0; pre-bytes 0..7 are
|
||||||
|
# the (forged) timestamp; byte 8 holds the (non-MAVLink) magic.
|
||||||
|
bogus = b"\x00\x00\x00\x00\x00\x00\x00\x00PK\x03\x04rest_of_zip_header"
|
||||||
|
|
||||||
|
# Act / Assert
|
||||||
|
with pytest.raises(Exception) as exc:
|
||||||
|
validate_tlog_kind(bogus)
|
||||||
|
assert "MAVLink" in str(exc.value)
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_video_kind_accepts_mp4_ftyp() -> None:
|
||||||
|
validate_video_kind(_valid_mp4_bytes())
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_video_kind_rejects_arbitrary_bytes() -> None:
|
||||||
|
with pytest.raises(Exception) as exc:
|
||||||
|
validate_video_kind(b"\x00" * 64)
|
||||||
|
assert "ftyp" in str(exc.value)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# AC-1 — sync POST → 200 + JSONL
|
||||||
|
|
||||||
|
|
||||||
|
def test_post_replay_sync_returns_200_with_result_urls(
|
||||||
|
fake_runner: _FakeRunner,
|
||||||
|
make_app: Any,
|
||||||
|
) -> None:
|
||||||
|
# Arrange
|
||||||
|
app = make_app(fake_runner)
|
||||||
|
client = TestClient(app)
|
||||||
|
|
||||||
|
# Act
|
||||||
|
response = client.post(
|
||||||
|
"/replay",
|
||||||
|
files={
|
||||||
|
"tlog": ("derkachi.tlog", _valid_tlog_bytes(), "application/octet-stream"),
|
||||||
|
"video": ("derkachi.mp4", _valid_mp4_bytes(), "video/mp4"),
|
||||||
|
"calibration": (
|
||||||
|
"khp20s30.json",
|
||||||
|
_valid_calibration_bytes(),
|
||||||
|
"application/json",
|
||||||
|
),
|
||||||
|
},
|
||||||
|
data={"pace": "asap"},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert response.status_code == 200, response.text
|
||||||
|
body = response.json()
|
||||||
|
assert body["state"] == JobState.DONE.value
|
||||||
|
assert body["sync"] is True
|
||||||
|
assert body["emissions_jsonl_url"].endswith("/result")
|
||||||
|
assert body["map_html_url"].endswith("/map")
|
||||||
|
assert body["accuracy_report_md_url"].endswith("/report")
|
||||||
|
# Runner saw exactly one job with the expected pace + auto-trim default.
|
||||||
|
assert len(fake_runner.calls) == 1
|
||||||
|
assert fake_runner.calls[0].pace == "asap"
|
||||||
|
assert fake_runner.calls[0].auto_trim is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_post_replay_serves_jsonl_and_map_for_done_job(
|
||||||
|
fake_runner: _FakeRunner,
|
||||||
|
make_app: Any,
|
||||||
|
) -> None:
|
||||||
|
# Arrange
|
||||||
|
app = make_app(fake_runner)
|
||||||
|
client = TestClient(app)
|
||||||
|
response = client.post(
|
||||||
|
"/replay",
|
||||||
|
files={
|
||||||
|
"tlog": ("derkachi.tlog", _valid_tlog_bytes(), "application/octet-stream"),
|
||||||
|
"video": ("derkachi.mp4", _valid_mp4_bytes(), "video/mp4"),
|
||||||
|
"calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
body = response.json()
|
||||||
|
job_id = body["job_id"]
|
||||||
|
|
||||||
|
# Act
|
||||||
|
jsonl_resp = client.get(f"/jobs/{job_id}/result")
|
||||||
|
map_resp = client.get(f"/jobs/{job_id}/map")
|
||||||
|
report_resp = client.get(f"/jobs/{job_id}/report")
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert jsonl_resp.status_code == 200
|
||||||
|
assert "lat_deg" in jsonl_resp.text
|
||||||
|
assert map_resp.status_code == 200
|
||||||
|
assert "fake map" in map_resp.text
|
||||||
|
assert report_resp.status_code == 200
|
||||||
|
assert "**Verdict**: PASS" in report_resp.text
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# AC-2 — async POST → 202 + job id
|
||||||
|
|
||||||
|
|
||||||
|
def test_post_replay_async_returns_202_when_video_exceeds_sync_bytes(
|
||||||
|
storage: StorageRoot,
|
||||||
|
) -> None:
|
||||||
|
# Arrange — runner sleeps so we observe the queued/running state.
|
||||||
|
runner = _FakeRunner(delay_s=0.2)
|
||||||
|
registry = JobRegistry(runner=runner, storage=storage, max_concurrent=1)
|
||||||
|
app = create_app(
|
||||||
|
runner=runner,
|
||||||
|
storage=storage,
|
||||||
|
registry=registry,
|
||||||
|
sync_max_bytes=10, # any non-trivial video exceeds this
|
||||||
|
)
|
||||||
|
client = TestClient(app)
|
||||||
|
|
||||||
|
# Act
|
||||||
|
response = client.post(
|
||||||
|
"/replay",
|
||||||
|
files={
|
||||||
|
"tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
|
||||||
|
"video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
|
||||||
|
"calibration": (
|
||||||
|
"k.json",
|
||||||
|
_valid_calibration_bytes(),
|
||||||
|
"application/json",
|
||||||
|
),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert response.status_code == 202, response.text
|
||||||
|
body = response.json()
|
||||||
|
assert body["state"] in {JobState.QUEUED.value, JobState.RUNNING.value}
|
||||||
|
assert "Location" in response.headers
|
||||||
|
assert response.headers["Location"] == f"/jobs/{body['job_id']}"
|
||||||
|
_wait_done(client, body["job_id"])
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# AC-3 — job state transitions queued → running → done
|
||||||
|
|
||||||
|
|
||||||
|
def test_job_state_transitions_observable_via_polling(
|
||||||
|
storage: StorageRoot,
|
||||||
|
) -> None:
|
||||||
|
# Arrange
|
||||||
|
runner = _FakeRunner(delay_s=0.3)
|
||||||
|
registry = JobRegistry(runner=runner, storage=storage, max_concurrent=1)
|
||||||
|
app = create_app(
|
||||||
|
runner=runner,
|
||||||
|
storage=storage,
|
||||||
|
registry=registry,
|
||||||
|
sync_max_bytes=10,
|
||||||
|
)
|
||||||
|
client = TestClient(app)
|
||||||
|
response = client.post(
|
||||||
|
"/replay",
|
||||||
|
files={
|
||||||
|
"tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
|
||||||
|
"video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
|
||||||
|
"calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
job_id = response.json()["job_id"]
|
||||||
|
|
||||||
|
# Act + Assert — poll until done; record the unique states seen.
|
||||||
|
seen: set[str] = set()
|
||||||
|
deadline = time.monotonic() + 10.0
|
||||||
|
while time.monotonic() < deadline:
|
||||||
|
snap = client.get(f"/jobs/{job_id}").json()
|
||||||
|
seen.add(snap["state"])
|
||||||
|
if snap["state"] == JobState.DONE.value:
|
||||||
|
break
|
||||||
|
time.sleep(0.05)
|
||||||
|
assert JobState.DONE.value in seen
|
||||||
|
# We expect to have seen at least one of queued/running before done.
|
||||||
|
assert seen & {JobState.QUEUED.value, JobState.RUNNING.value}
|
||||||
|
|
||||||
|
|
||||||
|
def test_failed_runner_marks_job_failed(
|
||||||
|
storage: StorageRoot,
|
||||||
|
) -> None:
|
||||||
|
# Arrange
|
||||||
|
runner = _FakeRunner(fail=True)
|
||||||
|
registry = JobRegistry(runner=runner, storage=storage)
|
||||||
|
app = create_app(
|
||||||
|
runner=runner, storage=storage, registry=registry, sync_max_bytes=10
|
||||||
|
)
|
||||||
|
client = TestClient(app)
|
||||||
|
|
||||||
|
# Act
|
||||||
|
response = client.post(
|
||||||
|
"/replay",
|
||||||
|
files={
|
||||||
|
"tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
|
||||||
|
"video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
|
||||||
|
"calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
job_id = response.json()["job_id"]
|
||||||
|
snap = _wait_terminal(client, job_id)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert snap["state"] == JobState.FAILED.value
|
||||||
|
assert "fake runner forced failure" in (snap["error"] or "")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# AC-4 — result + map served from job id (covered above)
|
||||||
|
|
||||||
|
|
||||||
|
def test_result_endpoints_409_when_job_not_done(
|
||||||
|
storage: StorageRoot,
|
||||||
|
) -> None:
|
||||||
|
# Arrange — slow runner so job stays running long enough to probe.
|
||||||
|
runner = _FakeRunner(delay_s=0.5)
|
||||||
|
registry = JobRegistry(runner=runner, storage=storage)
|
||||||
|
app = create_app(
|
||||||
|
runner=runner, storage=storage, registry=registry, sync_max_bytes=10
|
||||||
|
)
|
||||||
|
client = TestClient(app)
|
||||||
|
job_id = client.post(
|
||||||
|
"/replay",
|
||||||
|
files={
|
||||||
|
"tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
|
||||||
|
"video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
|
||||||
|
"calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
|
||||||
|
},
|
||||||
|
).json()["job_id"]
|
||||||
|
|
||||||
|
# Act — race the runner; we want to hit the not-done branch.
|
||||||
|
res = client.get(f"/jobs/{job_id}/result")
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
if res.status_code == 200:
|
||||||
|
pytest.skip("runner finished before we could probe the 409 path")
|
||||||
|
assert res.status_code == 409
|
||||||
|
body = res.json()
|
||||||
|
assert body["error_code"] == "job_not_complete"
|
||||||
|
_wait_done(client, job_id)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# AC-5 — auth enforced when configured
|
||||||
|
|
||||||
|
|
||||||
|
def test_post_replay_returns_401_without_bearer_when_required(
|
||||||
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
|
storage: StorageRoot,
|
||||||
|
fake_runner: _FakeRunner,
|
||||||
|
) -> None:
|
||||||
|
# Arrange
|
||||||
|
monkeypatch.setenv("REPLAY_API_AUTH_REQUIRED", "true")
|
||||||
|
monkeypatch.setenv("REPLAY_API_BEARER_TOKEN", "shibboleth")
|
||||||
|
registry = JobRegistry(runner=fake_runner, storage=storage)
|
||||||
|
app = create_app(
|
||||||
|
runner=fake_runner,
|
||||||
|
storage=storage,
|
||||||
|
registry=registry,
|
||||||
|
sync_max_bytes=10_000_000,
|
||||||
|
)
|
||||||
|
client = TestClient(app)
|
||||||
|
|
||||||
|
# Act
|
||||||
|
response = client.post(
|
||||||
|
"/replay",
|
||||||
|
files={
|
||||||
|
"tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
|
||||||
|
"video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
|
||||||
|
"calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert response.status_code == 401
|
||||||
|
assert response.json()["error_code"] == "unauthorized"
|
||||||
|
|
||||||
|
|
||||||
|
def test_post_replay_accepts_correct_bearer(
|
||||||
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
|
storage: StorageRoot,
|
||||||
|
fake_runner: _FakeRunner,
|
||||||
|
) -> None:
|
||||||
|
# Arrange
|
||||||
|
monkeypatch.setenv("REPLAY_API_AUTH_REQUIRED", "true")
|
||||||
|
monkeypatch.setenv("REPLAY_API_BEARER_TOKEN", "shibboleth")
|
||||||
|
registry = JobRegistry(runner=fake_runner, storage=storage)
|
||||||
|
app = create_app(
|
||||||
|
runner=fake_runner,
|
||||||
|
storage=storage,
|
||||||
|
registry=registry,
|
||||||
|
sync_max_bytes=10_000_000,
|
||||||
|
)
|
||||||
|
client = TestClient(app)
|
||||||
|
|
||||||
|
# Act
|
||||||
|
response = client.post(
|
||||||
|
"/replay",
|
||||||
|
files={
|
||||||
|
"tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
|
||||||
|
"video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
|
||||||
|
"calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
|
||||||
|
},
|
||||||
|
headers={"Authorization": "Bearer shibboleth"},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert response.status_code == 200, response.text
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# AC-6 — health endpoints
|
||||||
|
|
||||||
|
|
||||||
|
def test_healthz_always_returns_200(fake_runner: _FakeRunner, make_app: Any) -> None:
|
||||||
|
# Arrange
|
||||||
|
client = TestClient(make_app(fake_runner))
|
||||||
|
|
||||||
|
# Act / Assert
|
||||||
|
assert client.get("/healthz").status_code == 200
|
||||||
|
|
||||||
|
|
||||||
|
def test_readyz_returns_503_when_binary_missing(
|
||||||
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
|
fake_runner: _FakeRunner,
|
||||||
|
make_app: Any,
|
||||||
|
) -> None:
|
||||||
|
# Arrange — point readyz at a binary we know doesn't exist.
|
||||||
|
monkeypatch.setenv("REPLAY_API_REPLAY_BINARY", "definitely-not-a-binary-az701")
|
||||||
|
client = TestClient(make_app(fake_runner))
|
||||||
|
|
||||||
|
# Act
|
||||||
|
response = client.get("/readyz")
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert response.status_code == 503
|
||||||
|
assert "not on PATH" in response.json()["reason"]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# AC-8 — concurrency limit enforced
|
||||||
|
|
||||||
|
|
||||||
|
def test_concurrency_limit_queues_excess_jobs(storage: StorageRoot) -> None:
|
||||||
|
# Arrange
|
||||||
|
runner = _FakeRunner(delay_s=0.5)
|
||||||
|
registry = JobRegistry(
|
||||||
|
runner=runner, storage=storage, max_concurrent=1, max_queued=8
|
||||||
|
)
|
||||||
|
app = create_app(
|
||||||
|
runner=runner, storage=storage, registry=registry, sync_max_bytes=10
|
||||||
|
)
|
||||||
|
client = TestClient(app)
|
||||||
|
job_ids: list[str] = []
|
||||||
|
|
||||||
|
# Act — submit 3 in quick succession; sync_max_bytes=10 forces async mode.
|
||||||
|
for _ in range(3):
|
||||||
|
resp = client.post(
|
||||||
|
"/replay",
|
||||||
|
files={
|
||||||
|
"tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
|
||||||
|
"video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
|
||||||
|
"calibration": (
|
||||||
|
"k.json",
|
||||||
|
_valid_calibration_bytes(),
|
||||||
|
"application/json",
|
||||||
|
),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
assert resp.status_code == 202, resp.text
|
||||||
|
job_ids.append(resp.json()["job_id"])
|
||||||
|
|
||||||
|
# Sample states quickly — at this instant we expect 1 running and ≥ 1 queued.
|
||||||
|
states = [
|
||||||
|
client.get(f"/jobs/{jid}").json()["state"] for jid in job_ids
|
||||||
|
]
|
||||||
|
assert states.count(JobState.RUNNING.value) <= 1, (
|
||||||
|
f"more than one running at once: {states}"
|
||||||
|
)
|
||||||
|
assert (
|
||||||
|
states.count(JobState.QUEUED.value) >= 1
|
||||||
|
or states.count(JobState.DONE.value) >= 2
|
||||||
|
), f"no queued state observed; states={states}"
|
||||||
|
|
||||||
|
# Wait for everything to finish so the test exits cleanly.
|
||||||
|
for jid in job_ids:
|
||||||
|
_wait_done(client, jid)
|
||||||
|
|
||||||
|
|
||||||
|
def test_queue_full_returns_429(storage: StorageRoot) -> None:
|
||||||
|
# Arrange — max_queued=0 forces the 429 path on the second submit.
|
||||||
|
runner = _FakeRunner(delay_s=0.5)
|
||||||
|
registry = JobRegistry(
|
||||||
|
runner=runner, storage=storage, max_concurrent=1, max_queued=0
|
||||||
|
)
|
||||||
|
app = create_app(
|
||||||
|
runner=runner, storage=storage, registry=registry, sync_max_bytes=10
|
||||||
|
)
|
||||||
|
client = TestClient(app)
|
||||||
|
|
||||||
|
# Act
|
||||||
|
first = client.post(
|
||||||
|
"/replay",
|
||||||
|
files={
|
||||||
|
"tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
|
||||||
|
"video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
|
||||||
|
"calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
second = client.post(
|
||||||
|
"/replay",
|
||||||
|
files={
|
||||||
|
"tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
|
||||||
|
"video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
|
||||||
|
"calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert first.status_code == 202
|
||||||
|
assert second.status_code == 429
|
||||||
|
assert second.json()["error_code"] == "concurrency_limit_reached"
|
||||||
|
_wait_done(client, first.json()["job_id"])
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# AC-9 — magic-byte upload validation (HTTP path)
|
||||||
|
|
||||||
|
|
||||||
|
def test_post_replay_rejects_misnamed_zip_as_tlog(
|
||||||
|
fake_runner: _FakeRunner, make_app: Any
|
||||||
|
) -> None:
|
||||||
|
# Arrange
|
||||||
|
bogus_tlog = b"\x00\x00\x00\x00\x00\x00\x00\x00PK\x03\x04bogus"
|
||||||
|
client = TestClient(make_app(fake_runner))
|
||||||
|
|
||||||
|
# Act
|
||||||
|
response = client.post(
|
||||||
|
"/replay",
|
||||||
|
files={
|
||||||
|
"tlog": ("d.tlog", bogus_tlog, "application/octet-stream"),
|
||||||
|
"video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
|
||||||
|
"calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert response.status_code == 400
|
||||||
|
assert response.json()["error_code"] == "unsupported_file_kind"
|
||||||
|
|
||||||
|
|
||||||
|
def test_post_replay_rejects_misnamed_zip_as_video(
|
||||||
|
fake_runner: _FakeRunner, make_app: Any
|
||||||
|
) -> None:
|
||||||
|
# Arrange
|
||||||
|
bogus_video = b"\x00\x00\x00\x20notftyp..." + b"\x00" * 64
|
||||||
|
client = TestClient(make_app(fake_runner))
|
||||||
|
|
||||||
|
# Act
|
||||||
|
response = client.post(
|
||||||
|
"/replay",
|
||||||
|
files={
|
||||||
|
"tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
|
||||||
|
"video": ("d.mp4", bogus_video, "video/mp4"),
|
||||||
|
"calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert response.status_code == 400
|
||||||
|
assert response.json()["error_code"] == "unsupported_file_kind"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
|
||||||
|
|
||||||
|
def _wait_done(client: TestClient, job_id: str, timeout_s: float = 10.0) -> None:
|
||||||
|
"""Block until ``job_id`` is in state ``done``."""
|
||||||
|
deadline = time.monotonic() + timeout_s
|
||||||
|
while time.monotonic() < deadline:
|
||||||
|
snap = client.get(f"/jobs/{job_id}").json()
|
||||||
|
if snap["state"] == JobState.DONE.value:
|
||||||
|
return
|
||||||
|
if snap["state"] == JobState.FAILED.value:
|
||||||
|
raise AssertionError(f"job {job_id} unexpectedly failed: {snap}")
|
||||||
|
time.sleep(0.05)
|
||||||
|
raise AssertionError(f"job {job_id} did not reach DONE within {timeout_s}s")
|
||||||
|
|
||||||
|
|
||||||
|
def _wait_terminal(
|
||||||
|
client: TestClient, job_id: str, timeout_s: float = 10.0
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
deadline = time.monotonic() + timeout_s
|
||||||
|
while time.monotonic() < deadline:
|
||||||
|
snap = client.get(f"/jobs/{job_id}").json()
|
||||||
|
if snap["state"] in {JobState.DONE.value, JobState.FAILED.value}:
|
||||||
|
return snap
|
||||||
|
time.sleep(0.05)
|
||||||
|
raise AssertionError(f"job {job_id} did not reach terminal state")
|
||||||
|
|
||||||
|
|
||||||
|
# Suppress unused-imports warnings for symbols only the test harness uses.
|
||||||
|
_ = (os, threading, fastapi)
|
||||||
@@ -20,7 +20,7 @@ from gps_denied_onboard.helpers.gps_compare import (
|
|||||||
horizontal_error_distribution,
|
horizontal_error_distribution,
|
||||||
percentile_sorted,
|
percentile_sorted,
|
||||||
)
|
)
|
||||||
from tests.e2e.replay._report_writer import (
|
from gps_denied_onboard.helpers.accuracy_report import (
|
||||||
AC3_GATE_PCT,
|
AC3_GATE_PCT,
|
||||||
AC3_GATE_THRESHOLD_M,
|
AC3_GATE_THRESHOLD_M,
|
||||||
ReportContext,
|
ReportContext,
|
||||||
|
|||||||
Reference in New Issue
Block a user