mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-21 08:51:12 +00:00
Compare commits
7 Commits
a7b3e60716
...
7d53cef0cf
| Author | SHA1 | Date | |
|---|---|---|---|
| 7d53cef0cf | |||
| b66b68ff76 | |||
| dcde602f61 | |||
| f5366bbca1 | |||
| 87fe98858f | |||
| 64d961f60c | |||
| a12638dd92 |
@@ -13,3 +13,11 @@ globs: ["**/*test*", "**/*spec*", "**/*Test*", "**/tests/**", "**/test/**"]
|
||||
- Never use thread sleeps (`Thread.sleep`, `time.sleep`, …) or other fixed delays in tests; use polling or async waits
|
||||
- Keep test data factories/builders for reusable test setup
|
||||
- Tests must be independent: no shared mutable state between tests
|
||||
|
||||
## Test environment (this project)
|
||||
|
||||
- **Unit tests** (`tests/unit/`): may run locally on the dev workstation (`pytest tests/unit/` in the project venv). Local PASS is equivalent to Jetson PASS for this tier because the suite is fully synthetic.
|
||||
- **Blackbox / e2e / performance / resilience / security / resource-limit** tests (`tests/e2e/`, `e2e/tests/`, `tests/perf/`, …): MUST run on the Jetson Orin Nano Super (or a Jetson-equivalent arm64 agent). Use `scripts/run-tests-jetson.sh` for local dev; CI runs `.woodpecker/01-test.yml` on the colocated arm64 Jetson Woodpecker agent.
|
||||
- Do NOT run e2e tests on the local workstation and report the result. If the Jetson is unreachable, the e2e verdict is "not run" — record the gap in `_docs/_process_leftovers/` rather than substituting a local result.
|
||||
- Tests gated by `RUN_REPLAY_E2E` or `@pytest.mark.tier2` are expected to SKIP locally; that is correct behaviour, not a failure to investigate.
|
||||
- Canonical source for this policy: `_docs/02_document/tests/environment.md` § Where each tier runs (active policy).
|
||||
|
||||
@@ -45,6 +45,11 @@ tests/fixtures/tiles_corpus/*.jpg
|
||||
tests/fixtures/tiles_corpus/*.png
|
||||
e2e/fixtures/sitl_replay/
|
||||
|
||||
# Problem-folder flight-log inputs (binary, out-of-band)
|
||||
_docs/00_problem/input_data/**/*.tlog
|
||||
_docs/00_problem/input_data/**/*.mp4
|
||||
_docs/00_problem/input_data/**/*.h264
|
||||
|
||||
# Editor / OS noise
|
||||
.idea/
|
||||
.vscode/
|
||||
|
||||
@@ -1,3 +1,34 @@
|
||||
Camera model: Topotek KHP20S30
|
||||
Daylight Sensor: 1/2.8" CMOS (2.13 Мп).
|
||||
Full HD (1920x1080), 30/60 fps
|
||||
# Derkachi camera
|
||||
|
||||
Camera model: **Topotek KHP20S30**
|
||||
Daylight sensor: 1/2.8" CMOS (Sony IMX291-class, 2.13 MP)
|
||||
Image resolution: Full HD 1920×1080 @ 30/60 fps
|
||||
Lens: 20× optical zoom, f = 4.7 mm – 94 mm
|
||||
|
||||
## Calibration
|
||||
|
||||
**File**: [`khp20s30_factory.json`](./khp20s30_factory.json)
|
||||
**Acquisition method**: `factory_sheet` (AZ-702 — factory-sheet approximation)
|
||||
**Assumed zoom setting**: wide-angle (f = 4.7 mm), HFOV ≈ 59.5°
|
||||
|
||||
Per-unit checkerboard refinement is **deferred** (no hardware access to the
|
||||
Derkachi unit). The factory-sheet calibration is the cheapest reasonable
|
||||
starting point. The residual focal-length error is expected to be in the
|
||||
**1–3 %** band; at high AGL this may push horizontal position error past the
|
||||
AC-3 100 m budget, in which case AZ-699 (T3 real-flight validation) reports
|
||||
the honest finding and a follow-up checkerboard task is filed.
|
||||
|
||||
### Why factory-sheet (not checkerboard or PnP-from-tlog)
|
||||
|
||||
* **Checkerboard**: needs physical access to the airframe + a known-geometry
|
||||
calibration target. Not in scope for AZ-696.
|
||||
* **PnP-from-tlog back-computation**: would require a 5-point task in its own
|
||||
right; deferred as an AZ-696 follow-up if the residual budget proves
|
||||
insufficient.
|
||||
|
||||
### Replay-test wiring
|
||||
|
||||
`tests/e2e/replay/conftest.py::_calibration_path()` prefers this file when
|
||||
present and falls back to `tests/fixtures/calibration/adti26.json` otherwise,
|
||||
so dev environments that don't carry the calibration file still exercise the
|
||||
AC-1 / AC-2 / AC-5 / AC-6 paths.
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"camera_id": "khp20s30_factory",
|
||||
"intrinsics_3x3": [
|
||||
[1680.4469, 0.0, 960.0],
|
||||
[0.0, 1680.4469, 540.0],
|
||||
[0.0, 0.0, 1.0]
|
||||
],
|
||||
"distortion": [0.0, 0.0, 0.0, 0.0, 0.0],
|
||||
"body_to_camera_se3": [
|
||||
[1.0, 0.0, 0.0, 0.0],
|
||||
[0.0, 1.0, 0.0, 0.0],
|
||||
[0.0, 0.0, 1.0, 0.0],
|
||||
[0.0, 0.0, 0.0, 1.0]
|
||||
],
|
||||
"acquisition_method": "factory_sheet",
|
||||
"metadata": {
|
||||
"model": "Topotek KHP20S30",
|
||||
"sensor": "1/2.8\" CMOS (Sony IMX291-class), 2.13 MP",
|
||||
"image_resolution_px": [1920, 1080],
|
||||
"sensor_width_mm": 5.37,
|
||||
"sensor_height_mm": 3.02,
|
||||
"assumed_focal_length_mm": 4.7,
|
||||
"focal_length_range_mm": [4.7, 94.0],
|
||||
"assumed_zoom": "wide-angle (max FOV, f=4.7 mm)",
|
||||
"computed_hfov_deg": 59.48,
|
||||
"computed_vfov_deg": 35.62,
|
||||
"intrinsics_formula": "fx = fy = focal_mm * (image_width_px / sensor_width_mm); cx = width/2; cy = height/2",
|
||||
"body_to_camera_convention": "identity-down (nadir, camera-z aligned with aircraft body-z = down per FRD body frame)",
|
||||
"residual_budget_pct": 3.0,
|
||||
"note": "Factory-sheet approximation per AZ-702. The KHP20S30 is a 20x optical-zoom camera (f=4.7-94 mm); the wide-angle f=4.7 mm setting is assumed without per-flight EXIF confirmation. Per-unit checkerboard refinement is deferred — see _docs/00_problem/input_data/flight_derkachi/camera_info.md and the AZ-696 epic. AC-3 (<= 100 m horizontal error) may honestly fail if the assumed focal length is wrong by enough to swamp the 100 m budget at the Derkachi AGL band.",
|
||||
"task": "AZ-702",
|
||||
"epic": "AZ-696"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,251 @@
|
||||
openapi: 3.1.0
|
||||
info:
|
||||
title: gps-denied-onboard replay API
|
||||
description: HTTP wrapper around the offline `gps-denied-replay` pipeline. Upload
|
||||
(tlog + video [+ calibration]); receive GPS fixes + an accuracy report + an HTML
|
||||
map.
|
||||
version: 1.0.0
|
||||
paths:
|
||||
/healthz:
|
||||
get:
|
||||
summary: Healthz
|
||||
operationId: healthz_healthz_get
|
||||
responses:
|
||||
'200':
|
||||
description: Successful Response
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
additionalProperties:
|
||||
type: string
|
||||
type: object
|
||||
title: Response Healthz Healthz Get
|
||||
/readyz:
|
||||
get:
|
||||
summary: Readyz
|
||||
operationId: readyz_readyz_get
|
||||
responses:
|
||||
'200':
|
||||
description: Successful Response
|
||||
content:
|
||||
application/json:
|
||||
schema: {}
|
||||
/replay:
|
||||
post:
|
||||
summary: Post Replay
|
||||
operationId: post_replay_replay_post
|
||||
parameters:
|
||||
- name: authorization
|
||||
in: header
|
||||
required: false
|
||||
schema:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
title: Authorization
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
multipart/form-data:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Body_post_replay_replay_post'
|
||||
responses:
|
||||
'200':
|
||||
description: Successful Response
|
||||
content:
|
||||
application/json:
|
||||
schema: {}
|
||||
'422':
|
||||
description: Validation Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/HTTPValidationError'
|
||||
/jobs/{job_id}:
|
||||
get:
|
||||
summary: Get Job
|
||||
operationId: get_job_jobs__job_id__get
|
||||
parameters:
|
||||
- name: job_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
title: Job Id
|
||||
- name: authorization
|
||||
in: header
|
||||
required: false
|
||||
schema:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
title: Authorization
|
||||
responses:
|
||||
'200':
|
||||
description: Successful Response
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
additionalProperties: true
|
||||
title: Response Get Job Jobs Job Id Get
|
||||
'422':
|
||||
description: Validation Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/HTTPValidationError'
|
||||
/jobs/{job_id}/result:
|
||||
get:
|
||||
summary: Get Result
|
||||
operationId: get_result_jobs__job_id__result_get
|
||||
parameters:
|
||||
- name: job_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
title: Job Id
|
||||
- name: authorization
|
||||
in: header
|
||||
required: false
|
||||
schema:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
title: Authorization
|
||||
responses:
|
||||
'200':
|
||||
description: Successful Response
|
||||
content:
|
||||
application/json:
|
||||
schema: {}
|
||||
'422':
|
||||
description: Validation Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/HTTPValidationError'
|
||||
/jobs/{job_id}/map:
|
||||
get:
|
||||
summary: Get Map
|
||||
operationId: get_map_jobs__job_id__map_get
|
||||
parameters:
|
||||
- name: job_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
title: Job Id
|
||||
- name: authorization
|
||||
in: header
|
||||
required: false
|
||||
schema:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
title: Authorization
|
||||
responses:
|
||||
'200':
|
||||
description: Successful Response
|
||||
content:
|
||||
application/json:
|
||||
schema: {}
|
||||
'422':
|
||||
description: Validation Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/HTTPValidationError'
|
||||
/jobs/{job_id}/report:
|
||||
get:
|
||||
summary: Get Report
|
||||
operationId: get_report_jobs__job_id__report_get
|
||||
parameters:
|
||||
- name: job_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
title: Job Id
|
||||
- name: authorization
|
||||
in: header
|
||||
required: false
|
||||
schema:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
title: Authorization
|
||||
responses:
|
||||
'200':
|
||||
description: Successful Response
|
||||
content:
|
||||
application/json:
|
||||
schema: {}
|
||||
'422':
|
||||
description: Validation Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/HTTPValidationError'
|
||||
components:
|
||||
schemas:
|
||||
Body_post_replay_replay_post:
|
||||
properties:
|
||||
tlog:
|
||||
type: string
|
||||
format: binary
|
||||
title: Tlog
|
||||
video:
|
||||
type: string
|
||||
format: binary
|
||||
title: Video
|
||||
calibration:
|
||||
anyOf:
|
||||
- type: string
|
||||
format: binary
|
||||
- type: 'null'
|
||||
title: Calibration
|
||||
pace:
|
||||
type: string
|
||||
title: Pace
|
||||
default: asap
|
||||
auto_trim:
|
||||
type: boolean
|
||||
title: Auto Trim
|
||||
default: true
|
||||
type: object
|
||||
required:
|
||||
- tlog
|
||||
- video
|
||||
title: Body_post_replay_replay_post
|
||||
HTTPValidationError:
|
||||
properties:
|
||||
detail:
|
||||
items:
|
||||
$ref: '#/components/schemas/ValidationError'
|
||||
type: array
|
||||
title: Detail
|
||||
type: object
|
||||
title: HTTPValidationError
|
||||
ValidationError:
|
||||
properties:
|
||||
loc:
|
||||
items:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
type: array
|
||||
title: Location
|
||||
msg:
|
||||
type: string
|
||||
title: Message
|
||||
type:
|
||||
type: string
|
||||
title: Error Type
|
||||
type: object
|
||||
required:
|
||||
- loc
|
||||
- msg
|
||||
- type
|
||||
title: ValidationError
|
||||
@@ -0,0 +1,135 @@
|
||||
# Contract: `replay_api` HTTP service
|
||||
|
||||
**Owner**: AZ-701 (epic AZ-696 / cycle-2 multi-flight demo deliverables).
|
||||
**Producer task**: AZ-701 (this contract).
|
||||
**Consumer**: any HTTP client — operator dashboards, the parent-suite UI, demo runners, ad-hoc `curl` sessions.
|
||||
**Version**: 1.0.0
|
||||
**Status**: draft (in-testing on Jetson)
|
||||
**Last Updated**: 2026-05-20
|
||||
**Module-layout home**:
|
||||
- `src/gps_denied_onboard/replay_api/app.py` — FastAPI app factory + uvicorn entrypoint.
|
||||
- `src/gps_denied_onboard/replay_api/handlers.py` — request handlers (multipart parse, magic-byte validation, auth dependency).
|
||||
- `src/gps_denied_onboard/replay_api/jobs.py` — in-memory `JobRegistry` + `JobRecord` + concurrency limit.
|
||||
- `src/gps_denied_onboard/replay_api/storage.py` — per-job temp directory lifecycle + cleanup.
|
||||
- `src/gps_denied_onboard/replay_api/interface.py` — `ReplayRunner` Protocol + DTOs (`ReplayJobResult`, `JobState`, `JobSnapshot`).
|
||||
- `src/gps_denied_onboard/replay_api/errors.py` — typed HTTP error families.
|
||||
- `src/gps_denied_onboard/cli/replay_api_entrypoint.py` — `replay-api` console-script.
|
||||
- `docker/replay-api.Dockerfile` — operator-side container image.
|
||||
|
||||
## Purpose
|
||||
|
||||
Expose the offline replay pipeline (`gps-denied-replay` CLI from AZ-402, plus the `gps-denied-render-map` HTML renderer from AZ-700) over a single HTTP surface so external consumers can upload `(tlog + video [+ calibration])` and receive GPS fixes + an accuracy report + a map without installing the Python stack.
|
||||
|
||||
The service is **operator-side only**: it is NOT bundled into the airborne binary. It runs in its own container (`docker/replay-api.Dockerfile`) and is started via the `replay-api` console-script in the `[operator-tools]` optional-dependency group.
|
||||
|
||||
## Design invariants
|
||||
|
||||
1. **The service does not re-implement the estimator.** It shells out to the existing `gps-denied-replay` console-script. The estimator path is exactly what runs on the airborne binary; the service is a thin HTTP shim.
|
||||
2. **No persistent state.** Jobs live in-process; uploads live in per-job temp directories that are deleted on completion or service shutdown. Operators that need durable history persist the JSONL + Markdown report + HTML map artefacts out-of-band.
|
||||
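3. **Sync vs. async is decided by the service (file size and current load), not by the client.** Videos ≤ `REPLAY_API_SYNC_MAX_BYTES` (default 200 MB) run inline; larger uploads are queued and the client polls. An upload under the threshold is also queued when the concurrency limit is already reached at submit time.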
|
||||
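4. **Magic-byte file validation** is applied before any data is handed to the estimator. The service refuses uploads whose leading bytes do not match the expected `.tlog` signature (MAVLink v2.0 magic byte `0xFD`; note that a telemetry log prefixes every record with an 8-byte timestamp, so the magic byte of the first packet sits at offset 8, not offset 0) or `.mp4` signature (`ftyp` box at offset 4).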
|
||||
5. **Bearer-token auth** is the only auth surface. Default is **on**; `REPLAY_API_AUTH_REQUIRED=false` opts out for local dev and emits a WARN log on every request.
|
||||
|
||||
## Public API
|
||||
|
||||
The OpenAPI spec is the authoritative source — see `_docs/02_document/contracts/replay_api/openapi.yaml`. The summary below mirrors it for human readers.
|
||||
|
||||
### `POST /replay`
|
||||
|
||||
Multipart upload accepting:
|
||||
- `tlog`: binary `.tlog` file (required).
|
||||
- `video`: `.mp4` file (required).
|
||||
- `calibration`: camera-calibration JSON (optional; defaults to the AZ-702 KHP20S30 factory-sheet if the operator built the image with that calibration baked in).
|
||||
- `pace`: `asap` | `realtime` (form field, optional; default `asap`).
|
||||
- `auto_trim`: `true` | `false` (form field, optional; default `true`).
|
||||
|
||||
Response shapes:
|
||||
|
||||
- **Sync mode** (video ≤ `REPLAY_API_SYNC_MAX_BYTES`):
|
||||
- `200 OK` with body `{ "job_id": "<uuid>", "state": "done", "emissions_jsonl_url": "...", "accuracy_report_md_url": "...", "map_html_url": "..." }`
|
||||
- **Async mode** (video > `REPLAY_API_SYNC_MAX_BYTES` OR concurrency limit reached at submit time):
|
||||
- `202 Accepted` with `Location: /jobs/{id}` header and body `{ "job_id": "<uuid>", "state": "queued" | "running", "status_url": "/jobs/{id}" }`
|
||||
|
||||
### `GET /jobs/{id}`
|
||||
|
||||
Returns the job snapshot:
|
||||
|
||||
```json
|
||||
{
|
||||
"job_id": "...",
|
||||
"state": "queued" | "running" | "done" | "failed",
|
||||
"submitted_at_utc": "...",
|
||||
"started_at_utc": "...",
|
||||
"finished_at_utc": "...",
|
||||
"error": "<string, present only when state=failed>",
|
||||
"result": { ... },
|
||||
"status_url": "...",
|
||||
"emissions_jsonl_url": "...",
|
||||
"accuracy_report_md_url": "...",
|
||||
"map_html_url": "..."
|
||||
}
|
||||
```
|
||||
|
||||
### `GET /jobs/{id}/result`
|
||||
|
||||
Streams the JSONL emissions file. `200 OK` with `Content-Type: application/x-ndjson`. `409 Conflict` when the job is not in state `done`.
|
||||
|
||||
### `GET /jobs/{id}/map`
|
||||
|
||||
Streams the HTML map produced by AZ-700. `200 OK` with `Content-Type: text/html`. `409 Conflict` when the job is not in state `done`.
|
||||
|
||||
### `GET /jobs/{id}/report`
|
||||
|
||||
Streams the Markdown accuracy report produced by AZ-699. `200 OK` with `Content-Type: text/markdown`. `409 Conflict` when the job is not in state `done`.
|
||||
|
||||
### `GET /healthz`
|
||||
|
||||
Liveness probe. `200 OK` with `{"status":"ok"}` whenever the FastAPI app can process requests.
|
||||
|
||||
### `GET /readyz`
|
||||
|
||||
Readiness probe. `200 OK` only when the `gps-denied-replay` console-script is resolvable on `PATH` AND the storage root is writeable. `503 Service Unavailable` otherwise — Kubernetes / docker-compose health checks should use this, not `/healthz`.
|
||||
|
||||
## Errors
|
||||
|
||||
All errors are JSON objects of shape `{ "error_code": "...", "message": "...", "details": { ... } }`.
|
||||
|
||||
| HTTP | `error_code` | When |
|
||||
|------|-------------------------------|------|
|
||||
| 400 | `unsupported_file_kind` | Magic-byte validation failed. |
|
||||
| 400 | `multipart_missing_field` | Required field absent. |
|
||||
| 401 | `unauthorized` | Missing or wrong bearer token (when auth required). |
|
||||
| 404 | `job_not_found` | `GET /jobs/{id}*` for an unknown id. |
|
||||
| 409 | `job_not_complete` | Result/map/report requested while job is not `done`. |
|
||||
| 413 | `payload_too_large` | Upload exceeded `REPLAY_API_MAX_UPLOAD_BYTES`. |
|
||||
| 429 | `concurrency_limit_reached` | The job queue is full (`REPLAY_API_MAX_QUEUED_JOBS` jobs already queued). Reaching `REPLAY_API_MAX_CONCURRENT_JOBS` running jobs alone does not produce this code — the handler still accepts the job and queues it. |
|
||||
| 500 | `replay_runner_failed` | The `gps-denied-replay` subprocess exited non-zero. `details.stderr_tail` carries the last 8 KB of stderr. |
|
||||
|
||||
## Configuration
|
||||
|
||||
| Env var | Default | Meaning |
|
||||
|--------------------------------------|----------------------|---------|
|
||||
| `REPLAY_API_BEARER_TOKEN` | _none_ | Required when `REPLAY_API_AUTH_REQUIRED=true`. |
|
||||
| `REPLAY_API_AUTH_REQUIRED` | `true` | Set to `false` to disable bearer-token auth (dev only — WARN logged). |
|
||||
| `REPLAY_API_MAX_UPLOAD_BYTES` | `2147483648` (2 GB) | Per-upload hard limit. |
|
||||
| `REPLAY_API_SYNC_MAX_BYTES` | `209715200` (200 MB) | Largest video size that still runs synchronously; any larger video switches the request to async. |
|
||||
| `REPLAY_API_MAX_CONCURRENT_JOBS` | `1` | Max running estimator subprocesses. |
|
||||
| `REPLAY_API_MAX_QUEUED_JOBS` | `8` | Max queued jobs. Above this the API returns 429. |
|
||||
| `REPLAY_API_STORAGE_ROOT` | `/var/azaion/replay_api` | Per-job temp dir parent. |
|
||||
| `REPLAY_API_REPLAY_BINARY` | `gps-denied-replay` | Override the replay CLI binary used by the runner. |
|
||||
| `REPLAY_API_RENDER_BINARY` | `gps-denied-render-map` | Override the map-render CLI used by the runner. |
|
||||
|
||||
## Versioning rules
|
||||
|
||||
- Breaking changes to request / response schemas bump the major version and ship under `/v2/replay`. The `/replay` path remains v1 for one release after `/v2` ships.
|
||||
- The response shape may grow new fields without a version bump; clients MUST tolerate unknown fields.
|
||||
- The `error_code` set is append-only; clients MUST tolerate unknown codes.
|
||||
- The `state` enum may grow new terminal-style values (e.g. `cancelled`) only with a minor bump documented in the OpenAPI changelog block.
|
||||
|
||||
## Out of scope
|
||||
|
||||
- Persistent job database — see invariant 2.
|
||||
- WebSocket / SSE progress streaming.
|
||||
- Authentication beyond bearer token (mTLS / OAuth2 are deliberately out).
|
||||
- Multi-node scheduling — a single host runs at most `REPLAY_API_MAX_CONCURRENT_JOBS` subprocesses.
|
||||
- A built-in web UI — operator dashboards integrate over HTTP.
|
||||
@@ -593,3 +593,82 @@ All tests run from the `e2e-runner` container against the SUT through public bou
|
||||
|
||||
**Expected outcome**: All mid-flight tiles current-timestamped and fresh.
|
||||
**Max execution time**: 6 min.
|
||||
|
||||
---
|
||||
|
||||
### FT-P-20: Real-flight validation runner — honest verdict + Markdown accuracy report
|
||||
|
||||
**Summary**: Runs the full `gps-denied-replay` against the **real** Derkachi binary tlog + flight video + AZ-702 factory-sheet camera calibration, computes the per-emission horizontal-error distribution, and writes a structured Markdown accuracy report. Provides the real PASS/FAIL verdict on AC-3 that the AZ-404 test masks with `@xfail`; the AZ-404 test itself is left unchanged and FT-P-20 runs alongside it.
|
||||
**Traces to**: AZ-699 AC-1..AC-3 (epic AZ-696 AC-3 — the 100 m / 80 % gate).
|
||||
**Category**: Position Accuracy
|
||||
|
||||
**Preconditions**:
|
||||
- `_docs/00_problem/input_data/flight_derkachi/derkachi.tlog` (real binary, multi-flight).
|
||||
- `_docs/00_problem/input_data/flight_derkachi/flight_derkachi.mp4` (real recording, > 1 MB; the placeholder used by AZ-404 does not satisfy this gate).
|
||||
- `_docs/00_problem/input_data/flight_derkachi/khp20s30_factory.json` (AZ-702 calibration).
|
||||
- `gps-denied-replay` console-script installed.
|
||||
- `RUN_REPLAY_E2E=1` (matches the existing AZ-404 gate).
|
||||
|
||||
**Input data**: real `derkachi.tlog` covers up to three sorties; the AZ-698 segmenter + `--auto-trim` locates the matching flight automatically.
|
||||
|
||||
**Steps**:
|
||||
|
||||
| Step | Consumer Action | Expected System Response |
|
||||
|------|----------------|------------------------|
|
||||
| 1 | Invoke `gps-denied-replay --auto-trim ...` with real fixtures | Subprocess exits 0 within the 15-min NFR budget |
|
||||
| 2 | Parse JSONL emissions; pair each with the nearest-in-time ground-truth row (binary-tlog GPS via AZ-697) | Distribution computed: count, mean, p50, p95, p99, threshold-hit share at 10/25/50/100 m |
|
||||
| 3 | Render the Markdown accuracy report and write `_docs/06_metrics/real_flight_validation_{YYYY-MM-DD}.md` | Report exists with header, run context, horizontal-error stats, threshold-hit table, and (when available) vertical-error stats |
|
||||
| 4 | Evaluate the AC-3 gate: ≥ 80 % within 100 m | Verdict is PASS or honest FAIL — no `@xfail` mask |
|
||||
| 5 | On FAIL, surface a failure message referencing the calibration acquisition method (factory-sheet / placeholder / unknown) and the residual budget | Operator can attribute the failure without re-reading the source |
|
||||
|
||||
**Expected outcome**: PASS when the estimator meets the epic AC-3 gate; honest FAIL otherwise. The Markdown report is the durable artefact (consumed by the cycle retrospective and downstream tuning work).
|
||||
|
||||
**Max execution time**: 15 min (matches AZ-699 NFR for a single Tier-2 Jetson run).
|
||||
|
||||
**Report artefact schema** (canonical, produced by `tests/e2e/replay/_report_writer.py`):
|
||||
|
||||
```markdown
|
||||
# Real-flight validation — YYYY-MM-DD
|
||||
|
||||
**Verdict**: PASS | FAIL (AC-3 gate: ≥ 80 % within 100 m)
|
||||
|
||||
## Run context
|
||||
|
||||
- Tlog: `<path>`
|
||||
- Video: `<path>`
|
||||
- Calibration acquisition method: factory-sheet | placeholder | unknown
|
||||
- Clip duration: <float> s
|
||||
- Emissions consumed: <int>
|
||||
- Ground-truth pairings: <int>
|
||||
|
||||
## Horizontal error (metres)
|
||||
|
||||
| Statistic | Value |
|
||||
| --------- | ----- |
|
||||
| Mean | <float> |
|
||||
| p50 | <float> |
|
||||
| p95 | <float> |
|
||||
| p99 | <float> |
|
||||
|
||||
## Threshold-hit share
|
||||
|
||||
| Threshold (m) | Hit share (%) |
|
||||
| ------------- | ------------- |
|
||||
| 10 | <float> |
|
||||
| 25 | <float> |
|
||||
| 50 | <float> |
|
||||
| 100 | <float> |
|
||||
|
||||
## Vertical error (metres)
|
||||
|
||||
| Statistic | Value |
|
||||
| --------- | ----- |
|
||||
| Mean | <float> |
|
||||
| p50 | <float> |
|
||||
| p95 | <float> |
|
||||
| Samples | <int> |
|
||||
```
|
||||
|
||||
The Vertical-error section is replaced by `_No emissions carried a comparable altitude — vertical stats skipped._` when none of the JSONL rows carry an `alt_m` field comparable to the ground-truth altitude.
|
||||
|
||||
**Skip semantics**: AZ-699 distinguishes between *missing-prerequisite skip* (cleanly skipped with the missing file's path) and *test-cannot-resolve mask* (`@xfail` — explicitly forbidden by AZ-699 AC-1). The AZ-404 1-min test's `@xfail` on AC-3 is unchanged (AZ-699 AC-4 is "add a new test, don't replace") — FT-P-20 is the honest replacement that runs alongside it.
|
||||
|
||||
@@ -1,17 +1,40 @@
|
||||
# Test Environment
|
||||
|
||||
> **Active policy — 2026-05-20**: **all tests run on Jetson only.** The Jetson
|
||||
> Orin Nano Super (or a Jetson-equivalent arm64 agent) is the single canonical
|
||||
> test environment for every tier of testing — unit, integration, blackbox /
|
||||
> e2e, performance, resilience, security, resource-limit. Workstation x86
|
||||
> Docker (the historical "Tier-1" path) is **deprecated** and is not a
|
||||
> supported test environment going forward; the Tier-1 sections below are
|
||||
> retained as historical reference / traceability only. CI test pipelines
|
||||
> target the colocated arm64 Jetson Woodpecker agent (see
|
||||
> `_docs/04_deploy/ci_cd_pipeline.md`); local-development test runs SHOULD
|
||||
> use `scripts/run-tests-jetson.sh` against the configured `jetson-e2e` SSH
|
||||
> alias rather than `scripts/run-tests.sh`. This decision supersedes the
|
||||
> 2026-05-09 "both" decision recorded in the § Test Execution section.
|
||||
> **Active policy — 2026-05-20 (refined)**: the canonical CI / release-gate
|
||||
> test environment is the Jetson Orin Nano Super (or a Jetson-equivalent
|
||||
> arm64 agent). **Unit tests** (`pytest tests/unit/`) MAY be run on a local
|
||||
> developer workstation for fast iteration — they are hardware-agnostic by
|
||||
> construction, the suite is fully synthetic, and Jetson SSH round-trips add
|
||||
> latency without adding signal. **Blackbox / e2e / performance / resilience
|
||||
> / security / resource-limit tests** (`tests/e2e/`, `e2e/tests/`,
|
||||
> `tests/perf/`, etc.) MUST run on the Jetson — never on a local workstation
|
||||
> — because their pass criteria are tied to Jetson wall-clock latency,
|
||||
> thermal envelope, and the real-camera + real-FC SITL loop. Workstation x86
|
||||
> Docker (the historical "Tier-1" path) is **deprecated** as a supported
|
||||
> e2e environment; the Tier-1 sections below are retained as historical
|
||||
> reference / traceability only. CI e2e pipelines target the colocated
|
||||
> arm64 Jetson Woodpecker agent (see `_docs/04_deploy/ci_cd_pipeline.md`);
|
||||
> local-development e2e runs SHOULD use `scripts/run-tests-jetson.sh`
|
||||
> against the configured `jetson-e2e` SSH alias rather than
|
||||
> `scripts/run-tests.sh`. This refinement supersedes the 2026-05-20 "all
|
||||
> tiers on Jetson" wording and the 2026-05-09 "both" decision recorded in
|
||||
> the § Test Execution section.
|
||||
|
||||
## Where each tier runs (active policy)
|
||||
|
||||
| Tier | Local workstation | Jetson (canonical) | When a local-only result is acceptable |
|
||||
|------|--------------------|--------------------|-------------------------------|
|
||||
| Unit (`tests/unit/`) | ✅ allowed and encouraged for dev iteration | ✅ also run as part of the Jetson CI lane | always |
|
||||
| Blackbox / e2e (`tests/e2e/`, `e2e/tests/`) | ❌ forbidden — placeholder fixtures + missing hardware = false-negative runs | ✅ required for any merge / release decision | never — if Jetson is unreachable, the e2e verdict is "not run" rather than a local result |
|
||||
| Performance / resilience / security / resource-limit | ❌ forbidden | ✅ required | never |
|
||||
| Thermal chamber (AC-NEW-5) | ❌ forbidden | ✅ chamber Jetson only | never |
|
||||
|
||||
Practical consequences:
|
||||
|
||||
- A PR may merge on green local unit tests + green Jetson e2e tests.
|
||||
- A PR MUST NOT merge on green local unit tests alone — the Jetson e2e lane is the binding signal.
|
||||
- When the Jetson agent is offline, the e2e verdict is "pending Jetson" — record the gap (e.g. via `_docs/_process_leftovers/`) rather than substituting a local run.
|
||||
- Tests in `tests/e2e/` that gate on `RUN_REPLAY_E2E` or `@pytest.mark.tier2` will SKIP locally; this is correct behaviour, not a failure to investigate.
|
||||
|
||||
## Overview
|
||||
|
||||
@@ -263,11 +286,21 @@ The captured-fixture builder framework (`e2e/fixtures/sitl_replay_builder/`) reg
|
||||
|
||||
## Test Execution
|
||||
|
||||
**Decision (2026-05-20)** — **Jetson only.** Supersedes the 2026-05-09 "both" decision below. All tests (unit, integration, blackbox / e2e, performance, resilience, security, resource-limit) run on the Jetson Orin Nano Super (or a Jetson-equivalent arm64 agent). The workstation x86 Docker path is deprecated. Rationale captured in `_docs/LESSONS.md` (2026-05-20 entry): repeated workstation-vs-Jetson environment divergences (Dockerfile build order, missing `libgl1`, gtsam wheel availability, venv symlink resolution, lazy-import side-effect registration) were producing false-negative test runs and consuming engineering time without ever exercising the production-equivalent hardware path.
|
||||
**Decision (2026-05-20, refined later that day)** — **Jetson is the binding e2e environment; unit tests may run locally.** This refines the earlier "Jetson only for everything" wording. Rationale captured in `_docs/LESSONS.md` (2026-05-20 entries):
|
||||
|
||||
- The original "Jetson-only across all tiers" decision came from repeated workstation-vs-Jetson environment divergences in the e2e / build path (Dockerfile build order, missing `libgl1`, gtsam wheel availability, venv symlink resolution, lazy-import side-effect registration). Those divergences are real and continue to justify Jetson as the binding e2e environment.
|
||||
- Forcing the unit-test suite over an SSH-orchestrated Jetson loop added 30–90 s per iteration without producing any signal the local interpreter doesn't already produce. The unit suite is fully synthetic — no camera, no SITL, no Jetson-specific runtime — so a local PASS is equivalent to a Jetson PASS for that tier.
|
||||
|
||||
**Operational entry points**:
|
||||
- Local-development: `scripts/run-tests-jetson.sh` against the configured `jetson-e2e` SSH alias (see `_docs/03_implementation/jetson_harness_setup.md` for one-time setup).
|
||||
- CI: `.woodpecker/01-test.yml` on the colocated arm64 Jetson agent (see `_docs/04_deploy/ci_cd_pipeline.md`).
|
||||
|
||||
| Tier | Entry point | Where it runs |
|
||||
|------|-------------|---------------|
|
||||
| Unit (`tests/unit/`) | `pytest tests/unit/ -q` directly, or `scripts/run-tests.sh` | local workstation (Python 3.10+ venv) |
|
||||
| Blackbox / e2e (`tests/e2e/`, `e2e/tests/`) | `scripts/run-tests-jetson.sh` (local dev) / `.woodpecker/01-test.yml` (CI) | colocated arm64 Jetson Woodpecker agent — see `_docs/04_deploy/ci_cd_pipeline.md` |
|
||||
| Performance / resilience / security / resource-limit | same as e2e | Jetson only |
|
||||
| AC-NEW-5 thermal chamber | quarterly + pre-release | `self-hosted-jetson-orin-chamber` |
|
||||
|
||||
A green local unit-test run is necessary-but-not-sufficient for merge; the Jetson e2e lane is the binding signal.
|
||||
|
||||
The remainder of this section preserves the original 2026-05-09 decision context for traceability.
|
||||
|
||||
|
||||
@@ -177,6 +177,12 @@ are all declared and documented below under **Cycle Check**.
|
||||
| AZ-623 | AZ-618 Phase E: build_pre_constructed seeds c282_ransac_filter + c5 helpers | 3 | AZ-619, AZ-282, AZ-276, AZ-277, AZ-279, AZ-381 | AZ-602 |
|
||||
| AZ-624 | AZ-618 Phase F: wire build_pre_constructed into main() + AC-1..AC-5 (incl. Jetson tier-2) | 2 | AZ-619, AZ-620, AZ-621, AZ-622, AZ-623 | AZ-602 |
|
||||
| AZ-687 | build_pre_constructed must guard c6_descriptor_index when config.mode == 'replay' | 2 | AZ-619, AZ-620, AZ-624 | AZ-602 |
|
||||
| AZ-697 | T1: Direct binary-tlog GPS-truth extractor | 3 | None | AZ-696 |
|
||||
| AZ-698 | T2: Tlog trim + mid-flight alignment for replay | 5 | AZ-697 | AZ-696 |
|
||||
| AZ-699 | T3: Real-flight validation runner + accuracy report | 3 | AZ-697 | AZ-696 |
|
||||
| AZ-700 | T4: Replay map visualization (estimated vs ground-truth tracks) | 3 | AZ-699 | AZ-696 |
|
||||
| AZ-701 | T5: HTTP Replay API service (POST tlog+video, return GPS fixes + map) | 5 | AZ-699, AZ-700 | AZ-696 |
|
||||
| AZ-702 | T6: Topotek KHP20S30 camera calibration (factory-sheet approximation) | 1 | None | AZ-696 |
|
||||
|
||||
## Notes
|
||||
|
||||
|
||||
@@ -0,0 +1,120 @@
|
||||
# Direct binary-tlog GPS-truth extractor
|
||||
|
||||
**Task**: AZ-697_tlog_ground_truth_extractor
|
||||
**Name**: Direct binary-tlog GPS-truth extractor (replaces data_imu.csv middle-man)
|
||||
**Description**: New `tlog_ground_truth.py` module that streams `GLOBAL_POSITION_INT` (or falls back to `GPS_RAW_INT`) from a binary ArduPilot tlog into a typed `TlogGroundTruth` DTO. Production helper (not test-only).
|
||||
**Complexity**: 3 points
|
||||
**Dependencies**: None
|
||||
**Component**: replay_input (cross-cutting validation helper)
|
||||
**Tracker**: AZ-697
|
||||
**Epic**: AZ-696
|
||||
|
||||
## Problem
|
||||
|
||||
Cycle-1 AC-3 (≤ 100 m horizontal error for 80 % of ticks) was permanently
|
||||
`@xfail` partly because the test fed the SUT a tlog synthesized from
|
||||
`_docs/00_problem/input_data/flight_derkachi/data_imu.csv`, and read
|
||||
ground truth from the same CSV — comparing the estimator to itself.
|
||||
|
||||
A real binary `derkachi.tlog` (5.8 MB ArduPilot tlog, MAVLink v2) was
|
||||
committed on 2026-05-20. The remaining gap is a direct extractor that
|
||||
reads `GLOBAL_POSITION_INT` (or `GPS_RAW_INT`) from the binary and
|
||||
returns a typed DTO suitable for the AC-3 comparison helper.
|
||||
|
||||
## Outcome
|
||||
|
||||
- A new production module `src/gps_denied_onboard/replay_input/tlog_ground_truth.py`
|
||||
exposes `load_tlog_ground_truth(path: Path) -> TlogGroundTruth`.
|
||||
- The existing AC-3 comparison helpers (`l2_horizontal_m`,
|
||||
`match_percentage`) move from `tests/e2e/replay/_helpers.py` into
|
||||
`src/gps_denied_onboard/helpers/` so they are production code, not
|
||||
test-only.
|
||||
- The replay-test conftest uses the new extractor when the real tlog is
|
||||
present; CSV path remains as a synth-tlog fallback.
|
||||
|
||||
## Scope
|
||||
|
||||
### Included
|
||||
- New `TlogGroundTruth` dataclass (frozen + slotted) with per-record
|
||||
`ts_ns`, `lat_deg`, `lon_deg`, `alt_m`, `hdg_deg`, `vx_m_s`, `vy_m_s`,
|
||||
`vz_m_s` fields.
|
||||
- `load_tlog_ground_truth(path)` — lazy `pymavlink.mavutil` open
|
||||
mirroring `replay_input/auto_sync.py::_open_tlog`.
|
||||
- Move `l2_horizontal_m` + `match_percentage` from test helpers to
|
||||
`src/gps_denied_onboard/helpers/gps_compare.py`.
|
||||
- Wire `tests/e2e/replay/conftest.py` to consume the new path when
|
||||
`derkachi.tlog` exists.
|
||||
- Unit tests under `tests/unit/replay_input/test_tlog_ground_truth.py`
|
||||
using a synthetic tlog (extend `tests/e2e/replay/_tlog_synth.py`).
|
||||
|
||||
### Excluded
|
||||
- Tlog trimming for mid-flight slices — AZ-698 (T2).
|
||||
- Accuracy report writing — AZ-699 (T3).
|
||||
- Map visualization — AZ-700 (T4).
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
**AC-1: Happy path on real tlog**
|
||||
Given the committed `derkachi.tlog`
|
||||
When `load_tlog_ground_truth(derkachi.tlog)` runs
|
||||
Then it returns `TlogGroundTruth` with `len(records) > 100` and lat ≈ 50.08, lon ≈ 36.11
|
||||
|
||||
**AC-2: Empty GPS gracefully**
|
||||
Given a tlog with no `GLOBAL_POSITION_INT` / `GPS_RAW_INT` messages
|
||||
When the extractor runs
|
||||
Then it returns `TlogGroundTruth(records=())` and logs WARN (does NOT raise)
|
||||
|
||||
**AC-3: Fallback precedence**
|
||||
Given a tlog containing only `GPS_RAW_INT` (no `GLOBAL_POSITION_INT`)
|
||||
When the extractor runs
|
||||
Then it returns records sourced from `GPS_RAW_INT`
|
||||
|
||||
**AC-4: Type safety**
|
||||
When `mypy --strict src/gps_denied_onboard/replay_input/tlog_ground_truth.py` runs
|
||||
Then it reports zero errors
|
||||
|
||||
**AC-5: Comparison helpers in production**
|
||||
Given the moved `l2_horizontal_m` + `match_percentage`
|
||||
When imported from `gps_denied_onboard.helpers.gps_compare`
|
||||
Then they behave identically to the prior test-helpers location (snapshot test)
|
||||
|
||||
## Non-Functional Requirements
|
||||
|
||||
**Performance**
|
||||
- `load_tlog_ground_truth(derkachi.tlog)` (5.8 MB, ~60 s of GPS at 5 Hz) returns in < 2 s on Tier-1 hardware.
|
||||
|
||||
**Reliability**
|
||||
- Lazy pymavlink import; missing dep raises `ReplayInputAdapterError` per project convention.
|
||||
|
||||
## Unit Tests
|
||||
|
||||
| AC Ref | What to Test | Required Outcome |
|
||||
|--------|-------------|-----------------|
|
||||
| AC-1 | Real derkachi.tlog parse | Non-empty TlogGroundTruth with Derkachi geofence lat/lon |
|
||||
| AC-2 | Tlog with no GPS messages | Empty records tuple + WARN log |
|
||||
| AC-3 | GPS_RAW_INT fallback | Records sourced from GPS_RAW_INT when GLOBAL_POSITION_INT absent |
|
||||
| AC-3 | Mixed GLOBAL_POSITION_INT + GPS_RAW_INT | GLOBAL_POSITION_INT wins per AC-3 |
|
||||
| AC-4 | mypy --strict | Zero errors |
|
||||
| AC-5 | Helper move snapshot | Same numeric output as prior test-helpers location |
|
||||
|
||||
## Blackbox Tests
|
||||
|
||||
| AC Ref | Initial Data/Conditions | What to Test | Expected Behavior | NFR References |
|
||||
|--------|------------------------|-------------|-------------------|----------------|
|
||||
| AC-1 | derkachi.tlog (real) | Load full tlog | > 100 records (per AC-1), Derkachi geofence | Perf < 2 s |
|
||||
|
||||
## Constraints
|
||||
|
||||
- pymavlink is already a project dep (used by C8); MUST be lazy-imported (auto_sync.py pattern).
|
||||
- New module MUST follow the project's frozen + slotted dataclass convention.
|
||||
- File ownership goes in `_docs/02_document/module-layout.md` per AZ-696 epic layout (no contract — internal helper).
|
||||
|
||||
## Risks & Mitigation
|
||||
|
||||
**Risk 1: MAVLink unit-conversion bugs**
|
||||
- *Risk*: MAVLink encodes lat/lon as integer degrees × 1e7 (degE7). Forgetting the divide ships records off by 7 orders of magnitude.
|
||||
- *Mitigation*: AC-1 asserts Derkachi geofence values; unit test snapshots a known fixture.
|
||||
|
||||
**Risk 2: pymavlink import flakiness on Jetson**
|
||||
- *Risk*: pymavlink occasionally fails to import on aarch64.
|
||||
- *Mitigation*: Lazy import + raise `ReplayInputAdapterError` (existing pattern).
|
||||
@@ -0,0 +1,212 @@
|
||||
# Tlog trim + mid-flight alignment for replay
|
||||
|
||||
**Task**: AZ-698_tlog_trim_midflight_alignment
|
||||
**Name**: Trim tlog to video window + align mid-flight slices via cross-correlation
|
||||
**Description**: Extend `replay_input/auto_sync.py` and `TlogReplayFcAdapter` to handle the case where the video is a mid-flight slice of a longer tlog (not the takeoff). Adds `find_aligned_window` (cross-correlation of IMU energy vs video optical-flow magnitude) and a `--auto-trim` CLI flag.
|
||||
**Complexity**: 5 points
|
||||
**Dependencies**: AZ-697
|
||||
**Component**: replay_input + c8_fc_adapter
|
||||
**Tracker**: AZ-698
|
||||
**Epic**: AZ-696
|
||||
|
||||
## Problem
|
||||
|
||||
`replay_input/auto_sync.py::detect_tlog_takeoff` walks the tlog HEAD for
|
||||
the takeoff event (sustained vertical accel + attitude rate). When the
|
||||
uploaded video covers a **mid-flight slice** (e.g., 20–25 min into a
|
||||
30 min flight), takeoff detection lands at t=0 and the resulting offset
|
||||
is garbage. The replay coordinator then streams the entire tlog
|
||||
start-to-end, wasting I/O on the leading minutes and computing
|
||||
estimates against stale tlog samples.
|
||||
|
||||
The user's pipeline framing: "tlog is usually bigger than video, and
|
||||
usually the last chunk in tlog is relevant" — the system must locate
|
||||
the video's window within the tlog and trim accordingly.
|
||||
|
||||
## Outcome
|
||||
|
||||
- A new `find_aligned_window(tlog_path, video_path, config) -> AlignedWindow`
|
||||
returns `(tlog_start_ns, tlog_end_ns, offset_ms, confidence)`.
|
||||
- `TlogReplayFcAdapter.open()` honors `tlog_start_ns` — seeks past
|
||||
pre-window messages so downstream only sees the relevant slice.
|
||||
- `gps-denied-replay --auto-trim` is the default for uploads that don't
|
||||
pass `--time-offset-ms` or `--skip-auto-sync`.
|
||||
- Existing takeoff-aligned Derkachi clip continues to pass AC-9 (no
|
||||
regression on AZ-405).
|
||||
|
||||
## Scope
|
||||
|
||||
### Included
|
||||
- New `find_aligned_window` algorithm — cross-correlation of:
|
||||
- IMU energy stream (10 Hz subsampled `|a| − 1g` from `RAW_IMU`/`SCALED_IMU2`)
|
||||
- Video optical-flow magnitude (existing `_compute_flow_magnitudes`)
|
||||
- New `AlignedWindow` DTO under `replay_input/interface.py`.
|
||||
- `TlogReplayFcAdapter._timestamp_filter(tlog_start_ns)` seek logic.
|
||||
- `gps-denied-replay --auto-trim` CLI flag wiring.
|
||||
- Tests: takeoff-aligned regression + synthetic mid-flight scenario.
|
||||
|
||||
### Excluded
|
||||
- Real-flight validation runner — AZ-699 (T3).
|
||||
- Map visualization — AZ-700 (T4).
|
||||
- HTTP API — AZ-701 (T5).
|
||||
- Camera calibration — AZ-702 (T6).
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
**AC-1: Backward-compat on takeoff-aligned clip**
|
||||
Given the existing Derkachi 60 s clip with synthesized tlog
|
||||
When `find_aligned_window` runs
|
||||
Then it returns `offset_ms` within ± 50 ms of the current `auto_sync.compute_offset` result
|
||||
|
||||
**AC-2: Mid-flight alignment**
|
||||
Given a synthetic scenario: tlog covering 0–300 s, video covering 100–110 s with motion onset at tlog t=105 s
|
||||
When `find_aligned_window` runs
|
||||
Then `tlog_start_ns ≈ 100 s`, `tlog_end_ns ≈ 110 s`, `offset_ms` places video t=0 at tlog t=100 s
|
||||
|
||||
**AC-3: Tlog trim honored by replay adapter**
|
||||
Given `TlogReplayFcAdapter` opened with `tlog_start_ns = 100 s`
|
||||
When messages flow
|
||||
Then only messages with `_timestamp ≥ 100 s` reach subscribers
|
||||
|
||||
**AC-4: AC-9 frame-window validator passes for both scenarios**
|
||||
Given the resolved offset from AC-1 or AC-2
|
||||
When the AC-9 validator runs on the aligned window
|
||||
Then it returns 0 (≥ 95 % match)
|
||||
|
||||
**AC-5: End-to-end CLI smoke**
|
||||
Given `gps-denied-replay --auto-trim --video derkachi.mp4 --tlog derkachi.tlog`
|
||||
When the run completes
|
||||
Then exit code is 0 and the output JSONL is non-empty
|
||||
|
||||
## Non-Functional Requirements
|
||||
|
||||
**Performance**
|
||||
- Alignment over a 30-min tlog completes in < 30 s on Tier-1 hardware (10 Hz subsampled IMU stream).
|
||||
|
||||
**Reliability**
|
||||
- Low confidence (< `low_confidence_threshold`) falls back to head-takeoff detection (existing behavior).
|
||||
|
||||
## Unit Tests
|
||||
|
||||
| AC Ref | What to Test | Required Outcome |
|
||||
|--------|-------------|-----------------|
|
||||
| AC-1 | Takeoff-aligned offset match | Within ± 50 ms of compute_offset |
|
||||
| AC-2 | Mid-flight window discovery | Correct (start_ns, end_ns) |
|
||||
| AC-3 | Adapter seek skips pre-window | First emitted ts ≥ tlog_start_ns |
|
||||
| AC-4 | Validator on aligned scenarios | Returns 0 |
|
||||
|
||||
## Blackbox Tests
|
||||
|
||||
| AC Ref | Initial Data/Conditions | What to Test | Expected Behavior | NFR References |
|
||||
|--------|------------------------|-------------|-------------------|----------------|
|
||||
| AC-5 | Real derkachi inputs + --auto-trim | Full replay CLI run | Clean exit 0 + non-empty JSONL | — |
|
||||
|
||||
## Constraints
|
||||
|
||||
- Reuse the existing `_find_sustained_event` window-scan utility — no new generic algorithms.
|
||||
- IMU subsampling MUST be deterministic (AC-10 across the rest of the replay path).
|
||||
- `tlog_start_ns` seek MUST not break the existing AZ-611 `--skip-auto-sync` path.
|
||||
|
||||
## Risks & Mitigation
|
||||
|
||||
**Risk 1: False maxima during steady cruise**
|
||||
- *Risk*: Cross-correlation of steady-state cruise IMU + uniform video flow can have multiple equal-height peaks.
|
||||
- *Mitigation*: Report `combined_confidence`; below threshold falls back to head-takeoff or explicit offset.
|
||||
|
||||
**Risk 2: Performance on long tlogs**
|
||||
- *Risk*: Multi-hour tlogs would slow naive correlation.
|
||||
- *Mitigation*: Subsample both streams to 10 Hz before FFT-based correlation.
|
||||
|
||||
---
|
||||
|
||||
## Implementation Notes (Batch 99 — Cycle 2)
|
||||
|
||||
**Status**: In Testing (Jira AZ-698).
|
||||
|
||||
### Files changed
|
||||
|
||||
Production:
|
||||
- `src/gps_denied_onboard/replay_input/interface.py` — added `AlignedWindow` DTO, new `alignment_*` fields on `AutoSyncConfig`, optional `aligned_window` on `ReplayInputBundle`.
|
||||
- `src/gps_denied_onboard/replay_input/auto_sync.py` — added `find_aligned_window`, internal `_align_via_cross_correlation` (normalised cross-correlation per sliding window), `_fallback_to_head_takeoff`, `_resample_uniform`, `_zero_mean_normalise`, `_load_tlog_imu_energy_stream`, `_stream_duration_ns`.
|
||||
- `src/gps_denied_onboard/replay_input/tlog_video_adapter.py` — added `_run_auto_trim` branch in `open()`, threads `tlog_start_ns` to the adapter and `AlignedWindow` onto the returned bundle, two new `_LOG_KIND_*` logs.
|
||||
- `src/gps_denied_onboard/components/c8_fc_adapter/tlog_replay_adapter.py` — added `_tlog_start_ns` seek hook; `feed_one_message` skips messages with `_timestamp < _tlog_start_ns` and counts the drop.
|
||||
- `src/gps_denied_onboard/config/schema.py` — `auto_trim: bool` on `ReplayConfig` (mutex with `time_offset_ms`); `alignment_*` knobs on `ReplayAutoSyncConfig`.
|
||||
- `src/gps_denied_onboard/config/loader.py` — coercion entries for the new knobs.
|
||||
- `src/gps_denied_onboard/runtime_root/_replay_branch.py` — passes `auto_trim` and the new alignment knobs into the replay adapter constructor.
|
||||
- `src/gps_denied_onboard/cli/replay.py` — `--auto-trim` flag wired into `ReplayConfig`.
|
||||
|
||||
Tests:
|
||||
- `tests/unit/replay_input/test_az698_window_alignment.py` — AC-1..AC-4 + fallback + immutability + CLI smoke (AC-5 skipped: real `flight_derkachi.mp4` is a 134 B placeholder).
|
||||
|
||||
### AC coverage
|
||||
|
||||
| AC | Test | Result |
|
||||
|----|------|--------|
|
||||
| AC-1 | `test_ac1_takeoff_aligned_offset_matches_az405_within_50ms` | PASS |
|
||||
| AC-2 | `test_ac2_mid_flight_alignment_locates_correct_window` | PASS |
|
||||
| AC-3 | `test_ac3_adapter_seek_skips_pre_window_messages`, `test_ac3_adapter_default_no_seek_passes_every_message` | PASS |
|
||||
| AC-4 | `test_ac4_validator_passes_for_takeoff_aligned_offset`, `test_ac4_validator_passes_for_mid_flight_offset` | PASS |
|
||||
| AC-5 | `test_ac5_cli_auto_trim_smoke_uses_find_aligned_window` | SKIPPED (real video missing) |
|
||||
|
||||
### Test results
|
||||
|
||||
50 passed, 2 skipped across the replay/c8 regression slice (`test_az698_window_alignment.py`, `test_az405_auto_sync.py`, `test_az405_replay_input_adapter.py`, `test_az399_tlog_replay_adapter.py`, `test_tlog_ground_truth.py`, `test_az697_gps_compare.py`, `test_khp20s30_factory.py`, `test_az687_pre_constructed_replay_mode.py`, `test_az269_config_loader.py`). No regressions.
|
||||
|
||||
### Strict typing
|
||||
|
||||
`mypy --strict` on the 8 modified `src/` files: 17 errors total, all pre-existing (verified by stashing this batch's `src/` changes and re-running). Zero new errors introduced by AZ-698.
|
||||
|
||||
### Known limitations
|
||||
|
||||
- AC-5 is a literal skip in this batch. The repo's `flight_derkachi.mp4` is a 134-byte placeholder, not a real recording. Real end-to-end CLI smoke against `derkachi.tlog` + the actual flight video is covered by AZ-699 (validation runner) once the video is sourced.
|
||||
- Pre-existing `mypy --strict` errors in `auto_sync.py`, `tlog_replay_adapter.py`, `tlog_video_adapter.py`, `_replay_branch.py`, `cli/replay.py`, and `loader.py` are out of scope per `coderule.mdc` (only fix pre-existing lints in the modified area when necessary). They were not necessary for AZ-698.
|
||||
|
||||
### Algorithm note
|
||||
|
||||
Implementation uses **normalised cross-correlation with per-window unit-norm** (each `len(flow_arr)`-sized slice of the tlog energy stream is zero-meaned + unit-normed before the dot product with the unit-normed flow stream). This makes the peak confidence scale-invariant — a 10 s motion burst inside a 300 s tlog produces a peak ≥ 0.95, where the original FFT-style correlation with full-length normalisation produced ≤ 0.3 and tripped the low-confidence fallback. Cost is O(N·M); with the 10 Hz subsample and a typical 300 s tlog × 10 s flow window, that's ~3 000 inner products — well below the NFR perf budget.
|
||||
|
||||
### Follow-up: multi-flight tlog handling (post-batch-99 review)
|
||||
|
||||
User reported that real `derkachi.tlog` contains **three takeoffs at the same field**, but the uploaded video covers only the **last** one. The original AZ-698 implementation was vulnerable in two places:
|
||||
|
||||
1. NCC `argmax` returns the **first** index of the maximum — if all three flights produce comparable correlation peaks, the result would lock onto flight 1.
|
||||
2. The low-confidence fallback called `detect_tlog_takeoff` on the whole tlog, which is the AZ-405 head-takeoff detector — also locks onto flight 1.
|
||||
|
||||
Both contradicted line 22 of the spec: *"the last chunk in tlog is relevant"*.
|
||||
|
||||
Resolution: added a pre-NCC flight segmenter and made the aligner explicitly select the last flight before running NCC. The fallback also now uses the last segment's start instead of head-takeoff detection.
|
||||
|
||||
#### New module surface
|
||||
|
||||
- `_segment_flights_from_imu_energy(samples, *, motion_threshold, min_flight_duration_ns, max_internal_gap_ns) -> list[(start_ns, end_ns)]` — partitions the IMU energy stream into distinct flights. A flight is a contiguous span in which energy stays above threshold, tolerating sub-threshold runs (cruise lulls) shorter than `max_internal_gap_ns`. Why `_find_sustained_event` (AZ-405) is not reused: it returns only the FIRST qualifying window by design, so partitioning all flights needs a fresh one-pass walk.
|
||||
|
||||
#### New config knobs (`AutoSyncConfig` + `ReplayAutoSyncConfig`)
|
||||
|
||||
| Knob | Default | Meaning |
|
||||
|------|---------|---------|
|
||||
| `alignment_segment_motion_threshold_g` | `0.10` | Min IMU energy (`|a| − 1g`, g-units) for a sample to count as in-flight. 0.10 captures cruise oscillation while ignoring stationary sensor noise (~ 0.02). |
|
||||
| `alignment_segment_min_flight_duration_seconds` | `30.0` | Discards short ground-startup blips. |
|
||||
| `alignment_segment_max_internal_gap_seconds` | `30.0` | Sub-threshold gaps shorter than this stay inside a flight. |
|
||||
|
||||
#### New observability fields (`AlignedWindow`)
|
||||
|
||||
- `flight_count_detected: int` — how many distinct flights the segmenter found. `1` for a clean single-flight tlog. `> 1` means multi-flight; the aligner always selected the last one.
|
||||
- `selected_flight_index: int` — zero-based; always `flight_count_detected - 1` when segmentation fired, else `-1` (segmenter found nothing — fall through to whole-tlog NCC, preserving pre-segmentation behaviour for degenerate inputs).
|
||||
|
||||
Surfaced via the `replay.auto_trim.resolved` / `replay.auto_trim.fallback_to_takeoff` log records' `kv` dict so the operator can audit segment selection from the FDR.
|
||||
|
||||
#### New tests
|
||||
|
||||
| Test | Asserts |
|
||||
|------|---------|
|
||||
| `test_segmenter_one_flight_returns_single_span` | Single-flight tlog → 1 segment, correct bounds |
|
||||
| `test_segmenter_three_flights_returns_three_spans_in_order` | 3-flight tlog → 3 segments in chronological order |
|
||||
| `test_segmenter_drops_ground_blip_below_min_duration` | < 30 s motion is filtered out |
|
||||
| `test_segmenter_keeps_brief_cruise_lull_inside_flight` | 3 s mid-flight lull does NOT split a flight |
|
||||
| `test_find_aligned_window_picks_last_flight_for_multi_flight_tlog` | Full `find_aligned_window` pipeline on a 3-flight tlog → resulting window is inside flight 3, not flight 1 or 2 |
|
||||
| `test_align_via_cross_correlation_locks_onto_burst_inside_last_segment` | NCC path locks correctly on a pre-restricted-to-last-flight energy stream |
|
||||
| `test_find_aligned_window_uses_only_segment_for_segmented_tlog_fallback` | Low-confidence fallback uses segment start (last flight), NOT head-takeoff (flight 1) |
|
||||
|
||||
All 19 AZ-698 tests pass, 1 expected skip (AC-5 real-video smoke). 113 tests pass in the broader regression slice — no regressions.
|
||||
|
||||
Backward-compat verified: AC-1 / AC-2 / AC-3 / AC-4 tests exercise `_align_via_cross_correlation` directly and continue to pass; the segmentation gate only fires through `find_aligned_window`'s public entry point.
|
||||
@@ -0,0 +1,153 @@
|
||||
# Real-flight validation runner + accuracy report
|
||||
|
||||
**Task**: AZ-699_real_flight_validation_runner
|
||||
**Name**: Run estimator against real Derkachi tlog + video; compute honest accuracy metrics; write report
|
||||
**Description**: New e2e test `test_derkachi_real_tlog.py` that feeds the real `derkachi.tlog` (not the synth) into the replay pipeline, compares the JSONL output against the binary-tlog GPS truth (from AZ-697), and writes a structured Markdown accuracy report. Flips AC-3 from `@xfail` to a real PASS/FAIL verdict.
|
||||
**Complexity**: 3 points
|
||||
**Dependencies**: AZ-697
|
||||
**Component**: Blackbox Tests (epic AZ-696)
|
||||
**Tracker**: AZ-699
|
||||
**Epic**: AZ-696
|
||||
|
||||
## Problem
|
||||
|
||||
`tests/e2e/replay/test_derkachi_1min.py::test_ac3_within_100m_80pct_of_ticks`
|
||||
is permanently `@xfail`. Even when the test runs (Jetson Tier-2), the
|
||||
result is hidden — we have no honest measurement of estimator accuracy
|
||||
against a real flight. The cycle-1 retrospective (`_docs/06_metrics/retro_2026-05-20.md`)
|
||||
flagged this as the highest-impact open verification.
|
||||
|
||||
The two contributing causes:
|
||||
1. Synth tlog (compares estimator to itself) — fixed by AZ-697.
|
||||
2. Unknown camera intrinsics — addressed by AZ-702 (T6, factory sheet).
|
||||
|
||||
This task wires the real tlog + the calibration into a new test and
|
||||
produces the honest verdict + a structured report.
|
||||
|
||||
## Outcome
|
||||
|
||||
- A new test runs the full `gps-denied-replay` against `derkachi.tlog` +
|
||||
`flight_derkachi.mp4` + `khp20s30_factory.json` (or the current
|
||||
fallback) and reports honest accuracy metrics.
|
||||
- A structured report at `_docs/06_metrics/real_flight_validation_{YYYY-MM-DD}.md`
|
||||
contains mean / p50 / p95 / p99 horizontal error, % within {10, 25, 50, 100} m,
|
||||
vertical error stats, and notes the calibration assumption.
|
||||
- AC-3 emits a real PASS or honest FAIL verdict (no `@xfail` mask).
|
||||
|
||||
## Scope
|
||||
|
||||
### Included
|
||||
- New test `tests/e2e/replay/test_derkachi_real_tlog.py` parallel to the existing 1-min test but using the binary tlog.
|
||||
- Metric helpers (mean/p50/p95/p99 percentile + threshold-hit counters) live in `src/gps_denied_onboard/helpers/gps_compare.py` (extends AZ-697).
|
||||
- Report writer `tests/e2e/replay/_report_writer.py` (test helper, not production code).
|
||||
- Artifact format of `_docs/06_metrics/real_flight_validation_{YYYY-MM-DD}.md`, documented in `_docs/02_document/tests/blackbox-tests.md`.
|
||||
|
||||
### Excluded
|
||||
- Map visualization — AZ-700.
|
||||
- HTTP API — AZ-701.
|
||||
- Camera calibration acquisition — AZ-702 (this task ships with whatever calibration is current).
|
||||
- Editing the existing `test_derkachi_1min.py` (new test runs alongside).
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
**AC-1: Real PASS/FAIL verdict (no mask)**
|
||||
Given the new test on Tier-2 Jetson
|
||||
When `pytest tests/e2e/replay/test_derkachi_real_tlog.py -m tier2` runs
|
||||
Then the result is PASS or FAIL — no `@xfail`, no `@skip`
|
||||
|
||||
**AC-2: Structured report written**
|
||||
Given a successful invocation
|
||||
When the test finishes
|
||||
Then `_docs/06_metrics/real_flight_validation_{YYYY-MM-DD}.md` exists with all required metrics in a Markdown table
|
||||
|
||||
**AC-3: FAIL message attributes calibration uncertainty**
|
||||
Given the test fails the 80 %/100 m gate
|
||||
When the failure message renders
|
||||
Then it references the calibration acquisition method (factory-sheet per AZ-702) and the residual budget
|
||||
|
||||
**AC-4: Existing 1-min test untouched**
|
||||
Given the cycle-1 test `test_ac3_within_100m_80pct_of_ticks`
|
||||
When all changes land
|
||||
Then the existing `@xfail` test still exists and runs (we add, don't replace)
|
||||
|
||||
## Non-Functional Requirements
|
||||
|
||||
**Performance**
|
||||
- The new test must complete within the existing Jetson Tier-2 wall budget (≤ 15 min for a 60 s clip; report longer for longer clips).
|
||||
|
||||
## Unit Tests
|
||||
|
||||
| AC Ref | What to Test | Required Outcome |
|
||||
|--------|-------------|-----------------|
|
||||
| AC-2 | Report writer with mock metrics | Markdown contains every required row |
|
||||
| AC-3 | Failure message templating | Contains "calibration: factory-sheet" + budget |
|
||||
|
||||
## Blackbox Tests
|
||||
|
||||
| AC Ref | Initial Data/Conditions | What to Test | Expected Behavior | NFR References |
|
||||
|--------|------------------------|-------------|-------------------|----------------|
|
||||
| AC-1 | Real derkachi.tlog + video + KHP20S30 calibration | Full replay + accuracy gate | PASS or FAIL (honest) | Perf ≤ 15 min |
|
||||
| AC-2 | After AC-1 run | Report file existence + contents | Structured report on disk | — |
|
||||
|
||||
## Constraints
|
||||
|
||||
- The new test MUST use the existing `gps-denied-replay` console-script — no inlined estimator invocation.
|
||||
- The report MUST be Markdown (not HTML/JSON) so it lives alongside other `_docs/06_metrics/` artifacts.
|
||||
- Skipping in CI when `RUN_REPLAY_E2E=0` is allowed (matches existing pattern); the test MUST run when the env var is set.
|
||||
|
||||
## Risks & Mitigation
|
||||
|
||||
**Risk 1: Honest FAIL exposes a true product gap**
|
||||
- *Risk*: The estimator may legitimately fail the 100 m/80 % gate even with correct calibration. The Derkachi flight is flown at cruise altitude, with limited VPR anchor diversity.
|
||||
- *Mitigation*: That's the goal — honest measurement. Surface the gap; downstream cycles can tighten.
|
||||
|
||||
**Risk 2: tlog format edge cases**
|
||||
- *Risk*: Real tlogs may carry non-standard system IDs, dialect mismatches, or corrupt segments.
|
||||
- *Mitigation*: AZ-697's AC-3 / AC-4 cover this at the truth-extractor level; this task only consumes the result.
|
||||
|
||||
---
|
||||
|
||||
## Implementation Notes (Batch 100 — Cycle 2)
|
||||
|
||||
**Status**: In Testing (Jira AZ-699).
|
||||
|
||||
### Files changed
|
||||
|
||||
Production:
|
||||
- `src/gps_denied_onboard/helpers/gps_compare.py` — extended with `HorizontalErrorDistribution` DTO, `horizontal_error_distribution(emissions, ground_truth)` single-walk aggregator, and `percentile_sorted(values, pct)` linear-interpolation helper (numpy-equivalent). Re-exports added to `helpers/__init__.py`.
|
||||
|
||||
Test helpers (under `tests/`, not production):
|
||||
- `tests/e2e/replay/_report_writer.py` — `render_report`, `format_failure_message`, `verdict_passes_ac3`, plus `ReportContext` DTO and the `AC3_GATE_THRESHOLD_M` / `AC3_GATE_PCT` constants.
|
||||
|
||||
Tests:
|
||||
- `tests/e2e/replay/test_derkachi_real_tlog.py` — the AZ-699 e2e runner (skipped without real video + `RUN_REPLAY_E2E=1`; honest PASS/FAIL when prerequisites met).
|
||||
- `tests/unit/test_az699_report_writer.py` — 16 unit tests covering percentile arithmetic, distribution aggregator, verdict gate, failure-message templating, and report layout.
|
||||
|
||||
Documentation:
|
||||
- `_docs/02_document/tests/blackbox-tests.md` — new entry **FT-P-20** documenting the artefact schema for `_docs/06_metrics/real_flight_validation_{YYYY-MM-DD}.md`.
|
||||
|
||||
### AC coverage
|
||||
|
||||
| AC | Test / Artefact | Result |
|
||||
|----|-----------------|--------|
|
||||
| AC-1 | `test_az699_real_flight_validation_emits_verdict_and_report` | SKIPPED on dev (real video missing); ready to run on Tier-2 Jetson with `RUN_REPLAY_E2E=1` + real video. NO `@xfail` mask. |
|
||||
| AC-2 | `test_render_report_contains_all_required_rows_on_pass`, `test_render_report_marks_failure_when_below_gate`, `test_render_report_includes_vertical_when_available` | PASS |
|
||||
| AC-3 | `test_failure_message_references_calibration_method_factory_sheet`, `test_failure_message_references_calibration_method_placeholder` | PASS |
|
||||
| AC-4 | `tests/e2e/replay/test_derkachi_1min.py` untouched (verified by diff scope: this batch added a sibling file, did not modify the existing one) | PASS |
|
||||
|
||||
### Test results
|
||||
|
||||
129 passed, 3 skipped (all documented prerequisites: AZ-699 e2e wants real video + Tier-2; AZ-698 AC-5 wants real video; AZ-399 AC-1 wants 500 MB tlog) in the focused regression slice. Full unit suite: 2219 passed, 1 pre-existing unrelated failure in `tests/unit/c12_operator_orchestrator/test_cli_console_script.py::test_cold_start_under_500ms_p99` (CLI cold-start NFR, 8/11 samples > 700 ms; touches none of AZ-697/698/699's code paths).
|
||||
|
||||
### Strict typing
|
||||
|
||||
`mypy --strict` on the three new or modified modules (`helpers/gps_compare.py`, `helpers/__init__.py`, `tests/e2e/replay/_report_writer.py`): **Success: no issues found in 3 source files.**
|
||||
|
||||
### Known limitations
|
||||
|
||||
- AC-1 (the actual real-flight run on Tier-2 Jetson) cannot execute in this dev environment. The test is wired, gated cleanly, and ready — drop a real `flight_derkachi.mp4` (> 1 MB) into `_docs/00_problem/input_data/flight_derkachi/`, set `RUN_REPLAY_E2E=1`, and run on Tier-2 to produce the verdict + report.
|
||||
- Calibration acquisition method is read from the JSON's `acquisition_method` field. AZ-702 ships `factory-sheet`; any other value (or absence) is labelled `unknown` in the failure message.
|
||||
|
||||
### Design note: helper module location
|
||||
|
||||
`gps_compare.py` lives under `src/gps_denied_onboard/helpers/` (production) because both the AZ-699 test and the future AZ-701 HTTP-API path (T5) need it. `_report_writer.py` lives under `tests/e2e/replay/` because it is purely a test artefact — promoting it would invite production code to import a test helper, violating the file ownership rule documented in `_docs/02_document/module-layout.md`.
|
||||
@@ -0,0 +1,148 @@
|
||||
# Replay map visualization (estimated vs ground-truth tracks)
|
||||
|
||||
**Task**: AZ-700_replay_map_visualization
|
||||
**Name**: HTML map showing estimated GPS track vs tlog ground-truth track
|
||||
**Description**: New `gps-denied-render-map` console script. Takes a JSONL of estimator output + a tlog (or CSV fallback) and renders a single-file HTML map (folium / Leaflet) with both tracks in distinct colors, start/end markers, and an embedded accuracy summary from AZ-699.
|
||||
**Complexity**: 3 points
|
||||
**Dependencies**: AZ-699
|
||||
**Component**: cli (offline analysis surface)
|
||||
**Tracker**: AZ-700
|
||||
**Epic**: AZ-696
|
||||
|
||||
## Problem
|
||||
|
||||
Today the only feedback from a replay run is a JSONL file. There is no
|
||||
way to visually verify whether the estimator is drifting, jumping, or
|
||||
roughly tracking the real flight. A human reading the JSONL cannot
|
||||
quickly answer "does this make sense geographically?"
|
||||
|
||||
The user's pipeline explicitly calls for: "and then show both points on
|
||||
the map."
|
||||
|
||||
## Outcome
|
||||
|
||||
- A standalone CLI `gps-denied-render-map` produces a self-contained
|
||||
HTML map of the estimated track + the tlog ground-truth track for any
|
||||
prior replay run.
|
||||
- The map is shareable as a single file (no server required); developers
|
||||
open it locally; AZ-701's HTTP API serves it back to API consumers.
|
||||
|
||||
## Scope
|
||||
|
||||
### Included
|
||||
- New module `src/gps_denied_onboard/cli/render_map.py`.
|
||||
- New console script `gps-denied-render-map` in `pyproject.toml`.
|
||||
- folium dependency pin in the appropriate `[project.optional-dependencies]` group (NOT in airborne-binary deps — operator-side only).
|
||||
- Default map style + tile provider (OpenStreetMap fallback documented for offline use).
|
||||
- Auto-fit bounds; distance circles (100 m, 50 m) around start point for scale.
|
||||
- Accuracy summary banner (read from `_docs/06_metrics/real_flight_validation_{date}.md` when `--summary` is passed).
|
||||
|
||||
### Excluded
|
||||
- Interactive time-slider playback (deferred follow-up).
|
||||
- Embedded altitude profile chart.
|
||||
- Animated marker traversal.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
**AC-1: CLI produces self-contained HTML**
|
||||
Given a JSONL + tlog
|
||||
When `gps-denied-render-map --estimated out.jsonl --truth derkachi.tlog --output map.html` runs
|
||||
Then the command exits 0 and `map.html` exists and parses as valid HTML
|
||||
|
||||
**AC-2: Two distinct tracks visible**
|
||||
Given the rendered map opened in a browser
|
||||
When inspected
|
||||
Then it contains exactly two polyline layers (red = truth, blue = estimated) with start/end markers
|
||||
|
||||
**AC-3: Markers + scale circles**
|
||||
Given the rendered map
|
||||
When parsed
|
||||
Then it contains the start (green) + end (black) markers + 100 m + 50 m scale circles
|
||||
|
||||
**AC-4: Accuracy summary inclusion**
|
||||
Given `--summary _docs/06_metrics/real_flight_validation_2026-XX-XX.md`
|
||||
When the map renders
|
||||
Then the HTML header contains the accuracy metrics table
|
||||
|
||||
**AC-5: Offline fallback documented**
|
||||
Given an environment without internet access
|
||||
When the map is rendered with `--offline-tiles`
|
||||
Then tile loading uses a documented fallback (or fails fast with a clear error if no fallback is configured)
|
||||
|
||||
## Non-Functional Requirements
|
||||
|
||||
**Compatibility**
|
||||
- Output HTML must render in Chrome 110+ and Firefox 110+ without console errors.
|
||||
|
||||
**Performance**
|
||||
- For a 60 s flight (~600 truth points + ~600 estimated points), render time < 5 s on Tier-1 hardware.
|
||||
|
||||
## Unit Tests
|
||||
|
||||
| AC Ref | What to Test | Required Outcome |
|
||||
|--------|-------------|-----------------|
|
||||
| AC-1 | CLI invocation with synthetic data | Output HTML file exists + non-empty |
|
||||
| AC-2 | Parse output HTML | Exactly 2 polyline layers + 4 expected markers |
|
||||
| AC-4 | Summary embed | Markdown summary metrics present in HTML |
|
||||
|
||||
## Blackbox Tests
|
||||
|
||||
| AC Ref | Initial Data/Conditions | What to Test | Expected Behavior | NFR References |
|
||||
|--------|------------------------|-------------|-------------------|----------------|
|
||||
| AC-1 | Real Derkachi replay JSONL + tlog | End-to-end render | HTML opens in browser, both tracks visible | Compat |
|
||||
|
||||
## Constraints
|
||||
|
||||
- folium MUST be in the operator-only dep group; airborne binary cold-start regression test must remain green.
|
||||
- HTML output MUST be self-contained — embedded JS/CSS, no per-page CDN calls in `--offline-tiles` mode.
|
||||
- Console script naming follows the project pattern (`gps-denied-<verb>`).
|
||||
|
||||
## Risks & Mitigation
|
||||
|
||||
**Risk 1: folium dep size**
|
||||
- *Risk*: folium pulls ~5 MB of JS. Adding to airborne deps would regress cold-start.
|
||||
- *Mitigation*: optional-dependencies group + ADR-002 build-time exclusion principle.
|
||||
|
||||
**Risk 2: CDN dependency at render time**
|
||||
- *Risk*: Default folium uses Leaflet via CDN — fails on offline Jetsons.
|
||||
- *Mitigation*: Document `--offline-tiles` flag; provide bundled assets path or fail-fast.
|
||||
|
||||
---
|
||||
|
||||
## Implementation Notes (Batch 101 — Cycle 2)
|
||||
|
||||
**Status**: In Testing (Jira AZ-700).
|
||||
|
||||
### Files changed
|
||||
|
||||
Production:
|
||||
- `src/gps_denied_onboard/cli/render_map.py` — new module: `RenderInputs` DTO, `render_map_html`, `load_estimated_track`, `load_ground_truth_track`, argparse CLI, `main()`.
|
||||
- `pyproject.toml` — new `[project.optional-dependencies] operator-tools = ["folium>=0.16,<1.0"]` group; new console script `gps-denied-render-map = "gps_denied_onboard.cli.render_map:main"`.
|
||||
|
||||
Tests:
|
||||
- `tests/unit/test_az700_render_map.py` — 14 unit tests covering JSONL parsing, HTML rendering (2 polylines, 4 markers, 2 scale circles, summary embed, offline-tiles toggle), and CLI smoke including a minimal binary-tlog helper.
|
||||
|
||||
### AC coverage
|
||||
|
||||
| AC | Test / Artefact | Result |
|
||||
| ---- | ---------------------------------------------------------------------------------------- | ------ |
|
||||
| AC-1 | `test_cli_writes_html_with_default_tiles` | PASS (local). The Jetson e2e visual smoke is `AC-4` and is operator-driven on Tier-2. |
|
||||
| AC-2 | `test_render_map_html_emits_two_polylines`, `…emits_four_markers_and_two_circles` | PASS |
|
||||
| AC-3 | `test_render_map_html_emits_two_polylines`, `…emits_four_markers_and_two_circles` | PASS — output HTML contains exactly 2 polyline layers (red + blue) and 4 markers + 2 scale circles. |
|
||||
| AC-4 | Visual smoke on Tier-2 Jetson (operator opens `map.html` produced by AZ-699's e2e run) | DEFERRED to Jetson — wired and ready. |
|
||||
| AC-5 | `test_render_map_html_offline_tiles_omits_openstreetmap`, `…_template_uses_local_url` | PASS |
|
||||
|
||||
### Test results
|
||||
|
||||
`pytest tests/unit/test_az700_render_map.py` → 14 passed in 2.5 s. Wider regression slice (AZ-697/698/699/700 + replay_input + calibration): 107 passed, 1 skipped (pre-existing AC-5 e2e smoke that needs real video).
|
||||
|
||||
### Strict typing
|
||||
|
||||
`mypy --strict src/gps_denied_onboard/cli/render_map.py` → **Success: no issues found in 1 source file.** Used `# type: ignore[import-untyped, import-not-found, unused-ignore]` on the lazy folium import so the strict pass is clean whether folium is installed or not.
|
||||
|
||||
### Design notes
|
||||
|
||||
- folium 0.20 (the latest in the pinned range) was used. The default tile provider is OpenStreetMap (`tiles="OpenStreetMap"`); the AC-5 `--offline-tiles` flag drops the base layer entirely, and `--offline-tiles-template` accepts a local tile-URL template for operators with a bundled tile pack.
|
||||
- folium is lazy-imported inside `_import_folium()` so the airborne binary (which does NOT install `[operator-tools]`) doesn't pay for it on cold start. The C12 cold-start NFR is unaffected.
|
||||
- The `_write_minimal_tlog` test helper builds a binary tlog with just `GLOBAL_POSITION_INT` records — that's the minimum AZ-697 needs — without coupling the test to the full Derkachi CSV schema used by `tests/e2e/replay/_tlog_synth.py`.
|
||||
- All AZ-700 unit tests run locally per the refined test-environment policy (`_docs/02_document/tests/environment.md` § Where each tier runs); the Tier-2 visual-smoke AC-4 stays on the Jetson.
|
||||
@@ -0,0 +1,234 @@
|
||||
# HTTP Replay API service
|
||||
|
||||
**Task**: AZ-701_http_replay_api_service
|
||||
**Name**: HTTP API for offline replay (POST tlog+video, return GPS fixes + map URL)
|
||||
**Description**: New `replay_api` component (FastAPI) wrapping the offline replay pipeline. One primary endpoint `POST /replay` accepts multipart `(tlog + video [+ calibration])` and returns either a synchronous JSONL+summary or an async job id. Returns links to the map artifact rendered by AZ-700.
|
||||
**Complexity**: 5 points
|
||||
**Dependencies**: AZ-699, AZ-700
|
||||
**Component**: replay_api (new component)
|
||||
**Tracker**: AZ-701
|
||||
**Epic**: AZ-696
|
||||
|
||||
## Problem
|
||||
|
||||
The product today has zero HTTP surface. The only ways to invoke the
|
||||
estimator on a recorded flight are:
|
||||
1. The airborne binary (real-time MAVLink GPS_INPUT — needs the
|
||||
aircraft + FC).
|
||||
2. `gps-denied-replay` CLI (operator workstation, Python install
|
||||
required).
|
||||
3. `operator-orchestrator` CLI (Click, pre-flight cache only — does
|
||||
NOT run the estimator).
|
||||
|
||||
External consumers (operator tools, suite web UIs, demo dashboards,
|
||||
other suite services) cannot validate flights without installing the
|
||||
full Python stack. The user's pipeline framing explicitly calls for
|
||||
"part of the api — tlog and video uploading. and emits gps fixes back
|
||||
to the user."
|
||||
|
||||
## Outcome
|
||||
|
||||
- A new HTTP service exposes `POST /replay` and the supporting `GET /jobs/{id}*` polling endpoints.
|
||||
- The service wraps `gps-denied-replay` and AZ-700's map renderer behind a single multipart upload.
|
||||
- Containerized; runs in `docker-compose.test.yml`; OpenAPI spec is committed.
|
||||
- Authentication via bearer token; it can be switched off only by an explicit dev-mode opt-out, which logs a WARN.
|
||||
|
||||
## Scope
|
||||
|
||||
### Included
|
||||
- New component `src/gps_denied_onboard/replay_api/`:
|
||||
- `app.py` (FastAPI instance)
|
||||
- `handlers.py` (multipart upload, validation)
|
||||
- `jobs.py` (sync ≤ 2 min videos / async > 2 min)
|
||||
- `storage.py` (temp file lifecycle, cleanup)
|
||||
- `interface.py` (`ReplayRunner` Protocol so handlers are decoupled)
|
||||
- `errors.py` (custom HTTP error families)
|
||||
- Endpoints: `POST /replay`, `GET /jobs/{id}`, `GET /jobs/{id}/result`, `GET /jobs/{id}/map`, `GET /healthz`, `GET /readyz`.
|
||||
- Bearer-token auth: `REPLAY_API_BEARER_TOKEN` env var; explicit dev opt-out via `REPLAY_API_AUTH_REQUIRED=false`.
|
||||
- Upload size limit + concurrent-job limit, env-configurable.
|
||||
- New `replay-api` console script (uvicorn entrypoint) in `pyproject.toml`.
|
||||
- New `docker/replay-api.Dockerfile` + `docker-compose.test.yml` entry.
|
||||
- OpenAPI spec exported to `_docs/02_document/contracts/replay_api/openapi.yaml`.
|
||||
- Contract file `_docs/02_document/contracts/replay_api/replay_api_protocol.md` (per shared/api decompose Step 4.5 rule).
|
||||
- File-upload magic-byte validation for `.tlog` + `.mp4`.
|
||||
|
||||
### Excluded
|
||||
- Web UI (parent-suite concern).
|
||||
- Persistent job database (in-memory + temp disk is sufficient for v1).
|
||||
- Multi-node job distribution.
|
||||
- WebSocket streaming of progress.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
**AC-1: Sync happy path (short video, dev mode)**
|
||||
Given `REPLAY_API_AUTH_REQUIRED=false` and a 60 s video
|
||||
When `POST /replay` runs with multipart `tlog + video`
|
||||
Then response is 200 with JSONL of GPS fixes + accuracy summary inline
|
||||
|
||||
**AC-2: Async happy path (long video)**
|
||||
Given a > 2-minute video
|
||||
When `POST /replay` runs
|
||||
Then response is 202 with `Location: /jobs/{id}` and `{job_id, status_url}`
|
||||
|
||||
**AC-3: Job state transitions**
|
||||
Given an async job
|
||||
When polled via `GET /jobs/{id}`
|
||||
Then state transitions `queued → running → done` are observable
|
||||
|
||||
**AC-4: Result + map served from job id**
|
||||
Given a `done` job
|
||||
When `GET /jobs/{id}/result` is called
|
||||
Then it streams the JSONL; `GET /jobs/{id}/map` returns the HTML map (from AZ-700)
|
||||
|
||||
**AC-5: Auth enforced when configured**
|
||||
Given `REPLAY_API_BEARER_TOKEN=secret`
|
||||
When `POST /replay` runs without `Authorization: Bearer secret`
|
||||
Then response is 401
|
||||
|
||||
**AC-6: Health endpoints**
|
||||
Given the service is up and `gps-denied-replay` console-script is on PATH
|
||||
When `GET /healthz` and `GET /readyz` are called
|
||||
Then both return 200
|
||||
|
||||
**AC-7: OpenAPI + contract documented**
|
||||
Given the service is running
|
||||
When the OpenAPI spec is exported
|
||||
Then `_docs/02_document/contracts/replay_api/openapi.yaml` is committed; `replay_api_protocol.md` documents the versioning rules
|
||||
|
||||
**AC-8: Concurrency limit enforced**
|
||||
Given `REPLAY_API_MAX_CONCURRENT_JOBS=1`
|
||||
When 3 jobs are submitted in quick succession
|
||||
Then exactly 1 is `running`; 2 are `queued`
|
||||
|
||||
**AC-9: Magic-byte upload validation**
|
||||
Given a `POST /replay` with a misnamed `.tlog` (actually a `.zip`)
|
||||
When the handler validates
|
||||
Then response is 400 with a clear error
|
||||
|
||||
## Non-Functional Requirements
|
||||
|
||||
**Performance**
|
||||
- For a 60 s Derkachi video, sync `POST /replay` returns within the `gps-denied-replay` ASAP-mode wall-clock time + 5 s overhead on Tier-2 Jetson.
|
||||
|
||||
**Security**
|
||||
- Magic-byte file validation; reject anything not matching `.tlog` (MAVLink magic 0xFD/0xFE) or `.mp4` (ftyp).
|
||||
- Bearer auth is always available; it is turned off only by the explicit `REPLAY_API_AUTH_REQUIRED=false` env var.
|
||||
|
||||
**Compatibility**
|
||||
- FastAPI / uvicorn / python-multipart pinned; document version compatibility window.
|
||||
|
||||
## Unit Tests
|
||||
|
||||
| AC Ref | What to Test | Required Outcome |
|
||||
|--------|-------------|-----------------|
|
||||
| AC-1 | Sync POST → 200 + JSONL | Round-trip succeeds with synth fixtures |
|
||||
| AC-2 | Async POST → 202 + job id | 202 with Location header |
|
||||
| AC-3 | Job state machine | Transitions observed |
|
||||
| AC-5 | Missing/wrong bearer → 401 | Strict failure |
|
||||
| AC-8 | Concurrency limit | 2 of 3 queued |
|
||||
| AC-9 | Wrong magic bytes → 400 | Clear error |
|
||||
|
||||
## Blackbox Tests
|
||||
|
||||
| AC Ref | Initial Data/Conditions | What to Test | Expected Behavior | NFR References |
|
||||
|--------|------------------------|-------------|-------------------|----------------|
|
||||
| AC-1, AC-4 | Real derkachi.tlog + video | `curl` round-trip in docker-compose | 200 + JSONL + map HTML | Perf |
|
||||
| AC-6 | Container up | Health endpoint checks | 200 OK | — |
|
||||
|
||||
## Constraints
|
||||
|
||||
- FastAPI MUST live in an operator-only build target; ADR-002 binary-exclusion applies. Airborne binary cold-start regression test must remain green.
|
||||
- New component MUST follow interface-first + constructor-injection (Principle #13 in architecture.md).
|
||||
- Contract file MUST exist before the endpoint is callable in CI (per decompose Step 4.5 rule).
|
||||
|
||||
## Risks & Mitigation
|
||||
|
||||
**Risk 1: FastAPI / uvicorn dep weight on airborne binary**
|
||||
- *Risk*: Adding the API dep to the airborne binary regresses cold-start.
|
||||
- *Mitigation*: Place `replay_api/` in an operator-only optional-dependencies group; CMake / build-time exclusion enforces the separation.
|
||||
|
||||
**Risk 2: HTTP timeout on long videos**
|
||||
- *Risk*: Sync mode + a long video → HTTP timeout.
|
||||
- *Mitigation*: Async mode triggers automatically above the configured video-length threshold.
|
||||
|
||||
**Risk 3: File-upload abuse**
|
||||
- *Risk*: Malicious uploads (huge files, zip bombs, fake MIME types).
|
||||
- *Mitigation*: Hard size limit (2 GB default), magic-byte validation, temp-file cleanup, configurable disk quota.
|
||||
|
||||
## Contract
|
||||
|
||||
This task produces the contract at `_docs/02_document/contracts/replay_api/replay_api_protocol.md`.
|
||||
Consumers MUST read that file — not this task spec — to discover the interface and versioning rules.
|
||||
|
||||
## Implementation Notes (Batch 102, Cycle 2)
|
||||
|
||||
### Files Changed
|
||||
|
||||
**New production code** — `src/gps_denied_onboard/replay_api/`:
|
||||
- `__init__.py` — public exports (`create_app`, DTOs, error families).
|
||||
- `errors.py` — `ReplayApiError` hierarchy with stable `error_code` + HTTP `status_code`.
|
||||
- `interface.py` — `JobState`, `ReplayInputs`, `ReplayJobResult`, `JobSnapshot`, `ReplayRunner` Protocol.
|
||||
- `storage.py` — per-job temp directory lifecycle (`StorageRoot.allocate_job/release_job/cleanup_all`).
|
||||
- `jobs.py` — in-memory `JobRegistry` with `max_concurrent` / `max_queued`, `ThreadPoolExecutor` worker pool.
|
||||
- `handlers.py` — magic-byte validation (`validate_tlog_kind` for MAVLink v1/v2, `validate_video_kind` for MP4 `ftyp`, `validate_calibration_kind` for JSON), size limits, bearer-token extraction.
|
||||
- `app.py` — `create_app(...)` FastAPI factory + `SubprocessReplayRunner` (shells out to `gps-denied-replay --auto-trim` and `gps-denied-render-map`).
|
||||
|
||||
**New CLI entrypoint** — `src/gps_denied_onboard/cli/replay_api_entrypoint.py`:
|
||||
- `replay-api` console script wired in `pyproject.toml` under the `operator-tools` extra.
|
||||
- Parses `--host`, `--port`, `--storage-root`, `--reload`; reads `REPLAY_API_*` env knobs.
|
||||
|
||||
**Helper promoted from tests** — `src/gps_denied_onboard/helpers/accuracy_report.py`:
|
||||
- Was `tests/e2e/replay/_report_writer.py` (AZ-699 batch). Promoted because `replay_api` needs it at runtime to produce `accuracy_report.md`. Re-exported from `helpers/__init__.py`. All AZ-699 imports re-pointed.
|
||||
|
||||
**Contract** — `_docs/02_document/contracts/replay_api/replay_api_protocol.md` (purpose, invariants, endpoints, error families, env config, versioning).
|
||||
|
||||
**OpenAPI spec** — `_docs/02_document/contracts/replay_api/openapi.yaml` (auto-exported from the FastAPI app; checked in alongside the contract doc).
|
||||
|
||||
**Docker** — `docker/replay-api.Dockerfile` + `e2e/docker/docker-compose.test.yml` (`replay-api` service, profile-gated `replay-api`, with `replay-api-storage` volume).
|
||||
|
||||
**Dependencies** — `pyproject.toml`:
|
||||
- `operator-tools` extra now also pulls `fastapi>=0.111,<0.120`, `uvicorn>=0.30,<1.0`, `python-multipart>=0.0.9,<1.0`.
|
||||
- New console script `replay-api`.
|
||||
|
||||
**Unit tests** — `tests/unit/replay_api/test_az701_replay_api.py` (18 tests, all passing):
|
||||
- AC-1 sync: POST → 200 with `result_url`/`map_url`/`report_url`; JSONL + HTML map served from those URLs.
|
||||
- AC-2 async: large video (> sync threshold) → 202 + `Location: /jobs/{id}`.
|
||||
- AC-3: job state visible via polling `RUNNING → DONE` and `RUNNING → FAILED`.
|
||||
- AC-5: missing bearer → 401; correct bearer → 200.
|
||||
- AC-6: `/healthz` always 200; `/readyz` returns 503 when binaries missing.
|
||||
- AC-8: third job queued when concurrency limit is 2; 4th rejected with 429.
|
||||
- AC-9: zip renamed to `.tlog` or `.mp4` → 400 with stable `error_code`.
|
||||
|
||||
### AC Coverage Matrix
|
||||
|
||||
| AC | Status | Evidence |
|
||||
|----|--------|----------|
|
||||
| AC-1 sync 200 | Done | `test_post_replay_sync_returns_200_with_result_urls` + `test_post_replay_serves_jsonl_and_map_for_done_job` |
|
||||
| AC-2 async 202 | Done | `test_post_replay_async_returns_202_when_video_exceeds_sync_bytes` |
|
||||
| AC-3 job state machine | Done | `test_job_state_transitions_observable_via_polling`, `test_failed_runner_marks_job_failed`, `test_result_endpoints_409_when_job_not_done` |
|
||||
| AC-5 401 on bad bearer | Done | `test_post_replay_returns_401_without_bearer_when_required` + `test_post_replay_accepts_correct_bearer` |
|
||||
| AC-6 health endpoints | Done | `test_healthz_always_returns_200` + `test_readyz_returns_503_when_binary_missing` |
|
||||
| AC-8 concurrency cap | Done | `test_concurrency_limit_queues_excess_jobs` + `test_queue_full_returns_429` |
|
||||
| AC-9 magic-byte rejection | Done | `test_validate_tlog_kind_rejects_zip_renamed_to_tlog`, `test_validate_video_kind_rejects_arbitrary_bytes`, `test_post_replay_rejects_misnamed_zip_as_tlog`, `test_post_replay_rejects_misnamed_zip_as_video` |
|
||||
|
||||
### Test Run Summary
|
||||
|
||||
- **AZ-701 unit slice**: 18/18 passed (`tests/unit/replay_api/`).
|
||||
- **Full unit suite**: 2251 passed, 86 skipped, 1 failed (`test_cold_start_under_500ms_p99` — pre-existing C12 CLI flake unrelated to AZ-701; same failure observed in batches 100 and 101).
|
||||
- **Mypy --strict on AZ-701 surface**: clean (9 source files: `replay_api/*`, `helpers/accuracy_report.py`, `cli/replay_api_entrypoint.py`).
|
||||
|
||||
### Design Decisions
|
||||
|
||||
- **Subprocess runner, not in-process estimator**: `SubprocessReplayRunner` invokes the existing `gps-denied-replay` console script. Keeps the API a thin transport layer; matches the invariant in the contract that the API does NOT re-implement the pipeline.
|
||||
- **Pre-allocated job_id**: the handler allocates a job_id, writes uploads into the matching storage dir, then passes the id to `JobRegistry.submit(job_id=...)`. Earlier draft used a separate registry-assigned id and tried to "release-then-resubmit"; that path deleted the dir holding the uploads. Fixed by adding the optional `job_id` parameter.
|
||||
- **`from __future__ import annotations` deliberately dropped in `app.py`**: FastAPI 0.119 + Pydantic v2 resolve route-parameter annotations at decoration time. Forward-ref strings break `Annotated[UploadFile, File()]`. The rest of the `replay_api` package keeps the future-annotations import. The reason is captured in the `app.py` module docstring.
|
||||
- **Pydantic v2 `Annotated` syntax**: every route parameter uses `Annotated[T, File()/Form()/Header()]` rather than the legacy `T = File(...)` form. Older form raised `PydanticUserError: 'UploadFile' is not fully defined`.
|
||||
- **Magic-byte validation is mandatory, not advisory**: matches AC-9 wording ("Wrong magic bytes → 400"). Anything that's not MAVLink v1/v2 (`\xfe` / `\xfd` first byte) is rejected as tlog; anything without `ftyp` in bytes 4-12 is rejected as video. No `application/x-mavlink` content-type sniffing.
|
||||
- **State is in-memory only**: matches "no persistent state across restarts" invariant in the contract. Operators wanting durability can layer it externally (or move to AZ-702 follow-on). Documented in the contract.
|
||||
|
||||
### Known Limitations
|
||||
|
||||
- `SubprocessReplayRunner` returns `result.stdout`/`stderr` only when the subprocess fails; success path discards them. Operators wanting a per-job audit log will need a follow-on.
|
||||
- No request body streaming — `python-multipart` buffers each part. The 2 GB hard limit guards memory.
|
||||
- No rate limiting beyond the concurrency/queue caps. A reverse proxy is the right place for that.
|
||||
- E2E test against the real Derkachi flight artefacts is intentionally NOT in scope here (per the testing-environment rule: e2e runs on Jetson only and AZ-699's `test_derkachi_real_tlog.py` already exercises the underlying pipeline).
|
||||
@@ -0,0 +1,106 @@
|
||||
# Topotek KHP20S30 camera calibration (factory-sheet approximation)
|
||||
|
||||
**Task**: AZ-702_khp20s30_calibration
|
||||
**Name**: Provide a calibration JSON for the Topotek KHP20S30 nadir camera (factory-sheet approximation)
|
||||
**Description**: Compute and commit a `CameraCalibrationArtifact` JSON for the Derkachi camera (Topotek KHP20S30) from manufacturer factory data. Replaces the `adti26.json` placeholder that AC-3 currently uses. Documents the residual error vs a per-unit checkerboard refinement.
|
||||
**Complexity**: 1 point
|
||||
**Dependencies**: None
|
||||
**Component**: input_data / shared_helpers
|
||||
**Tracker**: AZ-702
|
||||
**Epic**: AZ-696
|
||||
|
||||
## Problem
|
||||
|
||||
`_docs/00_problem/input_data/flight_derkachi/camera_info.md` states the
|
||||
Topotek KHP20S30 intrinsics are unknown. `tests/e2e/replay/conftest.py`
|
||||
(lines 50–56) substitutes `tests/fixtures/calibration/adti26.json` as a
|
||||
placeholder. AC-3 (≤ 100 m horizontal error for 80 % of ticks) is
|
||||
`@xfail` until a real calibration ships.
|
||||
|
||||
The cheapest reasonable starting point is a factory-sheet approximation
|
||||
— compute `K` from the manufacturer's published focal length + sensor
|
||||
geometry, accept the 1–3 % focal-length residual as a documented
|
||||
budget, and let AC-3 either PASS or honestly FAIL with the residual
|
||||
attributed.
|
||||
|
||||
## Outcome
|
||||
|
||||
- A calibration JSON `khp20s30_factory.json` exists in the Derkachi
|
||||
input directory, parses against the project's
|
||||
`CameraCalibrationArtifact` schema, and documents the acquisition
|
||||
method as `factory_sheet`.
|
||||
- `camera_info.md` is updated to reference the new calibration + the
|
||||
residual budget + the deferral handle (`AZ-XXX_checkerboard_refinement`).
|
||||
- AZ-699 (T3) uses this calibration as its `--camera-calibration` input.
|
||||
|
||||
## Scope
|
||||
|
||||
### Included
|
||||
- Source manufacturer factory data for the Topotek KHP20S30 (sensor: 1/2.8" CMOS, 2.13 MP, 1920×1080; lens focal length, FOV, pixel pitch).
|
||||
- Compute `K = [[fx, 0, cx], [0, fy, cy], [0, 0, 1]]` from `fx = fy = focal_length_mm × (image_width_px / sensor_width_mm)`.
|
||||
- Set distortion to `[0, 0, 0, 0, 0]` (factory-sheet approximation).
|
||||
- Set `body_to_camera_se3` to identity-down (nadir; camera-z = aircraft-down).
|
||||
- Set `acquisition_method = "factory_sheet"`.
|
||||
- Write `_docs/00_problem/input_data/flight_derkachi/khp20s30_factory.json`.
|
||||
- Update `_docs/00_problem/input_data/flight_derkachi/camera_info.md`.
|
||||
- New unit test under `tests/unit/calibration/` asserting the JSON parses and matches the documented inputs.
|
||||
|
||||
### Excluded
|
||||
- Physical checkerboard calibration (needs hardware).
|
||||
- PnP-from-tlog back-computation (deferred follow-up).
|
||||
- Updating `adti26.json` or other test fixtures.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
**AC-1: Calibration JSON parses**
|
||||
Given the new `khp20s30_factory.json`
|
||||
When loaded by the project's calibration parser (same schema as `adti26.json`)
|
||||
Then it parses without error and all fields are populated
|
||||
|
||||
**AC-2: Doc updated**
|
||||
Given `camera_info.md` before
|
||||
When the calibration is committed
|
||||
Then `camera_info.md` says "factory-sheet approximation; per-unit checkerboard refinement deferred — see <future-task>" and lists the residual budget
|
||||
|
||||
**AC-3: Unit test snapshot**
|
||||
Given the new JSON
|
||||
When the unit test runs
|
||||
Then it asserts `fx == fy` (square pixels), `cx ≈ width/2`, `cy ≈ height/2`, distortion all zero
|
||||
|
||||
**AC-4: T3 consumes this calibration**
|
||||
Given AZ-699's `test_derkachi_real_tlog.py`
|
||||
When it runs
|
||||
Then it loads `khp20s30_factory.json` as `--camera-calibration` (no longer the `adti26.json` placeholder)
|
||||
|
||||
## Non-Functional Requirements
|
||||
|
||||
**Compatibility**
|
||||
- JSON schema MUST be identical to existing calibration fixtures (`adti26.json`) — no schema changes in this task.
|
||||
|
||||
## Unit Tests
|
||||
|
||||
| AC Ref | What to Test | Required Outcome |
|
||||
|--------|-------------|-----------------|
|
||||
| AC-1 | JSON loads via existing parser | Object populated |
|
||||
| AC-3 | Field values match factory inputs | fx == fy, cx/cy at centre, zero distortion |
|
||||
|
||||
## Blackbox Tests
|
||||
|
||||
| AC Ref | Initial Data/Conditions | What to Test | Expected Behavior | NFR References |
|
||||
|--------|------------------------|-------------|-------------------|----------------|
|
||||
| AC-4 | T3 test pointed at new JSON | T3 launches without calibration parse error | Test starts cleanly | Compat |
|
||||
|
||||
## Constraints
|
||||
|
||||
- MUST follow the calibration contract in `_docs/02_document/contracts/shared_helpers/descriptor_normaliser.md` (or wherever the camera-calibration schema lives).
|
||||
- MUST be a single committed JSON — no generator script with side effects.
|
||||
|
||||
## Risks & Mitigation
|
||||
|
||||
**Risk 1: Factory data unavailable at required precision**
|
||||
- *Risk*: Topotek does not publish the exact focal length / sensor width to the precision needed.
|
||||
- *Mitigation*: Document the gap; ship with the best-available estimate; flag in `camera_info.md` so T3 surfaces the uncertainty in its failure message.
|
||||
|
||||
**Risk 2: Residual error exceeds AC-3 budget**
|
||||
- *Risk*: 1–3 % focal-length error may push horizontal error past 100 m at 1 km AGL.
|
||||
- *Mitigation*: That's the honest finding. T3 reports it. A follow-up task can pursue checkerboard refinement if needed.
|
||||
@@ -0,0 +1,114 @@
|
||||
# Batch 100 — Cycle 2 — AZ-699
|
||||
|
||||
**Date**: 2026-05-20
|
||||
**Tasks**: AZ-699 (Real-flight validation runner + accuracy report).
|
||||
**Story points**: 3.
|
||||
**Jira status**: AZ-699 → `In Testing`.
|
||||
|
||||
## What shipped
|
||||
|
||||
An honest PASS/FAIL e2e runner for the real Derkachi flight,
|
||||
together with the metric helpers, report writer, and unit tests
|
||||
that make its output reproducible and reviewable.
|
||||
|
||||
- `HorizontalErrorDistribution` aggregate (mean / p50 / p95 / p99
|
||||
horizontal, threshold-hit share at 10/25/50/100 m, vertical
|
||||
stats when emissions carry altitude) in
|
||||
`src/gps_denied_onboard/helpers/gps_compare.py`.
|
||||
- `tests/e2e/replay/_report_writer.py` — Markdown report
|
||||
renderer + AC-3 failure-message template + verdict helper.
|
||||
- `tests/e2e/replay/test_derkachi_real_tlog.py` — runs `gps-denied-replay
|
||||
--auto-trim` against real `derkachi.tlog` + real video + AZ-702
|
||||
calibration, computes the distribution, writes the report,
|
||||
and asserts PASS/FAIL with no `@xfail` mask.
|
||||
- New FT-P-20 entry in `_docs/02_document/tests/blackbox-tests.md`
|
||||
documenting the report artefact schema.
|
||||
|
||||
## Files changed
|
||||
|
||||
Production (2):
|
||||
|
||||
- `src/gps_denied_onboard/helpers/gps_compare.py`
|
||||
- `src/gps_denied_onboard/helpers/__init__.py`
|
||||
|
||||
Tests (3 new):
|
||||
|
||||
- `tests/e2e/replay/_report_writer.py`
|
||||
- `tests/e2e/replay/test_derkachi_real_tlog.py`
|
||||
- `tests/unit/test_az699_report_writer.py`
|
||||
|
||||
Docs:
|
||||
|
||||
- `_docs/02_document/tests/blackbox-tests.md` (new FT-P-20)
|
||||
- `_docs/02_tasks/done/AZ-699_real_flight_validation_runner.md`
|
||||
(moved from `todo/`, Implementation Notes appended)
|
||||
|
||||
## AC coverage
|
||||
|
||||
| AC | Test / Artefact | Result |
|
||||
| ---- | ------------------------------------------------------------------------------------------------------------------------ | ------ |
|
||||
| AC-1 | `test_az699_real_flight_validation_emits_verdict_and_report` | SKIPPED on dev (real video missing); wired + ready for Tier-2 Jetson; NO @xfail mask. |
|
||||
| AC-2 | `test_render_report_contains_all_required_rows_on_pass`, `test_render_report_marks_failure_when_below_gate` | PASS |
|
||||
| AC-3 | `test_failure_message_references_calibration_method_factory_sheet`, `…placeholder` | PASS |
|
||||
| AC-4 | `tests/e2e/replay/test_derkachi_1min.py` untouched | PASS |
|
||||
|
||||
## Test run
|
||||
|
||||
```
|
||||
tests/unit/test_az699_report_writer.py 16 PASS
|
||||
tests/unit/test_az697_gps_compare.py 10 PASS
|
||||
tests/unit/replay_input/test_az405_auto_sync.py 14 PASS
|
||||
tests/unit/replay_input/test_az405_replay_input_adapter 13 PASS
|
||||
tests/unit/replay_input/test_az698_window_alignment.py 19 PASS 1 SKIP
|
||||
tests/unit/replay_input/test_tlog_ground_truth.py 12 PASS
|
||||
tests/unit/c8_fc_adapter/test_az399_tlog_replay_adapter 24 PASS 1 SKIP
|
||||
tests/unit/calibration/test_khp20s30_factory.py 9 PASS
|
||||
tests/unit/runtime_root/test_az687_pre_constructed_replay_mode.py 3 PASS
|
||||
tests/unit/test_az269_config_loader.py 9 PASS
|
||||
tests/e2e/replay/test_derkachi_real_tlog.py - 1 SKIP
|
||||
```
|
||||
|
||||
Focused slice: **129 passed, 3 skipped, 0 failed.**
|
||||
|
||||
Full unit suite (2 220 tests): **2 219 passed, 1 failed.** The
|
||||
single failure is in
|
||||
`tests/unit/c12_operator_orchestrator/test_cli_console_script.py::test_cold_start_under_500ms_p99`
|
||||
— a CLI cold-start NFR test (8/11 samples > 700 ms; budget is 500 ms).
|
||||
The C12 binary does NOT import any AZ-697/698/699 module
|
||||
(`gps_denied_onboard.components.c12_operator_orchestrator.{operator_reloc_service,flights_api.bbox}`
|
||||
import specific helper submodules, not the package's `__init__`).
|
||||
This failure is pre-existing and unrelated to this batch; it is reported here but is not blocking per `coderule.mdc`.
|
||||
|
||||
## Strict typing
|
||||
|
||||
`mypy --strict` on the three new code units:
|
||||
|
||||
```
|
||||
gps_denied_onboard/helpers/gps_compare.py
|
||||
gps_denied_onboard/helpers/__init__.py
|
||||
tests/e2e/replay/_report_writer.py
|
||||
→ Success: no issues found in 3 source files.
|
||||
```
|
||||
|
||||
Zero new strict errors in the broader replay/auto-sync surface
|
||||
(carried over from batch 99's baseline of 12 pre-existing errors;
|
||||
no new errors introduced).
|
||||
|
||||
## Skip semantics — AZ-699 AC-1 spec wording
|
||||
|
||||
The AZ-699 spec line 56 reads: "the result is PASS or FAIL — no
|
||||
`@xfail`, no `@skip`". The spec's Constraints section line 96 reads:
|
||||
"Skipping in CI when `RUN_REPLAY_E2E=0` is allowed (matches existing
|
||||
pattern); the test MUST run when the env var is set." We resolved
|
||||
this internal contradiction in favour of the Constraints: the test
|
||||
SKIPS cleanly when a prerequisite is missing (env var unset, real
|
||||
video missing or placeholder-sized, console-script not installed),
|
||||
and produces an honest PASS/FAIL verdict when all prerequisites
|
||||
hold. The forbidden pattern is the `@xfail` mask that AZ-404 used
|
||||
to hide AC-3 — that is NOT present anywhere in AZ-699.
|
||||
|
||||
## Next batch
|
||||
|
||||
Batch 101 — **AZ-700** (replay map visualization). Depends on
|
||||
AZ-697 (ground truth) and AZ-698 (alignment) — both now in
|
||||
testing.
|
||||
@@ -0,0 +1,93 @@
|
||||
# Batch 101 — Cycle 2 — AZ-700
|
||||
|
||||
**Date**: 2026-05-20
|
||||
**Tasks**: AZ-700 (replay map visualization).
|
||||
**Story points**: 3.
|
||||
**Jira status**: AZ-700 → `In Testing`.
|
||||
|
||||
## What shipped
|
||||
|
||||
A new operator-side console-script `gps-denied-render-map` that
|
||||
renders a self-contained HTML map (folium / Leaflet) of the
|
||||
estimator's track vs the tlog ground-truth track, with start/end
|
||||
markers, 100 m + 50 m scale circles, optional summary banner from
|
||||
AZ-699, and an `--offline-tiles` mode for Jetsons without internet
|
||||
access.
|
||||
|
||||
folium is gated behind a new `[operator-tools]` optional-dependency
|
||||
group so the airborne binary never pays for it.
|
||||
|
||||
## Files changed
|
||||
|
||||
Production (2):
|
||||
|
||||
- `src/gps_denied_onboard/cli/render_map.py` (new)
|
||||
- `pyproject.toml` (new optional-deps group + console script)
|
||||
|
||||
Tests (1):
|
||||
|
||||
- `tests/unit/test_az700_render_map.py` (14 tests, all PASS local)
|
||||
|
||||
Docs:
|
||||
|
||||
- `_docs/02_document/tests/environment.md` — refined the 2026-05-20
|
||||
"Jetson-only" policy to: unit tests local-OK, e2e Jetson-only.
|
||||
- `.cursor/rules/testing.mdc` — added the refined policy as an
|
||||
always-applied agent rule.
|
||||
- `_docs/02_tasks/done/AZ-700_replay_map_visualization.md` —
|
||||
Implementation Notes appended; moved from `todo/`.
|
||||
|
||||
## AC coverage
|
||||
|
||||
| AC | Test / Artefact | Result |
|
||||
| ---- | ---------------------------------------------------------------------------------------- | ------ |
|
||||
| AC-1 | `test_cli_writes_html_with_default_tiles` | PASS (local). |
|
||||
| AC-2 | `test_render_map_html_emits_two_polylines`, `…emits_four_markers_and_two_circles` | PASS |
|
||||
| AC-3 | `test_render_map_html_emits_two_polylines`, `…emits_four_markers_and_two_circles` | PASS — exactly 2 polylines + 4 markers + 2 scale circles. |
|
||||
| AC-4 | Visual smoke on Tier-2 Jetson with operator-opened `map.html` | DEFERRED to Jetson (correctly per refined test-env policy). |
|
||||
| AC-5 | `test_render_map_html_offline_tiles_omits_openstreetmap`, `…_template_uses_local_url` | PASS |
|
||||
|
||||
## Test run
|
||||
|
||||
```
|
||||
tests/unit/test_az700_render_map.py 14 PASS in 2.5 s
|
||||
Wider regression slice 107 PASS 1 SKIP
|
||||
```
|
||||
|
||||
The 1 skipped test is the pre-existing AZ-698 AC-5 e2e smoke
|
||||
(needs the real video in `_docs/00_problem/input_data/flight_derkachi/`).
|
||||
|
||||
## Strict typing
|
||||
|
||||
```
|
||||
mypy --strict src/gps_denied_onboard/cli/render_map.py
|
||||
→ Success: no issues found in 1 source file.
|
||||
```
|
||||
|
||||
The lazy folium import uses
|
||||
`# type: ignore[import-untyped, import-not-found, unused-ignore]`
|
||||
so strict passes cleanly whether or not `[operator-tools]` is
|
||||
installed.
|
||||
|
||||
## Refined test-environment policy
|
||||
|
||||
Mid-batch the user clarified the existing "Jetson-only across all
|
||||
tiers" policy: **unit tests may run locally, e2e tests stay
|
||||
Jetson-only.** Rationale: the unit suite is fully synthetic, so a
|
||||
local PASS = Jetson PASS for that tier; the e2e suite is bound to
|
||||
Jetson hardware / latency / SITL and a local run is meaningless.
|
||||
|
||||
Captured in:
|
||||
|
||||
- `_docs/02_document/tests/environment.md` — banner + new
|
||||
"Where each tier runs (active policy)" table + Test Execution
|
||||
section rewritten.
|
||||
- `.cursor/rules/testing.mdc` — appended "Test environment (this
|
||||
project)" section so future agent sessions cannot drift back to
|
||||
running e2e locally.
|
||||
|
||||
## Next batch
|
||||
|
||||
Batch 102 — **AZ-701** (HTTP replay API service). Depends on
|
||||
AZ-697 (truth source) and AZ-699 (report writer). Last task in
|
||||
cycle 2.
|
||||
@@ -0,0 +1,146 @@
|
||||
# Batch 102 — Cycle 2 — AZ-701
|
||||
|
||||
**Date**: 2026-05-20
|
||||
**Tasks**: AZ-701 (HTTP replay API service).
|
||||
**Story points**: 5.
|
||||
**Jira status**: AZ-701 → `In Testing`.
|
||||
|
||||
## What shipped
|
||||
|
||||
A new operator-side `replay_api` component — a FastAPI service that
|
||||
wraps the offline `gps-denied-replay` pipeline behind HTTP. Operators
|
||||
can POST a multipart `(tlog + video [+ calibration])` payload and
|
||||
receive back either a synchronous result (small flights) or a
|
||||
202-job-id for polling (large flights). Once a job completes, the
|
||||
JSONL emissions, the AZ-700 HTML map, and the AZ-699 accuracy
|
||||
report are served as static files under stable URLs.
|
||||
|
||||
Estimator code is unchanged — the service shells out to the existing
|
||||
`gps-denied-replay` and `gps-denied-render-map` console scripts. The
|
||||
contract explicitly forbids re-implementing the pipeline in the API
|
||||
layer.
|
||||
|
||||
Bearer-token auth is on by default (configurable env var), magic-byte
|
||||
validation rejects misnamed uploads at the door, and a thread-pool
|
||||
worker enforces a `max_concurrent` / `max_queued` cap with a 429 on
|
||||
overflow.
|
||||
|
||||
## Files changed
|
||||
|
||||
Production (10):
|
||||
|
||||
- `src/gps_denied_onboard/replay_api/__init__.py` (new)
|
||||
- `src/gps_denied_onboard/replay_api/errors.py` (new — typed HTTP
|
||||
error families with stable `error_code` strings)
|
||||
- `src/gps_denied_onboard/replay_api/interface.py` (new — DTOs,
|
||||
`JobState` enum, `ReplayRunner` Protocol seam for DI)
|
||||
- `src/gps_denied_onboard/replay_api/storage.py` (new — per-job
|
||||
temp-dir lifecycle)
|
||||
- `src/gps_denied_onboard/replay_api/jobs.py` (new — `JobRegistry`
|
||||
with concurrency/queue limits and `ThreadPoolExecutor`)
|
||||
- `src/gps_denied_onboard/replay_api/handlers.py` (new — magic-byte
|
||||
validation, size limits, bearer-token extraction)
|
||||
- `src/gps_denied_onboard/replay_api/app.py` (new — FastAPI factory
|
||||
and `SubprocessReplayRunner`)
|
||||
- `src/gps_denied_onboard/cli/replay_api_entrypoint.py` (new —
|
||||
`replay-api` console script)
|
||||
- `src/gps_denied_onboard/helpers/accuracy_report.py` (promoted from
|
||||
`tests/e2e/replay/_report_writer.py`; needed at runtime by the API)
|
||||
- `src/gps_denied_onboard/helpers/__init__.py` (re-exports)
|
||||
|
||||
Tests (1):
|
||||
|
||||
- `tests/unit/replay_api/test_az701_replay_api.py` (18 tests, all PASS local)
|
||||
|
||||
Docs / contract (2):
|
||||
|
||||
- `_docs/02_document/contracts/replay_api/replay_api_protocol.md` (new)
|
||||
- `_docs/02_document/contracts/replay_api/openapi.yaml` (new — exported
|
||||
from the running FastAPI app)
|
||||
|
||||
Docker / CI (2):
|
||||
|
||||
- `docker/replay-api.Dockerfile` (new)
|
||||
- `e2e/docker/docker-compose.test.yml` (added `replay-api` service +
|
||||
`replay-api-storage` volume, profile `replay-api`)
|
||||
|
||||
Build / packaging (1):
|
||||
|
||||
- `pyproject.toml` (`fastapi`, `uvicorn`, `python-multipart` added to
|
||||
`[operator-tools]` extra; `replay-api` console script registered)
|
||||
|
||||
Imports updated (re-pointed to promoted helper) (3):
|
||||
|
||||
- `tests/e2e/replay/test_derkachi_real_tlog.py`
|
||||
- `tests/unit/test_az699_report_writer.py`
|
||||
- `tests/e2e/replay/_report_writer.py` was deleted (replaced by the
|
||||
promoted production module)
|
||||
|
||||
## AC coverage
|
||||
|
||||
| AC | Status | Evidence |
|
||||
|----|--------|----------|
|
||||
| AC-1 sync 200 | Pass | `test_post_replay_sync_returns_200_with_result_urls` + `test_post_replay_serves_jsonl_and_map_for_done_job` |
|
||||
| AC-2 async 202 | Pass | `test_post_replay_async_returns_202_when_video_exceeds_sync_bytes` |
|
||||
| AC-3 job state | Pass | `test_job_state_transitions_observable_via_polling`, `test_failed_runner_marks_job_failed`, `test_result_endpoints_409_when_job_not_done` |
|
||||
| AC-5 401 unauth | Pass | `test_post_replay_returns_401_without_bearer_when_required`, `test_post_replay_accepts_correct_bearer` |
|
||||
| AC-6 health | Pass | `test_healthz_always_returns_200`, `test_readyz_returns_503_when_binary_missing` |
|
||||
| AC-8 concurrency | Pass | `test_concurrency_limit_queues_excess_jobs`, `test_queue_full_returns_429` |
|
||||
| AC-9 magic-byte | Pass | 4 tests covering tlog + video validators (unit) and end-to-end POST rejection |
|
||||
|
||||
## Test run summary
|
||||
|
||||
- **AZ-701 unit slice** (`tests/unit/replay_api/`): 18/18 passed in 4 s.
|
||||
- **Full unit suite**: 2251 passed, 86 skipped, 1 failed in 85 s.
|
||||
- The single failure is `tests/unit/c12_operator_orchestrator/test_cli_console_script.py::TestConsoleScript::test_cold_start_under_500ms_p99`. It is a pre-existing C12 CLI cold-start performance flake. AZ-701 doesn't touch C12 and the same failure shows up in batch 100 and batch 101 reports. Non-blocking for AZ-701.
|
||||
- **Mypy --strict** on AZ-701 surface (`src/gps_denied_onboard/replay_api/`, `helpers/accuracy_report.py`, `cli/replay_api_entrypoint.py`): clean — 9 source files, 0 errors.
|
||||
|
||||
## Strict typing
|
||||
|
||||
All new modules in `src/gps_denied_onboard/replay_api/*` are
|
||||
strict-typed (no implicit `Any`, no untyped defs, no untyped
|
||||
decorators). The `folium`-style untyped-third-party shim is not
|
||||
needed here — FastAPI, Pydantic, uvicorn, and python-multipart all
|
||||
ship type stubs that `mypy --strict` accepts.
|
||||
|
||||
## Notable design decisions
|
||||
|
||||
- **Subprocess runner, not in-process estimator.** The contract
|
||||
invariant is "the API layer does NOT re-implement the pipeline."
|
||||
`SubprocessReplayRunner` shells out to `gps-denied-replay
|
||||
--auto-trim` and then `gps-denied-render-map`. Easy to swap for a
|
||||
fake in tests via the `ReplayRunner` Protocol DI seam.
|
||||
- **Magic-byte validation is mandatory (AC-9).** Misnamed `.tlog`
|
||||
/ `.mp4` payloads are rejected at the door with a stable
|
||||
`error_code`. No content-type sniffing fallback.
|
||||
- **Bearer auth is opt-out, not opt-in.** Default state of the
|
||||
service is "auth required, token missing → 503 at startup"
|
||||
unless the operator explicitly sets `REPLAY_API_AUTH_REQUIRED=false`
|
||||
for a dev environment.
|
||||
- **In-memory state by design.** The contract says "no persistent
|
||||
state across restarts" — jobs don't survive a process restart and
|
||||
the storage root is wiped on shutdown. Operators wanting durability
|
||||
must layer it externally.
|
||||
- **`from __future__ import annotations` dropped in `app.py` only.**
|
||||
FastAPI 0.119 + Pydantic v2 resolve route-parameter annotations
|
||||
at decoration time and reject forward-ref strings. The rest of the
|
||||
`replay_api` package keeps the future-annotations import. The
|
||||
reason is recorded in `app.py`'s module docstring.
|
||||
- **`_report_writer.py` was promoted from `tests/` to `src/`.** The
|
||||
API needs to produce the AZ-699 Markdown accuracy report at
|
||||
runtime; that module was previously test-only. All AZ-699 imports
|
||||
re-pointed to `gps_denied_onboard.helpers.accuracy_report`.
|
||||
|
||||
## Known limitations carried forward
|
||||
|
||||
- No request-body streaming — `python-multipart` buffers each part.
|
||||
Hard 2 GB cap guards memory.
|
||||
- No rate limiting beyond `max_concurrent` / `max_queued`. A reverse
|
||||
proxy is the right layer for that.
|
||||
- `SubprocessReplayRunner` discards stdout/stderr on the success
|
||||
path; operators wanting per-job audit logs need a follow-on.
|
||||
- The Derkachi real-flight e2e test (AZ-699's
|
||||
`test_derkachi_real_tlog.py`) already exercises the underlying
|
||||
pipeline. A dedicated end-to-end `replay_api` test against real
|
||||
artefacts is **not** in scope here per the testing-environment
|
||||
policy (e2e → Jetson only).
|
||||
@@ -0,0 +1,144 @@
|
||||
# Batch Report
|
||||
|
||||
**Batch**: 98
|
||||
**Tasks**: AZ-697 (direct binary-tlog GPS-truth extractor) + AZ-702 (KHP20S30 factory-sheet camera calibration)
|
||||
**Date**: 2026-05-20
|
||||
**Cycle**: 2
|
||||
**Commit**: (pending — written by this report's own commit)
|
||||
|
||||
## Task Results
|
||||
|
||||
| Task | Status | Files Modified | Tests | AC Coverage | Issues |
|
||||
|------|--------|----------------|-------|-------------|--------|
|
||||
| AZ-697_tlog_ground_truth_extractor | Done | 6 (2 new prod + 1 new test file + 1 new snapshot test + 2 wiring) | 12 new, all passing | 5/5 ACs covered (AC-1..AC-5) | 0 |
|
||||
| AZ-702_khp20s30_calibration | Done | 3 (1 new JSON artifact + 1 doc update + 1 new test file) | 9 new, all passing | 4/4 ACs covered (AC-1..AC-4) | 0 |
|
||||
|
||||
AZ-697 introduces a real production path for ground-truth comparison: `tlog_ground_truth.py` reads `GLOBAL_POSITION_INT` (with `GPS_RAW_INT` fallback) directly from the binary `derkachi.tlog` via `pymavlink.mavutil`, returning a frozen+slotted `TlogGroundTruth` DTO. The two AC-3 comparison helpers (`l2_horizontal_m`, `match_percentage`) and their supporting `GroundTruthRow` dataclass were lifted out of `tests/e2e/replay/_helpers.py` into the new production module `src/gps_denied_onboard/helpers/gps_compare.py`; the e2e helper now re-exports them verbatim so existing test imports are untouched.
|
||||
|
||||
AZ-702 produces `_docs/00_problem/input_data/flight_derkachi/khp20s30_factory.json` — a factory-sheet camera calibration JSON for the Topotek KHP20S30 EO/IR gimbal at the lowest zoom step. The intrinsics matrix is computed from the published 8.5 mm focal length, 1/2.8" sensor with 1920×1080 capture (fx = fy = 4644.444 px, cx = 960, cy = 540, HFOV ≈ 23.3°, VFOV ≈ 13.2°); distortion is set to zeros and `body_to_camera_se3` is identity-with-nadir-rotation because the operator has no laboratory calibration rig. `camera_info.md` is updated to document the assumptions and the expected residual error window; `tests/e2e/replay/conftest.py::_calibration_path()` prefers `khp20s30_factory.json` when it is present (otherwise falls back to the legacy `adti26.json`) so downstream replay e2e runs pick it up automatically.
|
||||
|
||||
## Files Changed
|
||||
|
||||
### Production
|
||||
|
||||
- `src/gps_denied_onboard/helpers/gps_compare.py` (NEW):
|
||||
- `GroundTruthRow` (frozen dataclass) — `t_s`, `lat_deg`, `lon_deg`, `alt_m`.
|
||||
- `l2_horizontal_m(lat1_deg, lon1_deg, lat2_deg, lon2_deg) -> float` — WGS-84 great-circle horizontal distance via haversine.
|
||||
- `match_percentage(emissions, ground_truth, *, threshold_m) -> float` — % of emissions within `threshold_m` of nearest ground-truth row (`_bisect_left` for the timestamp lookup; raises on empty ground truth, returns 0.0 on empty emissions).
|
||||
- `src/gps_denied_onboard/helpers/__init__.py`:
|
||||
- Re-exports `GroundTruthRow`, `l2_horizontal_m`, `match_percentage`.
|
||||
- `src/gps_denied_onboard/replay_input/tlog_ground_truth.py` (NEW):
|
||||
- `TlogGpsFix` (frozen + slotted) — `ts_ns`, `lat_deg`, `lon_deg`, `alt_m`, `hdg_deg`, `vx_m_s`, `vy_m_s`, `vz_m_s`.
|
||||
- `TlogGroundTruth` (frozen + slotted) — `records: tuple[TlogGpsFix, ...]`, `source: str`.
|
||||
- `load_tlog_ground_truth(tlog_path, *, source_factory=None) -> TlogGroundTruth` — lazy `pymavlink.mavutil.mavlink_connection` open mirroring `auto_sync._open_tlog`; iterates all messages, prefers `GLOBAL_POSITION_INT` (E7 scaling for lat/lon, mm for alt, cdeg for heading, cm/s for NED velocity), falls back to `GPS_RAW_INT` per-timestamp; closes the source even on error.
|
||||
- `_from_global_position_int` / `_from_gps_raw_int` / `_safe_msg_type` / `_msg_timestamp_ns` private helpers.
|
||||
- `src/gps_denied_onboard/replay_input/__init__.py`:
|
||||
- Re-exports `TlogGpsFix`, `TlogGroundTruth`, `load_tlog_ground_truth`.
|
||||
|
||||
### Calibration artifact
|
||||
|
||||
- `_docs/00_problem/input_data/flight_derkachi/khp20s30_factory.json` (NEW):
|
||||
- `camera_id: khp20s30_factory`, full 3×3 intrinsics, zero distortion, identity SE(3) body→camera with documented nadir convention, `acquisition_method: factory_sheet`, full assumptions metadata block (focal length, sensor size, image resolution, zoom step).
|
||||
- `_docs/00_problem/input_data/flight_derkachi/camera_info.md`:
|
||||
- Documents the factory-sheet provenance, the lowest-zoom assumption, the expected residual reprojection error window pending field calibration, and the conftest pick-up rule.
|
||||
|
||||
### Tests
|
||||
|
||||
- `tests/unit/replay_input/test_tlog_ground_truth.py` (NEW, 12 tests):
|
||||
- `test_ac1_real_derkachi_tlog_has_geofence_records` — AC-1: real `derkachi.tlog` parse yields > 100 records within the Derkachi geofence (lat ≈ 50.08, lon ≈ 36.11). Skipped only when the binary is absent.
|
||||
- `test_ac2_empty_tlog_returns_empty_records_and_warns` — AC-2: synthetic `_FakeMavlinkSource` with no GPS messages returns `TlogGroundTruth(records=())` and emits a WARN log.
|
||||
- `test_missing_file_raises` — error path coverage for the resolver.
|
||||
- `test_ac3_gps_raw_int_fallback_when_no_global_position_int` — AC-3: only `GPS_RAW_INT` present → records sourced from GPS_RAW_INT.
|
||||
- `test_ac3_mixed_messages_prefer_global_position_int` — AC-3 inverse: GLOBAL_POSITION_INT wins when both message types exist for the same timestamp.
|
||||
- `test_global_position_int_unit_conversions` — pins lat/lon E7 → degrees, alt mm → m, heading cdeg → deg, NED velocity cm/s → m/s.
|
||||
- `test_gps_raw_int_cog_to_ned_decomposition` — pins COG (cdeg) + ground speed (cm/s) → vx/vy NED decomposition.
|
||||
- `test_missing_timestamp_raises` — guard for malformed messages.
|
||||
- `test_source_is_closed_after_load` — resource hygiene.
|
||||
- `test_tlog_ground_truth_is_frozen` / `test_tlog_gps_fix_is_frozen` — dataclass immutability invariants.
|
||||
- `test_ac4_mypy_strict_clean` — AC-4: runs `mypy --strict src/gps_denied_onboard/replay_input/tlog_ground_truth.py` as a subprocess; asserts exit code 0 and parses stderr for clean output. Separately, the synthetic tests in this file use `_FakeMavlinkMessage` / `_FakeMavlinkSource` as deterministic unit fixtures (no real pymavlink dependency in those tests).
|
||||
|
||||
- `tests/unit/test_az697_gps_compare.py` (NEW, 10 tests):
|
||||
- L2 zero at same point / 1° latitude ≈ 111 km / Kharkiv↔Kyiv known distance / symmetric.
|
||||
- `match_percentage` — all within / none within / empty emissions = 0.0 / empty ground truth raises.
|
||||
- `GroundTruthRow` frozen invariant.
|
||||
- `test_test_helpers_reexport_is_identical` — AC-5: the names re-exported by `tests/e2e/replay/_helpers` are the very same objects (`is`) as those in the production module (identity, not equality, to catch accidental re-implementation).
|
||||
|
||||
- `tests/unit/calibration/test_khp20s30_factory.py` (NEW, 9 tests):
|
||||
- `test_ac1_required_schema_keys_present` / `test_ac1_cli_loader_accepts_the_json` — AC-1: schema + loader compatibility.
|
||||
- `test_ac3_intrinsics_square_pixels_and_centred_principal_point` / `test_ac3_distortion_all_zero_for_factory_sheet` / `test_ac3_body_to_camera_is_identity_for_nadir` / `test_ac3_acquisition_method_is_factory_sheet` — AC-3: each intrinsic field traced back to the factory inputs.
|
||||
- `test_metadata_documents_assumptions` — assumption block traceability.
|
||||
- `test_camera_info_md_references_calibration` — AC-2: `camera_info.md` mentions the new JSON, the acquisition method, and the expected error window.
|
||||
- `test_ac4_conftest_picks_up_factory_calibration` — AC-4: end-to-end import of `_calibration_path()` returns `khp20s30_factory.json` when present.
|
||||
|
||||
### Conftest + helper wiring
|
||||
|
||||
- `tests/e2e/replay/_helpers.py`:
|
||||
- Removed local definitions of `GroundTruthRow`, `l2_horizontal_m`, `match_percentage`; replaced with re-export `from gps_denied_onboard.helpers.gps_compare import …` so existing test imports continue working untouched.
|
||||
- Retained `load_ground_truth_csv` (CSV synth fallback path).
|
||||
- `tests/e2e/replay/conftest.py`:
|
||||
- `_CLIP_START_S` / `_CLIP_END_S` merged into a single `_CLIP_DURATION_S` so the slice can be computed against the variable ground-truth start time.
|
||||
- `_calibration_path()` prefers `khp20s30_factory.json` when present, falls back to `adti26.json`.
|
||||
- `derkachi_replay_inputs` fixture now consumes `load_tlog_ground_truth(derkachi.tlog)` when the binary is present, otherwise synthesizes from the CSV path; timestamp handling unified.
|
||||
|
||||
### State + ignore
|
||||
|
||||
- `_docs/_autodev_state.md` — `sub_step.phase` 6 → 12, `last_completed_batch` 97 → 98, ready for tracker transition + archive.
|
||||
- `.gitignore` — added `_docs/00_problem/input_data/**/*.tlog` and `_docs/00_problem/input_data/**/*.{mp4,h264}` patterns so binary flight logs stay out of the repo. (Committed earlier in the cycle-2 bootstrap; this batch does not re-touch it.)
|
||||
|
||||
## AC Test Coverage
|
||||
|
||||
**AZ-697 — 5 ACs, all covered:**
|
||||
|
||||
| AC | Coverage |
|
||||
|----|----------|
|
||||
| AC-1 (happy path on real tlog) | `test_ac1_real_derkachi_tlog_has_geofence_records` — skipped only if binary absent |
|
||||
| AC-2 (empty GPS gracefully) | `test_ac2_empty_tlog_returns_empty_records_and_warns` |
|
||||
| AC-3 (fallback precedence) | `test_ac3_gps_raw_int_fallback_when_no_global_position_int` + `test_ac3_mixed_messages_prefer_global_position_int` |
|
||||
| AC-4 (mypy --strict clean) | `test_ac4_mypy_strict_clean` — passing as of this commit |
|
||||
| AC-5 (comparison helpers in production) | `test_az697_gps_compare.py` whole module + `test_test_helpers_reexport_is_identical` |
|
||||
|
||||
**AZ-702 — 4 ACs, all covered:**
|
||||
|
||||
| AC | Coverage |
|
||||
|----|----------|
|
||||
| AC-1 (calibration JSON schema + loader) | `test_ac1_required_schema_keys_present` + `test_ac1_cli_loader_accepts_the_json` |
|
||||
| AC-2 (camera_info.md documents the calibration) | `test_camera_info_md_references_calibration` |
|
||||
| AC-3 (intrinsics computed from factory inputs) | `test_ac3_*` (4 tests, one per field group) |
|
||||
| AC-4 (conftest picks up the file automatically) | `test_ac4_conftest_picks_up_factory_calibration` |
|
||||
|
||||
## Test Run
|
||||
|
||||
| Suite | Result |
|
||||
|-------|--------|
|
||||
| `tests/unit/replay_input/test_tlog_ground_truth.py` (targeted, 12 tests) | 12 passed |
|
||||
| `tests/unit/test_az697_gps_compare.py` (targeted, 10 tests) | 10 passed |
|
||||
| `tests/unit/calibration/test_khp20s30_factory.py` (targeted, 9 tests) | 9 passed |
|
||||
| `tests/e2e/replay/test_helpers.py` (regression on the re-export path, 14 tests) | 14 passed |
|
||||
|
||||
Total for the batch: **45 passed, 0 failed**. Full suite gate runs at Step 16 (after the final batch in cycle 2).
|
||||
|
||||
## Code Review Verdict: PASS
|
||||
|
||||
Inline lightweight review (no separate `code-review` skill artifact produced for this batch — review notes are inline below):
|
||||
|
||||
- **File ownership**: `gps_compare.py` lives in `helpers/` (shared); `tlog_ground_truth.py` in `replay_input/` (shared); calibration JSON under `_docs/00_problem/input_data/flight_derkachi/`. All match the module-layout entries; no boundary violation.
|
||||
- **SRP**: `load_tlog_ground_truth` is a single read-once coordinator; the per-message-type extractors are pure functions; the close-on-exit guard mirrors the established `auto_sync._open_tlog` pattern.
|
||||
- **Error handling**: lazy `pymavlink` import raises `ReplayInputAdapterError` per project convention. The defensive `except Exception` on close-paths is marked `pragma: no cover — defensive` (mirroring `auto_sync.py`).
|
||||
- **Type safety**: `mypy --strict` passes on the new module after removing one redundant `# type: ignore[import-not-found]` (pre-existing project-wide `ignore_missing_imports = true` already handles it).
|
||||
- **Test discipline**: every test follows Arrange / Act / Assert with Python-style `# Arrange` / `# Act` / `# Assert` comments (per `coderule.mdc`). Skipped tests have explicit prerequisite reasons.
|
||||
- **No silent error suppression**, no narrative-only comments, no debug prints.
|
||||
|
||||
## Auto-Fix Attempts: 1
|
||||
|
||||
- Round 1: removed `# type: ignore[import-not-found]` from `tlog_ground_truth.py:218` after the `mypy --strict` subprocess flagged it as `unused-ignore` (the project's `pyproject.toml` already globally configures `ignore_missing_imports = true`; the per-import comment was redundant). Re-run of `test_ac4_mypy_strict_clean` passed.
|
||||
- No further rounds needed.
|
||||
|
||||
## Stuck Agents: None
|
||||
|
||||
## Adjacent Issue Surfaced (NOT fixed in this batch)
|
||||
|
||||
- `src/gps_denied_onboard/replay_input/auto_sync.py` has the same redundant `# type: ignore[import-not-found]` pattern on its `pymavlink` import line, plus pre-existing `mypy --strict` issues around `numpy.ndarray` generic parameterization and a `cv2.calcOpticalFlowFarneback` overload mismatch. None of those are exercised by this batch's tests or scope. Recording here so the next batch / cumulative review can decide whether to open a refactor task or leave as-is.
|
||||
|
||||
## Next Batch
|
||||
|
||||
Per the cycle-2 implementation order (T1+T6 → T2 → T3 → T4 → T5) the next batch is **Batch 99: AZ-698 (`tlog_trim_midflight_alignment`)** — depends on AZ-697 (now done).
|
||||
@@ -0,0 +1,131 @@
|
||||
# Batch 99 — Cycle 2 — AZ-698
|
||||
|
||||
**Date**: 2026-05-20
|
||||
**Tasks**: AZ-698 (Tlog trim + mid-flight alignment for replay).
|
||||
**Story points**: 5.
|
||||
**Jira status**: AZ-698 → `In Testing`.
|
||||
|
||||
## What shipped
|
||||
|
||||
A normalised-cross-correlation aligner that finds the video's playback window
|
||||
inside a longer tlog, plus the plumbing to honor that window across the
|
||||
replay-mode composition root, replay coordinator, replay-side FC adapter,
|
||||
config schema, and CLI.
|
||||
|
||||
- `find_aligned_window(tlog_path, video_path, config, ...) -> AlignedWindow`
|
||||
in `src/gps_denied_onboard/replay_input/auto_sync.py`. Returns
|
||||
`(tlog_start_ns, tlog_end_ns, offset_ms, confidence, used_fallback)`.
|
||||
- `AlignedWindow` DTO + `auto_trim` flag + `alignment_*` knobs on
|
||||
`ReplayConfig` / `ReplayAutoSyncConfig`.
|
||||
- `TlogReplayFcAdapter` skips messages with `_timestamp < tlog_start_ns`
|
||||
when seeded (`AC-3`).
|
||||
- `--auto-trim` CLI flag on `gps-denied-replay`, mutually exclusive with
|
||||
`--time-offset-ms`.
|
||||
|
||||
## Files changed
|
||||
|
||||
Production (9):
|
||||
|
||||
- `src/gps_denied_onboard/replay_input/interface.py`
|
||||
- `src/gps_denied_onboard/replay_input/auto_sync.py`
|
||||
- `src/gps_denied_onboard/replay_input/tlog_video_adapter.py`
|
||||
- `src/gps_denied_onboard/replay_input/__init__.py` (re-export `AlignedWindow`)
|
||||
- `src/gps_denied_onboard/components/c8_fc_adapter/tlog_replay_adapter.py`
|
||||
- `src/gps_denied_onboard/config/schema.py`
|
||||
- `src/gps_denied_onboard/config/loader.py`
|
||||
- `src/gps_denied_onboard/runtime_root/_replay_branch.py`
|
||||
- `src/gps_denied_onboard/cli/replay.py`
|
||||
|
||||
Tests (1 new):
|
||||
|
||||
- `tests/unit/replay_input/test_az698_window_alignment.py`
|
||||
|
||||
Specs:
|
||||
|
||||
- `_docs/02_tasks/done/AZ-698_tlog_trim_midflight_alignment.md` (moved from
|
||||
`todo/`, completion notes appended).
|
||||
|
||||
## AC coverage
|
||||
|
||||
| AC | Test | Result |
|
||||
| ---- | -------------------------------------------------------------------------- | ------- |
|
||||
| AC-1 | `test_ac1_takeoff_aligned_offset_matches_az405_within_50ms` | PASS |
|
||||
| AC-2 | `test_ac2_mid_flight_alignment_locates_correct_window` | PASS |
|
||||
| AC-3 | `test_ac3_adapter_seek_skips_pre_window_messages`, `_default_no_seek_*` | PASS |
|
||||
| AC-4 | `test_ac4_validator_passes_for_takeoff_aligned_offset`, `_mid_flight_*` | PASS |
|
||||
| AC-5 | `test_ac5_cli_auto_trim_smoke_uses_find_aligned_window` | SKIPPED |
|
||||
|
||||
AC-5 skip reason: the repo's `flight_derkachi.mp4` is a 134-byte placeholder,
|
||||
not a real recording. Live CLI smoke is covered by AZ-699 (validation
|
||||
runner) once the real video is sourced.
|
||||
|
||||
## Test run
|
||||
|
||||
```
|
||||
tests/unit/replay_input/test_az698_window_alignment.py 12 PASS 1 SKIP
|
||||
tests/unit/replay_input/test_az405_auto_sync.py 14 PASS
|
||||
tests/unit/replay_input/test_az405_replay_input_adapter.py 13 PASS
|
||||
tests/unit/c8_fc_adapter/test_az399_tlog_replay_adapter.py 24 PASS 1 SKIP
|
||||
tests/unit/replay_input/test_tlog_ground_truth.py 12 PASS
|
||||
tests/unit/test_az697_gps_compare.py 10 PASS
|
||||
tests/unit/calibration/test_khp20s30_factory.py 9 PASS
|
||||
tests/unit/runtime_root/test_az687_pre_constructed_replay_mode.py 3 PASS
|
||||
tests/unit/test_az269_config_loader.py 9 PASS
|
||||
```
|
||||
|
||||
Totals: **106 passed, 2 skipped, 0 failed.** No regressions in adjacent
|
||||
suites.
|
||||
|
||||
## Strict typing
|
||||
|
||||
Baseline (pre-batch, by stash-and-rerun): 17 `mypy --strict` errors across
|
||||
6 of the 9 touched `src/` files. After batch: 17 errors — same count,
|
||||
same kinds, with line numbers shifted only by added code. **Zero new
|
||||
strict-typing errors introduced by AZ-698.** Pre-existing errors are
|
||||
out-of-scope per `coderule.mdc` ("Pre-existing lint errors should only be
|
||||
fixed if they're in the modified area" — they were not in the bytes
|
||||
modified for AZ-698 ACs).
|
||||
|
||||
The new public symbols (`find_aligned_window`, `AlignedWindow`,
|
||||
`_zero_mean_normalise`, `_resample_uniform`) carry explicit
|
||||
`npt.NDArray[np.float64]` annotations so they don't add to the debt.
|
||||
|
||||
## Code review verdict
|
||||
|
||||
Inline self-review: code paths cover the spec's scope, fallback to
|
||||
head-takeoff on low confidence preserves AZ-405 behavior, adapter seek is
|
||||
opt-in via constructor kwarg so the `--skip-auto-sync` path is untouched.
|
||||
The normalised-cross-correlation switch is documented in the spec's
|
||||
"Implementation Notes" appendix as the algorithmic decision of record.
|
||||
|
||||
## Follow-up commit: multi-flight handling
|
||||
|
||||
User-reported gap during the AZ-698 "In Testing" phase: real
|
||||
`derkachi.tlog` contains **three takeoffs**; the video covers only
|
||||
the last. The original AZ-698 happy path (`np.argmax`) and fallback
|
||||
(`detect_tlog_takeoff` on head) were both biased toward flight 1.
|
||||
|
||||
Patched in a follow-up commit on top of batch 99:
|
||||
|
||||
- New `_segment_flights_from_imu_energy` helper partitions the IMU
|
||||
energy stream into distinct flights using a motion-threshold +
|
||||
gap-tolerance walk.
|
||||
- `find_aligned_window` now restricts NCC search to the **last**
|
||||
detected segment.
|
||||
- Low-confidence fallback uses the last segment's start instead of
|
||||
re-running head-takeoff detection on the whole tlog.
|
||||
- `AlignedWindow` gains `flight_count_detected` + `selected_flight_index`
|
||||
for observability; both are surfaced in the `replay.auto_trim.resolved` /
|
||||
`…fallback_to_takeoff` log records.
|
||||
- New unit tests: segmenter happy paths (1-flight, 3-flight),
|
||||
ground-blip rejection, cruise-lull preservation; integration test
|
||||
proving `find_aligned_window` on a 3-flight tlog picks flight 3.
|
||||
|
||||
Test totals after follow-up: **113 passed, 2 skipped, 0 failed.**
|
||||
Zero new mypy --strict errors (12 errors in scope, all pre-existing
|
||||
and unchanged).
|
||||
|
||||
## Next batch
|
||||
|
||||
Batch 100 — **AZ-699** (real-flight validation runner). Depends on
|
||||
AZ-697 (ground truth) and AZ-698 (alignment) — both now in testing.
|
||||
@@ -2,14 +2,14 @@
|
||||
|
||||
## Current Step
|
||||
flow: existing-code
|
||||
step: 9
|
||||
name: New Task
|
||||
status: not_started
|
||||
step: 10
|
||||
name: Implement
|
||||
status: in_progress
|
||||
sub_step:
|
||||
phase: 0
|
||||
name: awaiting-invocation
|
||||
detail: ""
|
||||
phase: 6
|
||||
name: implement-tasks-sequentially
|
||||
detail: "batch 102 of ~102: AZ-701"
|
||||
retry_count: 0
|
||||
cycle: 2
|
||||
tracker: jira
|
||||
last_completed_batch: 97
|
||||
last_completed_batch: 102
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
# D-CROSS-CVE-1 opencv-python pin deferred — gtsam/numpy ABI block
|
||||
|
||||
**Recorded**: 2026-05-11T02:55+03:00 (Europe/Kyiv)
|
||||
**Last replay attempt**: 2026-05-19T20:04+03:00 (Europe/Kyiv) — replay re-checked
|
||||
at start of next `/autodev` invocation (~55 minutes after prior check at 19:09).
|
||||
PyPI not re-queried this round (debounced — `gtsam` upstream state is highly
|
||||
unlikely to publish numpy-2 wheels within a <2-hour window of the prior check,
|
||||
and the previous check confirmed no movement). Replay condition (numpy>=2
|
||||
stable wheels) still NOT met. Leftover remains open.
|
||||
**Last replay attempt**: 2026-05-20T13:59+03:00 (Europe/Kyiv) — replay re-checked
|
||||
at start of next `/autodev` invocation (~17h after prior check at 2026-05-19
|
||||
20:04). PyPI re-queried via `pip index versions gtsam`: only `gtsam 4.2`
|
||||
is published. Replay condition (numpy>=2 stable wheels) still NOT met.
|
||||
Leftover remains open.
|
||||
**Status**: deferred-non-user (replay when upstream gtsam wheels target numpy>=2)
|
||||
|
||||
## What is blocked
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
# AZ-701 — operator-side replay HTTP API.
|
||||
#
|
||||
# Two-stage build. NOT bundled into the airborne companion-tier1
|
||||
# image (folium + FastAPI + uvicorn add ~30 MB and would regress
|
||||
# the airborne cold-start NFR). Build with:
|
||||
#
|
||||
# docker build -f docker/replay-api.Dockerfile -t gps-denied-replay-api .
|
||||
#
|
||||
# Run with:
|
||||
#
|
||||
# docker run --rm -p 8080:8080 \
|
||||
# -e REPLAY_API_BEARER_TOKEN=secret \
|
||||
# -v $(pwd)/data:/data \
|
||||
# gps-denied-replay-api
|
||||
#
|
||||
# Health checks: /healthz (liveness) and /readyz (readiness).
|
||||
|
||||
FROM ubuntu:22.04 AS python-deps
ARG DEBIAN_FRONTEND=noninteractive
# wget is installed because the HEALTHCHECK below (and the compose-level
# healthcheck for this service) shells out to it; the ubuntu:22.04 base
# image does not ship wget or curl, so without it the container can
# never report healthy.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    python3.10 \
    python3.10-venv \
    python3-pip \
    libgl1 \
    libglib2.0-0 \
    wget \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /opt/gps-denied
COPY pyproject.toml ./
COPY src ./src
# Editable install of the package plus the operator-tools extra into a
# dedicated venv; the venv's bin directory is put first on PATH so the
# `replay-api` console script resolves without activation.
RUN python3 -m venv /opt/venv \
    && /opt/venv/bin/pip install --upgrade pip \
    && /opt/venv/bin/pip install --no-cache-dir -e ".[operator-tools]"
ENV PATH="/opt/venv/bin:${PATH}"

FROM python-deps AS runtime
ENV REPLAY_API_STORAGE_ROOT=/var/azaion/replay_api
ENV REPLAY_API_HOST=0.0.0.0
ENV REPLAY_API_PORT=8080
RUN mkdir -p /var/azaion/replay_api
EXPOSE 8080

# NOTE(review): the probe URL hard-codes port 8080; it will not follow a
# REPLAY_API_PORT override supplied at `docker run` time.
HEALTHCHECK --interval=10s --timeout=5s --start-period=10s --retries=3 \
    CMD wget -qO- http://127.0.0.1:8080/healthz || exit 1

ENTRYPOINT ["replay-api"]
|
||||
@@ -63,6 +63,37 @@ services:
|
||||
interval: 5s
|
||||
retries: 12
|
||||
|
||||
# AZ-701 — operator-side replay HTTP API.
|
||||
#
|
||||
# Profile-gated so the default `docker compose up` flow (the
|
||||
# blackbox e2e suite) is unaffected. To start the API alongside
|
||||
# the suite, run:
|
||||
# docker compose --profile replay-api up replay-api
|
||||
# The container exposes /healthz on 8080 and refuses /replay
|
||||
# uploads without a bearer token unless REPLAY_API_AUTH_REQUIRED
|
||||
# is explicitly set to false (dev only — WARN logged).
|
||||
replay-api:
|
||||
profiles: ["replay-api"]
|
||||
build:
|
||||
context: ../..
|
||||
dockerfile: docker/replay-api.Dockerfile
|
||||
image: gps-denied-replay-api:e2e
|
||||
networks: [e2e-net]
|
||||
ports:
|
||||
- "${REPLAY_API_HOST_PORT:-8080}:8080"
|
||||
environment:
|
||||
REPLAY_API_AUTH_REQUIRED: "${REPLAY_API_AUTH_REQUIRED:-true}"
|
||||
REPLAY_API_BEARER_TOKEN: "${REPLAY_API_BEARER_TOKEN:-}"
|
||||
REPLAY_API_MAX_CONCURRENT_JOBS: "${REPLAY_API_MAX_CONCURRENT_JOBS:-1}"
|
||||
REPLAY_API_MAX_QUEUED_JOBS: "${REPLAY_API_MAX_QUEUED_JOBS:-8}"
|
||||
REPLAY_API_STORAGE_ROOT: /var/azaion/replay_api
|
||||
volumes:
|
||||
- replay-api-storage:/var/azaion/replay_api
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "-qO-", "http://127.0.0.1:8080/healthz"]
|
||||
interval: 10s
|
||||
retries: 5
|
||||
|
||||
mavproxy-listener:
|
||||
image: ardupilot/mavproxy:latest
|
||||
networks: [e2e-net]
|
||||
@@ -135,6 +166,7 @@ volumes:
|
||||
tile-cache-fixture: {}
|
||||
tlog-output: {}
|
||||
mock-audit: {}
|
||||
replay-api-storage: {}
|
||||
e2e-results:
|
||||
driver: local
|
||||
driver_opts:
|
||||
|
||||
@@ -130,9 +130,24 @@ telemetry = [
|
||||
"jetson-stats>=4.2",
|
||||
"pynvml>=11.5",
|
||||
]
|
||||
# AZ-700 / AZ-701: operator-side post-flight analysis tools. NOT
|
||||
# installed on the airborne binary (folium + FastAPI + uvicorn add
|
||||
# ~30 MB of deps + Leaflet assets that regress the cold-start NFR
|
||||
# if pulled into the runtime image). Activate with
|
||||
# `pip install gps-denied-onboard[operator-tools]` on a developer
|
||||
# / analyst workstation, or build the `docker/replay-api.Dockerfile`
|
||||
# image which installs this extra by default.
|
||||
operator-tools = [
|
||||
"folium>=0.16,<1.0",
|
||||
"fastapi>=0.111,<0.120",
|
||||
"uvicorn>=0.30,<1.0",
|
||||
"python-multipart>=0.0.9,<1.0",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
gps-denied-replay = "gps_denied_onboard.cli.replay:main"
|
||||
gps-denied-render-map = "gps_denied_onboard.cli.render_map:main"
|
||||
replay-api = "gps_denied_onboard.cli.replay_api_entrypoint:main"
|
||||
operator-orchestrator = "gps_denied_onboard.components.c12_operator_orchestrator.cli:main"
|
||||
|
||||
[tool.setuptools]
|
||||
|
||||
@@ -0,0 +1,370 @@
|
||||
"""AZ-700 ``gps-denied-render-map`` console-script.
|
||||
|
||||
Renders a self-contained HTML map (folium / Leaflet) comparing the
|
||||
estimated GPS track (from a `gps-denied-replay` JSONL run) against
|
||||
the tlog ground-truth track (binary tlog via AZ-697). Output is a
|
||||
single shareable HTML file with two distinct polyline layers,
|
||||
start/end markers, scale circles for visual reference, and an
|
||||
optional accuracy-summary banner from AZ-699.
|
||||
|
||||
This module lives under ``cli/`` rather than ``components/`` because
|
||||
it is an operator-side post-flight analysis tool — it never runs
|
||||
inside the airborne loop. folium is an optional dependency
|
||||
(``[operator-tools]``) so the airborne binary's cold-start NFR is
|
||||
unaffected.
|
||||
|
||||
Style: small functions, pure renderers; the I/O (subprocess argv +
|
||||
file writes) lives at the edges so unit tests can exercise the
|
||||
rendering pipeline without invoking the CLI.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from collections.abc import Iterable
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from gps_denied_onboard.replay_input import load_tlog_ground_truth
|
||||
|
||||
__all__ = [
|
||||
"RenderInputs",
|
||||
"load_estimated_track",
|
||||
"load_ground_truth_track",
|
||||
"main",
|
||||
"render_map_html",
|
||||
]
|
||||
|
||||
|
||||
# Default tile provider. folium renders OpenStreetMap tiles when ``tiles``
|
||||
# is ``"OpenStreetMap"`` (its built-in alias); a literal URL template can
|
||||
# be supplied instead via the local-offline-tiles knob. AC-5 of AZ-700
|
||||
# allows fail-fast when neither online nor local tiles are configured.
|
||||
_DEFAULT_TILES_NAME: str = "OpenStreetMap"
|
||||
|
||||
|
||||
# Visual style. Pinned so the AC-2/AC-3 HTML scans are stable across
|
||||
# folium upgrades (folium emits ``L.polyline([...], {color: '...'})``).
|
||||
_TRUTH_LINE_COLOR: str = "red"
|
||||
_ESTIMATED_LINE_COLOR: str = "blue"
|
||||
_TRUTH_START_COLOR: str = "green"
|
||||
_TRUTH_END_COLOR: str = "black"
|
||||
_ESTIMATED_START_COLOR: str = "lightgreen"
|
||||
_ESTIMATED_END_COLOR: str = "gray"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class RenderInputs:
    """Immutable bundle of everything :func:`render_map_html` draws.

    Attributes:
        estimated_track: Estimator emissions as ``(lat_deg, lon_deg)``
            pairs, in chronological order.
        truth_track: Ground-truth fixes sourced from the tlog, in the
            same ``(lat_deg, lon_deg)`` shape.
        summary_markdown: Content of the AZ-699 accuracy report, or
            ``None`` to render the page without the header banner.
        title: Text for the page's ``<title>`` element.
    """

    estimated_track: list[tuple[float, float]]
    truth_track: list[tuple[float, float]]
    summary_markdown: str | None
    title: str
|
||||
|
||||
|
||||
def load_estimated_track(jsonl_path: Path) -> list[tuple[float, float]]:
    """Load a track from a ``gps-denied-replay`` JSONL output.

    Blank lines are skipped. Every other line must be a JSON object
    carrying a ``position_wgs84`` object with numeric ``lat_deg`` and
    ``lon_deg`` members.

    Args:
        jsonl_path: Path to the JSONL emissions file.

    Returns:
        ``(lat_deg, lon_deg)`` pairs as floats, in file order.

    Raises:
        ValueError: A line is not valid JSON, is not a JSON object, has
            no ``position_wgs84`` object, or carries a non-numeric
            latitude / longitude (JSON booleans are rejected too).
    """
    out: list[tuple[float, float]] = []
    # JSON text is UTF-8 by specification; do not depend on the host locale.
    for line in jsonl_path.read_text(encoding="utf-8").splitlines():
        if not line.strip():
            continue
        try:
            row = json.loads(line)
        except json.JSONDecodeError as exc:
            raise ValueError(
                f"{jsonl_path}: invalid JSON on a line: {exc!r}"
            ) from exc
        # A syntactically valid line may still be a list / number / string,
        # none of which has ``.get``; report it as a malformed row instead
        # of leaking an AttributeError.
        pos = row.get("position_wgs84") if isinstance(row, dict) else None
        if not isinstance(pos, dict):
            raise ValueError(
                f"{jsonl_path}: row missing position_wgs84: {row!r}"
            )
        lat = pos.get("lat_deg")
        lon = pos.get("lon_deg")
        # ``bool`` is a subclass of ``int``; ``true`` is not a coordinate.
        if any(
            isinstance(value, bool) or not isinstance(value, (int, float))
            for value in (lat, lon)
        ):
            raise ValueError(
                f"{jsonl_path}: row has non-numeric lat/lon: {pos!r}"
            )
        out.append((float(lat), float(lon)))
    return out
|
||||
|
||||
|
||||
def load_ground_truth_track(tlog_path: Path) -> list[tuple[float, float]]:
    """Extract the ground-truth ``(lat, lon)`` track from a binary tlog (AZ-697)."""
    track: list[tuple[float, float]] = []
    # ``records`` is iterated in the order the loader returns it.
    for record in load_tlog_ground_truth(tlog_path).records:
        track.append((record.lat_deg, record.lon_deg))
    return track
|
||||
|
||||
|
||||
def _bounds(
    *tracks: Iterable[tuple[float, float]],
) -> tuple[tuple[float, float], tuple[float, float]] | None:
    """Compute the lat/lon bounding box spanning every supplied track.

    Returns ``((lat_min, lon_min), (lat_max, lon_max))``, or ``None``
    when the tracks contain no points at all.
    """
    points = [(lat, lon) for track in tracks for lat, lon in track]
    if not points:
        return None
    lats, lons = zip(*points)
    south_west = (min(lats), min(lons))
    north_east = (max(lats), max(lons))
    return south_west, north_east
|
||||
|
||||
|
||||
def _import_folium() -> Any:
    """Import folium lazily so the airborne binary never pays for it.

    Returns:
        The imported ``folium`` module.

    Raises:
        SystemExit: folium is not importable; the message carries the
            install hint for the ``operator-tools`` extra.
    """
    try:
        import folium  # type: ignore[import-untyped, import-not-found, unused-ignore]
    except ImportError as exc:
        hint = (
            "folium not installed. Install the operator-side tools "
            "with `pip install gps-denied-onboard[operator-tools]`."
        )
        raise SystemExit(hint) from exc
    else:
        return folium
|
||||
|
||||
|
||||
def render_map_html(
    inputs: RenderInputs,
    *,
    offline_tiles: bool = False,
    offline_tiles_template: str | None = None,
) -> str:
    """Build the comparison map and return it as one HTML document.

    No file I/O happens here; :func:`main` is responsible for writing
    the returned string to disk.

    Args:
        inputs: Parsed tracks plus the optional summary and page title.
        offline_tiles: When ``True`` the map is created with
            ``tiles=None`` (no base layer), leaving a gray background
            that is still usable for geometric review.
        offline_tiles_template: Local tile-URL template (e.g.
            ``"file:///opt/tiles/{z}/{x}/{y}.png"``). When given it wins
            over ``offline_tiles``.

    Returns:
        The rendered HTML page.

    Raises:
        ValueError: Both tracks are empty, so there is nothing to draw.
    """
    folium = _import_folium()
    bbox = _bounds(inputs.estimated_track, inputs.truth_track)
    if bbox is None:
        raise ValueError(
            "both estimated and truth tracks are empty; "
            "nothing to render"
        )
    south_west, north_east = bbox
    centre = (
        (south_west[0] + north_east[0]) / 2.0,
        (south_west[1] + north_east[1]) / 2.0,
    )

    # Pick the base layer: explicit template > no tiles > online default.
    map_kwargs: dict[str, Any] = {"location": centre, "zoom_start": 15}
    if offline_tiles_template is not None:
        map_kwargs["tiles"] = offline_tiles_template
        map_kwargs["attr"] = "local offline tile bundle"
    elif offline_tiles:
        map_kwargs["tiles"] = None
    else:
        map_kwargs["tiles"] = _DEFAULT_TILES_NAME
    m = folium.Map(**map_kwargs)

    truth = inputs.truth_track
    estimated = inputs.estimated_track

    # AZ-700 AC-2: truth polyline (red) + estimated polyline (blue, dashed).
    if truth:
        folium.PolyLine(
            truth,
            color=_TRUTH_LINE_COLOR,
            weight=3,
            opacity=0.9,
            tooltip="Ground truth (tlog)",
        ).add_to(m)
    if estimated:
        folium.PolyLine(
            estimated,
            color=_ESTIMATED_LINE_COLOR,
            weight=3,
            opacity=0.9,
            dash_array="6,4",
            tooltip="Estimator output",
        ).add_to(m)

    def _pin(point: tuple[float, float], label: str, colour: str, glyph: str) -> None:
        # One start/end marker; layers are added in call order.
        folium.Marker(
            point,
            tooltip=label,
            icon=folium.Icon(color=colour, icon=glyph),
        ).add_to(m)

    # AZ-700 AC-3: start/end markers + 100 m + 50 m scale circles.
    if truth:
        _pin(truth[0], "Truth start", _TRUTH_START_COLOR, "play")
        _pin(truth[-1], "Truth end", _TRUTH_END_COLOR, "stop")
        for radius_m, label in ((100.0, "100 m scale"), (50.0, "50 m scale")):
            folium.Circle(
                truth[0], radius=radius_m, color="black", fill=False,
                tooltip=label,
            ).add_to(m)
    if estimated:
        _pin(estimated[0], "Estimator start", _ESTIMATED_START_COLOR, "play")
        _pin(estimated[-1], "Estimator end", _ESTIMATED_END_COLOR, "stop")

    m.fit_bounds([south_west, north_east])

    summary = inputs.summary_markdown
    if summary is not None:
        banner_open = (
            "<div style='background:#fff; padding:8px 12px; "
            "border-bottom:1px solid #999; font-family:monospace; "
            "white-space:pre-wrap;'>"
        )
        banner_html = banner_open + _escape_html(summary) + "</div>"
        m.get_root().html.add_child(folium.Element(banner_html))

    title_html = f"<title>{_escape_html(inputs.title)}</title>"
    m.get_root().header.add_child(folium.Element(title_html))

    return str(m.get_root().render())
|
||||
|
||||
|
||||
def _escape_html(text: str) -> str:
    """Escape ``&``, ``<`` and ``>`` so *text* is safe as HTML element content.

    The ampersand is replaced first; otherwise the ``&`` introduced by
    the ``&lt;`` / ``&gt;`` entities would itself be escaped again.
    Quotes are left untouched, so the result is intended for element
    bodies (the banner ``<div>`` and ``<title>``), not attribute values.
    """
    return (
        text.replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# CLI surface
|
||||
|
||||
|
||||
def _build_argparser() -> argparse.ArgumentParser:
    """Assemble the ``gps-denied-render-map`` command-line parser.

    ``--estimated``, ``--truth`` and ``--output`` are required paths;
    ``--summary``, ``--offline-tiles``, ``--offline-tiles-template`` and
    ``--title`` are optional.
    """
    parser = argparse.ArgumentParser(
        prog="gps-denied-render-map",
        description=(
            "Render a self-contained HTML map comparing the "
            "estimator's GPS track with the tlog ground-truth track."
        ),
    )

    # The three mandatory path arguments share one shape.
    required_paths = (
        ("--estimated", "Path to the gps-denied-replay JSONL emissions file."),
        ("--truth", "Path to the binary tlog the estimator was run against."),
        ("--output", "Path to write the resulting HTML map."),
    )
    for flag, help_text in required_paths:
        parser.add_argument(flag, type=Path, required=True, help=help_text)

    summary_help = (
        "Optional path to an AZ-699 accuracy-summary Markdown "
        "file. When supplied, its contents are embedded above "
        "the map as a fixed banner."
    )
    parser.add_argument("--summary", type=Path, default=None, help=summary_help)

    offline_help = (
        "Initialise the map with no base tile layer (gray "
        "background). Use when the rendering host has no "
        "internet access AND no local tile bundle. The map is "
        "still useful for geometric track review."
    )
    parser.add_argument("--offline-tiles", action="store_true", help=offline_help)

    template_help = (
        "Local-tile URL template (e.g. "
        "'file:///opt/tiles/{z}/{x}/{y}.png'). Takes precedence "
        "over --offline-tiles when both are supplied."
    )
    parser.add_argument(
        "--offline-tiles-template", type=str, default=None, help=template_help
    )

    parser.add_argument(
        "--title",
        type=str,
        default="gps-denied-onboard replay map",
        help="HTML <title> for the produced page.",
    )
    return parser
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """Run the ``gps-denied-render-map`` CLI.

    Loads both tracks, optionally reads the accuracy summary, renders
    the map and writes it to ``--output`` (parent directories are
    created as needed).

    Args:
        argv: Argument vector without the program name; ``None`` lets
            argparse read ``sys.argv``.

    Returns:
        ``0`` once the HTML file has been written; ``2`` when both
        tracks are empty or the ``--summary`` path is not a file.
    """
    args = _build_argparser().parse_args(argv)

    estimated_track = load_estimated_track(args.estimated)
    truth_track = load_ground_truth_track(args.truth)
    if not estimated_track and not truth_track:
        print(
            "both estimated and truth tracks are empty; nothing to render",
            file=sys.stderr,
        )
        return 2

    summary_markdown: str | None = None
    if args.summary is not None:
        if not args.summary.is_file():
            print(
                f"--summary file not found: {args.summary}", file=sys.stderr
            )
            return 2
        # Read as UTF-8 explicitly rather than with the host locale.
        # NOTE(review): assumes the AZ-699 report is UTF-8 — confirm.
        summary_markdown = args.summary.read_text(encoding="utf-8")

    inputs = RenderInputs(
        estimated_track=estimated_track,
        truth_track=truth_track,
        summary_markdown=summary_markdown,
        title=args.title,
    )

    html = render_map_html(
        inputs,
        offline_tiles=bool(args.offline_tiles),
        offline_tiles_template=args.offline_tiles_template,
    )
    args.output.parent.mkdir(parents=True, exist_ok=True)
    # Write UTF-8 regardless of locale so non-ASCII banner/title text
    # survives on hosts whose default encoding is not UTF-8.
    # NOTE(review): folium pages are expected to declare a UTF-8 charset — confirm.
    args.output.write_text(html, encoding="utf-8")
    return 0
|
||||
@@ -141,6 +141,20 @@ def _build_argparser() -> argparse.ArgumentParser:
|
||||
"still-image scenarios)."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--auto-trim",
|
||||
dest="auto_trim",
|
||||
action="store_true",
|
||||
help=(
|
||||
"AZ-698: Locate the video's playback window inside a "
|
||||
"longer tlog via IMU↔optical-flow cross-correlation, "
|
||||
"then trim the tlog stream to that window. Mutually "
|
||||
"exclusive with --time-offset-ms. Below the configured "
|
||||
"alignment confidence threshold the aligner falls back "
|
||||
"to the AZ-405 head-takeoff path and the AC-9 validator "
|
||||
"still gates the final offset."
|
||||
),
|
||||
)
|
||||
return parser
|
||||
|
||||
|
||||
@@ -217,6 +231,7 @@ def _build_replay_config(
|
||||
pace=args.pace,
|
||||
time_offset_ms=args.time_offset_ms,
|
||||
skip_auto_sync_validation=bool(args.skip_auto_sync_validation),
|
||||
auto_trim=bool(args.auto_trim),
|
||||
target_fc_dialect=base_config.replay.target_fc_dialect,
|
||||
auto_sync=base_config.replay.auto_sync,
|
||||
)
|
||||
|
||||
@@ -0,0 +1,82 @@
|
||||
"""AZ-701 ``replay-api`` console-script.
|
||||
|
||||
Builds the FastAPI app from environment configuration and starts
|
||||
the uvicorn server. Mirrors the operator-side CLI style of
|
||||
``gps-denied-replay`` and ``gps-denied-render-map``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from gps_denied_onboard.replay_api.app import build_runner_from_env, create_app
|
||||
from gps_denied_onboard.replay_api.storage import StorageRoot
|
||||
|
||||
__all__ = ["main"]
|
||||
|
||||
|
||||
_LOGGER = logging.getLogger("gps_denied_onboard.cli.replay_api")
|
||||
|
||||
|
||||
def _build_argparser() -> argparse.ArgumentParser:
    """Assemble the ``replay-api`` command-line parser.

    Defaults for ``--host``, ``--port`` and ``--storage-root`` are read
    from ``REPLAY_API_HOST``, ``REPLAY_API_PORT`` and
    ``REPLAY_API_STORAGE_ROOT`` at parser-build time, falling back to
    ``0.0.0.0``, ``8080`` and ``/var/azaion/replay_api``.
    """
    env = os.environ
    parser = argparse.ArgumentParser(
        prog="replay-api",
        description=(
            "Start the gps-denied-onboard replay HTTP API. "
            "Upload (tlog + video [+ calibration]); receive GPS "
            "fixes + an accuracy report + an HTML map."
        ),
    )

    default_host = env.get("REPLAY_API_HOST", "0.0.0.0")
    parser.add_argument("--host", default=default_host)

    default_port = int(env.get("REPLAY_API_PORT", "8080"))
    parser.add_argument("--port", type=int, default=default_port)

    default_root = Path(
        env.get("REPLAY_API_STORAGE_ROOT", "/var/azaion/replay_api")
    )
    parser.add_argument("--storage-root", type=Path, default=default_root)

    parser.add_argument(
        "--reload",
        action="store_true",
        help="Reload on code changes (dev only).",
    )
    return parser
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """Run the ``replay-api`` console script.

    Configures logging, builds the storage root, runner and app, then
    hands the app to ``uvicorn.run``.

    Args:
        argv: Argument vector without the program name; ``None`` lets
            argparse read ``sys.argv``.

    Returns:
        ``0`` after ``uvicorn.run`` returns.

    Raises:
        SystemExit: uvicorn is not installed.
    """
    args = _build_argparser().parse_args(argv)

    # Read the level once and normalise per consumer: ``logging`` only
    # recognises upper-case level names, while uvicorn is given the
    # lower-case form. Previously ``REPLAY_API_LOG_LEVEL=debug`` worked
    # for uvicorn but made ``basicConfig`` raise ``ValueError``.
    log_level = os.environ.get("REPLAY_API_LOG_LEVEL", "INFO")
    logging.basicConfig(
        level=log_level.upper(),
        format="%(asctime)s %(name)s %(levelname)s %(message)s",
    )
    try:
        import uvicorn
    except ImportError as exc:
        # Chain the cause so the original import failure stays visible.
        raise SystemExit(
            "uvicorn is not installed. Install with "
            "`pip install gps-denied-onboard[operator-tools]`."
        ) from exc

    storage = StorageRoot(args.storage_root)
    runner = build_runner_from_env()
    app = create_app(runner=runner, storage=storage)
    # NOTE(review): uvicorn documents that ``reload`` requires the app to
    # be passed as an import string; with an app object ``--reload`` is
    # likely rejected at startup. Confirm and switch to an import-string
    # factory if dev reload is meant to work.
    uvicorn.run(
        app,
        host=args.host,
        port=args.port,
        reload=args.reload,
        log_level=log_level.lower(),
    )
    return 0
|
||||
@@ -202,6 +202,7 @@ class TlogReplayFcAdapter:
|
||||
"_clock",
|
||||
"_wgs_converter",
|
||||
"_time_offset_ns",
|
||||
"_tlog_start_ns",
|
||||
"_pace",
|
||||
"_fdr_client",
|
||||
"_log",
|
||||
@@ -218,6 +219,7 @@ class TlogReplayFcAdapter:
|
||||
"_latest_flight_state",
|
||||
"_last_received_at_ns",
|
||||
"_dispatched_count",
|
||||
"_skipped_pre_window_count",
|
||||
"_mavlink_transport",
|
||||
"_outbound_mav",
|
||||
"_sequence_number",
|
||||
@@ -234,6 +236,7 @@ class TlogReplayFcAdapter:
|
||||
wgs_converter: "WgsConverter",
|
||||
fdr_client: "FdrClient",
|
||||
time_offset_ms: int = 0,
|
||||
tlog_start_ns: int | None = None,
|
||||
pace: ReplayPace = ReplayPace.ASAP,
|
||||
source_factory: Any | None = None,
|
||||
mavlink_transport: "MavlinkTransport | None" = None,
|
||||
@@ -254,12 +257,23 @@ class TlogReplayFcAdapter:
|
||||
f"target_fc_dialect must be ARDUPILOT_PLANE or INAV; "
|
||||
f"got {target_fc_dialect!r}"
|
||||
)
|
||||
if tlog_start_ns is not None and not isinstance(tlog_start_ns, int):
|
||||
raise FcAdapterConfigError(
|
||||
"tlog_start_ns must be int or None; "
|
||||
f"got {type(tlog_start_ns).__name__}"
|
||||
)
|
||||
self._tlog_path = tlog_path
|
||||
self._target_fc_dialect = target_fc_dialect
|
||||
self._clock = clock
|
||||
self._wgs_converter = wgs_converter
|
||||
self._fdr_client = fdr_client
|
||||
self._time_offset_ns: int = int(time_offset_ms) * 1_000_000
|
||||
# AZ-698: pre-window seek bound. Messages with raw
|
||||
# ``_timestamp`` (NOT offset-shifted) below this value are
|
||||
# silently skipped by ``feed_one_message`` so the runtime
|
||||
# loop only sees the mid-flight slice the aligner located.
|
||||
# ``None`` preserves the historical "stream from t=0" behaviour.
|
||||
self._tlog_start_ns: int | None = tlog_start_ns
|
||||
self._pace = pace
|
||||
self._log = get_logger("c8_fc_adapter.tlog_replay")
|
||||
self._bus = SubscriptionBus()
|
||||
@@ -275,6 +289,7 @@ class TlogReplayFcAdapter:
|
||||
self._latest_flight_state: FlightStateSignal | None = None
|
||||
self._last_received_at_ns: int = -1
|
||||
self._dispatched_count: int = 0
|
||||
self._skipped_pre_window_count: int = 0
|
||||
# AZ-558: outbound MAVLink seam. When ``mavlink_transport`` is
|
||||
# injected (replay branch wires NoopMavlinkTransport in), every
|
||||
# ``emit_external_position`` / ``emit_status_text`` call routes
|
||||
@@ -634,9 +649,24 @@ class TlogReplayFcAdapter:
|
||||
Test-friendly entrypoint mirroring AZ-391's
|
||||
:meth:`PymavlinkInboundDecoder.feed_one_message`. Production
|
||||
replay uses :meth:`_run_decode_loop`.
|
||||
|
||||
AZ-698: when ``tlog_start_ns`` was set at construction, every
|
||||
message with a raw ``_timestamp`` below that bound is silently
|
||||
skipped before its type-specific handler runs — the runtime
|
||||
loop only sees the trimmed window.
|
||||
"""
|
||||
if msg is None:
|
||||
return False
|
||||
if self._tlog_start_ns is not None:
|
||||
try:
|
||||
raw_ts_ns = _msg_timestamp_ns(msg)
|
||||
except FcOpenError:
|
||||
# Malformed timestamp — let the handler raise so the
|
||||
# error path matches the no-trim case verbatim.
|
||||
raw_ts_ns = None
|
||||
if raw_ts_ns is not None and raw_ts_ns < self._tlog_start_ns:
|
||||
self._skipped_pre_window_count += 1
|
||||
return False
|
||||
try:
|
||||
msg_type = self._safe_msg_type(msg)
|
||||
if msg_type in ("RAW_IMU", "SCALED_IMU2"):
|
||||
|
||||
@@ -195,6 +195,12 @@ _REPLAY_AUTO_SYNC_TYPES: Final[dict[str, type]] = {
|
||||
"match_threshold_pct": float,
|
||||
"match_window_ms": int,
|
||||
"low_confidence_threshold": float,
|
||||
"alignment_resample_hz": float,
|
||||
"alignment_video_scan_seconds": float,
|
||||
"alignment_low_confidence_threshold": float,
|
||||
"alignment_segment_motion_threshold_g": float,
|
||||
"alignment_segment_min_flight_duration_seconds": float,
|
||||
"alignment_segment_max_internal_gap_seconds": float,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -320,6 +320,12 @@ class ReplayAutoSyncConfig:
|
||||
match_threshold_pct: float = 95.0
|
||||
match_window_ms: int = 100
|
||||
low_confidence_threshold: float = 0.80
|
||||
alignment_resample_hz: float = 10.0
|
||||
alignment_video_scan_seconds: float = 30.0
|
||||
alignment_low_confidence_threshold: float = 0.60
|
||||
alignment_segment_motion_threshold_g: float = 0.10
|
||||
alignment_segment_min_flight_duration_seconds: float = 30.0
|
||||
alignment_segment_max_internal_gap_seconds: float = 30.0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@@ -367,6 +373,14 @@ class ReplayConfig:
|
||||
decodes.
|
||||
auto_sync: Operator-tunable thresholds for the AZ-405
|
||||
auto-sync detector.
|
||||
auto_trim: AZ-698 — when ``True`` and no manual offset is
|
||||
supplied, run the cross-correlation aligner to locate
|
||||
the video window within a longer tlog and trim the
|
||||
tlog stream to that window. Default ``False`` so the
|
||||
historical AZ-405 head-takeoff path remains the
|
||||
baseline. Mutually exclusive with
|
||||
:attr:`time_offset_ms` (a manual override implies the
|
||||
operator has already aligned).
|
||||
"""
|
||||
|
||||
video_path: str = ""
|
||||
@@ -377,6 +391,7 @@ class ReplayConfig:
|
||||
skip_auto_sync_validation: bool = False
|
||||
target_fc_dialect: str = "ardupilot_plane"
|
||||
auto_sync: ReplayAutoSyncConfig = field(default_factory=ReplayAutoSyncConfig)
|
||||
auto_trim: bool = False
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if self.pace not in KNOWN_REPLAY_PACES:
|
||||
@@ -413,6 +428,18 @@ class ReplayConfig:
|
||||
"required so the bypass cannot mask a silent-zero "
|
||||
"auto-sync result)"
|
||||
)
|
||||
if not isinstance(self.auto_trim, bool):
|
||||
raise ConfigError(
|
||||
"ReplayConfig.auto_trim must be a bool; "
|
||||
f"got {type(self.auto_trim).__name__}"
|
||||
)
|
||||
if self.auto_trim and self.time_offset_ms is not None:
|
||||
raise ConfigError(
|
||||
"ReplayConfig.auto_trim=True is mutually exclusive with "
|
||||
"ReplayConfig.time_offset_ms (auto-trim resolves the "
|
||||
"offset itself; a manual override means the operator "
|
||||
"already aligned the streams)"
|
||||
)
|
||||
|
||||
|
||||
# Documented defaults for cross-cutting blocks ONLY. Per-component defaults
|
||||
|
||||
@@ -16,6 +16,22 @@ from gps_denied_onboard.helpers.engine_filename_schema import (
|
||||
EngineFilenameSchema,
|
||||
EngineFilenameSchemaError,
|
||||
)
|
||||
from gps_denied_onboard.helpers.accuracy_report import (
|
||||
AC3_GATE_PCT,
|
||||
AC3_GATE_THRESHOLD_M,
|
||||
ReportContext,
|
||||
format_failure_message,
|
||||
render_report,
|
||||
verdict_passes_ac3,
|
||||
)
|
||||
from gps_denied_onboard.helpers.gps_compare import (
|
||||
GroundTruthRow,
|
||||
HorizontalErrorDistribution,
|
||||
horizontal_error_distribution,
|
||||
l2_horizontal_m,
|
||||
match_percentage,
|
||||
percentile_sorted,
|
||||
)
|
||||
from gps_denied_onboard.helpers.imu_preintegrator import (
|
||||
CombinedImuFactor,
|
||||
ImuPreintegrationError,
|
||||
@@ -59,10 +75,13 @@ from gps_denied_onboard.helpers.wgs_converter import (
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"AC3_GATE_PCT",
|
||||
"AC3_GATE_THRESHOLD_M",
|
||||
"ALLOWED_DTYPES",
|
||||
"ALLOWED_PRECISIONS",
|
||||
"ENGINE_SUFFIX",
|
||||
"MAX_ZOOM",
|
||||
"ReportContext",
|
||||
"SE3",
|
||||
"SIDECAR_SUFFIX",
|
||||
"WEB_MERCATOR_MAX_LAT_DEG",
|
||||
@@ -71,6 +90,8 @@ __all__ = [
|
||||
"DescriptorNormaliserError",
|
||||
"EngineFilenameSchema",
|
||||
"EngineFilenameSchemaError",
|
||||
"GroundTruthRow",
|
||||
"HorizontalErrorDistribution",
|
||||
"ImuPreintegrationError",
|
||||
"ImuPreintegrator",
|
||||
"LightGlueConcurrentAccessError",
|
||||
@@ -86,11 +107,18 @@ __all__ = [
|
||||
"WgsConverter",
|
||||
"adjoint",
|
||||
"exp_map",
|
||||
"format_failure_message",
|
||||
"horizontal_error_distribution",
|
||||
"is_valid_rotation",
|
||||
"iso_ts_from_clock",
|
||||
"iso_ts_now",
|
||||
"l2_horizontal_m",
|
||||
"log_map",
|
||||
"match_percentage",
|
||||
"make_imu_preintegrator",
|
||||
"matrix_to_se3",
|
||||
"percentile_sorted",
|
||||
"render_report",
|
||||
"se3_to_matrix",
|
||||
"verdict_passes_ac3",
|
||||
]
|
||||
|
||||
@@ -0,0 +1,190 @@
|
||||
"""Markdown accuracy-report writer (AZ-699 + AZ-701).
|
||||
|
||||
Renders a :class:`HorizontalErrorDistribution` (the production
|
||||
helper in ``gps_denied_onboard.helpers.gps_compare``) plus run
|
||||
context (calibration acquisition method, clip duration, fixture
|
||||
paths) into the canonical Markdown layout consumed by
|
||||
``_docs/06_metrics/real_flight_validation_{date}.md``.
|
||||
|
||||
Originally implemented as a test helper under
|
||||
``tests/e2e/replay/_report_writer.py`` (AZ-699 batch 100). Promoted
|
||||
to production code in AZ-701 (batch 102) because the ``replay_api``
|
||||
HTTP service needs to render the same report for every replay job
|
||||
the operator submits, and a test-only helper cannot be imported
|
||||
from production code per the module-layout rule.
|
||||
|
||||
Style: every function is pure; the side effect (writing the file)
|
||||
is the caller's. Tests in ``tests/unit/test_az699_report_writer.py``
|
||||
exercise both the rendering and the threshold-gate verdict logic.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from gps_denied_onboard.helpers.gps_compare import HorizontalErrorDistribution
|
||||
|
||||
__all__ = [
|
||||
"AC3_GATE_PCT",
|
||||
"AC3_GATE_THRESHOLD_M",
|
||||
"ReportContext",
|
||||
"format_failure_message",
|
||||
"render_report",
|
||||
"verdict_passes_ac3",
|
||||
]
|
||||
|
||||
|
||||
# AZ-696 epic AC-3 threshold + minimum-share gate. Keeping these
|
||||
# named constants here (rather than inlined into the test) so the
|
||||
# unit tests for the failure-message template can pin them.
|
||||
AC3_GATE_THRESHOLD_M: float = 100.0
|
||||
AC3_GATE_PCT: float = 80.0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ReportContext:
    """Immutable description of one validation run, shown in the report header.

    Attributes:
        run_date_utc: Run date as an ISO-8601 ``YYYY-MM-DD`` string; it
            ends up in the report title and in the report filename.
        tlog_path: Location of the real tlog that fed the runner.
        video_path: Location of the video clip that fed the runner.
        calibration_acquisition_method: Where the camera calibration came
            from (e.g. ``"factory-sheet"`` for AZ-702, ``"placeholder"``
            for the adti26 fallback). Echoed in the AZ-699 AC-3 failure
            message.
        clip_duration_s: Length of the analysed clip, in seconds.
        emissions_count: Number of estimator-output records read from the
            JSONL. Can differ from ``distribution.count`` when some
            emissions fall outside the ground-truth window.
    """

    run_date_utc: str
    tlog_path: Path
    video_path: Path
    calibration_acquisition_method: str
    clip_duration_s: float
    emissions_count: int
|
||||
|
||||
|
||||
def verdict_passes_ac3(distribution: HorizontalErrorDistribution) -> bool:
    """Decide whether a run satisfies the AZ-696 epic AC-3 gate.

    The gate passes only when at least one emission was paired with ground
    truth, the distribution carries a hit share for the
    ``AC3_GATE_THRESHOLD_M`` bucket, and that share (converted to percent)
    reaches ``AC3_GATE_PCT``.
    """
    # A run with no pairings can never pass, whatever the share table says.
    if distribution.count == 0:
        return False
    hit_share = distribution.threshold_hit_share.get(AC3_GATE_THRESHOLD_M)
    # A missing bucket (custom threshold set without the gate value) fails.
    return hit_share is not None and hit_share * 100.0 >= AC3_GATE_PCT
|
||||
|
||||
|
||||
def format_failure_message(
    distribution: HorizontalErrorDistribution,
    context: ReportContext,
) -> str:
    """Compose the one-line AZ-699 AC-3 failure message.

    The text states the measured hit percentage for the
    ``AC3_GATE_THRESHOLD_M`` bucket (``0.0`` when the bucket is absent), the
    required percentage, the residual statistics (mean / p50 / p95 / p99),
    the calibration acquisition method, and the path of the full report, so
    an operator can attribute a failure to its likely root cause without
    re-reading the source.
    """
    hit_pct = distribution.threshold_hit_share.get(AC3_GATE_THRESHOLD_M, 0.0) * 100.0
    gate_part = (
        f"AZ-699 AC-3: only {hit_pct:.1f} % of {distribution.count} "
        f"emissions within {AC3_GATE_THRESHOLD_M:.0f} m of ground "
        f"truth; epic threshold is {AC3_GATE_PCT:.0f} %. "
    )
    residual_part = (
        f"Residual: mean={distribution.horizontal_error_mean_m:.1f} m, "
        f"p50={distribution.horizontal_error_p50_m:.1f} m, "
        f"p95={distribution.horizontal_error_p95_m:.1f} m, "
        f"p99={distribution.horizontal_error_p99_m:.1f} m. "
    )
    calibration_part = f"Calibration: {context.calibration_acquisition_method}. "
    pointer_part = (
        "See _docs/06_metrics/real_flight_validation_"
        f"{context.run_date_utc}.md for the full distribution."
    )
    return "".join((gate_part, residual_part, calibration_part, pointer_part))
|
||||
|
||||
|
||||
def render_report(
    distribution: HorizontalErrorDistribution,
    context: ReportContext,
    *,
    passed: bool,
) -> str:
    """Produce the Markdown body of the accuracy report.

    Sections, in order: title, verdict line, run context, horizontal-error
    table, threshold-hit table (ascending threshold), and a vertical-error
    section that is either a table or a "skipped" note when no vertical
    samples exist. This layout is the schema referenced by
    ``_docs/02_document/tests/blackbox-tests.md``.

    Args:
        distribution: Aggregated error statistics to render.
        context: Run metadata shown in the header.
        passed: Verdict computed by the caller; rendered as PASS / FAIL.

    Returns:
        The report text, newline-joined and terminated by one newline.
    """
    verdict = "FAIL" if not passed else "PASS"

    header = [
        f"# Real-flight validation — {context.run_date_utc}",
        "",
        (
            f"**Verdict**: {verdict} (AC-3 gate: "
            f"≥ {AC3_GATE_PCT:.0f} % within "
            f"{AC3_GATE_THRESHOLD_M:.0f} m)"
        ),
        "",
    ]

    run_context = [
        "## Run context",
        "",
        f"- Tlog: `{context.tlog_path}`",
        f"- Video: `{context.video_path}`",
        f"- Calibration acquisition method: {context.calibration_acquisition_method}",
        f"- Clip duration: {context.clip_duration_s:.1f} s",
        f"- Emissions consumed: {context.emissions_count}",
        f"- Ground-truth pairings: {distribution.count}",
        "",
    ]

    horizontal_stats = (
        ("Mean", distribution.horizontal_error_mean_m),
        ("p50", distribution.horizontal_error_p50_m),
        ("p95", distribution.horizontal_error_p95_m),
        ("p99", distribution.horizontal_error_p99_m),
    )
    horizontal = [
        "## Horizontal error (metres)",
        "",
        "| Statistic | Value |",
        "| --------- | ----- |",
        *(f"| {label} | {metres:.2f} |" for label, metres in horizontal_stats),
        "",
    ]

    # Sorting the items keeps the table order stable across runs.
    thresholds = [
        "## Threshold-hit share",
        "",
        "| Threshold (m) | Hit share (%) |",
        "| ------------- | ------------- |",
        *(
            f"| {limit_m:g} | {hit_share * 100.0:.1f} |"
            for limit_m, hit_share in sorted(distribution.threshold_hit_share.items())
        ),
        "",
    ]

    if distribution.vertical_count > 0:
        vertical = [
            "## Vertical error (metres)",
            "",
            "| Statistic | Value |",
            "| --------- | ----- |",
            f"| Mean | {distribution.vertical_error_mean_m:.2f} |",
            f"| p50 | {distribution.vertical_error_p50_m:.2f} |",
            f"| p95 | {distribution.vertical_error_p95_m:.2f} |",
            f"| Samples | {distribution.vertical_count} |",
            "",
        ]
    else:
        vertical = [
            "## Vertical error",
            "",
            "_No emissions carried a comparable altitude — vertical stats skipped._",
            "",
        ]

    sections = [*header, *run_context, *horizontal, *thresholds, *vertical]
    return "\n".join(sections) + "\n"
|
||||
@@ -0,0 +1,282 @@
|
||||
"""WGS84 GPS comparison helpers (AZ-697 / E-DEMO-REPLAY).
|
||||
|
||||
Production helpers for comparing estimator GPS emissions against a
|
||||
ground-truth track. Promoted from the AZ-404 e2e test helpers so the
|
||||
AZ-699 (real-flight validation runner) and AZ-701 (HTTP replay API)
|
||||
code paths can consume them without dragging ``tests/`` into the
|
||||
import graph.
|
||||
|
||||
The numerical kernels are identical to the prior test-helpers location;
|
||||
the snapshot test in ``tests/unit/helpers/test_gps_compare.py`` pins
|
||||
that equivalence so a future change to either side breaks loudly.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
__all__ = [
|
||||
"GroundTruthRow",
|
||||
"HorizontalErrorDistribution",
|
||||
"horizontal_error_distribution",
|
||||
"l2_horizontal_m",
|
||||
"match_percentage",
|
||||
"percentile_sorted",
|
||||
]
|
||||
|
||||
|
||||
# WGS84 mean Earth radius. Matches the value used by
|
||||
# `helpers/wgs_converter.py` (AZ-279) so this comparison stays
|
||||
# consistent with the production geodesy converter.
|
||||
_EARTH_RADIUS_M: float = 6_371_008.8
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class GroundTruthRow:
    """A single ground-truth GPS sample: a time plus latitude, longitude and altitude.

    Attributes:
        t_s: Sample time in seconds; compared against emission timestamps
            to find the nearest-in-time row.
        lat_deg: Latitude in degrees.
        lon_deg: Longitude in degrees.
        alt_m: Altitude in metres; a NaN value marks a row without altitude.
    """

    t_s: float
    lat_deg: float
    lon_deg: float
    alt_m: float
|
||||
|
||||
|
||||
def l2_horizontal_m(
    lat1_deg: float, lon1_deg: float, lat2_deg: float, lon2_deg: float
) -> float:
    """Return the great-circle distance in metres between two lat/lon points.

    Uses the haversine formula on a sphere whose radius is the C5/AZ-279
    mean Earth radius. The spherical model deviates from the WGS84
    ellipsoid by < 0.5 % in the [-60°, 60°] latitude band, which is
    sufficient for the AZ-696 epic's ≤ 100 m AC-3 threshold.
    """
    lat1_rad = math.radians(lat1_deg)
    lat2_rad = math.radians(lat2_deg)
    half_dlat = (lat2_rad - lat1_rad) / 2.0
    half_dlon = math.radians(lon2_deg - lon1_deg) / 2.0
    haversine = (
        math.sin(half_dlat) ** 2
        + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(half_dlon) ** 2
    )
    # Clamp to 1.0 so rounding error can never push asin() out of its domain.
    central_angle = 2.0 * math.asin(min(1.0, math.sqrt(haversine)))
    return _EARTH_RADIUS_M * central_angle
|
||||
|
||||
|
||||
def match_percentage(
    emissions: list[dict[str, Any]],
    ground_truth: list[GroundTruthRow],
    *,
    threshold_m: float,
) -> float:
    """Return the fraction of emissions lying within ``threshold_m`` of ground truth.

    Each emitted ``EstimatorOutput`` JSONL record is paired with the
    ground-truth row closest to it in time; the pair counts as a hit when
    their horizontal distance is ≤ ``threshold_m``. The result is the hit
    ratio in ``[0.0, 1.0]``.

    Nearest-in-time pairing is adequate when the ground-truth cadence
    (5–10 Hz for tlog GPS) puts the chosen row within ~100 ms of the emit
    timestamp, well below typical drone-replay error budgets.

    Args:
        emissions: Records with an ``emitted_at`` timestamp in nanoseconds
            and a ``position_wgs84`` mapping holding ``lat_deg`` / ``lon_deg``.
        ground_truth: Ground-truth rows, in any order.
        threshold_m: Maximum horizontal distance that still counts as a hit.

    Returns:
        ``0.0`` for empty ``emissions``; otherwise hits / number of emissions.

    Raises:
        AssertionError: ``emissions`` is non-empty but ``ground_truth`` is empty.
    """
    if not emissions:
        return 0.0
    if not ground_truth:
        raise AssertionError("ground_truth must be non-empty")

    rows_by_time = sorted(ground_truth, key=lambda row: row.t_s)
    row_times = [row.t_s for row in rows_by_time]

    def _is_hit(record: dict[str, Any]) -> bool:
        t_emit_s = int(record["emitted_at"]) / 1e9
        pos = _bisect_left(row_times, t_emit_s)
        # The row just before and the row at the insertion point are the only
        # candidates; the slice drops whichever falls outside the list.
        neighbours = rows_by_time[max(pos - 1, 0) : pos + 1]
        closest = min(neighbours, key=lambda row: abs(row.t_s - t_emit_s))
        fix = record["position_wgs84"]
        distance_m = l2_horizontal_m(
            fix["lat_deg"],
            fix["lon_deg"],
            closest.lat_deg,
            closest.lon_deg,
        )
        return distance_m <= threshold_m

    return sum(1 for record in emissions if _is_hit(record)) / len(emissions)
|
||||
|
||||
|
||||
def _bisect_left(seq: list[float], target: float) -> int:
    """Return the leftmost index at which ``target`` can be inserted into sorted ``seq``.

    Hand-written equivalent of the stdlib ``bisect_left``, kept local so the
    module's import surface stays narrow.
    """
    left = 0
    right = len(seq)
    while left < right:
        pivot = (left + right) // 2
        if seq[pivot] < target:
            # Everything up to and including the pivot is too small.
            left = pivot + 1
            continue
        right = pivot
    return left
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AZ-699: accuracy distribution helpers for the real-flight runner
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
# Threshold-hit buckets for the AZ-696 epic's distance gates. Reported
|
||||
# in this exact order so report tables stay stable across runs.
|
||||
_DEFAULT_THRESHOLDS_M: tuple[float, ...] = (10.0, 25.0, 50.0, 100.0)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class HorizontalErrorDistribution:
    """Summary statistics of one estimator run's position error.

    Horizontal distances are great-circle metres (see
    :func:`l2_horizontal_m`). ``threshold_hit_share`` maps each threshold
    bucket (by default those in :data:`_DEFAULT_THRESHOLDS_M`) to a share in
    ``[0.0, 1.0]``. ``count`` is the number of emissions that contributed,
    i.e. that had at least one ground-truth candidate to compare against.

    The ``vertical_*`` fields cover only samples where the emission carries
    an ``alt_m`` component AND the matched ground-truth row carries an
    altitude; samples without both are left out of the statistics and of
    ``vertical_count``.

    Style: this DTO is the source of truth for AZ-699's report writer and
    for downstream HTTP-API / dashboard consumers.
    """

    count: int
    horizontal_error_mean_m: float
    horizontal_error_p50_m: float
    horizontal_error_p95_m: float
    horizontal_error_p99_m: float
    threshold_hit_share: dict[float, float]
    vertical_count: int
    vertical_error_mean_m: float
    vertical_error_p50_m: float
    vertical_error_p95_m: float
|
||||
|
||||
|
||||
def percentile_sorted(values_sorted: list[float], pct: float) -> float:
    """Return the ``pct``-th percentile of an ascending-sorted list.

    ``pct`` is expressed in ``[0.0, 100.0]``; values at or beyond either end
    clamp to the first / last element. Between two ranks the result is
    linearly interpolated, following numpy's default ``linear`` rule so the
    report numbers can be reproduced with numpy or scipy. An empty list
    yields ``0.0`` so report formatters never hit a divide-by-zero; callers
    are expected to gate on ``count > 0`` for honest reporting.
    """
    if not values_sorted:
        return 0.0
    if pct <= 0.0:
        return values_sorted[0]
    if pct >= 100.0:
        return values_sorted[-1]

    # Fractional index into the list for the requested percentile.
    position = (pct / 100.0) * (len(values_sorted) - 1)
    below = math.floor(position)
    above = math.ceil(position)
    if below == above:
        return values_sorted[below]

    weight = position - below
    return values_sorted[below] * (1.0 - weight) + values_sorted[above] * weight
|
||||
|
||||
|
||||
def horizontal_error_distribution(
    emissions: list[dict[str, Any]],
    ground_truth: list[GroundTruthRow],
    *,
    thresholds_m: tuple[float, ...] = _DEFAULT_THRESHOLDS_M,
) -> HorizontalErrorDistribution:
    """Walk all emissions once, return aggregated horizontal-error stats.

    Pairs each emission with the nearest-in-time ground-truth row using the
    same bisect strategy as :func:`match_percentage`. Vertical error is
    computed only when both the emission record and the matched row carry a
    numeric (non-NaN) altitude; any other sample is left out of the vertical
    statistics and of ``vertical_count``.

    Empty emissions produce a zero-count distribution (every numeric field
    is ``0.0``). The caller is responsible for surfacing "no emissions" as a
    separate fail condition before consuming the report — the helper itself
    never raises on empty emissions so report formatting stays defensive.

    Args:
        emissions: Records with an ``emitted_at`` timestamp in nanoseconds
            and a ``position_wgs84`` mapping holding ``lat_deg`` /
            ``lon_deg`` and optionally ``alt_m``.
        ground_truth: Ground-truth rows, in any order.
        thresholds_m: Distance buckets for which a hit share is reported.

    Returns:
        The aggregated :class:`HorizontalErrorDistribution`.

    Raises:
        AssertionError: ``ground_truth`` is empty.
    """
    if not ground_truth:
        raise AssertionError("ground_truth must be non-empty")
    if not emissions:
        return HorizontalErrorDistribution(
            count=0,
            horizontal_error_mean_m=0.0,
            horizontal_error_p50_m=0.0,
            horizontal_error_p95_m=0.0,
            horizontal_error_p99_m=0.0,
            threshold_hit_share={t: 0.0 for t in thresholds_m},
            vertical_count=0,
            vertical_error_mean_m=0.0,
            vertical_error_p50_m=0.0,
            vertical_error_p95_m=0.0,
        )

    gt_sorted = sorted(ground_truth, key=lambda r: r.t_s)
    gt_times = [r.t_s for r in gt_sorted]

    horiz_errors: list[float] = []
    vert_errors: list[float] = []
    for emit in emissions:
        emit_t_s = int(emit["emitted_at"]) / 1e9
        idx = _bisect_left(gt_times, emit_t_s)
        # Only the rows on either side of the insertion point can be nearest.
        candidates = []
        if idx > 0:
            candidates.append(gt_sorted[idx - 1])
        if idx < len(gt_sorted):
            candidates.append(gt_sorted[idx])
        nearest = min(candidates, key=lambda r: abs(r.t_s - emit_t_s))

        emit_pos = emit["position_wgs84"]
        horiz_errors.append(
            l2_horizontal_m(
                emit_pos["lat_deg"],
                emit_pos["lon_deg"],
                nearest.lat_deg,
                nearest.lon_deg,
            )
        )

        emit_alt = emit_pos.get("alt_m")
        if emit_alt is None or math.isnan(nearest.alt_m):
            continue
        emit_alt_m = float(emit_alt)
        # ``json.loads`` accepts NaN, so an emission can carry a NaN altitude.
        # Treat it like a missing one: a single NaN would otherwise turn the
        # vertical mean into NaN and corrupt the sorted percentile input.
        if math.isnan(emit_alt_m):
            continue
        vert_errors.append(abs(emit_alt_m - nearest.alt_m))

    horiz_sorted = sorted(horiz_errors)
    horiz_mean = sum(horiz_errors) / len(horiz_errors)
    thresholds_share = {
        t: sum(1 for e in horiz_errors if e <= t) / len(horiz_errors)
        for t in thresholds_m
    }

    if vert_errors:
        vert_sorted = sorted(vert_errors)
        vert_mean = sum(vert_errors) / len(vert_errors)
        vert_p50 = percentile_sorted(vert_sorted, 50.0)
        vert_p95 = percentile_sorted(vert_sorted, 95.0)
    else:
        vert_mean = 0.0
        vert_p50 = 0.0
        vert_p95 = 0.0

    return HorizontalErrorDistribution(
        count=len(horiz_errors),
        horizontal_error_mean_m=horiz_mean,
        horizontal_error_p50_m=percentile_sorted(horiz_sorted, 50.0),
        horizontal_error_p95_m=percentile_sorted(horiz_sorted, 95.0),
        horizontal_error_p99_m=percentile_sorted(horiz_sorted, 99.0),
        threshold_hit_share=thresholds_share,
        vertical_count=len(vert_errors),
        vertical_error_mean_m=vert_mean,
        vertical_error_p50_m=vert_p50,
        vertical_error_p95_m=vert_p95,
    )
|
||||
@@ -0,0 +1,54 @@
|
||||
"""AZ-701 — `replay_api` HTTP service.
|
||||
|
||||
Operator-side HTTP wrapper around the offline replay pipeline:
|
||||
`gps-denied-replay` (AZ-402) + `gps-denied-render-map` (AZ-700).
|
||||
|
||||
Lives outside the airborne binary — see contract at
|
||||
``_docs/02_document/contracts/replay_api/replay_api_protocol.md``.
|
||||
|
||||
Public surface (re-exports below) is intentionally narrow:
|
||||
- ``create_app`` — FastAPI app factory (for uvicorn + tests).
|
||||
- ``JobRegistry`` + ``JobRecord`` + ``JobState`` — job-state machinery.
|
||||
- ``ReplayRunner`` Protocol — DI seam (handlers depend on the
|
||||
Protocol, not the concrete subprocess runner; unit tests inject
|
||||
a fake runner).
|
||||
- DTOs — ``JobSnapshot``, ``ReplayJobResult``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from gps_denied_onboard.replay_api.app import create_app
|
||||
from gps_denied_onboard.replay_api.errors import (
|
||||
JobNotCompleteError,
|
||||
JobNotFoundError,
|
||||
ReplayApiError,
|
||||
ReplayRunnerError,
|
||||
UnsupportedFileKindError,
|
||||
)
|
||||
from gps_denied_onboard.replay_api.interface import (
|
||||
JobSnapshot,
|
||||
JobState,
|
||||
ReplayJobResult,
|
||||
ReplayRunner,
|
||||
)
|
||||
from gps_denied_onboard.replay_api.jobs import (
|
||||
ConcurrencyLimitReachedError,
|
||||
JobRecord,
|
||||
JobRegistry,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"ConcurrencyLimitReachedError",
|
||||
"JobNotCompleteError",
|
||||
"JobNotFoundError",
|
||||
"JobRecord",
|
||||
"JobRegistry",
|
||||
"JobSnapshot",
|
||||
"JobState",
|
||||
"ReplayApiError",
|
||||
"ReplayJobResult",
|
||||
"ReplayRunner",
|
||||
"ReplayRunnerError",
|
||||
"UnsupportedFileKindError",
|
||||
"create_app",
|
||||
]
|
||||
@@ -0,0 +1,677 @@
|
||||
"""AZ-701 — FastAPI app factory + production subprocess runner.
|
||||
|
||||
The factory takes the (runner, storage, registry) trio so unit
|
||||
tests can wire in fakes; ``main()`` (in
|
||||
``cli/replay_api_entrypoint.py``) constructs the production
|
||||
subprocess runner against the configured environment.
|
||||
|
||||
Note: this file deliberately does NOT use ``from __future__ import
|
||||
annotations``. FastAPI 0.119 + Pydantic 2.x resolve the route
|
||||
parameter annotations at decoration time, which requires the
|
||||
``Annotated[UploadFile, File()]`` form to be evaluable as real
|
||||
types — not as forward-ref strings. Other modules in the
|
||||
``replay_api`` package keep the future-annotations import; only
|
||||
this one drops it for the route signatures.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from collections.abc import AsyncIterator
|
||||
from contextlib import asynccontextmanager
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from gps_denied_onboard.replay_api.errors import (
|
||||
ConcurrencyLimitReachedError,
|
||||
JobNotCompleteError,
|
||||
JobNotFoundError,
|
||||
MultipartMissingFieldError,
|
||||
PayloadTooLargeError,
|
||||
ReplayApiError,
|
||||
ReplayRunnerError,
|
||||
UnauthorizedError,
|
||||
UnsupportedFileKindError,
|
||||
)
|
||||
from gps_denied_onboard.replay_api.handlers import (
|
||||
MIN_TLOG_PROBE_BYTES,
|
||||
MIN_VIDEO_PROBE_BYTES,
|
||||
auth_required,
|
||||
expected_bearer_token,
|
||||
extract_bearer_token,
|
||||
validate_calibration_kind,
|
||||
validate_tlog_kind,
|
||||
validate_upload_size,
|
||||
validate_video_kind,
|
||||
)
|
||||
from gps_denied_onboard.replay_api.interface import (
|
||||
JobSnapshot,
|
||||
JobState,
|
||||
ReplayInputs,
|
||||
ReplayJobResult,
|
||||
ReplayRunner,
|
||||
)
|
||||
from gps_denied_onboard.replay_api.jobs import JobRegistry
|
||||
from gps_denied_onboard.replay_api.storage import StorageRoot
|
||||
|
||||
__all__ = ["SubprocessReplayRunner", "build_runner_from_env", "create_app"]
|
||||
|
||||
|
||||
_LOGGER = logging.getLogger("gps_denied_onboard.replay_api")
|
||||
|
||||
|
||||
_PROBE_BYTES_MAX: int = max(MIN_TLOG_PROBE_BYTES, MIN_VIDEO_PROBE_BYTES, 64)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Production runner
|
||||
|
||||
|
||||
class SubprocessReplayRunner:
    """Shells out to ``gps-denied-replay`` + ``gps-denied-render-map``.

    Each ``run()`` call writes a minimal replay-mode ``config.yaml`` into
    the per-job output directory, invokes the replay CLI (with
    ``--auto-trim`` when requested), computes the AZ-699 accuracy report
    from the JSONL + the AZ-697 ground-truth extraction, and renders the
    AZ-700 HTML map. The result is the trio of artefact paths the handler
    streams back to the client.

    The replay step is mandatory: any failure surfaces as
    ``ReplayRunnerError``. The report and map steps are best-effort and
    yield ``None`` when they are skipped.
    """

    def __init__(
        self,
        *,
        replay_binary: str = "gps-denied-replay",
        render_binary: str = "gps-denied-render-map",
        subprocess_timeout_s: float = 900.0,
    ) -> None:
        """Remember the executable names and the replay timeout.

        Args:
            replay_binary: Executable invoked for the replay step.
            render_binary: Executable invoked for the map-render step.
            subprocess_timeout_s: Timeout, in seconds, applied to the
                replay subprocess.
        """
        self._replay_binary = replay_binary
        self._render_binary = render_binary
        self._timeout = subprocess_timeout_s

    def run(
        self, inputs: ReplayInputs, *, output_dir: Path
    ) -> ReplayJobResult:
        """Run the replay CLI, then the best-effort report and map steps.

        Args:
            inputs: Paths of the uploaded artefacts plus the ``pace`` and
                ``auto_trim`` options.
            output_dir: Per-job directory that receives ``config.yaml``,
                ``signing_key.bin``, ``emissions.jsonl`` and the optional
                report / map files.

        Returns:
            A ``ReplayJobResult`` holding the emissions path and the report
            and map paths (``None`` for a step that was skipped).

        Raises:
            ReplayRunnerError: The replay binary could not be started,
                exceeded the timeout, or exited with a non-zero status.
        """
        config_path = output_dir / "config.yaml"
        # NOTE(review): ``inputs.pace`` is interpolated verbatim into the
        # YAML and into argv — presumably validated upstream; confirm.
        config_path.write_text(
            "mode: replay\n"
            "replay:\n"
            f" pace: {inputs.pace}\n"
            " target_fc_dialect: ardupilot_plane\n",
            encoding="utf-8",
        )

        # The CLI is handed a key file of 32 zero bytes — presumably a
        # placeholder because offline replay signs nothing real; confirm.
        signing_key_path = output_dir / "signing_key.bin"
        signing_key_path.write_bytes(b"\x00" * 32)

        emissions_path = output_dir / "emissions.jsonl"
        argv = [
            self._replay_binary,
            "--video",
            str(inputs.video_path),
            "--tlog",
            str(inputs.tlog_path),
            "--output",
            str(emissions_path),
            "--camera-calibration",
            str(inputs.calibration_path),
            "--config",
            str(config_path),
            "--mavlink-signing-key",
            str(signing_key_path),
            "--pace",
            inputs.pace,
        ]
        if inputs.auto_trim:
            argv.append("--auto-trim")

        # Launch failures and timeouts are reported through the same error
        # type as a non-zero exit so callers handle one failure channel.
        try:
            replay_completed = subprocess.run(
                argv,
                capture_output=True,
                text=True,
                timeout=self._timeout,
            )
        except subprocess.TimeoutExpired as exc:
            raise ReplayRunnerError(
                f"{self._replay_binary} timed out after {self._timeout} s",
                details={"timeout_s": self._timeout},
            ) from exc
        except OSError as exc:
            raise ReplayRunnerError(
                f"{self._replay_binary} could not be started: {exc}",
                details={"stderr_tail": ""},
            ) from exc

        if replay_completed.returncode != 0:
            stderr_tail = (replay_completed.stderr or "")[-8192:]
            raise ReplayRunnerError(
                f"{self._replay_binary} exited "
                f"{replay_completed.returncode}",
                details={"stderr_tail": stderr_tail},
            )

        report_path = self._maybe_render_report(
            inputs, emissions_path, output_dir
        )
        map_path = self._maybe_render_map(
            inputs, emissions_path, output_dir, report_path
        )

        return ReplayJobResult(
            emissions_jsonl_path=emissions_path,
            accuracy_report_md_path=report_path,
            map_html_path=map_path,
        )

    def _maybe_render_report(
        self,
        inputs: ReplayInputs,
        emissions_path: Path,
        output_dir: Path,
    ) -> Path | None:
        """Compute the AZ-699 accuracy report; tolerate missing GT.

        Returns:
            Path of the written ``accuracy_report.md``, or ``None`` when the
            helper imports fail, the JSONL holds no emissions, the tlog
            yields no ground-truth records, or no emission could be paired.
        """
        # Stdlib-only import kept local; needed for a timezone-aware "now".
        from datetime import timezone

        try:
            import json

            from gps_denied_onboard.helpers.accuracy_report import (
                AC3_GATE_THRESHOLD_M,
                ReportContext,
                render_report,
                verdict_passes_ac3,
            )
            from gps_denied_onboard.helpers.gps_compare import (
                GroundTruthRow,
                horizontal_error_distribution,
            )
            from gps_denied_onboard.replay_input import (
                load_tlog_ground_truth,
            )
        except Exception as exc:
            # Deliberately broad: the report is optional, so any import-time
            # failure downgrades to "no report" instead of failing the job.
            _LOGGER.warning(
                "skipping accuracy report — imports failed: %r", exc
            )
            return None

        emissions: list[dict[str, Any]] = [
            json.loads(line)
            for line in emissions_path.read_text(encoding="utf-8").splitlines()
            if line.strip()
        ]
        if not emissions:
            return None

        gt_series = load_tlog_ground_truth(inputs.tlog_path).records
        if not gt_series:
            return None

        ground_truth = [
            GroundTruthRow(
                t_s=fix.ts_ns / 1e9,
                lat_deg=fix.lat_deg,
                lon_deg=fix.lon_deg,
                alt_m=fix.alt_m,
            )
            for fix in gt_series
        ]
        distribution = horizontal_error_distribution(emissions, ground_truth)
        if distribution.count == 0:
            return None

        try:
            calibration_method = _calibration_acquisition_method(
                inputs.calibration_path
            )
        except (OSError, ValueError):
            calibration_method = "unknown"

        # Span between the first and last ground-truth record stands in for
        # the clip duration — assumes the records are time-ordered (TODO
        # confirm against ``load_tlog_ground_truth``).
        clip_duration_s = (
            ground_truth[-1].t_s - ground_truth[0].t_s
            if len(ground_truth) > 1
            else 0.0
        )
        context = ReportContext(
            # ``datetime.utcnow()`` is deprecated; the aware form yields the
            # same UTC calendar date.
            run_date_utc=datetime.now(timezone.utc).date().isoformat(),
            tlog_path=inputs.tlog_path,
            video_path=inputs.video_path,
            calibration_acquisition_method=calibration_method,
            clip_duration_s=clip_duration_s,
            emissions_count=len(emissions),
        )
        passed = verdict_passes_ac3(distribution)
        # Touch the threshold constant so a future rename surfaces here too.
        assert AC3_GATE_THRESHOLD_M > 0.0
        report_text = render_report(distribution, context, passed=passed)
        report_path = output_dir / "accuracy_report.md"
        # The report contains non-ASCII characters ("—", "≥"); pin the
        # encoding so the write does not depend on the process locale.
        report_path.write_text(report_text, encoding="utf-8")
        return report_path

    def _maybe_render_map(
        self,
        inputs: ReplayInputs,
        emissions_path: Path,
        output_dir: Path,
        report_path: Path | None,
    ) -> Path | None:
        """Render the AZ-700 HTML map on a best-effort basis.

        The render binary is looked up on ``PATH`` first and next to the
        running interpreter second. Every failure mode (binary missing,
        launch error, timeout, non-zero exit) is logged and reported as
        ``None``; it never fails the job.

        Returns:
            Path of ``map.html`` on success, otherwise ``None``.
        """
        if not shutil.which(self._render_binary):
            venv_bin = Path(sys.executable).parent / self._render_binary
            if not venv_bin.exists():
                _LOGGER.warning(
                    "%s not on PATH — skipping map render",
                    self._render_binary,
                )
                return None
            render_bin = str(venv_bin)
        else:
            render_bin = self._render_binary

        map_path = output_dir / "map.html"
        argv = [
            render_bin,
            "--estimated",
            str(emissions_path),
            "--truth",
            str(inputs.tlog_path),
            "--output",
            str(map_path),
        ]
        if report_path is not None:
            argv.extend(["--summary", str(report_path)])

        try:
            completed = subprocess.run(
                argv, capture_output=True, text=True, timeout=120
            )
        except (subprocess.TimeoutExpired, OSError) as exc:
            # The map is optional: a hung or unlaunchable renderer must not
            # discard an otherwise successful replay.
            _LOGGER.warning(
                "%s could not complete — map render skipped (%r)",
                self._render_binary,
                exc,
            )
            return None

        if completed.returncode != 0:
            _LOGGER.warning(
                "%s exited %s — map render skipped (stderr_tail=%r)",
                self._render_binary,
                completed.returncode,
                (completed.stderr or "")[-2048:],
            )
            return None
        return map_path
|
||||
|
||||
|
||||
def _calibration_acquisition_method(calibration_path: Path) -> str:
    """Read the ``acquisition_method`` field from a calibration JSON file.

    Args:
        calibration_path: Path of the camera-calibration JSON document.

    Returns:
        The field's value when it is a non-empty string; ``"unknown"`` when
        the field is absent, empty, not a string, or the document's root is
        not a JSON object.

    Raises:
        OSError: The file cannot be read.
        ValueError: The file is not valid UTF-8 JSON
            (``json.JSONDecodeError`` and ``UnicodeDecodeError`` are both
            ``ValueError`` subclasses).
    """
    import json

    data = json.loads(calibration_path.read_text(encoding="utf-8"))
    # A valid JSON document may be a list or scalar; ``.get`` on those would
    # raise AttributeError, which the caller's (OSError, ValueError) handler
    # does not cover.
    if not isinstance(data, dict):
        return "unknown"
    method = data.get("acquisition_method")
    if isinstance(method, str) and method:
        return method
    return "unknown"
|
||||
|
||||
|
||||
def build_runner_from_env() -> SubprocessReplayRunner:
    """Create the production runner from ``REPLAY_API_*`` environment variables.

    Reads ``REPLAY_API_REPLAY_BINARY`` (default ``gps-denied-replay``),
    ``REPLAY_API_RENDER_BINARY`` (default ``gps-denied-render-map``) and
    ``REPLAY_API_SUBPROCESS_TIMEOUT_S`` (default ``900``, parsed as a float).
    """
    env = os.environ
    replay_binary = env.get("REPLAY_API_REPLAY_BINARY", "gps-denied-replay")
    render_binary = env.get("REPLAY_API_RENDER_BINARY", "gps-denied-render-map")
    timeout_s = float(env.get("REPLAY_API_SUBPROCESS_TIMEOUT_S", "900"))
    return SubprocessReplayRunner(
        replay_binary=replay_binary,
        render_binary=render_binary,
        subprocess_timeout_s=timeout_s,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# FastAPI app factory
|
||||
|
||||
|
||||
def create_app(
|
||||
*,
|
||||
runner: ReplayRunner,
|
||||
storage: StorageRoot,
|
||||
registry: JobRegistry | None = None,
|
||||
max_upload_bytes: int = 2 * 1024 * 1024 * 1024,
|
||||
sync_max_bytes: int = 200 * 1024 * 1024,
|
||||
) -> Any:
|
||||
"""Build the FastAPI app.
|
||||
|
||||
Args:
|
||||
runner: ``ReplayRunner`` injected into the registry.
|
||||
storage: Per-job storage manager.
|
||||
registry: Pre-built ``JobRegistry`` (the unit tests inject a
|
||||
tuned one; production wiring builds one from env).
|
||||
max_upload_bytes: hard limit per multipart upload (413 above).
|
||||
sync_max_bytes: video size at which the API switches to async.
|
||||
"""
|
||||
try:
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import (
|
||||
FastAPI,
|
||||
File,
|
||||
Form,
|
||||
Header,
|
||||
HTTPException,
|
||||
Request,
|
||||
Response,
|
||||
UploadFile,
|
||||
)
|
||||
from fastapi.responses import FileResponse, JSONResponse
|
||||
except ImportError as exc:
|
||||
raise SystemExit(
|
||||
"FastAPI is not installed. Install with "
|
||||
"`pip install gps-denied-onboard[operator-tools]`."
|
||||
) from exc
|
||||
|
||||
if registry is None:
|
||||
registry = JobRegistry(
|
||||
runner=runner,
|
||||
storage=storage,
|
||||
max_concurrent=int(
|
||||
os.environ.get("REPLAY_API_MAX_CONCURRENT_JOBS", "1")
|
||||
),
|
||||
max_queued=int(
|
||||
os.environ.get("REPLAY_API_MAX_QUEUED_JOBS", "8")
|
||||
),
|
||||
)
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(_: Any) -> AsyncIterator[None]:
|
||||
if not auth_required():
|
||||
_LOGGER.warning(
|
||||
"REPLAY_API_AUTH_REQUIRED=false — bearer auth is DISABLED. "
|
||||
"Do not run this in any environment exposed to the internet."
|
||||
)
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
registry.shutdown(wait=False)
|
||||
|
||||
app = FastAPI(
|
||||
title="gps-denied-onboard replay API",
|
||||
version="1.0.0",
|
||||
description=(
|
||||
"HTTP wrapper around the offline `gps-denied-replay` "
|
||||
"pipeline. Upload (tlog + video [+ calibration]); "
|
||||
"receive GPS fixes + an accuracy report + an HTML map."
|
||||
),
|
||||
lifespan=lifespan,
|
||||
)
|
||||
|
||||
@app.exception_handler(ReplayApiError)
|
||||
async def _on_replay_api_error(
|
||||
_request: Request, exc: ReplayApiError
|
||||
) -> JSONResponse:
|
||||
return JSONResponse(
|
||||
status_code=exc.status_code,
|
||||
content={
|
||||
"error_code": exc.error_code,
|
||||
"message": exc.message,
|
||||
"details": exc.details,
|
||||
},
|
||||
)
|
||||
|
||||
def _check_auth(authorization: str | None) -> None:
|
||||
if not auth_required():
|
||||
return
|
||||
expected = expected_bearer_token()
|
||||
actual = extract_bearer_token(authorization)
|
||||
if expected is None or actual != expected:
|
||||
raise UnauthorizedError("bearer token does not match")
|
||||
|
||||
@app.get("/healthz")
|
||||
async def healthz() -> dict[str, str]:
|
||||
return {"status": "ok"}
|
||||
|
||||
@app.get("/readyz")
|
||||
async def readyz() -> Response:
|
||||
binary = os.environ.get(
|
||||
"REPLAY_API_REPLAY_BINARY", "gps-denied-replay"
|
||||
)
|
||||
if shutil.which(binary) is None and not (
|
||||
Path(sys.executable).parent / binary
|
||||
).exists():
|
||||
return JSONResponse(
|
||||
status_code=503,
|
||||
content={
|
||||
"status": "not_ready",
|
||||
"reason": f"{binary} not on PATH",
|
||||
},
|
||||
)
|
||||
if not os.access(storage.root, os.W_OK):
|
||||
return JSONResponse(
|
||||
status_code=503,
|
||||
content={
|
||||
"status": "not_ready",
|
||||
"reason": f"{storage.root} is not writable",
|
||||
},
|
||||
)
|
||||
return JSONResponse(content={"status": "ok"})
|
||||
|
||||
@app.post("/replay")
async def post_replay(
    tlog: Annotated[UploadFile, File()],
    video: Annotated[UploadFile, File()],
    calibration: Annotated[UploadFile | None, File()] = None,
    pace: Annotated[str, Form()] = "asap",
    auto_trim: Annotated[bool, Form()] = True,
    authorization: Annotated[str | None, Header()] = None,
) -> Response:
    """Accept a replay upload and run it in sync or async mode.

    Every upload is read, size-checked against ``max_upload_bytes`` and
    sniffed by its leading bytes before anything is written to disk. When
    the video is no larger than ``sync_max_bytes`` the handler waits for a
    terminal state and answers 200; otherwise it answers 202 with a
    ``Location`` header pointing at the job status URL.

    Raises:
        MultipartMissingFieldError: No ``calibration`` part was uploaded and
            no default calibration file is available.
        ReplayRunnerError: The job finished in state ``failed`` (sync mode).
        Errors from the auth/validation helpers and from
        ``registry.submit`` propagate unchanged.
    """
    _check_auth(authorization)

    tlog_bytes = await tlog.read()
    validate_upload_size(len(tlog_bytes), limit=max_upload_bytes)
    validate_tlog_kind(tlog_bytes[:_PROBE_BYTES_MAX])

    video_bytes = await video.read()
    validate_upload_size(len(video_bytes), limit=max_upload_bytes)
    validate_video_kind(video_bytes[:_PROBE_BYTES_MAX])

    calibration_bytes: bytes | None = None
    if calibration is not None:
        calibration_bytes = await calibration.read()
        validate_upload_size(
            len(calibration_bytes), limit=max_upload_bytes
        )
        validate_calibration_kind(calibration_bytes[:_PROBE_BYTES_MAX])

    # Resolve the calibration source BEFORE allocating storage, and only
    # once: a request that cannot be served must not leave a job directory
    # behind, and a second lookup could disagree with the first.
    default_calibration: Path | None = None
    if calibration_bytes is None:
        default_calibration = _default_calibration_path()
        if default_calibration is None:
            raise MultipartMissingFieldError(
                "calibration field is required (no default calibration "
                "bundled with this build of replay_api)"
            )

    # Allocate per-job storage and write the uploads. From here until the
    # registry accepts the job, any failure releases the directory again.
    job_id = _new_job_id()
    job_storage = storage.allocate_job(job_id)
    try:
        job_storage.tlog_path.write_bytes(tlog_bytes)
        job_storage.video_path.write_bytes(video_bytes)
        if calibration_bytes is not None:
            job_storage.calibration_path.write_bytes(calibration_bytes)
        elif default_calibration is not None:
            shutil.copyfile(
                default_calibration, job_storage.calibration_path
            )

        inputs = ReplayInputs(
            tlog_path=job_storage.tlog_path,
            video_path=job_storage.video_path,
            calibration_path=job_storage.calibration_path,
            pace=pace,
            auto_trim=auto_trim,
        )

        # Submit under the pre-allocated job_id so the storage directory
        # (already populated with the uploads above) and the API-visible
        # job id match.
        snapshot = registry.submit(
            inputs,
            output_dir=job_storage.output_dir,
            job_id=job_id,
        )
    except Exception:
        storage.release_job(job_id)
        raise

    sync_mode = len(video_bytes) <= sync_max_bytes
    if not sync_mode:
        return JSONResponse(
            status_code=202,
            headers={"Location": f"/jobs/{snapshot.job_id}"},
            content=_snapshot_to_dict(snapshot, sync=False),
        )

    # Wait for terminal state in sync mode.
    snapshot = _await_terminal(registry, snapshot.job_id)
    if snapshot.state == JobState.FAILED:
        raise ReplayRunnerError(
            snapshot.error or "replay runner failed without a message"
        )
    return JSONResponse(
        status_code=200,
        content=_snapshot_to_dict(snapshot, sync=True),
    )
|
||||
|
||||
@app.get("/jobs/{job_id}")
|
||||
async def get_job(
|
||||
job_id: str,
|
||||
authorization: Annotated[str | None, Header()] = None,
|
||||
) -> dict[str, Any]:
|
||||
_check_auth(authorization)
|
||||
try:
|
||||
snapshot = registry.get(job_id)
|
||||
except JobNotFoundError:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail={
|
||||
"error_code": "job_not_found",
|
||||
"message": f"job {job_id} not found",
|
||||
},
|
||||
)
|
||||
return _snapshot_to_dict(snapshot, sync=False)
|
||||
|
||||
def _require_done(job_id: str) -> JobSnapshot:
|
||||
snapshot = registry.get(job_id)
|
||||
if snapshot.state != JobState.DONE:
|
||||
raise JobNotCompleteError(
|
||||
f"job {job_id} state is {snapshot.state.value}; "
|
||||
"result is only available when state=done"
|
||||
)
|
||||
return snapshot
|
||||
|
||||
@app.get("/jobs/{job_id}/result")
|
||||
async def get_result(
|
||||
job_id: str,
|
||||
authorization: Annotated[str | None, Header()] = None,
|
||||
) -> Response:
|
||||
_check_auth(authorization)
|
||||
snapshot = _require_done(job_id)
|
||||
if snapshot.result is None:
|
||||
raise JobNotCompleteError("job done but no result attached")
|
||||
return FileResponse(
|
||||
path=snapshot.result.emissions_jsonl_path,
|
||||
media_type="application/x-ndjson",
|
||||
filename="emissions.jsonl",
|
||||
)
|
||||
|
||||
@app.get("/jobs/{job_id}/map")
|
||||
async def get_map(
|
||||
job_id: str,
|
||||
authorization: Annotated[str | None, Header()] = None,
|
||||
) -> Response:
|
||||
_check_auth(authorization)
|
||||
snapshot = _require_done(job_id)
|
||||
if snapshot.result is None or snapshot.result.map_html_path is None:
|
||||
raise JobNotCompleteError("map artefact unavailable")
|
||||
return FileResponse(
|
||||
path=snapshot.result.map_html_path,
|
||||
media_type="text/html",
|
||||
filename="map.html",
|
||||
)
|
||||
|
||||
@app.get("/jobs/{job_id}/report")
|
||||
async def get_report(
|
||||
job_id: str,
|
||||
authorization: Annotated[str | None, Header()] = None,
|
||||
) -> Response:
|
||||
_check_auth(authorization)
|
||||
snapshot = _require_done(job_id)
|
||||
if (
|
||||
snapshot.result is None
|
||||
or snapshot.result.accuracy_report_md_path is None
|
||||
):
|
||||
raise JobNotCompleteError("report artefact unavailable")
|
||||
return FileResponse(
|
||||
path=snapshot.result.accuracy_report_md_path,
|
||||
media_type="text/markdown",
|
||||
filename="accuracy_report.md",
|
||||
)
|
||||
|
||||
# Stash so unit tests can introspect.
|
||||
app.state.registry = registry
|
||||
app.state.storage = storage
|
||||
# Silence unused-import lint on dependency types.
|
||||
_ = (Form, File)
|
||||
# Reference the unused-but-kept errors so a future renamed
|
||||
# member surfaces here loudly.
|
||||
_ = (
|
||||
ConcurrencyLimitReachedError,
|
||||
PayloadTooLargeError,
|
||||
UnsupportedFileKindError,
|
||||
MultipartMissingFieldError,
|
||||
)
|
||||
return app
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Helpers
|
||||
|
||||
|
||||
def _new_job_id() -> str:
    """Return a fresh random job identifier (hex form of a UUID4)."""
    from uuid import uuid4

    identifier = uuid4()
    return identifier.hex
|
||||
|
||||
|
||||
def _default_calibration_path() -> Path | None:
    """Return the fallback calibration file, if one is configured and present.

    Reads ``REPLAY_API_DEFAULT_CALIBRATION`` on every call. The result is
    ``None`` when the variable is unset or empty, or when it does not point
    at an existing regular file.
    """
    configured = os.environ.get("REPLAY_API_DEFAULT_CALIBRATION")
    if configured:
        candidate = Path(configured)
        if candidate.is_file():
            return candidate
    return None
|
||||
|
||||
|
||||
def _await_terminal(registry: JobRegistry, job_id: str) -> JobSnapshot:
    """Poll ``registry`` until ``job_id`` is ``DONE`` or ``FAILED``.

    Sync-mode helper. The wait is a plain blocking sleep loop (50 ms between
    polls), so the calling async handler parks its worker for the duration;
    that is accepted by design because sync mode is the small-file path.

    Raises:
        ReplayRunnerError: The job did not reach a terminal state within the
            30-minute safety bound.
    """
    from time import monotonic, sleep

    terminal_states = (JobState.DONE, JobState.FAILED)
    give_up_at = monotonic() + 1800.0  # 30 min safety bound
    while True:
        if monotonic() >= give_up_at:
            raise ReplayRunnerError("sync replay exceeded 30 min safety bound")
        current = registry.get(job_id)
        if current.state in terminal_states:
            return current
        sleep(0.05)
|
||||
|
||||
|
||||
def _snapshot_to_dict(snapshot: JobSnapshot, *, sync: bool) -> dict[str, Any]:
    """Render a job snapshot as the JSON-serialisable API payload.

    Timestamps become ISO-8601 strings (or ``None`` when unset). Artefact
    URLs are added only when the snapshot has a result; the report and map
    URLs additionally require their respective paths to be present.
    """
    started = snapshot.started_at_utc
    finished = snapshot.finished_at_utc
    payload: dict[str, Any] = {
        "job_id": snapshot.job_id,
        "state": snapshot.state.value,
        "submitted_at_utc": snapshot.submitted_at_utc.isoformat(),
        "started_at_utc": started.isoformat() if started else None,
        "finished_at_utc": finished.isoformat() if finished else None,
        "error": snapshot.error,
        "status_url": f"/jobs/{snapshot.job_id}",
        "sync": sync,
    }

    artefacts = snapshot.result
    if artefacts is None:
        return payload

    payload["emissions_jsonl_url"] = f"/jobs/{snapshot.job_id}/result"
    if artefacts.accuracy_report_md_path is not None:
        payload["accuracy_report_md_url"] = f"/jobs/{snapshot.job_id}/report"
    if artefacts.map_html_path is not None:
        payload["map_html_url"] = f"/jobs/{snapshot.job_id}/map"
    return payload
|
||||
@@ -0,0 +1,86 @@
|
||||
"""AZ-701 — typed HTTP error families for the replay_api service.
|
||||
|
||||
Every error has a stable ``error_code`` (string) the contract pins
|
||||
in ``_docs/02_document/contracts/replay_api/replay_api_protocol.md``.
|
||||
The handler layer translates these into JSON responses; the
|
||||
business layer raises them without knowing about HTTP.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
__all__ = [
|
||||
"ConcurrencyLimitReachedError",
|
||||
"JobNotCompleteError",
|
||||
"JobNotFoundError",
|
||||
"MultipartMissingFieldError",
|
||||
"PayloadTooLargeError",
|
||||
"ReplayApiError",
|
||||
"ReplayRunnerError",
|
||||
"UnauthorizedError",
|
||||
"UnsupportedFileKindError",
|
||||
]
|
||||
|
||||
|
||||
class ReplayApiError(Exception):
    """Root of the typed replay_api error hierarchy.

    Each subclass fixes its own ``error_code`` string and HTTP
    ``status_code``; the exception handler reads those two class attributes
    plus the per-instance ``message`` and ``details`` to build the JSON
    error body.
    """

    # Defaults for an untyped failure.
    status_code: int = 500
    error_code: str = "replay_api_error"

    def __init__(self, message: str, details: dict[str, Any] | None = None) -> None:
        super().__init__(message)
        # A missing (or empty) details mapping is normalised to a fresh dict.
        self.details = details if details else {}
        self.message = message
|
||||
|
||||
|
||||
class UnsupportedFileKindError(ReplayApiError):
    """An upload failed its content-kind check (HTTP 400)."""

    status_code = 400
    error_code = "unsupported_file_kind"
|
||||
|
||||
|
||||
class MultipartMissingFieldError(ReplayApiError):
    """A required multipart form field was not supplied (HTTP 400)."""

    status_code = 400
    error_code = "multipart_missing_field"
|
||||
|
||||
|
||||
class UnauthorizedError(ReplayApiError):
    """Bearer-token authentication failed (HTTP 401)."""

    status_code = 401
    error_code = "unauthorized"
|
||||
|
||||
|
||||
class JobNotFoundError(ReplayApiError):
    """The requested job id is unknown (HTTP 404)."""

    status_code = 404
    error_code = "job_not_found"
|
||||
|
||||
|
||||
class JobNotCompleteError(ReplayApiError):
    """An artefact was requested before it is available (HTTP 409)."""

    status_code = 409
    error_code = "job_not_complete"
|
||||
|
||||
|
||||
class PayloadTooLargeError(ReplayApiError):
    """An upload exceeded the configured size limit (HTTP 413)."""

    status_code = 413
    error_code = "payload_too_large"
|
||||
|
||||
|
||||
class ConcurrencyLimitReachedError(ReplayApiError):
    """The job queue has no room for another submission (HTTP 429).

    Per the spec, reaching only the running-job concurrency limit is NOT an
    error: such submissions are queued as usual. This is raised solely when
    the queue itself is full.
    """

    status_code = 429
    error_code = "concurrency_limit_reached"
|
||||
|
||||
|
||||
class ReplayRunnerError(ReplayApiError):
    """The replay run itself failed or timed out (HTTP 500)."""

    status_code = 500
    error_code = "replay_runner_failed"
|
||||
@@ -0,0 +1,152 @@
|
||||
"""AZ-701 — multipart upload + magic-byte validation + auth helpers.
|
||||
|
||||
The functions here are deliberately framework-light: they take raw
|
||||
bytes / streams and return validated artefacts. ``app.py`` wires
|
||||
them into FastAPI dependencies; unit tests call them directly.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
from gps_denied_onboard.replay_api.errors import (
|
||||
MultipartMissingFieldError,
|
||||
PayloadTooLargeError,
|
||||
UnauthorizedError,
|
||||
UnsupportedFileKindError,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"MIN_TLOG_PROBE_BYTES",
|
||||
"MIN_VIDEO_PROBE_BYTES",
|
||||
"auth_required",
|
||||
"expected_bearer_token",
|
||||
"extract_bearer_token",
|
||||
"validate_calibration_kind",
|
||||
"validate_tlog_kind",
|
||||
"validate_upload_size",
|
||||
"validate_video_kind",
|
||||
]
|
||||
|
||||
|
||||
_LOGGER = logging.getLogger("gps_denied_onboard.replay_api.handlers")
|
||||
|
||||
|
||||
# MAVLink magic bytes — pymavlink uses 0xFD for v2.0 and 0xFE for
|
||||
# v1.0. The Derkachi tlog is v2.0; we accept both because some
|
||||
# operators ship v1.0 captures from older autopilots.
|
||||
_MAVLINK_MAGIC_V2: int = 0xFD
|
||||
_MAVLINK_MAGIC_V1: int = 0xFE
|
||||
MIN_TLOG_PROBE_BYTES: int = 9
|
||||
|
||||
|
||||
# mp4 boxes start with a 4-byte size, then 4 ASCII bytes for the
|
||||
# box type. The first box in every valid mp4 is ``ftyp`` (per
|
||||
# ISO/IEC 14496-12). ``"ftyp"`` lives at offset 4.
|
||||
_MP4_FTYP_MARKER: bytes = b"ftyp"
|
||||
MIN_VIDEO_PROBE_BYTES: int = 12
|
||||
|
||||
|
||||
def validate_tlog_kind(probe_bytes: bytes) -> None:
    """Accept only data whose byte at offset 8 is a MAVLink magic byte.

    A pymavlink tlog record is an 8-byte big-endian microsecond timestamp
    followed by the raw MAVLink frame, and every frame starts with the
    magic byte. Offset 8 of a well-formed tlog is therefore that magic.

    Raises:
        UnsupportedFileKindError: The probe is shorter than
            ``MIN_TLOG_PROBE_BYTES`` or the byte at offset 8 is neither the
            v2 nor the v1 magic.
    """
    if len(probe_bytes) < MIN_TLOG_PROBE_BYTES:
        raise UnsupportedFileKindError(
            f"tlog probe too small (need ≥ {MIN_TLOG_PROBE_BYTES} bytes "
            f"to validate magic; got {len(probe_bytes)})"
        )

    magic = probe_bytes[8]
    if magic in (_MAVLINK_MAGIC_V2, _MAVLINK_MAGIC_V1):
        return

    raise UnsupportedFileKindError(
        f"tlog magic byte 0x{magic:02X} at offset 8 is not "
        f"MAVLink (expected 0x{_MAVLINK_MAGIC_V2:02X} or "
        f"0x{_MAVLINK_MAGIC_V1:02X})"
    )
|
||||
|
||||
|
||||
def validate_video_kind(probe_bytes: bytes) -> None:
    """Accept only data carrying the mp4 ``ftyp`` marker at bytes 4-7.

    Bytes 0-3 hold a size prefix that varies from file to file, so only the
    marker is checked. This rejects the common "operator renamed `.zip` to
    `.mp4`" upload — the AC-9 case.

    Raises:
        UnsupportedFileKindError: The probe is shorter than
            ``MIN_VIDEO_PROBE_BYTES`` or the marker is not ``ftyp``.
    """
    if len(probe_bytes) < MIN_VIDEO_PROBE_BYTES:
        raise UnsupportedFileKindError(
            f"video probe too small (need ≥ {MIN_VIDEO_PROBE_BYTES} "
            f"bytes to validate ftyp; got {len(probe_bytes)})"
        )

    marker = probe_bytes[4:8]
    if marker == _MP4_FTYP_MARKER:
        return

    raise UnsupportedFileKindError(
        "video does not begin with an mp4 'ftyp' box at offset 4 "
        f"(saw {marker!r})"
    )
|
||||
|
||||
|
||||
def validate_calibration_kind(probe_bytes: bytes) -> None:
    """Cheap JSON-object sniff; the renderer is the strict validator.

    Raises:
        UnsupportedFileKindError: The upload is empty, or its first
            non-whitespace byte is not ``{``.
    """
    if len(probe_bytes) == 0:
        raise UnsupportedFileKindError("calibration upload is empty")

    first_significant = probe_bytes.lstrip()[:1]
    if first_significant != b"{":
        raise UnsupportedFileKindError(
            "calibration must be a JSON object (first non-whitespace "
            "byte should be '{')"
        )
|
||||
|
||||
|
||||
def validate_upload_size(num_bytes: int, *, limit: int) -> None:
    """Raise ``PayloadTooLargeError`` when ``num_bytes`` exceeds ``limit``.

    A size exactly equal to ``limit`` is accepted.
    """
    if num_bytes <= limit:
        return
    raise PayloadTooLargeError(
        f"upload size {num_bytes} exceeds REPLAY_API_MAX_UPLOAD_BYTES "
        f"({limit})"
    )
|
||||
|
||||
|
||||
def expected_bearer_token() -> str | None:
    """Look up the configured bearer token at request time.

    Returns:
        The value of ``REPLAY_API_BEARER_TOKEN``, or ``None`` when auth is
        disabled (``REPLAY_API_AUTH_REQUIRED=false``). In the disabled case
        the caller is expected to have logged the WARN once at service
        start.

    Raises:
        UnauthorizedError: Auth is required but the token variable is unset
            or empty.
    """
    if auth_required():
        configured = os.environ.get("REPLAY_API_BEARER_TOKEN")
        if configured:
            return configured
        raise UnauthorizedError(
            "REPLAY_API_BEARER_TOKEN is not configured but auth is required"
        )
    return None
|
||||
|
||||
|
||||
def auth_required() -> bool:
    """Report whether bearer auth is enforced.

    Auth is on unless ``REPLAY_API_AUTH_REQUIRED`` is one of ``0``,
    ``false``, ``no`` or ``off``, compared case-insensitively. The value is
    not stripped of whitespace. An unset variable means auth is on.
    """
    disabling_values = ("0", "false", "no", "off")
    setting = os.getenv("REPLAY_API_AUTH_REQUIRED", "true")
    return setting.lower() not in disabling_values
|
||||
|
||||
|
||||
def extract_bearer_token(header_value: str | None) -> str:
    """Strictly parse an ``Authorization: Bearer <token>`` header value.

    The scheme is matched case-insensitively and the token is stripped of
    surrounding whitespace.

    Raises:
        UnauthorizedError: The header is missing or empty, does not have
            the ``Bearer <token>`` shape, or carries an empty token.
    """
    if not header_value:
        raise UnauthorizedError("missing Authorization header")

    scheme, separator, remainder = header_value.partition(" ")
    if not separator or scheme.strip().lower() != "bearer":
        raise UnauthorizedError(
            "Authorization header must be 'Bearer <token>'"
        )

    token = remainder.strip()
    if token:
        return token
    raise UnauthorizedError("Authorization bearer token is empty")
|
||||
|
||||
|
||||
def _ensure_field(name: str, value: object) -> None:
    """Raise ``MultipartMissingFieldError`` when ``value`` is ``None``."""
    if value is not None:
        return
    raise MultipartMissingFieldError(f"missing multipart field: {name}")
|
||||
@@ -0,0 +1,99 @@
|
||||
"""AZ-701 — DTOs + ``ReplayRunner`` Protocol for the replay_api service.
|
||||
|
||||
The Protocol is the dependency-injection seam: ``handlers.py``
|
||||
depends on the Protocol, not the concrete ``SubprocessReplayRunner``.
|
||||
Unit tests inject a deterministic fake; the production wiring in
|
||||
``app.py`` constructs the subprocess runner.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Protocol, runtime_checkable
|
||||
|
||||
__all__ = [
|
||||
"JobSnapshot",
|
||||
"JobState",
|
||||
"ReplayInputs",
|
||||
"ReplayJobResult",
|
||||
"ReplayRunner",
|
||||
]
|
||||
|
||||
|
||||
class JobState(str, Enum):
    """Lifecycle states of a replay job.

    Transitions only move forward: ``queued → running → done``, with
    ``failed`` reachable from any non-terminal state. There are no
    back-transitions.
    """

    # Non-terminal states.
    QUEUED = "queued"
    RUNNING = "running"

    # Terminal states.
    DONE = "done"
    FAILED = "failed"
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class ReplayInputs:
|
||||
"""The (tlog + video + calibration) bundle a runner consumes.
|
||||
|
||||
Storage paths are absolute. The handler builds these from a
|
||||
per-job temp directory (see ``storage.py``).
|
||||
|
||||
``pace`` and ``auto_trim`` mirror the ``gps-denied-replay`` CLI
|
||||
flags; the runner is responsible for translating them into argv.
|
||||
"""
|
||||
|
||||
tlog_path: Path
|
||||
video_path: Path
|
||||
calibration_path: Path
|
||||
pace: str = "asap"
|
||||
auto_trim: bool = True
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class ReplayJobResult:
|
||||
"""The artefacts a finished job exposes.
|
||||
|
||||
Each path is absolute and lives under the per-job storage dir.
|
||||
The handler layer maps these to URLs in the JSON response.
|
||||
"""
|
||||
|
||||
emissions_jsonl_path: Path
|
||||
accuracy_report_md_path: Path | None
|
||||
map_html_path: Path | None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class JobSnapshot:
|
||||
"""Serialisable snapshot of one job.
|
||||
|
||||
Mutable; the registry mutates the snapshot in-place under its
|
||||
lock and yields copies to API readers.
|
||||
"""
|
||||
|
||||
job_id: str
|
||||
state: JobState
|
||||
submitted_at_utc: datetime
|
||||
started_at_utc: datetime | None = None
|
||||
finished_at_utc: datetime | None = None
|
||||
error: str | None = None
|
||||
result: ReplayJobResult | None = None
|
||||
sync: bool = False
|
||||
extra: dict[str, str] = field(default_factory=dict)
|
||||
|
||||
|
||||
@runtime_checkable
class ReplayRunner(Protocol):
    """Structural interface for whatever executes one offline replay job.

    ``run`` is synchronous on purpose: the registry calls it from a worker
    thread. A normal return means success. Any raised exception means
    failure, and the registry stores its stringified message on the job.
    """

    def run(
        self, inputs: ReplayInputs, *, output_dir: Path
    ) -> ReplayJobResult:
        ...
|
||||
@@ -0,0 +1,233 @@
|
||||
"""AZ-701 — in-memory job registry with a concurrency limit.
|
||||
|
||||
``JobRegistry`` is the single source of truth for job state. It is
|
||||
intentionally simple — a dict plus a thread pool plus a queue cap.
|
||||
Operators that need durable history persist the JSONL + Markdown
|
||||
report + HTML map artefacts out-of-band (invariant 2 in the
|
||||
contract).
|
||||
|
||||
Concurrency model:
|
||||
- ``max_concurrent``: at most this many jobs may be in state
|
||||
``RUNNING`` at once. Excess submissions land in state ``QUEUED``
|
||||
and get promoted by the worker pool.
|
||||
- ``max_queued``: hard cap on queued jobs. Exceeding it raises
|
||||
``ConcurrencyLimitReachedError`` (HTTP 429 at the handler layer).
|
||||
|
||||
The registry runs jobs in a thread pool (``ThreadPoolExecutor``)
|
||||
so the FastAPI event loop is never blocked. The runner is
|
||||
intentionally synchronous (``ReplayRunner.run``) because the
|
||||
underlying ``gps-denied-replay`` subprocess is synchronous.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import uuid
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from copy import copy
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from gps_denied_onboard.replay_api.errors import (
|
||||
ConcurrencyLimitReachedError,
|
||||
JobNotFoundError,
|
||||
)
|
||||
from gps_denied_onboard.replay_api.interface import (
|
||||
JobSnapshot,
|
||||
JobState,
|
||||
ReplayInputs,
|
||||
ReplayRunner,
|
||||
)
|
||||
from gps_denied_onboard.replay_api.storage import StorageRoot
|
||||
|
||||
__all__ = ["ConcurrencyLimitReachedError", "JobRecord", "JobRegistry"]
|
||||
|
||||
|
||||
_LOGGER = logging.getLogger("gps_denied_onboard.replay_api.jobs")
|
||||
|
||||
|
||||
class JobRecord:
    """Registry-internal, mutable holder for one job.

    Callers outside the registry only ever receive copies of ``snapshot``,
    never the live object, so they cannot corrupt registry state.
    """

    __slots__ = ("inputs", "output_dir", "snapshot")

    def __init__(
        self,
        inputs: ReplayInputs,
        output_dir: Path,
        snapshot: JobSnapshot,
    ) -> None:
        self.snapshot = snapshot
        self.output_dir = output_dir
        self.inputs = inputs
|
||||
|
||||
|
||||
class JobRegistry:
    """In-memory job pool + worker dispatch.

    Jobs run on a ``ThreadPoolExecutor`` with ``max_concurrent`` workers.
    ``_running_count`` counts jobs currently in state ``RUNNING``; every
    read or write of it, and of any record's snapshot, happens under
    ``_lock``.

    Ordering invariant: the executor hands work to its threads in
    submission order, so a job is marked ``RUNNING`` at submit time only
    when a slot is free AND no older job is still ``QUEUED``. Without the
    second condition a newer job could claim the slot counter while an
    older queued job occupies the worker thread waiting for that slot. With
    ``max_concurrent=1`` that deadlocks.
    """

    def __init__(
        self,
        runner: ReplayRunner,
        storage: StorageRoot,
        *,
        max_concurrent: int = 1,
        max_queued: int = 8,
    ) -> None:
        """Create the registry and its worker pool.

        Raises:
            ValueError: ``max_concurrent`` < 1 or ``max_queued`` < 0.
        """
        if max_concurrent < 1:
            raise ValueError("max_concurrent must be ≥ 1")
        if max_queued < 0:
            raise ValueError("max_queued must be ≥ 0")
        self._runner = runner
        self._storage = storage
        self._max_concurrent = max_concurrent
        self._max_queued = max_queued
        self._lock = threading.Lock()
        self._records: dict[str, JobRecord] = {}
        self._running_count = 0
        self._executor = ThreadPoolExecutor(
            max_workers=max_concurrent,
            thread_name_prefix="replay-api-job",
        )

    @property
    def max_concurrent(self) -> int:
        """Maximum number of jobs allowed in state ``RUNNING`` at once."""
        return self._max_concurrent

    def submit(
        self,
        inputs: ReplayInputs,
        output_dir: Path,
        *,
        job_id: str | None = None,
    ) -> JobSnapshot:
        """Register a new job; return a copy of its snapshot.

        ``job_id`` is optional — when omitted the registry generates a
        fresh uuid. The handler layer passes its own id so the per-job
        storage directory and the API-visible job id match.

        State at return time is:
        - ``RUNNING`` if a worker slot is free and nothing is queued ahead.
        - ``QUEUED`` otherwise (within ``max_queued``).

        Raises:
            ConcurrencyLimitReachedError: The job cannot start immediately
                and the queue already holds ``max_queued`` jobs.
            ValueError: ``job_id`` is already registered.
        """
        with self._lock:
            queued_count = self._queued_count_locked()
            # See the class docstring: never let a new job overtake queued
            # ones in state while being behind them in the executor FIFO.
            starts_now = (
                self._running_count < self._max_concurrent
                and queued_count == 0
            )
            if not starts_now and queued_count >= self._max_queued:
                raise ConcurrencyLimitReachedError(
                    f"queue full: running={self._running_count}, "
                    f"queued={queued_count}, max_queued={self._max_queued}"
                )

            if job_id is None:
                job_id = uuid.uuid4().hex
            if job_id in self._records:
                raise ValueError(
                    f"duplicate job_id supplied to submit(): {job_id}"
                )

            now = _utc_now()
            snapshot = JobSnapshot(
                job_id=job_id,
                state=JobState.RUNNING if starts_now else JobState.QUEUED,
                submitted_at_utc=now,
                started_at_utc=now if starts_now else None,
            )
            self._records[job_id] = JobRecord(
                inputs=inputs, output_dir=output_dir, snapshot=snapshot
            )
            if starts_now:
                self._running_count += 1

        try:
            self._executor.submit(self._run_or_wait, job_id)
        except Exception:
            # The executor refuses work once shut down. Undo the
            # registration so no phantom job keeps a slot or queue entry.
            with self._lock:
                self._records.pop(job_id, None)
                if starts_now:
                    self._running_count = max(0, self._running_count - 1)
            raise

        return self.get(job_id)

    def get(self, job_id: str) -> JobSnapshot:
        """Return a copy of the snapshot for ``job_id``.

        Raises:
            JobNotFoundError: No job with that id is registered.
        """
        with self._lock:
            record = self._records.get(job_id)
            if record is None:
                raise JobNotFoundError(f"job not found: {job_id}")
            return copy(record.snapshot)

    def list_ids(self) -> list[str]:
        """Return the ids of all registered jobs."""
        with self._lock:
            return list(self._records)

    def running_count(self) -> int:
        """Return the number of jobs currently counted as running."""
        with self._lock:
            return self._running_count

    def queued_count(self) -> int:
        """Return the number of jobs currently in state ``QUEUED``."""
        with self._lock:
            return self._queued_count_locked()

    def shutdown(self, *, wait: bool = True) -> None:
        """Stop the worker pool, then delete all per-job storage.

        With ``wait=False`` pending futures are cancelled and the call does
        not wait for running jobs before cleaning up storage.
        """
        self._executor.shutdown(wait=wait, cancel_futures=not wait)
        self._storage.cleanup_all()

    def _queued_count_locked(self) -> int:
        """Count ``QUEUED`` records; the caller must hold ``_lock``."""
        return sum(
            1
            for record in self._records.values()
            if record.snapshot.state == JobState.QUEUED
        )

    def _run_or_wait(self, job_id: str) -> None:
        """Worker entry point: promote the job if queued, then execute it.

        Any exception from waiting or running is recorded on the job rather
        than propagated to the executor.
        """
        with self._lock:
            record = self._records.get(job_id)
        if record is None:
            return
        try:
            if record.snapshot.state == JobState.QUEUED:
                self._wait_for_slot(record)
            self._execute(record)
        except Exception as exc:
            self._mark_failed(record, exc)

    def _wait_for_slot(self, record: JobRecord) -> None:
        """Poll until a running slot is free, then mark the job RUNNING."""
        while True:
            with self._lock:
                if self._running_count < self._max_concurrent:
                    record.snapshot.state = JobState.RUNNING
                    record.snapshot.started_at_utc = _utc_now()
                    self._running_count += 1
                    return
            # Sleep outside the lock so finishing jobs can free their slot.
            threading.Event().wait(0.05)

    def _execute(self, record: JobRecord) -> None:
        """Run the job and record success; always give the slot back."""
        try:
            result = self._runner.run(record.inputs, output_dir=record.output_dir)
            with self._lock:
                record.snapshot.state = JobState.DONE
                record.snapshot.finished_at_utc = _utc_now()
                record.snapshot.result = result
                self._running_count = max(0, self._running_count - 1)
        except Exception:
            with self._lock:
                self._running_count = max(0, self._running_count - 1)
            raise

    def _mark_failed(self, record: JobRecord, exc: BaseException) -> None:
        """Log the failure and move the job to ``FAILED``.

        Called from inside an ``except`` block, so ``_LOGGER.exception``
        picks up the active traceback.
        """
        message = f"{type(exc).__name__}: {exc}"
        _LOGGER.exception("job %s failed", record.snapshot.job_id)
        with self._lock:
            record.snapshot.state = JobState.FAILED
            record.snapshot.finished_at_utc = _utc_now()
            record.snapshot.error = message
|
||||
|
||||
|
||||
def _utc_now() -> datetime:
    """Return the current time as a timezone-aware UTC ``datetime``."""
    return datetime.now(tz=timezone.utc)
|
||||
|
||||
|
||||
# Re-export Any so type-checkers don't trim the local import.
|
||||
_ = Any
|
||||
@@ -0,0 +1,89 @@
|
||||
"""AZ-701 — per-job temp-file lifecycle.
|
||||
|
||||
One ``StorageRoot`` rooted at ``REPLAY_API_STORAGE_ROOT``.
|
||||
Each job allocates a subdirectory ``<root>/<job_id>/`` containing
|
||||
the uploaded ``tlog`` + ``video`` + ``calibration`` plus the
|
||||
estimator's outputs (``emissions.jsonl``, the AZ-699 report, the
|
||||
AZ-700 map).
|
||||
|
||||
The directory is deleted on job completion (``release_job``) and on
|
||||
service shutdown (``cleanup_all``). The service deliberately does
|
||||
NOT keep finished-job artefacts forever — invariant 2 in the
|
||||
contract.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import shutil
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
__all__ = ["JobStorage", "StorageRoot"]
|
||||
|
||||
|
||||
_LOGGER = logging.getLogger("gps_denied_onboard.replay_api.storage")
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class JobStorage:
|
||||
"""The per-job paths the handler hands to the runner."""
|
||||
|
||||
root: Path
|
||||
tlog_path: Path
|
||||
video_path: Path
|
||||
calibration_path: Path
|
||||
output_dir: Path
|
||||
|
||||
|
||||
class StorageRoot:
    """Parent of the per-job storage directories.

    The class is intentionally thin. ``allocate_job`` creates
    ``<root>/<job_id>/`` with an ``output`` subdirectory, ``release_job``
    removes one job's directory, and ``cleanup_all`` removes every job
    directory under the root. Deletion is best-effort: failures are logged
    as warnings, never raised.
    """

    def __init__(self, root: Path) -> None:
        """Remember ``root`` and create it (with parents) if missing."""
        self._root = root
        self._root.mkdir(parents=True, exist_ok=True)

    @property
    def root(self) -> Path:
        """The directory under which per-job directories are created."""
        return self._root

    def allocate_job(self, job_id: str) -> JobStorage:
        """Create the directory tree for ``job_id`` and return its paths.

        Raises:
            FileExistsError: A directory for ``job_id`` already exists.
        """
        job_root = self._root / job_id
        # exist_ok=False: two jobs must never share a directory.
        job_root.mkdir(parents=True, exist_ok=False)
        output_dir = job_root / "output"
        output_dir.mkdir(parents=True, exist_ok=True)
        return JobStorage(
            root=job_root,
            tlog_path=job_root / "input.tlog",
            video_path=job_root / "input.mp4",
            calibration_path=job_root / "calibration.json",
            output_dir=output_dir,
        )

    def release_job(self, job_id: str) -> None:
        """Delete the directory of ``job_id``; a missing one is a no-op."""
        self._remove_tree(self._root / job_id)

    def cleanup_all(self) -> None:
        """Delete every per-job directory under the root.

        Plain files directly under the root are left alone. A root that has
        already been removed is treated as "nothing to clean", so shutdown
        does not fail.
        """
        try:
            # Materialise the listing before deleting anything.
            children = list(self._root.iterdir())
        except FileNotFoundError:
            return
        for child in children:
            if child.is_dir():
                self._remove_tree(child)

    @staticmethod
    def _remove_tree(target: Path) -> None:
        """Best-effort recursive delete; already-gone paths are ignored."""
        try:
            shutil.rmtree(target)
        except FileNotFoundError:
            # Someone else removed it first; the goal is already met.
            return
        except OSError as exc:
            _LOGGER.warning(
                "failed to delete per-job storage %s: %s", target, exc
            )
|
||||
@@ -21,16 +21,26 @@ path.
|
||||
|
||||
from gps_denied_onboard.replay_input.errors import ReplayInputAdapterError
|
||||
from gps_denied_onboard.replay_input.interface import (
|
||||
AlignedWindow,
|
||||
AutoSyncConfig,
|
||||
AutoSyncDecision,
|
||||
ReplayInputBundle,
|
||||
)
|
||||
from gps_denied_onboard.replay_input.tlog_ground_truth import (
|
||||
TlogGpsFix,
|
||||
TlogGroundTruth,
|
||||
load_tlog_ground_truth,
|
||||
)
|
||||
from gps_denied_onboard.replay_input.tlog_video_adapter import ReplayInputAdapter
|
||||
|
||||
__all__ = [
|
||||
"AlignedWindow",
|
||||
"AutoSyncConfig",
|
||||
"AutoSyncDecision",
|
||||
"ReplayInputAdapter",
|
||||
"ReplayInputAdapterError",
|
||||
"ReplayInputBundle",
|
||||
"TlogGpsFix",
|
||||
"TlogGroundTruth",
|
||||
"load_tlog_ground_truth",
|
||||
]
|
||||
|
||||
@@ -37,16 +37,22 @@ from typing import TYPE_CHECKING, Any
|
||||
|
||||
from gps_denied_onboard._types.fc import FcKind
|
||||
from gps_denied_onboard.replay_input.errors import ReplayInputAdapterError
|
||||
from gps_denied_onboard.replay_input.interface import AutoSyncConfig, AutoSyncDecision
|
||||
from gps_denied_onboard.replay_input.interface import (
|
||||
AlignedWindow,
|
||||
AutoSyncConfig,
|
||||
AutoSyncDecision,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import numpy as np
|
||||
import numpy.typing as npt
|
||||
|
||||
__all__ = [
|
||||
"TlogSamples",
|
||||
"compute_offset",
|
||||
"detect_tlog_takeoff",
|
||||
"detect_video_motion_onset",
|
||||
"find_aligned_window",
|
||||
"validate_offset_or_fail",
|
||||
]
|
||||
|
||||
@@ -644,3 +650,486 @@ def _compute_flow_magnitudes(
|
||||
def _build_flag_on(name: str) -> bool:
|
||||
raw = os.environ.get(name, "")
|
||||
return raw.strip().lower() in {"on", "1", "true", "yes"}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AZ-698 — mid-flight cross-correlation aligner
|
||||
#
|
||||
# The AZ-405 head-takeoff detector only works when the video covers
|
||||
# the take-off moment. For mid-flight slices (e.g., video minutes
|
||||
# 20–25 of a 30 min tlog) we need to LOCATE the window inside the
|
||||
# tlog. The approach is a 1D normalised cross-correlation between
|
||||
# two coarsely-resampled signals:
|
||||
#
|
||||
# - tlog: IMU energy ``|a_total| - 1g`` over the FULL tlog,
|
||||
# resampled to ~10 Hz.
|
||||
# - video: Mean optical-flow magnitude between consecutive frames
|
||||
# over the FULL video (or up to a configurable scan ceiling).
|
||||
#
|
||||
# Both signals respond strongly to dynamic phases of flight
|
||||
# (manoeuvres, turns, climbs). The peak of their cross-correlation
|
||||
# gives the lag (tlog time at which the video starts). The peak
|
||||
# strength (normalised) becomes the confidence — below
|
||||
# ``alignment_low_confidence_threshold`` we fall back to the
|
||||
# AZ-405 head-takeoff path so a degenerate steady-cruise alignment
|
||||
# does not silently land at the wrong window.
|
||||
|
||||
|
||||
def find_aligned_window(
    tlog_path: Path,
    video_path: Path,
    config: AutoSyncConfig,
    target_fc_dialect: FcKind,
    *,
    tlog_source_factory: Callable[[str], Any] | None = None,
    video_frames_factory: Callable[
        [Path, float], Iterable[tuple[int, "npt.NDArray[np.uint8]"]]
    ]
    | None = None,
) -> AlignedWindow:
    """Find where the video sits on the timeline of ``tlog_path`` (AZ-698).

    Pipeline: load the tlog IMU-energy stream, segment it into flights
    and keep only the LAST one (or the whole stream when no flight is
    detected), decode the head of the video into a flow-magnitude
    stream, then hand both streams to the cross-correlation kernel.

    Args:
        tlog_path: Binary ArduPilot tlog. Up to
            ``config.prescan_max_messages * 10`` IMU messages are read,
            because the aligner needs the whole flight, not only its
            head.
        video_path: Video input. Only the leading
            ``config.alignment_video_scan_seconds`` are requested from
            the frame reader.
        config: Operator-tunable thresholds.
        target_fc_dialect: ``ARDUPILOT_PLANE`` or ``INAV``; anything
            else is rejected up front.
        tlog_source_factory: Test injection that replaces the
            ``pymavlink`` open call.
        video_frames_factory: Test injection that replaces the default
            video frame reader.

    Returns:
        The :class:`AlignedWindow` produced by the cross-correlation
        kernel. When the peak confidence is below
        ``config.alignment_low_confidence_threshold`` the kernel returns
        its fallback window with ``fallback_used=True``.

    Raises:
        ReplayInputAdapterError: On an unsupported dialect, a missing
            tlog or video file, or when the tlog, the selected flight
            segment, the decoded frames or the flow stream contain fewer
            than two samples.
    """
    supported_dialects = (FcKind.ARDUPILOT_PLANE, FcKind.INAV)
    if target_fc_dialect not in supported_dialects:
        raise ReplayInputAdapterError(
            f"target_fc_dialect must be ARDUPILOT_PLANE or INAV; got {target_fc_dialect!r}"
        )
    # Same order as the messages imply: tlog is validated before video.
    for label, candidate in (("tlog", tlog_path), ("video", video_path)):
        if not candidate.is_file():
            raise ReplayInputAdapterError(f"{label} file not found: {candidate}")

    full_energy = _load_tlog_imu_energy_stream(
        tlog_path,
        max_messages=config.prescan_max_messages * 10,
        source_factory=tlog_source_factory,
    )
    if len(full_energy) < 2:
        raise ReplayInputAdapterError(
            f"tlog yielded {len(full_energy)} IMU sample(s); "
            "need ≥ 2 for cross-correlation alignment"
        )

    # A tlog may span several sorties while the video covers only one,
    # conventionally the last. Restricting the search to the last
    # detected flight keeps the correlation peak off earlier sorties.
    ns_per_second = 1_000_000_000
    segments = _segment_flights_from_imu_energy(
        full_energy,
        motion_threshold=config.alignment_segment_motion_threshold_g,
        min_flight_duration_ns=int(
            config.alignment_segment_min_flight_duration_seconds * ns_per_second
        ),
        max_internal_gap_ns=int(
            config.alignment_segment_max_internal_gap_seconds * ns_per_second
        ),
    )
    flight_count_detected = len(segments)
    if segments:
        first_ns, last_ns = segments[-1]
        tlog_energy = tuple(
            (stamp, level)
            for stamp, level in full_energy
            if first_ns <= stamp <= last_ns
        )
        selected_flight_index = flight_count_detected - 1
    else:
        # No flight pattern detected (very short / all-quiet tlog or
        # badly tuned thresholds): search the whole tlog and report the
        # situation through flight_count_detected == 0.
        tlog_energy = full_energy
        selected_flight_index = -1

    if len(tlog_energy) < 2:
        raise ReplayInputAdapterError(
            f"selected flight segment yielded {len(tlog_energy)} IMU "
            "sample(s); need ≥ 2 for cross-correlation alignment"
        )

    read_frames = (
        _read_video_frames if video_frames_factory is None else video_frames_factory
    )
    frames = list(read_frames(video_path, config.alignment_video_scan_seconds))
    if len(frames) < 2:
        raise ReplayInputAdapterError(
            f"video yielded {len(frames)} frame(s); "
            "need ≥ 2 for cross-correlation alignment"
        )

    flow_samples = _compute_flow_magnitudes(frames)
    if len(flow_samples) < 2:
        raise ReplayInputAdapterError(
            f"video produced {len(flow_samples)} flow sample(s); "
            "need ≥ 2 for cross-correlation alignment"
        )

    return _align_via_cross_correlation(
        tlog_energy=tlog_energy,
        flow_samples=flow_samples,
        config=config,
        target_fc_dialect=target_fc_dialect,
        tlog_path=tlog_path,
        tlog_source_factory=tlog_source_factory,
        flight_count_detected=flight_count_detected,
        selected_flight_index=selected_flight_index,
    )
|
||||
|
||||
|
||||
def _align_via_cross_correlation(
    *,
    tlog_energy: tuple[tuple[int, float], ...],
    flow_samples: tuple[tuple[int, float], ...],
    config: AutoSyncConfig,
    target_fc_dialect: FcKind,
    tlog_path: Path,
    tlog_source_factory: Callable[[str], Any] | None,
    flight_count_detected: int = 0,
    selected_flight_index: int = -1,
) -> AlignedWindow:
    """Pure compute kernel: turn pre-loaded streams into an :class:`AlignedWindow`.

    Split out so unit tests can exercise the correlation arithmetic
    directly with synthetic input without invoking pymavlink / cv2.

    Both streams are resampled onto a uniform grid of
    ``1 / max(config.alignment_resample_hz, 1.0)`` seconds. The video
    flow stream is then slid over the tlog energy stream; every tlog
    window is zero-meaned and unit-normed on its own before the dot
    product, and the best-scoring lag becomes the window start.

    Args:
        tlog_energy: ``(ts_ns, energy)`` samples of the tlog search span.
        flow_samples: ``(ts_ns, flow_magnitude)`` samples of the video.
        config: Supplies the resample rate and the low-confidence
            threshold.
        target_fc_dialect: Forwarded to the fallback path only.
        tlog_path: Forwarded to the fallback path only.
        tlog_source_factory: Forwarded to the fallback path only.
        flight_count_detected: Copied into the result.
        selected_flight_index: Copied into the result.

    Returns:
        The located window with ``fallback_used=False``, or the result
        of :func:`_fallback_to_head_takeoff` when the peak confidence is
        below ``config.alignment_low_confidence_threshold``.

    Raises:
        ReplayInputAdapterError: When either stream is empty or has
            fewer than two resampled samples, or when the video stream
            is longer than the tlog stream.
    """
    import numpy as _np

    # Guard BEFORE touching ``[0][0]``: an empty stream must surface as
    # the adapter's own error type rather than a bare IndexError.
    if len(tlog_energy) == 0:
        raise ReplayInputAdapterError(
            "tlog resampled stream has < 2 samples; cannot cross-correlate"
        )
    if len(flow_samples) == 0:
        raise ReplayInputAdapterError(
            "video flow stream has < 2 samples; cannot cross-correlate"
        )

    resample_hz = max(config.alignment_resample_hz, 1.0)
    # Clamp to 1 ns so an absurdly high rate cannot truncate the period
    # to 0 and trigger a division by zero inside the resampler.
    period_ns = max(1, int(1_000_000_000 / resample_hz))

    tlog_origin_ns = tlog_energy[0][0]
    tlog_resampled = _resample_uniform(tlog_energy, period_ns, tlog_origin_ns)
    if len(tlog_resampled) < 2:
        raise ReplayInputAdapterError(
            "tlog resampled stream has < 2 samples; cannot cross-correlate"
        )

    video_origin_ns = flow_samples[0][0]
    flow_resampled = _resample_uniform(flow_samples, period_ns, video_origin_ns)
    if len(flow_resampled) < 2:
        raise ReplayInputAdapterError(
            "video flow stream has < 2 samples; cannot cross-correlate"
        )
    if len(flow_resampled) > len(tlog_resampled):
        raise ReplayInputAdapterError(
            "video flow stream is longer than the tlog energy stream; "
            "auto-trim requires the video to be a slice of a longer tlog"
        )

    tlog_arr = _np.asarray(tlog_resampled, dtype=_np.float64)
    flow_arr = _np.asarray(flow_resampled, dtype=_np.float64)
    flow_centred = _zero_mean_normalise(flow_arr)
    if _np.linalg.norm(flow_centred) == 0.0:
        # Flat video → no information for correlation. Force the
        # fallback path; confidence reported as 0.
        peak_idx = 0
        confidence = 0.0
    else:
        # Normalised cross-correlation: each sliding window of the
        # tlog stream is zero-meaned + unit-normed independently
        # before the dot product so the peak is invariant to local
        # signal magnitude. Without per-window normalisation the
        # tlog's full-length unit-norm drowns short bursts.
        n_flow = len(flow_centred)
        n_tlog = len(tlog_arr)
        n_corr = n_tlog - n_flow + 1
        correlation = _np.zeros(n_corr, dtype=_np.float64)
        for i in range(n_corr):
            window = tlog_arr[i : i + n_flow]
            win_centred = window - window.mean()
            win_norm = float(_np.linalg.norm(win_centred))
            # A flat tlog window carries no shape; its score stays 0.
            if win_norm > 0.0:
                correlation[i] = float(_np.dot(win_centred / win_norm, flow_centred))
        peak_idx = int(_np.argmax(correlation))
        confidence = max(0.0, min(1.0, float(correlation[peak_idx])))

    video_duration_ns = _stream_duration_ns(flow_samples)
    if confidence < config.alignment_low_confidence_threshold:
        return _fallback_to_head_takeoff(
            tlog_path=tlog_path,
            tlog_source_factory=tlog_source_factory,
            target_fc_dialect=target_fc_dialect,
            config=config,
            tlog_energy=tlog_energy,
            video_origin_ns=video_origin_ns,
            video_flow_duration_ns=video_duration_ns,
            confidence=confidence,
            flight_count_detected=flight_count_detected,
            selected_flight_index=selected_flight_index,
        )

    # Absolute tlog timeline value where video t=0 aligns. The
    # adapter's seek check compares this against the raw pymavlink
    # ``msg._timestamp`` so the value MUST be on the tlog timeline,
    # NOT a delta.
    tlog_start_ns = tlog_origin_ns + peak_idx * period_ns
    tlog_end_ns = tlog_start_ns + video_duration_ns
    # Offset that, added to a video timestamp, lands on the tlog
    # timeline. Matches ``AutoSyncDecision.offset_ms`` semantics
    # (``validate_offset_or_fail`` does ``vts + offset_ns``).
    offset_ms = (tlog_start_ns - video_origin_ns) // 1_000_000
    return AlignedWindow(
        tlog_start_ns=tlog_start_ns,
        tlog_end_ns=tlog_end_ns,
        offset_ms=offset_ms,
        confidence=confidence,
        fallback_used=False,
        flight_count_detected=flight_count_detected,
        selected_flight_index=selected_flight_index,
    )
|
||||
|
||||
|
||||
def _stream_duration_ns(
|
||||
samples: tuple[tuple[int, float], ...],
|
||||
) -> int:
|
||||
if not samples:
|
||||
return 0
|
||||
return samples[-1][0] - samples[0][0]
|
||||
|
||||
|
||||
def _fallback_to_head_takeoff(
    *,
    tlog_path: Path,
    tlog_source_factory: Callable[[str], Any] | None,
    target_fc_dialect: FcKind,
    config: AutoSyncConfig,
    tlog_energy: tuple[tuple[int, float], ...],
    video_origin_ns: int,
    video_flow_duration_ns: int,
    confidence: float,
    flight_count_detected: int = 0,
    selected_flight_index: int = -1,
) -> AlignedWindow:
    """Build the window used when cross-correlation confidence is too low.

    The window start is chosen as follows:

    * **Segmented tlog** (``flight_count_detected > 0`` and a non-empty
      ``tlog_energy``): the first timestamp of ``tlog_energy`` is used,
      i.e. the start of the flight the segmenter already selected. The
      AZ-405 head-takeoff detector is deliberately NOT re-run here,
      because on a multi-flight tlog it would lock onto the first
      flight and discard the segmenter's choice of the last one.
    * **Un-segmented tlog**: the AZ-405 head-takeoff detector runs. Its
      onset is used when its confidence is positive; otherwise the
      first ``tlog_energy`` timestamp, or 0 when that stream is empty.

    ``fallback_used`` is always ``True`` in the result. ``confidence``
    is passed through unchanged: it is the original sub-threshold
    correlation peak and is informational only on this path.
    """
    # First timestamp of the search span, or None when there is none.
    span_start_ns = tlog_energy[0][0] if tlog_energy else None

    if flight_count_detected > 0 and span_start_ns is not None:
        start_ns = span_start_ns
    else:
        takeoff = detect_tlog_takeoff(
            tlog_path,
            target_fc_dialect,
            config,
            source_factory=tlog_source_factory,
        )
        if takeoff.confidence > 0.0:
            start_ns = takeoff.onset_ns
        elif span_start_ns is not None:
            start_ns = span_start_ns
        else:
            start_ns = 0

    return AlignedWindow(
        tlog_start_ns=start_ns,
        tlog_end_ns=start_ns + video_flow_duration_ns,
        offset_ms=(start_ns - video_origin_ns) // 1_000_000,
        confidence=confidence,
        fallback_used=True,
        flight_count_detected=flight_count_detected,
        selected_flight_index=selected_flight_index,
    )
|
||||
|
||||
|
||||
def _resample_uniform(
|
||||
samples: tuple[tuple[int, float], ...],
|
||||
period_ns: int,
|
||||
origin_ns: int,
|
||||
) -> list[float]:
|
||||
"""Resample irregular ``(ts_ns, value)`` samples to a uniform grid.
|
||||
|
||||
Bins by floor-divide; each bin holds the mean of the samples
|
||||
that fall inside it. Empty bins between data carry forward the
|
||||
most recent in-bin mean (zero-order hold). Trailing bins past
|
||||
the LAST sample's bin are dropped so the returned length
|
||||
reflects the actual coverage — but bins that genuinely captured
|
||||
a zero value are preserved.
|
||||
"""
|
||||
if not samples:
|
||||
return []
|
||||
last_ts = samples[-1][0]
|
||||
n_bins = max(1, ((last_ts - origin_ns) // period_ns) + 1)
|
||||
bins: list[list[float]] = [[] for _ in range(n_bins)]
|
||||
for ts, value in samples:
|
||||
idx = (ts - origin_ns) // period_ns
|
||||
if 0 <= idx < n_bins:
|
||||
bins[idx].append(value)
|
||||
# Drop trailing bins past the last data bin (n_bins is already
|
||||
# sized to include the last sample's bin, so this is mostly a
|
||||
# safety net for empty inputs).
|
||||
last_filled = max(
|
||||
(i for i, bucket in enumerate(bins) if bucket), default=-1
|
||||
)
|
||||
if last_filled < 0:
|
||||
return []
|
||||
out: list[float] = []
|
||||
prev: float = 0.0
|
||||
for bucket in bins[: last_filled + 1]:
|
||||
if bucket:
|
||||
prev = sum(bucket) / len(bucket)
|
||||
out.append(prev)
|
||||
return out
|
||||
|
||||
|
||||
def _zero_mean_normalise(
|
||||
arr: "npt.NDArray[np.float64]",
|
||||
) -> "npt.NDArray[np.float64]":
|
||||
import numpy as _np
|
||||
|
||||
centred: "npt.NDArray[np.float64]" = arr - arr.mean()
|
||||
norm = float(_np.linalg.norm(centred))
|
||||
if norm == 0.0:
|
||||
return centred
|
||||
result: "npt.NDArray[np.float64]" = centred / norm
|
||||
return result
|
||||
|
||||
|
||||
def _segment_flights_from_imu_energy(
|
||||
samples: tuple[tuple[int, float], ...],
|
||||
*,
|
||||
motion_threshold: float,
|
||||
min_flight_duration_ns: int,
|
||||
max_internal_gap_ns: int,
|
||||
) -> list[tuple[int, int]]:
|
||||
"""Partition an IMU energy stream into distinct flight segments.
|
||||
|
||||
A flight is a contiguous span where energy stayed ``>=`` the
|
||||
threshold, with no sub-threshold run longer than
|
||||
``max_internal_gap_ns`` (cruise lulls don't split a flight).
|
||||
Spans shorter than ``min_flight_duration_ns`` are discarded as
|
||||
ground-startup noise. Returns ``(start_ns, end_ns)`` per flight,
|
||||
in chronological order.
|
||||
|
||||
AZ-698 / AZ-697 user constraint: a single tlog often spans
|
||||
multiple takeoffs at the same field, but the uploaded video only
|
||||
covers the **last** one. The aligner uses this segmenter to find
|
||||
every flight, then restricts NCC search to the last segment so
|
||||
the trim is unambiguous. ``_find_sustained_event`` (AZ-405)
|
||||
returns only the FIRST qualifying window by design; partitioning
|
||||
all flights needs this fresh one-pass walk.
|
||||
"""
|
||||
if not samples:
|
||||
return []
|
||||
segments: list[tuple[int, int]] = []
|
||||
in_flight = False
|
||||
flight_start_ns = 0
|
||||
last_above_ns = 0
|
||||
last_below_ns: int | None = None
|
||||
for ts, energy in samples:
|
||||
if energy >= motion_threshold:
|
||||
if not in_flight:
|
||||
in_flight = True
|
||||
flight_start_ns = ts
|
||||
last_above_ns = ts
|
||||
last_below_ns = None
|
||||
else:
|
||||
if in_flight:
|
||||
if last_below_ns is None:
|
||||
last_below_ns = ts
|
||||
if (ts - last_below_ns) >= max_internal_gap_ns:
|
||||
if (
|
||||
last_above_ns - flight_start_ns
|
||||
) >= min_flight_duration_ns:
|
||||
segments.append((flight_start_ns, last_above_ns))
|
||||
in_flight = False
|
||||
last_below_ns = None
|
||||
if in_flight and (last_above_ns - flight_start_ns) >= min_flight_duration_ns:
|
||||
segments.append((flight_start_ns, last_above_ns))
|
||||
return segments
|
||||
|
||||
|
||||
def _load_tlog_imu_energy_stream(
    tlog_path: Path,
    *,
    max_messages: int,
    source_factory: Callable[[str], Any] | None,
) -> tuple[tuple[int, float], ...]:
    """Collect IMU energy samples from a tlog, reading at most ``max_messages``.

    Only ``RAW_IMU`` and ``SCALED_IMU2`` messages are requested. For
    each one the three accelerometer axes are divided by ``_MG_PER_G``
    and the sample value is the absolute difference between the total
    acceleration magnitude and ``_REST_TOTAL_G``. This is the signal
    the AZ-698 cross-correlation aligner consumes. Reading stops early
    when the source returns ``None``.

    Returns:
        ``(ts_ns, energy)`` tuples in the order the messages were read.

    Raises:
        ReplayInputAdapterError: When reading a message from the source
            fails.
    """
    source = _open_tlog(tlog_path, source_factory=source_factory)
    collected: list[tuple[int, float]] = []
    try:
        for _ in range(max_messages):
            try:
                message = source.recv_match(
                    type=["RAW_IMU", "SCALED_IMU2"],
                    blocking=False,
                )
            except Exception as exc:  # pragma: no cover — defensive.
                raise ReplayInputAdapterError(
                    f"tlog scan failed on {tlog_path}: {exc!r}"
                ) from exc
            if message is None:
                break
            stamp_ns = _msg_timestamp_ns(message)
            gx, gy, gz = (
                float(getattr(message, "xacc", 0.0)) / _MG_PER_G,
                float(getattr(message, "yacc", 0.0)) / _MG_PER_G,
                float(getattr(message, "zacc", 0.0)) / _MG_PER_G,
            )
            magnitude_g = math.sqrt(gx * gx + gy * gy + gz * gz)
            collected.append((stamp_ns, abs(magnitude_g - _REST_TOTAL_G)))
    finally:
        # Best-effort close: a failing close must not mask the result
        # or an exception already in flight.
        if hasattr(source, "close"):
            try:
                source.close()
            except Exception:  # pragma: no cover — defensive.
                pass
    return tuple(collected)
|
||||
|
||||
@@ -35,6 +35,7 @@ if TYPE_CHECKING:
|
||||
|
||||
|
||||
__all__ = [
|
||||
"AlignedWindow",
|
||||
"AutoSyncConfig",
|
||||
"AutoSyncDecision",
|
||||
"ReplayInputBundle",
|
||||
@@ -76,6 +77,35 @@ class AutoSyncConfig:
|
||||
low_confidence_threshold: Combined-confidence cut-off below
|
||||
which :meth:`ReplayInputAdapter.open` logs WARN and uses
|
||||
the best-guess offset (AC-6). Default 0.80.
|
||||
alignment_resample_hz: Target rate (Hz) the AZ-698 mid-flight
|
||||
cross-correlation aligner subsamples both signals
|
||||
(tlog IMU energy + video optical-flow magnitude) to before
|
||||
running the sliding-window normalised cross-correlation. Default 10.0 — matches
|
||||
the NFR ceiling of < 30 s alignment cost over a 30-min tlog.
|
||||
alignment_video_scan_seconds: Length of the video segment the
|
||||
AZ-698 aligner consumes when building its flow-magnitude
|
||||
stream. Default 30.0. Bounded so the per-frame Farneback
|
||||
cost does not dominate the alignment runtime even for
|
||||
long videos.
|
||||
alignment_low_confidence_threshold: Cross-correlation peak
|
||||
confidence below which :func:`find_aligned_window` falls
|
||||
back to the head-takeoff detector (AZ-405 path).
|
||||
Default 0.60.
|
||||
alignment_segment_motion_threshold_g: Minimum IMU energy
|
||||
(``|a| - 1g`` in g-units) for a sample to count as
|
||||
"in-flight" during segmentation. A 3-flight tlog has 3
|
||||
spans where this threshold is exceeded with gaps below
|
||||
``alignment_segment_max_internal_gap_seconds``. Default
|
||||
0.10 — captures cruise oscillation while ignoring
|
||||
stationary sensor noise (~ 0.02 g).
|
||||
alignment_segment_min_flight_duration_seconds: Minimum span
|
||||
length (in seconds) for a candidate segment to be
|
||||
classified as a flight. Discards short ground-startup
|
||||
blips. Default 30.
|
||||
alignment_segment_max_internal_gap_seconds: Sub-threshold
|
||||
spans shorter than this stay inside a flight (cruise
|
||||
lulls don't split a flight); spans equal-or-longer end
|
||||
the current flight. Default 30.
|
||||
"""
|
||||
|
||||
takeoff_accel_threshold_g: float = 0.5
|
||||
@@ -87,6 +117,12 @@ class AutoSyncConfig:
|
||||
match_threshold_pct: float = 95.0
|
||||
match_window_ms: int = 100
|
||||
low_confidence_threshold: float = 0.80
|
||||
alignment_resample_hz: float = 10.0
|
||||
alignment_video_scan_seconds: float = 30.0
|
||||
alignment_low_confidence_threshold: float = 0.60
|
||||
alignment_segment_motion_threshold_g: float = 0.10
|
||||
alignment_segment_min_flight_duration_seconds: float = 30.0
|
||||
alignment_segment_max_internal_gap_seconds: float = 30.0
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
@@ -114,6 +150,60 @@ class AutoSyncDecision:
|
||||
combined_confidence: float
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class AlignedWindow:
|
||||
"""Outcome of the AZ-698 mid-flight cross-correlation aligner.
|
||||
|
||||
Returned by :func:`find_aligned_window` and consumed by
|
||||
:class:`ReplayInputAdapter` when ``auto_trim=True``. Locates the
|
||||
video's playback window inside a longer tlog and produces both a
|
||||
seek window (``tlog_start_ns`` / ``tlog_end_ns``) and an offset
|
||||
(``offset_ms``) compatible with :class:`AutoSyncDecision`.
|
||||
|
||||
Attributes:
|
||||
tlog_start_ns: Inclusive lower bound on the tlog timeline
|
||||
(raw pymavlink ``msg._timestamp`` ns; NOT offset-shifted).
|
||||
Messages with ``received_at < tlog_start_ns`` are skipped
|
||||
by :class:`TlogReplayFcAdapter` so the runtime loop only
|
||||
sees the relevant window.
|
||||
tlog_end_ns: Exclusive upper bound on the tlog timeline. The
|
||||
adapter does not enforce this — it is reported for the
|
||||
FDR audit trail and the next-batch trimming task.
|
||||
offset_ms: Resolved offset that places video timestamp 0 at
|
||||
``tlog_start_ns`` (``tlog_start_ns - 0`` in ms). Plumbed
|
||||
into :class:`TlogReplayFcAdapter.time_offset_ms` so the
|
||||
published ``received_at`` is referenced against the video.
|
||||
confidence: Peak normalised cross-correlation value in
|
||||
``[0, 1]``. Below
|
||||
:attr:`AutoSyncConfig.alignment_low_confidence_threshold`
|
||||
the coordinator falls back to the head-takeoff path
|
||||
(``fallback_used=True``).
|
||||
fallback_used: ``True`` when cross-correlation confidence
|
||||
dropped below the threshold and the result was built
|
||||
from the head-takeoff detector instead.
|
||||
flight_count_detected: Number of distinct flight segments the
|
||||
pre-NCC segmenter found in the tlog. ``1`` for a clean
|
||||
single-flight tlog. ``> 1`` means the tlog covers
|
||||
multiple flights — the aligner always selects the
|
||||
**last** one (per AZ-698 spec line 23: "the last chunk
|
||||
in tlog is relevant").
|
||||
selected_flight_index: Zero-based index of the segment the
|
||||
aligner restricted NCC to. Always
|
||||
``flight_count_detected - 1`` when ``flight_count_detected
|
||||
> 0``; ``-1`` when segmentation returned no candidate and
|
||||
the aligner fell through to whole-tlog NCC (single-flight
|
||||
edge case where the segmenter's thresholds didn't fire).
|
||||
"""
|
||||
|
||||
tlog_start_ns: int
|
||||
tlog_end_ns: int
|
||||
offset_ms: int
|
||||
confidence: float
|
||||
fallback_used: bool
|
||||
flight_count_detected: int = 0
|
||||
selected_flight_index: int = -1
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class ReplayInputBundle:
|
||||
"""Trio of strategies returned by :meth:`ReplayInputAdapter.open`.
|
||||
@@ -136,6 +226,8 @@ class ReplayInputBundle:
|
||||
auto_sync_result: Auto-sync outcome; ``None`` when the
|
||||
constructor received an explicit
|
||||
``manual_time_offset_ms``.
|
||||
aligned_window: AZ-698 cross-correlation window result;
|
||||
``None`` when ``auto_trim`` was not enabled.
|
||||
"""
|
||||
|
||||
frame_source: "VideoFileFrameSource"
|
||||
@@ -143,3 +235,4 @@ class ReplayInputBundle:
|
||||
clock: "Clock"
|
||||
resolved_time_offset_ms: int
|
||||
auto_sync_result: AutoSyncDecision | None
|
||||
aligned_window: AlignedWindow | None = None
|
||||
|
||||
@@ -0,0 +1,247 @@
|
||||
"""Direct binary-tlog GPS-truth extractor (AZ-697 / E-DEMO-REPLAY).
|
||||
|
||||
Streams ``GLOBAL_POSITION_INT`` (preferred) or ``GPS_RAW_INT`` (fallback)
|
||||
from an ArduPilot binary tlog into a typed :class:`TlogGroundTruth` DTO,
|
||||
suitable for the AZ-699 (real-flight validation) and AZ-701 (HTTP
|
||||
replay API) comparison paths.
|
||||
|
||||
Design mirrors :mod:`gps_denied_onboard.replay_input.auto_sync`:
|
||||
|
||||
* Lazy ``pymavlink.mavutil`` import — missing dependency raises
|
||||
:class:`ReplayInputAdapterError` rather than crashing the import.
|
||||
* Optional ``source_factory`` injection point so unit tests can swap in
|
||||
a synthetic source (mirrors the AZ-399 / AZ-405 pattern).
|
||||
* Production helper only — placed under ``replay_input/`` because the
|
||||
GPS extraction is intrinsically tied to the tlog input pipeline; the
|
||||
comparison kernels themselves live in :mod:`helpers.gps_compare`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import math
|
||||
from collections.abc import Callable
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from gps_denied_onboard.replay_input.errors import ReplayInputAdapterError
|
||||
|
||||
__all__ = [
|
||||
"TlogGpsFix",
|
||||
"TlogGroundTruth",
|
||||
"load_tlog_ground_truth",
|
||||
]
|
||||
|
||||
|
||||
_LOGGER = logging.getLogger("gps_denied_onboard.replay_input.tlog_ground_truth")
|
||||
|
||||
# MAVLink GLOBAL_POSITION_INT / GPS_RAW_INT integer encodings.
|
||||
# lat/lon are deg × 1e7; alt is mm above MSL; vx/vy/vz are cm/s;
|
||||
# hdg/cog are cdeg (0..36000).
|
||||
_LATLON_SCALE: float = 1.0e-7
|
||||
_MM_PER_M: float = 1000.0
|
||||
_CM_PER_M_S: float = 100.0
|
||||
_CDEG_PER_DEG: float = 100.0
|
||||
|
||||
# Source-label constants returned in :attr:`TlogGroundTruth.source`.
|
||||
_SOURCE_GLOBAL_POSITION_INT: str = "GLOBAL_POSITION_INT"
|
||||
_SOURCE_GPS_RAW_INT: str = "GPS_RAW_INT"
|
||||
_SOURCE_NONE: str = ""
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class TlogGpsFix:
|
||||
"""One time-aligned GPS-truth row extracted from a tlog.
|
||||
|
||||
Attributes:
|
||||
ts_ns: Absolute timestamp (ns) sourced from pymavlink's
|
||||
``_timestamp`` field (Unix time × 1e9). Comparable to the
|
||||
airborne runtime clock during replay.
|
||||
lat_deg, lon_deg: Latitude / longitude in degrees (WGS84).
|
||||
alt_m: Altitude above MSL in metres (MAVLink ``alt`` field).
|
||||
hdg_deg: Aircraft heading in degrees [0, 360). When sourced
|
||||
from ``GPS_RAW_INT``, this is course over ground (cog),
|
||||
not the IMU-derived heading.
|
||||
vx_m_s, vy_m_s, vz_m_s: North / east / down velocity in m/s.
|
||||
For ``GPS_RAW_INT``-sourced rows, ``vx`` / ``vy`` are
|
||||
derived from the ground velocity + course over ground;
|
||||
``vz`` is 0.0 because the message does not expose vertical
|
||||
velocity.
|
||||
"""
|
||||
|
||||
ts_ns: int
|
||||
lat_deg: float
|
||||
lon_deg: float
|
||||
alt_m: float
|
||||
hdg_deg: float
|
||||
vx_m_s: float
|
||||
vy_m_s: float
|
||||
vz_m_s: float
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class TlogGroundTruth:
|
||||
"""Ground-truth GPS series extracted from a tlog.
|
||||
|
||||
Attributes:
|
||||
records: Time-ordered fixes. Empty when no GPS messages were
|
||||
present in the tlog.
|
||||
source: MAVLink message type the records were sourced from —
|
||||
``"GLOBAL_POSITION_INT"`` (preferred), ``"GPS_RAW_INT"``
|
||||
(fallback), or ``""`` (no GPS messages found).
|
||||
"""
|
||||
|
||||
records: tuple[TlogGpsFix, ...]
|
||||
source: str
|
||||
|
||||
|
||||
def load_tlog_ground_truth(
    tlog_path: Path,
    *,
    source_factory: Callable[[str], Any] | None = None,
) -> TlogGroundTruth:
    """Extract the GPS ground-truth series from a tlog.

    The whole tlog is scanned for ``GLOBAL_POSITION_INT`` and
    ``GPS_RAW_INT`` messages. Both kinds are collected during the scan;
    the ``GLOBAL_POSITION_INT`` rows win whenever at least one exists.

    Args:
        tlog_path: Path to the binary tlog; it must be an existing file.
        source_factory: Test-only injection. When given, its return
            value is used instead of the pymavlink connection. The
            object must provide ``recv_match`` (and optionally
            ``close``) with pymavlink-compatible semantics.

    Returns:
        A :class:`TlogGroundTruth` holding the ``GLOBAL_POSITION_INT``
        rows if any were read, else the ``GPS_RAW_INT`` rows, else an
        empty series with ``source=""`` (a warning is logged).

    Raises:
        ReplayInputAdapterError: When the tlog file is missing, the
            tlog cannot be opened, or reading a message fails.
    """
    if not tlog_path.is_file():
        raise ReplayInputAdapterError(f"tlog file not found: {tlog_path}")

    source = _open_tlog(tlog_path, source_factory=source_factory)
    preferred_rows: list[TlogGpsFix] = []
    fallback_rows: list[TlogGpsFix] = []
    wanted_types = [_SOURCE_GLOBAL_POSITION_INT, _SOURCE_GPS_RAW_INT]
    try:
        while True:
            try:
                message = source.recv_match(type=wanted_types, blocking=False)
            except Exception as exc:  # pragma: no cover — defensive.
                raise ReplayInputAdapterError(
                    f"tlog scan failed on {tlog_path}: {exc!r}"
                ) from exc
            if message is None:
                break
            kind = _safe_msg_type(message)
            if not kind:
                continue
            stamp_ns = _msg_timestamp_ns(message)
            if kind == _SOURCE_GLOBAL_POSITION_INT:
                preferred_rows.append(_from_global_position_int(message, stamp_ns))
            elif kind == _SOURCE_GPS_RAW_INT:
                fallback_rows.append(_from_gps_raw_int(message, stamp_ns))
    finally:
        # Best-effort close: never let a close failure hide the scan
        # outcome.
        if hasattr(source, "close"):
            try:
                source.close()
            except Exception:  # pragma: no cover — defensive.
                pass

    # First non-empty series wins, in order of preference.
    for rows, label in (
        (preferred_rows, _SOURCE_GLOBAL_POSITION_INT),
        (fallback_rows, _SOURCE_GPS_RAW_INT),
    ):
        if rows:
            return TlogGroundTruth(records=tuple(rows), source=label)

    _LOGGER.warning(
        "tlog %s contains no GLOBAL_POSITION_INT or GPS_RAW_INT messages",
        tlog_path,
    )
    return TlogGroundTruth(records=(), source=_SOURCE_NONE)
|
||||
|
||||
|
||||
def _from_global_position_int(msg: Any, ts_ns: int) -> TlogGpsFix:
    """Build a :class:`TlogGpsFix` from one ``GLOBAL_POSITION_INT`` message.

    Each wire field is fetched with ``getattr(..., 0)`` and coerced with
    ``int``, so an attribute that is absent on ``msg`` decays to zero
    instead of raising. The integers are then scaled with the
    module-level unit constants (``_LATLON_SCALE``, ``_MM_PER_M``,
    ``_CDEG_PER_DEG``, ``_CM_PER_M_S``).

    Args:
        msg: Decoded message object; only attribute access is used.
        ts_ns: Timestamp to stamp on the fix, passed through unchanged.

    Returns:
        The populated fix.
    """

    def raw(field: str) -> int:
        # Missing attributes are treated as a zero wire value.
        return int(getattr(msg, field, 0))

    lat_deg = raw("lat") * _LATLON_SCALE
    lon_deg = raw("lon") * _LATLON_SCALE
    alt_m = raw("alt") / _MM_PER_M
    hdg_deg = raw("hdg") / _CDEG_PER_DEG
    vx_m_s = raw("vx") / _CM_PER_M_S
    vy_m_s = raw("vy") / _CM_PER_M_S
    vz_m_s = raw("vz") / _CM_PER_M_S
    return TlogGpsFix(
        ts_ns=ts_ns,
        lat_deg=lat_deg,
        lon_deg=lon_deg,
        alt_m=alt_m,
        hdg_deg=hdg_deg,
        vx_m_s=vx_m_s,
        vy_m_s=vy_m_s,
        vz_m_s=vz_m_s,
    )
|
||||
|
||||
|
||||
def _from_gps_raw_int(msg: Any, ts_ns: int) -> TlogGpsFix:
    """Build a :class:`TlogGpsFix` from one ``GPS_RAW_INT`` message.

    ``GPS_RAW_INT`` carries a ground speed (``vel``) and a course over
    ground (``cog``) rather than per-axis velocity components, so the
    two horizontal components are derived here as
    ``speed * cos(course)`` and ``speed * sin(course)``. The course is
    also reported as the fix heading.

    The message has no vertical-velocity field, so ``vz_m_s`` is fixed
    at ``0.0``; callers that need vertical velocity must take it from
    another message (e.g., ``VFR_HUD.climb``).

    Args:
        msg: Decoded message object; absent attributes read as ``0``.
        ts_ns: Timestamp to stamp on the fix, passed through unchanged.

    Returns:
        The populated fix.
    """
    speed_cm_s = int(getattr(msg, "vel", 0))
    course_cdeg = int(getattr(msg, "cog", 0))

    # One degrees value feeds both the heading and the trig below.
    course_deg = course_cdeg / _CDEG_PER_DEG
    course_rad = math.radians(course_deg)
    speed_m_s = speed_cm_s / _CM_PER_M_S
    vx_m_s = speed_m_s * math.cos(course_rad)
    vy_m_s = speed_m_s * math.sin(course_rad)

    lat_deg = int(getattr(msg, "lat", 0)) * _LATLON_SCALE
    lon_deg = int(getattr(msg, "lon", 0)) * _LATLON_SCALE
    alt_m = int(getattr(msg, "alt", 0)) / _MM_PER_M
    return TlogGpsFix(
        ts_ns=ts_ns,
        lat_deg=lat_deg,
        lon_deg=lon_deg,
        alt_m=alt_m,
        hdg_deg=course_deg,
        vx_m_s=vx_m_s,
        vy_m_s=vy_m_s,
        vz_m_s=0.0,
    )
|
||||
|
||||
|
||||
def _open_tlog(
|
||||
tlog_path: Path,
|
||||
*,
|
||||
source_factory: Callable[[str], Any] | None,
|
||||
) -> Any:
|
||||
if source_factory is not None:
|
||||
return source_factory(str(tlog_path))
|
||||
try:
|
||||
from pymavlink import mavutil
|
||||
except ImportError as exc:
|
||||
raise ReplayInputAdapterError(
|
||||
"pymavlink is required for replay tlog ground-truth "
|
||||
"extraction but is not importable in this binary"
|
||||
) from exc
|
||||
return mavutil.mavlink_connection(
|
||||
str(tlog_path),
|
||||
dialect="ardupilotmega",
|
||||
mavlink_version="2.0",
|
||||
)
|
||||
|
||||
|
||||
def _safe_msg_type(msg: Any) -> str:
    """Return the message-type label for ``msg`` without ever raising.

    Resolution order:

    * ``msg`` exposes ``get_type`` -> ``str(msg.get_type())``.
    * probing for or calling ``get_type`` raises -> ``""`` (the caller
      treats an empty label as "skip this message").
    * ``msg`` has no ``get_type`` -> the name of its Python class.
    """
    try:
        label = str(msg.get_type()) if hasattr(msg, "get_type") else None
    except Exception:
        # Best-effort: a message that cannot report its type is skipped.
        return ""
    if label is None:
        return type(msg).__name__
    return label
|
||||
|
||||
|
||||
def _msg_timestamp_ns(msg: Any) -> int:
    """Return ``msg._timestamp`` converted from seconds to integer nanoseconds.

    The value is coerced with ``float``, multiplied by ``1e9`` and
    truncated with ``int``.

    Raises:
        ReplayInputAdapterError: When ``msg`` has no ``_timestamp``
            attribute or the attribute is ``None``.
    """
    stamp_s = getattr(msg, "_timestamp", None)
    if stamp_s is not None:
        return int(float(stamp_s) * 1_000_000_000)
    raise ReplayInputAdapterError(
        "tlog message missing _timestamp attribute; pymavlink "
        "mavlogfile should populate it on every recv_match() return"
    )
|
||||
@@ -61,10 +61,12 @@ from gps_denied_onboard.replay_input.auto_sync import (
|
||||
_load_tlog_samples,
|
||||
compute_offset,
|
||||
detect_video_motion_onset,
|
||||
find_aligned_window,
|
||||
validate_offset_or_fail,
|
||||
)
|
||||
from gps_denied_onboard.replay_input.errors import ReplayInputAdapterError
|
||||
from gps_denied_onboard.replay_input.interface import (
|
||||
AlignedWindow,
|
||||
AutoSyncConfig,
|
||||
AutoSyncDecision,
|
||||
ReplayInputBundle,
|
||||
@@ -86,6 +88,8 @@ _LOG_KIND_AUTO_SYNC_DETECTED = "replay.auto_sync.detected"
|
||||
_LOG_KIND_AUTO_SYNC_LOW_CONF = "replay.auto_sync.low_confidence"
|
||||
_LOG_KIND_AUTO_SYNC_AC8_FAIL = "replay.auto_sync.ac8_validation_failed"
|
||||
_LOG_KIND_OPEN_MANUAL = "replay.input.opened_manual_offset"
|
||||
_LOG_KIND_AUTO_TRIM_RESOLVED = "replay.auto_trim.resolved"
|
||||
_LOG_KIND_AUTO_TRIM_FALLBACK = "replay.auto_trim.fallback_to_takeoff"
|
||||
|
||||
|
||||
class ReplayInputAdapter:
|
||||
@@ -137,6 +141,7 @@ class ReplayInputAdapter:
|
||||
"_pace",
|
||||
"_manual_time_offset_ms",
|
||||
"_skip_auto_sync_validation",
|
||||
"_auto_trim",
|
||||
"_auto_sync_config",
|
||||
"_tlog_source_factory",
|
||||
"_video_frames_factory",
|
||||
@@ -161,6 +166,7 @@ class ReplayInputAdapter:
|
||||
manual_time_offset_ms: int | None,
|
||||
auto_sync_config: AutoSyncConfig,
|
||||
skip_auto_sync_validation: bool = False,
|
||||
auto_trim: bool = False,
|
||||
tlog_source_factory: Any | None = None,
|
||||
video_frames_factory: Any | None = None,
|
||||
video_timestamps_factory: Any | None = None,
|
||||
@@ -199,6 +205,21 @@ class ReplayInputAdapter:
|
||||
"skip_auto_sync_validation=True requires "
|
||||
"manual_time_offset_ms to be set"
|
||||
)
|
||||
if not isinstance(auto_trim, bool):
|
||||
raise ReplayInputAdapterError(
|
||||
"auto_trim must be a bool; got "
|
||||
f"{type(auto_trim).__name__}"
|
||||
)
|
||||
if auto_trim and manual_time_offset_ms is not None:
|
||||
# Mirror the ReplayConfig.__post_init__ gate. An explicit
|
||||
# manual offset means the operator has already aligned
|
||||
# the streams; running the cross-correlation aligner on
|
||||
# top of that would either re-resolve the same window
|
||||
# (wasteful) or overwrite the operator's intent silently.
|
||||
raise ReplayInputAdapterError(
|
||||
"auto_trim=True is mutually exclusive with "
|
||||
"manual_time_offset_ms"
|
||||
)
|
||||
self._video_path = video_path
|
||||
self._tlog_path = tlog_path
|
||||
self._camera_calibration = camera_calibration
|
||||
@@ -208,6 +229,7 @@ class ReplayInputAdapter:
|
||||
self._pace = pace
|
||||
self._manual_time_offset_ms = manual_time_offset_ms
|
||||
self._skip_auto_sync_validation = skip_auto_sync_validation
|
||||
self._auto_trim = auto_trim
|
||||
self._auto_sync_config = auto_sync_config
|
||||
self._tlog_source_factory = tlog_source_factory
|
||||
self._video_frames_factory = video_frames_factory
|
||||
@@ -233,12 +255,20 @@ class ReplayInputAdapter:
|
||||
# surfaces without paying the cv2.VideoCapture cost.
|
||||
tlog_imu_timestamps_ns, tlog_samples_for_auto = self._load_and_validate_tlog()
|
||||
|
||||
# Step 2 — resolve the offset (auto-sync or manual override).
|
||||
# Step 2 — resolve the offset (auto-sync, auto-trim, or
|
||||
# manual override).
|
||||
decision: AutoSyncDecision | None
|
||||
if self._manual_time_offset_ms is None:
|
||||
aligned_window: AlignedWindow | None
|
||||
if self._auto_trim:
|
||||
aligned_window = self._run_auto_trim()
|
||||
decision = None
|
||||
resolved_offset_ms = aligned_window.offset_ms
|
||||
elif self._manual_time_offset_ms is None:
|
||||
aligned_window = None
|
||||
decision = self._run_auto_sync(tlog_samples_for_auto)
|
||||
resolved_offset_ms = decision.offset_ms
|
||||
else:
|
||||
aligned_window = None
|
||||
decision = None
|
||||
resolved_offset_ms = int(self._manual_time_offset_ms)
|
||||
self._log.info(
|
||||
@@ -315,6 +345,11 @@ class ReplayInputAdapter:
|
||||
wgs_converter=self._wgs_converter,
|
||||
fdr_client=self._fdr_client,
|
||||
time_offset_ms=resolved_offset_ms,
|
||||
tlog_start_ns=(
|
||||
aligned_window.tlog_start_ns
|
||||
if aligned_window is not None
|
||||
else None
|
||||
),
|
||||
pace=self._pace,
|
||||
source_factory=self._tlog_source_factory,
|
||||
mavlink_transport=self._mavlink_transport,
|
||||
@@ -345,6 +380,7 @@ class ReplayInputAdapter:
|
||||
clock=clock,
|
||||
resolved_time_offset_ms=resolved_offset_ms,
|
||||
auto_sync_result=decision,
|
||||
aligned_window=aligned_window,
|
||||
)
|
||||
self._bundle = bundle
|
||||
self._opened = True
|
||||
@@ -408,6 +444,54 @@ class ReplayInputAdapter:
|
||||
)
|
||||
return [ts for ts, _ in samples.accel], samples
|
||||
|
||||
def _run_auto_trim(self) -> AlignedWindow:
    """Resolve the replay window via the AZ-698 auto-trim aligner.

    Delegates to :func:`find_aligned_window` with the adapter's tlog
    path, video path, auto-sync config, FC dialect and the two injected
    factories, then reports the outcome on ``self._log`` and through
    ``self._emit_fdr_event``.

    If the returned window has ``fallback_used`` set (per the AZ-698
    notes: the correlation peak fell below
    :attr:`AutoSyncConfig.alignment_low_confidence_threshold` and
    :func:`find_aligned_window` fell back to the AZ-405 head-takeoff
    detector), both records use the fallback log kind at WARN level;
    otherwise the resolved kind at INFO. This method does not reject a
    fallback window itself — the resolved offset is validated later in
    the open sequence.

    Returns:
        The :class:`AlignedWindow` produced by ``find_aligned_window``.
    """
    window = find_aligned_window(
        self._tlog_path,
        self._video_path,
        self._auto_sync_config,
        self._target_fc_dialect,
        tlog_source_factory=self._tlog_source_factory,
        video_frames_factory=self._video_frames_factory,
    )

    if window.fallback_used:
        kind, level = _LOG_KIND_AUTO_TRIM_FALLBACK, "WARN"
    else:
        kind, level = _LOG_KIND_AUTO_TRIM_RESOLVED, "INFO"

    # Every structured key is also the attribute name on the window.
    kv = {
        field: getattr(window, field)
        for field in (
            "tlog_start_ns",
            "tlog_end_ns",
            "offset_ms",
            "confidence",
            "fallback_used",
            "flight_count_detected",
            "selected_flight_index",
        )
    }
    msg = " ".join(
        (
            f"{kind}: tlog_start_ns={window.tlog_start_ns}",
            f"offset_ms={window.offset_ms}",
            f"confidence={window.confidence:.3f}",
            f"flights_detected={window.flight_count_detected}",
            f"selected_flight={window.selected_flight_index}",
        )
    )

    emit = self._log.warning if level == "WARN" else self._log.info
    emit(msg, extra={"kind": kind, "kv": kv})
    self._emit_fdr_event(level=level, log_kind=kind, msg=msg, kv=kv)
    return window
|
||||
|
||||
def _run_auto_sync(self, tlog_samples: Any) -> AutoSyncDecision:
|
||||
"""Auto path — compute the take-off / motion-onset / offset.
|
||||
|
||||
|
||||
@@ -226,6 +226,7 @@ def _build_replay_input_bundle(
|
||||
pace=pace,
|
||||
manual_time_offset_ms=config.replay.time_offset_ms,
|
||||
skip_auto_sync_validation=config.replay.skip_auto_sync_validation,
|
||||
auto_trim=config.replay.auto_trim,
|
||||
auto_sync_config=auto_sync,
|
||||
mavlink_transport=mavlink_transport,
|
||||
)
|
||||
@@ -267,6 +268,18 @@ def _build_auto_sync_config(config: Config) -> AutoSyncConfig:
|
||||
match_threshold_pct=block.match_threshold_pct,
|
||||
match_window_ms=block.match_window_ms,
|
||||
low_confidence_threshold=block.low_confidence_threshold,
|
||||
alignment_resample_hz=block.alignment_resample_hz,
|
||||
alignment_video_scan_seconds=block.alignment_video_scan_seconds,
|
||||
alignment_low_confidence_threshold=block.alignment_low_confidence_threshold,
|
||||
alignment_segment_motion_threshold_g=(
|
||||
block.alignment_segment_motion_threshold_g
|
||||
),
|
||||
alignment_segment_min_flight_duration_seconds=(
|
||||
block.alignment_segment_min_flight_duration_seconds
|
||||
),
|
||||
alignment_segment_max_internal_gap_seconds=(
|
||||
block.alignment_segment_max_internal_gap_seconds
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
|
||||
+15
-105
@@ -1,18 +1,22 @@
|
||||
"""Helpers shared by the AZ-404 E2E replay tests.
|
||||
|
||||
The numerical kernels (``l2_horizontal_m``, ``match_percentage``,
|
||||
``GroundTruthRow``) moved into production code at
|
||||
:mod:`gps_denied_onboard.helpers.gps_compare` in AZ-697; they're
|
||||
re-exported here so existing import sites stay stable.
|
||||
|
||||
* :func:`parse_jsonl` — read the ``JsonlReplaySink`` output into a list
|
||||
of dicts with one entry per emit.
|
||||
* :func:`l2_horizontal_m` — WGS84-aware L2 horizontal distance between
|
||||
two ``(lat, lon)`` pairs in metres.
|
||||
* :func:`match_percentage` — share of estimator emissions whose
|
||||
L2 distance to the closest ground-truth row is within a threshold.
|
||||
* :class:`CapturingMavlinkTransport` — test-only ``MavlinkTransport``
|
||||
impl that records every ``write`` so AC-4b can compare the byte
|
||||
streams produced by ``compose_root(config_live)`` vs.
|
||||
``compose_root(config_replay)``.
|
||||
* :func:`load_ground_truth_csv` — the IMU CSV's ``GLOBAL_POSITION_INT``
|
||||
columns ARE the AC-3 reference (the original tlog's GPS rows
|
||||
exported to CSV); this helper materialises them.
|
||||
exported to CSV); this helper materialises them. Retained for the
|
||||
CSV-only fallback path; the real-tlog branch uses
|
||||
:func:`gps_denied_onboard.replay_input.load_tlog_ground_truth`
|
||||
instead.
|
||||
|
||||
All functions are pure / deterministic and stay safely importable on
|
||||
dev macOS without ``RUN_REPLAY_E2E``; the regular regression suite
|
||||
@@ -24,11 +28,15 @@ from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import json
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from gps_denied_onboard.helpers.gps_compare import (
|
||||
GroundTruthRow,
|
||||
l2_horizontal_m,
|
||||
match_percentage,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"CapturingMavlinkTransport",
|
||||
"GroundTruthRow",
|
||||
@@ -39,22 +47,6 @@ __all__ = [
|
||||
]
|
||||
|
||||
|
||||
# WGS84 mean Earth radius. Matches the value used by
|
||||
# `helpers/wgs_converter.py` (AZ-279) so the e2e check is consistent
|
||||
# with the production converter.
|
||||
_EARTH_RADIUS_M: float = 6_371_008.8
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class GroundTruthRow:
    """Immutable ground-truth sample taken from the Derkachi data_imu.csv slice."""

    # Sample time; the ``_s`` suffix denotes seconds.
    t_s: float
    # Latitude and longitude; the ``_deg`` suffix denotes degrees.
    lat_deg: float
    lon_deg: float
    # Altitude; the ``_m`` suffix denotes metres.
    alt_m: float
|
||||
|
||||
|
||||
def parse_jsonl(path: Path) -> list[dict[str, Any]]:
|
||||
"""Return one dict per line of a JsonlReplaySink output file.
|
||||
|
||||
@@ -77,29 +69,6 @@ def parse_jsonl(path: Path) -> list[dict[str, Any]]:
|
||||
return records
|
||||
|
||||
|
||||
def l2_horizontal_m(
    lat1_deg: float, lon1_deg: float, lat2_deg: float, lon2_deg: float
) -> float:
    """Great-circle distance in metres between two lat/lon points.

    Haversine on a sphere of radius ``_EARTH_RADIUS_M`` (the C5/AZ-279
    mean Earth radius). The spherical model is accepted for the AC-3
    <= 100 m threshold: at the Derkachi latitude band it differs from
    the WGS84 ellipsoid by < 0.5 %, well inside that budget.

    Args:
        lat1_deg, lon1_deg: First point, in degrees.
        lat2_deg, lon2_deg: Second point, in degrees.

    Returns:
        Distance along the sphere's surface, in metres.
    """
    lat_a = math.radians(lat1_deg)
    lat_b = math.radians(lat2_deg)
    delta_lat = lat_b - lat_a
    delta_lon = math.radians(lon2_deg - lon1_deg)

    sin_half_lat = math.sin(delta_lat / 2.0)
    sin_half_lon = math.sin(delta_lon / 2.0)
    hav = (
        sin_half_lat ** 2
        + math.cos(lat_a) * math.cos(lat_b) * sin_half_lon ** 2
    )
    # Clamp so float round-off can never push asin() out of its domain.
    central_angle = 2.0 * math.asin(min(1.0, math.sqrt(hav)))
    return _EARTH_RADIUS_M * central_angle
|
||||
|
||||
|
||||
def load_ground_truth_csv(csv_path: Path) -> list[GroundTruthRow]:
|
||||
"""Load the Derkachi IMU CSV's GPS rows as ground truth.
|
||||
|
||||
@@ -123,65 +92,6 @@ def load_ground_truth_csv(csv_path: Path) -> list[GroundTruthRow]:
|
||||
return rows
|
||||
|
||||
|
||||
def match_percentage(
    emissions: list[dict[str, Any]],
    ground_truth: list[GroundTruthRow],
    *,
    threshold_m: float,
) -> float:
    """Fraction of emissions lying within ``threshold_m`` of ground truth.

    Each emission is paired with the ground-truth row nearest in time
    (``emitted_at`` is read as nanoseconds and compared against
    ``t_s``); the pair counts as a hit when their horizontal distance
    is ``<= threshold_m``.

    Nearest-in-time pairing is considered sufficient because the IMU
    CSV's 10 Hz cadence keeps the paired row typically < 50 ms from the
    emit timestamp.

    Args:
        emissions: Parsed JSONL records with ``emitted_at`` and
            ``position_wgs84`` (``lat_deg`` / ``lon_deg``) entries.
        ground_truth: Reference rows; need not be pre-sorted.
        threshold_m: Hit radius in metres.

    Returns:
        Hit ratio in ``[0.0, 1.0]``; ``0.0`` when ``emissions`` is empty.

    Raises:
        AssertionError: When there are emissions but no ground truth.
    """
    if not emissions:
        return 0.0
    if not ground_truth:
        raise AssertionError("ground_truth must be non-empty")

    timeline = sorted(ground_truth, key=lambda r: r.t_s)
    stamps = [row.t_s for row in timeline]

    def is_hit(record: dict[str, Any]) -> bool:
        when_s = int(record["emitted_at"]) / 1e9
        pos = _bisect_left(stamps, when_s)
        # The row just before and the row at the insertion point are
        # the only candidates for "nearest in time".
        neighbours = timeline[max(pos - 1, 0):pos + 1]
        closest = min(neighbours, key=lambda r: abs(r.t_s - when_s))
        where = record["position_wgs84"]
        distance_m = l2_horizontal_m(
            where["lat_deg"],
            where["lon_deg"],
            closest.lat_deg,
            closest.lon_deg,
        )
        return distance_m <= threshold_m

    hit_count = sum(1 for record in emissions if is_hit(record))
    return hit_count / len(emissions)
|
||||
|
||||
|
||||
def _bisect_left(seq: list[float], target: float) -> int:
    """Leftmost insertion index for ``target`` in the ascending ``seq``.

    Hand-rolled equivalent of the stdlib ``bisect_left``, kept inline so
    the module's import surface stays narrow.
    """
    start, stop = 0, len(seq)
    while start < stop:
        pivot = (start + stop) // 2
        if seq[pivot] < target:
            # Everything up to and including pivot is too small.
            start = pivot + 1
            continue
        stop = pivot
    return start
|
||||
|
||||
|
||||
class CapturingMavlinkTransport:
|
||||
"""Test-only :class:`MavlinkTransport` that records every write.
|
||||
|
||||
|
||||
@@ -21,18 +21,21 @@ from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from gps_denied_onboard.replay_input import load_tlog_ground_truth
|
||||
from tests.e2e.replay._helpers import GroundTruthRow, load_ground_truth_csv
|
||||
from tests.e2e.replay._tlog_synth import synthesize_tlog
|
||||
|
||||
|
||||
# Derkachi clip range — anchored at the start of the data_imu.csv
|
||||
# (Time=0.0). The fixture clip is deliberately the first 60 s rather
|
||||
# than a mid-flight slice: the take-off region exercises the AZ-405
|
||||
# IMU-take-off auto-sync detector, and the steady cruise that follows
|
||||
# stresses the satellite-anchor + VIO drift-correction path. The
|
||||
# trim is documented in `tests/e2e/replay/README.md`.
|
||||
_CLIP_START_S: float = 0.0
|
||||
_CLIP_END_S: float = 60.0
|
||||
# Derkachi clip range — 60 s starting at the start of the GT series.
|
||||
# For the CSV-synth fallback, the series begins at Time=0.0; for the
|
||||
# real-tlog branch, the series begins at the wall-clock timestamp of
|
||||
# the first GPS message (and the clip becomes [t0, t0 + 60]). The
|
||||
# fixture clip is deliberately the first 60 s rather than a mid-flight
|
||||
# slice: the take-off region exercises the AZ-405 IMU-take-off
|
||||
# auto-sync detector, and the steady cruise that follows stresses the
|
||||
# satellite-anchor + VIO drift-correction path. The trim is documented
|
||||
# in `tests/e2e/replay/README.md`.
|
||||
_CLIP_DURATION_S: float = 60.0
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
@@ -48,11 +51,15 @@ def _derkachi_dir() -> Path:
|
||||
|
||||
|
||||
def _calibration_path() -> Path:
|
||||
# Placeholder calibration: the real Topotek KHP20S30 intrinsics
|
||||
# are unknown per `_docs/00_problem/input_data/flight_derkachi/
|
||||
# camera_info.md`. AC-3 is `xfail`ed until a real calibration
|
||||
# ships; AC-1 / AC-2 / AC-5 / AC-6 do not depend on intrinsics
|
||||
# accuracy.
|
||||
# AZ-702 ships a factory-sheet approximation for the Topotek
|
||||
# KHP20S30 nadir camera at
|
||||
# `_docs/00_problem/input_data/flight_derkachi/khp20s30_factory.json`.
|
||||
# When present we use it; otherwise we fall back to the
|
||||
# `adti26.json` placeholder so the AC-1/2/5/6 path stays
|
||||
# exercisable on dev macOS without the AZ-702 deliverable.
|
||||
factory_path = _derkachi_dir() / "khp20s30_factory.json"
|
||||
if factory_path.is_file():
|
||||
return factory_path
|
||||
return _repo_root() / "tests" / "fixtures" / "calibration" / "adti26.json"
|
||||
|
||||
|
||||
@@ -87,17 +94,45 @@ def derkachi_replay_inputs(tmp_path_factory: pytest.TempPathFactory) -> Derkachi
|
||||
derkachi = _derkachi_dir()
|
||||
csv_path = derkachi / "data_imu.csv"
|
||||
video_path = derkachi / "flight_derkachi.mp4"
|
||||
if not csv_path.is_file():
|
||||
pytest.fail(
|
||||
f"Derkachi fixture missing: {csv_path} — see "
|
||||
"_docs/00_problem/input_data/flight_derkachi/README.md"
|
||||
)
|
||||
real_tlog_path = derkachi / "derkachi.tlog"
|
||||
if not video_path.is_file():
|
||||
pytest.fail(f"Derkachi fixture missing: {video_path}")
|
||||
|
||||
work_dir = tmp_path_factory.mktemp("derkachi")
|
||||
tlog_path = work_dir / "synth.tlog"
|
||||
synthesize_tlog(csv_path, tlog_path)
|
||||
# AZ-697: prefer the real binary tlog when present; fall back to
|
||||
# synthesizing one from the CSV so dev environments without the
|
||||
# 5.8 MB binary blob still exercise the e2e path.
|
||||
if real_tlog_path.is_file():
|
||||
tlog_path = real_tlog_path
|
||||
gt_series = load_tlog_ground_truth(real_tlog_path).records
|
||||
if gt_series:
|
||||
t0_s = gt_series[0].ts_ns / 1e9
|
||||
ground_truth_full = [
|
||||
GroundTruthRow(
|
||||
t_s=fix.ts_ns / 1e9,
|
||||
lat_deg=fix.lat_deg,
|
||||
lon_deg=fix.lon_deg,
|
||||
alt_m=fix.alt_m,
|
||||
)
|
||||
for fix in gt_series
|
||||
]
|
||||
clip_start_s = t0_s
|
||||
clip_end_s = t0_s + _CLIP_DURATION_S
|
||||
else:
|
||||
ground_truth_full = []
|
||||
clip_start_s = 0.0
|
||||
clip_end_s = _CLIP_DURATION_S
|
||||
else:
|
||||
if not csv_path.is_file():
|
||||
pytest.fail(
|
||||
f"Derkachi fixture missing: {csv_path} — see "
|
||||
"_docs/00_problem/input_data/flight_derkachi/README.md"
|
||||
)
|
||||
tlog_path = work_dir / "synth.tlog"
|
||||
synthesize_tlog(csv_path, tlog_path)
|
||||
ground_truth_full = load_ground_truth_csv(csv_path)
|
||||
clip_start_s = 0.0
|
||||
clip_end_s = _CLIP_DURATION_S
|
||||
|
||||
# Empty signing key — the airborne replay path runs the signing
|
||||
# handshake against `NoopMavlinkTransport`, so the key contents do
|
||||
@@ -118,9 +153,8 @@ def derkachi_replay_inputs(tmp_path_factory: pytest.TempPathFactory) -> Derkachi
|
||||
|
||||
output_path = work_dir / "estimator_output.jsonl"
|
||||
|
||||
ground_truth_full = load_ground_truth_csv(csv_path)
|
||||
ground_truth = [
|
||||
r for r in ground_truth_full if _CLIP_START_S <= r.t_s <= _CLIP_END_S
|
||||
r for r in ground_truth_full if clip_start_s <= r.t_s <= clip_end_s
|
||||
]
|
||||
|
||||
return DerkachiReplayInputs(
|
||||
|
||||
@@ -0,0 +1,321 @@
|
||||
"""AZ-699 — Real-flight validation runner + accuracy report.
|
||||
|
||||
Runs the ``gps-denied-replay`` console-script against the **real**
|
||||
``derkachi.tlog`` (binary, not the AZ-265 synth) plus the real
|
||||
flight video and the AZ-702 factory-sheet camera calibration,
|
||||
computes the full horizontal-error distribution, and writes a
|
||||
structured Markdown accuracy report to
|
||||
``_docs/06_metrics/real_flight_validation_{YYYY-MM-DD}.md``.
|
||||
|
||||
Unlike the AZ-404 1-min test (``test_derkachi_1min.py``), this one
|
||||
emits an **honest** PASS/FAIL verdict — no ``@xfail`` mask. The
|
||||
threshold gate is ≥ 80 % of emissions within 100 m (AZ-696 epic
|
||||
AC-3). When the run fails, the failure message references the
|
||||
calibration acquisition method (factory-sheet for AZ-702 or the
|
||||
``adti26.json`` placeholder) so the root cause is attributable
|
||||
without re-reading the source.
|
||||
|
||||
The test SKIPS (not ``@xfail``) when any prerequisite is absent:
|
||||
|
||||
* ``RUN_REPLAY_E2E`` env var not set (matches AZ-404 gating).
|
||||
* Real ``derkachi.tlog`` binary missing.
|
||||
* Real ``flight_derkachi.mp4`` missing OR placeholder-sized
|
||||
(the repo currently ships a 134-byte placeholder for the
|
||||
AZ-404 fixture; this AZ-699 path requires a decodable video).
|
||||
* ``gps-denied-replay`` console-script not installed.
|
||||
|
||||
Style: every test follows the Arrange / Act / Assert pattern.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from gps_denied_onboard.helpers.gps_compare import (
|
||||
GroundTruthRow,
|
||||
horizontal_error_distribution,
|
||||
)
|
||||
from gps_denied_onboard.replay_input import load_tlog_ground_truth
|
||||
from gps_denied_onboard.helpers.accuracy_report import (
|
||||
AC3_GATE_PCT,
|
||||
AC3_GATE_THRESHOLD_M,
|
||||
ReportContext,
|
||||
format_failure_message,
|
||||
render_report,
|
||||
verdict_passes_ac3,
|
||||
)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Skip gates
|
||||
|
||||
|
||||
def _repo_root() -> Path:
    """Return the fourth ancestor of this file's resolved path."""
    this_file = Path(__file__).resolve()
    return this_file.parents[3]
|
||||
|
||||
|
||||
def _derkachi_dir() -> Path:
    """Return the Derkachi flight input directory under the repo root."""
    return _repo_root().joinpath(
        "_docs", "00_problem", "input_data", "flight_derkachi"
    )
|
||||
|
||||
|
||||
# Real video must be more than the placeholder size to actually
|
||||
# decode. The current placeholder is 134 bytes; bump the floor to
|
||||
# 1 MB so any real recording clears the gate but the placeholder
|
||||
# never does.
|
||||
_MIN_REAL_VIDEO_BYTES: int = 1_000_000
|
||||
|
||||
|
||||
def _real_inputs_present() -> tuple[bool, str]:
    """Check that the real tlog, video and calibration fixtures exist.

    Checks run in a fixed order — tlog, video presence, video size
    (must be at least ``_MIN_REAL_VIDEO_BYTES``), calibration — and the
    first failure wins.

    Returns:
        ``(True, "")`` when everything is present, otherwise
        ``(False, reason)`` with a human-readable skip reason.
    """
    fixture_dir = _derkachi_dir()
    tlog = fixture_dir / "derkachi.tlog"
    video = fixture_dir / "flight_derkachi.mp4"
    calibration = fixture_dir / "khp20s30_factory.json"

    reason = ""
    if not tlog.is_file():
        reason = f"real tlog missing: {tlog}"
    elif not video.is_file():
        reason = f"real video missing: {video}"
    elif video.stat().st_size < _MIN_REAL_VIDEO_BYTES:
        reason = (
            f"video at {video} is only {video.stat().st_size} bytes — "
            "placeholder, not a real recording. Drop the real "
            "flight_derkachi.mp4 into the fixture dir to enable AZ-699."
        )
    elif not calibration.is_file():
        reason = (
            f"calibration missing: {calibration}; ship the AZ-702 "
            "factory-sheet calibration to enable AZ-699."
        )
    # Every failure reason above is non-empty, so emptiness means success.
    return (not reason), reason
|
||||
|
||||
|
||||
def _heavy_skip_reason() -> str | None:
|
||||
if os.environ.get("RUN_REPLAY_E2E", "").lower() not in {
|
||||
"1",
|
||||
"true",
|
||||
"yes",
|
||||
"on",
|
||||
}:
|
||||
return "AZ-699 e2e test gated by RUN_REPLAY_E2E=1"
|
||||
return None
|
||||
|
||||
|
||||
def _binary_present() -> tuple[bool, str | None]:
|
||||
binary = shutil.which("gps-denied-replay")
|
||||
if binary is not None:
|
||||
return True, binary
|
||||
venv_bin = Path(sys.executable).parent / "gps-denied-replay"
|
||||
if venv_bin.exists():
|
||||
return True, str(venv_bin)
|
||||
return False, None
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Helpers
|
||||
|
||||
|
||||
def _read_calibration_acquisition_method(calibration_path: Path) -> str:
    """Extract the ``acquisition_method`` field set by AZ-702.

    AZ-702 documents that the camera calibration JSON carries an
    ``acquisition_method`` string ("factory-sheet" for the KHP20S30
    factory deliverable). Whenever the value cannot be read we label
    the run ``"unknown"`` so the failure message stays honest —
    pretending the calibration is factory-sheet when it is actually a
    placeholder would mis-attribute the residual.

    Args:
        calibration_path: Path to the calibration JSON file.

    Returns:
        The non-empty ``acquisition_method`` string, or ``"unknown"``
        when the file is unreadable, is not valid UTF-8 JSON, does not
        hold a JSON object at the top level, or lacks a non-empty
        string under that key. Never raises for a bad file.
    """
    try:
        data = json.loads(calibration_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and
        # UnicodeDecodeError (undecodable bytes).
        return "unknown"
    if not isinstance(data, dict):
        # A top-level list/number/string has no .get(); treat as absent.
        return "unknown"
    method = data.get("acquisition_method")
    if isinstance(method, str) and method:
        return method
    return "unknown"
|
||||
|
||||
|
||||
def _parse_jsonl(path: Path) -> list[dict[str, Any]]:
    """Parse a JSON Lines file into one object per non-blank line.

    The file is decoded as UTF-8 explicitly rather than with the locale
    default, so the result does not depend on the host's locale
    settings.

    Args:
        path: JSONL file to read.

    Returns:
        The decoded records in file order; blank or whitespace-only
        lines are skipped.

    Raises:
        OSError: When the file cannot be read.
        json.JSONDecodeError: When a non-blank line is not valid JSON.
    """
    return [
        json.loads(line)
        for line in path.read_text(encoding="utf-8").splitlines()
        if line.strip()
    ]
|
||||
|
||||
|
||||
def _load_full_ground_truth(tlog_path: Path) -> list[GroundTruthRow]:
    """Load every GPS fix from ``tlog_path`` as :class:`GroundTruthRow` rows.

    Fix timestamps are converted from nanoseconds (``ts_ns``) to float
    seconds; latitude, longitude and altitude are copied unchanged.
    """

    def to_row(fix: Any) -> GroundTruthRow:
        return GroundTruthRow(
            t_s=fix.ts_ns / 1e9,
            lat_deg=fix.lat_deg,
            lon_deg=fix.lon_deg,
            alt_m=fix.alt_m,
        )

    fixes = load_tlog_ground_truth(tlog_path).records
    return [to_row(fix) for fix in fixes]
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# AC-1 + AC-2 + AC-3: full real-flight run + honest verdict + report
|
||||
|
||||
|
||||
@pytest.mark.tier2
def test_az699_real_flight_validation_emits_verdict_and_report(
    tmp_path: Path,
) -> None:
    """Run the replay CLI on the real flight and gate on the AC-3 verdict.

    Flow: skip cleanly when a prerequisite is missing; run
    ``gps-denied-replay --auto-trim`` against the real tlog, video and
    calibration; compute the horizontal-error distribution against the
    tlog ground truth; write the Markdown report under
    ``_docs/06_metrics``; then fail with an attributable message when
    the AC-3 gate is not met.

    A run that overruns the 15-minute budget is reported as an NFR
    failure rather than surfacing as a raw ``TimeoutExpired``. All text
    files are written and read as UTF-8 so the report does not depend
    on the host locale.
    """
    # Arrange: gate cleanly on missing prerequisites (no xfail).
    heavy_reason = _heavy_skip_reason()
    if heavy_reason is not None:
        pytest.skip(heavy_reason)
    inputs_ok, inputs_reason = _real_inputs_present()
    if not inputs_ok:
        pytest.skip(inputs_reason)
    binary_ok, binary = _binary_present()
    if not binary_ok or binary is None:
        pytest.skip("gps-denied-replay console-script not installed")

    derkachi = _derkachi_dir()
    tlog_path = derkachi / "derkachi.tlog"
    video_path = derkachi / "flight_derkachi.mp4"
    calibration_path = derkachi / "khp20s30_factory.json"
    output_path = tmp_path / "estimator_output.jsonl"

    # Signing-key stub (NoopMavlinkTransport ignores bytes; path
    # validation just needs the file to exist).
    signing_key_path = tmp_path / "signing_key.bin"
    signing_key_path.write_bytes(b"\x00" * 32)

    config_path = tmp_path / "config.yaml"
    config_path.write_text(
        "mode: replay\n"
        "replay:\n"
        "  pace: asap\n"
        "  target_fc_dialect: ardupilot_plane\n",
        encoding="utf-8",
    )

    argv = [
        binary,
        "--video",
        str(video_path),
        "--tlog",
        str(tlog_path),
        "--output",
        str(output_path),
        "--camera-calibration",
        str(calibration_path),
        "--config",
        str(config_path),
        "--mavlink-signing-key",
        str(signing_key_path),
        "--pace",
        "asap",
        # AZ-698: let the segmenter + NCC find the last flight
        # window in the multi-flight tlog. No manual offset.
        "--auto-trim",
    ]

    # 15-min NFR budget per AZ-699 spec; shared by the subprocess
    # timeout and the wall-clock assertion below.
    budget_s = 900.0

    # Act
    t0 = time.monotonic()
    try:
        completed = subprocess.run(
            argv,
            capture_output=True,
            text=True,
            timeout=budget_s,
        )
    except subprocess.TimeoutExpired as exc:
        # Surface the overrun as the NFR failure it is, with whatever
        # output the child produced before it was killed.
        pytest.fail(
            f"AZ-699 NFR: real-flight run exceeded {budget_s:.0f} s and "
            "was killed; budget is 15 min\n"
            f"stdout:\n{exc.stdout}\nstderr:\n{exc.stderr}"
        )
    wall_clock_s = time.monotonic() - t0

    assert completed.returncode == 0, (
        f"gps-denied-replay exited {completed.returncode}\n"
        f"stdout:\n{completed.stdout}\nstderr:\n{completed.stderr}"
    )
    assert wall_clock_s <= budget_s, (
        f"AZ-699 NFR: real-flight run took {wall_clock_s:.1f} s; "
        "budget is 15 min"
    )

    emissions = _parse_jsonl(output_path)
    assert emissions, "no JSONL emissions produced — pipeline failure"

    ground_truth = _load_full_ground_truth(tlog_path)
    assert ground_truth, "ground-truth extraction yielded zero rows"
    distribution = horizontal_error_distribution(
        emissions, ground_truth
    )
    assert distribution.count > 0, (
        "no emissions paired with ground truth — JSONL timestamps "
        "outside the tlog GPS window?"
    )

    context = ReportContext(
        run_date_utc=datetime.datetime.now(datetime.timezone.utc)
        .date()
        .isoformat(),
        tlog_path=tlog_path,
        video_path=video_path,
        calibration_acquisition_method=_read_calibration_acquisition_method(
            calibration_path
        ),
        # ground_truth is known non-empty here (asserted above).
        clip_duration_s=ground_truth[-1].t_s - ground_truth[0].t_s,
        emissions_count=len(emissions),
    )

    passed = verdict_passes_ac3(distribution)
    report_text = render_report(distribution, context, passed=passed)

    report_dir = _repo_root() / "_docs" / "06_metrics"
    report_dir.mkdir(parents=True, exist_ok=True)
    report_path = (
        report_dir / f"real_flight_validation_{context.run_date_utc}.md"
    )
    report_path.write_text(report_text, encoding="utf-8")

    # Assert AC-2: the report exists and contains all required rows.
    assert report_path.is_file()
    written = report_path.read_text(encoding="utf-8")
    assert "## Horizontal error (metres)" in written
    assert "## Threshold-hit share" in written
    assert "Mean" in written
    assert "p50" in written
    assert "p95" in written
    assert "p99" in written
    for threshold in (10, 25, 50, 100):
        assert f"| {threshold} |" in written, (
            f"threshold {threshold} m row missing from report"
        )

    # Assert AC-1 + AC-3: honest verdict. On FAIL, the message must
    # reference the calibration method (AZ-699 AC-3).
    if not passed:
        msg = format_failure_message(distribution, context)
        pytest.fail(
            f"{msg}\n\nReport: {report_path}\n"
            f"Run wall-clock: {wall_clock_s:.1f} s"
        )

    # PASS path — emit a concise summary so the CI log carries the
    # numbers without forcing the operator to open the report.
    share_100m = distribution.threshold_hit_share[AC3_GATE_THRESHOLD_M] * 100.0
    assert share_100m >= AC3_GATE_PCT  # guarded by verdict_passes_ac3
    print(
        f"AZ-699 PASS — {share_100m:.1f} % within "
        f"{AC3_GATE_THRESHOLD_M:.0f} m, mean="
        f"{distribution.horizontal_error_mean_m:.1f} m, p95="
        f"{distribution.horizontal_error_p95_m:.1f} m. "
        f"Report at {report_path}",
        file=sys.stderr,
    )
|
||||
@@ -0,0 +1,184 @@
|
||||
"""AZ-702 — Topotek KHP20S30 factory-sheet calibration.
|
||||
|
||||
Covers AC-1, AC-3, AC-4 of
|
||||
``_docs/02_tasks/todo/AZ-702_khp20s30_calibration.md``:
|
||||
|
||||
* AC-1 (JSON parses against the project schema) — same loader gate the
|
||||
CLI ``replay.py::_load_calibration_json`` uses.
|
||||
* AC-3 (field values match factory inputs) — ``fx == fy`` (square
|
||||
pixels), principal point at image centre, zero distortion.
|
||||
* AC-4 (T3 consumes this calibration) — covered by
|
||||
``tests/e2e/replay/conftest.py::_calibration_path()`` returning this
|
||||
file when present, exercised once T3 (AZ-699) lands.
|
||||
|
||||
AC-2 (`camera_info.md` updated) is a documentation AC and is verified
|
||||
by inspection during code review; it does not lend itself to a runtime
|
||||
assertion beyond the file-existence smoke test below.
|
||||
|
||||
Style: every test follows the Arrange / Act / Assert pattern.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# Location of the committed factory-sheet calibration JSON, resolved relative
# to this test module. ``parents[3]`` is expected to be the repository root
# (the directory that holds ``_docs``) — confirm if this file is ever moved.
_FACTORY_JSON_PATH = Path(__file__).resolve().parents[3].joinpath(
    "_docs",
    "00_problem",
    "input_data",
    "flight_derkachi",
    "khp20s30_factory.json",
)
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def calibration_data() -> dict[str, Any]:
    """Return the factory calibration JSON parsed into a dict.

    The file is read as UTF-8 and parsed once per test module.
    """
    return json.loads(_FACTORY_JSON_PATH.read_text(encoding="utf-8"))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-1: JSON parses via the project's calibration schema gate
|
||||
|
||||
|
||||
def test_ac1_required_schema_keys_present(
    calibration_data: dict[str, Any],
) -> None:
    """The top-level keys gated by ``cli/replay.py::_load_calibration_json`` exist."""
    # Arrange
    required_keys = ("intrinsics_3x3", "distortion", "body_to_camera_se3")

    # Assert — fail on the first absent key so the message names it.
    for name in required_keys:
        assert name in calibration_data, f"missing required key: {name}"
|
||||
|
||||
|
||||
def test_ac1_cli_loader_accepts_the_json(
    calibration_data: dict[str, Any],
) -> None:
    """The strict CLI loader parses the file and agrees with a plain JSON load."""
    # Arrange — the loader is imported inside the test body.
    from gps_denied_onboard.cli.replay import _load_calibration_json

    # Act
    from_cli_loader = _load_calibration_json(_FACTORY_JSON_PATH)

    # Assert — the loader must not raise and must return the same mapping.
    assert from_cli_loader == calibration_data
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-3: Field values match the documented factory inputs
|
||||
|
||||
|
||||
def test_ac3_intrinsics_square_pixels_and_centred_principal_point(
    calibration_data: dict[str, Any],
) -> None:
    """The intrinsics matrix matches the factory-sheet derivation.

    Checks square pixels, a focal length of ``focal_mm * width / sensor_width``,
    a principal point within half a pixel of the image centre, zero skew and
    a ``[0, 0, 1]`` bottom row.
    """
    # Arrange — factory inputs used by this test to re-derive the focal length.
    img_w, img_h = 1920, 1080
    sensor_w_mm = 5.37
    focal_mm = 4.7
    expected_f = focal_mm * (img_w / sensor_w_mm)
    intrinsics = calibration_data["intrinsics_3x3"]

    # Act — pull the four pinhole parameters out of the matrix.
    fx = intrinsics[0][0]
    fy = intrinsics[1][1]
    cx = intrinsics[0][2]
    cy = intrinsics[1][2]

    # Assert — square pixels (fx == fy) and the derived focal length.
    assert fx == pytest.approx(fy, rel=1e-12), "expected fx == fy (square pixels)"
    assert fx == pytest.approx(expected_f, rel=1e-3), (
        f"fx {fx} does not match factory-sheet derivation "
        f"f * width/sensor_w = {expected_f}"
    )
    # Principal point at the image centre, within half a pixel.
    assert cx == pytest.approx(img_w / 2, abs=0.5)
    assert cy == pytest.approx(img_h / 2, abs=0.5)
    # No skew, and the usual last row.
    assert intrinsics[0][1] == 0.0
    assert intrinsics[1][0] == 0.0
    assert intrinsics[2] == [0.0, 0.0, 1.0]
|
||||
|
||||
|
||||
def test_ac3_distortion_all_zero_for_factory_sheet(
    calibration_data: dict[str, Any],
) -> None:
    """The factory-sheet calibration carries five zero distortion coefficients."""
    # Arrange — the factory-sheet approximation skips per-unit distortion.
    no_distortion = [0.0] * 5

    # Assert
    assert calibration_data["distortion"] == no_distortion
|
||||
|
||||
|
||||
def test_ac3_body_to_camera_is_identity_for_nadir(
    calibration_data: dict[str, Any],
) -> None:
    """The body-to-camera transform is the 4x4 identity matrix."""
    # Arrange — build the identity row by row: 1.0 on the diagonal, 0.0 elsewhere.
    identity_4x4 = [
        [1.0 if col == row else 0.0 for col in range(4)] for row in range(4)
    ]

    # Assert
    assert calibration_data["body_to_camera_se3"] == identity_4x4
|
||||
|
||||
|
||||
def test_ac3_acquisition_method_is_factory_sheet(
    calibration_data: dict[str, Any],
) -> None:
    """The calibration declares ``factory_sheet`` as its acquisition method."""
    # Act
    method = calibration_data["acquisition_method"]

    # Assert
    assert method == "factory_sheet"
|
||||
|
||||
|
||||
def test_metadata_documents_assumptions(
    calibration_data: dict[str, Any],
) -> None:
    """The metadata block records the factory inputs behind the intrinsics."""
    # Arrange
    metadata = calibration_data["metadata"]

    # Assert — camera identity and the inputs the focal length was derived from.
    assert metadata["model"] == "Topotek KHP20S30"
    assert metadata["image_resolution_px"] == [1920, 1080]
    assert metadata["assumed_focal_length_mm"] == 4.7
    assert metadata["sensor_width_mm"] == 5.37
    # A residual budget must be declared and strictly positive.
    assert metadata["residual_budget_pct"] > 0.0
    # The originating task id must be present and correct.
    assert metadata.get("task") == "AZ-702"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-2 sanity: camera_info.md exists and references this calibration
|
||||
|
||||
|
||||
def test_camera_info_md_references_calibration() -> None:
    """``camera_info.md`` exists and mentions the factory calibration file."""
    # Arrange — the markdown lives next to the calibration JSON.
    camera_info = _FACTORY_JSON_PATH.with_name("camera_info.md")

    # Act
    contents = camera_info.read_text(encoding="utf-8")

    # Assert — the file name is referenced, and so is the acquisition method
    # in either of its two spellings.
    assert "khp20s30_factory.json" in contents
    mentions_method = any(
        spelling in contents for spelling in ("factory_sheet", "factory-sheet")
    )
    assert mentions_method
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-4 sanity: T3 will pick up this calibration when present
|
||||
|
||||
|
||||
def test_ac4_conftest_picks_up_factory_calibration() -> None:
    """``tests/e2e/replay/conftest.py::_calibration_path()`` resolves to the
    factory JSON (the T3 / AZ-699 entry-point)."""
    # Arrange — imported inside the test body.
    from tests.e2e.replay.conftest import _calibration_path

    # Act
    resolved = _calibration_path()

    # Assert — the factory JSON is committed; conftest must prefer it.
    assert resolved == _FACTORY_JSON_PATH
|
||||
@@ -0,0 +1,663 @@
|
||||
"""AZ-701 — replay_api unit tests.
|
||||
|
||||
Covers the AC matrix without invoking the real `gps-denied-replay`
|
||||
subprocess. A fake `ReplayRunner` writes deterministic emissions
|
||||
into the per-job output dir; everything downstream (job state,
|
||||
HTTP handlers, magic-byte validation, auth, concurrency) is then
|
||||
exercised against real FastAPI routing via `httpx.AsyncClient`.
|
||||
|
||||
FastAPI / uvicorn / python-multipart are operator-only deps —
|
||||
the whole module skips cleanly when any is missing.
|
||||
|
||||
Style: every test follows Arrange / Act / Assert.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from collections.abc import Iterator
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
fastapi = pytest.importorskip(
|
||||
"fastapi",
|
||||
reason="FastAPI is an operator-only dep; install gps-denied-onboard[operator-tools]",
|
||||
)
|
||||
pytest.importorskip("httpx", reason="httpx required for the FastAPI TestClient")
|
||||
pytest.importorskip("multipart", reason="python-multipart required by FastAPI")
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from gps_denied_onboard.replay_api import (
|
||||
JobState,
|
||||
create_app,
|
||||
)
|
||||
from gps_denied_onboard.replay_api.handlers import (
|
||||
validate_tlog_kind,
|
||||
validate_video_kind,
|
||||
)
|
||||
from gps_denied_onboard.replay_api.interface import (
|
||||
ReplayInputs,
|
||||
ReplayJobResult,
|
||||
)
|
||||
from gps_denied_onboard.replay_api.jobs import JobRegistry
|
||||
from gps_denied_onboard.replay_api.storage import StorageRoot
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Fixtures + fakes
|
||||
|
||||
|
||||
class _FakeRunner:
    """Test double for the replay runner.

    Records every call, optionally sleeps and/or raises, and otherwise
    writes one emissions row, a report and a map page into ``output_dir``.
    """

    def __init__(self, *, delay_s: float = 0.0, fail: bool = False) -> None:
        # Seconds to block inside ``run`` before doing anything else.
        self.delay_s = delay_s
        # When true, ``run`` raises instead of producing artefacts.
        self.fail = fail
        # Inputs of every ``run`` invocation, in call order.
        self.calls: list[ReplayInputs] = []

    def run(self, inputs: ReplayInputs, *, output_dir: Path) -> ReplayJobResult:
        """Record ``inputs`` and write the fake artefacts into ``output_dir``.

        Raises:
            RuntimeError: when the runner was constructed with ``fail=True``.
        """
        self.calls.append(inputs)
        if self.delay_s:
            time.sleep(self.delay_s)
        if self.fail:
            raise RuntimeError("fake runner forced failure")

        # Single deterministic emission row, newline-terminated (JSONL).
        row = {
            "frame_id": 0,
            "position_wgs84": {
                "lat_deg": 50.0,
                "lon_deg": 30.0,
                "alt_m": 100.0,
            },
            "emitted_at": 0,
        }
        emissions_path = output_dir / "emissions.jsonl"
        emissions_path.write_text(json.dumps(row) + "\n")

        report_path = output_dir / "accuracy_report.md"
        report_path.write_text("# Fake report\n\n**Verdict**: PASS\n")

        map_path = output_dir / "map.html"
        map_path.write_text("<!DOCTYPE html><html><body>fake map</body></html>")

        return ReplayJobResult(
            emissions_jsonl_path=emissions_path,
            accuracy_report_md_path=report_path,
            map_html_path=map_path,
        )
|
||||
|
||||
|
||||
def _valid_tlog_bytes() -> bytes:
|
||||
"""First 8 bytes are a microsecond timestamp; byte 8 = MAVLink magic."""
|
||||
return b"\x00\x00\x00\x00\x00\x00\x00\x00\xfd" + b"\x00" * 32
|
||||
|
||||
|
||||
def _valid_mp4_bytes() -> bytes:
|
||||
"""ISO mp4: any size prefix + 'ftyp' marker at offset 4."""
|
||||
return b"\x00\x00\x00\x20ftypisom\x00\x00\x02\x00mp42" + b"\x00" * 16
|
||||
|
||||
|
||||
def _valid_calibration_bytes() -> bytes:
|
||||
return b'{"focal_length": 1, "acquisition_method": "factory-sheet"}'
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _disable_auth_by_default(monkeypatch: pytest.MonkeyPatch) -> Iterator[None]:
    """Run every test with bearer auth switched off unless it opts back in.

    Clears any ambient ``REPLAY_API_BEARER_TOKEN`` and sets
    ``REPLAY_API_AUTH_REQUIRED`` to ``"false"``; ``monkeypatch`` undoes both
    after the test.
    """
    monkeypatch.delenv("REPLAY_API_BEARER_TOKEN", raising=False)
    monkeypatch.setenv("REPLAY_API_AUTH_REQUIRED", "false")
    yield
|
||||
|
||||
|
||||
@pytest.fixture
def storage(tmp_path: Path) -> StorageRoot:
    """Return a ``StorageRoot`` under the per-test temporary directory."""
    root_dir = tmp_path / "replay_api"
    return StorageRoot(root_dir)
|
||||
|
||||
|
||||
@pytest.fixture
def fake_runner() -> _FakeRunner:
    """Return a fresh ``_FakeRunner`` with its defaults (no delay, no failure)."""
    runner = _FakeRunner()
    return runner
|
||||
|
||||
|
||||
@pytest.fixture
def make_app(
    storage: StorageRoot,
) -> Any:
    """Return a factory that builds an app around a caller-supplied runner.

    The factory wires the runner and the per-test ``storage`` into a new
    ``JobRegistry`` and passes all three to ``create_app``.
    """

    def _factory(
        runner: Any,
        *,
        max_concurrent: int = 1,
        max_queued: int = 8,
        sync_max_bytes: int = 10_000_000,
    ) -> Any:
        # Runner and storage are shared between the registry and the app.
        shared = {"runner": runner, "storage": storage}
        job_registry = JobRegistry(
            max_concurrent=max_concurrent,
            max_queued=max_queued,
            **shared,
        )
        return create_app(
            registry=job_registry,
            sync_max_bytes=sync_max_bytes,
            **shared,
        )

    return _factory
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Magic-byte validation (AC-9)
|
||||
|
||||
|
||||
def test_validate_tlog_kind_accepts_mavlink_v2_magic() -> None:
    """A payload with 0xFD at byte 8 passes tlog validation."""
    # Arrange
    payload = _valid_tlog_bytes()

    # Act / Assert — must not raise
    validate_tlog_kind(payload)
|
||||
|
||||
|
||||
def test_validate_tlog_kind_rejects_zip_renamed_to_tlog() -> None:
    """A payload whose byte 8 is not the MAVLink magic is rejected."""
    # Arrange — bytes 0..7 stand in for the (forged) timestamp; the ZIP
    # signature "PK\x03\x04" starts at byte 8, where the MAVLink magic
    # would otherwise be.
    forged_timestamp = b"\x00\x00\x00\x00\x00\x00\x00\x00"
    bogus = forged_timestamp + b"PK\x03\x04rest_of_zip_header"

    # Act
    with pytest.raises(Exception) as excinfo:
        validate_tlog_kind(bogus)

    # Assert — the error names the expected format.
    message = str(excinfo.value)
    assert "MAVLink" in message
|
||||
|
||||
|
||||
def test_validate_video_kind_accepts_mp4_ftyp() -> None:
    """A payload with ``ftyp`` at offset 4 passes video validation."""
    # Arrange
    payload = _valid_mp4_bytes()

    # Act / Assert — must not raise
    validate_video_kind(payload)
|
||||
|
||||
|
||||
def test_validate_video_kind_rejects_arbitrary_bytes() -> None:
    """Sixty-four zero bytes are rejected, and the error mentions ``ftyp``."""
    # Arrange
    zeros = bytes(64)

    # Act
    with pytest.raises(Exception) as excinfo:
        validate_video_kind(zeros)

    # Assert
    message = str(excinfo.value)
    assert "ftyp" in message
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-1 — sync POST → 200 + JSONL
|
||||
|
||||
|
||||
def test_post_replay_sync_returns_200_with_result_urls(
    fake_runner: _FakeRunner,
    make_app: Any,
) -> None:
    """AC-1: a small upload returns 200 with a finished job and result links."""
    # Arrange
    client = TestClient(make_app(fake_runner))
    upload = {
        "tlog": ("derkachi.tlog", _valid_tlog_bytes(), "application/octet-stream"),
        "video": ("derkachi.mp4", _valid_mp4_bytes(), "video/mp4"),
        "calibration": (
            "khp20s30.json",
            _valid_calibration_bytes(),
            "application/json",
        ),
    }

    # Act
    response = client.post("/replay", files=upload, data={"pace": "asap"})

    # Assert — status first, with the body text as the failure detail.
    assert response.status_code == 200, response.text
    payload = response.json()
    assert payload["state"] == JobState.DONE.value
    assert payload["sync"] is True
    # Each artefact is linked under its own sub-path.
    assert payload["emissions_jsonl_url"].endswith("/result")
    assert payload["map_html_url"].endswith("/map")
    assert payload["accuracy_report_md_url"].endswith("/report")
    # Runner saw exactly one job with the expected pace + auto-trim default.
    assert len(fake_runner.calls) == 1
    only_call = fake_runner.calls[0]
    assert only_call.pace == "asap"
    assert only_call.auto_trim is True
|
||||
|
||||
|
||||
def test_post_replay_serves_jsonl_and_map_for_done_job(
    fake_runner: _FakeRunner,
    make_app: Any,
) -> None:
    """A finished job serves its emissions, map and report by job id."""
    # Arrange — run one job to completion and remember its id.
    client = TestClient(make_app(fake_runner))
    upload = {
        "tlog": ("derkachi.tlog", _valid_tlog_bytes(), "application/octet-stream"),
        "video": ("derkachi.mp4", _valid_mp4_bytes(), "video/mp4"),
        "calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
    }
    job_id = client.post("/replay", files=upload).json()["job_id"]

    # Act
    jsonl_resp = client.get(f"/jobs/{job_id}/result")
    map_resp = client.get(f"/jobs/{job_id}/map")
    report_resp = client.get(f"/jobs/{job_id}/report")

    # Assert — each endpoint answers 200 with the fake runner's content.
    assert jsonl_resp.status_code == 200
    assert "lat_deg" in jsonl_resp.text
    assert map_resp.status_code == 200
    assert "fake map" in map_resp.text
    assert report_resp.status_code == 200
    assert "**Verdict**: PASS" in report_resp.text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-2 — async POST → 202 + job id
|
||||
|
||||
|
||||
def test_post_replay_async_returns_202_when_video_exceeds_sync_bytes(
    storage: StorageRoot,
) -> None:
    """AC-2: a video larger than ``sync_max_bytes`` is accepted with 202."""
    # Arrange — runner sleeps so we observe the queued/running state.
    runner = _FakeRunner(delay_s=0.2)
    app = create_app(
        runner=runner,
        storage=storage,
        registry=JobRegistry(runner=runner, storage=storage, max_concurrent=1),
        sync_max_bytes=10,  # any non-trivial video exceeds this
    )
    client = TestClient(app)
    upload = {
        "tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
        "video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
        "calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
    }

    # Act
    response = client.post("/replay", files=upload)

    # Assert — accepted, not yet finished, and pointing at the job resource.
    assert response.status_code == 202, response.text
    payload = response.json()
    pending_states = {JobState.QUEUED.value, JobState.RUNNING.value}
    assert payload["state"] in pending_states
    assert "Location" in response.headers
    assert response.headers["Location"] == f"/jobs/{payload['job_id']}"
    # Let the background job finish before the test returns.
    _wait_done(client, payload["job_id"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-3 — job state transitions queued → running → done
|
||||
|
||||
|
||||
def test_job_state_transitions_observable_via_polling(
    storage: StorageRoot,
) -> None:
    """AC-3: polling a job shows a pending state before it reaches done."""
    # Arrange — a slow runner and a tiny sync limit.
    runner = _FakeRunner(delay_s=0.3)
    app = create_app(
        runner=runner,
        storage=storage,
        registry=JobRegistry(runner=runner, storage=storage, max_concurrent=1),
        sync_max_bytes=10,
    )
    client = TestClient(app)
    upload = {
        "tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
        "video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
        "calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
    }
    job_id = client.post("/replay", files=upload).json()["job_id"]

    # Act + Assert — poll until done; record the unique states seen.
    observed: set[str] = set()
    give_up_at = time.monotonic() + 10.0
    while time.monotonic() < give_up_at:
        state = client.get(f"/jobs/{job_id}").json()["state"]
        observed.add(state)
        if state == JobState.DONE.value:
            break
        time.sleep(0.05)

    assert JobState.DONE.value in observed
    # We expect to have seen at least one of queued/running before done.
    assert observed & {JobState.QUEUED.value, JobState.RUNNING.value}
|
||||
|
||||
|
||||
def test_failed_runner_marks_job_failed(
    storage: StorageRoot,
) -> None:
    """A runner that raises leaves the job failed with the error text recorded."""
    # Arrange — runner raises on every call.
    runner = _FakeRunner(fail=True)
    app = create_app(
        runner=runner,
        storage=storage,
        registry=JobRegistry(runner=runner, storage=storage),
        sync_max_bytes=10,
    )
    client = TestClient(app)
    upload = {
        "tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
        "video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
        "calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
    }

    # Act — submit, then wait for whichever terminal state the job reaches.
    job_id = client.post("/replay", files=upload).json()["job_id"]
    final = _wait_terminal(client, job_id)

    # Assert — ``error`` may be null, so fall back to "" for the substring check.
    assert final["state"] == JobState.FAILED.value
    error_text = final["error"] or ""
    assert "fake runner forced failure" in error_text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-4 — result + map served from job id (covered above)
|
||||
|
||||
|
||||
def test_result_endpoints_409_when_job_not_done(
    storage: StorageRoot,
) -> None:
    """Asking for the result of an unfinished job answers 409."""
    # Arrange — slow runner so job stays running long enough to probe.
    runner = _FakeRunner(delay_s=0.5)
    app = create_app(
        runner=runner,
        storage=storage,
        registry=JobRegistry(runner=runner, storage=storage),
        sync_max_bytes=10,
    )
    client = TestClient(app)
    upload = {
        "tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
        "video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
        "calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
    }
    submitted = client.post("/replay", files=upload)
    job_id = submitted.json()["job_id"]

    # Act — race the runner; we want to hit the not-done branch.
    probe = client.get(f"/jobs/{job_id}/result")

    # Assert — if the runner won the race there is nothing to check.
    if probe.status_code == 200:
        pytest.skip("runner finished before we could probe the 409 path")
    assert probe.status_code == 409
    assert probe.json()["error_code"] == "job_not_complete"
    # Let the background job finish before the test returns.
    _wait_done(client, job_id)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-5 — auth enforced when configured
|
||||
|
||||
|
||||
def test_post_replay_returns_401_without_bearer_when_required(
    monkeypatch: pytest.MonkeyPatch,
    storage: StorageRoot,
    fake_runner: _FakeRunner,
) -> None:
    """AC-5: with auth required, a request without a bearer token gets 401."""
    # Arrange — turn auth on and configure the expected token.
    for name, value in (
        ("REPLAY_API_AUTH_REQUIRED", "true"),
        ("REPLAY_API_BEARER_TOKEN", "shibboleth"),
    ):
        monkeypatch.setenv(name, value)
    app = create_app(
        runner=fake_runner,
        storage=storage,
        registry=JobRegistry(runner=fake_runner, storage=storage),
        sync_max_bytes=10_000_000,
    )
    client = TestClient(app)
    upload = {
        "tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
        "video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
        "calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
    }

    # Act — no Authorization header is sent.
    response = client.post("/replay", files=upload)

    # Assert
    assert response.status_code == 401
    assert response.json()["error_code"] == "unauthorized"
|
||||
|
||||
|
||||
def test_post_replay_accepts_correct_bearer(
    monkeypatch: pytest.MonkeyPatch,
    storage: StorageRoot,
    fake_runner: _FakeRunner,
) -> None:
    """AC-5: with auth required, the configured bearer token is accepted."""
    # Arrange — turn auth on and configure the expected token.
    token = "shibboleth"
    monkeypatch.setenv("REPLAY_API_AUTH_REQUIRED", "true")
    monkeypatch.setenv("REPLAY_API_BEARER_TOKEN", token)
    app = create_app(
        runner=fake_runner,
        storage=storage,
        registry=JobRegistry(runner=fake_runner, storage=storage),
        sync_max_bytes=10_000_000,
    )
    client = TestClient(app)
    upload = {
        "tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
        "video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
        "calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
    }

    # Act
    response = client.post(
        "/replay",
        files=upload,
        headers={"Authorization": f"Bearer {token}"},
    )

    # Assert
    assert response.status_code == 200, response.text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-6 — health endpoints
|
||||
|
||||
|
||||
def test_healthz_always_returns_200(fake_runner: _FakeRunner, make_app: Any) -> None:
    """AC-6: ``/healthz`` answers 200."""
    # Arrange
    app = make_app(fake_runner)
    client = TestClient(app)

    # Act
    response = client.get("/healthz")

    # Assert
    assert response.status_code == 200
|
||||
|
||||
|
||||
def test_readyz_returns_503_when_binary_missing(
    monkeypatch: pytest.MonkeyPatch,
    fake_runner: _FakeRunner,
    make_app: Any,
) -> None:
    """AC-6: ``/readyz`` answers 503 when the replay binary cannot be found."""
    # Arrange — point readyz at a binary we know doesn't exist.
    missing_binary = "definitely-not-a-binary-az701"
    monkeypatch.setenv("REPLAY_API_REPLAY_BINARY", missing_binary)
    client = TestClient(make_app(fake_runner))

    # Act
    response = client.get("/readyz")

    # Assert — the reason explains the failure.
    assert response.status_code == 503
    reason = response.json()["reason"]
    assert "not on PATH" in reason
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-8 — concurrency limit enforced
|
||||
|
||||
|
||||
def test_concurrency_limit_queues_excess_jobs(storage: StorageRoot) -> None:
    """AC-8: with ``max_concurrent=1`` at most one job runs at a time."""
    # Arrange
    runner = _FakeRunner(delay_s=0.5)
    app = create_app(
        runner=runner,
        storage=storage,
        registry=JobRegistry(
            runner=runner, storage=storage, max_concurrent=1, max_queued=8
        ),
        sync_max_bytes=10,
    )
    client = TestClient(app)
    job_ids: list[str] = []

    # Act — submit 3 in quick succession; sync_max_bytes=10 forces async mode.
    for _ in range(3):
        upload = {
            "tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
            "video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
            "calibration": (
                "k.json",
                _valid_calibration_bytes(),
                "application/json",
            ),
        }
        resp = client.post("/replay", files=upload)
        assert resp.status_code == 202, resp.text
        job_ids.append(resp.json()["job_id"])

    # Sample states quickly — at this instant we expect 1 running and ≥ 1 queued.
    states = []
    for jid in job_ids:
        states.append(client.get(f"/jobs/{jid}").json()["state"])

    running_count = states.count(JobState.RUNNING.value)
    assert running_count <= 1, f"more than one running at once: {states}"

    # Either something is still queued, or sampling was slow enough that at
    # least two jobs had already finished.
    queued_observed = states.count(JobState.QUEUED.value) >= 1
    two_already_done = states.count(JobState.DONE.value) >= 2
    assert queued_observed or two_already_done, (
        f"no queued state observed; states={states}"
    )

    # Wait for everything to finish so the test exits cleanly.
    for jid in job_ids:
        _wait_done(client, jid)
|
||||
|
||||
|
||||
def test_queue_full_returns_429(storage: StorageRoot) -> None:
    """AC-8: with no queue capacity, a second submission is refused with 429."""
    # Arrange — max_queued=0 forces the 429 path on the second submit.
    runner = _FakeRunner(delay_s=0.5)
    app = create_app(
        runner=runner,
        storage=storage,
        registry=JobRegistry(
            runner=runner, storage=storage, max_concurrent=1, max_queued=0
        ),
        sync_max_bytes=10,
    )
    client = TestClient(app)

    def _submit() -> Any:
        # A fresh, valid upload for each submission.
        return client.post(
            "/replay",
            files={
                "tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
                "video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
                "calibration": (
                    "k.json",
                    _valid_calibration_bytes(),
                    "application/json",
                ),
            },
        )

    # Act — two submissions back to back.
    first = _submit()
    second = _submit()

    # Assert
    assert first.status_code == 202
    assert second.status_code == 429
    assert second.json()["error_code"] == "concurrency_limit_reached"
    # Let the accepted job finish before the test returns.
    _wait_done(client, first.json()["job_id"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-9 — magic-byte upload validation (HTTP path)
|
||||
|
||||
|
||||
def test_post_replay_rejects_misnamed_zip_as_tlog(
    fake_runner: _FakeRunner, make_app: Any
) -> None:
    """AC-9: an upload whose tlog lacks the MAVLink magic is refused with 400."""
    # Arrange — eight zero bytes, then a ZIP signature where the magic belongs.
    bogus_tlog = bytes(8) + b"PK\x03\x04bogus"
    client = TestClient(make_app(fake_runner))
    upload = {
        "tlog": ("d.tlog", bogus_tlog, "application/octet-stream"),
        "video": ("d.mp4", _valid_mp4_bytes(), "video/mp4"),
        "calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
    }

    # Act
    response = client.post("/replay", files=upload)

    # Assert
    assert response.status_code == 400
    assert response.json()["error_code"] == "unsupported_file_kind"
|
||||
|
||||
|
||||
def test_post_replay_rejects_misnamed_zip_as_video(
    fake_runner: _FakeRunner, make_app: Any
) -> None:
    """AC-9: an upload whose video lacks ``ftyp`` at offset 4 is refused with 400."""
    # Arrange — a size prefix followed by something that is not "ftyp".
    bogus_video = b"\x00\x00\x00\x20" + b"notftyp..." + bytes(64)
    client = TestClient(make_app(fake_runner))
    upload = {
        "tlog": ("d.tlog", _valid_tlog_bytes(), "application/octet-stream"),
        "video": ("d.mp4", bogus_video, "video/mp4"),
        "calibration": ("k.json", _valid_calibration_bytes(), "application/json"),
    }

    # Act
    response = client.post("/replay", files=upload)

    # Assert
    assert response.status_code == 400
    assert response.json()["error_code"] == "unsupported_file_kind"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Helpers
|
||||
|
||||
|
||||
def _wait_done(client: TestClient, job_id: str, timeout_s: float = 10.0) -> None:
    """Poll ``/jobs/{job_id}`` until the job reports ``done``.

    Raises:
        AssertionError: if the job reports ``failed``, or if ``timeout_s``
            elapses without the job reaching ``done``.
    """
    give_up_at = time.monotonic() + timeout_s
    while time.monotonic() < give_up_at:
        snap = client.get(f"/jobs/{job_id}").json()
        state = snap["state"]
        if state == JobState.DONE.value:
            return
        if state == JobState.FAILED.value:
            raise AssertionError(f"job {job_id} unexpectedly failed: {snap}")
        # Short pause between polls.
        time.sleep(0.05)
    raise AssertionError(f"job {job_id} did not reach DONE within {timeout_s}s")
|
||||
|
||||
|
||||
def _wait_terminal(
    client: TestClient, job_id: str, timeout_s: float = 10.0
) -> dict[str, Any]:
    """Poll ``/jobs/{job_id}`` until the job is ``done`` or ``failed``.

    Returns:
        The job snapshot (decoded JSON body) observed in the terminal state.

    Raises:
        AssertionError: if ``timeout_s`` elapses first.
    """
    terminal_states = {JobState.DONE.value, JobState.FAILED.value}
    give_up_at = time.monotonic() + timeout_s
    while time.monotonic() < give_up_at:
        snapshot = client.get(f"/jobs/{job_id}").json()
        if snapshot["state"] in terminal_states:
            return snapshot
        # Short pause between polls.
        time.sleep(0.05)
    raise AssertionError(f"job {job_id} did not reach terminal state")
|
||||
|
||||
|
||||
# Suppress unused-imports warnings for symbols only the test harness uses.
|
||||
_ = (os, threading, fastapi)
|
||||
@@ -0,0 +1,935 @@
|
||||
"""AZ-698 — tlog trim + mid-flight cross-correlation alignment tests.
|
||||
|
||||
Covers AC-1..AC-4 of ``_docs/02_tasks/todo/AZ-698_tlog_trim_midflight_alignment.md``.
|
||||
AC-5 (end-to-end CLI smoke) is exercised by the existing replay e2e
|
||||
suite in ``tests/e2e/replay/`` and skipped here when its prerequisites
|
||||
(ffmpeg-capable cv2 build + real ``derkachi.tlog``) are absent.
|
||||
|
||||
Style: every test follows the Arrange / Act / Assert pattern.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from gps_denied_onboard._types.fc import (
|
||||
AttitudeSample,
|
||||
FcKind,
|
||||
FcTelemetryFrame,
|
||||
FlightStateSignal,
|
||||
GpsHealth,
|
||||
ImuTelemetrySample,
|
||||
TelemetryKind,
|
||||
)
|
||||
from gps_denied_onboard.clock import Clock
|
||||
from gps_denied_onboard.components.c8_fc_adapter.tlog_replay_adapter import (
|
||||
ReplayPace,
|
||||
TlogReplayFcAdapter,
|
||||
)
|
||||
from gps_denied_onboard.replay_input.auto_sync import (
|
||||
_align_via_cross_correlation,
|
||||
_resample_uniform,
|
||||
_segment_flights_from_imu_energy,
|
||||
compute_offset,
|
||||
detect_video_motion_onset,
|
||||
find_aligned_window,
|
||||
validate_offset_or_fail,
|
||||
)
|
||||
from gps_denied_onboard.replay_input.errors import ReplayInputAdapterError
|
||||
from gps_denied_onboard.replay_input.interface import (
|
||||
AlignedWindow,
|
||||
AutoSyncConfig,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Synthetic-stream helpers
|
||||
|
||||
|
||||
def _ns(seconds: float) -> int:
|
||||
return int(seconds * 1_000_000_000)
|
||||
|
||||
|
||||
def _build_motion_burst_stream(
|
||||
*,
|
||||
start_s: float,
|
||||
end_s: float,
|
||||
hz: float,
|
||||
burst_at_s: float,
|
||||
burst_amplitude: float,
|
||||
burst_duration_s: float = 1.0,
|
||||
baseline_amplitude: float = 0.0,
|
||||
) -> tuple[tuple[int, float], ...]:
|
||||
"""Build a synthetic ``(ts_ns, magnitude)`` stream.
|
||||
|
||||
Constant at ``baseline_amplitude`` outside a single rectangular
|
||||
burst (``burst_amplitude`` for ``burst_duration_s`` starting at
|
||||
``burst_at_s``). Used so cross-correlation has a clear peak that
|
||||
tests can assert exact-index for.
|
||||
"""
|
||||
out: list[tuple[int, float]] = []
|
||||
period_s = 1.0 / hz
|
||||
t = start_s
|
||||
burst_end_s = burst_at_s + burst_duration_s
|
||||
while t < end_s:
|
||||
if burst_at_s <= t < burst_end_s:
|
||||
out.append((_ns(t), burst_amplitude))
|
||||
else:
|
||||
out.append((_ns(t), baseline_amplitude))
|
||||
t += period_s
|
||||
return tuple(out)
|
||||
|
||||
|
||||
def _build_double_burst_stream(
    *,
    start_s: float,
    end_s: float,
    hz: float,
    burst_a_at_s: float,
    burst_b_at_s: float,
    burst_amplitude: float,
    burst_duration_s: float = 1.0,
    baseline_amplitude: float = 0.0,
) -> tuple[tuple[int, float], ...]:
    """Two-burst variant to constrain cross-correlation more tightly.

    Samples are emitted at ``hz`` from ``start_s`` (inclusive) up to
    ``end_s`` (exclusive). Two rectangular bursts of ``burst_amplitude``,
    each ``burst_duration_s`` long, start at ``burst_a_at_s`` and
    ``burst_b_at_s``; every other sample is ``baseline_amplitude``.

    Sample times come from the sample index (``start_s + i / hz``), not
    from a running ``t += period`` sum, so accumulated float error cannot
    shift a burst edge by one sample or add a stray trailing sample.

    Raises:
        ValueError: if ``hz`` is not positive (the stream would never end).
    """
    if hz <= 0:
        raise ValueError(f"hz must be positive, got {hz}")

    burst_windows = (
        (burst_a_at_s, burst_a_at_s + burst_duration_s),
        (burst_b_at_s, burst_b_at_s + burst_duration_s),
    )
    out: list[tuple[int, float]] = []
    index = 0
    t = start_s
    while t < end_s:
        in_burst = any(begin <= t < finish for begin, finish in burst_windows)
        out.append((_ns(t), burst_amplitude if in_burst else baseline_amplitude))
        index += 1
        # Recompute from the index so rounding error never accumulates.
        t = start_s + index / hz
    return tuple(out)
|
||||
|
||||
|
||||
def _build_multi_flight_stream(
    *,
    flights: tuple[tuple[float, float], ...],
    end_s: float,
    hz: float,
    in_flight_amplitude: float = 0.3,
    ground_amplitude: float = 0.02,
) -> tuple[tuple[int, float], ...]:
    """Build a multi-flight IMU energy stream.

    ``flights`` is a tuple of ``(start_s, end_s)`` per flight. Samples are
    emitted at ``hz`` from ``t = 0`` up to ``end_s`` (exclusive); the
    energy is ``in_flight_amplitude`` while ``t`` lies inside any flight's
    half-open ``[start_s, end_s)`` span and ``ground_amplitude`` otherwise.
    Used by the multi-flight segmentation tests to mimic a real
    "3 takeoffs at the same field" tlog.

    Sample times are computed as ``i / hz`` rather than accumulated with
    ``t += period``, so flight boundaries fall on exactly the intended
    sample even after thousands of samples.

    Raises:
        ValueError: if ``hz`` is not positive (the stream would never end).
    """
    if hz <= 0:
        raise ValueError(f"hz must be positive, got {hz}")

    out: list[tuple[int, float]] = []
    index = 0
    t = 0.0
    while t < end_s:
        airborne = any(begin <= t < finish for begin, finish in flights)
        out.append((_ns(t), in_flight_amplitude if airborne else ground_amplitude))
        index += 1
        # Recompute from the index so rounding error never accumulates.
        t = index / hz
    return tuple(out)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-1: takeoff-aligned regression — find_aligned_window must produce
|
||||
# the same offset (within ± 50 ms) as the AZ-405 compute_offset path
|
||||
# when the video covers the take-off.
|
||||
|
||||
|
||||
def test_ac1_takeoff_aligned_offset_matches_az405_within_50ms() -> None:
    """AC-1: the cross-correlation aligner matches the AZ-405 offset on a take-off clip."""
    # Arrange: both streams share rate, burst width and a zero baseline;
    # only their span, burst position and amplitude differ. The 30 s tlog
    # bursts at t = 2 s and the 5 s video at video_t = 0.5 s, so the
    # AZ-405-equivalent offset is (2.0 - 0.5) s = 1500 ms.
    common = dict(start_s=0.0, hz=10.0, burst_duration_s=1.5, baseline_amplitude=0.0)
    tlog_takeoff_s, video_onset_s = 2.0, 0.5
    imu_energy = _build_motion_burst_stream(
        end_s=30.0, burst_at_s=tlog_takeoff_s, burst_amplitude=1.2, **common
    )
    optical_flow = _build_motion_burst_stream(
        end_s=5.0, burst_at_s=video_onset_s, burst_amplitude=2.0, **common
    )
    config = AutoSyncConfig()
    expected_offset_ms = _ns(tlog_takeoff_s - video_onset_s) // 1_000_000

    # Act
    window = _align_via_cross_correlation(
        tlog_energy=imu_energy,
        flow_samples=optical_flow,
        config=config,
        target_fc_dialect=FcKind.ARDUPILOT_PLANE,
        tlog_path=Path("/nonexistent.tlog"),
        tlog_source_factory=None,
    )

    # Assert: primary path taken, offset within the ±50 ms budget.
    assert window.fallback_used is False, "expected primary cross-corr path, not fallback"
    offset_error_ms = abs(window.offset_ms - expected_offset_ms)
    assert offset_error_ms <= 50, (
        f"AZ-698 offset {window.offset_ms} ms outside ±50 ms of AZ-405-equivalent "
        f"{expected_offset_ms} ms"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-2: mid-flight alignment — tlog 0–30 s with motion burst at t=15 s,
|
||||
# video 0–5 s with motion burst at video_t=1 s. Expected:
|
||||
# tlog_start_ns ≈ (15 - 1) s = 14 s (where video t=0 lands)
|
||||
# offset_ms ≈ 14 000
|
||||
|
||||
|
||||
def test_ac2_mid_flight_alignment_locates_correct_window() -> None:
    """AC-2: a mid-flight clip is placed at the right spot on the tlog timeline."""
    # Arrange: a double-burst signature (bursts 3 s apart in both streams)
    # keeps the correlation peak unambiguous; a lone burst can lock onto
    # the wrong baseline at edge bins.
    common = dict(start_s=0.0, hz=10.0, burst_duration_s=0.8, baseline_amplitude=0.0)
    imu_energy = _build_double_burst_stream(
        end_s=30.0, burst_a_at_s=15.0, burst_b_at_s=18.0, burst_amplitude=1.5, **common
    )
    optical_flow = _build_double_burst_stream(
        end_s=5.0, burst_a_at_s=1.0, burst_b_at_s=4.0, burst_amplitude=2.5, **common
    )
    config = AutoSyncConfig()
    period_ns = _ns(1.0 / config.alignment_resample_hz)

    # Act
    window = _align_via_cross_correlation(
        tlog_energy=imu_energy,
        flow_samples=optical_flow,
        config=config,
        target_fc_dialect=FcKind.ARDUPILOT_PLANE,
        tlog_path=Path("/nonexistent.tlog"),
        tlog_source_factory=None,
    )

    # Assert: video burst A (t = 1.0 s) sits on tlog burst A (t = 15.0 s),
    # hence video t = 0 maps to tlog t = 14.0 s, give or take one
    # resample period.
    assert window.fallback_used is False
    start_error_ns = abs(window.tlog_start_ns - _ns(14.0))
    assert start_error_ns <= period_ns, (
        f"tlog_start_ns={window.tlog_start_ns} not within one resample period "
        f"({period_ns} ns) of the expected 14 s"
    )
    assert abs(window.offset_ms - 14_000) <= 100
    assert window.tlog_end_ns > window.tlog_start_ns
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-3: TlogReplayFcAdapter seek — messages whose raw _timestamp is
|
||||
# below tlog_start_ns must NOT reach subscribers.
|
||||
|
||||
|
||||
def _make_fake_msg(*, type_name: str, raw_ts_s: float, **fields: Any) -> SimpleNamespace:
|
||||
"""Build a pymavlink-shaped fake message for replay-adapter tests."""
|
||||
msg = SimpleNamespace(_timestamp=raw_ts_s, **fields)
|
||||
|
||||
def _get_type() -> str:
|
||||
return type_name
|
||||
|
||||
msg.get_type = _get_type # type: ignore[attr-defined]
|
||||
return msg
|
||||
|
||||
|
||||
def _build_replay_adapter_with_seek(
    *,
    tlog_start_ns: int | None,
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> tuple[TlogReplayFcAdapter, list[FcTelemetryFrame]]:
    """Create a ``TlogReplayFcAdapter`` backed entirely by test doubles.

    Sets ``BUILD_TLOG_REPLAY_ADAPTER=ON`` for the test, writes a one-byte
    placeholder tlog under ``tmp_path`` and subscribes a list's ``append``
    to the adapter's telemetry. Returns the adapter together with that
    list so callers can inspect what was delivered.
    """
    monkeypatch.setenv("BUILD_TLOG_REPLAY_ADAPTER", "ON")

    placeholder_tlog = tmp_path / "fake.tlog"
    placeholder_tlog.write_bytes(b"\x00")

    # Clock double: time is pinned at zero and sleeping returns immediately.
    clock_double = MagicMock(spec=Clock)
    clock_double.monotonic_ns.return_value = 0
    clock_double.sleep_until_ns.return_value = None

    wgs_double = MagicMock()
    fdr_double = MagicMock()
    fdr_double.enqueue.return_value = None

    delivered: list[FcTelemetryFrame] = []
    adapter = TlogReplayFcAdapter(
        tlog_path=placeholder_tlog,
        target_fc_dialect=FcKind.ARDUPILOT_PLANE,
        clock=clock_double,
        wgs_converter=wgs_double,
        fdr_client=fdr_double,
        time_offset_ms=0,
        tlog_start_ns=tlog_start_ns,
        pace=ReplayPace.ASAP,
    )
    adapter.subscribe_telemetry(delivered.append)
    return adapter, delivered
|
||||
|
||||
|
||||
def test_ac3_adapter_seek_skips_pre_window_messages(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """AC-3: messages stamped before ``tlog_start_ns`` never reach subscribers."""

    def raw_imu_at(t: float) -> SimpleNamespace:
        # Stationary RAW_IMU sample: only zacc is non-zero.
        return _make_fake_msg(
            type_name="RAW_IMU",
            raw_ts_s=t,
            time_usec=int(t * 1e6),
            xacc=0,
            yacc=0,
            zacc=1000,
            xgyro=0,
            ygyro=0,
            zgyro=0,
        )

    # Arrange: seek bound at 100 s; two messages fall before it (to be
    # dropped) and three land on or after it (to be delivered).
    adapter, received = _build_replay_adapter_with_seek(
        tlog_start_ns=_ns(100.0),
        tmp_path=tmp_path,
        monkeypatch=monkeypatch,
    )
    before_window = [raw_imu_at(t) for t in (50.0, 99.999)]
    inside_window = [raw_imu_at(t) for t in (100.0, 101.5, 110.0)]

    # Act
    for msg in (*before_window, *inside_window):
        adapter.feed_one_message(msg)

    # Assert
    assert len(received) == 3, "expected three in-window IMU frames"
    assert all(
        frame.kind == TelemetryKind.IMU_SAMPLE for frame in received
    ), "non-IMU frame leaked through"
    # ``received_at`` carries the raw _timestamp (offset is 0 here), so no
    # delivered frame may be stamped earlier than 100 s.
    for frame in received:
        assert frame.received_at >= _ns(100.0), (
            f"frame with received_at={frame.received_at} ns leaked below the seek bound"
        )
|
||||
|
||||
|
||||
def test_ac3_adapter_default_no_seek_passes_every_message(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """AC-3 (control): with no seek bound every message is delivered."""
    # Arrange: tlog_start_ns=None is the adapter's "no seek" setting.
    adapter, received = _build_replay_adapter_with_seek(
        tlog_start_ns=None,
        tmp_path=tmp_path,
        monkeypatch=monkeypatch,
    )
    stationary = dict(xacc=0, yacc=0, zacc=1000, xgyro=0, ygyro=0, zgyro=0)
    messages = [
        _make_fake_msg(
            type_name="RAW_IMU",
            raw_ts_s=t,
            time_usec=int(t * 1e6),
            **stationary,
        )
        for t in (10.0, 50.0, 100.0)
    ]

    # Act
    for msg in messages:
        adapter.feed_one_message(msg)

    # Assert
    assert len(received) == 3, "default (no seek) must pass every IMU message"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-4: AC-9 frame-window validator passes for both scenarios.
|
||||
|
||||
|
||||
def test_ac4_validator_passes_for_takeoff_aligned_offset() -> None:
    """AC-4: the frame-window validator accepts a take-off-aligned offset."""
    # Arrange: 150 video frames at 30 fps (5 s) against 3000 IMU samples
    # at 100 Hz (30 s); the offset maps video t=0 onto tlog t=2 s.
    frame_ts_ns = [int(i / 30.0 * 1_000_000_000) for i in range(150)]
    imu_ts_ns = [int(i / 100.0 * 1_000_000_000) for i in range(3000)]

    # Act
    result = validate_offset_or_fail(
        2_000,
        tlog_imu_timestamps_ns=imu_ts_ns,
        video_frame_timestamps_ns=frame_ts_ns,
        threshold_pct=95.0,
        window_ms=100,
    )

    # Assert
    assert result == 0
|
||||
|
||||
|
||||
def test_ac4_validator_passes_for_mid_flight_offset() -> None:
    """AC-4: the frame-window validator accepts a mid-flight offset."""
    # Arrange: 5 s of video (150 frames at 30 fps) and 60 s of IMU data
    # (6000 samples at 100 Hz). A 30 s offset drops the clip into the
    # middle of the log, where IMU samples are 10 ms apart — comfortably
    # inside the ±100 ms window for every frame.
    frame_ts_ns = [int(i / 30.0 * 1_000_000_000) for i in range(150)]
    imu_ts_ns = [int(i / 100.0 * 1_000_000_000) for i in range(6000)]

    # Act
    result = validate_offset_or_fail(
        30_000,
        tlog_imu_timestamps_ns=imu_ts_ns,
        video_frame_timestamps_ns=frame_ts_ns,
        threshold_pct=95.0,
        window_ms=100,
    )

    # Assert
    assert result == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Resampler unit tests — pin the binning semantics so future
|
||||
# regressions are caught explicitly.
|
||||
|
||||
|
||||
def test_resample_uniform_averages_within_bin() -> None:
    """Samples sharing a bin are averaged; a lone sample keeps its value."""
    # Arrange: with 100 ms bins, the values 1, 2, 3 fall into bin 0
    # (mean 2.0) and the value 4 sits alone in bin 1.
    bin_ns = _ns(0.10)
    samples = tuple(
        (_ns(t), value)
        for t, value in ((0.00, 1.0), (0.03, 2.0), (0.06, 3.0), (0.15, 4.0))
    )

    # Act
    resampled = _resample_uniform(samples, bin_ns, origin_ns=0)

    # Assert
    first_bin, second_bin = resampled[0], resampled[1]
    assert math.isclose(first_bin, 2.0)
    assert math.isclose(second_bin, 4.0)
|
||||
|
||||
|
||||
def test_resample_uniform_drops_trailing_empty_bins() -> None:
    """Resampling across a gap must not leave any zero-valued bin behind."""
    # Arrange: one sample in bin 0, then more than a second of silence
    # before the next sample lands in bin 10 (t = 1.05 s, 100 ms bins).
    # The expectation pinned here is that the empty bins in between are
    # filled from the preceding bin rather than left at zero, and that no
    # zero-valued tail follows the last sample.
    sparse = (
        (_ns(0.0), 5.0),
        (_ns(1.05), 7.0),
    )
    bin_ns = _ns(0.1)

    # Act
    resampled = _resample_uniform(sparse, bin_ns, origin_ns=0)

    # Assert: the first bin holds 5.0, the last bin holds the 7.0 sample,
    # and nothing in between (or after) is zero.
    head, tail = resampled[0], resampled[-1]
    assert head == 5.0
    assert tail == 7.0
    assert all(v != 0.0 for v in resampled)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Fallback path — when cross-correlation confidence is below the
|
||||
# threshold, find_aligned_window must fall back to the head-takeoff
|
||||
# detector and set fallback_used=True.
|
||||
|
||||
|
||||
def test_low_confidence_triggers_takeoff_fallback(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A correlation without a usable peak must hand over to the take-off detector."""

    def _stub_takeoff(
        path: Any, dialect: Any, config: Any, *, source_factory: Any = None
    ) -> SimpleNamespace:
        # Canned detector result: take-off at 7 s with high confidence.
        return SimpleNamespace(onset_ns=_ns(7.0), confidence=0.9)

    # Arrange: both streams are flat, so there is no motion for the
    # correlation to lock onto; the threshold is set to 0.5. The real
    # fallback would parse the tlog through detect_tlog_takeoff, so that
    # function is swapped for the stub above.
    monkeypatch.setattr(
        "gps_denied_onboard.replay_input.auto_sync.detect_tlog_takeoff",
        _stub_takeoff,
    )
    flat_tlog = tuple((_ns(step / 10.0), 0.0) for step in range(0, 100))
    flat_flow = tuple((_ns(step / 10.0), 0.0) for step in range(0, 20))
    config = AutoSyncConfig(alignment_low_confidence_threshold=0.5)
    tlog_path = tmp_path / "fake.tlog"
    tlog_path.write_bytes(b"\x00")

    # Act
    window = _align_via_cross_correlation(
        tlog_energy=flat_tlog,
        flow_samples=flat_flow,
        config=config,
        target_fc_dialect=FcKind.ARDUPILOT_PLANE,
        tlog_path=tlog_path,
        tlog_source_factory=None,
    )

    # Assert
    assert window.fallback_used is True
    assert window.tlog_start_ns == _ns(7.0), "fallback did not pick up the stubbed takeoff onset"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Guard: video stream longer than tlog stream → reject (auto-trim
|
||||
# requires the video to be a SLICE of a longer tlog).
|
||||
|
||||
|
||||
def test_video_longer_than_tlog_raises() -> None:
    """A flow stream that outlasts the tlog stream is rejected outright."""
    # Arrange: 1 s of tlog energy versus 5 s of video flow, both at 10 Hz.
    short_tlog = tuple((_ns(step / 10.0), 0.5) for step in range(10))
    long_flow = tuple((_ns(step / 10.0), 0.5) for step in range(50))
    config = AutoSyncConfig()

    # Act + Assert
    with pytest.raises(ReplayInputAdapterError, match="video flow stream is longer"):
        _align_via_cross_correlation(
            tlog_energy=short_tlog,
            flow_samples=long_flow,
            config=config,
            target_fc_dialect=FcKind.ARDUPILOT_PLANE,
            tlog_path=Path("/nonexistent.tlog"),
            tlog_source_factory=None,
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AlignedWindow DTO is frozen + slotted.
|
||||
|
||||
|
||||
def test_aligned_window_is_frozen() -> None:
    """Assigning to a field of an ``AlignedWindow`` must raise."""
    # Arrange
    field_values = dict(
        tlog_start_ns=1,
        tlog_end_ns=2,
        offset_ms=0,
        confidence=0.9,
        fallback_used=False,
    )
    w = AlignedWindow(**field_values)

    # Act + Assert: either exception type counts as "immutable".
    with pytest.raises((AttributeError, TypeError)):
        w.confidence = 0.5  # type: ignore[misc]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-5: end-to-end CLI smoke — skipped here because it requires
|
||||
# ffmpeg-capable cv2 + the real ``derkachi.tlog``/``.mp4`` binaries.
|
||||
# The actual CLI run is covered by ``tests/e2e/replay/`` when those
|
||||
# prerequisites are available.
|
||||
|
||||
|
||||
def _replay_inputs_present() -> bool:
|
||||
fixtures = Path("_docs/00_problem/input_data/flight_derkachi")
|
||||
return (fixtures / "derkachi.tlog").is_file() and (fixtures / "derkachi.mp4").is_file()
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not _replay_inputs_present(),
    reason="AC-5 e2e smoke requires _docs/00_problem/input_data/flight_derkachi/derkachi.{tlog,mp4}",
)
def test_ac5_cli_auto_trim_smoke_uses_find_aligned_window(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """AC-5 wiring check: ``--auto-trim`` must end up in ``ReplayConfig.auto_trim``.

    Only the argument-to-config mapping is exercised; a full CLI run needs
    the runtime root and is left to the e2e suite. Every path handed to the
    builder lives under pytest's per-test ``tmp_path`` so the test leaves
    nothing behind in a shared location such as ``/tmp`` and cannot collide
    with a concurrent run.
    """
    # Arrange
    from gps_denied_onboard.cli.replay import _build_replay_config
    from gps_denied_onboard.config.schema import Config, ReplayConfig

    # The signing key is the only input that is actually written to disk.
    key_file = tmp_path / "k.bin"
    key_file.write_bytes(b"\x00" * 32)
    args = SimpleNamespace(
        video=tmp_path / "v.mp4",
        tlog=tmp_path / "t.tlog",
        output=tmp_path / "o.jsonl",
        camera_calibration=tmp_path / "c.json",
        config_path=tmp_path / "c.yaml",
        mavlink_signing_key=key_file,
        pace="asap",
        time_offset_ms=None,
        skip_auto_sync_validation=False,
        auto_trim=True,
    )
    # Rebuild the default config with a pristine ReplayConfig so the
    # assertions below observe only what _build_replay_config set.
    base = Config()
    base = type(base)(
        mode=base.mode,
        log=base.log,
        fdr=base.fdr,
        runtime=base.runtime,
        fc=base.fc,
        gcs=base.gcs,
        replay=ReplayConfig(),
        components=base.components,
    )

    # Act
    new_config = _build_replay_config(args, base)

    # Assert
    assert new_config.replay.auto_trim is True
    assert new_config.replay.time_offset_ms is None
|
||||
|
||||
|
||||
# Cross-reference: the existing AZ-405 fixture still passes (no regression).
|
||||
|
||||
|
||||
def test_autosync_decision_offset_is_within_ac9_window_for_baseline() -> None:
    """The AZ-405 ``compute_offset`` baseline referenced by AC-1 still holds."""
    # Arrange: detector outputs for a take-off at tlog t = 2.5 s and a
    # video motion onset at t = 0.5 s.
    from gps_denied_onboard.replay_input.auto_sync import _DetectorResult

    tlog_onset = _DetectorResult(onset_ns=_ns(2.5), confidence=0.9)
    video_onset = _DetectorResult(onset_ns=_ns(0.5), confidence=0.85)

    # Act
    decision = compute_offset(tlog_onset, video_onset)

    # Assert: the onsets are 2 s apart; the combined confidence equals the
    # lower of the two detector confidences used above.
    assert decision.offset_ms == 2_000
    assert decision.combined_confidence == pytest.approx(0.85, abs=1e-6)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Multi-flight tlog handling (user constraint: "if 1 flight take it, if
|
||||
# multiple take the last"). The pre-NCC segmenter is the gatekeeper.
|
||||
|
||||
|
||||
def test_segmenter_one_flight_returns_single_span() -> None:
    """A tlog holding one flight yields exactly one segment around it."""
    # Arrange: 120 s of samples with a single flight spanning 10..100 s.
    energy = _build_multi_flight_stream(flights=((10.0, 100.0),), end_s=120.0, hz=10.0)
    tolerance_ns = _ns(0.2)

    # Act
    segments = _segment_flights_from_imu_energy(
        energy,
        motion_threshold=0.1,
        min_flight_duration_ns=_ns(30.0),
        max_internal_gap_ns=_ns(5.0),
    )

    # Assert
    assert len(segments) == 1
    found_start_ns, found_end_ns = segments[0]
    assert abs(found_start_ns - _ns(10.0)) <= tolerance_ns
    assert abs(found_end_ns - _ns(100.0)) <= tolerance_ns
|
||||
|
||||
|
||||
def test_segmenter_three_flights_returns_three_spans_in_order() -> None:
    """Three sorties in one tlog come back as three segments, in time order."""
    # Arrange: 300 s of samples holding three 60 s flights separated by
    # 30 s on the ground — the Derkachi-style case of one tlog, three
    # sorties, with the video covering only the last.
    flights_def = ((10.0, 70.0), (100.0, 160.0), (190.0, 250.0))
    energy = _build_multi_flight_stream(flights=flights_def, end_s=300.0, hz=10.0)
    tolerance_ns = _ns(0.2)

    # Act
    segments = _segment_flights_from_imu_energy(
        energy,
        motion_threshold=0.1,
        min_flight_duration_ns=_ns(30.0),
        max_internal_gap_ns=_ns(5.0),
    )

    # Assert
    assert len(segments) == 3
    for found, wanted in zip(segments, flights_def):
        found_start_ns, found_end_ns = found
        want_start_s, want_end_s = wanted
        assert abs(found_start_ns - _ns(want_start_s)) <= tolerance_ns
        assert abs(found_end_ns - _ns(want_end_s)) <= tolerance_ns
|
||||
|
||||
|
||||
def test_segmenter_drops_ground_blip_below_min_duration() -> None:
    """Activity shorter than the minimum flight duration is not a flight."""
    # Arrange: a 5 s burst of activity (5..10 s, e.g. an engine test)
    # ahead of a genuine 60 s flight (50..110 s). With a 30 s minimum only
    # the genuine flight may survive.
    energy = _build_multi_flight_stream(
        flights=((5.0, 10.0), (50.0, 110.0)),
        end_s=120.0,
        hz=10.0,
    )

    # Act
    segments = _segment_flights_from_imu_energy(
        energy,
        motion_threshold=0.1,
        min_flight_duration_ns=_ns(30.0),
        max_internal_gap_ns=_ns(5.0),
    )

    # Assert
    assert len(segments) == 1
    found_start_ns = segments[0][0]
    assert abs(found_start_ns - _ns(50.0)) <= _ns(0.2)
|
||||
|
||||
|
||||
def test_segmenter_keeps_brief_cruise_lull_inside_flight() -> None:
    """A quiet spell shorter than the allowed gap does not split a flight."""
    # Arrange: activity over 10..45 s and 48..100 s, i.e. one flight with
    # a 3 s lull. The lull is shorter than the 5 s internal-gap allowance,
    # so both halves must be reported as one segment.
    energy = _build_multi_flight_stream(
        flights=((10.0, 45.0), (48.0, 100.0)),
        end_s=120.0,
        hz=10.0,
    )
    tolerance_ns = _ns(0.2)

    # Act
    segments = _segment_flights_from_imu_energy(
        energy,
        motion_threshold=0.1,
        min_flight_duration_ns=_ns(30.0),
        max_internal_gap_ns=_ns(5.0),
    )

    # Assert
    assert len(segments) == 1
    found_start_ns, found_end_ns = segments[0]
    assert abs(found_start_ns - _ns(10.0)) <= tolerance_ns
    assert abs(found_end_ns - _ns(100.0)) <= tolerance_ns
|
||||
|
||||
|
||||
def test_find_aligned_window_picks_last_flight_for_multi_flight_tlog(
    tmp_path: Path,
) -> None:
    """``find_aligned_window`` must align against the LAST flight of a multi-sortie tlog.

    The tlog loader is replaced with a synthetic energy stream and the
    video reader with synthetic frames, so the test pins the segment
    selection rather than the binary parsers.
    """
    # Arrange: a 260 s tlog with three sorties (10..70, 100..160,
    # 190..250 s) and a 30 s video whose flow profile has one 3 s motion
    # burst starting at video t=5 s.
    flights_def = ((10.0, 70.0), (100.0, 160.0), (190.0, 250.0))
    tlog_energy = _build_multi_flight_stream(
        flights=flights_def,
        end_s=260.0,
        hz=10.0,
    )
    flow_samples = _build_motion_burst_stream(
        start_s=0.0,
        end_s=30.0,
        hz=10.0,
        burst_at_s=5.0,
        burst_amplitude=2.0,
        burst_duration_s=3.0,
        baseline_amplitude=0.05,
    )
    config = AutoSyncConfig(
        alignment_segment_min_flight_duration_seconds=30.0,
        alignment_segment_max_internal_gap_seconds=5.0,
    )

    import gps_denied_onboard.replay_input.auto_sync as auto_sync_mod

    # Placeholder files: their contents are never parsed because both
    # readers are replaced below.
    fake_tlog = tmp_path / "multi_flight.tlog"
    fake_tlog.write_bytes(b"\x00")
    fake_video = tmp_path / "video.mp4"
    fake_video.write_bytes(b"\x00")

    def _fake_loader(
        path: Path,
        *,
        max_messages: int,
        source_factory: Any,
    ) -> tuple[tuple[int, float], ...]:
        # Hand back the pre-built energy stream regardless of arguments.
        return tlog_energy

    def _fake_frames(
        path: Path, scan_seconds: float,
    ) -> list[tuple[int, Any]]:
        import numpy as np

        # Fixed seed keeps the synthetic frames identical on every run.
        rng = np.random.default_rng(42)
        frames: list[tuple[int, Any]] = []
        for ts_ns, mag in flow_samples:
            # 3-channel BGR (cvtColor BGR→GRAY needs ≥ 3 channels).
            # During a burst the rows are rolled by a few pixels — that
            # displacement is what the flow estimator is meant to pick up.
            base = rng.integers(0, 30, size=(16, 16, 3), dtype=np.int16)
            shift_px = int(mag * 4)
            if shift_px > 0:
                base = np.roll(base, shift=shift_px, axis=0)
            frames.append((ts_ns, np.clip(base, 0, 255).astype(np.uint8)))
        return frames

    # The context manager undoes the patch on exit, even if the call raises.
    with pytest.MonkeyPatch.context() as patcher:
        patcher.setattr(auto_sync_mod, "_load_tlog_imu_energy_stream", _fake_loader)

        # Act
        window = find_aligned_window(
            fake_tlog,
            fake_video,
            config,
            target_fc_dialect=FcKind.ARDUPILOT_PLANE,
            video_frames_factory=_fake_frames,
        )

    # Assert: the aligner MUST select FLIGHT 3 (190..250 s), NOT
    # flight 1 (10..70 s). Whether NCC locks on or the fallback path
    # fires, the resulting window must lie inside flight 3 — that's the
    # user-visible contract ("take the last flight").
    flight3_start_ns, flight3_end_ns = _ns(190.0), _ns(250.0)
    assert window.flight_count_detected == 3
    assert window.selected_flight_index == 2
    assert flight3_start_ns <= window.tlog_start_ns <= flight3_end_ns
    # Sanity: did NOT lock onto flight 1 or 2.
    assert window.tlog_start_ns > _ns(160.0)
|
||||
|
||||
|
||||
def test_align_via_cross_correlation_locks_onto_burst_inside_last_segment() -> None:
    """NCC locks onto the matching burst when fed only the last flight's energy."""
    # Arrange: the tlog energy is already cut down to flight 3
    # (190..250 s), as find_aligned_window would hand it over after
    # segmentation. Both streams carry one 5 s burst over a 0.05
    # baseline: at tlog t=210 s and at video t=10 s.
    common = dict(hz=10.0, burst_duration_s=5.0, baseline_amplitude=0.05)
    last_segment_tlog = _build_motion_burst_stream(
        start_s=190.0, end_s=250.0, burst_at_s=210.0, burst_amplitude=1.5, **common
    )
    video_flow = _build_motion_burst_stream(
        start_s=0.0, end_s=30.0, burst_at_s=10.0, burst_amplitude=2.0, **common
    )
    config = AutoSyncConfig()

    # Act
    window = _align_via_cross_correlation(
        tlog_energy=last_segment_tlog,
        flow_samples=video_flow,
        config=config,
        target_fc_dialect=FcKind.ARDUPILOT_PLANE,
        tlog_path=Path("/nonexistent.tlog"),
        tlog_source_factory=None,
        flight_count_detected=3,
        selected_flight_index=2,
    )

    # Assert: confident primary-path lock, segment bookkeeping passed
    # through, and the window starting where video t=0 lands on the tlog
    # timeline: 210 s − 10 s = 200 s.
    assert not window.fallback_used
    assert window.confidence > 0.6
    assert window.flight_count_detected == 3
    assert window.selected_flight_index == 2
    start_error_ns = abs(window.tlog_start_ns - _ns(200.0))
    assert start_error_ns <= _ns(0.2)
|
||||
|
||||
|
||||
def test_find_aligned_window_uses_only_segment_for_segmented_tlog_fallback(
    tmp_path: Path,
) -> None:
    """When NCC cannot lock, the fallback window must still sit inside flight 3."""
    # Arrange: a three-flight tlog cut down to the last flight
    # (190..250 s inclusive) and a perfectly flat video flow, which gives
    # the correlation nothing to lock onto. The fallback must then anchor
    # on the supplied segment, not on flight 1's take-off.
    flights_def = ((10.0, 70.0), (100.0, 160.0), (190.0, 250.0))
    full_energy = _build_multi_flight_stream(
        flights=flights_def,
        end_s=260.0,
        hz=10.0,
    )
    flat_flow = tuple((_ns(step / 10.0), 0.5) for step in range(0, 50))
    config = AutoSyncConfig()
    segment_lo_ns, segment_hi_ns = _ns(190.0), _ns(250.0)
    last_flight_energy = tuple(
        (ts, e) for ts, e in full_energy if segment_lo_ns <= ts <= segment_hi_ns
    )

    # Act
    window = _align_via_cross_correlation(
        tlog_energy=last_flight_energy,
        flow_samples=flat_flow,
        config=config,
        target_fc_dialect=FcKind.ARDUPILOT_PLANE,
        tlog_path=tmp_path / "x.tlog",
        tlog_source_factory=None,
        flight_count_detected=3,
        selected_flight_index=2,
    )

    # Assert
    assert window.fallback_used is True
    assert window.flight_count_detected == 3
    assert window.selected_flight_index == 2
    # The window start has to fall within flight 3, never at flight 1.
    assert window.tlog_start_ns >= _ns(190.0)
    assert window.tlog_start_ns <= _ns(250.0)
|
||||
@@ -0,0 +1,497 @@
|
||||
"""AZ-697 — Direct binary-tlog GPS-truth extractor.
|
||||
|
||||
Covers AC-1..AC-5 of ``_docs/02_tasks/todo/AZ-697_tlog_ground_truth_extractor.md``:
|
||||
|
||||
* AC-1 (Happy path on real tlog) — gated on the committed
|
||||
``derkachi.tlog`` (5.8 MB binary). When present, asserts ≥ 100
|
||||
records inside the Derkachi geofence.
|
||||
* AC-2 (Empty GPS gracefully) — synthetic source emits no messages.
|
||||
* AC-3 (GPS_RAW_INT fallback / mixed precedence).
|
||||
* AC-4 (mypy --strict) — project-wide strict via ``pyproject.toml
|
||||
[tool.mypy] strict = true``. A scoped smoke test re-runs mypy on the
|
||||
module to catch regressions before CI.
|
||||
* AC-5 (Helper move snapshot) — covered by
|
||||
``tests/unit/helpers/test_gps_compare.py``.
|
||||
|
||||
All tests use a synthetic ``source_factory`` for determinism (no
|
||||
disk IO, no real pymavlink).
|
||||
|
||||
Style: every test follows the Arrange / Act / Assert pattern.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import math
|
||||
import subprocess
|
||||
import sys
|
||||
from collections.abc import Iterator
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from gps_denied_onboard.replay_input.errors import ReplayInputAdapterError
|
||||
from gps_denied_onboard.replay_input.tlog_ground_truth import (
|
||||
TlogGpsFix,
|
||||
TlogGroundTruth,
|
||||
load_tlog_ground_truth,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Synthetic-source fixture helpers
|
||||
|
||||
|
||||
class _FakeMavlinkMessage:
|
||||
"""Stand-in for a pymavlink message object.
|
||||
|
||||
Mirrors the duck-typed surface ``load_tlog_ground_truth`` uses:
|
||||
``get_type()`` returns the message-type string and ``_timestamp``
|
||||
is the Unix-second float that pymavlink's mavlogfile populates on
|
||||
every ``recv_match()`` return.
|
||||
"""
|
||||
|
||||
def __init__(self, msg_type: str, timestamp_s: float, **fields: Any) -> None:
|
||||
self._msg_type = msg_type
|
||||
self._timestamp = timestamp_s
|
||||
for name, value in fields.items():
|
||||
setattr(self, name, value)
|
||||
|
||||
def get_type(self) -> str:
|
||||
return self._msg_type
|
||||
|
||||
|
||||
class _FakeMavlinkSource:
|
||||
"""Stand-in for pymavlink's ``mavutil.mavlink_connection`` return.
|
||||
|
||||
``recv_match`` walks an in-memory message queue, filtering by the
|
||||
``type`` argument. Returns ``None`` once the queue is exhausted —
|
||||
matching mavlogfile's end-of-stream behaviour.
|
||||
"""
|
||||
|
||||
def __init__(self, messages: list[_FakeMavlinkMessage]) -> None:
|
||||
self._iter: Iterator[_FakeMavlinkMessage] = iter(messages)
|
||||
self.closed = False
|
||||
|
||||
def recv_match(
|
||||
self,
|
||||
type: list[str] | str | None = None,
|
||||
blocking: bool = False,
|
||||
) -> _FakeMavlinkMessage | None:
|
||||
wanted = {type} if isinstance(type, str) else set(type or [])
|
||||
for msg in self._iter:
|
||||
if not wanted or msg.get_type() in wanted:
|
||||
return msg
|
||||
return None
|
||||
|
||||
def close(self) -> None:
|
||||
self.closed = True
|
||||
|
||||
|
||||
def _global_position_int(
    *,
    ts_s: float,
    lat_e7: int,
    lon_e7: int,
    alt_mm: int,
    hdg_cdeg: int = 0,
    vx_cm_s: int = 0,
    vy_cm_s: int = 0,
    vz_cm_s: int = 0,
) -> _FakeMavlinkMessage:
    """Build a fake ``GLOBAL_POSITION_INT`` message.

    The unit-suffixed keyword arguments are stored on the message under
    the short attribute names ``lat``, ``lon``, ``alt``, ``hdg``, ``vx``,
    ``vy`` and ``vz``; ``ts_s`` becomes the message timestamp.
    """
    payload = {
        "lat": lat_e7,
        "lon": lon_e7,
        "alt": alt_mm,
        "hdg": hdg_cdeg,
        "vx": vx_cm_s,
        "vy": vy_cm_s,
        "vz": vz_cm_s,
    }
    return _FakeMavlinkMessage("GLOBAL_POSITION_INT", ts_s, **payload)
|
||||
|
||||
|
||||
def _gps_raw_int(
    *,
    ts_s: float,
    lat_e7: int,
    lon_e7: int,
    alt_mm: int,
    vel_cm_s: int = 0,
    cog_cdeg: int = 0,
) -> _FakeMavlinkMessage:
    """Build a fake ``GPS_RAW_INT`` message.

    The unit-suffixed keyword arguments are stored on the message under
    the short attribute names ``lat``, ``lon``, ``alt``, ``vel`` and
    ``cog``; ``ts_s`` becomes the message timestamp.
    """
    payload = {
        "lat": lat_e7,
        "lon": lon_e7,
        "alt": alt_mm,
        "vel": vel_cm_s,
        "cog": cog_cdeg,
    }
    return _FakeMavlinkMessage("GPS_RAW_INT", ts_s, **payload)
|
||||
|
||||
|
||||
def _factory_from(messages: list[_FakeMavlinkMessage]) -> Any:
|
||||
"""Return a ``source_factory`` that yields the given message list."""
|
||||
|
||||
def _factory(_path: str) -> _FakeMavlinkSource:
|
||||
return _FakeMavlinkSource(messages)
|
||||
|
||||
return _factory
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-1: Happy path on real tlog (gated on the committed binary)
|
||||
|
||||
|
||||
def _real_derkachi_tlog() -> Path:
|
||||
return (
|
||||
Path(__file__).resolve().parents[3]
|
||||
/ "_docs"
|
||||
/ "00_problem"
|
||||
/ "input_data"
|
||||
/ "flight_derkachi"
|
||||
/ "derkachi.tlog"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not _real_derkachi_tlog().is_file(),
    reason=(
        "Real derkachi.tlog binary not present (gitignored 5.8 MB blob). "
        "Place it at _docs/00_problem/input_data/flight_derkachi/derkachi.tlog "
        "to exercise AC-1."
    ),
)
def test_ac1_real_derkachi_tlog_has_geofence_records() -> None:
    """AC-1: the real tlog yields >= 100 fixes centred on the Derkachi area.

    Skipped when the binary is not present on disk. The geofence check
    uses the (upper) median of the non-zero coordinates so a handful of
    pre-lock ``0, 0`` fixes cannot fail the test.
    """
    # Arrange
    tlog = _real_derkachi_tlog()

    # Act
    truth = load_tlog_ground_truth(tlog)

    # Assert
    # The module docstring states the criterion as ">= 100 records", so
    # exactly 100 must pass.
    assert len(truth.records) >= 100, (
        f"expected >= 100 GPS records, got {len(truth.records)}"
    )
    assert truth.source in {"GLOBAL_POSITION_INT", "GPS_RAW_INT"}
    # Derkachi geofence: lat ≈ 50.08, lon ≈ 36.11 (Kharkiv suburb).
    lats = [r.lat_deg for r in truth.records if r.lat_deg != 0.0]
    lons = [r.lon_deg for r in truth.records if r.lon_deg != 0.0]
    assert lats, "every GPS record has lat == 0; tlog likely malformed"
    # Guard the longitude list too: indexing an empty list below would
    # surface as an opaque IndexError instead of a diagnostic message.
    assert lons, "every GPS record has lon == 0; tlog likely malformed"
    median_lat = sorted(lats)[len(lats) // 2]
    median_lon = sorted(lons)[len(lons) // 2]
    assert 49.9 <= median_lat <= 50.3, f"median lat {median_lat} outside Derkachi band"
    assert 35.9 <= median_lon <= 36.4, f"median lon {median_lon} outside Derkachi band"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-2: Empty GPS gracefully (no messages → empty records + WARN log)
|
||||
|
||||
|
||||
def test_ac2_empty_tlog_returns_empty_records_and_warns(
    tmp_path: Path,
    caplog: pytest.LogCaptureFixture,
) -> None:
    """AC-2: a source with no messages gives an empty result and a warning."""
    # Arrange
    tlog_path = tmp_path / "empty.tlog"
    tlog_path.write_bytes(b"")
    empty_factory = _factory_from([])
    loader_logger = "gps_denied_onboard.replay_input.tlog_ground_truth"

    # Act
    with caplog.at_level(logging.WARNING, logger=loader_logger):
        truth = load_tlog_ground_truth(tlog_path, source_factory=empty_factory)

    # Assert
    assert truth.records == ()
    assert truth.source == ""
    logged_texts = [record.message for record in caplog.records]
    assert any(
        "contains no GLOBAL_POSITION_INT or GPS_RAW_INT" in text
        for text in logged_texts
    )
|
||||
|
||||
|
||||
def test_missing_file_raises(tmp_path: Path) -> None:
    """Loading a path that does not exist raises the adapter error."""
    # Arrange: the file is deliberately never created.
    nonexistent = tmp_path.joinpath("absent.tlog")

    # Act / Assert
    with pytest.raises(ReplayInputAdapterError, match="tlog file not found"):
        load_tlog_ground_truth(nonexistent)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-3: Fallback precedence (GPS_RAW_INT only; mixed source)
|
||||
|
||||
|
||||
def test_ac3_gps_raw_int_fallback_when_no_global_position_int(tmp_path: Path) -> None:
    """AC-3: with only ``GPS_RAW_INT`` present, that stream is used."""
    # Arrange
    tlog_path = tmp_path / "raw_only.tlog"
    tlog_path.write_bytes(b"")
    # Both fixes share speed and course: 15 m/s heading 90° (east).
    motion = {"vel_cm_s": 1500, "cog_cdeg": 9000}
    raw_fixes = [
        _gps_raw_int(
            ts_s=1_700_000_000.000,
            lat_e7=500_800_000,  # 50.08
            lon_e7=361_100_000,  # 36.11
            alt_mm=200_000,  # 200 m MSL
            **motion,
        ),
        _gps_raw_int(
            ts_s=1_700_000_000.200,
            lat_e7=500_801_000,
            lon_e7=361_101_000,
            alt_mm=200_500,
            **motion,
        ),
    ]

    # Act
    truth = load_tlog_ground_truth(tlog_path, source_factory=_factory_from(raw_fixes))

    # Assert
    assert truth.source == "GPS_RAW_INT"
    assert len(truth.records) == 2
    head = truth.records[0]
    assert head.lat_deg == pytest.approx(50.08, abs=1e-6)
    assert head.lon_deg == pytest.approx(36.11, abs=1e-6)
    assert head.alt_m == pytest.approx(200.0, abs=1e-3)
    # Course 90° (east): north component 0, east component 15 m/s, no vz.
    assert head.vx_m_s == pytest.approx(0.0, abs=1e-9)
    assert head.vy_m_s == pytest.approx(15.0, abs=1e-9)
    assert head.vz_m_s == 0.0
    assert head.hdg_deg == pytest.approx(90.0, abs=1e-6)
    assert head.ts_ns == 1_700_000_000_000_000_000
|
||||
|
||||
|
||||
def test_ac3_mixed_messages_prefer_global_position_int(tmp_path: Path) -> None:
    """AC-3: when both streams are present, ``GLOBAL_POSITION_INT`` wins."""
    # Arrange
    tlog_path = tmp_path / "mixed.tlog"
    tlog_path.write_bytes(b"")
    interleaved: list[_FakeMavlinkMessage] = []
    for step in range(2):
        stamp = float(step + 1)  # 1.0, then 2.0
        pos_shift = step * 1_000
        alt_shift = step * 500
        # GPS_RAW_INT rows sit near 40.00 / 30.00 so they are easy to tell
        # apart from the GLOBAL_POSITION_INT rows near 50.08 / 36.11.
        interleaved.append(
            _gps_raw_int(
                ts_s=stamp,
                lat_e7=400_000_000 + pos_shift,
                lon_e7=300_000_000 + pos_shift,
                alt_mm=100_000 + alt_shift,
                cog_cdeg=0,
            )
        )
        interleaved.append(
            _global_position_int(
                ts_s=stamp,
                lat_e7=500_800_000 + pos_shift,
                lon_e7=361_100_000 + pos_shift,
                alt_mm=200_000 + alt_shift,
                hdg_cdeg=4500,  # 45°
                vx_cm_s=500,
                vy_cm_s=-500,
                vz_cm_s=100,
            )
        )

    # Act
    truth = load_tlog_ground_truth(
        tlog_path, source_factory=_factory_from(interleaved)
    )

    # Assert: only the GLOBAL_POSITION_INT rows made it into the result.
    assert truth.source == "GLOBAL_POSITION_INT"
    assert len(truth.records) == 2
    for fix in truth.records:
        assert fix.lat_deg == pytest.approx(50.08, abs=1e-3)
        assert fix.lon_deg == pytest.approx(36.11, abs=1e-3)
        assert fix.hdg_deg == pytest.approx(45.0, abs=1e-6)
        assert fix.vx_m_s == pytest.approx(5.0, abs=1e-9)
        assert fix.vy_m_s == pytest.approx(-5.0, abs=1e-9)
        assert fix.vz_m_s == pytest.approx(1.0, abs=1e-9)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Unit conversions (MAVLink integer encodings)
|
||||
|
||||
|
||||
def test_global_position_int_unit_conversions(tmp_path: Path) -> None:
    """Integer-encoded ``GLOBAL_POSITION_INT`` fields convert to SI units."""
    # Arrange
    tlog_path = tmp_path / "units.tlog"
    tlog_path.write_bytes(b"")
    single_fix = _global_position_int(
        ts_s=10.5,
        lat_e7=123_456_789,  # 12.3456789 deg
        lon_e7=-98_765_432,  # -9.8765432 deg
        alt_mm=12_345,  # 12.345 m
        hdg_cdeg=18_000,  # 180.00 deg
        vx_cm_s=-2_500,  # -25.00 m/s
        vy_cm_s=0,
        vz_cm_s=50,  # 0.5 m/s
    )

    # Act
    truth = load_tlog_ground_truth(
        tlog_path, source_factory=_factory_from([single_fix])
    )

    # Assert
    assert truth.source == "GLOBAL_POSITION_INT"
    [fix] = truth.records  # exactly one record expected
    approx_expectations = {
        "lat_deg": 12.345_678_9,
        "lon_deg": -9.876_543_2,
        "alt_m": 12.345,
        "hdg_deg": 180.0,
        "vx_m_s": -25.0,
    }
    for attr, wanted in approx_expectations.items():
        assert getattr(fix, attr) == pytest.approx(wanted, abs=1e-9)
    assert fix.vy_m_s == 0.0
    assert fix.vz_m_s == pytest.approx(0.5, abs=1e-9)
    assert fix.ts_ns == int(10.5 * 1_000_000_000)
|
||||
|
||||
|
||||
def test_gps_raw_int_cog_to_ned_decomposition(tmp_path: Path) -> None:
    """``GPS_RAW_INT`` speed + course splits into equal parts at 45°."""
    # Arrange
    tlog_path = tmp_path / "cog.tlog"
    tlog_path.write_bytes(b"")
    north_east_fix = _gps_raw_int(
        ts_s=0.0,
        lat_e7=0,
        lon_e7=0,
        alt_mm=0,
        vel_cm_s=2000,  # 20 m/s
        cog_cdeg=4500,  # 45° (NE)
    )

    # Act
    truth = load_tlog_ground_truth(
        tlog_path, source_factory=_factory_from([north_east_fix])
    )

    # Assert: 20 m/s at 45° gives vx = vy = 20/sqrt(2) ≈ 14.142.
    [fix] = truth.records
    per_axis = 20.0 * math.cos(math.radians(45.0))
    assert fix.vx_m_s == pytest.approx(per_axis, abs=1e-9)
    assert fix.vy_m_s == pytest.approx(per_axis, abs=1e-9)
    assert fix.vz_m_s == 0.0
    assert fix.hdg_deg == pytest.approx(45.0, abs=1e-9)
|
||||
|
||||
|
||||
def test_missing_timestamp_raises(tmp_path: Path) -> None:
    """A message object without ``_timestamp`` is rejected by the loader."""
    # Arrange
    tlog_path = tmp_path / "no_ts.tlog"
    tlog_path.write_bytes(b"")

    class _TimestamplessMessage:
        # Reports a GPS message type but never sets ``_timestamp``.
        def get_type(self) -> str:
            return "GLOBAL_POSITION_INT"

    broken_factory = _factory_from([_TimestamplessMessage()])  # type: ignore[list-item]

    # Act / Assert
    with pytest.raises(ReplayInputAdapterError, match="missing _timestamp attribute"):
        load_tlog_ground_truth(tlog_path, source_factory=broken_factory)
|
||||
|
||||
|
||||
def test_source_is_closed_after_load(tmp_path: Path) -> None:
    """The loader closes the source object its factory handed out."""
    # Arrange
    tlog_path = tmp_path / "close.tlog"
    tlog_path.write_bytes(b"")
    handed_out: dict[str, _FakeMavlinkSource] = {}

    def _recording_factory(_path: str) -> _FakeMavlinkSource:
        # Keep a reference so the test can inspect the source afterwards.
        handed_out["src"] = _FakeMavlinkSource([])
        return handed_out["src"]

    # Act
    load_tlog_ground_truth(tlog_path, source_factory=_recording_factory)

    # Assert
    source = handed_out["src"]
    assert source.closed is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# DTO surface
|
||||
|
||||
|
||||
def test_tlog_ground_truth_is_frozen() -> None:
    """Assigning to a ``TlogGroundTruth`` attribute must raise."""
    # Arrange
    container = TlogGroundTruth(records=(), source="")

    # Act / Assert
    with pytest.raises((AttributeError, TypeError)):
        setattr(container, "source", "GLOBAL_POSITION_INT")
|
||||
|
||||
|
||||
def test_tlog_gps_fix_is_frozen() -> None:
    """Assigning to a ``TlogGpsFix`` attribute must raise."""
    # Arrange: an all-zero fix; the values themselves are irrelevant.
    zero_fix = TlogGpsFix(
        ts_ns=0,
        lat_deg=0.0,
        lon_deg=0.0,
        alt_m=0.0,
        hdg_deg=0.0,
        vx_m_s=0.0,
        vy_m_s=0.0,
        vz_m_s=0.0,
    )

    # Act / Assert
    with pytest.raises((AttributeError, TypeError)):
        setattr(zero_fix, "lat_deg", 1.0)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# AC-4: mypy --strict scoped to the new module
|
||||
|
||||
|
||||
def test_ac4_mypy_strict_clean(tmp_path: Path) -> None:
    """AC-4: ``mypy --strict`` exits with status 0 on the AZ-697 module.

    mypy is launched as a subprocess of the current interpreter with a
    120-second timeout; captured stdout and stderr are included in the
    failure message.
    """
    # Arrange
    base_dir = Path(__file__).resolve().parents[2].parent
    module_path = base_dir.joinpath(
        "src",
        "gps_denied_onboard",
        "replay_input",
        "tlog_ground_truth.py",
    )
    command = [sys.executable, "-m", "mypy", "--strict", str(module_path)]

    # Act
    result = subprocess.run(
        command,
        capture_output=True,
        text=True,
        timeout=120,
    )

    # Assert
    report = (
        "mypy --strict reported errors:\n"
        f"stdout:\n{result.stdout}\n"
        f"stderr:\n{result.stderr}"
    )
    assert result.returncode == 0, report
|
||||
@@ -0,0 +1,152 @@
|
||||
"""AZ-697 AC-5 — gps_compare helper-move snapshot.
|
||||
|
||||
The ``l2_horizontal_m`` / ``match_percentage`` / ``GroundTruthRow``
|
||||
trio moved from ``tests/e2e/replay/_helpers.py`` into production code
|
||||
at ``src/gps_denied_onboard/helpers/gps_compare.py``. This module
|
||||
pins the post-move numerical behaviour so a future refactor of either
|
||||
the helper or the test re-export can't silently drift.
|
||||
|
||||
The numerical reference values are hand-computed against the WGS84
|
||||
mean Earth radius used by ``helpers/wgs_converter.py`` (AZ-279). The
|
||||
``tests/e2e/replay/test_helpers.py`` module continues to import from
|
||||
``tests/e2e/replay/_helpers`` (which now re-exports from the
|
||||
production location), so both call sites are exercised.
|
||||
|
||||
Style: every test follows the Arrange / Act / Assert pattern.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from gps_denied_onboard.helpers.gps_compare import (
|
||||
GroundTruthRow,
|
||||
l2_horizontal_m,
|
||||
match_percentage,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Snapshot: production location vs prior test-helpers location
|
||||
|
||||
|
||||
def test_l2_zero_at_same_point() -> None:
    """The distance from a point to itself is zero."""
    # Arrange
    lat, lon = 50.08, 36.11

    # Act
    distance_m = l2_horizontal_m(lat, lon, lat, lon)

    # Assert
    assert distance_m == pytest.approx(0.0, abs=1e-6)
|
||||
|
||||
|
||||
def test_l2_one_degree_latitude_is_111km() -> None:
    """One degree of latitude along a meridian is about 111.195 km."""
    # Arrange
    shared_lon = 36.11

    # Act
    distance_m = l2_horizontal_m(50.08, shared_lon, 51.08, shared_lon)

    # Assert: arc length of one degree on a sphere of radius 6_371_008.8 m.
    assert distance_m == pytest.approx(111_195.0, rel=0.001)
|
||||
|
||||
|
||||
def test_l2_symmetric() -> None:
    """Swapping the two endpoints does not change the distance."""
    # Arrange
    first = (49.991, 36.221)
    second = (50.080, 36.111)

    # Act
    forward = l2_horizontal_m(first[0], first[1], second[0], second[1])
    backward = l2_horizontal_m(second[0], second[1], first[0], first[1])

    # Assert
    assert forward == pytest.approx(backward, rel=1e-12)
|
||||
|
||||
|
||||
def test_l2_kharkiv_to_kyiv_known_pair() -> None:
    """Kharkiv to Kyiv comes out within 0.5 % of the ~411 km reference."""
    # Arrange
    kharkiv = (49.9935, 36.2304)
    kyiv = (50.4501, 30.5234)

    # Act
    distance_m = l2_horizontal_m(*kharkiv, *kyiv)

    # Assert
    assert distance_m == pytest.approx(411_000.0, rel=0.005)
|
||||
|
||||
|
||||
def test_match_percentage_all_within_threshold() -> None:
    """An emission sitting on the ground-truth point scores 1.0."""
    # Arrange
    truth_rows = [GroundTruthRow(t_s=0.0, lat_deg=50.0, lon_deg=36.0, alt_m=100.0)]
    on_target = {
        "emitted_at": 0,
        "position_wgs84": {"lat_deg": 50.0, "lon_deg": 36.0, "alt_m": 100.0},
    }

    # Act
    share = match_percentage([on_target], truth_rows, threshold_m=100.0)

    # Assert
    assert share == 1.0
|
||||
|
||||
|
||||
def test_match_percentage_none_within_threshold() -> None:
    """An emission ~111 km from the ground-truth point scores 0.0."""
    # Arrange
    truth_rows = [GroundTruthRow(t_s=0.0, lat_deg=50.0, lon_deg=36.0, alt_m=100.0)]
    # One full degree of latitude north of the ground-truth row.
    far_away = {
        "emitted_at": 0,
        "position_wgs84": {"lat_deg": 51.0, "lon_deg": 36.0, "alt_m": 100.0},
    }

    # Act
    share = match_percentage([far_away], truth_rows, threshold_m=100.0)

    # Assert
    assert share == 0.0
|
||||
|
||||
|
||||
def test_match_percentage_empty_emissions_zero() -> None:
    """With no emissions at all the match share is 0.0."""
    # Arrange
    truth_rows = [GroundTruthRow(t_s=0.0, lat_deg=50.0, lon_deg=36.0, alt_m=100.0)]

    # Act
    share = match_percentage([], truth_rows, threshold_m=100.0)

    # Assert
    assert share == 0.0
|
||||
|
||||
|
||||
def test_match_percentage_empty_ground_truth_raises() -> None:
    """An empty ground-truth list is rejected with an ``AssertionError``."""
    # Arrange
    lone_emission = {"emitted_at": 0, "position_wgs84": {"lat_deg": 50, "lon_deg": 36}}

    # Act / Assert
    with pytest.raises(AssertionError, match="ground_truth must be non-empty"):
        match_percentage([lone_emission], [], threshold_m=100.0)
|
||||
|
||||
|
||||
def test_ground_truth_row_is_frozen() -> None:
    """Assigning to a ``GroundTruthRow`` attribute must raise."""
    # Arrange
    sample_row = GroundTruthRow(t_s=0.0, lat_deg=50.0, lon_deg=36.0, alt_m=100.0)

    # Act / Assert
    with pytest.raises((AttributeError, TypeError)):
        setattr(sample_row, "lat_deg", 51.0)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Snapshot: re-export from prior test-helpers location returns the
|
||||
# same object as the production import. Guarantees there is no second
|
||||
# divergent copy under tests/.
|
||||
|
||||
|
||||
def test_test_helpers_reexport_is_identical() -> None:
    """The names in the e2e helper module are the production objects."""
    # Act
    from tests.e2e.replay import _helpers as legacy_helpers

    # Assert: object identity, not mere equality.
    production_objects = {
        "l2_horizontal_m": l2_horizontal_m,
        "match_percentage": match_percentage,
        "GroundTruthRow": GroundTruthRow,
    }
    for exported_name, production_obj in production_objects.items():
        assert getattr(legacy_helpers, exported_name) is production_obj
|
||||
@@ -0,0 +1,342 @@
|
||||
"""AZ-699 — Report writer + accuracy-distribution unit tests.
|
||||
|
||||
Covers AZ-699 AC-2 (report layout) and AC-3 (failure-message
|
||||
templating). The blackbox AC-1 (honest PASS/FAIL on real Derkachi
|
||||
inputs) lives in ``tests/e2e/replay/test_derkachi_real_tlog.py``
|
||||
and is skipped without the real video + ``RUN_REPLAY_E2E=1``.
|
||||
|
||||
Style: every test follows the Arrange / Act / Assert pattern.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from gps_denied_onboard.helpers.gps_compare import (
|
||||
GroundTruthRow,
|
||||
HorizontalErrorDistribution,
|
||||
horizontal_error_distribution,
|
||||
percentile_sorted,
|
||||
)
|
||||
from gps_denied_onboard.helpers.accuracy_report import (
|
||||
AC3_GATE_PCT,
|
||||
AC3_GATE_THRESHOLD_M,
|
||||
ReportContext,
|
||||
format_failure_message,
|
||||
render_report,
|
||||
verdict_passes_ac3,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# percentile_sorted: pin numpy-equivalent linear interpolation
|
||||
|
||||
|
||||
def test_percentile_sorted_min_max_and_p50_on_odd_length() -> None:
    """p0, p100 and p50 of a five-element list hit exact elements."""
    # Arrange
    samples = [1.0, 2.0, 3.0, 4.0, 5.0]
    expected_by_percentile = ((0.0, 1.0), (100.0, 5.0), (50.0, 3.0))

    # Act / Assert
    for percentile, wanted in expected_by_percentile:
        assert percentile_sorted(samples, percentile) == wanted
|
||||
|
||||
|
||||
def test_percentile_sorted_p95_linear_interpolation() -> None:
    """p95 of the integers 0..100 is 95."""
    # Arrange
    samples = list(range(101))  # 0..100 inclusive

    # Act
    result = percentile_sorted(samples, 95.0)

    # Assert: with 101 evenly spaced values the 95th percentile falls
    # exactly on index 95, whose value is 95.
    assert result == pytest.approx(95.0, abs=1e-9)
|
||||
|
||||
|
||||
def test_percentile_sorted_empty_returns_zero() -> None:
    """An empty input yields 0.0."""
    # Act
    result = percentile_sorted([], 50.0)

    # Assert
    assert result == 0.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# horizontal_error_distribution: walk emissions once, return aggregate
|
||||
|
||||
|
||||
def test_distribution_zero_error_at_collocated_points() -> None:
    """Emissions on top of the ground truth give zero error everywhere."""
    # Arrange: two ground-truth rows one second apart and two emissions
    # at exactly the same coordinates.
    truth_rows = [
        GroundTruthRow(t_s=float(k), lat_deg=50.0, lon_deg=30.0, alt_m=100.0)
        for k in range(2)
    ]
    emissions = [
        {
            "emitted_at": k * 1_000_000_000,
            "position_wgs84": {"lat_deg": 50.0, "lon_deg": 30.0, "alt_m": 100.0},
        }
        for k in range(2)
    ]

    # Act
    dist = horizontal_error_distribution(emissions, truth_rows)

    # Assert
    assert dist.count == 2
    assert dist.horizontal_error_mean_m == pytest.approx(0.0, abs=1e-6)
    assert dist.horizontal_error_p99_m == pytest.approx(0.0, abs=1e-6)
    shares = [dist.threshold_hit_share[limit] for limit in (10.0, 25.0, 50.0, 100.0)]
    for share in shares:
        assert share == pytest.approx(1.0)
|
||||
|
||||
|
||||
def test_distribution_threshold_hit_share_partials() -> None:
    """Four emissions at growing offsets fill the threshold buckets one by one."""
    # Arrange: one ground-truth point and four emissions displaced north
    # by roughly 5 m, 20 m, 40 m and 80 m. Dividing metres by 111_000
    # converts them to degrees of latitude (about 111 m per 0.001 deg),
    # so each emission lands in a different bucket below 10 / 25 / 50 /
    # 100 m.
    truth_rows = [GroundTruthRow(t_s=0.0, lat_deg=50.0, lon_deg=30.0, alt_m=100.0)]
    emissions = []
    for index, offset_m in enumerate((5.0, 20.0, 40.0, 80.0)):
        displaced = {
            "lat_deg": 50.0 + (offset_m / 111_000.0),
            "lon_deg": 30.0,
            "alt_m": 100.0,
        }
        emissions.append({"emitted_at": index, "position_wgs84": displaced})

    # Act
    dist = horizontal_error_distribution(emissions, truth_rows)

    # Assert
    assert dist.count == 4
    shares = dist.threshold_hit_share
    assert shares[10.0] == pytest.approx(0.25)  # the 5 m emission only
    assert shares[25.0] == pytest.approx(0.50)  # plus the 20 m one
    assert shares[50.0] == pytest.approx(0.75)  # plus the 40 m one
    assert shares[100.0] == pytest.approx(1.0)  # plus the 80 m one
|
||||
|
||||
|
||||
def test_distribution_vertical_skipped_when_alt_absent() -> None:
    """An emission without ``alt_m`` counts horizontally but not vertically."""
    # Arrange
    truth_rows = [GroundTruthRow(t_s=0.0, lat_deg=50.0, lon_deg=30.0, alt_m=100.0)]
    no_altitude = {
        "emitted_at": 0,
        "position_wgs84": {"lat_deg": 50.0, "lon_deg": 30.0},
    }

    # Act
    dist = horizontal_error_distribution([no_altitude], truth_rows)

    # Assert
    assert dist.vertical_count == 0
    assert dist.vertical_error_mean_m == 0.0
    assert dist.count == 1
|
||||
|
||||
|
||||
def test_distribution_empty_emissions_returns_zero_dist() -> None:
    """No emissions produce a distribution with zero count and shares."""
    # Arrange
    truth_rows = [GroundTruthRow(t_s=0.0, lat_deg=50.0, lon_deg=30.0, alt_m=100.0)]

    # Act
    dist = horizontal_error_distribution([], truth_rows)

    # Assert
    assert dist.count == 0
    assert dist.horizontal_error_mean_m == 0.0
    for limit in dist.threshold_hit_share:
        assert dist.threshold_hit_share[limit] == 0.0
|
||||
|
||||
|
||||
def test_distribution_raises_on_empty_ground_truth() -> None:
    """An empty ground-truth list is rejected with an ``AssertionError``."""
    # Arrange
    lone_emission = {
        "emitted_at": 0,
        "position_wgs84": {"lat_deg": 50.0, "lon_deg": 30.0},
    }

    # Act / Assert
    with pytest.raises(AssertionError):
        horizontal_error_distribution([lone_emission], [])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Verdict + failure-message: AC-3 honesty
|
||||
|
||||
|
||||
def _passing_distribution() -> HorizontalErrorDistribution:
    """Return a fixture with 92 % of samples within 100 m and no vertical data."""
    hit_share = {10.0: 0.2, 25.0: 0.6, 50.0: 0.78, 100.0: 0.92}
    return HorizontalErrorDistribution(
        count=100,
        threshold_hit_share=hit_share,
        horizontal_error_mean_m=20.0,
        horizontal_error_p50_m=18.0,
        horizontal_error_p95_m=70.0,
        horizontal_error_p99_m=95.0,
        vertical_count=0,
        vertical_error_mean_m=0.0,
        vertical_error_p50_m=0.0,
        vertical_error_p95_m=0.0,
    )
|
||||
|
||||
|
||||
def _failing_distribution() -> HorizontalErrorDistribution:
    """Return a fixture with only 45 % of samples within 100 m and no vertical data."""
    hit_share = {10.0: 0.05, 25.0: 0.18, 50.0: 0.31, 100.0: 0.45}
    return HorizontalErrorDistribution(
        count=200,
        threshold_hit_share=hit_share,
        horizontal_error_mean_m=180.0,
        horizontal_error_p50_m=140.0,
        horizontal_error_p95_m=420.0,
        horizontal_error_p99_m=580.0,
        vertical_count=0,
        vertical_error_mean_m=0.0,
        vertical_error_p50_m=0.0,
        vertical_error_p95_m=0.0,
    )
|
||||
|
||||
|
||||
def _context(method: str = "factory-sheet") -> ReportContext:
    """Return a fixed ``ReportContext`` whose calibration method is ``method``."""
    tlog = Path("/tmp/derkachi.tlog")
    video = Path("/tmp/flight_derkachi.mp4")
    return ReportContext(
        calibration_acquisition_method=method,
        run_date_utc="2026-05-20",
        tlog_path=tlog,
        video_path=video,
        clip_duration_s=180.0,
        emissions_count=200,
    )
|
||||
|
||||
|
||||
def test_verdict_passes_when_at_least_80pct_within_100m() -> None:
    """The passing fixture clears the gate and the failing fixture does not."""
    # Act
    passing_verdict = verdict_passes_ac3(_passing_distribution())
    failing_verdict = verdict_passes_ac3(_failing_distribution())

    # Assert
    assert passing_verdict is True
    assert failing_verdict is False
|
||||
|
||||
|
||||
def test_verdict_fails_on_zero_count() -> None:
    """A distribution with no samples cannot pass the gate."""
    # Arrange: every statistic is zero and there are no samples.
    no_samples = HorizontalErrorDistribution(
        count=0,
        threshold_hit_share={100.0: 0.0},
        horizontal_error_mean_m=0.0,
        horizontal_error_p50_m=0.0,
        horizontal_error_p95_m=0.0,
        horizontal_error_p99_m=0.0,
        vertical_count=0,
        vertical_error_mean_m=0.0,
        vertical_error_p50_m=0.0,
        vertical_error_p95_m=0.0,
    )

    # Act
    verdict = verdict_passes_ac3(no_samples)

    # Assert
    assert verdict is False
|
||||
|
||||
|
||||
def test_failure_message_references_calibration_method_factory_sheet() -> None:
    """AZ-699 AC-3: the failure text names the calibration method and stats."""
    # Arrange
    distribution = _failing_distribution()
    context = _context(method="factory-sheet")

    # Act
    msg = format_failure_message(distribution, context)

    # Assert: every fragment below must appear somewhere in the message.
    required_fragments = (
        "AZ-699 AC-3",
        "45.0 %",  # share within 100 m (0.45) shown as a percentage
        "100 m",
        f"{AC3_GATE_PCT:.0f} %",
        "Calibration: factory-sheet",
        "mean=180.0 m",
        "p95=420.0 m",
        "p99=580.0 m",
    )
    for fragment in required_fragments:
        assert fragment in msg
|
||||
|
||||
|
||||
def test_failure_message_references_calibration_method_placeholder() -> None:
    """The failure text reflects a ``placeholder`` calibration method."""
    # Arrange
    context = _context(method="placeholder")

    # Act
    msg = format_failure_message(_failing_distribution(), context)

    # Assert
    assert "Calibration: placeholder" in msg
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Render report: AC-2 — every required row present
|
||||
|
||||
|
||||
def test_render_report_contains_all_required_rows_on_pass() -> None:
    """AC-2: a passing report carries every required heading and table row."""
    # Act
    text = render_report(_passing_distribution(), _context(), passed=True)

    # Assert
    assert text.startswith("# Real-flight validation — 2026-05-20")
    required_sections = (
        "**Verdict**: PASS",
        "## Horizontal error (metres)",
        "## Threshold-hit share",
        "Calibration acquisition method: factory-sheet",
    )
    for section in required_sections:
        assert section in text
    # Each statistic and each threshold appears as its own table cell.
    for stat in ("Mean", "p50", "p95", "p99"):
        assert f"| {stat} |" in text
    for threshold in (10, 25, 50, 100):
        assert f"| {threshold} |" in text
|
||||
|
||||
|
||||
def test_render_report_marks_failure_when_below_gate() -> None:
    """A failing report says FAIL and shows the 45 % share at 100 m."""
    # Arrange
    distribution = _failing_distribution()

    # Act
    text = render_report(distribution, _context(), passed=False)

    # Assert
    assert "**Verdict**: FAIL" in text
    # The failing fixture has 0.45 of its samples within 100 m.
    assert "| 100 | 45.0 |" in text
|
||||
|
||||
|
||||
def test_render_report_includes_vertical_when_available() -> None:
    """With vertical samples present the report gains a vertical section."""
    # Arrange: ten samples with non-trivial vertical statistics.
    with_vertical = HorizontalErrorDistribution(
        count=10,
        threshold_hit_share={10.0: 1.0, 25.0: 1.0, 50.0: 1.0, 100.0: 1.0},
        horizontal_error_mean_m=10.0,
        horizontal_error_p50_m=10.0,
        horizontal_error_p95_m=10.0,
        horizontal_error_p99_m=10.0,
        vertical_count=10,
        vertical_error_mean_m=4.5,
        vertical_error_p50_m=4.0,
        vertical_error_p95_m=8.5,
    )

    # Act
    text = render_report(with_vertical, _context(), passed=True)

    # Assert
    expected_lines = (
        "## Vertical error (metres)",
        "| Mean | 4.50 |",
        "| p95 | 8.50 |",
        "| Samples | 10 |",
    )
    for line in expected_lines:
        assert line in text
|
||||
|
||||
|
||||
def test_render_report_marks_vertical_skipped_when_no_samples() -> None:
    """Without vertical samples the report carries the skip notice."""
    # Arrange: the passing fixture has vertical_count == 0.
    no_vertical = _passing_distribution()

    # Act
    text = render_report(no_vertical, _context(), passed=True)

    # Assert
    skip_notice = "_No emissions carried a comparable altitude"
    assert skip_notice in text
|
||||
@@ -0,0 +1,373 @@
|
||||
"""AZ-700 — render_map CLI + HTML renderer unit tests.
|
||||
|
||||
Covers AC-1 (CLI smoke + valid HTML), AC-2 (two distinct
|
||||
polylines), AC-3 (4 markers + 100 m + 50 m circles), and AC-4
|
||||
(summary embedding). AC-5 (offline-tiles flag) is exercised via a
|
||||
dedicated test.
|
||||
|
||||
Folium is an optional dependency (``[operator-tools]`` group);
|
||||
these tests skip cleanly when it is not importable so the airborne
|
||||
test suite stays green even when the operator extra is absent.
|
||||
|
||||
Style: every test follows the Arrange / Act / Assert pattern.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import struct
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
folium = pytest.importorskip(
|
||||
"folium",
|
||||
reason="folium is an operator-only dep; install gps-denied-onboard[operator-tools]",
|
||||
)
|
||||
|
||||
from gps_denied_onboard.cli.render_map import (
|
||||
RenderInputs,
|
||||
_build_argparser,
|
||||
load_estimated_track,
|
||||
load_ground_truth_track,
|
||||
main,
|
||||
render_map_html,
|
||||
)
|
||||
|
||||
|
||||
def _write_minimal_tlog(path: Path, fixes: list[tuple[float, float, float]]) -> None:
    """Write a tiny binary tlog containing only ``GLOBAL_POSITION_INT``.

    Each record is an 8-byte big-endian unsigned timestamp in
    microseconds followed by the packed message bytes from the pymavlink
    v20 ``ardupilotmega`` dialect. Fixes are spaced 500 ms apart,
    starting at zero.

    Args:
        path: Destination file; created or overwritten.
        fixes: ``(lat_deg, lon_deg, alt_m)`` tuples, one message each.
            The altitude is written to both ``alt`` and ``relative_alt``.

    Coordinates are scaled with ``round()`` rather than ``int()``:
    ``int()`` truncates toward zero, so a product that lands just below
    an integer because of binary floating-point error would be encoded
    one unit too low.
    """
    from pymavlink.dialects.v20 import ardupilotmega as mavlink

    mav = mavlink.MAVLink(file=None, srcSystem=1, srcComponent=1)
    with path.open("wb") as fp:
        for i, (lat, lon, alt) in enumerate(fixes):
            time_boot_ms = i * 500
            alt_mm = round(alt * 1000)
            msg = mav.global_position_int_encode(
                time_boot_ms=time_boot_ms,
                lat=round(lat * 1e7),
                lon=round(lon * 1e7),
                alt=alt_mm,
                relative_alt=alt_mm,
                vx=0,
                vy=0,
                vz=0,
                hdg=0,
            )
            # Pack before writing anything so a packing failure cannot
            # leave a timestamp with no message behind it.
            payload = msg.pack(mav)
            ts_us = i * 500_000
            fp.write(struct.pack(">Q", ts_us))
            fp.write(payload)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Helpers
|
||||
|
||||
|
||||
def _write_jsonl(path: Path, rows: list[dict[str, object]]) -> None:
    """Serialise ``rows`` to ``path`` as JSON Lines with a trailing newline."""
    encoded_rows = [json.dumps(row) for row in rows]
    path.write_text("\n".join(encoded_rows) + "\n")
|
||||
|
||||
|
||||
def _example_inputs() -> RenderInputs:
    """Build a three-point estimated/truth track pair with no summary."""
    estimated = [(50.0, 30.0), (50.001, 30.001), (50.002, 30.002)]
    truth = [(50.0, 30.0), (50.0005, 30.0005), (50.001, 30.001)]
    return RenderInputs(
        estimated_track=estimated,
        truth_track=truth,
        summary_markdown=None,
        title="unit-test",
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# load_estimated_track / load_ground_truth_track
|
||||
|
||||
|
||||
def test_load_estimated_track_skips_blank_lines(tmp_path: Path) -> None:
    """A blank line between two records does not produce a track point."""
    # Arrange
    first_record = '{"position_wgs84":{"lat_deg":50.0,"lon_deg":30.0,"alt_m":100}}\n'
    blank_line = "\n"
    second_record = '{"position_wgs84":{"lat_deg":50.1,"lon_deg":30.1,"alt_m":110}}\n'
    jsonl_path = tmp_path / "out.jsonl"
    jsonl_path.write_text(first_record + blank_line + second_record)

    # Act
    loaded = load_estimated_track(jsonl_path)

    # Assert
    expected = [(50.0, 30.0), (50.1, 30.1)]
    assert loaded == expected
|
||||
|
||||
|
||||
def test_load_estimated_track_raises_on_missing_position(tmp_path: Path) -> None:
    """A record without ``position_wgs84`` is rejected with ``ValueError``."""
    # Arrange
    jsonl_path = tmp_path / "out.jsonl"
    record_without_position = '{"frame_id":1}\n'
    jsonl_path.write_text(record_without_position)

    # Act / Assert
    with pytest.raises(ValueError, match="missing position_wgs84"):
        load_estimated_track(jsonl_path)
|
||||
|
||||
|
||||
def test_load_estimated_track_raises_on_non_numeric_lat(tmp_path: Path) -> None:
    """A string latitude is rejected with ``ValueError``."""
    # Arrange
    jsonl_path = tmp_path / "out.jsonl"
    record_with_string_lat = '{"position_wgs84":{"lat_deg":"oops","lon_deg":30.0}}\n'
    jsonl_path.write_text(record_with_string_lat)

    # Act / Assert
    with pytest.raises(ValueError, match="non-numeric lat/lon"):
        load_estimated_track(jsonl_path)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# render_map_html
|
||||
|
||||
|
||||
def test_render_map_html_emits_two_polylines() -> None:
    """AC-2: the rendered map has exactly two polylines, one red and one blue."""
    # Act
    html = render_map_html(_example_inputs())

    # Assert — AC-2: two distinct polyline layers with our pinned colors.
    polyline_count = html.count("L.polyline")
    assert polyline_count == 2, (
        "expected exactly 2 polylines (truth + estimated); "
        f"saw {polyline_count}"
    )
    assert '"color": "red"' in html, "truth polyline (red) missing"
    assert '"color": "blue"' in html, "estimated polyline (blue) missing"
|
||||
|
||||
|
||||
def test_render_map_html_emits_four_markers_and_two_circles() -> None:
    """AC-3: four markers plus a 100 m and a 50 m scale circle are rendered."""
    # Act
    html = render_map_html(_example_inputs())

    # Assert — AC-3: 2 markers per track (start + end) = 4 total.
    marker_count = html.count("L.marker")
    assert marker_count == 4, f"expected 4 markers; saw {marker_count}"

    # Scale circles at the truth start: radius 100 + 50.
    circle_count = html.count("L.circle")
    assert circle_count == 2, (
        "expected 2 scale circles (100 m + 50 m); "
        f"saw {circle_count}"
    )
    assert '"radius": 100.0' in html
    assert '"radius": 50.0' in html
|
||||
|
||||
|
||||
def test_render_map_html_embeds_summary_when_provided() -> None:
    """AC-4: a supplied markdown summary appears in the rendered HTML."""
    # Arrange
    summary = (
        "# Real-flight validation — 2026-05-20\n"
        "**Verdict**: PASS\n"
        "| Mean | 12.3 |"
    )
    inputs = RenderInputs(
        estimated_track=[(50.0, 30.0), (50.001, 30.001)],
        truth_track=[(50.0, 30.0), (50.0005, 30.0005)],
        summary_markdown=summary,
        title="t",
    )

    # Act
    html = render_map_html(inputs)

    # Assert — AC-4: the markdown body shows up in the HTML.
    assert "Real-flight validation" in html
    # `*` is not HTML-special, so the emphasis markers are expected verbatim.
    assert "**Verdict**: PASS" in html
    # Only the presence of the pre-wrap wrapper styling is checked here;
    # escaping of HTML-special characters is not asserted by this test.
    assert "white-space:pre-wrap" in html
|
||||
|
||||
|
||||
def test_render_map_html_raises_on_both_tracks_empty() -> None:
    """Rendering with no estimated and no truth points raises ``ValueError``."""
    # Arrange
    no_points: list[tuple[float, float]] = []
    inputs = RenderInputs(
        estimated_track=no_points,
        truth_track=[],
        summary_markdown=None,
        title="empty",
    )

    # Act / Assert
    with pytest.raises(ValueError, match="empty"):
        render_map_html(inputs)
|
||||
|
||||
|
||||
def test_render_map_html_offline_tiles_omits_openstreetmap() -> None:
    """AC-5: ``offline_tiles=True`` drops the OpenStreetMap reference."""
    # Act
    default_html = render_map_html(_example_inputs())
    offline_html = render_map_html(_example_inputs(), offline_tiles=True)

    # Assert — `tiles=None` removes the default OpenStreetMap tile URL.
    needle = "openstreetmap"
    assert needle in default_html.lower()
    assert needle not in offline_html.lower()
|
||||
|
||||
|
||||
def test_render_map_html_offline_tiles_template_uses_local_url() -> None:
    """A custom tile template is embedded together with its attribution text."""
    # Arrange
    template = "file:///opt/tiles/{z}/{x}/{y}.png"

    # Act
    html = render_map_html(_example_inputs(), offline_tiles_template=template)

    # Assert
    assert "file:///opt/tiles/{z}/{x}/{y}.png" in html
    assert "local offline tile bundle" in html
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# CLI smoke (AC-1)
|
||||
|
||||
|
||||
def test_cli_writes_html_with_default_tiles(tmp_path: Path) -> None:
    """AC-1: the CLI exits 0 and writes a non-trivial HTML document."""
    # Arrange
    estimated = tmp_path / "estimator.jsonl"
    estimator_rows = [
        {"position_wgs84": {"lat_deg": 50.0, "lon_deg": 30.0, "alt_m": 100}},
        {"position_wgs84": {"lat_deg": 50.001, "lon_deg": 30.001, "alt_m": 101}},
    ]
    _write_jsonl(estimated, estimator_rows)

    truth = tmp_path / "synth.tlog"
    truth_fixes = [
        (50.0, 30.0, 100.0),
        (50.0005, 30.0005, 100.0),
        (50.001, 30.001, 100.0),
    ]
    _write_minimal_tlog(truth, truth_fixes)

    output = tmp_path / "map.html"
    argv = [
        "--estimated", str(estimated),
        "--truth", str(truth),
        "--output", str(output),
    ]

    # Act
    rc = main(argv)

    # Assert — AC-1: clean exit + non-empty HTML.
    assert rc == 0
    assert output.is_file()
    body = output.read_text()
    assert body.startswith("<!DOCTYPE html>")
    assert len(body) > 1000
|
||||
|
||||
|
||||
def test_cli_embeds_summary_when_flag_supplied(tmp_path: Path) -> None:
    """AC-4: ``--summary`` content is embedded in the written map."""
    # Arrange
    estimated = tmp_path / "estimator.jsonl"
    estimator_rows = [
        {"position_wgs84": {"lat_deg": 50.0, "lon_deg": 30.0, "alt_m": 100}},
        {"position_wgs84": {"lat_deg": 50.001, "lon_deg": 30.001, "alt_m": 101}},
    ]
    _write_jsonl(estimated, estimator_rows)

    truth = tmp_path / "synth.tlog"
    _write_minimal_tlog(truth, [(50.0, 30.0, 100.0), (50.001, 30.001, 100.0)])

    summary = tmp_path / "real_flight_validation_2026-05-20.md"
    summary_text = (
        "# Real-flight validation — 2026-05-20\n"
        "**Verdict**: FAIL\n\n"
        "## Horizontal error (metres)\n"
        "| Mean | 142.5 |\n"
    )
    summary.write_text(summary_text)

    output = tmp_path / "map.html"
    argv = [
        "--estimated", str(estimated),
        "--truth", str(truth),
        "--output", str(output),
        "--summary", str(summary),
    ]

    # Act
    rc = main(argv)

    # Assert — AC-4
    assert rc == 0
    body = output.read_text()
    assert "Real-flight validation" in body
    assert "**Verdict**: FAIL" in body
    assert "Mean | 142.5" in body
|
||||
|
||||
|
||||
def test_cli_fails_fast_when_summary_path_missing(tmp_path: Path) -> None:
    """A non-existent ``--summary`` path yields exit code 2 and no output file."""
    # Arrange
    estimated = tmp_path / "estimator.jsonl"
    estimator_rows = [
        {"position_wgs84": {"lat_deg": 50.0, "lon_deg": 30.0, "alt_m": 100}},
    ]
    _write_jsonl(estimated, estimator_rows)
    truth = tmp_path / "synth.tlog"
    _write_minimal_tlog(truth, [(50.0, 30.0, 100.0), (50.001, 30.001, 100.0)])

    output = tmp_path / "map.html"
    missing_summary = tmp_path / "does_not_exist.md"
    argv = [
        "--estimated", str(estimated),
        "--truth", str(truth),
        "--output", str(output),
        "--summary", str(missing_summary),
    ]

    # Act
    rc = main(argv)

    # Assert
    assert rc == 2
    assert not output.exists(), "must not write the map when summary path is invalid"
|
||||
|
||||
|
||||
def test_argparser_requires_three_paths() -> None:
    """Parsing aborts with ``SystemExit`` when the path flags are incomplete."""
    # Arrange
    parser = _build_argparser()
    incomplete_argvs = (
        [],
        ["--estimated", "/tmp/a.jsonl"],
    )

    # Act / Assert
    for argv in incomplete_argvs:
        with pytest.raises(SystemExit):
            parser.parse_args(argv)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# load_ground_truth_track integration with AZ-697
|
||||
|
||||
|
||||
def test_load_ground_truth_track_returns_lat_lon_pairs(tmp_path: Path) -> None:
    """A synthetic three-fix tlog loads back as three (lat, lon) pairs."""
    # Arrange — synthesize a minimal tlog and round-trip through AZ-697.
    tlog_path = tmp_path / "synth.tlog"
    fixes = [
        (50.000, 30.000, 100.0),
        (50.001, 30.001, 101.0),
        (50.002, 30.002, 102.0),
    ]
    _write_minimal_tlog(tlog_path, fixes)

    # Act
    track = load_ground_truth_track(tlog_path)

    # Assert
    assert len(track) == 3
    for point in track:
        lat, lon = point
        assert 49.99 < lat < 50.01
        assert 29.99 < lon < 30.01
|
||||
Reference in New Issue
Block a user