[AZ-594] Implement core-three harness stubs (fdr_reader, frame_source_replay, imu_replay)

Replaces the NotImplementedError stubs AZ-406 reserved on three runner-
side helpers; these were stranded from any tracker ticket since
AZ-407/408 never came back to fill them. Concrete bodies:

* fdr_reader.iter_records: JSONL parser + wire-envelope validator;
  recursive *.jsonl walk; projects {schema_version, ts, producer_id,
  kind, payload} to runner-side FdrRecord with record_type/monotonic_ms
  renames; yields oldest-first.
* frame_source_replay.replay_video: OpenCV VideoCapture decode + JPEG
  re-encode; auto-detects file vs directory; injectable sleep_fn for
  unit-test pacing.
* imu_replay.ImuReplayer.replay: csv.DictReader parse; degrees->radians
  attitude conversion; tolerates scientific notation; same sleep_fn
  injection pattern.

Adds 34 unit tests (14 + 10 + 10). Full e2e unit suite: 558 passed (+31).
Existing scenario _harness_helpers_implemented probes still return False
because they also depend on sitl_observer / fc_proxy_runtime stubs that
remain pending; scenario probe cleanup is out of AZ-594 scope.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-17 08:42:12 +03:00
parent 2d6d44af5d
commit 1d260f7e41
10 changed files with 1196 additions and 76 deletions
+90 -12
View File
@@ -1,22 +1,32 @@
"""Post-run filesystem read of the FDR archive.
The FDR archive is a line-delimited JSON record stream per AZ-272 / AZ-273.
Each line is an `FdrRecord` envelope (producer_id, type, monotonic_ms,
payload). The runner image must NEVER import the SUT's FdrRecord schema
directly — it parses the JSON bytes and validates against a duplicate
record-type allowlist baked into this module.
Each line is an FDR envelope on the wire schema
``{schema_version, ts, producer_id, kind, payload, extra?}``. This module
parses the JSON bytes and validates the wire envelope structurally — the
runner image NEVER imports the SUT's FdrRecord schema directly so a
breaking SUT change surfaces as a parse failure here (visible drift)
rather than silently following along.
Public surface only; concrete parser + assertion helpers are owned by
AZ-441 (NFT-LIM-02 — FDR size budget) and the resilience scenario tasks
that need to crawl the archive (AZ-432, AZ-433, AZ-435).
The runner-side `FdrRecord` dataclass renames `kind` → `record_type` and
projects `ts` (ISO 8601 wall-clock) onto an integer `monotonic_ms` field
for downstream evaluators that work in milliseconds. Within one flight,
ISO 8601 ms-since-epoch is monotonic at the millisecond resolution the
evaluators care about (NFR-RES NTP drift is excluded by AC-7 of the FDR
contract: the on-board clock is monotonic over the lifetime of one
flight session).
"""
from __future__ import annotations
import json
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Iterator
_WIRE_REQUIRED_KEYS = ("schema_version", "ts", "producer_id", "kind", "payload")
@dataclass(frozen=True)
class FdrRecord:
@@ -33,15 +43,83 @@ class FdrRecord:
payload: dict[str, object]
def _ts_to_monotonic_ms(ts: str) -> int:
"""Project ISO 8601 ``ts`` onto an int millisecond value.
Accepts trailing ``Z`` (UTC) which ``datetime.fromisoformat`` did not
accept until 3.11; we normalise to ``+00:00`` first.
"""
normalised = ts[:-1] + "+00:00" if ts.endswith("Z") else ts
dt = datetime.fromisoformat(normalised)
return int(dt.timestamp() * 1000)
def _parse_envelope(line_bytes: bytes, source: Path, line_no: int) -> FdrRecord:
"""Decode one JSONL line into a typed envelope.
Wire-side keys are validated structurally; downstream payload keys are
NOT validated here (the consuming evaluator owns its own payload contract).
"""
decoded = json.loads(line_bytes)
if not isinstance(decoded, dict):
raise ValueError(
f"FDR line is not a JSON object: {source}:{line_no}: type={type(decoded).__name__}"
)
missing = [k for k in _WIRE_REQUIRED_KEYS if k not in decoded]
if missing:
raise ValueError(
f"FDR wire envelope missing required keys {missing} at {source}:{line_no}"
)
ts = decoded["ts"]
if not isinstance(ts, str) or not ts:
raise ValueError(f"FDR envelope `ts` must be a non-empty ISO 8601 string at {source}:{line_no}")
producer_id = decoded["producer_id"]
if not isinstance(producer_id, str) or not producer_id:
raise ValueError(
f"FDR envelope `producer_id` must be a non-empty string at {source}:{line_no}"
)
kind = decoded["kind"]
if not isinstance(kind, str) or not kind:
raise ValueError(f"FDR envelope `kind` must be a non-empty string at {source}:{line_no}")
payload = decoded["payload"]
if not isinstance(payload, dict):
raise ValueError(f"FDR envelope `payload` must be an object at {source}:{line_no}")
return FdrRecord(
producer_id=producer_id,
monotonic_ms=_ts_to_monotonic_ms(ts),
record_type=kind,
payload=payload,
)
def iter_records(fdr_archive_root: Path) -> Iterator[FdrRecord]:
"""Iterate every FDR record in the archive root (ordered by monotonic_ms).
Raises NotImplementedError until AZ-441 supplies the orjson-backed parser.
Walks every ``*.jsonl`` file under ``fdr_archive_root`` (recursive),
parses each line as a wire envelope, and yields the runner-side
``FdrRecord`` projection. Records are emitted oldest-first across the
union of all files.
Raises ``FileNotFoundError`` if the archive root does not exist.
Raises ``ValueError`` (with a file + line pointer) on malformed JSON,
a wrong-shape envelope, or an unparseable ``ts``.
"""
raise NotImplementedError(
"fdr_reader.iter_records is owned by AZ-441 — AZ-406 supplies only "
"the public surface."
)
if not fdr_archive_root.exists():
raise FileNotFoundError(
f"FDR archive root not found: {fdr_archive_root}"
)
records: list[FdrRecord] = []
for jsonl_path in sorted(fdr_archive_root.rglob("*.jsonl")):
if not jsonl_path.is_file():
continue
with jsonl_path.open("rb") as fh:
for line_no, raw in enumerate(fh, start=1):
stripped = raw.strip()
if not stripped:
continue
records.append(_parse_envelope(stripped, jsonl_path, line_no))
records.sort(key=lambda r: r.monotonic_ms)
yield from records
def archive_size_bytes(fdr_archive_root: Path) -> int:
+107 -28
View File
@@ -6,25 +6,30 @@ Two replay modes:
SUT polls.
2. Video replay (FT-P-02, FT-P-04, FT-N-01..04, NFT-PERF-*) — decode an
MP4 with OpenCV and emit frames at the encoded FPS (or a user-supplied
rate for fast-forward).
rate for fast-forward). ``replay_video`` also accepts a directory of
extracted frames (`AD000001.jpg`-style) so the AZ-408 injectors that
emit frame directories rather than MP4s can use the same surface.
The actual frame-source path inside the SUT container is configured via the
``ONBOARD_FRAME_SOURCE_PATH`` environment variable on the SUT — the runner
writes to a shared tmpfs volume mounted at the same path inside both
containers.
This file currently provides the public surface used by per-scenario tests;
concrete implementations land alongside their consuming test tasks
(AZ-407 onward). The intent is that `FrameSourceReplayer` is a stable API
the test specs can rely on while the underlying replay strategy is filled
in incrementally.
"""
from __future__ import annotations
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Protocol
from typing import Callable, Protocol
import cv2
# Image extensions handled by ``replay_image_directory`` / ``replay_video``
# when given a directory rather than a file. Sorted-by-name ordering implies
# zero-padded filenames (``AD000001.jpg``) which the AZ-407 / AZ-408
# fixture builders already produce.
_IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp")
@dataclass(frozen=True)
@@ -43,35 +48,109 @@ class FrameSink(Protocol):
class FrameSourceReplayer:
"""Public surface for replaying frames into the SUT's frame-source path.
"""Public surface for replaying frames into the SUT's frame-source path."""
AZ-407 (Static fixture builders) supplies the concrete still-image replay
implementation; AZ-408 (Runtime synthetic-injection) supplies the video
+ injector variants. AZ-406 only commits to the contract.
"""
def __init__(self, sink: FrameSink, cadence: ReplayCadence | None = None) -> None:
def __init__(
self,
sink: FrameSink,
cadence: ReplayCadence | None = None,
*,
sleep_fn: Callable[[float], None] = time.sleep,
) -> None:
self._sink = sink
self._cadence = cadence or ReplayCadence()
# Injected so unit tests can drop wall-clock pacing entirely.
self._sleep = sleep_fn
def replay_image_directory(self, directory: Path) -> int:
"""Replay every image in ``directory`` (sorted by name). Returns count emitted.
"""Replay every image in ``directory`` (sorted by name) at the configured
cadence. Returns the number of frames emitted to the sink.
Raises NotImplementedError until AZ-407 lands. Tests that need this
path should mark themselves @pytest.mark.skip(reason="awaiting AZ-407")
until then; AC-1 (smoke) does not depend on this surface.
Raises ``FileNotFoundError`` if ``directory`` does not exist or is not
a directory. The sink is invoked with raw JPEG-encoded bytes and a
monotonic-ms timestamp that starts at 0 and advances by the period
derived from ``self._cadence.fps``.
"""
raise NotImplementedError(
"FrameSourceReplayer.replay_image_directory is owned by AZ-407 — "
"AZ-406 supplies only the public surface."
if not directory.exists() or not directory.is_dir():
raise FileNotFoundError(
f"frame directory not found: {directory}"
)
files = sorted(
p for p in directory.iterdir()
if p.is_file() and p.suffix.lower() in _IMAGE_EXTENSIONS
)
return self._emit_files(files)
def replay_video(self, video_path: Path) -> int:
"""Replay an MP4 / .h264 file frame-by-frame. Returns count emitted.
"""Replay frames from ``video_path``. Returns count emitted.
Raises NotImplementedError until AZ-408 lands.
Auto-detects:
* a regular file (``.mp4`` / ``.avi`` / ``.h264``) — decoded with
OpenCV ``VideoCapture`` and re-encoded as JPEG before emission.
* a directory — delegates to ``replay_image_directory`` so the
AZ-408 injectors that emit frame directories can use this entry
point without the caller knowing the difference.
Raises ``FileNotFoundError`` on a missing path.
"""
raise NotImplementedError(
"FrameSourceReplayer.replay_video is owned by AZ-408 — "
"AZ-406 supplies only the public surface."
)
if not video_path.exists():
raise FileNotFoundError(f"video path not found: {video_path}")
if video_path.is_dir():
return self.replay_image_directory(video_path)
return self._decode_and_emit_video(video_path)
def _decode_and_emit_video(self, video_path: Path) -> int:
cap = cv2.VideoCapture(str(video_path))
if not cap.isOpened():
raise ValueError(f"OpenCV failed to open video: {video_path}")
try:
encoded_fps = cap.get(cv2.CAP_PROP_FPS) or 0.0
fps = encoded_fps if (self._cadence.realtime and encoded_fps > 0) else self._cadence.fps
period_ms = self._period_ms(fps)
emitted = 0
t_ms = 0
while True:
ok, frame = cap.read()
if not ok:
break
success, encoded = cv2.imencode(".jpg", frame)
if not success:
raise ValueError(
f"OpenCV failed to JPEG-encode frame {emitted} of {video_path}"
)
self._sink.write_frame(encoded.tobytes(), t_ms)
emitted += 1
t_ms += period_ms
if self._cadence.realtime and period_ms > 0:
self._sleep(period_ms / 1000.0)
return emitted
finally:
cap.release()
def _emit_files(self, files: list[Path]) -> int:
period_ms = self._period_ms(self._cadence.fps)
emitted = 0
t_ms = 0
for path in files:
jpeg_bytes = path.read_bytes()
if path.suffix.lower() != ".jpg" and path.suffix.lower() != ".jpeg":
# Re-encode non-JPEG sources so the sink always gets JPEG bytes.
img = cv2.imread(str(path), cv2.IMREAD_UNCHANGED)
if img is None:
raise ValueError(f"OpenCV failed to read image: {path}")
success, encoded = cv2.imencode(".jpg", img)
if not success:
raise ValueError(f"OpenCV failed to JPEG-encode image: {path}")
jpeg_bytes = encoded.tobytes()
self._sink.write_frame(jpeg_bytes, t_ms)
emitted += 1
t_ms += period_ms
if self._cadence.realtime and period_ms > 0:
self._sleep(period_ms / 1000.0)
return emitted
@staticmethod
def _period_ms(fps: float) -> int:
if fps <= 0:
return 0
return int(round(1000.0 / fps))
+69 -12
View File
@@ -3,17 +3,27 @@
CSV schema (from `_docs/00_problem/input_data/flight_derkachi/data_imu.csv`):
timestamp_ms,ax,ay,az,gx,gy,gz,roll_deg,pitch_deg,yaw_deg,baro_m
Owned by AZ-406 (public surface) + AZ-407 (concrete file-driver
implementation). This module commits to the type signatures the
per-scenario tests will import; the actual MAVLink / MSP2 emission is
wired up by the downstream task.
Numeric fields are accepted in any float-parseable form, including
scientific notation (``-4.44E-16``) — the AZ-408 source CSV uses that
form for near-zero values.
"""
from __future__ import annotations
import csv
import math
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Protocol
from typing import Callable, Protocol
_REQUIRED_COLUMNS = (
"timestamp_ms",
"ax", "ay", "az",
"gx", "gy", "gz",
"roll_deg", "pitch_deg", "yaw_deg",
"baro_m",
)
@dataclass(frozen=True)
@@ -34,20 +44,67 @@ class FcInboundEmitter(Protocol):
...
def _parse_row(row: dict[str, str], source: Path, line_no: int) -> ImuSample:
try:
return ImuSample(
timestamp_ms=int(round(float(row["timestamp_ms"]))),
accel_mss=(float(row["ax"]), float(row["ay"]), float(row["az"])),
gyro_rps=(float(row["gx"]), float(row["gy"]), float(row["gz"])),
attitude_rad=(
math.radians(float(row["roll_deg"])),
math.radians(float(row["pitch_deg"])),
math.radians(float(row["yaw_deg"])),
),
baro_alt_m=float(row["baro_m"]),
)
except (KeyError, ValueError) as exc:
raise ValueError(
f"IMU CSV row malformed at {source}:{line_no}: {exc}"
) from exc
class ImuReplayer:
"""Drives an `FcInboundEmitter` from a CSV file at the recorded cadence."""
def __init__(self, emitter: FcInboundEmitter, rate_hz: float = 10.0) -> None:
def __init__(
self,
emitter: FcInboundEmitter,
rate_hz: float = 10.0,
*,
sleep_fn: Callable[[float], None] = time.sleep,
realtime: bool = True,
) -> None:
if rate_hz <= 0:
raise ValueError(f"rate_hz must be positive; got {rate_hz}")
self._emitter = emitter
self._rate_hz = rate_hz
self._sleep = sleep_fn
self._realtime = realtime
def replay(self, csv_path: Path) -> int:
"""Replay the CSV file. Returns the number of samples emitted.
Concrete implementation is owned by AZ-407 (FT-P-02 derkachi-drift
+ FT-P-04 frame-to-frame registration are the first consumers).
Raises ``FileNotFoundError`` on missing CSV. Raises ``ValueError``
on missing columns or a row that does not parse. When ``realtime``
is True (default), sleeps ``1 / rate_hz`` seconds between
emissions; tests should pass ``realtime=False`` or inject a
no-op ``sleep_fn`` to keep the unit suite fast.
"""
raise NotImplementedError(
"ImuReplayer.replay is owned by AZ-407 — AZ-406 supplies only "
"the public surface."
)
if not csv_path.exists():
raise FileNotFoundError(f"IMU CSV not found: {csv_path}")
emitted = 0
period_s = 1.0 / self._rate_hz
with csv_path.open() as fh:
reader = csv.DictReader(fh)
missing = [c for c in _REQUIRED_COLUMNS if c not in (reader.fieldnames or [])]
if missing:
raise ValueError(
f"IMU CSV {csv_path} missing required columns: {missing}"
)
for line_no, row in enumerate(reader, start=2): # +1 for header line
sample = _parse_row(row, csv_path, line_no)
self._emitter.emit(sample)
emitted += 1
if self._realtime:
self._sleep(period_s)
return emitted