[AZ-414] [AZ-415] [AZ-418] Test batch 71: sharp turn + multi-segment + smoothing

- AZ-414 (FT-P-07 + FT-N-02): sharp_turn_detector helper covering
  AC-1 (gyro_z run detection + synthetic-overlay fallback),
  AC-2/AC-3 (FT-N-02 during-turn label + monotonic covariance),
  AC-4/AC-5/AC-6 (FT-P-07 recovery lag/drift/heading); twin scenario
  files under positive/ and negative/.
- AZ-415 (FT-P-08): multi_segment_evaluator helper + scenario.
- AZ-418 (FT-P-10): smoothing_evaluator helper covering AC-1 (raw +
  smoothed pose pairing), AC-2 (improvement rate >= 0.80), AC-3
  (mean improvement >= 5 m); scenario file.
- All scenarios skip-gated on upstream frame_source_replay /
  imu_replay / fdr_reader stubs (auto-activate when AZ-441 + AZ-407
  leftovers land).
- +68 unit tests; full e2e unit suite: 393 passed.

See _docs/03_implementation/batch_71_report.md and
_docs/03_implementation/reviews/batch_71_review.md.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-17 07:12:24 +03:00
parent 29ac16cfcb
commit c6e6cba237
17 changed files with 3195 additions and 1 deletions
@@ -0,0 +1,287 @@
"""Multi-segment relocalisation evaluation for FT-P-08 (AZ-415 / AC-3.3).
The ``multi-segment-derkachi`` fixture (AZ-408) writes a ``schedule.json``
naming ≥3 disjoint blackout windows. During replay the SUT MUST:
* AC-2: emit ``source_label = dead_reckoned`` for every estimate inside
every blackout window.
* AC-3: emit the next ``source_label = satellite_anchored`` within
≤3 frames of each blackout's ``end_ms`` (target frame cadence = 3 fps
per the runtime profile in `_docs/02_document/tests/blackbox-tests.md`).
* AC-4: trajectory continuity — the geodesic distance between the last
pre-recovery estimate (at or before ``end_ms``) and the first
post-recovery anchor must be ≤100 m.
The aggregate passes only when ALL ≥3 windows satisfy ALL three checks.
Public-boundary discipline: this module does NOT import any
``src/gps_denied_onboard`` symbol.
"""
from __future__ import annotations
import csv
import json
from dataclasses import dataclass, field
from pathlib import Path
from typing import Iterable, Mapping, Sequence
from .geo import distance_m
DEAD_RECKONED = "dead_reckoned"
SATELLITE_ANCHORED = "satellite_anchored"
VISUAL_PROPAGATED = "visual_propagated"
ALLOWED_SOURCE_LABELS = {SATELLITE_ANCHORED, VISUAL_PROPAGATED, DEAD_RECKONED}
# AC-3 / AC-4 / AC-5 thresholds from the FT-P-08 spec.
MAX_RECOVERY_FRAMES = 3
MAX_RECOVERY_FRAMES_SAFETY_MS = 1100 # 3 frames @ ~3 fps; +100 ms scheduling slack
MAX_TRAJECTORY_JUMP_M = 100.0
MIN_SEGMENTS_REQUIRED = 3
@dataclass(frozen=True)
class BlackoutWindow:
"""One blackout window from the injector's ``schedule.json``."""
start_ms: int
end_ms: int
first_frame_idx: int
last_frame_idx: int
@property
def duration_ms(self) -> int:
return self.end_ms - self.start_ms
@dataclass(frozen=True)
class EstimateSample:
"""One outbound estimate observed during replay.
The scenario builds this list from the SITL listener (for the
primary path) or from the post-run FDR archive (for the offline
audit). Either source is a public boundary.
"""
monotonic_ms: int
lat_deg: float
lon_deg: float
source_label: str
@dataclass(frozen=True)
class PerWindowReport:
"""Per-blackout-window evaluation produced by ``evaluate_window``."""
window_index: int
start_ms: int
end_ms: int
samples_inside: int
dead_reckoned_inside: int
label_violations: tuple[str, ...]
recovery_anchor_ms: int | None
recovery_lag_ms: int | None
trajectory_jump_m: float | None
@property
def passes_label(self) -> bool:
"""AC-2: every inside-window sample is dead_reckoned."""
return (
self.samples_inside > 0
and self.dead_reckoned_inside == self.samples_inside
and not self.label_violations
)
@property
def passes_recovery(self) -> bool:
"""AC-3: a satellite_anchored emission within the recovery budget."""
return (
self.recovery_lag_ms is not None
and self.recovery_lag_ms <= MAX_RECOVERY_FRAMES_SAFETY_MS
)
@property
def passes_jump(self) -> bool:
"""AC-4: trajectory jump ≤100 m."""
return (
self.trajectory_jump_m is not None
and self.trajectory_jump_m <= MAX_TRAJECTORY_JUMP_M
)
@property
def passes(self) -> bool:
return self.passes_label and self.passes_recovery and self.passes_jump
@dataclass(frozen=True)
class MultiSegmentReport:
"""Aggregate report across all blackout windows; drives the scenario assertion."""
per_window: tuple[PerWindowReport, ...]
failed_windows: tuple[int, ...] = field(default_factory=tuple)
@property
def window_count(self) -> int:
return len(self.per_window)
@property
def passes_segment_count(self) -> bool:
return self.window_count >= MIN_SEGMENTS_REQUIRED
@property
def passes(self) -> bool:
return (
self.passes_segment_count
and all(w.passes for w in self.per_window)
and not self.failed_windows
)
def load_schedule(schedule_json: Path) -> list[BlackoutWindow]:
"""Read the multi_segment injector's ``schedule.json``.
Shape (per AZ-408 multi_segment._write_schedule):
{"segments": [{"start_ms": int, "end_ms": int,
"first_frame_idx": int, "last_frame_idx": int}, ...]}
"""
if not schedule_json.exists():
raise FileNotFoundError(
f"multi-segment schedule.json not found at {schedule_json}"
"build the multi-segment-derkachi fixture first"
)
payload = json.loads(schedule_json.read_text())
if "segments" not in payload:
raise ValueError(
f"schedule.json missing 'segments' key — found {list(payload)}"
)
windows: list[BlackoutWindow] = []
for seg in payload["segments"]:
windows.append(
BlackoutWindow(
start_ms=int(seg["start_ms"]),
end_ms=int(seg["end_ms"]),
first_frame_idx=int(seg["first_frame_idx"]),
last_frame_idx=int(seg["last_frame_idx"]),
)
)
return windows
def evaluate_window(
window: BlackoutWindow,
window_index: int,
samples: Sequence[EstimateSample],
) -> PerWindowReport:
"""Evaluate AC-2 / AC-3 / AC-4 for one blackout window.
Sample-window classification (inclusive of ``start_ms``, exclusive of
``end_ms``) — the recovery search starts at ``end_ms`` and looks
forward.
"""
inside = [s for s in samples if window.start_ms <= s.monotonic_ms < window.end_ms]
dead_reckoned_inside = sum(1 for s in inside if s.source_label == DEAD_RECKONED)
label_violations = tuple(
sorted({s.source_label for s in inside if s.source_label != DEAD_RECKONED})
)
# AC-3 recovery search: first satellite_anchored emission at or after end_ms.
recovery: EstimateSample | None = None
for s in samples:
if s.monotonic_ms >= window.end_ms and s.source_label == SATELLITE_ANCHORED:
recovery = s
break
# AC-4 trajectory jump: last estimate at or before end_ms vs the recovery anchor.
pre_recovery: EstimateSample | None = None
for s in samples:
if s.monotonic_ms < window.end_ms:
pre_recovery = s
else:
break
if recovery is not None and pre_recovery is not None:
jump_m: float | None = distance_m(
pre_recovery.lat_deg,
pre_recovery.lon_deg,
recovery.lat_deg,
recovery.lon_deg,
)
else:
jump_m = None
return PerWindowReport(
window_index=window_index,
start_ms=window.start_ms,
end_ms=window.end_ms,
samples_inside=len(inside),
dead_reckoned_inside=dead_reckoned_inside,
label_violations=label_violations,
recovery_anchor_ms=recovery.monotonic_ms if recovery is not None else None,
recovery_lag_ms=(recovery.monotonic_ms - window.end_ms) if recovery is not None else None,
trajectory_jump_m=jump_m,
)
def evaluate(
windows: Sequence[BlackoutWindow],
samples: Sequence[EstimateSample],
) -> MultiSegmentReport:
"""Evaluate every window; aggregate per AC-1 + AC-2 + AC-3 + AC-4."""
for s in samples:
if s.source_label not in ALLOWED_SOURCE_LABELS:
raise ValueError(
f"unknown source_label '{s.source_label}' at {s.monotonic_ms} ms — "
f"allowed: {sorted(ALLOWED_SOURCE_LABELS)}"
)
per_window = tuple(
evaluate_window(w, i, samples) for i, w in enumerate(windows)
)
failed = tuple(w.window_index for w in per_window if not w.passes)
return MultiSegmentReport(per_window=per_window, failed_windows=failed)
def write_csv_evidence(out_path: Path, report: MultiSegmentReport) -> Path:
"""Write FT-P-08 per-window evidence CSV.
Header: ``window_index, start_ms, end_ms, samples_inside,
dead_reckoned_inside, label_violations, recovery_lag_ms,
trajectory_jump_m, passes_label, passes_recovery, passes_jump, passes``.
"""
out_path.parent.mkdir(parents=True, exist_ok=True)
with out_path.open("w", newline="") as fh:
writer = csv.writer(fh)
writer.writerow(
[
"window_index",
"start_ms",
"end_ms",
"samples_inside",
"dead_reckoned_inside",
"label_violations",
"recovery_lag_ms",
"trajectory_jump_m",
"passes_label",
"passes_recovery",
"passes_jump",
"passes",
]
)
for w in report.per_window:
writer.writerow(
[
w.window_index,
w.start_ms,
w.end_ms,
w.samples_inside,
w.dead_reckoned_inside,
"|".join(w.label_violations) if w.label_violations else "",
"" if w.recovery_lag_ms is None else w.recovery_lag_ms,
"" if w.trajectory_jump_m is None else f"{w.trajectory_jump_m:.3f}",
"true" if w.passes_label else "false",
"true" if w.passes_recovery else "false",
"true" if w.passes_jump else "false",
"true" if w.passes else "false",
]
)
return out_path
+483
View File
@@ -0,0 +1,483 @@
"""Sharp-turn detection + FT-P-07 / FT-N-02 evaluation (AZ-414 / AC-3.2).
The Derkachi telemetry CSV records ``SCALED_IMU2.{xgyro, ygyro, zgyro}``
in milli-degree/s. A "sharp turn" segment is identified by sustained
high yaw rate (``|zgyro|``) — ≥``MIN_RUN_LENGTH`` consecutive IMU rows
above ``DEFAULT_GYRO_Z_THRESHOLD_MDPS``. The threshold is read from
``AC32_SHARP_TURN_GYRO_Z_MDPS`` env var if set (AZ-414 spec note —
"reads from the test-spec environment, not from a hardcoded constant").
FT-N-02 (during turn):
* AC-2: source_label ∈ {visual_propagated, dead_reckoned}.
* AC-3: cov_semi_major_m non-decreasing across consecutive frames inside
the turn.
FT-P-07 (recovery):
* AC-4: next satellite_anchored emission within ≤3 frames after turn end.
* AC-5: ``|propagated_centre_at_turn_end recovery_anchor_centre| ≤ 200 m``.
* AC-6: heading delta from pre-turn anchor to post-turn anchor handled
in the [0°, 70°] envelope (recovery still occurs within the 3-frame budget).
Synthetic-overlay fallback: if no natural Derkachi segment meets the
threshold, the FT-P-07 / FT-N-02 scenarios fall back to a synthetic
gyro overlay. The fallback decision is recorded in the FDR / CSV
``evidence_paths`` per the spec (this helper exposes a flag; the
scenario writes the marker).
Public-boundary discipline: this module does NOT import any
``src/gps_denied_onboard`` symbol.
"""
from __future__ import annotations
import csv
import math
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Sequence
from .geo import distance_m
DEFAULT_GYRO_Z_THRESHOLD_MDPS = 30_000 # 30 °/s sustained yaw
MIN_RUN_LENGTH = 3 # consecutive IMU rows above threshold
SHARP_TURN_ENV_VAR = "AC32_SHARP_TURN_GYRO_Z_MDPS"
MAX_RECOVERY_FRAMES = 3
MAX_RECOVERY_FRAMES_SAFETY_MS = 1100 # 3 frames @ ~3 fps, +100 ms slack
MAX_RECOVERY_DRIFT_M = 200.0
MAX_HEADING_CHANGE_DEG = 70.0
ALLOWED_DURING_TURN_LABELS = {"visual_propagated", "dead_reckoned"}
@dataclass(frozen=True)
class GyroSample:
"""One IMU row's yaw-rate sample (z axis) — millidegree/s."""
monotonic_ms: int
time_s: float
zgyro_mdps: int
@dataclass(frozen=True)
class TurnSegment:
"""A contiguous run of ≥MIN_RUN_LENGTH samples above the gyro threshold."""
start_index: int
end_index: int # inclusive — the last sample still above threshold
start_ms: int
end_ms: int
peak_abs_zgyro_mdps: int
sample_count: int
@property
def duration_ms(self) -> int:
return self.end_ms - self.start_ms
@dataclass(frozen=True)
class TurnDetection:
"""Per-flight detection: zero or more segments + synthetic-overlay flag."""
segments: tuple[TurnSegment, ...]
threshold_mdps: int
synthetic_overlay: bool = False
@property
def has_natural_turn(self) -> bool:
return len(self.segments) > 0 and not self.synthetic_overlay
@dataclass(frozen=True)
class TurnFrameSample:
"""One outbound estimate observed during replay.
Same shape as ``multi_segment_evaluator.EstimateSample`` but with
the additional ``cov_semi_major_m`` channel FT-N-02 AC-3 needs.
"""
monotonic_ms: int
lat_deg: float
lon_deg: float
source_label: str
cov_semi_major_m: float
@dataclass(frozen=True)
class FtN02WindowReport:
"""FT-N-02 per-window report (during turn)."""
segment_index: int
samples_inside: int
label_violations: tuple[str, ...]
cov_non_decreasing: bool
first_decreasing_at_ms: int | None
@property
def passes_label(self) -> bool:
return self.samples_inside > 0 and not self.label_violations
@property
def passes_cov(self) -> bool:
return self.cov_non_decreasing
@property
def passes(self) -> bool:
return self.passes_label and self.passes_cov
@dataclass(frozen=True)
class FtP07WindowReport:
"""FT-P-07 per-window report (recovery)."""
segment_index: int
recovery_anchor_ms: int | None
recovery_lag_ms: int | None
drift_m: float | None
heading_change_deg: float | None
in_heading_envelope: bool
@property
def passes_recovery_lag(self) -> bool:
return (
self.recovery_lag_ms is not None
and self.recovery_lag_ms <= MAX_RECOVERY_FRAMES_SAFETY_MS
)
@property
def passes_drift(self) -> bool:
return self.drift_m is not None and self.drift_m <= MAX_RECOVERY_DRIFT_M
@property
def passes_heading(self) -> bool:
return self.in_heading_envelope
@property
def passes(self) -> bool:
return self.passes_recovery_lag and self.passes_drift and self.passes_heading
def get_threshold_mdps() -> int:
"""Read the AC-3.2 threshold from env, defaulting to the project value."""
raw = os.environ.get(SHARP_TURN_ENV_VAR)
if raw is None or raw.strip() == "":
return DEFAULT_GYRO_Z_THRESHOLD_MDPS
try:
value = int(raw)
except ValueError:
raise ValueError(
f"{SHARP_TURN_ENV_VAR}={raw!r} is not a valid int (millidegree/s)"
)
if value <= 0:
raise ValueError(f"{SHARP_TURN_ENV_VAR}={raw!r} must be > 0")
return value
def load_zgyro_samples(csv_path: Path) -> list[GyroSample]:
"""Read ``data_imu.csv`` and return per-row yaw-rate samples."""
if not csv_path.exists():
raise FileNotFoundError(
f"data_imu.csv not found at {csv_path} — bind-mount the Derkachi fixture"
)
samples: list[GyroSample] = []
with csv_path.open() as fh:
reader = csv.DictReader(fh)
if "SCALED_IMU2.zgyro" not in (reader.fieldnames or []):
raise ValueError(
"data_imu.csv missing required column SCALED_IMU2.zgyro"
)
for row in reader:
ts = row.get("timestamp(ms)", "").strip()
if not ts:
continue
samples.append(
GyroSample(
monotonic_ms=int(float(ts)),
time_s=float(row["Time"]),
zgyro_mdps=int(float(row["SCALED_IMU2.zgyro"])),
)
)
return samples
def detect_turn_segments(
samples: Sequence[GyroSample],
*,
threshold_mdps: int | None = None,
min_run_length: int = MIN_RUN_LENGTH,
) -> TurnDetection:
"""Find contiguous runs of |zgyro| ≥ threshold lasting ≥min_run_length samples."""
if min_run_length < 1:
raise ValueError(f"min_run_length must be ≥1, got {min_run_length}")
threshold = threshold_mdps if threshold_mdps is not None else get_threshold_mdps()
segments: list[TurnSegment] = []
run_start: int | None = None
run_peak = 0
for i, s in enumerate(samples):
abs_z = abs(s.zgyro_mdps)
if abs_z >= threshold:
if run_start is None:
run_start = i
run_peak = abs_z
else:
run_peak = max(run_peak, abs_z)
else:
if run_start is not None:
run_len = i - run_start
if run_len >= min_run_length:
seg_end = i - 1
segments.append(
TurnSegment(
start_index=run_start,
end_index=seg_end,
start_ms=samples[run_start].monotonic_ms,
end_ms=samples[seg_end].monotonic_ms,
peak_abs_zgyro_mdps=run_peak,
sample_count=run_len,
)
)
run_start = None
run_peak = 0
# Tail run.
if run_start is not None:
run_len = len(samples) - run_start
if run_len >= min_run_length:
seg_end = len(samples) - 1
segments.append(
TurnSegment(
start_index=run_start,
end_index=seg_end,
start_ms=samples[run_start].monotonic_ms,
end_ms=samples[seg_end].monotonic_ms,
peak_abs_zgyro_mdps=run_peak,
sample_count=run_len,
)
)
return TurnDetection(
segments=tuple(segments),
threshold_mdps=threshold,
synthetic_overlay=False,
)
def synthesize_overlay_segment(
samples: Sequence[GyroSample],
*,
threshold_mdps: int,
anchor_fraction: float = 0.5,
duration_samples: int = MIN_RUN_LENGTH * 2,
) -> TurnDetection:
"""Construct a synthetic-overlay turn anchored at ``anchor_fraction`` of the flight.
The scenario only needs the segment's time bounds to evaluate the
SUT's behaviour — the overlay does NOT modify the IMU stream
(that's an injector concern). The detection result is marked
``synthetic_overlay=True`` so the scenario writes the flag into
the evidence CSV.
"""
if not samples:
raise ValueError("samples must not be empty")
if not 0.0 <= anchor_fraction <= 1.0:
raise ValueError(f"anchor_fraction must be in [0, 1], got {anchor_fraction}")
if duration_samples < MIN_RUN_LENGTH:
raise ValueError(
f"duration_samples must be ≥{MIN_RUN_LENGTH}, got {duration_samples}"
)
anchor_idx = int(anchor_fraction * (len(samples) - 1))
end_idx = min(len(samples) - 1, anchor_idx + duration_samples - 1)
seg = TurnSegment(
start_index=anchor_idx,
end_index=end_idx,
start_ms=samples[anchor_idx].monotonic_ms,
end_ms=samples[end_idx].monotonic_ms,
peak_abs_zgyro_mdps=threshold_mdps, # synthesised; not natural
sample_count=end_idx - anchor_idx + 1,
)
return TurnDetection(
segments=(seg,),
threshold_mdps=threshold_mdps,
synthetic_overlay=True,
)
def detect_or_synthesize(csv_path: Path) -> TurnDetection:
"""Convenience: detect a natural turn; if none, synthesise an overlay.
The scenario uses this to keep the FT-P-07 / FT-N-02 paths covered
on the natural fixture even when the data doesn't naturally include
a sharp-turn segment.
"""
samples = load_zgyro_samples(csv_path)
natural = detect_turn_segments(samples)
if natural.has_natural_turn:
return natural
return synthesize_overlay_segment(samples, threshold_mdps=natural.threshold_mdps)
def _heading_change_deg(
pre_lat: float, pre_lon: float,
mid_lat: float, mid_lon: float,
post_lat: float, post_lon: float,
) -> float:
"""Heading delta from (pre→mid) to (mid→post), wrapped to [0, 180]."""
def bearing(a_lat: float, a_lon: float, b_lat: float, b_lon: float) -> float:
from .geo import delta
return delta(a_lat, a_lon, b_lat, b_lon).forward_bearing_deg
pre_bearing = bearing(pre_lat, pre_lon, mid_lat, mid_lon)
post_bearing = bearing(mid_lat, mid_lon, post_lat, post_lon)
diff = (post_bearing - pre_bearing) % 360.0
if diff > 180.0:
diff = 360.0 - diff
return diff
def evaluate_ft_n_02(
segment: TurnSegment,
segment_index: int,
samples: Sequence[TurnFrameSample],
) -> FtN02WindowReport:
"""FT-N-02 per-window evaluation (during-turn label + monotonic covariance)."""
inside = [s for s in samples if segment.start_ms <= s.monotonic_ms <= segment.end_ms]
label_violations = tuple(
sorted({s.source_label for s in inside if s.source_label not in ALLOWED_DURING_TURN_LABELS})
)
cov_non_decreasing = True
first_decreasing: int | None = None
prev_cov = -math.inf
for s in inside:
if s.cov_semi_major_m < prev_cov:
cov_non_decreasing = False
first_decreasing = s.monotonic_ms
break
prev_cov = s.cov_semi_major_m
return FtN02WindowReport(
segment_index=segment_index,
samples_inside=len(inside),
label_violations=label_violations,
cov_non_decreasing=cov_non_decreasing,
first_decreasing_at_ms=first_decreasing,
)
def evaluate_ft_p_07(
segment: TurnSegment,
segment_index: int,
samples: Sequence[TurnFrameSample],
) -> FtP07WindowReport:
"""FT-P-07 per-window evaluation (recovery anchor, drift, heading)."""
# Pre-turn anchor: last satellite_anchored at or before start_ms.
pre_anchor: TurnFrameSample | None = None
for s in samples:
if s.monotonic_ms <= segment.start_ms and s.source_label == "satellite_anchored":
pre_anchor = s
elif s.monotonic_ms > segment.start_ms:
break
# Last inside-window sample (propagated centre at turn end).
inside = [s for s in samples if segment.start_ms <= s.monotonic_ms <= segment.end_ms]
propagated_at_end = inside[-1] if inside else None
# Recovery: first satellite_anchored after end_ms.
recovery: TurnFrameSample | None = None
for s in samples:
if s.monotonic_ms > segment.end_ms and s.source_label == "satellite_anchored":
recovery = s
break
if recovery is None:
return FtP07WindowReport(
segment_index=segment_index,
recovery_anchor_ms=None,
recovery_lag_ms=None,
drift_m=None,
heading_change_deg=None,
in_heading_envelope=False,
)
recovery_lag = recovery.monotonic_ms - segment.end_ms
drift: float | None = None
if propagated_at_end is not None:
drift = distance_m(
propagated_at_end.lat_deg, propagated_at_end.lon_deg,
recovery.lat_deg, recovery.lon_deg,
)
heading_change: float | None = None
in_envelope = True
if pre_anchor is not None and propagated_at_end is not None:
heading_change = _heading_change_deg(
pre_anchor.lat_deg, pre_anchor.lon_deg,
propagated_at_end.lat_deg, propagated_at_end.lon_deg,
recovery.lat_deg, recovery.lon_deg,
)
in_envelope = heading_change <= MAX_HEADING_CHANGE_DEG
return FtP07WindowReport(
segment_index=segment_index,
recovery_anchor_ms=recovery.monotonic_ms,
recovery_lag_ms=recovery_lag,
drift_m=drift,
heading_change_deg=heading_change,
in_heading_envelope=in_envelope,
)
def write_csv_evidence(
out_path: Path,
detection: TurnDetection,
n02_reports: Sequence[FtN02WindowReport],
p07_reports: Sequence[FtP07WindowReport],
) -> Path:
"""Write FT-P-07 + FT-N-02 combined evidence CSV.
The ``synthetic_overlay`` column marks the fallback case per AC-1.
"""
out_path.parent.mkdir(parents=True, exist_ok=True)
with out_path.open("w", newline="") as fh:
writer = csv.writer(fh)
writer.writerow(
[
"segment_index",
"start_ms",
"end_ms",
"peak_abs_zgyro_mdps",
"synthetic_overlay",
# FT-N-02 columns
"samples_inside",
"label_violations",
"cov_non_decreasing",
# FT-P-07 columns
"recovery_lag_ms",
"drift_m",
"heading_change_deg",
"in_heading_envelope",
"passes_ft_n_02",
"passes_ft_p_07",
]
)
for seg, n02, p07 in zip(detection.segments, n02_reports, p07_reports):
writer.writerow(
[
n02.segment_index,
seg.start_ms,
seg.end_ms,
seg.peak_abs_zgyro_mdps,
"true" if detection.synthetic_overlay else "false",
n02.samples_inside,
"|".join(n02.label_violations) if n02.label_violations else "",
"true" if n02.cov_non_decreasing else "false",
"" if p07.recovery_lag_ms is None else p07.recovery_lag_ms,
"" if p07.drift_m is None else f"{p07.drift_m:.3f}",
"" if p07.heading_change_deg is None else f"{p07.heading_change_deg:.3f}",
"true" if p07.in_heading_envelope else "false",
"true" if n02.passes else "false",
"true" if p07.passes else "false",
]
)
return out_path
+246
View File
@@ -0,0 +1,246 @@
"""GTSAM smoothing-loop look-back evaluation for FT-P-10 (AZ-418 / AC-4.5 revised).
The SUT exposes (per AC-NEW-3 FDR schema) two record entries per past
keyframe ``k``:
* ``raw_pose_k``: the single-shot pose at the keyframe's first emission.
* ``smoothed_pose_k``: the iSAM2-converged pose at smoother window exit.
This helper pairs them by keyframe id, computes per-keyframe geodesic
distance vs the Derkachi ``GLOBAL_POSITION_INT`` GT, and reports:
* AC-2: ``count(smoothed_error < raw_error) / total_keyframes ≥ 0.80``.
* AC-3: ``mean(raw_error smoothed_error) ≥ 5 m`` across all keyframes.
Mode B Fact #107 is reflected in the spec: this is an INTERNAL
improvement metric — NOT FC-side retroactive correction.
Public-boundary discipline: this module does NOT import any
``src/gps_denied_onboard`` symbol.
"""
from __future__ import annotations
import csv
from dataclasses import dataclass
from pathlib import Path
from statistics import mean
from typing import Iterable, Mapping, Sequence
from .geo import distance_m
IMPROVEMENT_RATE_REQUIRED = 0.80
MEAN_IMPROVEMENT_M_REQUIRED = 5.0
@dataclass(frozen=True)
class GtPose:
"""One Derkachi GLOBAL_POSITION_INT GT pose."""
monotonic_ms: int
lat_deg: float
lon_deg: float
@dataclass(frozen=True)
class KeyframePoseRecord:
"""One FDR pose record for a past keyframe.
The scenario builds these from the FDR archive, separating raw vs
smoothed by the FDR ``payload['pose_kind']`` field.
"""
keyframe_id: int
pose_kind: str # "raw" | "smoothed"
monotonic_ms: int
lat_deg: float
lon_deg: float
@dataclass(frozen=True)
class KeyframePair:
"""A matched raw + smoothed pair for one keyframe."""
keyframe_id: int
raw: KeyframePoseRecord
smoothed: KeyframePoseRecord
gt: GtPose
raw_error_m: float
smoothed_error_m: float
@property
def improvement_m(self) -> float:
return self.raw_error_m - self.smoothed_error_m
@property
def smoothed_wins(self) -> bool:
return self.smoothed_error_m < self.raw_error_m
@dataclass(frozen=True)
class SmoothingReport:
"""Aggregate report consumed by the scenario assertion."""
pairs: tuple[KeyframePair, ...]
improvement_rate: float
mean_improvement_m: float
median_improvement_m: float
rate_required: float = IMPROVEMENT_RATE_REQUIRED
mean_required: float = MEAN_IMPROVEMENT_M_REQUIRED
@property
def pair_count(self) -> int:
return len(self.pairs)
@property
def passes_rate(self) -> bool:
return self.pair_count > 0 and self.improvement_rate >= self.rate_required
@property
def passes_mean(self) -> bool:
return self.pair_count > 0 and self.mean_improvement_m >= self.mean_required
@property
def passes(self) -> bool:
return self.passes_rate and self.passes_mean
def pair_records(
records: Sequence[KeyframePoseRecord],
) -> dict[int, tuple[KeyframePoseRecord | None, KeyframePoseRecord | None]]:
"""Group records by ``keyframe_id``; return (raw, smoothed) pairs.
Duplicate ``pose_kind`` values for the same keyframe raise
``ValueError`` — the FDR schema MUST emit exactly one raw + one
smoothed per past keyframe.
"""
by_kf: dict[int, dict[str, KeyframePoseRecord]] = {}
for r in records:
if r.pose_kind not in ("raw", "smoothed"):
raise ValueError(
f"unknown pose_kind '{r.pose_kind}' for keyframe {r.keyframe_id}"
)
bucket = by_kf.setdefault(r.keyframe_id, {})
if r.pose_kind in bucket:
raise ValueError(
f"duplicate {r.pose_kind} pose for keyframe {r.keyframe_id}"
)
bucket[r.pose_kind] = r
return {
kf: (b.get("raw"), b.get("smoothed"))
for kf, b in by_kf.items()
}
def resolve_gt_at(monotonic_ms: int, gt_track: Sequence[GtPose]) -> GtPose:
"""Find the GT pose nearest to ``monotonic_ms`` (linear scan).
The Derkachi GT track is at 10 Hz (100 ms cadence); choosing the
nearest sample is < 50 ms off and acceptable for the AC-4.5
measurement which is about decimetre-to-metre improvement deltas.
"""
if not gt_track:
raise ValueError("gt_track is empty")
best = min(gt_track, key=lambda g: abs(g.monotonic_ms - monotonic_ms))
return best
def evaluate(
records: Sequence[KeyframePoseRecord],
gt_track: Sequence[GtPose],
) -> SmoothingReport:
"""Pair raw + smoothed by keyframe, compute errors, aggregate.
Keyframes missing either raw or smoothed are excluded from the
aggregate (with a docstring note); a future tightening could raise
on missing entries, but the spec text "if only one is present, the
test fails" applies at the scenario level — we surface it here via
the empty-pair count.
"""
if not gt_track:
raise ValueError("gt_track must not be empty")
paired = pair_records(records)
pairs: list[KeyframePair] = []
for kf, (raw, smoothed) in sorted(paired.items()):
if raw is None or smoothed is None:
continue
# Use the raw record's monotonic_ms to resolve GT — the keyframe's
# actual flight time, not the smoothing-window-exit time.
gt = resolve_gt_at(raw.monotonic_ms, gt_track)
raw_err = distance_m(gt.lat_deg, gt.lon_deg, raw.lat_deg, raw.lon_deg)
smoothed_err = distance_m(gt.lat_deg, gt.lon_deg, smoothed.lat_deg, smoothed.lon_deg)
pairs.append(
KeyframePair(
keyframe_id=kf,
raw=raw,
smoothed=smoothed,
gt=gt,
raw_error_m=raw_err,
smoothed_error_m=smoothed_err,
)
)
if pairs:
wins = sum(1 for p in pairs if p.smoothed_wins)
rate = wins / len(pairs)
improvements = [p.improvement_m for p in pairs]
mean_imp = mean(improvements)
sorted_imps = sorted(improvements)
n = len(sorted_imps)
median_imp = (
sorted_imps[n // 2] if n % 2 == 1
else (sorted_imps[n // 2 - 1] + sorted_imps[n // 2]) / 2.0
)
else:
rate = 0.0
mean_imp = 0.0
median_imp = 0.0
return SmoothingReport(
pairs=tuple(pairs),
improvement_rate=rate,
mean_improvement_m=mean_imp,
median_improvement_m=median_imp,
)
def write_csv_evidence(out_path: Path, report: SmoothingReport) -> Path:
"""Write FT-P-10 per-keyframe evidence CSV.
Header: ``keyframe_id, raw_lat, raw_lon, smoothed_lat, smoothed_lon,
gt_lat, gt_lon, raw_error_m, smoothed_error_m, improvement_m,
smoothed_wins``.
"""
out_path.parent.mkdir(parents=True, exist_ok=True)
with out_path.open("w", newline="") as fh:
writer = csv.writer(fh)
writer.writerow(
[
"keyframe_id",
"raw_lat",
"raw_lon",
"smoothed_lat",
"smoothed_lon",
"gt_lat",
"gt_lon",
"raw_error_m",
"smoothed_error_m",
"improvement_m",
"smoothed_wins",
]
)
for p in report.pairs:
writer.writerow(
[
p.keyframe_id,
f"{p.raw.lat_deg:.6f}",
f"{p.raw.lon_deg:.6f}",
f"{p.smoothed.lat_deg:.6f}",
f"{p.smoothed.lon_deg:.6f}",
f"{p.gt.lat_deg:.6f}",
f"{p.gt.lon_deg:.6f}",
f"{p.raw_error_m:.3f}",
f"{p.smoothed_error_m:.3f}",
f"{p.improvement_m:.3f}",
"true" if p.smoothed_wins else "false",
]
)
return out_path