mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-21 07:01:14 +00:00
7d53cef0cf
ci/woodpecker/push/02-build-push Pipeline failed
New replay_api component: FastAPI service wrapping the offline
gps-denied-replay pipeline. POST tlog+video (multipart) → either
sync 200 with result/map/report URLs, or async 202 + job id with
/jobs/{id} polling. Magic-byte validation, bearer auth, in-memory
JobRegistry with concurrency + queue caps (429 on overflow).
Helper accuracy_report.py promoted from tests/ to src/ because the
API needs the Markdown report writer at runtime; all AZ-699 imports
re-pointed. OpenAPI spec exported to docs.
18/18 unit tests pass (AC-1 sync, AC-2 async, AC-3 state machine,
AC-5 auth, AC-6 health, AC-8 concurrency, AC-9 magic-byte). Full
unit suite: 2251 pass, 86 skip, 1 pre-existing C12 cold-start flake
(unchanged). mypy --strict clean on the new surface.
Co-authored-by: Cursor <cursoragent@cursor.com>
191 lines
7.0 KiB
Python
191 lines
7.0 KiB
Python
"""Markdown accuracy-report writer (AZ-699 + AZ-701).
|
|
|
|
Renders a :class:`HorizontalErrorDistribution` (the production
helper in ``gps_denied_onboard.helpers.gps_compare``) plus run
context (calibration acquisition method, clip duration, fixture
paths) into the canonical Markdown layout consumed by
``_docs/06_metrics/real_flight_validation_{date}.md``.

Originally implemented as a test helper under
``tests/e2e/replay/_report_writer.py`` (AZ-699 batch 100). Promoted
to production code in AZ-701 (batch 102) because the ``replay_api``
HTTP service needs to render the same report for every replay job
the operator submits, and a test-only helper cannot be imported
from production code per the module-layout rule.

Style: every function is pure; the side effect (writing the file)
is the caller's. Tests in ``tests/unit/test_az699_report_writer.py``
exercise both the rendering and the threshold-gate verdict logic.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
from gps_denied_onboard.helpers.gps_compare import HorizontalErrorDistribution
|
|
|
|
# Names re-exported by ``from ... import *``; kept in sorted order.
__all__ = [
    # AC-3 gate constants
    "AC3_GATE_PCT",
    "AC3_GATE_THRESHOLD_M",
    # Run-context container
    "ReportContext",
    # Rendering / verdict helpers
    "format_failure_message",
    "render_report",
    "verdict_passes_ac3",
]
|
|
|
|
|
|
# Gate parameters for AZ-696 epic AC-3: the horizontal-error threshold
# (metres) and the minimum share of emissions (percent) that must fall
# within it. They live here as named module constants, not inlined in
# the test, so the failure-message template tests can pin them.
AC3_GATE_THRESHOLD_M: float = 100.0
AC3_GATE_PCT: float = 80.0
|
|
|
|
|
|
@dataclass(frozen=True)
class ReportContext:
    """Immutable description of one run, shown in the report header.

    Attributes:
        run_date_utc: Date the run executed, as an ISO-8601
            ``YYYY-MM-DD`` string. It is embedded in the report
            title and drives the report filename.
        tlog_path: Path of the real tlog the runner consumed.
        video_path: Path of the video clip the runner consumed.
        calibration_acquisition_method: Where the camera calibration
            came from, e.g. ``"factory-sheet"`` (AZ-702) or
            ``"placeholder"`` (the adti26 fallback). Quoted in the
            failure message per AZ-699 AC-3.
        clip_duration_s: Length of the analysed clip, in seconds.
        emissions_count: Number of estimator-output records read
            from the JSONL. This can differ from
            ``distribution.count`` when some emissions fall outside
            the GT window.
    """

    run_date_utc: str
    tlog_path: Path
    video_path: Path
    calibration_acquisition_method: str
    clip_duration_s: float
    emissions_count: int
|
|
|
|
|
|
def verdict_passes_ac3(distribution: HorizontalErrorDistribution) -> bool:
    """Decide whether a run satisfies the AZ-696 epic AC-3 gate.

    Args:
        distribution: Error statistics for the run. Only ``count``
            and ``threshold_hit_share`` are read.

    Returns:
        ``True`` when the hit share recorded for
        ``AC3_GATE_THRESHOLD_M``, scaled by 100, is at least
        ``AC3_GATE_PCT``. ``False`` when ``count`` is zero or no
        share is recorded for that threshold.
    """
    if distribution.count != 0:
        hit_fraction = distribution.threshold_hit_share.get(AC3_GATE_THRESHOLD_M)
        if hit_fraction is not None:
            # The multiplication treats the stored share as a fraction;
            # the gate constant is a percentage.
            return hit_fraction * 100.0 >= AC3_GATE_PCT
    return False
|
|
|
|
|
|
def format_failure_message(
    distribution: HorizontalErrorDistribution,
    context: ReportContext,
) -> str:
    """Compose the one-line failure message for AZ-699 AC-3.

    The message states the measured hit percentage against the gate,
    the residual statistics (mean, p50, p95, p99), the calibration
    acquisition method, and the path of the full report, so a failure
    can be attributed to a likely root cause (calibration uncertainty,
    drift, anchor scarcity) without re-reading the source.

    Args:
        distribution: Error statistics for the failing run.
        context: Run metadata; supplies the calibration method and
            the run date used in the report path.

    Returns:
        The formatted message. A missing share for
        ``AC3_GATE_THRESHOLD_M`` is reported as 0.0 %.
    """
    hit_fraction = distribution.threshold_hit_share.get(AC3_GATE_THRESHOLD_M, 0.0)
    hit_pct = hit_fraction * 100.0

    gate_part = (
        f"AZ-699 AC-3: only {hit_pct:.1f} % of {distribution.count} "
        f"emissions within {AC3_GATE_THRESHOLD_M:.0f} m of ground truth; "
        f"epic threshold is {AC3_GATE_PCT:.0f} %. "
    )

    residual_stats = (
        ("mean", distribution.horizontal_error_mean_m),
        ("p50", distribution.horizontal_error_p50_m),
        ("p95", distribution.horizontal_error_p95_m),
        ("p99", distribution.horizontal_error_p99_m),
    )
    residual_part = (
        "Residual: "
        + ", ".join(f"{label}={metres:.1f} m" for label, metres in residual_stats)
        + ". "
    )

    calibration_part = f"Calibration: {context.calibration_acquisition_method}. "
    pointer_part = (
        f"See _docs/06_metrics/real_flight_validation_{context.run_date_utc}.md "
        "for the full distribution."
    )
    return gate_part + residual_part + calibration_part + pointer_part
|
|
|
|
|
|
def render_report(
    distribution: HorizontalErrorDistribution,
    context: ReportContext,
    *,
    passed: bool,
) -> str:
    """Assemble the Markdown validation report as a single string.

    Sections appear in a fixed order: title, verdict line, run
    context, horizontal-error statistics, threshold-hit table, and
    vertical-error statistics (or a placeholder note when
    ``distribution.vertical_count`` is not positive). This layout is
    the schema referenced by
    ``_docs/02_document/tests/blackbox-tests.md``.

    Args:
        distribution: Error statistics to tabulate.
        context: Run metadata for the title and "Run context" list.
        passed: Verdict decided by the caller; rendered as ``PASS``
            or ``FAIL`` without being recomputed here.

    Returns:
        The report text, terminated by a newline. Nothing is written
        to disk.
    """
    stat_table_head = ("| Statistic | Value |", "| --------- | ----- |")

    horizontal_stats = (
        ("Mean", distribution.horizontal_error_mean_m),
        ("p50", distribution.horizontal_error_p50_m),
        ("p95", distribution.horizontal_error_p95_m),
        ("p99", distribution.horizontal_error_p99_m),
    )
    # Sorted so thresholds are listed in ascending order.
    hit_shares = sorted(distribution.threshold_hit_share.items())

    outcome = "FAIL"
    if passed:
        outcome = "PASS"

    report: list[str] = [
        f"# Real-flight validation — {context.run_date_utc}",
        "",
        (
            f"**Verdict**: {outcome} (AC-3 gate: ≥ {AC3_GATE_PCT:.0f} % "
            f"within {AC3_GATE_THRESHOLD_M:.0f} m)"
        ),
        "",
        "## Run context",
        "",
        f"- Tlog: `{context.tlog_path}`",
        f"- Video: `{context.video_path}`",
        f"- Calibration acquisition method: {context.calibration_acquisition_method}",
        f"- Clip duration: {context.clip_duration_s:.1f} s",
        f"- Emissions consumed: {context.emissions_count}",
        f"- Ground-truth pairings: {distribution.count}",
        "",
        "## Horizontal error (metres)",
        "",
        *stat_table_head,
    ]
    report.extend(f"| {label} | {metres:.2f} |" for label, metres in horizontal_stats)

    report += [
        "",
        "## Threshold-hit share",
        "",
        "| Threshold (m) | Hit share (%) |",
        "| ------------- | ------------- |",
    ]
    report.extend(
        f"| {limit_m:g} | {fraction * 100.0:.1f} |" for limit_m, fraction in hit_shares
    )
    report.append("")

    if distribution.vertical_count > 0:
        report += [
            "## Vertical error (metres)",
            "",
            *stat_table_head,
            f"| Mean | {distribution.vertical_error_mean_m:.2f} |",
            f"| p50 | {distribution.vertical_error_p50_m:.2f} |",
            f"| p95 | {distribution.vertical_error_p95_m:.2f} |",
            f"| Samples | {distribution.vertical_count} |",
            "",
        ]
    else:
        # Vertical attributes are not read when there are no samples.
        report += [
            "## Vertical error",
            "",
            "_No emissions carried a comparable altitude — vertical stats skipped._",
            "",
        ]

    return "\n".join(report) + "\n"
|