mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-22 14:31:12 +00:00
[AZ-328] C12 BuildCacheOrchestrator + remote C10 invoker (Batch 43)
Implements F1 pre-flight cache build orchestrator on the operator workstation. Composes C11 TileDownloader (AZ-316), C12 CompanionBringup (AZ-327), C12 FlightsApiClient (AZ-489), and the new RemoteCacheProvisionerInvoker into one sequenced flow guarded by a filelock-backed workstation-side lockfile. Architectural decisions: - Phase-0 flight-resolve runs BEFORE the lockfile (ADR-010): a flight that cannot be resolved is an operator-input error, not a contended-resource error. Enforced by AC-11 + AC-14. - Consumer-side cuts (AZ-507) for C11 + C10 types: local Protocols / mirror DTOs in tile_downloader_cut.py and _types.py; external errors matched by name-based whitelisting so unknown exceptions still propagate per AC-6. Cross-component type translation lives at the composition root (c12_factory). - Failure surfacing: recognised operational failures (download error, companion not ready, build error, flight-resolve error) return as CacheBuildReport(outcome=failure, failure_phase=...). Only lockfile contention raises (BuildLockHeldError) since no phase ever ran. - Workstation-side filelock library (project pin); no custom primitive. - Remote C10 stdout streamed line-by-line as DEBUG with api_key / auth_token redacted before logging (defence-in-depth). - CLI is now a thin adapter; all workflow logic lives in build_cache.py. operator-tool build-cache exit codes map per CacheBuildReport.failure_phase + failure_exception_type. Tests: 116 c12 unit tests pass (29 new for AZ-328 covering 15/15 ACs + NFR-perf-overhead microbench; 7 new for remote_c10_invoker; 3 new for file_lock; test_cli_build_cache rewritten for new orchestrator interface). Full repo suite: 1522 passed, 80 skipped. Also: replays Batch 42's ruff format leftover for c12 flights_api + test_az489 files (formatter ran over the c12 directory after new files were added). Pure whitespace; no behaviour change.
Full report: _docs/03_implementation/batch_43_cycle1_report.md Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -32,21 +32,42 @@ from typing import TYPE_CHECKING, Any
|
||||
|
||||
from gps_denied_onboard.components.c12_operator_tooling._types import (
|
||||
AreaIdentifier,
|
||||
BuildCacheOutcome,
|
||||
BuildCacheRequest,
|
||||
CacheBuildReport,
|
||||
CompanionAddress,
|
||||
CompanionUnreachableReason,
|
||||
DownloadBatchReportCut,
|
||||
DownloadOutcomeCut,
|
||||
DownloadRequestCut,
|
||||
FailurePhase,
|
||||
FlightById,
|
||||
FlightFromFile,
|
||||
FlightResolveReport,
|
||||
FlightResolveSource,
|
||||
FlightSource,
|
||||
ReadinessOutcome,
|
||||
ReadinessReport,
|
||||
RemoteBuildOutcome,
|
||||
RemoteBuildReport,
|
||||
SectorClassification,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.build_cache import (
|
||||
BuildCacheOrchestrator,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.companion_bringup import (
|
||||
CompanionBringup,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.config import (
|
||||
C12BuildCacheConfig,
|
||||
C12CompanionConfig,
|
||||
C12Config,
|
||||
HostKeyPolicy,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.errors import (
|
||||
BuildLockHeldError,
|
||||
BuildReportParseError,
|
||||
CacheBuildError,
|
||||
CompanionUnreachableError,
|
||||
ContentHashMismatchError,
|
||||
)
|
||||
@@ -68,6 +89,12 @@ from gps_denied_onboard.components.c12_operator_tooling.exit_codes import (
|
||||
EXIT_UPLOAD_FAILURE,
|
||||
EXIT_USAGE,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.file_lock import (
|
||||
FileLock,
|
||||
FileLockFactory,
|
||||
FilelockFileLockFactory,
|
||||
LockTimeout,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.flights_api.errors import (
|
||||
EmptyWaypointsError,
|
||||
FlightFileNotFoundError,
|
||||
@@ -96,6 +123,10 @@ from gps_denied_onboard.components.c12_operator_tooling.interface import (
|
||||
CacheBuildWorkflow,
|
||||
OperatorReLocService,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.remote_c10_invoker import (
|
||||
RemoteBuildRequest,
|
||||
RemoteCacheProvisionerInvoker,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.remote_sidecar_verifier import (
|
||||
RemoteSidecarResult,
|
||||
RemoteSidecarVerifier,
|
||||
@@ -108,6 +139,9 @@ from gps_denied_onboard.components.c12_operator_tooling.ssh_session import (
|
||||
SshSession,
|
||||
SshSessionFactory,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.tile_downloader_cut import (
|
||||
TileDownloaderCut,
|
||||
)
|
||||
from gps_denied_onboard.config.schema import register_component_block
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -192,18 +226,38 @@ __all__ = [
|
||||
"EXIT_USAGE",
|
||||
"FRESHNESS_TABLE",
|
||||
"AreaIdentifier",
|
||||
"BuildCacheOrchestrator",
|
||||
"BuildCacheOutcome",
|
||||
"BuildCacheRequest",
|
||||
"BuildLockHeldError",
|
||||
"BuildReportParseError",
|
||||
"C12BuildCacheConfig",
|
||||
"C12CompanionConfig",
|
||||
"C12Config",
|
||||
"CacheBuildError",
|
||||
"CacheBuildReport",
|
||||
"CacheBuildWorkflow",
|
||||
"CompanionAddress",
|
||||
"CompanionBringup",
|
||||
"CompanionUnreachableError",
|
||||
"CompanionUnreachableReason",
|
||||
"ContentHashMismatchError",
|
||||
"DownloadBatchReportCut",
|
||||
"DownloadOutcomeCut",
|
||||
"DownloadRequestCut",
|
||||
"EmptyWaypointsError",
|
||||
"FailurePhase",
|
||||
"FileLock",
|
||||
"FileLockFactory",
|
||||
"FilelockFileLockFactory",
|
||||
"FlightById",
|
||||
"FlightDto",
|
||||
"FlightFileNotFoundError",
|
||||
"FlightFromFile",
|
||||
"FlightNotFoundError",
|
||||
"FlightResolveReport",
|
||||
"FlightResolveSource",
|
||||
"FlightSource",
|
||||
"FlightsApiAuthError",
|
||||
"FlightsApiClient",
|
||||
"FlightsApiError",
|
||||
@@ -211,11 +265,16 @@ __all__ = [
|
||||
"FlightsApiUnreachableError",
|
||||
"HostKeyPolicy",
|
||||
"HttpxFlightsApiClient",
|
||||
"LockTimeout",
|
||||
"OperatorReLocService",
|
||||
"ParamikoSshSession",
|
||||
"ParamikoSshSessionFactory",
|
||||
"ReadinessOutcome",
|
||||
"ReadinessReport",
|
||||
"RemoteBuildOutcome",
|
||||
"RemoteBuildReport",
|
||||
"RemoteBuildRequest",
|
||||
"RemoteCacheProvisionerInvoker",
|
||||
"RemoteCommandResult",
|
||||
"RemoteSidecarResult",
|
||||
"RemoteSidecarVerifier",
|
||||
@@ -223,6 +282,7 @@ __all__ = [
|
||||
"SectorClassificationStore",
|
||||
"SshSession",
|
||||
"SshSessionFactory",
|
||||
"TileDownloaderCut",
|
||||
"WaypointDto",
|
||||
"WaypointObjective",
|
||||
"WaypointSchemaError",
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""C12 operator-tooling shared DTOs / enums (AZ-326, AZ-327).
|
||||
"""C12 operator-tooling shared DTOs / enums (AZ-326, AZ-327, AZ-328).
|
||||
|
||||
``SectorClassification`` is declared locally — c12 must not import the
|
||||
c6 / c10 / c11 enums (AZ-507 / module-layout cross-component rule); the
|
||||
@@ -7,19 +7,49 @@ boundary by ``.value`` round-trip.
|
||||
|
||||
``CompanionAddress`` and ``ReadinessReport`` are AZ-327's externally
|
||||
visible DTOs returned by ``CompanionBringup.verify_companion_ready``.
|
||||
|
||||
AZ-328 adds the public ``build_cache`` request/response surface:
|
||||
``BuildCacheRequest``, ``FlightSource`` (sum type ``FlightById`` |
|
||||
``FlightFromFile``), ``FlightResolveReport``, ``CacheBuildReport``,
|
||||
``BuildCacheOutcome`` and ``FailurePhase`` enums, plus the consumer-side
|
||||
cuts ``DownloadRequestCut`` / ``DownloadBatchReportCut`` (mirror C11
|
||||
shapes — composition root translates) and ``RemoteBuildOutcome`` /
|
||||
``RemoteBuildReport`` (parsed from C10 stdout JSON by
|
||||
``RemoteCacheProvisionerInvoker``).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from uuid import UUID
|
||||
|
||||
from gps_denied_onboard._types.geo import BoundingBox, LatLonAlt
|
||||
from gps_denied_onboard.components.c12_operator_tooling.flights_api.interface import (
|
||||
FlightDto,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"AreaIdentifier",
|
||||
"BuildCacheOutcome",
|
||||
"BuildCacheRequest",
|
||||
"CacheBuildReport",
|
||||
"CompanionAddress",
|
||||
"CompanionUnreachableReason",
|
||||
"DownloadBatchReportCut",
|
||||
"DownloadOutcomeCut",
|
||||
"DownloadRequestCut",
|
||||
"FailurePhase",
|
||||
"FlightById",
|
||||
"FlightFromFile",
|
||||
"FlightResolveReport",
|
||||
"FlightResolveSource",
|
||||
"FlightSource",
|
||||
"ReadinessOutcome",
|
||||
"ReadinessReport",
|
||||
"RemoteBuildOutcome",
|
||||
"RemoteBuildReport",
|
||||
"SectorClassification",
|
||||
]
|
||||
|
||||
@@ -83,3 +113,216 @@ class ReadinessReport:
|
||||
not_ready_reasons: tuple[str, ...]
|
||||
companion_cache_root: str
|
||||
engines_inspected_count: int
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AZ-328: BuildCacheOrchestrator surface
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class BuildCacheOutcome(str, Enum):
    """Overall result flag carried by :class:`CacheBuildReport` (AZ-328).

    ``IDEMPOTENT_NO_OP`` mirrors C10's
    :class:`BuildOutcome.IDEMPOTENT_NO_OP` (a D-C10-1 hit). It is kept
    apart from ``SUCCESS`` so operator scripts can distinguish a re-run
    that did no work from a run that performed new work.
    """

    # The run performed new work and completed.
    SUCCESS = "success"
    # The run did not complete; see ``CacheBuildReport.failure_phase``.
    FAILURE = "failure"
    # Re-run that found nothing to do.
    IDEMPOTENT_NO_OP = "idempotent_no_op"
|
||||
|
||||
|
||||
class FailurePhase(str, Enum):
    """Pipeline phase that a :class:`CacheBuildReport` attributes a failure to.

    The set is closed by the AZ-328 Constraints: operator scripts
    dispatch on ``$?`` per phase, so adding a member needs Plan-cycle
    approval.
    """

    # No phase is being blamed.
    NONE = "none"
    FLIGHT_RESOLVE = "flight_resolve"
    DOWNLOAD = "download"
    BUILD = "build"
|
||||
|
||||
|
||||
class FlightResolveSource(str, Enum):
    """Where the :class:`FlightDto` in a :class:`FlightResolveReport` came from."""

    # Resolved through the flights API.
    FLIGHTS_API = "flights_api"
    # Loaded from a flight file.
    FLIGHT_FILE = "flight_file"
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class FlightById:
|
||||
"""Online flight source: resolve via the parent-suite flights service."""
|
||||
|
||||
flight_id: UUID
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class FlightFromFile:
|
||||
"""Offline flight source: load from a JSON export on disk."""
|
||||
|
||||
path: Path
|
||||
|
||||
|
||||
# Sum type: ``BuildCacheRequest.flight_source`` holds exactly one of these
# two concrete dataclasses. Phase 0 of
# :class:`BuildCacheOrchestrator.build_cache` dispatches on which one it is.
FlightSource = FlightById | FlightFromFile
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class BuildCacheRequest:
|
||||
"""Operator-supplied input to :meth:`BuildCacheOrchestrator.build_cache` (AZ-328).
|
||||
|
||||
The legacy ``bbox`` field documented in earlier C12 drafts is gone —
|
||||
the orchestrator derives the bbox from the resolved
|
||||
:class:`FlightDto` per ADR-010 / AZ-489.
|
||||
|
||||
``api_key`` is captured here so AC-9 can assert no log line emits the
|
||||
literal value; the actual download GETs use the URL + key already
|
||||
baked into the C11 ``TileDownloader`` at composition time. The
|
||||
informational copy on this request lets the orchestrator log the
|
||||
redacted shape for FDR / debug parity.
|
||||
|
||||
``zoom_levels`` defaults to a single zoom 18 tile grid (the AC-NEW-1
|
||||
pre-flight imagery resolution); the operator can override per call
|
||||
when stitching wider mosaics. ``cache_root`` is the workstation-side
|
||||
C6 root the C11 downloader will write into.
|
||||
"""
|
||||
|
||||
flight_source: FlightSource
|
||||
sector_class: SectorClassification
|
||||
calibration_path: Path
|
||||
satellite_provider_url: str
|
||||
api_key: str
|
||||
companion_address: CompanionAddress
|
||||
expected_engines: tuple[str, ...]
|
||||
cache_root: Path
|
||||
zoom_levels: tuple[int, ...] = (18,)
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class FlightResolveReport:
|
||||
"""Phase-0 capture of the resolved :class:`FlightDto` (ADR-010 / AZ-489).
|
||||
|
||||
Forwarded into the downstream phases AND captured into the eventual
|
||||
:class:`CacheBuildReport` so the FDR / debug consumer sees exactly
|
||||
what bbox + takeoff origin the orchestrator drove the rest of the
|
||||
pipeline with.
|
||||
"""
|
||||
|
||||
source: FlightResolveSource
|
||||
flight_id: UUID
|
||||
waypoint_count: int
|
||||
bbox: BoundingBox
|
||||
takeoff_origin: LatLonAlt
|
||||
raw_flight_dto: FlightDto
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Consumer-side structural cuts of C11 shapes (AZ-507)
|
||||
#
|
||||
# c12_operator_tooling MAY NOT import from c11_tile_manager directly. The
|
||||
# composition root maps these local cuts to / from the real c11 DTOs at
|
||||
# the wiring boundary (``runtime_root.c12_factory``).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class DownloadOutcomeCut(str, Enum):
    """C12's consumer-side mirror of the c11 ``DownloadOutcome`` enum."""

    SUCCESS = "success"
    PARTIAL = "partial"
    FAILURE = "failure"
    IDEMPOTENT_NO_OP = "idempotent_no_op"
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class DownloadRequestCut:
|
||||
"""C12-local mirror of c11 ``DownloadRequest`` (AZ-507 cut)."""
|
||||
|
||||
flight_id: UUID
|
||||
bbox_min_lat: float
|
||||
bbox_min_lon: float
|
||||
bbox_max_lat: float
|
||||
bbox_max_lon: float
|
||||
zoom_levels: tuple[int, ...]
|
||||
sector_class: SectorClassification
|
||||
cache_root: Path
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class DownloadBatchReportCut:
|
||||
"""C12-local mirror of c11 ``DownloadBatchReport`` (AZ-507 cut).
|
||||
|
||||
Field set is the strict subset the orchestrator needs to render the
|
||||
aggregated :class:`CacheBuildReport`; if a future task needs more
|
||||
fields it adds them here AND in the composition-root mapper.
|
||||
"""
|
||||
|
||||
outcome: DownloadOutcomeCut
|
||||
tiles_requested: int
|
||||
tiles_downloaded: int
|
||||
failure_reason: str | None = None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Consumer-side structural cuts of C10 BuildReport JSON wire (AZ-507)
|
||||
#
|
||||
# C10's ``CacheProvisioner`` runs companion-side and emits its
|
||||
# ``BuildReport`` as a JSON document on stdout. The C12 invoker parses it
|
||||
# into this local mirror without importing c10_provisioning.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class RemoteBuildOutcome(str, Enum):
    """Local mirror of c10 ``BuildOutcome`` as it arrives via JSON on the wire."""

    SUCCESS = "success"
    FAILURE = "failure"
    IDEMPOTENT_NO_OP = "idempotent_no_op"
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class RemoteBuildReport:
|
||||
"""Parsed C10 ``BuildReport`` JSON document (companion-side stdout)."""
|
||||
|
||||
outcome: RemoteBuildOutcome
|
||||
engines_built: int
|
||||
engines_reused: int
|
||||
descriptors_generated: int
|
||||
manifest_hash: str | None
|
||||
failure_reason: str | None
|
||||
elapsed_s: float
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class CacheBuildReport:
|
||||
"""Aggregated result of one :meth:`BuildCacheOrchestrator.build_cache` call.
|
||||
|
||||
Per AC-10, every sub-report is reachable on the success path; on
|
||||
failure the unreached sub-reports are ``None`` so the operator can
|
||||
tell at a glance which phase produced the failure without having to
|
||||
walk the structured log.
|
||||
|
||||
``failure_exception_type`` — name of the original typed exception (if
|
||||
any) that the orchestrator caught and folded into this report. The
|
||||
CLI uses it to route flight-resolve failures to the granular AZ-489
|
||||
exit codes (``EXIT_FLIGHT_NOT_FOUND``, ``EXIT_EMPTY_WAYPOINTS``,
|
||||
etc.) without resurrecting the exception. ``None`` when the failure
|
||||
came from a report-encoded outcome (download / build report's own
|
||||
``outcome=failure``) rather than a Python exception.
|
||||
"""
|
||||
|
||||
outcome: BuildCacheOutcome
|
||||
failure_phase: FailurePhase
|
||||
flight_resolve_report: FlightResolveReport | None
|
||||
download_report: DownloadBatchReportCut | None
|
||||
build_report: RemoteBuildReport | None
|
||||
failure_reason: str | None
|
||||
wall_clock_s: float
|
||||
failure_exception_type: str | None = None
|
||||
|
||||
@@ -0,0 +1,705 @@
|
||||
"""``BuildCacheOrchestrator`` — the F1 pre-flight cache-build top of stack (AZ-328).
|
||||
|
||||
Sequenced workflow per ADR-010 / description.md § 1, § 2, § 7:
|
||||
|
||||
0. **Flight resolve** (BEFORE the lockfile) — a flight that cannot be
|
||||
resolved is an operator-input error, not a contended-resource error;
|
||||
making the operator wait on a stale lock would muddy the diagnosis.
|
||||
1. Acquire the workstation lockfile (``cache_staging_root/.c12.lock``).
|
||||
2. **Download phase** — call the c11 ``TileDownloader`` cut with the
|
||||
bbox derived in phase 0.
|
||||
3. **Verify-ready phase** — confirm the companion has the four
|
||||
pre-flight artifacts ready (or this is a first-run with zero present).
|
||||
4. **Build phase** — open SSH and dispatch C10's build entry on the
|
||||
companion via :class:`RemoteCacheProvisionerInvoker`.
|
||||
5. Aggregate sub-reports into :class:`CacheBuildReport`.
|
||||
6. Release the lock when the ``with`` block exits (normally or via an exception).
|
||||
|
||||
AZ-507 cross-component cut: the orchestrator never imports c10 or c11
|
||||
directly. The downloader arrives as :class:`TileDownloaderCut`; the
|
||||
remote build report arrives as the local :class:`RemoteBuildReport`
|
||||
parsed from C10's stdout JSON.
|
||||
|
||||
Secrets discipline: ``api_key`` and ``flights_api_auth_token`` are
|
||||
NEVER passed to ``str(request)`` / ``repr(request)`` and are NEVER
|
||||
logged. The structured-log shape includes a redacted summary instead.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from collections.abc import Callable
|
||||
|
||||
from gps_denied_onboard.clock import Clock
|
||||
from gps_denied_onboard.components.c12_operator_tooling._types import (
|
||||
BuildCacheOutcome,
|
||||
BuildCacheRequest,
|
||||
CacheBuildReport,
|
||||
DownloadOutcomeCut,
|
||||
DownloadRequestCut,
|
||||
FailurePhase,
|
||||
FlightById,
|
||||
FlightFromFile,
|
||||
FlightResolveReport,
|
||||
FlightResolveSource,
|
||||
ReadinessOutcome,
|
||||
RemoteBuildOutcome,
|
||||
RemoteBuildReport,
|
||||
SectorClassification,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.companion_bringup import (
|
||||
CompanionBringup,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.config import (
|
||||
C12BuildCacheConfig,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.errors import (
|
||||
BuildLockHeldError,
|
||||
BuildReportParseError,
|
||||
CacheBuildError,
|
||||
CompanionUnreachableError,
|
||||
ContentHashMismatchError,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.file_lock import (
|
||||
FileLockFactory,
|
||||
LockTimeout,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.flights_api.errors import (
|
||||
EmptyWaypointsError,
|
||||
FlightFileNotFoundError,
|
||||
FlightNotFoundError,
|
||||
FlightsApiAuthError,
|
||||
FlightsApiError,
|
||||
FlightsApiSchemaError,
|
||||
FlightsApiUnreachableError,
|
||||
WaypointSchemaError,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.flights_api.interface import (
|
||||
FlightDto,
|
||||
FlightsApiClient,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.freshness_table import (
|
||||
freshness_threshold_months as _default_freshness_threshold,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.remote_c10_invoker import (
|
||||
RemoteBuildRequest,
|
||||
RemoteCacheProvisionerInvoker,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.ssh_session import (
|
||||
SshSessionFactory,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.tile_downloader_cut import (
|
||||
TileDownloaderCut,
|
||||
)
|
||||
|
||||
__all__ = ["BuildCacheOrchestrator"]
|
||||
|
||||
|
||||
# Values for the ``kind`` key of the structured-log ``extra`` payload.
_LOG_FLIGHT_RESOLVE_START = "c12.build_cache.flight_resolve.start"
_LOG_FLIGHT_RESOLVE_FAILED = "c12.build_cache.flight_resolve.failed"
_LOG_BUILD_CACHE_START = "c12.build_cache.start"
_LOG_BUILD_CACHE_SUCCESS = "c12.build_cache.success"
_LOG_BUILD_CACHE_IDEMPOTENT = "c12.build_cache.idempotent"
_LOG_DOWNLOAD_FAILED = "c12.build_cache.download.failed"
_LOG_COMPANION_NOT_READY = "c12.build_cache.companion.not_ready"
_LOG_BUILD_FAILED = "c12.build_cache.build.failed"
_LOG_LOCK_HELD = "c12.build_cache.lock.held"

# Nanoseconds per second.
_NS_PER_S: int = 1_000_000_000


# Per-phase whitelists of exception class NAMES. c12 is not allowed to
# import the typed exception classes of c11 / c10 (AZ-507), so they are
# recognised by walking the MRO and comparing ``__name__``. Any
# exception whose names are all absent from the whitelist propagates, so
# AC-6's ``RuntimeError`` / ``KeyboardInterrupt`` reach the caller and
# the ``with``-statement's ``__exit__`` releases the lockfile.
_DOWNLOAD_RECOGNISED_NAMES: frozenset[str] = frozenset(
    (
        # c11_tile_manager (AZ-316 + ancestors)
        "TileManagerError",
        "SatelliteProviderError",
        "RateLimitedError",
        "ResolutionRejectionError",
        "CacheBudgetExceededError",
        # download-time IO errors that escape c11's typed wrappers
        "TimeoutError",
        "ConnectionError",
    )
)
_BUILD_RECOGNISED_NAMES: frozenset[str] = frozenset(
    (
        # typed exceptions of c10_provisioning (AZ-321..AZ-325)
        "C10ProvisioningError",
        "EngineBuildError",
        "CalibrationCacheError",
        "DescriptorBatchError",
        "ManifestWriteError",
        "ManifestSignatureError",
        "ManifestCoverageError",
        # C10's own lock collision. It shares a name with C12's
        # BuildLockHeldError but is a different class (other module);
        # listing it makes the build phase fold it into a
        # CacheBuildReport rather than letting it propagate.
        "BuildLockHeldError",
        # paramiko SSH transport failures in the middle of a stream
        "SSHException",
        "EOFError",
    )
)
|
||||
|
||||
|
||||
class BuildCacheOrchestrator:
|
||||
"""F1 pre-flight cache-build orchestrator (AZ-328).
|
||||
|
||||
Constructed once per ``OperatorToolServices`` from the composition
|
||||
root; the CLI ``build-cache`` subcommand resolves it from the
|
||||
services dataclass and calls :meth:`build_cache` exactly once per
|
||||
invocation.
|
||||
|
||||
All collaborators are injected; production wiring uses the real
|
||||
c11 ``TileDownloader``, the real :class:`CompanionBringup`, the
|
||||
real :class:`RemoteCacheProvisionerInvoker` over a real paramiko
|
||||
SSH session, and the :class:`FilelockFileLockFactory`.
|
||||
"""
|
||||
|
||||
def __init__(
    self,
    *,
    flights_api_client: FlightsApiClient,
    tile_downloader: TileDownloaderCut,
    companion_bringup: CompanionBringup,
    remote_c10_invoker: RemoteCacheProvisionerInvoker,
    ssh_factory: SshSessionFactory,
    lock_factory: FileLockFactory,
    logger: logging.Logger,
    clock: Clock,
    config: C12BuildCacheConfig,
    freshness_lookup: Callable[[SectorClassification], int] = _default_freshness_threshold,
) -> None:
    """Keep references to the injected collaborators.

    Every argument is keyword-only and is stored unchanged on a
    private attribute of the same name; nothing is called here.
    ``freshness_lookup`` falls back to the freshness-table function
    imported by this module when the caller does not supply one.
    """
    # Cross-cutting services.
    self._config = config
    self._clock = clock
    self._logger = logger

    # Phase collaborators.
    self._flights_api_client = flights_api_client
    self._lock_factory = lock_factory
    self._tile_downloader = tile_downloader
    self._freshness_lookup = freshness_lookup
    self._companion_bringup = companion_bringup
    self._ssh_factory = ssh_factory
    self._remote_c10_invoker = remote_c10_invoker
|
||||
|
||||
def build_cache(self, request: BuildCacheRequest) -> CacheBuildReport:
    """Run the full F1 pipeline: flight resolve → lock → download → verify → build.

    The flight is resolved first, before any lock is taken (ADR-010 +
    AC-11). If that raises one of the flights-API / flight-file errors
    listed below, the error is logged and returned as a ``FAILURE``
    report with ``failure_phase=FLIGHT_RESOLVE``; it is not re-raised.
    Otherwise the staging directory is created, the lockfile is
    acquired, and the remaining phases run while the lock is held.

    Raises:
        BuildLockHeldError: the lock factory reported ``LockTimeout``.
    """
    started_ns = self._clock.monotonic_ns()
    redaction_values = self._collect_secrets(request)

    # --------------------------------------------------------------
    # Phase 0: flight resolve, deliberately outside the lockfile.
    # --------------------------------------------------------------
    resolve_failures = (
        FlightsApiUnreachableError,
        FlightsApiAuthError,
        FlightNotFoundError,
        FlightsApiSchemaError,
        FlightsApiError,
        FlightFileNotFoundError,
        EmptyWaypointsError,
        WaypointSchemaError,
    )
    start_kv = {"flight_source_kind": _flight_source_kind(request.flight_source)}
    self._logger.info(
        "starting flight-resolve phase",
        extra={"kind": _LOG_FLIGHT_RESOLVE_START, "kv": start_kv},
    )
    try:
        resolved = self._resolve_flight(request)
        resolve_report = self._build_flight_resolve_report(request, resolved)
    except resolve_failures as exc:
        self._logger.error(
            "flight-resolve phase failed",
            extra={
                "kind": _LOG_FLIGHT_RESOLVE_FAILED,
                "kv": {"exception_type": type(exc).__name__},
            },
        )
        # No later phase ran, so every sub-report stays None.
        return CacheBuildReport(
            outcome=BuildCacheOutcome.FAILURE,
            failure_phase=FailurePhase.FLIGHT_RESOLVE,
            flight_resolve_report=None,
            download_report=None,
            build_report=None,
            failure_reason=_failure_reason_for_flight_resolve(exc, request),
            wall_clock_s=self._elapsed_s(started_ns),
            failure_exception_type=type(exc).__name__,
        )

    # --------------------------------------------------------------
    # Phase 1: take the workstation lockfile.
    # --------------------------------------------------------------
    lock_file = self._config.cache_staging_root / self._config.lock_filename
    self._config.cache_staging_root.mkdir(parents=True, exist_ok=True)

    try:
        held_lock = self._lock_factory.try_lock(
            lock_file, timeout_s=self._config.lock_timeout_s
        )
    except LockTimeout as exc:
        contention_kv = {
            "lock_path": str(lock_file),
            "timeout_s": self._config.lock_timeout_s,
        }
        self._logger.error(
            "build-cache lock held; another invocation is in progress",
            extra={"kind": _LOG_LOCK_HELD, "kv": contention_kv},
        )
        raise BuildLockHeldError(
            lock_path=lock_file, timeout_s=self._config.lock_timeout_s
        ) from exc

    # Leaving the ``with`` block, by return or by exception, is what
    # releases the lock.
    with held_lock:
        return self._run_locked_phases(
            request=request,
            flight_resolve_report=resolve_report,
            start_ns=started_ns,
            secrets=redaction_values,
        )
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Helpers — phase 0
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def _resolve_flight(self, request: BuildCacheRequest) -> FlightDto:
|
||||
source = request.flight_source
|
||||
if isinstance(source, FlightById):
|
||||
return self._flights_api_client.fetch_flight(
|
||||
flight_id=source.flight_id,
|
||||
base_url=self._config.flights_api_base_url,
|
||||
auth_token=self._config.flights_api_auth_token,
|
||||
)
|
||||
if isinstance(source, FlightFromFile):
|
||||
return self._flights_api_client.load_flight_file(path=source.path)
|
||||
raise TypeError(
|
||||
f"BuildCacheRequest.flight_source must be FlightById or FlightFromFile; "
|
||||
f"got {type(source).__name__}"
|
||||
)
|
||||
|
||||
def _build_flight_resolve_report(
    self, request: BuildCacheRequest, flight: FlightDto
) -> FlightResolveReport:
    """Assemble the phase-0 report for an already-resolved ``flight``.

    The bbox (buffered by ``config.flight_bbox_buffer_m``) and the
    takeoff origin are both computed by the flights API client. The
    recorded source is ``FLIGHTS_API`` when the request carried a
    :class:`FlightById`, and ``FLIGHT_FILE`` otherwise.
    """
    client = self._flights_api_client
    derived_bbox = client.bbox_from_waypoints(
        flight.waypoints, buffer_m=self._config.flight_bbox_buffer_m
    )
    origin = client.takeoff_origin_from_flight(flight)

    if isinstance(request.flight_source, FlightById):
        resolved_from = FlightResolveSource.FLIGHTS_API
    else:
        resolved_from = FlightResolveSource.FLIGHT_FILE

    return FlightResolveReport(
        source=resolved_from,
        flight_id=flight.flight_id,
        waypoint_count=len(flight.waypoints),
        bbox=derived_bbox,
        takeoff_origin=origin,
        raw_flight_dto=flight,
    )
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Locked phases (download → verify-ready → build)
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def _run_locked_phases(
    self,
    *,
    request: BuildCacheRequest,
    flight_resolve_report: FlightResolveReport,
    start_ns: int,
    secrets: tuple[str, ...],
) -> CacheBuildReport:
    """Run download, verify-ready and build in that order.

    Logs the pipeline start with the two credential keys replaced by
    the literal ``"REDACTED"``, then runs the phases one after another.
    The first phase that hands back a finished
    :class:`CacheBuildReport` ends the pipeline, and that report is
    returned unchanged.
    """
    start_kv = {
        "flight_id": str(flight_resolve_report.flight_id),
        "sector_class": request.sector_class.value,
        "satellite_provider_url": request.satellite_provider_url,
        "api_key": "REDACTED",
        "auth_token": "REDACTED",
        "bbox": _bbox_kv(flight_resolve_report.bbox),
    }
    self._logger.info(
        "starting build-cache pipeline",
        extra={"kind": _LOG_BUILD_CACHE_START, "kv": start_kv},
    )

    # Keyword arguments that every phase receives.
    shared = {
        "request": request,
        "flight_resolve_report": flight_resolve_report,
        "start_ns": start_ns,
    }

    # Phase 2: download. A CacheBuildReport here means the phase
    # already failed and the pipeline is aborted.
    download_result = self._run_download_phase(**shared)
    if isinstance(download_result, CacheBuildReport):
        return download_result

    # Phase 3: verify-ready. Anything other than None is a finished report.
    verify_abort = self._run_verify_phase(download_report=download_result, **shared)
    if verify_abort is not None:
        return verify_abort

    # Phase 4: build. Its report is the pipeline's result.
    return self._run_build_phase(
        download_report=download_result,
        secrets=secrets,
        **shared,
    )
|
||||
|
||||
def _run_download_phase(
    self,
    *,
    request: BuildCacheRequest,
    flight_resolve_report: FlightResolveReport,
    start_ns: int,
) -> object:
    """Run the tile-download phase and classify its result.

    Args:
        request: The operator's build-cache request.
        flight_resolve_report: Supplies the flight id and bbox for the
            download request.
        start_ns: Clock reading taken at pipeline start.

    Returns:
        The downloader's own report when the download did not fail, or a
        ``CacheBuildReport`` with ``failure_phase=DOWNLOAD`` when the
        downloader raised a recognised exception or reported ``FAILURE``.

    Raises:
        CacheBuildError: ``self._freshness_lookup`` raised ``KeyError`` for
            ``request.sector_class``.
        Exception: A downloader exception that is not recognised via
            ``_DOWNLOAD_RECOGNISED_NAMES`` is re-raised unchanged.
    """

    def _download_failure(reason: str, report: object) -> CacheBuildReport:
        # Single place that shapes a download-phase failure report.
        return CacheBuildReport(
            outcome=BuildCacheOutcome.FAILURE,
            failure_phase=FailurePhase.DOWNLOAD,
            flight_resolve_report=flight_resolve_report,
            download_report=report,
            build_report=None,
            failure_reason=reason,
            wall_clock_s=self._elapsed_s(start_ns),
        )

    # The lookup is performed only to confirm the sector class has a
    # freshness entry; its result is not used further in this method.
    try:
        self._freshness_lookup(request.sector_class)
    except KeyError as exc:
        self._logger.error(
            "freshness threshold lookup failed",
            extra={
                "kind": _LOG_DOWNLOAD_FAILED,
                "kv": {"sector_class": request.sector_class.value},
            },
        )
        raise CacheBuildError(
            failure_phase=FailurePhase.DOWNLOAD,
            wrapped_exception_repr=repr(exc),
            message=(
                f"unknown SectorClassification {request.sector_class!r}: "
                "freshness table has no entry"
            ),
        ) from exc

    bbox = flight_resolve_report.bbox
    download_request = DownloadRequestCut(
        flight_id=flight_resolve_report.flight_id,
        bbox_min_lat=bbox.min_lat_deg,
        bbox_min_lon=bbox.min_lon_deg,
        bbox_max_lat=bbox.max_lat_deg,
        bbox_max_lon=bbox.max_lon_deg,
        zoom_levels=request.zoom_levels,
        sector_class=request.sector_class,
        cache_root=request.cache_root,
    )

    try:
        tiles_report = self._tile_downloader.download_tiles_for_area(download_request)
    except Exception as exc:
        if not _is_recognised(exc, _DOWNLOAD_RECOGNISED_NAMES):
            # Unknown — let it propagate so the lockfile's __exit__
            # releases (AC-6).
            raise
        self._logger.error(
            "download phase failed with a recognised exception",
            extra={
                "kind": _LOG_DOWNLOAD_FAILED,
                "kv": {"exception_type": type(exc).__name__},
            },
        )
        return _download_failure(str(exc), None)

    if tiles_report.outcome is not DownloadOutcomeCut.FAILURE:
        return tiles_report

    self._logger.error(
        "download phase reported FAILURE",
        extra={
            "kind": _LOG_DOWNLOAD_FAILED,
            "kv": {"failure_reason": tiles_report.failure_reason},
        },
    )
    return _download_failure(
        tiles_report.failure_reason or "download outcome=failure",
        tiles_report,
    )
|
||||
|
||||
def _run_verify_phase(
    self,
    *,
    request: BuildCacheRequest,
    flight_resolve_report: FlightResolveReport,
    download_report: object,
    start_ns: int,
) -> CacheBuildReport | None:
    """Check that the companion is ready before the build phase runs.

    Args:
        request: The operator's build-cache request; supplies the
            companion address.
        flight_resolve_report: Carried into any failure report.
        download_report: Report from the download phase, carried into any
            failure report.
        start_ns: Clock reading taken at pipeline start.

    Returns:
        ``None`` when the readiness outcome is not ``NOT_READY``; otherwise
        a failure ``CacheBuildReport``. Both failure paths (typed exception
        and ``NOT_READY``) are reported with ``FailurePhase.DOWNLOAD``.
    """

    def _not_ready_report(detail: str) -> CacheBuildReport:
        # Both failure paths share the same report shape; only the text
        # after the "companion not ready: " prefix varies.
        return CacheBuildReport(
            outcome=BuildCacheOutcome.FAILURE,
            failure_phase=FailurePhase.DOWNLOAD,
            flight_resolve_report=flight_resolve_report,
            download_report=download_report,  # type: ignore[arg-type]
            build_report=None,
            failure_reason=f"companion not ready: {detail}",
            wall_clock_s=self._elapsed_s(start_ns),
        )

    try:
        readiness = self._companion_bringup.verify_companion_ready(request.companion_address)
    except (CompanionUnreachableError, ContentHashMismatchError) as exc:
        exc_name = type(exc).__name__
        self._logger.error(
            "companion verify-ready raised a typed exception",
            extra={
                "kind": _LOG_COMPANION_NOT_READY,
                "kv": {"exception_type": exc_name},
            },
        )
        return _not_ready_report(exc_name)

    if readiness.outcome is not ReadinessOutcome.NOT_READY:
        return None

    reasons_text = ", ".join(readiness.not_ready_reasons) or "no reason reported"
    self._logger.error(
        "companion reported not_ready",
        extra={
            "kind": _LOG_COMPANION_NOT_READY,
            "kv": {"not_ready_reasons": list(readiness.not_ready_reasons)},
        },
    )
    return _not_ready_report(reasons_text)
|
||||
|
||||
def _run_build_phase(
    self,
    *,
    request: BuildCacheRequest,
    flight_resolve_report: FlightResolveReport,
    download_report: object,
    start_ns: int,
    secrets: tuple[str, ...],
) -> CacheBuildReport:
    """Open an SSH session to the companion and run the remote C10 build.

    Args:
        request: The operator's build-cache request.
        flight_resolve_report: Supplies bbox, takeoff origin and flight id
            for the remote request; carried into the final report.
        download_report: Report from the download phase, carried into the
            final report.
        start_ns: Clock reading taken at pipeline start.
        secrets: Passed to the invoker as ``secrets_to_redact``.

    Returns:
        A ``CacheBuildReport`` whose outcome is ``SUCCESS``,
        ``IDEMPOTENT_NO_OP`` or ``FAILURE`` (with ``failure_phase=BUILD``).

    Raises:
        Exception: An invoker exception that is neither
            ``BuildReportParseError`` nor recognised via
            ``_BUILD_RECOGNISED_NAMES`` is re-raised unchanged, after the
            session close has been attempted.
    """

    def _build_failure(reason: str, remote: object = None) -> CacheBuildReport:
        # Single place that shapes a build-phase failure report.
        return CacheBuildReport(
            outcome=BuildCacheOutcome.FAILURE,
            failure_phase=FailurePhase.BUILD,
            flight_resolve_report=flight_resolve_report,
            download_report=download_report,  # type: ignore[arg-type]
            build_report=remote,
            failure_reason=reason,
            wall_clock_s=self._elapsed_s(start_ns),
        )

    def _log_build_error(text: str, kv: dict) -> None:
        self._logger.error(text, extra={"kind": _LOG_BUILD_FAILED, "kv": kv})

    remote_request = RemoteBuildRequest(
        bbox=flight_resolve_report.bbox,
        zoom_levels=request.zoom_levels,
        sector_class=request.sector_class,
        calibration_path=request.calibration_path,
        expected_engines=request.expected_engines,
        companion_cache_root=self._config.companion_cache_root,
        takeoff_origin=flight_resolve_report.takeoff_origin,
        flight_id=flight_resolve_report.flight_id,
    )

    try:
        session = self._ssh_factory.open(
            request.companion_address,
            timeout_s=self._config.ssh_connect_timeout_s,
        )
    except CompanionUnreachableError as exc:
        _log_build_error(
            "ssh open for build phase failed",
            {"exception_type": type(exc).__name__, "reason": exc.reason.value},
        )
        return _build_failure(f"ssh open failed: {exc!s}")

    remote_report: RemoteBuildReport | None = None
    # One try/except/finally: the session is closed on every exit path,
    # including early returns from the handlers and re-raised exceptions.
    try:
        remote_report = self._remote_c10_invoker.invoke(
            session, remote_request, secrets_to_redact=secrets
        )
    except BuildReportParseError as exc:
        # Local typed parse failure — recognised as a build-phase
        # diagnosis and folded into the report (AC-4 spirit).
        _log_build_error(
            "remote C10 stdout did not produce a parseable BuildReport",
            {"exception_type": type(exc).__name__},
        )
        return _build_failure(str(exc))
    except Exception as exc:
        if not _is_recognised(exc, _BUILD_RECOGNISED_NAMES):
            # Unknown — propagate so the lockfile is released and
            # the operator sees the original traceback (AC-6).
            raise
        _log_build_error(
            "remote C10 invocation raised a recognised exception",
            {"exception_type": type(exc).__name__},
        )
        return _build_failure(str(exc))
    finally:
        # Best-effort close: a failing close is logged and never masks the
        # build result.
        try:
            session.close()
        except Exception:
            self._logger.warning(
                "ssh session close raised; proceeding",
                extra={"kind": _LOG_BUILD_FAILED, "kv": {"phase": "session_close"}},
            )

    assert remote_report is not None, (
        "BuildCacheOrchestrator: invoke() returned without setting build_report; "
        "early-return paths should have handled all error cases"
    )

    if remote_report.outcome is RemoteBuildOutcome.IDEMPOTENT_NO_OP:
        self._logger.info(
            "build phase reported IDEMPOTENT_NO_OP (D-C10-1)",
            extra={
                "kind": _LOG_BUILD_CACHE_IDEMPOTENT,
                "kv": {"manifest_hash": remote_report.manifest_hash},
            },
        )
        return CacheBuildReport(
            outcome=BuildCacheOutcome.IDEMPOTENT_NO_OP,
            failure_phase=FailurePhase.NONE,
            flight_resolve_report=flight_resolve_report,
            download_report=download_report,  # type: ignore[arg-type]
            build_report=remote_report,
            failure_reason=None,
            wall_clock_s=self._elapsed_s(start_ns),
        )

    if remote_report.outcome is RemoteBuildOutcome.FAILURE:
        _log_build_error(
            "build phase reported FAILURE",
            {"failure_reason": remote_report.failure_reason},
        )
        return _build_failure(
            remote_report.failure_reason or "build outcome=failure",
            remote_report,
        )

    # Success: the elapsed time is read once so the log record and the
    # report carry the same value.
    elapsed_s = self._elapsed_s(start_ns)
    success_kv = {
        "tiles_downloaded": getattr(download_report, "tiles_downloaded", None),
        "engines_built": remote_report.engines_built,
        "engines_reused": remote_report.engines_reused,
        "descriptors_generated": remote_report.descriptors_generated,
        "wall_clock_s": elapsed_s,
    }
    self._logger.info(
        "build-cache pipeline completed successfully",
        extra={"kind": _LOG_BUILD_CACHE_SUCCESS, "kv": success_kv},
    )
    return CacheBuildReport(
        outcome=BuildCacheOutcome.SUCCESS,
        failure_phase=FailurePhase.NONE,
        flight_resolve_report=flight_resolve_report,
        download_report=download_report,  # type: ignore[arg-type]
        build_report=remote_report,
        failure_reason=None,
        wall_clock_s=elapsed_s,
    )
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Helpers — misc
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def _elapsed_s(self, start_ns: int) -> float:
    """Return the time since ``start_ns`` per ``self._clock``, scaled by ``_NS_PER_S``."""
    elapsed_ns = self._clock.monotonic_ns() - start_ns
    return elapsed_ns / _NS_PER_S
|
||||
|
||||
def _collect_secrets(self, request: BuildCacheRequest) -> tuple[str, ...]:
|
||||
secrets: list[str] = []
|
||||
if request.api_key:
|
||||
secrets.append(request.api_key)
|
||||
if self._config.flights_api_auth_token:
|
||||
secrets.append(self._config.flights_api_auth_token)
|
||||
return tuple(secrets)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-level helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _is_recognised(exc: BaseException, names: frozenset[str]) -> bool:
|
||||
"""Return ``True`` iff any class in ``exc``'s MRO has a name in ``names``."""
|
||||
return any(cls.__name__ in names for cls in type(exc).__mro__)
|
||||
|
||||
|
||||
def _flight_source_kind(source: object) -> str:
    """Return a short label for the flight-source variant of ``source``.

    ``"flight_by_id"`` for ``FlightById``, ``"flight_from_file"`` for
    ``FlightFromFile``, and ``"unknown"`` for anything else.
    """
    labelled_types = (
        (FlightById, "flight_by_id"),
        (FlightFromFile, "flight_from_file"),
    )
    for source_type, label in labelled_types:
        if isinstance(source, source_type):
            return label
    return "unknown"
|
||||
|
||||
|
||||
def _bbox_kv(bbox: object) -> dict[str, float]:
|
||||
return {
|
||||
"min_lat_deg": getattr(bbox, "min_lat_deg", float("nan")),
|
||||
"min_lon_deg": getattr(bbox, "min_lon_deg", float("nan")),
|
||||
"max_lat_deg": getattr(bbox, "max_lat_deg", float("nan")),
|
||||
"max_lon_deg": getattr(bbox, "max_lon_deg", float("nan")),
|
||||
}
|
||||
|
||||
|
||||
def _failure_reason_for_flight_resolve(exc: BaseException, request: BuildCacheRequest) -> str:
    """Render the operator-facing failure reason for a flight-resolve error.

    Empty-waypoint and not-found errors get fixed wording (the latter
    includes the flight id when the request's source is ``FlightById``);
    every other exception is rendered as ``"<TypeName>: <message>"``.
    """
    if isinstance(exc, EmptyWaypointsError):
        return "empty waypoints; re-plan in Mission Planner UI"
    if not isinstance(exc, FlightNotFoundError):
        return f"{type(exc).__name__}: {exc!s}"
    flight_source = request.flight_source
    if isinstance(flight_source, FlightById):
        return f"flight not found: {flight_source.flight_id}"
    return "flight not found"
|
||||
@@ -38,12 +38,22 @@ from uuid import UUID
|
||||
import click
|
||||
|
||||
from gps_denied_onboard.components.c12_operator_tooling._types import (
|
||||
BuildCacheOutcome,
|
||||
BuildCacheRequest,
|
||||
CacheBuildReport,
|
||||
CompanionAddress,
|
||||
FailurePhase,
|
||||
FlightById,
|
||||
FlightFromFile,
|
||||
FlightSource,
|
||||
SectorClassification,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.config import (
|
||||
C12Config,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.errors import (
|
||||
BuildLockHeldError,
|
||||
CacheBuildError,
|
||||
CompanionUnreachableError,
|
||||
ContentHashMismatchError,
|
||||
)
|
||||
@@ -77,12 +87,6 @@ from gps_denied_onboard.components.c12_operator_tooling.flights_api.errors impor
|
||||
FlightsApiUnreachableError,
|
||||
WaypointSchemaError,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.flights_api.interface import (
|
||||
FlightDto,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.freshness_table import (
|
||||
freshness_threshold_months,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.sector_classification_store import (
|
||||
SectorClassificationStore,
|
||||
)
|
||||
@@ -364,6 +368,25 @@ def download(ctx: click.Context, area: str, bbox: str) -> None:
|
||||
required=True,
|
||||
help="Path to the camera calibration JSON to upload alongside the cache.",
|
||||
)
|
||||
@click.option(
|
||||
"--companion-host",
|
||||
type=str,
|
||||
required=True,
|
||||
help="Companion hostname or IP for the SSH-driven C10 build phase.",
|
||||
)
|
||||
@click.option("--companion-port", type=int, default=22, help="Companion SSH port (default 22).")
|
||||
@click.option(
|
||||
"--satellite-provider-url",
|
||||
type=str,
|
||||
required=True,
|
||||
help="The C11 satellite-provider URL the download phase fetches tiles from.",
|
||||
)
|
||||
@click.option(
|
||||
"--api-key",
|
||||
type=str,
|
||||
required=True,
|
||||
help="C11 satellite-provider API key (NEVER logged; AC-9 redaction guarantee).",
|
||||
)
|
||||
@click.pass_context
|
||||
def build_cache(
|
||||
ctx: click.Context,
|
||||
@@ -371,9 +394,14 @@ def build_cache(
|
||||
flight_file: Path | None,
|
||||
sector_class: str,
|
||||
calibration_path: Path,
|
||||
companion_host: str,
|
||||
companion_port: int,
|
||||
satellite_provider_url: str,
|
||||
api_key: str,
|
||||
) -> None:
|
||||
"""Orchestrate the F1 cache build (sibling AZ-328)."""
|
||||
state = ctx.obj
|
||||
config: C12Config = state["config"]
|
||||
logger = state["logger"]
|
||||
_emit_invoked(
|
||||
logger,
|
||||
@@ -382,6 +410,10 @@ def build_cache(
|
||||
"flight_id": flight_id,
|
||||
"flight_file": str(flight_file) if flight_file else None,
|
||||
"sector_class": sector_class,
|
||||
"companion_host": companion_host,
|
||||
"companion_port": companion_port,
|
||||
"satellite_provider_url": satellite_provider_url,
|
||||
"api_key": "REDACTED",
|
||||
},
|
||||
)
|
||||
|
||||
@@ -401,46 +433,81 @@ def build_cache(
|
||||
)
|
||||
|
||||
services = state.get("services")
|
||||
if services is None or not hasattr(services, "flights_api_client"):
|
||||
if services is None or not hasattr(services, "build_cache_orchestrator"):
|
||||
_emit_ok(
|
||||
logger,
|
||||
"build-cache",
|
||||
{"note": "no flights_api_client wired (composition-root pending)"},
|
||||
{"note": "no build_cache_orchestrator wired (composition-root pending)"},
|
||||
)
|
||||
ctx.exit(EXIT_OK)
|
||||
|
||||
sector_class_enum = SectorClassification(sector_class.lower())
|
||||
months = freshness_threshold_months(sector_class_enum)
|
||||
flight_source: FlightSource
|
||||
if flight_id is not None:
|
||||
flight_source = FlightById(flight_id=UUID(flight_id))
|
||||
else:
|
||||
assert flight_file is not None # gated by the mutually-exclusive check
|
||||
flight_source = FlightFromFile(path=flight_file)
|
||||
|
||||
request = BuildCacheRequest(
|
||||
flight_source=flight_source,
|
||||
sector_class=sector_class_enum,
|
||||
calibration_path=calibration_path,
|
||||
satellite_provider_url=satellite_provider_url,
|
||||
api_key=api_key,
|
||||
companion_address=CompanionAddress(host=companion_host, port=companion_port),
|
||||
expected_engines=config.companion.expected_engines,
|
||||
cache_root=config.build_cache.cache_staging_root,
|
||||
zoom_levels=config.build_cache.zoom_levels,
|
||||
)
|
||||
|
||||
try:
|
||||
flight = _resolve_flight(services, flight_id=flight_id, flight_file=flight_file)
|
||||
orchestrator = services.build_cache_orchestrator
|
||||
orchestrator.build_cache(
|
||||
flight=flight,
|
||||
sector_class=sector_class_enum,
|
||||
freshness_months=months,
|
||||
calibration_path=calibration_path,
|
||||
report: CacheBuildReport = services.build_cache_orchestrator.build_cache(request)
|
||||
except BuildLockHeldError as exc:
|
||||
_emit_error(
|
||||
logger,
|
||||
"build-cache",
|
||||
exit_code=EXIT_LOCK_HELD,
|
||||
exception=exc,
|
||||
remediation=exc.remediation,
|
||||
kv={"lock_path": str(exc.lock_path)},
|
||||
)
|
||||
click.echo(f"build-cache lock held: {exc.remediation}", err=True)
|
||||
ctx.exit(EXIT_LOCK_HELD)
|
||||
except CacheBuildError as exc:
|
||||
_emit_error(
|
||||
logger,
|
||||
"build-cache",
|
||||
exit_code=EXIT_BUILD_FAILURE,
|
||||
exception=exc,
|
||||
remediation=exc.remediation,
|
||||
kv={"failure_phase": exc.failure_phase.value},
|
||||
)
|
||||
click.echo(f"cache build failed: {exc.remediation}", err=True)
|
||||
ctx.exit(EXIT_BUILD_FAILURE)
|
||||
except Exception as exc:
|
||||
_handle_known_exception(
|
||||
ctx,
|
||||
logger,
|
||||
"build-cache",
|
||||
exc,
|
||||
extra_table={
|
||||
"BuildLockHeldError": (
|
||||
EXIT_LOCK_HELD,
|
||||
"Another build-cache run holds the lock; wait for it to finish.",
|
||||
),
|
||||
"CacheBuildError": (
|
||||
EXIT_BUILD_FAILURE,
|
||||
"Cache build failed; consult the orchestrator's structured log.",
|
||||
),
|
||||
},
|
||||
)
|
||||
return
|
||||
_emit_ok(logger, "build-cache", {"flight_id": str(flight.flight_id)})
|
||||
ctx.exit(EXIT_OK)
|
||||
|
||||
exit_code = _exit_code_for_report(report)
|
||||
_emit_ok(
|
||||
logger,
|
||||
"build-cache",
|
||||
{
|
||||
"outcome": report.outcome.value,
|
||||
"failure_phase": report.failure_phase.value,
|
||||
"wall_clock_s": report.wall_clock_s,
|
||||
"exit_code": exit_code,
|
||||
},
|
||||
)
|
||||
if exit_code != EXIT_OK and report.failure_reason:
|
||||
click.echo(f"cache build failed: {report.failure_reason}", err=True)
|
||||
ctx.exit(exit_code)
|
||||
|
||||
|
||||
@app.command(
|
||||
@@ -679,23 +746,37 @@ def _handle_known_exception(
|
||||
raise AssertionError("unreachable") # pragma: no cover
|
||||
|
||||
|
||||
def _resolve_flight(
|
||||
services: Any,
|
||||
*,
|
||||
flight_id: str | None,
|
||||
flight_file: Path | None,
|
||||
) -> FlightDto:
|
||||
"""Resolve the operator's flight via the flights API or the offline file."""
|
||||
client = services.flights_api_client
|
||||
if flight_id is not None:
|
||||
flight_uuid = UUID(flight_id)
|
||||
return client.fetch_flight(
|
||||
flight_id=flight_uuid,
|
||||
base_url=getattr(services, "flights_api_base_url", ""),
|
||||
auth_token=getattr(services, "flights_api_auth_token", ""),
|
||||
_FLIGHT_RESOLVE_EXCEPTION_EXIT_CODES: dict[str, int] = {
|
||||
"FlightsApiUnreachableError": EXIT_FLIGHTS_API_UNREACHABLE,
|
||||
"FlightsApiAuthError": EXIT_FLIGHTS_API_AUTH,
|
||||
"FlightNotFoundError": EXIT_FLIGHT_NOT_FOUND,
|
||||
"FlightsApiSchemaError": EXIT_FLIGHT_SCHEMA,
|
||||
"WaypointSchemaError": EXIT_FLIGHT_SCHEMA,
|
||||
"FlightFileNotFoundError": EXIT_FLIGHT_SCHEMA,
|
||||
"EmptyWaypointsError": EXIT_EMPTY_WAYPOINTS,
|
||||
}
|
||||
|
||||
|
||||
def _exit_code_for_report(report: CacheBuildReport) -> int:
|
||||
"""Map a returned :class:`CacheBuildReport` to the documented exit code.
|
||||
|
||||
Success and idempotent-no-op both exit ``0`` (AC-7). For
|
||||
``failure_phase=flight_resolve`` we route by the captured
|
||||
``failure_exception_type`` so the granular AZ-489 exit codes are
|
||||
preserved. Other phases collapse to the per-phase code documented
|
||||
in :mod:`exit_codes`.
|
||||
"""
|
||||
if report.outcome in (BuildCacheOutcome.SUCCESS, BuildCacheOutcome.IDEMPOTENT_NO_OP):
|
||||
return EXIT_OK
|
||||
if report.failure_phase is FailurePhase.FLIGHT_RESOLVE:
|
||||
return _FLIGHT_RESOLVE_EXCEPTION_EXIT_CODES.get(
|
||||
report.failure_exception_type or "", EXIT_DOWNLOAD_FAILURE
|
||||
)
|
||||
assert flight_file is not None # narrowed by the mutually-exclusive gate
|
||||
return client.load_flight_file(path=flight_file)
|
||||
if report.failure_phase is FailurePhase.DOWNLOAD:
|
||||
return EXIT_DOWNLOAD_FAILURE
|
||||
if report.failure_phase is FailurePhase.BUILD:
|
||||
return EXIT_BUILD_FAILURE
|
||||
return EXIT_BUILD_FAILURE
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -27,6 +27,7 @@ from pathlib import Path, PurePosixPath
|
||||
from gps_denied_onboard.config.schema import ConfigError
|
||||
|
||||
__all__ = [
|
||||
"C12BuildCacheConfig",
|
||||
"C12CompanionConfig",
|
||||
"C12Config",
|
||||
"HostKeyPolicy",
|
||||
@@ -49,8 +50,12 @@ class HostKeyPolicy(str, Enum):
|
||||
_DEFAULT_LOG_PATH = Path("~/.azaion/onboard/c12-tooling.log").expanduser()
|
||||
_DEFAULT_SECTOR_STORE_PATH = Path("~/.azaion/onboard/sector-classifications.json").expanduser()
|
||||
_DEFAULT_COMPANION_CACHE_ROOT = PurePosixPath("/var/lib/azaion/c10/cache")
|
||||
_DEFAULT_CACHE_STAGING_ROOT = Path("~/.azaion/onboard/cache-staging").expanduser()
|
||||
_DEFAULT_CONNECT_TIMEOUT_S = 10.0
|
||||
_DEFAULT_SHA256SUM_TIMEOUT_S = 60.0
|
||||
_DEFAULT_LOCK_TIMEOUT_S = 5.0
|
||||
_DEFAULT_FLIGHT_BBOX_BUFFER_M = 1000.0
|
||||
_DEFAULT_SSH_CONNECT_TIMEOUT_S = 30.0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@@ -91,6 +96,68 @@ class C12CompanionConfig:
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class C12BuildCacheConfig:
    """Settings read by :class:`BuildCacheOrchestrator` (AZ-328).

    * ``cache_staging_root`` — workstation-side directory that holds the
      lockfile AND the C11 download journal/store; the orchestrator
      creates it (``mkdir -p``) on first use.
    * ``lock_filename`` / ``lock_timeout_s`` — control the cross-process
      mutex per AZ-328 AC-5. Production uses ``filelock`` (CP-INV-4
      parity with c10).
    * ``ssh_connect_timeout_s`` — timeout passed when the SSH session for
      the build phase is opened.
    * ``companion_cache_root`` — POSIX path on the airborne companion
      under which C10 builds the engines + descriptors + Manifest.
      Forwarded to the remote C10 invoker.
    * ``flight_bbox_buffer_m`` — horizontal-distance buffer applied to
      the bbox derived from the resolved flight waypoints (FAC-INV-3).
    * ``flights_api_base_url`` / ``flights_api_auth_token`` — the
      operator's credentials for the parent-suite flights service. The
      auth token MUST NOT be logged (AC-15); the orchestrator passes it
      to ``flights_api_client.fetch_flight`` and otherwise treats it as
      opaque.
    * ``zoom_levels`` — slippy-map zoom levels to download per request;
      defaults to a single zoom 18 grid which matches AC-NEW-1 imagery
      resolution. Override per request via ``BuildCacheRequest``.

    Raises:
        ConfigError: On construction, when a timeout is not positive, the
            bbox buffer is negative, ``zoom_levels`` is empty or holds a
            value outside ``[0, 22]``, or ``lock_filename`` is empty.
    """

    cache_staging_root: Path = _DEFAULT_CACHE_STAGING_ROOT
    lock_filename: str = ".c12.lock"
    lock_timeout_s: float = _DEFAULT_LOCK_TIMEOUT_S
    ssh_connect_timeout_s: float = _DEFAULT_SSH_CONNECT_TIMEOUT_S
    companion_cache_root: PurePosixPath = _DEFAULT_COMPANION_CACHE_ROOT
    flight_bbox_buffer_m: float = _DEFAULT_FLIGHT_BBOX_BUFFER_M
    flights_api_base_url: str = ""
    flights_api_auth_token: str = ""
    zoom_levels: tuple[int, ...] = (18,)

    def __post_init__(self) -> None:
        # Checks run in a fixed order; the first violated one is reported.
        lock_timeout = self.lock_timeout_s
        if lock_timeout <= 0:
            raise ConfigError(
                f"C12BuildCacheConfig.lock_timeout_s must be > 0; got {lock_timeout}"
            )

        ssh_timeout = self.ssh_connect_timeout_s
        if ssh_timeout <= 0:
            raise ConfigError(
                f"C12BuildCacheConfig.ssh_connect_timeout_s must be > 0; got {ssh_timeout}"
            )

        bbox_buffer = self.flight_bbox_buffer_m
        if bbox_buffer < 0:
            raise ConfigError(
                f"C12BuildCacheConfig.flight_bbox_buffer_m must be >= 0; got {bbox_buffer}"
            )

        zooms = self.zoom_levels
        if not zooms:
            raise ConfigError(
                "C12BuildCacheConfig.zoom_levels must contain at least one zoom level"
            )
        for zoom in zooms:
            if zoom < 0 or zoom > 22:
                raise ConfigError(
                    f"C12BuildCacheConfig.zoom_levels values must be in [0, 22]; got {zooms}"
                )

        if not self.lock_filename:
            raise ConfigError("C12BuildCacheConfig.lock_filename must be non-empty")
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class C12Config:
|
||||
"""Per-component config for C12 operator tooling.
|
||||
@@ -103,11 +170,14 @@ class C12Config:
|
||||
:class:`SectorClassificationStore`. Defaults to
|
||||
``~/.azaion/onboard/sector-classifications.json``.
|
||||
* ``companion`` — nested AZ-327 SSH config block.
|
||||
* ``build_cache`` — nested AZ-328 orchestrator knobs (lockfile,
|
||||
flights service URL/token, bbox buffer).
|
||||
"""
|
||||
|
||||
log_path: Path = _DEFAULT_LOG_PATH
|
||||
sector_classification_store_path: Path = _DEFAULT_SECTOR_STORE_PATH
|
||||
companion: C12CompanionConfig = field(default_factory=C12CompanionConfig)
|
||||
build_cache: C12BuildCacheConfig = field(default_factory=C12BuildCacheConfig)
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if not isinstance(self.companion, C12CompanionConfig):
|
||||
@@ -115,3 +185,8 @@ class C12Config:
|
||||
"C12Config.companion must be a C12CompanionConfig; got "
|
||||
f"{type(self.companion).__name__}"
|
||||
)
|
||||
if not isinstance(self.build_cache, C12BuildCacheConfig):
|
||||
raise ConfigError(
|
||||
"C12Config.build_cache must be a C12BuildCacheConfig; got "
|
||||
f"{type(self.build_cache).__name__}"
|
||||
)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""C12 ``CompanionBringup`` error hierarchy (AZ-327).
|
||||
"""C12 ``CompanionBringup`` error hierarchy (AZ-327, AZ-328).
|
||||
|
||||
Two failure modes own dedicated exit codes in
|
||||
:mod:`gps_denied_onboard.components.c12_operator_tooling.exit_codes`:
|
||||
@@ -12,7 +12,19 @@ Two failure modes own dedicated exit codes in
|
||||
"engine missing" (which is a not-ready signal returned in the
|
||||
:class:`ReadinessReport`, not an exception).
|
||||
|
||||
Both errors expose a ``remediation`` property the
|
||||
AZ-328 adds the ``BuildCacheOrchestrator`` family:
|
||||
|
||||
* :class:`CacheBuildError` — generic wrap of any download / verify-ready
|
||||
/ build phase failure. Carries a ``failure_phase`` field and a
|
||||
pre-baked ``remediation`` hint so the CLI can route by ``$?`` AND the
|
||||
operator gets actionable text.
|
||||
* :class:`BuildLockHeldError` — concurrent ``build-cache`` invocation
|
||||
blocked by the workstation lockfile. Subclass of
|
||||
:class:`CacheBuildError` so the CLI's exception table catches both.
|
||||
* :class:`BuildReportParseError` — C10's stdout did not yield a parseable
|
||||
``BuildReport`` JSON document; surfaced as ``failure_phase=build``.
|
||||
|
||||
All errors expose a ``remediation`` property the
|
||||
:func:`gps_denied_onboard.components.c12_operator_tooling.cli.main`
|
||||
layer reads to print a one-line operator-friendly hint to stderr.
|
||||
|
||||
@@ -25,11 +37,17 @@ discipline by keeping the hint table in c12.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from gps_denied_onboard.components.c12_operator_tooling._types import (
|
||||
CompanionUnreachableReason,
|
||||
FailurePhase,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"BuildLockHeldError",
|
||||
"BuildReportParseError",
|
||||
"CacheBuildError",
|
||||
"CompanionUnreachableError",
|
||||
"ContentHashMismatchError",
|
||||
]
|
||||
@@ -125,3 +143,126 @@ class ContentHashMismatchError(Exception):
|
||||
"Re-run the cache build (`operator-tool build-cache --flight-id ...`) "
|
||||
"to repopulate the affected engine."
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AZ-328: BuildCacheOrchestrator error family
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
# Operator hint for failures while resolving the flight.
_REMEDIATION_FLIGHT_RESOLVE: str = (
    "Verify --flight-id (or --flight-file path), the flights service URL in config, "
    "and the operator auth token. For a 404 the GUID is wrong; for an empty waypoint "
    "list re-plan in the Mission Planner UI."
)

# Operator hint for failures attributed to the download phase.
_REMEDIATION_DOWNLOAD: str = (
    "Re-run with the same args; check `satellite_provider_url` and `api_key` "
    "in the c11 config, and confirm the workstation has internet egress."
)

# Operator hint for failures attributed to the build phase.
_REMEDIATION_BUILD: str = (
    "Inspect the companion `~/.azaion/onboard/c10-build.log`; consider "
    "`rm -rf <companion_cache_root>/engines/` to force a clean rebuild on the "
    "next run, then re-issue the same `build-cache` command."
)

# Used when a failure phase has no entry in ``_REMEDIATIONS``.
_REMEDIATION_NONE_FALLBACK: str = (
    "No remediation hint registered for this failure phase; consult the structured "
    "log for the wrapped exception details."
)

# Phase -> default remediation hint.
_REMEDIATIONS: dict[FailurePhase, str] = {
    FailurePhase.FLIGHT_RESOLVE: _REMEDIATION_FLIGHT_RESOLVE,
    FailurePhase.DOWNLOAD: _REMEDIATION_DOWNLOAD,
    FailurePhase.BUILD: _REMEDIATION_BUILD,
}
|
||||
|
||||
|
||||
class CacheBuildError(Exception):
    """A cache-build failure tagged with its phase and a remediation hint.

    The orchestrator wraps typed collaborator errors (C11 / C10 / SSH /
    parse) in this class so the CLI can route on ``failure_phase`` to an
    exit code without importing the upstream component's exception
    hierarchy (AZ-507 cross-component rule).

    Attributes are set in ``__init__``: ``failure_phase`` and
    ``wrapped_exception_repr`` are stored as given; the hint is exposed
    through the read-only ``remediation`` property.
    """

    def __init__(
        self,
        *,
        failure_phase: FailurePhase,
        wrapped_exception_repr: str,
        message: str | None = None,
        remediation: str | None = None,
    ) -> None:
        """Build the error.

        Args:
            failure_phase: Phase the failure belongs to; must not be
                ``FailurePhase.NONE``.
            wrapped_exception_repr: ``repr`` text of the underlying error.
            message: Exception message; when falsy a default naming the
                phase and the wrapped repr is used.
            remediation: Operator hint; when falsy the per-phase default
                (or the generic fallback) is used.

        Raises:
            ValueError: ``failure_phase`` is ``FailurePhase.NONE``.
        """
        if failure_phase is FailurePhase.NONE:
            raise ValueError(
                "CacheBuildError must be raised for a real failure phase; "
                "FailurePhase.NONE is reserved for successful CacheBuildReport."
            )
        if not message:
            message = (
                f"cache build failed in phase {failure_phase.value}: {wrapped_exception_repr}"
            )
        super().__init__(message)
        self.failure_phase = failure_phase
        self.wrapped_exception_repr = wrapped_exception_repr
        if remediation:
            self._remediation = remediation
        else:
            self._remediation = _REMEDIATIONS.get(failure_phase, _REMEDIATION_NONE_FALLBACK)

    @property
    def remediation(self) -> str:
        """Operator-friendly hint; defaults vary by ``failure_phase``."""
        return self._remediation
|
||||
|
||||
|
||||
class BuildLockHeldError(CacheBuildError):
    """Raised when another ``build-cache`` run holds the workstation lockfile.

    It subclasses :class:`CacheBuildError` so handlers for the base class
    also catch it. The remediation text overrides the per-phase default
    and names the lock path so the operator can recover deterministically.
    The error is tagged ``FailurePhase.DOWNLOAD``.
    """

    def __init__(self, *, lock_path: Path, timeout_s: float) -> None:
        """Build the error.

        Args:
            lock_path: Path of the contended lockfile.
            timeout_s: How long acquisition was attempted, in seconds.
        """
        wrapped = f"LockTimeout(path={lock_path!s}, timeout_s={timeout_s})"
        text = (
            f"build-cache lock held: another `operator-tool build-cache` is in "
            f"progress (lock={lock_path}, waited {timeout_s:.1f} s)"
        )
        hint = (
            f"Another `build-cache` is in progress; wait for it to finish, or "
            f"kill the holding process and remove `{lock_path}` if it is stale."
        )
        super().__init__(
            failure_phase=FailurePhase.DOWNLOAD,
            wrapped_exception_repr=wrapped,
            message=text,
            remediation=hint,
        )
        self.lock_path = lock_path
        self.timeout_s = timeout_s
|
||||
|
||||
|
||||
class BuildReportParseError(CacheBuildError):
    """Signal that C10's companion-side stdout held no parseable BuildReport JSON.

    The C10 process probably crashed mid-output or printed garbage, so the
    operator has to look at the captured tail and at the companion's own
    log file. Always reported with ``failure_phase=build`` (AZ-328 Risk 3
    mitigation).

    Attributes:
        stdout_tail: Captured tail of the remote stdout, as passed in.
        stderr_tail: Captured tail of the remote stderr, as passed in.
    """

    def __init__(self, *, stdout_tail: str, stderr_tail: str) -> None:
        """Build the error from the captured output tails."""
        # Only the first 200 characters of each tail go into the repr; the
        # complete tails stay available as attributes.
        stdout_excerpt = stdout_tail[:200]
        stderr_excerpt = stderr_tail[:200]
        super().__init__(
            failure_phase=FailurePhase.BUILD,
            wrapped_exception_repr=(
                f"BuildReportParseError(stdout_tail={stdout_excerpt!r}, "
                f"stderr_tail={stderr_excerpt!r})"
            ),
            message=(
                "remote C10 build did not emit a parseable BuildReport JSON line; "
                "the process likely crashed or printed garbage"
            ),
            remediation=(
                "Inspect the companion `~/.azaion/onboard/c10-build.log` for the "
                "underlying crash; the captured stdout/stderr tail is on the error "
                "object's `wrapped_exception_repr`."
            ),
        )
        self.stdout_tail = stdout_tail
        self.stderr_tail = stderr_tail
|
||||
|
||||
@@ -0,0 +1,109 @@
|
||||
"""Workstation-side file-lock protocols + ``filelock``-backed concrete (AZ-328).
|
||||
|
||||
The C12 ``BuildCacheOrchestrator`` acquires ``cache_staging_root/.c12.lock``
|
||||
to serialise concurrent operator runs of ``operator-tool build-cache``
|
||||
(description.md § 7). C10's own lockfile lives on the companion under
|
||||
``companion_cache_root/.c10.lock`` (CP-INV-4) — these are independent;
|
||||
the workstation lock prevents two workstation processes from racing on
|
||||
the C6 cache root, the companion lock prevents two companion processes
|
||||
from racing on the engines+manifest root.
|
||||
|
||||
Why a separate factory rather than reusing c10's: the AZ-507 cross-
|
||||
component rule forbids importing ``c10_provisioning`` from
|
||||
``c12_operator_tooling``. Both factories thinly wrap the same
|
||||
``filelock`` library; the contract Protocol below is the consumer-side
|
||||
cut for c12.
|
||||
|
||||
The Protocol intentionally mirrors c10's shape (``try_lock(path,
|
||||
*, timeout_s) -> AbstractContextManager[None]``) so a future move to a
|
||||
shared ``helpers/file_lock.py`` is a one-line API change here.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from contextlib import AbstractContextManager
|
||||
from pathlib import Path
|
||||
from typing import Protocol, runtime_checkable
|
||||
|
||||
import filelock
|
||||
|
||||
__all__ = [
|
||||
"FileLock",
|
||||
"FileLockFactory",
|
||||
"FilelockFileLockFactory",
|
||||
"LockTimeout",
|
||||
]
|
||||
|
||||
|
||||
class LockTimeout(Exception):
    """Raised by :meth:`FileLockFactory.try_lock` when acquisition times out.

    A local exception type (rather than ``filelock.Timeout``) so that
    ``BuildCacheOrchestrator`` can catch it without importing the
    third-party ``filelock`` exception class through the consumer-side cut.

    Attributes:
        path: Lockfile that could not be acquired.
        timeout_s: Seconds waited before giving up.
    """

    def __init__(self, *, path: Path, timeout_s: float) -> None:
        """Record the contended ``path`` and the ``timeout_s`` that elapsed."""
        self.path = path
        self.timeout_s = timeout_s
        super().__init__(f"failed to acquire lock at {path} within {timeout_s:.1f} s")
|
||||
|
||||
|
||||
@runtime_checkable
class FileLock(Protocol):
    """Structural type for a context-manager handle to an acquired file lock.

    Any object that provides ``__enter__`` and ``__exit__`` satisfies the
    runtime ``isinstance`` check.
    """

    def __enter__(self) -> FileLock:
        """Enter the managed block and return the handle."""
        ...

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: object | None,
    ) -> None:
        """Leave the managed block; receives the in-flight exception, if any."""
        ...
|
||||
|
||||
|
||||
@runtime_checkable
class FileLockFactory(Protocol):
    """Structural type for objects that acquire a lock on a filesystem path."""

    def try_lock(self, path: Path, *, timeout_s: float) -> AbstractContextManager[None]:
        """Acquire ``path`` within ``timeout_s`` or raise :class:`LockTimeout`.

        Returns a context manager; the in-file implementation releases the
        lock when that context manager exits.
        """
        ...
|
||||
|
||||
|
||||
class FilelockFileLockFactory:
    """Production :class:`FileLockFactory` backed by the ``filelock`` library.

    ``filelock.FileLock`` uses ``fcntl.flock`` on POSIX, so the OS drops the
    lock automatically when the process dies (kill -9, parent exit). That
    gives the AZ-328 AC-6 "lockfile released even on KeyboardInterrupt"
    invariant without extra code here.
    """

    def try_lock(self, path: Path, *, timeout_s: float) -> AbstractContextManager[None]:
        """Acquire the lock at ``path`` and return a handle that releases it.

        The lock is already held when this method returns; the returned
        context manager only releases it on exit.

        Raises:
            LockTimeout: If the lock is not acquired within ``timeout_s``.
        """
        # The lock directory may not exist yet on a fresh workstation.
        lock_dir = path.parent
        lock_dir.mkdir(parents=True, exist_ok=True)

        underlying = filelock.FileLock(str(path))
        try:
            underlying.acquire(timeout=timeout_s)
        except filelock.Timeout as timeout_exc:
            # Translate to the local exception so callers never see filelock's.
            raise LockTimeout(path=path, timeout_s=timeout_s) from timeout_exc
        return _AcquiredFileLockHandle(underlying)
|
||||
|
||||
|
||||
class _AcquiredFileLockHandle:
    """Internal context manager that releases an already-acquired ``filelock.FileLock``.

    Entering does nothing, because the lock was taken before the handle was
    created. Exiting releases it whether or not an exception is in flight.
    """

    def __init__(self, lock: filelock.FileLock) -> None:
        """Keep a reference to the acquired ``lock``."""
        self._lock = lock

    def __enter__(self) -> None:
        """Bind ``None`` to the ``as`` target; nothing is acquired here."""
        return None

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: object | None,
    ) -> None:
        """Release the lock; returns ``None`` so exceptions propagate."""
        self._lock.release()
|
||||
@@ -47,7 +47,10 @@ def parse_flight_payload(payload: Any, *, source_label: str) -> FlightDto:
|
||||
|
||||
waypoints = tuple(
|
||||
sorted(
|
||||
(_parse_waypoint(item, index, source_label) for index, item in enumerate(waypoints_raw)),
|
||||
(
|
||||
_parse_waypoint(item, index, source_label)
|
||||
for index, item in enumerate(waypoints_raw)
|
||||
),
|
||||
key=lambda wp: wp.ordinal,
|
||||
)
|
||||
)
|
||||
@@ -80,9 +83,7 @@ def _parse_waypoint(item: Any, source_index: int, source_label: str) -> Waypoint
|
||||
objective = _parse_enum(
|
||||
item, "objective", WaypointObjective, f"{source_label} waypoint #{source_index}"
|
||||
)
|
||||
source = _parse_enum(
|
||||
item, "source", WaypointSource, f"{source_label} waypoint #{source_index}"
|
||||
)
|
||||
source = _parse_enum(item, "source", WaypointSource, f"{source_label} waypoint #{source_index}")
|
||||
return WaypointDto(
|
||||
ordinal=ordinal,
|
||||
lat_deg=lat_deg,
|
||||
@@ -93,9 +94,7 @@ def _parse_waypoint(item: Any, source_index: int, source_label: str) -> Waypoint
|
||||
)
|
||||
|
||||
|
||||
def _enforce_contiguous_ordinals(
|
||||
waypoints: tuple[WaypointDto, ...], source_label: str
|
||||
) -> None:
|
||||
def _enforce_contiguous_ordinals(waypoints: tuple[WaypointDto, ...], source_label: str) -> None:
|
||||
for expected, wp in enumerate(waypoints):
|
||||
if wp.ordinal != expected:
|
||||
raise WaypointSchemaError(
|
||||
@@ -143,8 +142,7 @@ def _require_int(payload: dict[str, Any], field: str, source_label: str) -> int:
|
||||
value = payload[field]
|
||||
if isinstance(value, bool) or not isinstance(value, int):
|
||||
raise WaypointSchemaError(
|
||||
f"{source_label}: field {field!r} must be an integer; "
|
||||
f"got {type(value).__name__}"
|
||||
f"{source_label}: field {field!r} must be an integer; got {type(value).__name__}"
|
||||
)
|
||||
return value
|
||||
|
||||
@@ -163,9 +161,7 @@ def _require_finite_float(payload: dict[str, Any], field: str, source_label: str
|
||||
return fvalue
|
||||
|
||||
|
||||
def _parse_enum(
|
||||
payload: dict[str, Any], field: str, enum_cls: type, source_label: str
|
||||
) -> Any:
|
||||
def _parse_enum(payload: dict[str, Any], field: str, enum_cls: type, source_label: str) -> Any:
|
||||
if field not in payload:
|
||||
raise WaypointSchemaError(f"{source_label}: missing required field {field!r}")
|
||||
raw = payload[field]
|
||||
|
||||
@@ -62,12 +62,8 @@ def bbox_from_waypoints(
|
||||
sw_enu = WgsConverter.latlonalt_to_local_enu(origin, sw)
|
||||
ne_enu = WgsConverter.latlonalt_to_local_enu(origin, ne)
|
||||
|
||||
sw_inflated_enu = np.array(
|
||||
[sw_enu[0] - buffer_m, sw_enu[1] - buffer_m, 0.0], dtype=np.float64
|
||||
)
|
||||
ne_inflated_enu = np.array(
|
||||
[ne_enu[0] + buffer_m, ne_enu[1] + buffer_m, 0.0], dtype=np.float64
|
||||
)
|
||||
sw_inflated_enu = np.array([sw_enu[0] - buffer_m, sw_enu[1] - buffer_m, 0.0], dtype=np.float64)
|
||||
ne_inflated_enu = np.array([ne_enu[0] + buffer_m, ne_enu[1] + buffer_m, 0.0], dtype=np.float64)
|
||||
|
||||
sw_inflated = WgsConverter.local_enu_to_latlonalt(origin, sw_inflated_enu)
|
||||
ne_inflated = WgsConverter.local_enu_to_latlonalt(origin, ne_inflated_enu)
|
||||
|
||||
@@ -41,7 +41,5 @@ def load_flight_file(*, path: Path) -> FlightDto:
|
||||
try:
|
||||
payload = orjson.loads(raw)
|
||||
except orjson.JSONDecodeError as exc:
|
||||
raise FlightsApiSchemaError(
|
||||
f"flight file {path!s}: not valid JSON: {exc}"
|
||||
) from exc
|
||||
raise FlightsApiSchemaError(f"flight file {path!s}: not valid JSON: {exc}") from exc
|
||||
return parse_flight_payload(payload, source_label=f"flight file {path!s}")
|
||||
|
||||
@@ -81,9 +81,7 @@ class HttpxFlightsApiClient:
|
||||
sleep: Callable[[float], None] | None = None,
|
||||
) -> None:
|
||||
self._transport = transport
|
||||
self._sleep: Callable[[float], None] = (
|
||||
sleep if sleep is not None else _wall_clock_sleep
|
||||
)
|
||||
self._sleep: Callable[[float], None] = sleep if sleep is not None else _wall_clock_sleep
|
||||
self._log = get_logger("c12.flights_api")
|
||||
|
||||
def fetch_flight(
|
||||
|
||||
@@ -0,0 +1,239 @@
|
||||
"""``RemoteCacheProvisionerInvoker`` — SSH-side C10 build entry (AZ-328).
|
||||
|
||||
Runs C10's companion-side build entry over the operator's SSH session
|
||||
and parses the ``BuildReport`` JSON document the C10 process emits as
|
||||
the LAST line of its stdout. Streams the intermediate stdout lines to
|
||||
the structured logger at DEBUG level (``kind="c10.remote.progress"``)
|
||||
so a long-running build is observable from the operator workstation
|
||||
without buffering hours of output in memory.
|
||||
|
||||
AZ-507 cross-component cut: this module does NOT import
|
||||
``c10_provisioning``. The C10 ``BuildReport`` shape arrives as a JSON
|
||||
document on the wire and is parsed into the local
|
||||
:class:`RemoteBuildReport` mirror declared in ``_types.py``.
|
||||
|
||||
AZ-328 Risk 5 mitigation: the ``api_key`` and ``flights_api_auth_token``
|
||||
secrets passed by the orchestrator are surfaced through a redactor that
|
||||
replaces the literal token in any forwarded stdout line with
|
||||
``<REDACTED>`` — defence-in-depth in case the companion's C10 process
|
||||
echoes something it should not.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import shlex
|
||||
from collections.abc import Sequence
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path, PurePosixPath
|
||||
from uuid import UUID
|
||||
|
||||
from gps_denied_onboard._types.geo import BoundingBox, LatLonAlt
|
||||
from gps_denied_onboard.components.c12_operator_tooling._types import (
|
||||
RemoteBuildOutcome,
|
||||
RemoteBuildReport,
|
||||
SectorClassification,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.errors import (
|
||||
BuildReportParseError,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.ssh_session import (
|
||||
SshSession,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"REDACTED_PLACEHOLDER",
|
||||
"RemoteBuildRequest",
|
||||
"RemoteCacheProvisionerInvoker",
|
||||
"build_remote_command",
|
||||
]
|
||||
|
||||
|
||||
# Text substituted for every secret found in forwarded remote output.
REDACTED_PLACEHOLDER: str = "<REDACTED>"

# Structured-log ``kind`` tags emitted by this module.
_LOG_KIND_PROGRESS = "c10.remote.progress"
_LOG_KIND_INVOKE_START = "c12.remote_c10.invoke.start"
_LOG_KIND_INVOKE_OK = "c12.remote_c10.invoke.ok"
_LOG_KIND_INVOKE_FAILED = "c12.remote_c10.invoke.failed"

# Companion-side entry shipped by AZ-325 (E-BOOT scaffolding). The C10 build
# mode is an ``azaion-onboard`` subcommand. It reads the request as JSON from
# stdin, so the bbox/zoom/sector arguments never have to be escaped through
# the shell quoting layer twice.
_REMOTE_ENTRY: str = "azaion-onboard c10 build --json-output --request-stdin"
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class RemoteBuildRequest:
|
||||
"""Shape forwarded to C10's companion-side build entry as a JSON document.
|
||||
|
||||
``takeoff_origin`` + ``flight_id`` are the ADR-010 pass-through
|
||||
fields C10 / AZ-323 bake into the Manifest so a re-planned flight
|
||||
produces a fresh cache identity (CP-INV-8 on the C10 side).
|
||||
"""
|
||||
|
||||
bbox: BoundingBox
|
||||
zoom_levels: tuple[int, ...]
|
||||
sector_class: SectorClassification
|
||||
calibration_path: Path
|
||||
expected_engines: tuple[str, ...]
|
||||
companion_cache_root: PurePosixPath
|
||||
takeoff_origin: LatLonAlt
|
||||
flight_id: UUID
|
||||
|
||||
|
||||
def build_remote_command() -> str:
    """Return the shell command executed on the companion.

    Kept as a module-level function so tests can reach it.
    """
    command: str = _REMOTE_ENTRY
    return command
|
||||
|
||||
|
||||
def _serialise_request(request: RemoteBuildRequest) -> str:
    """Render ``request`` as a compact, single-line JSON document.

    Paths and the flight id are stringified, tuples become JSON arrays, and
    the sector class is emitted as its enum ``value``. Compact separators
    are used, so the document contains no spaces between tokens.
    """
    bbox = request.bbox
    origin = request.takeoff_origin

    bbox_doc = {
        "min_lat_deg": bbox.min_lat_deg,
        "min_lon_deg": bbox.min_lon_deg,
        "max_lat_deg": bbox.max_lat_deg,
        "max_lon_deg": bbox.max_lon_deg,
    }
    origin_doc = {
        "lat_deg": origin.lat_deg,
        "lon_deg": origin.lon_deg,
        "alt_m": origin.alt_m,
    }
    # Key order is preserved by ``json.dumps`` and so is part of the wire format.
    document = {
        "bbox": bbox_doc,
        "zoom_levels": list(request.zoom_levels),
        "sector_class": request.sector_class.value,
        "calibration_path": str(request.calibration_path),
        "expected_engines": list(request.expected_engines),
        "companion_cache_root": str(request.companion_cache_root),
        "takeoff_origin": origin_doc,
        "flight_id": str(request.flight_id),
    }
    return json.dumps(document, separators=(",", ":"))
|
||||
|
||||
|
||||
def _redact(line: str, secrets: Sequence[str]) -> str:
    """Return ``line`` with every occurrence of each secret replaced.

    Args:
        line: Text about to be logged or attached to an error.
        secrets: Literal secret strings to strip; empty strings are ignored.

    Returns:
        ``line`` with each secret replaced by ``REDACTED_PLACEHOLDER``.
    """
    redacted = line
    # Replace the longest secrets first. If a short secret is a prefix or
    # substring of a longer one (say "abc" and "abcdef"), handling the short
    # one first would leave the rest of the long one ("def") in the output.
    # ``sorted`` is stable, so equal-length secrets keep the caller's order.
    for secret in sorted((s for s in secrets if s), key=len, reverse=True):
        redacted = redacted.replace(secret, REDACTED_PLACEHOLDER)
    return redacted
|
||||
|
||||
|
||||
def _parse_build_report(json_line: str) -> RemoteBuildReport:
    """Parse one JSON line into a :class:`RemoteBuildReport`.

    Every malformed-document case is reported as ``ValueError`` (of which
    ``json.JSONDecodeError`` is a subclass), so a caller that catches
    ``ValueError`` sees all parse failures.

    Args:
        json_line: The text expected to hold the BuildReport JSON object.

    Returns:
        The parsed report. Missing counters default to ``0``, a missing
        ``elapsed_s`` to ``0.0``, and a missing ``manifest_hash`` or
        ``failure_reason`` to ``None``.

    Raises:
        json.JSONDecodeError: If ``json_line`` is not valid JSON.
        ValueError: If the document is not a JSON object, ``outcome`` is not
            a known ``RemoteBuildOutcome`` value, or a numeric field holds a
            value that cannot be converted.
    """
    payload = json.loads(json_line)
    # Valid JSON that is not an object (``null``, ``42``, ``[]``) has no
    # ``.get``; reject it here instead of letting AttributeError escape.
    if not isinstance(payload, dict):
        raise ValueError(f"BuildReport JSON must be an object; got {type(payload).__name__}")

    outcome_raw = payload.get("outcome")
    # A list keeps the membership test safe for an unhashable ``outcome``
    # (a JSON array or object), which a set lookup would reject with TypeError.
    known_outcomes = [o.value for o in RemoteBuildOutcome]
    if outcome_raw not in known_outcomes:
        raise ValueError(f"BuildReport.outcome={outcome_raw!r} is not a known RemoteBuildOutcome")

    try:
        engines_built = int(payload.get("engines_built", 0))
        engines_reused = int(payload.get("engines_reused", 0))
        descriptors_generated = int(payload.get("descriptors_generated", 0))
        elapsed_s = float(payload.get("elapsed_s", 0.0))
    except TypeError as exc:
        # ``int(None)`` / ``float([])`` raise TypeError, e.g. for
        # ``"engines_built": null``. Normalise to ValueError.
        raise ValueError(f"BuildReport numeric field has a non-numeric value: {exc}") from exc

    return RemoteBuildReport(
        outcome=RemoteBuildOutcome(outcome_raw),
        engines_built=engines_built,
        engines_reused=engines_reused,
        descriptors_generated=descriptors_generated,
        manifest_hash=payload.get("manifest_hash"),
        failure_reason=payload.get("failure_reason"),
        elapsed_s=elapsed_s,
    )
|
||||
|
||||
|
||||
class RemoteCacheProvisionerInvoker:
    """Run C10's companion-side build entry over an open SSH session.

    The instance keeps only a logger and a command timeout; each
    :meth:`invoke` call is independent of earlier ones.
    """

    # Number of trailing characters of remote stdout / stderr attached to a
    # ``BuildReportParseError``.
    _TAIL_CHARS = 2048

    def __init__(self, *, logger: logging.Logger, command_timeout_s: float = 7200.0) -> None:
        """Store the ``logger`` and the timeout passed to ``session.run``."""
        self._logger = logger
        self._command_timeout_s = command_timeout_s

    def invoke(
        self,
        session: SshSession,
        request: RemoteBuildRequest,
        *,
        secrets_to_redact: Sequence[str] = (),
    ) -> RemoteBuildReport:
        """Run C10's build entry on ``session`` and return the parsed report.

        Args:
            session: Open SSH session used to run the remote command.
            request: Build request, serialised to JSON and piped to the entry.
            secrets_to_redact: Operator-supplied bearer tokens / api_keys to
                strip from every forwarded DEBUG line and from the output
                tails attached to errors. This is defence-in-depth against a
                misbehaving C10 echoing them.

        Returns:
            The report parsed from the last non-blank stdout line.

        Raises:
            BuildReportParseError: If the remote command exits non-zero,
                prints nothing, or its last non-blank stdout line is not a
                valid BuildReport JSON object.
            Exception: Anything raised by ``session.run`` is logged and
                re-raised unchanged.
        """
        request_json = _serialise_request(request)
        self._logger.info(
            "starting remote C10 build invocation",
            extra={
                "kind": _LOG_KIND_INVOKE_START,
                "kv": {
                    "flight_id": str(request.flight_id),
                    "sector_class": request.sector_class.value,
                    "zoom_levels": list(request.zoom_levels),
                },
            },
        )
        # The companion entry reads the request from stdin. ``session.run``
        # is given only a command string, so the JSON is piped in with
        # ``printf %s '<JSON>' | <entry>``; ``shlex.quote`` does the
        # single-quote escaping.
        wrapped_command = f"printf %s {shlex.quote(request_json)} | {build_remote_command()}"
        try:
            result = session.run(wrapped_command, timeout_s=self._command_timeout_s)
        except Exception as exc:
            # Boundary log only; the original exception is re-raised so the
            # caller decides how to classify it.
            self._logger.error(
                "remote C10 invocation failed at the SSH layer",
                extra={
                    "kind": _LOG_KIND_INVOKE_FAILED,
                    "kv": {"exception_type": type(exc).__name__},
                },
            )
            raise

        secrets = list(secrets_to_redact)
        stdout_lines = result.stdout.splitlines()
        # Drop trailing blank lines, so an extra newline after the report does
        # not turn a valid BuildReport into a parse failure.
        while stdout_lines and not stdout_lines[-1].strip():
            stdout_lines.pop()

        # Forward every line except the last as DEBUG, redacted. The last
        # line is reserved for the BuildReport JSON document
        # (description.md § 8 + AZ-328 spec § Outcome).
        for line in stdout_lines[:-1]:
            self._logger.debug(
                "remote c10 progress",
                extra={
                    "kind": _LOG_KIND_PROGRESS,
                    "kv": {"line": _redact(line, secrets)},
                },
            )

        if result.exit_code != 0 or not stdout_lines:
            raise self._parse_error(result.stdout, result.stderr, secrets)

        try:
            report = _parse_build_report(stdout_lines[-1].strip())
        except (json.JSONDecodeError, ValueError) as exc:
            raise self._parse_error(result.stdout, result.stderr, secrets) from exc

        self._logger.info(
            "remote C10 build invocation returned a parseable BuildReport",
            extra={
                "kind": _LOG_KIND_INVOKE_OK,
                "kv": {
                    "outcome": report.outcome.value,
                    "engines_built": report.engines_built,
                    "engines_reused": report.engines_reused,
                    "descriptors_generated": report.descriptors_generated,
                    "manifest_hash": report.manifest_hash,
                },
            },
        )
        return report

    def _parse_error(
        self, stdout: str, stderr: str, secrets: Sequence[str]
    ) -> BuildReportParseError:
        """Build a ``BuildReportParseError`` carrying redacted output tails.

        Redaction runs on the full text before the tail is cut. Cutting first
        could split a secret at the boundary, leaving a fragment that no
        longer matches and would be emitted unredacted.
        """
        return BuildReportParseError(
            stdout_tail=_redact(stdout, secrets)[-self._TAIL_CHARS :],
            stderr_tail=_redact(stderr, secrets)[-self._TAIL_CHARS :],
        )
|
||||
@@ -0,0 +1,38 @@
|
||||
"""C12 consumer-side structural cut of c11 ``TileDownloader`` (AZ-507).
|
||||
|
||||
The AZ-507 cross-component rule (see ``_docs/02_document/module-layout.md``
|
||||
line 252) forbids ``c12_operator_tooling/*.py`` from importing
|
||||
``components.c11_tile_manager`` directly. The ``BuildCacheOrchestrator``
|
||||
needs the download surface to drive the F1 download phase, so we
|
||||
declare a local Protocol that mirrors the shape of c11's
|
||||
:class:`gps_denied_onboard.components.c11_tile_manager.interface.TileDownloader`
|
||||
``download_tiles_for_area`` method.
|
||||
|
||||
The composition root (``runtime_root.c12_factory``) wires the concrete
|
||||
c11 strategy in. Tests inject a fake that returns a
|
||||
:class:`DownloadBatchReportCut` directly, so they never touch c11 either.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Protocol, runtime_checkable
|
||||
|
||||
from gps_denied_onboard.components.c12_operator_tooling._types import (
|
||||
DownloadBatchReportCut,
|
||||
DownloadRequestCut,
|
||||
)
|
||||
|
||||
__all__ = ["TileDownloaderCut"]
|
||||
|
||||
|
||||
@runtime_checkable
class TileDownloaderCut(Protocol):
    """Consumer-side, single-method view of c11's ``TileDownloader``.

    The orchestrator hands over a :class:`DownloadRequestCut`. The
    composition-root wiring in ``c12_factory`` converts it to c11's real
    ``DownloadRequest`` and converts the resulting ``DownloadBatchReport``
    back into a :class:`DownloadBatchReportCut`.
    """

    def download_tiles_for_area(self, request: DownloadRequestCut) -> DownloadBatchReportCut:
        """Download the tiles described by ``request`` and report the outcome."""
        ...
|
||||
@@ -6,13 +6,19 @@
|
||||
classification map.
|
||||
* :func:`build_companion_bringup` — AZ-327 SSH-based pre-flight
|
||||
verification of the companion's four artifacts.
|
||||
* :func:`build_build_cache_orchestrator` — AZ-328 F1 cache-build
|
||||
orchestrator. Wires the ``filelock`` factory + the remote C10 invoker
|
||||
+ the c11 ``TileDownloader`` adapter on top of the existing AZ-326 /
|
||||
AZ-327 / AZ-489 services. The AZ-507 cross-component cut means we
|
||||
translate c11's real ``DownloadRequest`` / ``DownloadBatchReport`` to
|
||||
the local ``DownloadRequestCut`` / ``DownloadBatchReportCut`` here.
|
||||
* :func:`build_operator_tool` — aggregator that returns the
|
||||
:class:`OperatorToolServices` dataclass the AZ-326 CLI consumes.
|
||||
|
||||
Each ``build_*`` function is intentionally tiny — there is one
|
||||
production strategy per service today and the CLI wiring just plugs
|
||||
the concrete instance into the same composition root method. Sibling
|
||||
tasks AZ-328 / AZ-329 / AZ-330 will each add a single field to
|
||||
tasks AZ-329 / AZ-330 will each add a single field to
|
||||
:class:`OperatorToolServices` without renaming or moving the
|
||||
dataclass.
|
||||
"""
|
||||
@@ -23,9 +29,16 @@ import logging
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from gps_denied_onboard.clock import Clock
|
||||
from gps_denied_onboard.components.c12_operator_tooling.build_cache import (
|
||||
BuildCacheOrchestrator,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.companion_bringup import (
|
||||
CompanionBringup,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.file_lock import (
|
||||
FilelockFileLockFactory,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.flights_api import (
|
||||
FlightsApiClient,
|
||||
HttpxFlightsApiClient,
|
||||
@@ -33,12 +46,18 @@ from gps_denied_onboard.components.c12_operator_tooling.flights_api import (
|
||||
from gps_denied_onboard.components.c12_operator_tooling.paramiko_ssh_session import (
|
||||
ParamikoSshSessionFactory,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.remote_c10_invoker import (
|
||||
RemoteCacheProvisionerInvoker,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.remote_sidecar_verifier import (
|
||||
RemoteSidecarVerifier,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.sector_classification_store import (
|
||||
SectorClassificationStore,
|
||||
)
|
||||
from gps_denied_onboard.components.c12_operator_tooling.tile_downloader_cut import (
|
||||
TileDownloaderCut,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from gps_denied_onboard.components.c12_operator_tooling.config import (
|
||||
@@ -48,6 +67,7 @@ if TYPE_CHECKING:
|
||||
|
||||
__all__ = [
|
||||
"OperatorToolServices",
|
||||
"build_build_cache_orchestrator",
|
||||
"build_companion_bringup",
|
||||
"build_flights_api_client",
|
||||
"build_operator_tool",
|
||||
@@ -57,6 +77,8 @@ __all__ = [
|
||||
|
||||
_C12_LOGGER_NAME = "c12_operator_tooling"
|
||||
_COMPANION_LOGGER_NAME = "c12_operator_tooling.companion_bringup"
|
||||
_BUILD_CACHE_LOGGER_NAME = "c12_operator_tooling.build_cache"
|
||||
_REMOTE_C10_LOGGER_NAME = "c12_operator_tooling.remote_c10_invoker"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@@ -65,15 +87,22 @@ class OperatorToolServices:
|
||||
|
||||
AZ-326 introduced the dataclass and now owns three services
|
||||
(``flights_api_client``, ``sector_classification_store``,
|
||||
``companion_bringup``). Sibling tasks AZ-328 (orchestrator),
|
||||
AZ-329 (post-landing upload), and AZ-330 (operator reloc service)
|
||||
extend this dataclass in-place by appending their own service
|
||||
field — they MUST NOT rename, move, or split it.
|
||||
``companion_bringup``). AZ-328 added ``build_cache_orchestrator``.
|
||||
Sibling tasks AZ-329 (post-landing upload) and AZ-330 (operator
|
||||
reloc service) extend this dataclass in-place by appending their
|
||||
own service field — they MUST NOT rename, move, or split it.
|
||||
|
||||
``build_cache_orchestrator`` is ``None`` when the AZ-328 wiring is
|
||||
not requested (e.g. unit tests for AZ-326 / AZ-327 that don't go
|
||||
through the full build path); the CLI's ``build-cache`` subcommand
|
||||
short-circuits with an EXIT_OK + log when the field is missing /
|
||||
None so the rest of the CLI keeps working.
|
||||
"""
|
||||
|
||||
flights_api_client: FlightsApiClient
|
||||
sector_classification_store: SectorClassificationStore
|
||||
companion_bringup: CompanionBringup
|
||||
build_cache_orchestrator: BuildCacheOrchestrator | None = None
|
||||
|
||||
|
||||
def build_flights_api_client(config: Config) -> FlightsApiClient:
|
||||
@@ -130,13 +159,86 @@ def build_companion_bringup(
|
||||
)
|
||||
|
||||
|
||||
def build_operator_tool(config: Config) -> OperatorToolServices:
|
||||
"""Aggregate the three AZ-326 / AZ-327 / AZ-489 service handles."""
|
||||
return OperatorToolServices(
|
||||
def build_build_cache_orchestrator(
    config: Config,
    *,
    services: OperatorToolServices,
    tile_downloader: TileDownloaderCut,
    clock: Clock,
    logger: logging.Logger | None = None,
) -> BuildCacheOrchestrator:
    """Assemble the AZ-328 :class:`BuildCacheOrchestrator`.

    The flights API client and companion bring-up are taken from
    ``services``. The lock factory, remote-C10 invoker and SSH session
    factory are constructed here. ``tile_downloader`` must already be a
    :class:`TileDownloaderCut`: ``c12_operator_tooling`` cannot import c11
    per AZ-507, so the caller supplies the adapter.

    NOTE(review): the AZ-328 constraints ask for the SSH factory to be the
    same instance as the one wired into ``services.companion_bringup``. This
    function builds a new ``ParamikoSshSessionFactory`` from the companion
    config instead — confirm that this satisfies the constraint.

    Args:
        config: Root configuration; the C12 section is resolved from it.
        services: Already-built sibling services.
        tile_downloader: Consumer-side cut of the c11 downloader.
        clock: Clock handed to the orchestrator.
        logger: Optional logger shared by the invoker and the orchestrator;
            when omitted each gets its own named logger.

    Raises:
        ConfigError: If ``str(companion.ssh_keyfile)`` is empty.
    """
    c12_config = _resolve_c12_config(config)
    companion = c12_config.companion

    keyfile_text = str(companion.ssh_keyfile)
    if not keyfile_text:
        # Imported here, as in the original, rather than at module level.
        from gps_denied_onboard.config.schema import ConfigError

        raise ConfigError(
            "C12CompanionConfig.ssh_keyfile is empty; AZ-328 build_cache_orchestrator "
            "requires a real SSH private key path"
        )

    ssh_factory = ParamikoSshSessionFactory(
        ssh_user=companion.ssh_user,
        ssh_keyfile=companion.ssh_keyfile,
        host_key_policy=companion.host_key_policy,
    )

    # An explicitly supplied logger is shared by both collaborators.
    if logger:
        invoker_logger = logger
        orchestrator_logger = logger
    else:
        invoker_logger = logging.getLogger(_REMOTE_C10_LOGGER_NAME)
        orchestrator_logger = logging.getLogger(_BUILD_CACHE_LOGGER_NAME)

    remote_invoker = RemoteCacheProvisionerInvoker(logger=invoker_logger)
    return BuildCacheOrchestrator(
        flights_api_client=services.flights_api_client,
        tile_downloader=tile_downloader,
        companion_bringup=services.companion_bringup,
        remote_c10_invoker=remote_invoker,
        ssh_factory=ssh_factory,
        lock_factory=FilelockFileLockFactory(),
        logger=orchestrator_logger,
        clock=clock,
        config=c12_config.build_cache,
    )
|
||||
|
||||
|
||||
def build_operator_tool(
    config: Config,
    *,
    tile_downloader: TileDownloaderCut | None = None,
    clock: Clock | None = None,
) -> OperatorToolServices:
    """Aggregate the AZ-326 / AZ-327 / AZ-328 / AZ-489 service handles.

    Args:
        config: Root configuration passed to every ``build_*`` helper.
        tile_downloader: Consumer-side cut of the c11 downloader.
        clock: Clock for the cache-build orchestrator.

    Returns:
        The services bundle. If ``tile_downloader`` or ``clock`` is ``None``,
        ``build_cache_orchestrator`` stays ``None``; otherwise it holds the
        orchestrator built from the other services.
    """
    # Function-scope import: the module imports only ``dataclass`` at the top.
    from dataclasses import replace

    base = OperatorToolServices(
        flights_api_client=build_flights_api_client(config),
        sector_classification_store=build_sector_classification_store(config),
        companion_bringup=build_companion_bringup(config),
    )
    if tile_downloader is None or clock is None:
        return base

    orchestrator = build_build_cache_orchestrator(
        config,
        services=base,
        tile_downloader=tile_downloader,
        clock=clock,
    )
    # ``replace`` copies every field of the frozen dataclass, so a field
    # added to ``OperatorToolServices`` later cannot be dropped by a stale
    # hand-written copy.
    return replace(base, build_cache_orchestrator=orchestrator)
|
||||
|
||||
|
||||
def _resolve_c12_config(config: Config) -> C12Config:
|
||||
|
||||
Reference in New Issue
Block a user