[AZ-328] C12 BuildCacheOrchestrator + remote C10 invoker (Batch 43)

Implements F1 pre-flight cache build orchestrator on the operator
workstation. Composes C11 TileDownloader (AZ-316), C12 CompanionBringup
(AZ-327), C12 FlightsApiClient (AZ-489), and the new
RemoteCacheProvisionerInvoker into one sequenced flow guarded by a
filelock-backed workstation-side lockfile.

Architectural decisions:
- Phase-0 flight-resolve runs BEFORE the lockfile (ADR-010): a flight
  that cannot be resolved is an operator-input error, not a contended-
  resource error. Enforced by AC-11 + AC-14.
- Consumer-side cuts (AZ-507) for C11 + C10 types: local Protocols /
  mirror DTOs in tile_downloader_cut.py and _types.py; external errors
  matched by name-based whitelisting so unknown exceptions still
  propagate per AC-6. Cross-component type translation lives at the
  composition root (c12_factory).
- Failure surfacing: recognised operational failures (download error,
  companion not ready, build error, flight-resolve error) return as
  CacheBuildReport(outcome=failure, failure_phase=...). Only lockfile
  contention raises (BuildLockHeldError) since no phase ever ran.
- Workstation-side filelock library (project pin); no custom primitive.
- Remote C10 stdout streamed line-by-line as DEBUG with api_key /
  auth_token redacted before logging (defence-in-depth).
- CLI is now a thin adapter; all workflow logic lives in
  build_cache.py. operator-tool build-cache exit codes map per
  CacheBuildReport.failure_phase + failure_exception_type.

Tests: 116 c12 unit tests pass (29 new for AZ-328 covering 15/15 ACs +
NFR-perf-overhead microbench; 7 new for remote_c10_invoker; 3 new for
file_lock; test_cli_build_cache rewritten for new orchestrator
interface). Full repo suite: 1522 passed, 80 skipped.

Also: replays Batch 42's ruff format leftover for c12 flights_api +
test_az489 files (formatter ran over the c12 directory after new
files were added). Pure whitespace; no behaviour change.

Full report: _docs/03_implementation/batch_43_cycle1_report.md

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-05-13 11:03:46 +03:00
parent 099c75c6f8
commit 7644b25e8c
23 changed files with 3585 additions and 256 deletions
@@ -71,11 +71,13 @@ def _three_waypoint_payload(*, flight_id: UUID = FLIGHT_ID) -> dict[str, object]
"flight_id": str(flight_id),
"name": "derkachi-sweep",
"waypoints": [
_waypoint_payload(ordinal=0, lat_deg=50.0, lon_deg=36.2, alt_m=200.0,
objective="takeoff"),
_waypoint_payload(
ordinal=0, lat_deg=50.0, lon_deg=36.2, alt_m=200.0, objective="takeoff"
),
_waypoint_payload(ordinal=1, lat_deg=50.01, lon_deg=36.22, alt_m=210.0),
_waypoint_payload(ordinal=2, lat_deg=50.02, lon_deg=36.24, alt_m=220.0,
objective="landing"),
_waypoint_payload(
ordinal=2, lat_deg=50.02, lon_deg=36.24, alt_m=220.0, objective="landing"
),
],
}
@@ -130,9 +132,7 @@ def test_ac1_online_happy_path_returns_three_waypoint_flight(
_, buffer = capture_flights_api_logs
# Act
flight = client.fetch_flight(
flight_id=FLIGHT_ID, base_url=BASE_URL, auth_token=AUTH_TOKEN
)
flight = client.fetch_flight(flight_id=FLIGHT_ID, base_url=BASE_URL, auth_token=AUTH_TOKEN)
# Assert
assert isinstance(flight, FlightDto)
@@ -221,9 +221,7 @@ def test_ac4_online_503_then_200_retries_once_and_succeeds(
_, buffer = capture_flights_api_logs
# Act
flight = client.fetch_flight(
flight_id=FLIGHT_ID, base_url=BASE_URL, auth_token=AUTH_TOKEN
)
flight = client.fetch_flight(flight_id=FLIGHT_ID, base_url=BASE_URL, auth_token=AUTH_TOKEN)
# Assert
assert isinstance(flight, FlightDto)
@@ -478,9 +476,7 @@ def test_ac13_online_and_offline_produce_equal_dtos(tmp_path: Path) -> None:
flight_file.write_bytes(json.dumps(payload).encode())
# Act
online_dto = client.fetch_flight(
flight_id=FLIGHT_ID, base_url=BASE_URL, auth_token=AUTH_TOKEN
)
online_dto = client.fetch_flight(flight_id=FLIGHT_ID, base_url=BASE_URL, auth_token=AUTH_TOKEN)
offline_dto = load_flight_file(path=flight_file)
# Assert
@@ -505,9 +501,7 @@ def test_ac14_shuffled_ordinals_are_returned_in_sorted_order() -> None:
client, _ = _make_client_with_handler(handler)
# Act
flight = client.fetch_flight(
flight_id=FLIGHT_ID, base_url=BASE_URL, auth_token=AUTH_TOKEN
)
flight = client.fetch_flight(flight_id=FLIGHT_ID, base_url=BASE_URL, auth_token=AUTH_TOKEN)
# Assert
assert tuple(w.ordinal for w in flight.waypoints) == (0, 1, 2)
@@ -0,0 +1,968 @@
"""AZ-328 — ``BuildCacheOrchestrator`` AC-1 .. AC-15 + NFR-perf-overhead.
Every fake collaborator records call counts so the sequencing and
"never-called" assertions land. The fakes never spawn real network /
SSH activity; the integration paths (paramiko + httpx) are exercised
elsewhere by AZ-489's wire tests + AZ-327's smoke test.
"""
from __future__ import annotations
import logging
import time
from contextlib import AbstractContextManager
from dataclasses import dataclass, field
from pathlib import Path, PurePosixPath
from uuid import UUID
import pytest
from gps_denied_onboard._types.geo import BoundingBox, LatLonAlt
from gps_denied_onboard.components.c12_operator_tooling import (
BuildCacheOrchestrator,
BuildCacheOutcome,
BuildCacheRequest,
BuildLockHeldError,
C12BuildCacheConfig,
C12CompanionConfig,
CacheBuildError,
CompanionAddress,
CompanionBringup,
CompanionUnreachableError,
CompanionUnreachableReason,
ContentHashMismatchError,
DownloadBatchReportCut,
DownloadOutcomeCut,
DownloadRequestCut,
EmptyWaypointsError,
FailurePhase,
FlightById,
FlightFromFile,
FlightNotFoundError,
FlightsApiUnreachableError,
HostKeyPolicy,
ReadinessOutcome,
ReadinessReport,
RemoteBuildOutcome,
RemoteBuildReport,
RemoteCacheProvisionerInvoker,
SectorClassification,
WaypointDto,
WaypointObjective,
WaypointSource,
)
from gps_denied_onboard.components.c12_operator_tooling.file_lock import LockTimeout
from gps_denied_onboard.components.c12_operator_tooling.flights_api.interface import (
FlightDto,
FlightsApiClient,
)
from gps_denied_onboard.components.c12_operator_tooling.remote_c10_invoker import (
RemoteBuildRequest,
)
from gps_denied_onboard.components.c12_operator_tooling.ssh_session import (
RemoteCommandResult,
SshSession,
SshSessionFactory,
)
from gps_denied_onboard.components.c12_operator_tooling.tile_downloader_cut import (
TileDownloaderCut,
)
# ---------------------------------------------------------------------------
# Constants + helpers
# ---------------------------------------------------------------------------
# Flight identifier shared by the canned flight and every request built below.
_FLIGHT_ID = UUID("12345678-1234-1234-1234-123456789012")
# Dummy secrets: the AC-9 / AC-15 tests assert these never appear in captured logs.
_API_KEY = "super-secret-api-key"
_AUTH_TOKEN = "bearer-xyz-token"
# Satellite provider URL placed on every BuildCacheRequest produced by _request().
_SAT_URL = "https://satellite.example.com"
# Companion endpoint placed on every BuildCacheRequest produced by _request().
_COMPANION = CompanionAddress(host="companion.local", port=22)
def _flight() -> FlightDto:
    """Build the canned three-waypoint flight used as the default fixture.

    The route is takeoff -> waypoint -> landing, every point marked as
    operator-sourced, and the flight carries ``_FLIGHT_ID``.
    """
    takeoff = WaypointDto(
        ordinal=0,
        lat_deg=50.0,
        lon_deg=36.2,
        alt_m=200.0,
        objective=WaypointObjective.TAKEOFF,
        source=WaypointSource.OPERATOR,
    )
    midpoint = WaypointDto(
        ordinal=1,
        lat_deg=50.05,
        lon_deg=36.25,
        alt_m=210.0,
        objective=WaypointObjective.WAYPOINT,
        source=WaypointSource.OPERATOR,
    )
    landing = WaypointDto(
        ordinal=2,
        lat_deg=50.0,
        lon_deg=36.3,
        alt_m=215.0,
        objective=WaypointObjective.LANDING,
        source=WaypointSource.OPERATOR,
    )
    return FlightDto(
        flight_id=_FLIGHT_ID,
        name="happy-path",
        waypoints=(takeoff, midpoint, landing),
    )
def _bbox() -> BoundingBox:
    """Return the fixed bounding box the fake flights client serves by default."""
    corners = {
        "min_lat_deg": 49.99,
        "min_lon_deg": 36.19,
        "max_lat_deg": 50.06,
        "max_lon_deg": 36.31,
    }
    return BoundingBox(**corners)
def _request(
    *,
    flight_source=None,
    sector_class: SectorClassification = SectorClassification.STABLE_REAR,
    cache_root: Path | None = None,
) -> BuildCacheRequest:
    """Assemble a ``BuildCacheRequest`` populated with the module-level defaults.

    Args:
        flight_source: Where the flight comes from; a falsy value selects
            ``FlightById`` with ``_FLIGHT_ID``.
        sector_class: Sector classification forwarded unchanged.
        cache_root: Cache root directory; a falsy value selects
            ``/tmp/cache_root``.
    """
    effective_source = flight_source or FlightById(flight_id=_FLIGHT_ID)
    effective_root = cache_root or Path("/tmp/cache_root")
    return BuildCacheRequest(
        flight_source=effective_source,
        sector_class=sector_class,
        calibration_path=Path("/tmp/calibration.json"),
        satellite_provider_url=_SAT_URL,
        api_key=_API_KEY,
        companion_address=_COMPANION,
        expected_engines=("dinov2_vpr", "alike"),
        cache_root=effective_root,
        zoom_levels=(18,),
    )
# ---------------------------------------------------------------------------
# Fakes
# ---------------------------------------------------------------------------
@dataclass
class _FakeFlightsApiClient(FlightsApiClient):
    """Call-counting stand-in for the flights API client.

    Each method bumps its own counter before doing anything else, so
    tests can assert both "called once" and "never called". The
    ``*_raises`` fields, when set, make the matching method raise.
    """

    # Flight returned by fetch_flight / load_flight_file.
    flight: FlightDto | None = None
    # Per-method invocation counters.
    fetch_calls: int = 0
    load_calls: int = 0
    bbox_calls: int = 0
    takeoff_calls: int = 0
    # Optional exceptions injected into the matching method.
    fetch_raises: Exception | None = None
    load_raises: Exception | None = None
    bbox_raises: Exception | None = None
    # Bounding box handed back by bbox_from_waypoints.
    bbox_value: BoundingBox = field(default_factory=_bbox)
    # Every auth token passed to fetch_flight, in call order.
    captured_auth_tokens: list[str] = field(default_factory=list)

    def fetch_flight(
        self, *, flight_id, base_url, auth_token, timeout_s: float = 10.0
    ) -> FlightDto:
        """Record the call and token, then raise or return the canned flight."""
        self.fetch_calls += 1
        self.captured_auth_tokens.append(auth_token)
        injected = self.fetch_raises
        if injected is not None:
            raise injected
        assert self.flight is not None
        return self.flight

    def load_flight_file(self, *, path: Path) -> FlightDto:
        """Record the call, then raise or return the canned flight."""
        self.load_calls += 1
        injected = self.load_raises
        if injected is not None:
            raise injected
        assert self.flight is not None
        return self.flight

    def bbox_from_waypoints(self, waypoints, *, buffer_m: float = 1000.0) -> BoundingBox:
        """Record the call, then raise or return ``bbox_value``."""
        self.bbox_calls += 1
        injected = self.bbox_raises
        if injected is not None:
            raise injected
        return self.bbox_value

    def takeoff_origin_from_flight(self, flight: FlightDto) -> LatLonAlt:
        """Record the call and return the first waypoint's position."""
        self.takeoff_calls += 1
        origin_wp = flight.waypoints[0]
        return LatLonAlt(
            lat_deg=origin_wp.lat_deg,
            lon_deg=origin_wp.lon_deg,
            alt_m=origin_wp.alt_m,
        )
@dataclass
class _FakeTileDownloader(TileDownloaderCut):
    """Tile downloader fake that records its call count and last request."""

    # Exception to raise from download_tiles_for_area, if any.
    raises: Exception | None = None
    # Report returned when no exception is injected.
    report: DownloadBatchReportCut | None = None
    # Number of download_tiles_for_area invocations.
    calls: int = 0
    # Most recent request received.
    captured_request: DownloadRequestCut | None = None

    def download_tiles_for_area(self, request: DownloadRequestCut) -> DownloadBatchReportCut:
        """Record the request, then raise the injected error or return the report."""
        self.calls += 1
        self.captured_request = request
        injected = self.raises
        if injected is not None:
            raise injected
        assert self.report is not None
        return self.report
class _FakeSession(SshSession):
    """SSH session fake: commands succeed with ``{}``, the remote is empty."""

    def __init__(self) -> None:
        # Counts close() invocations.
        self.close_calls = 0

    def run(self, command: str, *, timeout_s: float) -> RemoteCommandResult:
        """Return a zero-exit result with ``{}`` on stdout for any command."""
        return RemoteCommandResult(exit_code=0, stdout="{}", stderr="")

    def file_exists(self, remote_path: PurePosixPath) -> bool:
        """Report every remote path as missing."""
        return False

    def list_dir(self, remote_path: PurePosixPath) -> list[str]:
        """Report every remote directory as empty."""
        return []

    def close(self) -> None:
        """Record that the session was closed."""
        self.close_calls += 1
@dataclass
class _FakeSshFactory(SshSessionFactory):
    """SSH factory fake that lazily creates and then reuses one ``_FakeSession``."""

    # Session handed out by open(); created on first use when left unset.
    session: _FakeSession | None = None
    # Number of open() invocations.
    open_calls: int = 0
    # Exception to raise from open(), if any.
    open_raises: Exception | None = None

    def open(self, address: CompanionAddress, *, timeout_s: float) -> SshSession:
        """Count the call, then raise the injected error or return the session."""
        self.open_calls += 1
        injected = self.open_raises
        if injected is not None:
            raise injected
        if self.session is None:
            self.session = _FakeSession()
        return self.session
@dataclass
class _FakeBringup:
    """Duck-typed substitute for :class:`CompanionBringup`."""

    # Report to return; when unset a fully-ready report is synthesised.
    readiness: ReadinessReport | None = None
    # Exception to raise from verify_companion_ready, if any.
    raises: Exception | None = None
    # Number of verify_companion_ready invocations.
    calls: int = 0

    def verify_companion_ready(self, address: CompanionAddress) -> ReadinessReport:
        """Count the call, then raise, return ``readiness``, or return a READY report."""
        self.calls += 1
        injected = self.raises
        if injected is not None:
            raise injected
        if self.readiness is None:
            return ReadinessReport(
                manifest_present=True,
                content_hashes_pass=True,
                engines_present=True,
                calibration_present=True,
                outcome=ReadinessOutcome.READY,
                not_ready_reasons=(),
                companion_cache_root="/var/lib/azaion/c10/cache",
                engines_inspected_count=2,
            )
        return self.readiness
@dataclass
class _FakeRemoteInvoker:
    """Duck-typed substitute for :class:`RemoteCacheProvisionerInvoker`."""

    # Report to return; when unset a SUCCESS report is synthesised.
    report: RemoteBuildReport | None = None
    # Exception to raise from invoke, if any.
    raises: Exception | None = None
    # Number of invoke invocations.
    calls: int = 0
    # Most recent request received.
    captured_request: RemoteBuildRequest | None = None
    # Secrets passed for redaction on the most recent call.
    captured_secrets: tuple[str, ...] = ()

    def invoke(
        self,
        session: SshSession,
        request: RemoteBuildRequest,
        *,
        secrets_to_redact=(),
    ) -> RemoteBuildReport:
        """Record the call arguments, then raise or return a build report."""
        self.calls += 1
        self.captured_request = request
        self.captured_secrets = tuple(secrets_to_redact)
        injected = self.raises
        if injected is not None:
            raise injected
        if self.report is not None:
            return self.report
        return RemoteBuildReport(
            outcome=RemoteBuildOutcome.SUCCESS,
            engines_built=2,
            engines_reused=0,
            descriptors_generated=128,
            manifest_hash="abc123",
            failure_reason=None,
            elapsed_s=5.5,
        )
class _FakeFileLockHandle(AbstractContextManager[None]):
    """Context manager returned by the fake lock factory.

    Entering yields ``None``; leaving increments the owning factory's
    ``exit_calls`` counter and never suppresses an exception.
    """

    def __init__(self, factory: _FakeLockFactory) -> None:
        self._factory = factory

    def __enter__(self) -> None:
        return None

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        owner = self._factory
        owner.exit_calls += 1
@dataclass
class _FakeLockFactory:
    """Lock factory fake: counts acquire / release and can simulate a timeout."""

    # When True, try_lock raises LockTimeout instead of returning a handle.
    raise_timeout: bool = False
    # Number of try_lock invocations (counted even when it then raises).
    acquire_calls: int = 0
    # Number of handle exits, incremented by _FakeFileLockHandle.__exit__.
    exit_calls: int = 0
    # Every lock path requested, in call order.
    captured_paths: list[Path] = field(default_factory=list)

    def try_lock(self, path: Path, *, timeout_s: float) -> AbstractContextManager[None]:
        """Record the attempt, then raise ``LockTimeout`` or return a handle."""
        self.acquire_calls += 1
        self.captured_paths.append(path)
        if not self.raise_timeout:
            return _FakeFileLockHandle(self)
        raise LockTimeout(path=path, timeout_s=timeout_s)
class _FakeClock:
    """Deterministic clock: every ``monotonic_ns`` read advances time by 1 ms."""

    # Nanoseconds added per monotonic_ns() call (1 ms).
    _TICK_NS = 1_000_000

    def __init__(self) -> None:
        self._t = 0

    def monotonic_ns(self) -> int:
        """Advance the internal counter by one tick and return it."""
        self._t = self._t + self._TICK_NS
        return self._t

    def time_ns(self) -> int:
        """Return the current counter without advancing it."""
        return self._t

    def sleep_until_ns(self, deadline_ns: int) -> None:  # pragma: no cover
        """Do nothing; the fake never blocks."""
        return None
@dataclass
class _Fakes:
    """Bundle of every fake collaborator plus the log-capturing logger."""

    # Fake flights API client.
    flights: _FakeFlightsApiClient
    # Fake tile downloader.
    downloader: _FakeTileDownloader
    # Fake companion bring-up.
    bringup: _FakeBringup
    # Fake remote C10 invoker.
    invoker: _FakeRemoteInvoker
    # Fake SSH session factory.
    ssh_factory: _FakeSshFactory
    # Fake lock factory.
    lock_factory: _FakeLockFactory
    # Logger handed to the orchestrator.
    logger: logging.Logger
    # Records captured from ``logger``, in emission order.
    log_records: list[logging.LogRecord]
@pytest.fixture
def fakes(tmp_path: Path) -> _Fakes:
    """Provide fresh fakes and a per-test logger whose records are captured.

    The logger is named after ``tmp_path``, has its handlers cleared,
    does not propagate, and forwards every record at DEBUG and above
    into the returned ``log_records`` list.
    """
    captured: list[logging.LogRecord] = []

    class _Handler(logging.Handler):
        def emit(self, record: logging.LogRecord) -> None:
            captured.append(record)

    test_logger = logging.getLogger(f"test_build_cache_{tmp_path.name}")
    test_logger.handlers.clear()
    test_logger.propagate = False
    sink = _Handler(level=logging.DEBUG)
    sink.setFormatter(logging.Formatter("%(message)s"))
    test_logger.addHandler(sink)
    test_logger.setLevel(logging.DEBUG)

    # Default download result: all 12 requested tiles fetched.
    happy_download = DownloadBatchReportCut(
        outcome=DownloadOutcomeCut.SUCCESS,
        tiles_requested=12,
        tiles_downloaded=12,
    )
    return _Fakes(
        flights=_FakeFlightsApiClient(flight=_flight()),
        downloader=_FakeTileDownloader(report=happy_download),
        bringup=_FakeBringup(),
        invoker=_FakeRemoteInvoker(),
        ssh_factory=_FakeSshFactory(),
        lock_factory=_FakeLockFactory(),
        logger=test_logger,
        log_records=captured,
    )
@pytest.fixture
def config(tmp_path: Path) -> C12BuildCacheConfig:
    """Provide a build-cache config rooted under the test's temporary directory."""
    staging_dir = tmp_path / "staging"
    return C12BuildCacheConfig(
        cache_staging_root=staging_dir,
        lock_timeout_s=0.5,
        ssh_connect_timeout_s=2.0,
        flights_api_base_url="https://flights.example.com",
        flights_api_auth_token=_AUTH_TOKEN,
        zoom_levels=(18,),
    )
def _orchestrator(fakes: _Fakes, config: C12BuildCacheConfig) -> BuildCacheOrchestrator:
    """Wire a ``BuildCacheOrchestrator`` to the fakes, with a fresh ``_FakeClock``."""
    collaborators = {
        "flights_api_client": fakes.flights,
        "tile_downloader": fakes.downloader,
        "companion_bringup": fakes.bringup,
        "remote_c10_invoker": fakes.invoker,
        "ssh_factory": fakes.ssh_factory,
        "lock_factory": fakes.lock_factory,
        "logger": fakes.logger,
    }
    # The bring-up and invoker fakes are duck-typed, hence the ignore.
    return BuildCacheOrchestrator(  # type: ignore[arg-type]
        **collaborators,
        clock=_FakeClock(),
        config=config,
    )
def _kinds(fakes: _Fakes) -> list[str]:
    """Collect the ``kind`` attribute of every captured record, in order.

    A record that carries no ``kind`` contributes ``None``.
    """
    collected = []
    for record in fakes.log_records:
        collected.append(vars(record).get("kind"))
    return collected
def _has_substring_in_any_log(fakes: _Fakes, needle: str) -> bool:
    """Return True when ``needle`` occurs anywhere in the captured log records.

    Each record is checked in two places: its formatted message
    (``getMessage()``) and every attribute stored on the record, which is
    where ``extra=`` payloads end up. Attribute values are searched
    recursively through dicts (values only), lists, tuples, sets and
    frozensets, so a secret buried in a nested payload is still found.
    A shallow search would let the redaction tests pass while the secret
    leaked inside, for example, a list of header strings.

    Args:
        fakes: Object exposing ``log_records``, an iterable of
            ``logging.LogRecord``.
        needle: Substring to look for, typically a secret that must not
            be logged.
    """

    def _contains(value: object) -> bool:
        # Strings are the leaves; containers are walked element by element.
        if isinstance(value, str):
            return needle in value
        if isinstance(value, dict):
            return any(_contains(item) for item in value.values())
        if isinstance(value, (list, tuple, set, frozenset)):
            return any(_contains(item) for item in value)
        return False

    for record in fakes.log_records:
        if needle in record.getMessage():
            return True
        if any(_contains(value) for value in vars(record).values()):
            return True
    return False
# ---------------------------------------------------------------------------
# AC-1 — Happy path
# ---------------------------------------------------------------------------
class TestAc1HappyPath:
    """AC-1: with every collaborator succeeding, the build reports success."""

    def test_full_pipeline_returns_success(
        self, fakes: _Fakes, config: C12BuildCacheConfig
    ) -> None:
        """Success outcome, all sub-reports present, each fake hit exactly once."""
        sut = _orchestrator(fakes, config)

        result = sut.build_cache(_request())

        assert result.outcome is BuildCacheOutcome.SUCCESS
        assert result.failure_phase is FailurePhase.NONE
        assert result.flight_resolve_report is not None
        assert result.download_report is not None
        assert result.build_report is not None
        # Call counts: every collaborator is used exactly once.
        assert fakes.flights.fetch_calls == 1
        assert fakes.flights.bbox_calls == 1
        assert fakes.flights.takeoff_calls == 1
        assert fakes.lock_factory.acquire_calls == 1
        assert fakes.downloader.calls == 1
        assert fakes.bringup.calls == 1
        assert fakes.ssh_factory.open_calls == 1
        assert fakes.invoker.calls == 1
        assert fakes.lock_factory.exit_calls == 1

    def test_emits_three_required_info_logs(
        self, fakes: _Fakes, config: C12BuildCacheConfig
    ) -> None:
        """Each of the three required log kinds is emitted exactly once."""
        sut = _orchestrator(fakes, config)

        sut.build_cache(_request())

        emitted = _kinds(fakes)
        assert emitted.count("c12.build_cache.flight_resolve.start") == 1
        assert emitted.count("c12.build_cache.start") == 1
        assert emitted.count("c12.build_cache.success") == 1
# ---------------------------------------------------------------------------
# AC-2 — Download failure aborts before C10
# ---------------------------------------------------------------------------
class SatelliteProviderError(Exception):
    """Local double of c11's SatelliteProviderError; matching is by class name."""
class TestAc2DownloadFailureAborts:
    """AC-2: a recognised download error stops the flow before bring-up and C10."""

    def test_returns_failure_report_and_skips_downstream(
        self, fakes: _Fakes, config: C12BuildCacheConfig
    ) -> None:
        """Failure at DOWNLOAD, no sub-reports, downstream fakes untouched, lock released."""
        fakes.downloader.raises = SatelliteProviderError("503 Service Unavailable")
        sut = _orchestrator(fakes, config)

        result = sut.build_cache(_request())

        assert result.outcome is BuildCacheOutcome.FAILURE
        assert result.failure_phase is FailurePhase.DOWNLOAD
        assert result.download_report is None
        assert result.build_report is None
        reason = result.failure_reason or ""
        assert "503" in reason
        assert fakes.bringup.calls == 0
        assert fakes.invoker.calls == 0
        assert fakes.lock_factory.exit_calls == 1
        assert "c12.build_cache.download.failed" in _kinds(fakes)
# ---------------------------------------------------------------------------
# AC-3 — Verify-ready failure aborts before C10
# ---------------------------------------------------------------------------
class TestAc3VerifyReadyFailureAborts:
    """AC-3: a NOT_READY companion stops the flow before the remote invoker."""

    def test_not_ready_returns_failure_and_skips_invoker(
        self, fakes: _Fakes, config: C12BuildCacheConfig
    ) -> None:
        """The not-ready reason is surfaced, the invoker is skipped, the lock released."""
        not_ready = ReadinessReport(
            manifest_present=False,
            content_hashes_pass=False,
            engines_present=False,
            calibration_present=False,
            outcome=ReadinessOutcome.NOT_READY,
            not_ready_reasons=("manifest missing",),
            companion_cache_root="/var/lib/azaion/c10/cache",
            engines_inspected_count=0,
        )
        fakes.bringup.readiness = not_ready
        sut = _orchestrator(fakes, config)

        result = sut.build_cache(_request())

        assert result.outcome is BuildCacheOutcome.FAILURE
        assert result.failure_phase is FailurePhase.DOWNLOAD
        reason = result.failure_reason or ""
        assert "manifest missing" in reason
        assert fakes.invoker.calls == 0
        assert fakes.lock_factory.exit_calls == 1
        assert "c12.build_cache.companion.not_ready" in _kinds(fakes)
# ---------------------------------------------------------------------------
# AC-4 — Build failure surfaces failure_phase=build
# ---------------------------------------------------------------------------
class EngineBuildError(Exception):
    """Local double of c10's EngineBuildError; matching is by class name."""
class TestAc4BuildFailure:
    """AC-4: a recognised build error is reported with ``failure_phase=BUILD``."""

    def test_invoker_raises_recognised_error_returns_failure(
        self, fakes: _Fakes, config: C12BuildCacheConfig
    ) -> None:
        """Download report kept, build report absent, reason carried, lock released."""
        fakes.invoker.raises = EngineBuildError("CUDA OOM on backbone dinov2_vpr")
        sut = _orchestrator(fakes, config)

        result = sut.build_cache(_request())

        assert result.outcome is BuildCacheOutcome.FAILURE
        assert result.failure_phase is FailurePhase.BUILD
        assert result.download_report is not None
        assert result.build_report is None
        reason = result.failure_reason or ""
        assert "CUDA OOM" in reason
        assert fakes.lock_factory.exit_calls == 1
        assert "c12.build_cache.build.failed" in _kinds(fakes)

    def test_cache_build_error_remediation_mentions_cleanup(self) -> None:
        """BUILD-phase remediation text mentions cleanup or ``rm -rf``."""
        error = CacheBuildError(
            failure_phase=FailurePhase.BUILD,
            wrapped_exception_repr="EngineBuildError(...)",
        )
        hint = error.remediation
        assert "cleanup" in hint.lower() or "rm -rf" in hint
# ---------------------------------------------------------------------------
# AC-5 — Lockfile prevents concurrent runs
# ---------------------------------------------------------------------------
class TestAc5LockHeld:
    """AC-5: a lock timeout raises ``BuildLockHeldError`` and no phase runs."""

    def test_timeout_raises_build_lock_held_error(
        self, fakes: _Fakes, config: C12BuildCacheConfig
    ) -> None:
        """The error carries phase DOWNLOAD; downloader, bring-up and invoker stay idle."""
        fakes.lock_factory.raise_timeout = True
        sut = _orchestrator(fakes, config)

        with pytest.raises(BuildLockHeldError) as caught:
            sut.build_cache(_request())

        assert caught.value.failure_phase is FailurePhase.DOWNLOAD
        assert fakes.downloader.calls == 0
        assert fakes.bringup.calls == 0
        assert fakes.invoker.calls == 0
        assert "c12.build_cache.lock.held" in _kinds(fakes)
# ---------------------------------------------------------------------------
# AC-6 — Lockfile released even on unexpected exception
# ---------------------------------------------------------------------------
class TestAc6LockReleasedOnException:
    """AC-6: an unrecognised exception propagates and the lock is still released."""

    def test_runtime_error_propagates_lock_released(
        self, fakes: _Fakes, config: C12BuildCacheConfig
    ) -> None:
        """A ``RuntimeError`` from the downloader escapes; the lock handle is exited once."""
        # RuntimeError is deliberately outside every phase's recognised-name
        # set, so the orchestrator must let it through.
        fakes.downloader.raises = RuntimeError("unexpected")
        sut = _orchestrator(fakes, config)

        with pytest.raises(RuntimeError):
            sut.build_cache(_request())

        assert fakes.lock_factory.exit_calls == 1
# ---------------------------------------------------------------------------
# AC-7 — Idempotent no-op surfaces correctly
# ---------------------------------------------------------------------------
class TestAc7IdempotentNoOp:
    """AC-7: an idempotent no-op from the remote build is surfaced as such."""

    def test_idempotent_outcome_is_returned(
        self, fakes: _Fakes, config: C12BuildCacheConfig
    ) -> None:
        """Outcome is IDEMPOTENT_NO_OP, no failure fields set, idempotent log emitted."""
        cached_build = RemoteBuildReport(
            outcome=RemoteBuildOutcome.IDEMPOTENT_NO_OP,
            engines_built=0,
            engines_reused=2,
            descriptors_generated=0,
            manifest_hash="cached-hash",
            failure_reason=None,
            elapsed_s=0.1,
        )
        fakes.invoker.report = cached_build
        sut = _orchestrator(fakes, config)

        result = sut.build_cache(_request())

        assert result.outcome is BuildCacheOutcome.IDEMPOTENT_NO_OP
        assert result.failure_phase is FailurePhase.NONE
        assert result.failure_reason is None
        assert "c12.build_cache.idempotent" in _kinds(fakes)
# ---------------------------------------------------------------------------
# AC-8 — remediation text per failure_phase
# ---------------------------------------------------------------------------
class TestAc8RemediationTextPerPhase:
    """AC-8: each error type exposes phase-appropriate remediation text."""

    def test_download_remediation_mentions_re_run(self) -> None:
        """DOWNLOAD-phase remediation contains ``Re-run``."""
        error = CacheBuildError(
            failure_phase=FailurePhase.DOWNLOAD,
            wrapped_exception_repr="...",
        )
        assert "Re-run" in error.remediation

    def test_build_remediation_mentions_cleanup(self) -> None:
        """BUILD-phase remediation contains ``rm -rf`` or mentions cleanup."""
        error = CacheBuildError(
            failure_phase=FailurePhase.BUILD,
            wrapped_exception_repr="...",
        )
        hint = error.remediation
        assert "rm -rf" in hint or "cleanup" in hint.lower()

    def test_lock_held_remediation_mentions_lock_path(self) -> None:
        """Lock-held remediation names the lock file path."""
        error = BuildLockHeldError(lock_path=Path("/tmp/.c12.lock"), timeout_s=5.0)
        assert "/tmp/.c12.lock" in error.remediation
# ---------------------------------------------------------------------------
# AC-9 — api_key never leaks into log output
# ---------------------------------------------------------------------------
class TestAc9ApiKeyRedaction:
    """AC-9: the API key stays out of the logs but reaches the invoker's redaction list."""

    def test_no_log_record_contains_api_key(
        self, fakes: _Fakes, config: C12BuildCacheConfig
    ) -> None:
        """No captured record contains ``_API_KEY``."""
        sut = _orchestrator(fakes, config)

        sut.build_cache(_request())

        leaked = _has_substring_in_any_log(fakes, _API_KEY)
        assert not leaked

    def test_secrets_forwarded_to_invoker_for_redaction(
        self, fakes: _Fakes, config: C12BuildCacheConfig
    ) -> None:
        """Both the API key and the auth token are passed as secrets to redact."""
        sut = _orchestrator(fakes, config)

        sut.build_cache(_request())

        forwarded = fakes.invoker.captured_secrets
        assert _API_KEY in forwarded
        assert _AUTH_TOKEN in forwarded
# ---------------------------------------------------------------------------
# AC-10 — Aggregated CacheBuildReport carries all sub-reports on success
# ---------------------------------------------------------------------------
class TestAc10AggregatedReport:
    """AC-10: the success report aggregates every sub-report."""

    def test_success_report_carries_all_fields(
        self, fakes: _Fakes, config: C12BuildCacheConfig
    ) -> None:
        """Flight-resolve, download and build sub-reports are populated; wall clock > 0."""
        sut = _orchestrator(fakes, config)

        result = sut.build_cache(_request())

        resolved = result.flight_resolve_report
        assert resolved is not None
        assert resolved.flight_id == _FLIGHT_ID
        assert resolved.waypoint_count == 3
        assert resolved.bbox.min_lat_deg < resolved.bbox.max_lat_deg
        assert resolved.takeoff_origin.lat_deg == 50.0
        assert resolved.raw_flight_dto is not None

        downloaded = result.download_report
        assert downloaded is not None
        assert downloaded.tiles_downloaded == 12

        built = result.build_report
        assert built is not None
        assert built.engines_built == 2

        assert result.wall_clock_s > 0
# ---------------------------------------------------------------------------
# AC-11 — Flight-resolve failure aborts BEFORE the lockfile
# ---------------------------------------------------------------------------
class TestAc11FlightResolveBeforeLock:
    """AC-11: a flight-resolve failure aborts before the lock is requested."""

    def test_flight_not_found_skips_lock_and_downstream(
        self, fakes: _Fakes, config: C12BuildCacheConfig
    ) -> None:
        """Unknown flight: FLIGHT_RESOLVE failure, lock never acquired, nothing downstream runs."""
        fakes.flights.fetch_raises = FlightNotFoundError(f"flight not found: {_FLIGHT_ID}")
        sut = _orchestrator(fakes, config)

        result = sut.build_cache(_request())

        assert result.outcome is BuildCacheOutcome.FAILURE
        assert result.failure_phase is FailurePhase.FLIGHT_RESOLVE
        assert result.flight_resolve_report is None
        assert fakes.lock_factory.acquire_calls == 0
        assert fakes.downloader.calls == 0
        assert fakes.bringup.calls == 0
        assert fakes.invoker.calls == 0
        assert "c12.build_cache.flight_resolve.failed" in _kinds(fakes)

    def test_flights_api_unreachable_also_aborts_pre_lock(
        self, fakes: _Fakes, config: C12BuildCacheConfig
    ) -> None:
        """Unreachable API: FLIGHT_RESOLVE failure and the lock is never acquired."""
        fakes.flights.fetch_raises = FlightsApiUnreachableError("service unavailable")
        sut = _orchestrator(fakes, config)

        result = sut.build_cache(_request())

        assert result.failure_phase is FailurePhase.FLIGHT_RESOLVE
        assert fakes.lock_factory.acquire_calls == 0
# ---------------------------------------------------------------------------
# AC-12 — Offline FlightFromFile path
# ---------------------------------------------------------------------------
class TestAc12FlightFromFile:
    """AC-12: a ``FlightFromFile`` source uses the offline loader, not the API."""

    def test_load_flight_file_called_when_source_is_file(
        self, fakes: _Fakes, config: C12BuildCacheConfig
    ) -> None:
        """The file loader is hit once and ``fetch_flight`` is never called."""
        sut = _orchestrator(fakes, config)
        offline_source = FlightFromFile(path=Path("/tmp/flight.json"))

        result = sut.build_cache(_request(flight_source=offline_source))

        assert result.outcome is BuildCacheOutcome.SUCCESS
        assert fakes.flights.load_calls == 1
        assert fakes.flights.fetch_calls == 0
# ---------------------------------------------------------------------------
# AC-13 — takeoff_origin + flight_id forwarded to invoker
# ---------------------------------------------------------------------------
class TestAc13TakeoffOriginForwarded:
    """AC-13: the invoker receives the takeoff origin and the flight id."""

    def test_invoker_received_takeoff_origin_and_flight_id(
        self, fakes: _Fakes, config: C12BuildCacheConfig
    ) -> None:
        """The captured remote request carries the first waypoint and ``_FLIGHT_ID``."""
        sut = _orchestrator(fakes, config)

        sut.build_cache(_request())

        remote_request = fakes.invoker.captured_request
        assert remote_request is not None
        expected_origin = LatLonAlt(lat_deg=50.0, lon_deg=36.2, alt_m=200.0)
        assert remote_request.takeoff_origin == expected_origin
        assert remote_request.flight_id == _FLIGHT_ID
# ---------------------------------------------------------------------------
# AC-14 — EmptyWaypointsError surfaces with failure_phase=flight_resolve
# ---------------------------------------------------------------------------
class TestAc14EmptyWaypoints:
    """AC-14: ``EmptyWaypointsError`` is reported with ``failure_phase=FLIGHT_RESOLVE``."""

    def test_empty_waypoints_aborts_pre_lock(
        self, fakes: _Fakes, config: C12BuildCacheConfig
    ) -> None:
        """The failure reason mentions empty waypoints and the lock is never acquired."""
        fakes.flights.bbox_raises = EmptyWaypointsError("no waypoints in flight")
        sut = _orchestrator(fakes, config)

        result = sut.build_cache(_request())

        assert result.outcome is BuildCacheOutcome.FAILURE
        assert result.failure_phase is FailurePhase.FLIGHT_RESOLVE
        reason = result.failure_reason or ""
        assert "empty waypoints" in reason
        assert fakes.lock_factory.acquire_calls == 0
# ---------------------------------------------------------------------------
# AC-15 — auth_token never leaks into log output (Phase 0)
# ---------------------------------------------------------------------------
class TestAc15AuthTokenRedaction:
    """AC-15: the auth token is used for the fetch but never logged."""

    def test_no_log_record_contains_auth_token(
        self, fakes: _Fakes, config: C12BuildCacheConfig
    ) -> None:
        """No captured record contains ``_AUTH_TOKEN``."""
        sut = _orchestrator(fakes, config)

        sut.build_cache(_request())

        leaked = _has_substring_in_any_log(fakes, _AUTH_TOKEN)
        assert not leaked

    def test_auth_token_passed_to_fetch_flight(
        self, fakes: _Fakes, config: C12BuildCacheConfig
    ) -> None:
        """Counter-check: the token does reach ``fetch_flight``, exactly once."""
        sut = _orchestrator(fakes, config)

        sut.build_cache(_request())

        assert fakes.flights.captured_auth_tokens == [_AUTH_TOKEN]
# ---------------------------------------------------------------------------
# Verify-ready typed exception path (CompanionUnreachableError catch)
# ---------------------------------------------------------------------------
class TestVerifyReadyTypedExceptions:
    """Typed bring-up exceptions become failure reports and skip the invoker."""

    def test_companion_unreachable_returns_failure_phase_download(
        self, fakes: _Fakes, config: C12BuildCacheConfig
    ) -> None:
        """``CompanionUnreachableError`` yields a FAILURE report at phase DOWNLOAD."""
        unreachable = CompanionUnreachableError(
            host="companion.local",
            port=22,
            reason=CompanionUnreachableReason.CONNECT_REFUSED,
            underlying_exception_repr="ECONNREFUSED",
        )
        fakes.bringup.raises = unreachable
        sut = _orchestrator(fakes, config)

        result = sut.build_cache(_request())

        assert result.outcome is BuildCacheOutcome.FAILURE
        assert result.failure_phase is FailurePhase.DOWNLOAD
        assert fakes.invoker.calls == 0

    def test_content_hash_mismatch_returns_failure_phase_download(
        self, fakes: _Fakes, config: C12BuildCacheConfig
    ) -> None:
        """``ContentHashMismatchError`` yields phase DOWNLOAD and skips the invoker."""
        mismatch = ContentHashMismatchError(
            engine_path="/var/lib/azaion/c10/cache/engines/dinov2_vpr.engine",
            expected_sha256_hex="a" * 64,
            actual_sha256_hex="b" * 64,
        )
        fakes.bringup.raises = mismatch
        sut = _orchestrator(fakes, config)

        result = sut.build_cache(_request())

        assert result.failure_phase is FailurePhase.DOWNLOAD
        assert fakes.invoker.calls == 0
# ---------------------------------------------------------------------------
# Download report.outcome=FAILURE → CacheBuildReport(failure_phase=download)
# ---------------------------------------------------------------------------
class TestDownloadReportOutcomeFailure:
    """A download report with ``outcome=FAILURE`` fails the build at phase DOWNLOAD."""

    def test_outcome_failure_in_download_report_returns_failure(
        self, fakes: _Fakes, config: C12BuildCacheConfig
    ) -> None:
        """The report's failure reason is passed through verbatim; the invoker is skipped."""
        failed_download = DownloadBatchReportCut(
            outcome=DownloadOutcomeCut.FAILURE,
            tiles_requested=12,
            tiles_downloaded=0,
            failure_reason="rate limit budget exceeded",
        )
        fakes.downloader.report = failed_download
        sut = _orchestrator(fakes, config)

        result = sut.build_cache(_request())

        assert result.outcome is BuildCacheOutcome.FAILURE
        assert result.failure_phase is FailurePhase.DOWNLOAD
        assert result.failure_reason == "rate limit budget exceeded"
        assert fakes.invoker.calls == 0
# ---------------------------------------------------------------------------
# Build report.outcome=FAILURE → CacheBuildReport(failure_phase=build)
# ---------------------------------------------------------------------------
class TestBuildReportOutcomeFailure:
    """A remote build report with ``outcome=FAILURE`` fails the build at phase BUILD."""

    def test_build_outcome_failure_in_report(
        self, fakes: _Fakes, config: C12BuildCacheConfig
    ) -> None:
        """The failure reason is passed through and the failed report stays attached."""
        failed_build = RemoteBuildReport(
            outcome=RemoteBuildOutcome.FAILURE,
            engines_built=1,
            engines_reused=0,
            descriptors_generated=0,
            manifest_hash=None,
            failure_reason="empty C6 corpus",
            elapsed_s=2.0,
        )
        fakes.invoker.report = failed_build
        sut = _orchestrator(fakes, config)

        result = sut.build_cache(_request())

        assert result.outcome is BuildCacheOutcome.FAILURE
        assert result.failure_phase is FailurePhase.BUILD
        assert result.failure_reason == "empty C6 corpus"
        # The failed remote report is still attached to the aggregate.
        assert result.build_report is not None
# ---------------------------------------------------------------------------
# NFR-perf-overhead — orchestrator-only path, all-fake collaborators x 100
# ---------------------------------------------------------------------------
class TestNfrPerfOverhead:
    """Microbenchmark of ``build_cache`` with every collaborator replaced by a fake."""

    def test_microbench_p99_under_50ms(self, fakes: _Fakes, config: C12BuildCacheConfig) -> None:
        # The orchestrator is wired with the real wall clock here; the
        # measurement itself is taken with ``time.perf_counter`` around the
        # call, not read from the orchestrator-reported elapsed seconds.
        from gps_denied_onboard.clock import wall_clock as _wc

        orchestrator = BuildCacheOrchestrator(
            config=config,
            clock=_wc.WallClock(),
            logger=fakes.logger,
            lock_factory=fakes.lock_factory,
            ssh_factory=fakes.ssh_factory,
            remote_c10_invoker=fakes.invoker,  # type: ignore[arg-type]
            companion_bringup=fakes.bringup,  # type: ignore[arg-type]
            tile_downloader=fakes.downloader,
            flights_api_client=fakes.flights,
        )

        def _timed_run_ms() -> float:
            """Run one build and return its wall-clock duration in milliseconds."""
            began = time.perf_counter()
            orchestrator.build_cache(_request())
            return (time.perf_counter() - began) * 1000

        # One untimed call first so the samples exclude first-call costs.
        orchestrator.build_cache(_request())

        samples_ms = sorted(_timed_run_ms() for _ in range(100))
        # With 100 sorted samples this selects index 98, the 99th smallest.
        p99 = samples_ms[int(0.99 * len(samples_ms)) - 1]
        assert p99 < 50.0, f"NFR-perf-overhead p99={p99:.2f} ms exceeded 50 ms budget"
# ---------------------------------------------------------------------------
# Composition-root smoke — services dataclass plumbs build_cache_orchestrator
# ---------------------------------------------------------------------------
class TestCompositionRootSmoke:
    """Smoke checks that the real C12 collaborators can be constructed."""

    def test_companion_bringup_real_class_attaches(self, tmp_path: Path) -> None:
        # Only construction is exercised: a real CompanionBringup is built
        # around a fake SSH factory, then a real remote invoker is built.
        from gps_denied_onboard.components.c12_operator_tooling.remote_sidecar_verifier import (
            RemoteSidecarVerifier,
        )

        companion_config = C12CompanionConfig(
            ssh_keyfile=tmp_path / "key",
            host_key_policy=HostKeyPolicy.STRICT,
        )
        verifier = RemoteSidecarVerifier(timeout_s=5.0)
        bringup = CompanionBringup(
            ssh_factory=_FakeSshFactory(),
            sidecar_verifier=verifier,
            logger=logging.getLogger("test_smoke"),
            config=companion_config,
        )
        assert bringup is not None

        # The real RemoteCacheProvisionerInvoker needs nothing but a logger.
        invoker = RemoteCacheProvisionerInvoker(logger=logging.getLogger("test"))
        assert invoker is not None
@@ -1,8 +1,20 @@
"""AZ-326 — `build-cache` happy + unhappy paths (AC-11 .. AC-17, AC-3 mapping)."""
"""AZ-326 + AZ-328 — `build-cache` CLI happy + unhappy paths.
After AZ-328 the CLI no longer resolves the flight itself — it builds
a :class:`BuildCacheRequest` and hands it to the
:class:`BuildCacheOrchestrator` injected via the services dataclass.
The flight resolve happens inside the orchestrator.
The flag-mapping ACs from AZ-326 (AC-11 .. AC-17) are still enforced
here: the test fakes assert that the orchestrator received the right
request shape, and that ``CacheBuildReport.failure_exception_type``
fields drive the documented exit-code mapping.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from pathlib import Path
from types import SimpleNamespace
from typing import Any
@@ -12,99 +24,58 @@ import pytest
from click.testing import CliRunner
from gps_denied_onboard.components.c12_operator_tooling import (
EXIT_BUILD_FAILURE,
EXIT_DOWNLOAD_FAILURE,
EXIT_EMPTY_WAYPOINTS,
EXIT_FLIGHT_NOT_FOUND,
EXIT_FLIGHTS_API_AUTH,
EXIT_LOCK_HELD,
EXIT_OK,
EXIT_USAGE,
BuildCacheOutcome,
BuildCacheRequest,
BuildLockHeldError,
C12Config,
EmptyWaypointsError,
FlightDto,
FlightNotFoundError,
FlightsApiAuthError,
CacheBuildReport,
FailurePhase,
FlightById,
FlightFromFile,
SectorClassification,
WaypointDto,
WaypointObjective,
WaypointSource,
)
from gps_denied_onboard.components.c12_operator_tooling.cli import app
_FLIGHT_ID = UUID("00000000-0000-0000-0000-000000000001")
_API_KEY = "super-secret-api-key"
_SAT_URL = "https://satellite.example.com"
def _three_waypoint_flight() -> FlightDto:
    """Build a ``FlightDto`` with three operator waypoints; ordinal 0 is the takeoff."""
    waypoints: list[WaypointDto] = []
    for i in range(3):
        if i == 0:
            objective = WaypointObjective.TAKEOFF
        else:
            objective = WaypointObjective.WAYPOINT
        waypoints.append(
            WaypointDto(
                ordinal=i,
                # Each waypoint is shifted by 0.01 degrees per ordinal on both axes.
                lat_deg=50.0 + i * 0.01,
                lon_deg=36.0 + i * 0.01,
                alt_m=100.0,
                objective=objective,
                source=WaypointSource.OPERATOR,
            )
        )
    return FlightDto(
        flight_id=_FLIGHT_ID,
        name="test-flight",
        waypoints=tuple(waypoints),
    )
class _FakeFlightsApiClient:
    """Test double that records ``fetch_flight`` / ``load_flight_file`` calls.

    Results are scripted through the constructor: ``fetch_raises`` takes
    precedence over ``fetch_returns``; ``load_returns`` is what
    ``load_flight_file`` hands back.
    """

    def __init__(
        self,
        *,
        fetch_returns: FlightDto | None = None,
        fetch_raises: Exception | None = None,
        load_returns: FlightDto | None = None,
    ) -> None:
        # Call logs inspected by the tests.
        self.fetch_calls: list[dict[str, Any]] = []
        self.load_calls: list[Path] = []
        # Scripted outcomes.
        self._load_returns = load_returns
        self._fetch_raises = fetch_raises
        self._fetch_returns = fetch_returns

    def load_flight_file(self, *, path: Path) -> FlightDto:
        """Record ``path`` and return the scripted flight."""
        self.load_calls.append(path)
        scripted = self._load_returns
        assert scripted is not None
        return scripted

    def fetch_flight(
        self,
        *,
        flight_id: UUID,
        base_url: str,
        auth_token: str,
        timeout_s: float = 10.0,
    ) -> FlightDto:
        """Record the call, then raise the scripted error or return the scripted flight.

        ``timeout_s`` is accepted but neither recorded nor used.
        """
        recorded = dict(flight_id=flight_id, base_url=base_url, auth_token=auth_token)
        self.fetch_calls.append(recorded)
        scripted_error = self._fetch_raises
        if scripted_error is not None:
            raise scripted_error
        scripted = self._fetch_returns
        assert scripted is not None
        return scripted
@dataclass
class _FakeOrchestrator:
def __init__(self) -> None:
self.calls: list[dict[str, Any]] = []
"""Records the :class:`BuildCacheRequest` and returns a scripted report."""
def build_cache(self, **kwargs: Any) -> None:
self.calls.append(kwargs)
return_report: CacheBuildReport | None = None
raise_on_call: Exception | None = None
captured: list[BuildCacheRequest] = field(default_factory=list)
def build_cache(self, request: BuildCacheRequest) -> CacheBuildReport:
self.captured.append(request)
if self.raise_on_call is not None:
raise self.raise_on_call
if self.return_report is not None:
return self.return_report
return CacheBuildReport(
outcome=BuildCacheOutcome.SUCCESS,
failure_phase=FailurePhase.NONE,
flight_resolve_report=None,
download_report=None,
build_report=None,
failure_reason=None,
wall_clock_s=0.1,
)
def _make_services(
*,
flights_client: _FakeFlightsApiClient,
orchestrator: _FakeOrchestrator | None = None,
) -> SimpleNamespace:
def _make_services(orchestrator: _FakeOrchestrator | None = None) -> SimpleNamespace:
return SimpleNamespace(
flights_api_client=flights_client,
flights_api_base_url="https://flights.test",
flights_api_auth_token="redacted-token",
build_cache_orchestrator=orchestrator or _FakeOrchestrator(),
)
@@ -116,12 +87,6 @@ def _invoke(
services: SimpleNamespace | None,
config: C12Config,
) -> Any:
"""Run ``operator-tool`` with a per-test ``services`` collaborator injected.
The CLI's top-level callback honours pre-populated ``ctx.obj`` dicts
of the form ``{"config": ..., "logger": ..., "services": ...}`` —
we build that dict here and pass it as ``obj=`` to ``CliRunner.invoke``.
"""
logger = logging.getLogger("test.c12.cli.build_cache")
logger.handlers.clear()
logger.addHandler(logging.NullHandler())
@@ -153,20 +118,31 @@ def calibration_path(tmp_path: Path) -> Path:
return p
class TestFlightIdHappyPath:
"""AC-11 — `--flight-id` resolves via fetch_flight and forwards FlightDto."""
def _required_args(calibration_path: Path) -> list[str]:
    """Return the flag/value CLI tokens shared by every ``build-cache`` invocation here.

    The result is a flat list alternating flag and value, ready to be
    spliced into an argument list with ``*``.
    """
    flag_values = (
        ("--calibration-path", str(calibration_path)),
        ("--companion-host", "companion.local"),
        ("--satellite-provider-url", _SAT_URL),
        ("--api-key", _API_KEY),
    )
    return [token for pair in flag_values for token in pair]
def test_orchestrator_called_with_resolved_dto(
class TestFlightIdHappyPath:
"""AC-11 — `--flight-id` builds a BuildCacheRequest with a `FlightById` source."""
def test_orchestrator_called_with_flight_by_id(
self,
runner: CliRunner,
base_config: C12Config,
calibration_path: Path,
) -> None:
# Arrange
flight = _three_waypoint_flight()
client = _FakeFlightsApiClient(fetch_returns=flight)
orchestrator = _FakeOrchestrator()
services = _make_services(flights_client=client, orchestrator=orchestrator)
services = _make_services(orchestrator)
# Act
result = _invoke(
@@ -177,8 +153,7 @@ class TestFlightIdHappyPath:
str(_FLIGHT_ID),
"--sector-class",
"stable_rear",
"--calibration-path",
str(calibration_path),
*_required_args(calibration_path),
],
services=services,
config=base_config,
@@ -186,21 +161,20 @@ class TestFlightIdHappyPath:
# Assert
assert result.exit_code == EXIT_OK, result.output
assert len(client.fetch_calls) == 1
assert client.fetch_calls[0]["flight_id"] == _FLIGHT_ID
assert len(client.load_calls) == 0
assert len(orchestrator.calls) == 1
call = orchestrator.calls[0]
assert call["flight"] is flight
assert call["sector_class"] is SectorClassification.STABLE_REAR
assert call["freshness_months"] == 12 # AC-NEW-6 stable_rear default
assert call["calibration_path"] == calibration_path
assert len(orchestrator.captured) == 1
request = orchestrator.captured[0]
assert isinstance(request.flight_source, FlightById)
assert request.flight_source.flight_id == _FLIGHT_ID
assert request.sector_class is SectorClassification.STABLE_REAR
assert request.calibration_path == calibration_path
assert request.companion_address.host == "companion.local"
assert request.satellite_provider_url == _SAT_URL
class TestFlightFileHappyPath:
"""AC-12 — `--flight-file` uses the offline loader; no fetch."""
"""AC-12 — `--flight-file` builds a BuildCacheRequest with a `FlightFromFile` source."""
def test_load_file_called_fetch_not_called(
def test_request_has_flight_from_file_source(
self,
runner: CliRunner,
base_config: C12Config,
@@ -210,10 +184,8 @@ class TestFlightFileHappyPath:
# Arrange
flight_file = tmp_path / "flight.json"
flight_file.write_text("{}", encoding="utf-8")
flight = _three_waypoint_flight()
client = _FakeFlightsApiClient(load_returns=flight)
orchestrator = _FakeOrchestrator()
services = _make_services(flights_client=client, orchestrator=orchestrator)
services = _make_services(orchestrator)
# Act
result = _invoke(
@@ -224,8 +196,7 @@ class TestFlightFileHappyPath:
str(flight_file),
"--sector-class",
"active_conflict",
"--calibration-path",
str(calibration_path),
*_required_args(calibration_path),
],
services=services,
config=base_config,
@@ -233,10 +204,10 @@ class TestFlightFileHappyPath:
# Assert
assert result.exit_code == EXIT_OK, result.output
assert len(client.load_calls) == 1
assert client.load_calls[0] == flight_file
assert len(client.fetch_calls) == 0
assert orchestrator.calls[0]["freshness_months"] == 1 # active_conflict
request = orchestrator.captured[0]
assert isinstance(request.flight_source, FlightFromFile)
assert request.flight_source.path == flight_file
assert request.sector_class is SectorClassification.ACTIVE_CONFLICT
class TestMutuallyExclusiveFlags:
@@ -252,8 +223,8 @@ class TestMutuallyExclusiveFlags:
# Arrange
flight_file = tmp_path / "flight.json"
flight_file.write_text("{}", encoding="utf-8")
client = _FakeFlightsApiClient()
services = _make_services(flights_client=client)
orchestrator = _FakeOrchestrator()
services = _make_services(orchestrator)
# Act
result = _invoke(
@@ -266,8 +237,7 @@ class TestMutuallyExclusiveFlags:
str(flight_file),
"--sector-class",
"stable_rear",
"--calibration-path",
str(calibration_path),
*_required_args(calibration_path),
],
services=services,
config=base_config,
@@ -275,8 +245,7 @@ class TestMutuallyExclusiveFlags:
# Assert
assert result.exit_code == EXIT_USAGE
assert len(client.fetch_calls) == 0
assert len(client.load_calls) == 0
assert len(orchestrator.captured) == 0
def test_neither_flag_set(
self,
@@ -285,8 +254,8 @@ class TestMutuallyExclusiveFlags:
calibration_path: Path,
) -> None:
# Arrange
client = _FakeFlightsApiClient()
services = _make_services(flights_client=client)
orchestrator = _FakeOrchestrator()
services = _make_services(orchestrator)
# Act
result = _invoke(
@@ -295,8 +264,7 @@ class TestMutuallyExclusiveFlags:
"build-cache",
"--sector-class",
"stable_rear",
"--calibration-path",
str(calibration_path),
*_required_args(calibration_path),
],
services=services,
config=base_config,
@@ -304,11 +272,23 @@ class TestMutuallyExclusiveFlags:
# Assert
assert result.exit_code == EXIT_USAGE
assert len(client.fetch_calls) == 0
assert len(orchestrator.captured) == 0
class TestFlightsApiErrorMapping:
"""AC-15, AC-16, AC-17 + AC-3 — error → exit code; auth_token never logged."""
"""AC-15, AC-16, AC-17 — failure_exception_type drives granular exit code."""
def _failure_report(self, exception_name: str) -> CacheBuildReport:
    """Build a flight-resolve failure report tagged with ``exception_name``.

    The name is stored in ``failure_exception_type`` and also prefixes the
    human-readable ``failure_reason``.
    """
    reason = f"{exception_name}: simulated"
    return CacheBuildReport(
        failure_exception_type=exception_name,
        failure_reason=reason,
        failure_phase=FailurePhase.FLIGHT_RESOLVE,
        outcome=BuildCacheOutcome.FAILURE,
        wall_clock_s=0.0,
        # No phase produced a report in this scripted failure.
        flight_resolve_report=None,
        download_report=None,
        build_report=None,
    )
def test_flight_not_found_maps_to_exit_62(
self,
@@ -317,8 +297,8 @@ class TestFlightsApiErrorMapping:
calibration_path: Path,
) -> None:
# Arrange
client = _FakeFlightsApiClient(fetch_raises=FlightNotFoundError("not found"))
services = _make_services(flights_client=client)
orchestrator = _FakeOrchestrator(return_report=self._failure_report("FlightNotFoundError"))
services = _make_services(orchestrator)
# Act
result = _invoke(
@@ -329,15 +309,14 @@ class TestFlightsApiErrorMapping:
str(_FLIGHT_ID),
"--sector-class",
"stable_rear",
"--calibration-path",
str(calibration_path),
*_required_args(calibration_path),
],
services=services,
config=base_config,
)
# Assert
assert result.exit_code == EXIT_FLIGHT_NOT_FOUND
assert result.exit_code == EXIT_FLIGHT_NOT_FOUND, result.output
def test_auth_failure_maps_to_exit_61_and_no_token_in_log(
self,
@@ -346,8 +325,8 @@ class TestFlightsApiErrorMapping:
calibration_path: Path,
) -> None:
# Arrange
client = _FakeFlightsApiClient(fetch_raises=FlightsApiAuthError("denied"))
services = _make_services(flights_client=client)
orchestrator = _FakeOrchestrator(return_report=self._failure_report("FlightsApiAuthError"))
services = _make_services(orchestrator)
# Act
result = _invoke(
@@ -358,8 +337,7 @@ class TestFlightsApiErrorMapping:
str(_FLIGHT_ID),
"--sector-class",
"stable_rear",
"--calibration-path",
str(calibration_path),
*_required_args(calibration_path),
],
services=services,
config=base_config,
@@ -369,7 +347,7 @@ class TestFlightsApiErrorMapping:
assert result.exit_code == EXIT_FLIGHTS_API_AUTH
if base_config.log_path.exists():
log_text = base_config.log_path.read_text(encoding="utf-8")
assert "redacted-token" not in log_text
assert _API_KEY not in log_text
def test_empty_waypoints_maps_to_exit_64(
self,
@@ -378,8 +356,8 @@ class TestFlightsApiErrorMapping:
calibration_path: Path,
) -> None:
# Arrange
client = _FakeFlightsApiClient(fetch_raises=EmptyWaypointsError("zero"))
services = _make_services(flights_client=client)
orchestrator = _FakeOrchestrator(return_report=self._failure_report("EmptyWaypointsError"))
services = _make_services(orchestrator)
# Act
result = _invoke(
@@ -390,8 +368,7 @@ class TestFlightsApiErrorMapping:
str(_FLIGHT_ID),
"--sector-class",
"stable_rear",
"--calibration-path",
str(calibration_path),
*_required_args(calibration_path),
],
services=services,
config=base_config,
@@ -399,3 +376,93 @@ class TestFlightsApiErrorMapping:
# Assert
assert result.exit_code == EXIT_EMPTY_WAYPOINTS
class TestOrchestratorErrorMapping:
    """AZ-328 — exceptions raised by the orchestrator map to dedicated exit codes."""

    def test_build_lock_held_maps_to_exit_50(
        self,
        runner: CliRunner,
        base_config: C12Config,
        calibration_path: Path,
        tmp_path: Path,
    ) -> None:
        # Arrange: the fake orchestrator raises the lock-contention error
        # as soon as ``build_cache`` is called.
        lock_error = BuildLockHeldError(lock_path=tmp_path / ".c12.lock", timeout_s=5.0)
        services = _make_services(_FakeOrchestrator(raise_on_call=lock_error))
        cli_args = [
            "build-cache",
            "--flight-id",
            str(_FLIGHT_ID),
            "--sector-class",
            "stable_rear",
            *_required_args(calibration_path),
        ]

        # Act
        result = _invoke(runner, cli_args, services=services, config=base_config)

        # Assert
        assert result.exit_code == EXIT_LOCK_HELD
class TestCacheBuildReportExitCodes:
    """AZ-328 AC-7 — idempotent_no_op exits 0; failure phases map to their exit codes."""

    def _report(self, outcome: BuildCacheOutcome, failure_phase: FailurePhase) -> CacheBuildReport:
        """Build a report that carries only ``outcome`` and ``failure_phase``."""
        return CacheBuildReport(
            outcome=outcome,
            failure_phase=failure_phase,
            failure_reason=None,
            wall_clock_s=0.0,
            flight_resolve_report=None,
            download_report=None,
            build_report=None,
        )

    @pytest.mark.parametrize(
        "outcome,failure_phase,expected_exit",
        [
            (BuildCacheOutcome.SUCCESS, FailurePhase.NONE, EXIT_OK),
            (BuildCacheOutcome.IDEMPOTENT_NO_OP, FailurePhase.NONE, EXIT_OK),
            (BuildCacheOutcome.FAILURE, FailurePhase.DOWNLOAD, EXIT_DOWNLOAD_FAILURE),
            (BuildCacheOutcome.FAILURE, FailurePhase.BUILD, EXIT_BUILD_FAILURE),
        ],
    )
    def test_outcome_to_exit_code_table(
        self,
        runner: CliRunner,
        base_config: C12Config,
        calibration_path: Path,
        outcome: BuildCacheOutcome,
        failure_phase: FailurePhase,
        expected_exit: int,
    ) -> None:
        # Arrange: the fake orchestrator returns the parametrized report.
        scripted_report = self._report(outcome, failure_phase)
        services = _make_services(_FakeOrchestrator(return_report=scripted_report))
        cli_args = [
            "build-cache",
            "--flight-id",
            str(_FLIGHT_ID),
            "--sector-class",
            "stable_rear",
            *_required_args(calibration_path),
        ]

        # Act
        result = _invoke(runner, cli_args, services=services, config=base_config)

        # Assert: CLI output is attached to the failure message for diagnosis.
        assert result.exit_code == expected_exit, result.output
@@ -0,0 +1,57 @@
"""AZ-328 — ``FilelockFileLockFactory`` real-filelock smoke tests."""
from __future__ import annotations
from pathlib import Path
import pytest
from gps_denied_onboard.components.c12_operator_tooling import (
FilelockFileLockFactory,
LockTimeout,
)
class TestFilelockFileLockFactory:
    """Smoke tests for ``FilelockFileLockFactory`` against the real ``filelock`` library."""

    def test_acquire_and_release(self, tmp_path: Path) -> None:
        lock_file = tmp_path / ".c12.lock"
        locks = FilelockFileLockFactory()

        with locks.try_lock(lock_file, timeout_s=1.0):
            # While the lock is held, the lock file is present on disk.
            assert lock_file.exists()

        # Whether or not the file persists after release, a second
        # acquisition on the same path must succeed within the timeout.
        with locks.try_lock(lock_file, timeout_s=1.0):
            pass

    def test_concurrent_lock_raises_lock_timeout(self, tmp_path: Path) -> None:
        # A separate FileLock object holds the path for the duration of the
        # test, so the factory's attempt on the same path is expected to
        # time out and surface as our LockTimeout.
        from filelock import FileLock as RealFileLock

        lock_file = tmp_path / ".c12.lock"
        blocker = RealFileLock(str(lock_file))
        blocker.acquire(timeout=1.0)
        try:
            contender = FilelockFileLockFactory()
            with pytest.raises(LockTimeout) as caught:
                # Short timeout; the blocker stays held until ``finally``.
                with contender.try_lock(lock_file, timeout_s=0.05):
                    pass  # pragma: no cover
            error = caught.value
            assert error.path == lock_file
            assert error.timeout_s == 0.05
        finally:
            blocker.release()

    def test_creates_parent_directory(self, tmp_path: Path) -> None:
        # The lock path sits two directory levels below anything that exists.
        deep_lock = tmp_path / "nested" / "deeper" / ".c12.lock"
        locks = FilelockFileLockFactory()
        with locks.try_lock(deep_lock, timeout_s=1.0):
            assert deep_lock.parent.is_dir()
@@ -0,0 +1,228 @@
"""AZ-328 — ``RemoteCacheProvisionerInvoker`` JSON wire + redaction smoke."""
from __future__ import annotations
import json
import logging
from dataclasses import dataclass
from pathlib import Path, PurePosixPath
from uuid import UUID
import pytest
from gps_denied_onboard._types.geo import BoundingBox, LatLonAlt
from gps_denied_onboard.components.c12_operator_tooling import (
BuildReportParseError,
RemoteBuildOutcome,
RemoteCacheProvisionerInvoker,
SectorClassification,
)
from gps_denied_onboard.components.c12_operator_tooling.remote_c10_invoker import (
REDACTED_PLACEHOLDER,
RemoteBuildRequest,
)
from gps_denied_onboard.components.c12_operator_tooling.ssh_session import (
RemoteCommandResult,
SshSession,
)
@dataclass
class _ScriptedSession(SshSession):
    """``SshSession`` test double that replays a scripted command result.

    ``run`` stores the command it was given in ``captured_command`` and
    answers with the configured payloads and exit code; ``close`` counts
    how many times it was called.
    """

    # Scripted result returned by every ``run`` call.
    stdout_payload: str = "{}"
    stderr_payload: str = ""
    exit_code: int = 0
    # Observations recorded for the tests.
    captured_command: str | None = None
    close_calls: int = 0

    def run(self, command: str, *, timeout_s: float) -> RemoteCommandResult:
        """Remember ``command`` and return the scripted result; ``timeout_s`` is unused."""
        self.captured_command = command
        scripted = RemoteCommandResult(
            exit_code=self.exit_code,
            stdout=self.stdout_payload,
            stderr=self.stderr_payload,
        )
        return scripted

    def close(self) -> None:
        """Count the close call."""
        self.close_calls += 1

    def file_exists(self, remote_path: PurePosixPath) -> bool:
        """Always report that the remote path does not exist."""
        return False

    def list_dir(self, remote_path: PurePosixPath) -> list[str]:
        """Always report an empty remote directory."""
        return []
def _request() -> RemoteBuildRequest:
    """Return the fixed ``RemoteBuildRequest`` used by every test in this module."""
    area = BoundingBox(
        min_lat_deg=49.99,
        min_lon_deg=36.19,
        max_lat_deg=50.06,
        max_lon_deg=36.31,
    )
    origin = LatLonAlt(lat_deg=50.0, lon_deg=36.2, alt_m=200.0)
    return RemoteBuildRequest(
        flight_id=UUID("12345678-1234-1234-1234-123456789012"),
        bbox=area,
        takeoff_origin=origin,
        zoom_levels=(18,),
        sector_class=SectorClassification.STABLE_REAR,
        expected_engines=("dinov2_vpr",),
        calibration_path=Path("/tmp/calibration.json"),
        companion_cache_root=PurePosixPath("/var/lib/azaion/c10/cache"),
    )
@pytest.fixture
def captured_logs() -> tuple[logging.Logger, list[logging.LogRecord]]:
    """Provide a DEBUG-level logger plus the list that collects its records.

    The logger's existing handlers are dropped and propagation is turned
    off, so the returned list is the only place its records go.
    """
    sink: list[logging.LogRecord] = []

    class _ListHandler(logging.Handler):
        """Handler that appends every record to ``sink`` unformatted."""

        def emit(self, record: logging.LogRecord) -> None:
            sink.append(record)

    log = logging.getLogger("test_remote_c10_invoker")
    log.handlers.clear()
    log.propagate = False
    log.addHandler(_ListHandler(level=logging.DEBUG))
    log.setLevel(logging.DEBUG)
    return log, sink
class TestParseHappyPath:
    """A well-formed JSON report on stdout is parsed into a remote build report."""

    def test_returns_remote_build_report(
        self, captured_logs: tuple[logging.Logger, list[logging.LogRecord]]
    ) -> None:
        logger = captured_logs[0]
        wire_report = json.dumps(
            {
                "outcome": "success",
                "engines_built": 2,
                "engines_reused": 1,
                "descriptors_generated": 100,
                "manifest_hash": "abc123",
                "failure_reason": None,
                "elapsed_s": 12.5,
            }
        )
        session = _ScriptedSession(stdout_payload=wire_report)

        parsed = RemoteCacheProvisionerInvoker(logger=logger).invoke(session, _request())

        # Fields on the parsed report mirror the values sent over the wire.
        assert parsed.outcome is RemoteBuildOutcome.SUCCESS
        assert parsed.engines_built == 2
        assert parsed.engines_reused == 1
        assert parsed.manifest_hash == "abc123"
class TestParseProgressLines:
    """Stdout lines that precede the JSON report are logged as progress records."""

    def test_progress_lines_logged_at_debug(
        self, captured_logs: tuple[logging.Logger, list[logging.LogRecord]]
    ) -> None:
        logger, records = captured_logs
        wire_report = json.dumps(
            {
                "outcome": "success",
                "engines_built": 1,
                "engines_reused": 0,
                "descriptors_generated": 50,
                "manifest_hash": "h",
                "failure_reason": None,
                "elapsed_s": 1.0,
            }
        )
        # Three progress lines, then the JSON report as the final line.
        stdout = "progress: 10%\nprogress: 50%\nprogress: 100%\n" + wire_report
        session = _ScriptedSession(stdout_payload=stdout)

        RemoteCacheProvisionerInvoker(logger=logger).invoke(session, _request())

        # Progress records are identified by the ``kind`` attribute on the record.
        progress_count = sum(
            1 for record in records if vars(record).get("kind") == "c10.remote.progress"
        )
        assert progress_count == 3
class TestRedaction:
    """Secrets passed via ``secrets_to_redact`` must never reach a log record."""

    def test_secret_in_progress_line_is_redacted(
        self, captured_logs: tuple[logging.Logger, list[logging.LogRecord]]
    ) -> None:
        logger, records = captured_logs
        secret = "leaked-token-xyz"
        payload = json.dumps(
            {
                "outcome": "success",
                "engines_built": 0,
                "engines_reused": 0,
                "descriptors_generated": 0,
                "manifest_hash": "h",
                "failure_reason": None,
                "elapsed_s": 0.0,
            }
        )
        # The secret is embedded in a progress line ahead of the JSON report.
        session = _ScriptedSession(
            stdout_payload=f"some progress with {secret} embedded\n{payload}"
        )
        invoker = RemoteCacheProvisionerInvoker(logger=logger)

        invoker.invoke(session, _request(), secrets_to_redact=[secret])

        def _strings_in(value: object) -> list[str]:
            """Collect every string reachable through nested dicts, lists and tuples."""
            if isinstance(value, str):
                return [value]
            if isinstance(value, dict):
                children = list(value.values())
            elif isinstance(value, (list, tuple)):
                children = list(value)
            else:
                return []
            return [text for child in children for text in _strings_in(child)]

        # An empty record list would let the checks below pass without
        # exercising redaction at all.
        assert records, "invoker emitted no log records"

        # Gather the rendered message plus every string stored on each
        # record (format template, args, extra fields at any nesting depth).
        logged_text: list[str] = []
        for record in records:
            logged_text.append(record.getMessage())
            logged_text.extend(_strings_in(vars(record)))

        assert not any(secret in text for text in logged_text)
        # The progress line must have been logged with the secret replaced
        # by the placeholder, not silently dropped.
        assert any(REDACTED_PLACEHOLDER in text for text in logged_text)
class TestParseFailures:
    """Unusable remote output surfaces as ``BuildReportParseError``."""

    def test_non_zero_exit_code_raises_parse_error(
        self, captured_logs: tuple[logging.Logger, list[logging.LogRecord]]
    ) -> None:
        # Non-zero exit code combined with non-JSON stdout.
        logger = captured_logs[0]
        crashed = _ScriptedSession(
            stdout_payload="some garbage",
            stderr_payload="oom killed",
            exit_code=137,
        )
        invoker = RemoteCacheProvisionerInvoker(logger=logger)
        with pytest.raises(BuildReportParseError):
            invoker.invoke(crashed, _request())

    def test_garbage_last_line_raises_parse_error(
        self, captured_logs: tuple[logging.Logger, list[logging.LogRecord]]
    ) -> None:
        # Exit code 0 (the session default) but stdout is not JSON.
        logger = captured_logs[0]
        not_json = _ScriptedSession(stdout_payload="not json")
        invoker = RemoteCacheProvisionerInvoker(logger=logger)
        with pytest.raises(BuildReportParseError):
            invoker.invoke(not_json, _request())

    def test_unknown_outcome_raises_parse_error(
        self, captured_logs: tuple[logging.Logger, list[logging.LogRecord]]
    ) -> None:
        # Valid JSON whose ``outcome`` value is "weird".
        logger = captured_logs[0]
        odd_outcome = _ScriptedSession(stdout_payload='{"outcome": "weird"}')
        invoker = RemoteCacheProvisionerInvoker(logger=logger)
        with pytest.raises(BuildReportParseError):
            invoker.invoke(odd_outcome, _request())
class TestCommandConstruction:
    """The remote command line carries the expected entry point and flags."""

    def test_command_pipes_json_request_to_companion_entry(
        self, captured_logs: tuple[logging.Logger, list[logging.LogRecord]]
    ) -> None:
        logger = captured_logs[0]
        wire_report = json.dumps(
            {
                "outcome": "success",
                "engines_built": 0,
                "engines_reused": 0,
                "descriptors_generated": 0,
                "manifest_hash": "h",
                "failure_reason": None,
                "elapsed_s": 0.0,
            }
        )
        session = _ScriptedSession(stdout_payload=wire_report)

        RemoteCacheProvisionerInvoker(logger=logger).invoke(session, _request())

        # The session records the exact command string it was asked to run.
        command = session.captured_command
        assert command is not None
        # Entry point, both flags, and the printf that feeds the JSON
        # request through stdin must all appear in the command.
        expected_fragments = (
            "azaion-onboard c10 build",
            "--json-output",
            "--request-stdin",
            "printf",
        )
        for fragment in expected_fragments:
            assert fragment in command