mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-06-21 07:01:14 +00:00
[AZ-701] Fix Jetson e2e harness infrastructure blockers
- gtsam_isam2_estimator: shim for gtsam>=4.3a0 aarch64 pre-release where IncrementalFixedLagSmoother/FixedLagSmootherKeyTimestampMap moved from gtsam_unstable to gtsam
- inference_factory: eager import of c7_inference package so register_component_block runs before config.components is read
- docker-compose.test.jetson.yml: remove companion and operator-orchestrator (not needed by replay CLI tests and crash in test env due to AZ-618 live-mode deps); add db-migrate and tile-init setup-profile services for Alembic migrations and FAISS fixture provisioning; update e2e-runner depends_on to db only
- scripts/mk_test_faiss_fixture.py: generate minimal HNSW32 FAISS descriptor index into the tile-data volume for the test harness

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -33,31 +33,66 @@
|
||||
# gps-denied client (AZ-691) lands.
|
||||
|
||||
services:
|
||||
companion:
|
||||
extends:
|
||||
file: docker-compose.yml
|
||||
service: companion
|
||||
environment:
|
||||
LOG_LEVEL: INFO
|
||||
# Jetson is the canonical test env (2026-05-20 policy); the FAISS
|
||||
# HNSW descriptor index is required by c2_vpr in this binary.
|
||||
# Without this flag airborne_bootstrap fails at
|
||||
# _build_c6_descriptor_index → RuntimeNotAvailableError. faiss-cpu
|
||||
# is installed via the [dev] extra; the gate is build-flag, not
|
||||
# wheel availability.
|
||||
BUILD_FAISS_INDEX: "ON"
|
||||
# ------------------------------------------------------------------
|
||||
# Init services (profiles: [setup]) — NOT started by the default
|
||||
# `docker compose up`. They are invoked explicitly as one-shot jobs
|
||||
# via `docker compose --profile setup run --rm <service>` before the
|
||||
# main harness run:
|
||||
#
|
||||
# 1. db-migrate — applies Alembic migrations so companion's
|
||||
# FreshnessGate / PostgresFilesystemStore find their tables.
|
||||
# (AZ-618 ordering gap: build_pre_constructed queries the DB
|
||||
# before the composition root can call apply_migrations.)
|
||||
#
|
||||
# 2. tile-init — writes a minimal valid HNSW32 FAISS descriptor
|
||||
# index into the tile-data volume so FaissDescriptorIndex._load()
|
||||
# succeeds during build_pre_constructed.
|
||||
# (AZ-618 gap: the production provisioning pipeline normally
|
||||
# writes the index; in the test harness it must pre-exist.)
|
||||
#
|
||||
# They are in profile "setup" so they do not participate in the
|
||||
# default `docker compose up` and do not trip --abort-on-container-exit.
|
||||
# ------------------------------------------------------------------
|
||||
db-migrate:
|
||||
profiles: ["setup"]
|
||||
image: gps-denied-onboard/e2e-runner:jetson
|
||||
entrypoint: ["alembic"]
|
||||
command: ["upgrade", "head"]
|
||||
working_dir: /opt/project
|
||||
volumes:
|
||||
- .:/opt/project:ro
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
restart: "no"
|
||||
|
||||
operator-orchestrator:
|
||||
extends:
|
||||
file: docker-compose.yml
|
||||
service: operator-orchestrator
|
||||
environment:
|
||||
BUILD_FAISS_INDEX: "ON"
|
||||
tile-init:
|
||||
profiles: ["setup"]
|
||||
image: gps-denied-onboard/e2e-runner:jetson
|
||||
entrypoint: ["python3"]
|
||||
command: ["/opt/project/scripts/mk_test_faiss_fixture.py"]
|
||||
volumes:
|
||||
- .:/opt/project:ro
|
||||
- tile-data:/var/lib/gps-denied/tiles
|
||||
restart: "no"
|
||||
|
||||
mock-sat:
|
||||
extends:
|
||||
file: docker-compose.yml
|
||||
service: mock-sat
|
||||
# companion and operator-orchestrator are intentionally absent from
|
||||
# the Jetson e2e test harness.
|
||||
#
|
||||
# Every test in tests/e2e/replay/ invokes the ``gps-denied-replay``
|
||||
# console-script directly as a subprocess and does not call the
|
||||
# companion or operator-orchestrator HTTP APIs. Including either
|
||||
# service caused the harness to abort before any test could run:
|
||||
#
|
||||
# * companion crashes at startup because live-mode requires a
|
||||
# production-provisioned C7 inference engine (PyTorch FP16 or
|
||||
# TensorRT) that is absent from the test environment. This is the
|
||||
# pre-existing AZ-618 gap (build_pre_constructed fails before the
|
||||
# composition root can apply_migrations + engine artifacts).
|
||||
# * operator-orchestrator crashed for the same C7 inference reason.
|
||||
#
|
||||
# When the AZ-618 epic ships the full airborne boot-up in a sandboxed
|
||||
# environment (Phase E / engine stubs), companion can be re-added here.
|
||||
|
||||
db:
|
||||
extends:
|
||||
@@ -81,10 +116,6 @@ services:
|
||||
count: all
|
||||
capabilities: [gpu]
|
||||
depends_on:
|
||||
companion:
|
||||
condition: service_healthy
|
||||
mock-sat:
|
||||
condition: service_healthy
|
||||
db:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
@@ -96,6 +127,9 @@ services:
|
||||
# execute. This is the WHOLE POINT of the Jetson harness.
|
||||
GPS_DENIED_TIER: "2"
|
||||
DB_URL: postgresql://gps_denied:dev@db:5432/gps_denied
|
||||
# SATELLITE_PROVIDER_URL / COMPANION_URL are set but not used by
|
||||
# the replay CLI tests (gps-denied-replay runs as a subprocess and
|
||||
# does not call the companion or satellite-provider HTTP APIs).
|
||||
SATELLITE_PROVIDER_URL: http://mock-sat:5100
|
||||
COMPANION_URL: http://companion:8080
|
||||
CAMERA_CALIBRATION_PATH: /opt/tests/fixtures/calibration/adti26.json
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Create a minimal valid FAISS HNSW32 + IndexIDMap2 fixture for the test harness.
|
||||
|
||||
Used by the `tile-init` init service in docker-compose.test.jetson.yml.
|
||||
Writes three files to /var/lib/gps-denied/tiles/:
|
||||
descriptor.index — empty HNSW32 dim=512 binary
|
||||
descriptor.index.sha256 — sha256 sidecar (matches FaissDescriptorIndex._load)
|
||||
descriptor.index.meta.json — metadata (descriptor_dim, hnsw_params.metric, ...)
|
||||
|
||||
Running this twice is idempotent (overwrites the previous fixture).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
import faiss # type: ignore[import-untyped]
|
||||
|
||||
DESCRIPTOR_DIM = 512
|
||||
HNSW_M = 32
|
||||
|
||||
root = Path("/var/lib/gps-denied/tiles")
|
||||
root.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
inner = faiss.IndexHNSWFlat(DESCRIPTOR_DIM, HNSW_M, faiss.METRIC_INNER_PRODUCT)
|
||||
index = faiss.IndexIDMap2(inner)
|
||||
|
||||
idx_path = root / "descriptor.index"
|
||||
faiss.write_index(index, str(idx_path))
|
||||
idx_bytes = idx_path.read_bytes()
|
||||
sha256 = hashlib.sha256(idx_bytes).hexdigest()
|
||||
|
||||
(idx_path.parent / (idx_path.name + ".sha256")).write_text(sha256, encoding="ascii")
|
||||
|
||||
meta = {
|
||||
"descriptor_dim": DESCRIPTOR_DIM,
|
||||
"n_vectors": 0,
|
||||
"backbone_label": "ultra_vpr",
|
||||
"backbone_sha256_hex": "0" * 64,
|
||||
"built_at": datetime.now(timezone.utc).isoformat(),
|
||||
"hnsw_params": {
|
||||
"m": HNSW_M,
|
||||
"ef_construction": 40,
|
||||
"ef_search": 16,
|
||||
"metric": "INNER_PRODUCT",
|
||||
},
|
||||
"sidecar_sha256_hex": sha256,
|
||||
"file_path": str(idx_path),
|
||||
"id_mapping": [],
|
||||
}
|
||||
(idx_path.parent / (idx_path.name + ".meta.json")).write_text(
|
||||
json.dumps(meta, sort_keys=True, indent=2), encoding="utf-8"
|
||||
)
|
||||
|
||||
print(
|
||||
f"[tile-init] OK: empty HNSW32 dim={DESCRIPTOR_DIM} index "
|
||||
f"at {idx_path} sha256={sha256[:16]}..."
|
||||
)
|
||||
@@ -39,6 +39,17 @@ from uuid import UUID, uuid4
|
||||
import gtsam
|
||||
import gtsam_unstable
|
||||
import numpy as np
|
||||
|
||||
# gtsam >=4.3a0 (aarch64 pre-release) moved IncrementalFixedLagSmoother and
|
||||
# FixedLagSmootherKeyTimestampMap to the main gtsam module. Fall back to gtsam
|
||||
# when gtsam_unstable no longer carries these symbols so the estimator works
|
||||
# on both 4.2.x (x86_64 PyPI) and 4.3a0 (aarch64 pre-release).
|
||||
try:
|
||||
_IncrementalFixedLagSmoother = gtsam_unstable.IncrementalFixedLagSmoother
|
||||
_FixedLagSmootherKeyTimestampMap = gtsam_unstable.FixedLagSmootherKeyTimestampMap
|
||||
except AttributeError:
|
||||
_IncrementalFixedLagSmoother = gtsam.IncrementalFixedLagSmoother
|
||||
_FixedLagSmootherKeyTimestampMap = gtsam.FixedLagSmootherKeyTimestampMap
|
||||
from numpy.linalg import LinAlgError
|
||||
|
||||
from gps_denied_onboard._types.geo import LatLonAlt
|
||||
@@ -168,7 +179,7 @@ class GtsamIsam2StateEstimator(StateEstimator):
|
||||
|
||||
self._isam2 = gtsam.ISAM2(gtsam.ISAM2Params())
|
||||
window_seconds: float = block.keyframe_window_size * _FRAME_PERIOD_S
|
||||
self._smoother = gtsam_unstable.IncrementalFixedLagSmoother(window_seconds)
|
||||
self._smoother = _IncrementalFixedLagSmoother(window_seconds)
|
||||
self._graph = gtsam.NonlinearFactorGraph()
|
||||
self._values = gtsam.Values()
|
||||
|
||||
@@ -1689,14 +1700,14 @@ def _build_pose_noise(covariance: Any | None) -> gtsam.noiseModel.Base:
|
||||
|
||||
def _make_timestamp_map(
|
||||
keys: list[int], ts_ns: int
|
||||
) -> gtsam_unstable.FixedLagSmootherKeyTimestampMap:
|
||||
) -> _FixedLagSmootherKeyTimestampMap:
|
||||
"""Build a ``FixedLagSmootherKeyTimestampMap`` for the smoother.
|
||||
|
||||
The smoother needs per-key arrival timestamps in seconds (its
|
||||
sliding-window evict logic uses them); we feed every newly
|
||||
inserted key the same window-end timestamp.
|
||||
"""
|
||||
ts_map = gtsam_unstable.FixedLagSmootherKeyTimestampMap()
|
||||
ts_map = _FixedLagSmootherKeyTimestampMap()
|
||||
ts_seconds = ts_ns * 1e-9
|
||||
for key in keys:
|
||||
ts_map.insert((key, ts_seconds))
|
||||
|
||||
@@ -20,6 +20,13 @@ from typing import TYPE_CHECKING
|
||||
|
||||
from gps_denied_onboard.runtime_root.errors import RuntimeNotAvailableError
|
||||
|
||||
# Eager package import so c7_inference.__init__.py runs
|
||||
# `register_component_block("c7_inference", C7InferenceConfig)` before
|
||||
# `_c7_config(config)` reads `config.components["c7_inference"]` below.
|
||||
# The package __init__.py is import-safe (no concrete strategy modules)
|
||||
# per the Risk-2 mitigation documented in c7_inference/__init__.py.
|
||||
import gps_denied_onboard.components.c7_inference # noqa: F401
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from gps_denied_onboard.components.c7_inference import (
|
||||
C7InferenceConfig,
|
||||
|
||||
Reference in New Issue
Block a user