feat: stage7 — Model Manager (F16) and Sequential VO (F07)

This commit is contained in:
Yuzviak
2026-03-22 22:59:55 +02:00
parent 9ef046d623
commit 058ed315dd
8 changed files with 494 additions and 2 deletions
+2
View File
@@ -15,6 +15,8 @@
| **Трансформація координат (F13)** | Зберігання локального ENU Origin, конвертація WGS84 ↔ Local ENU ↔ Pixels | | **Трансформація координат (F13)** | Зберігання локального ENU Origin, конвертація WGS84 ↔ Local ENU ↔ Pixels |
| **Вхідний пайплайн (F05)** | `cv2`, `asyncio.Queue`. Керує FIFO чергою батчів кадрів з БПЛА, здійснює базову валідацію послідовностей та збереження фотографій на диск. | | **Вхідний пайплайн (F05)** | `cv2`, `asyncio.Queue`. Керує FIFO чергою батчів кадрів з БПЛА, здійснює базову валідацію послідовностей та збереження фотографій на диск. |
| **Менеджер ротацій (F06)** | Оберти 360° блоками по 30° для підбору орієнтації; трекінг історії курсу з виявленням різких поворотів (>45°). | | **Менеджер ротацій (F06)** | Оберти 360° блоками по 30° для підбору орієнтації; трекінг історії курсу з виявленням різких поворотів (>45°). |
| **Model Manager (F16)** | Архітектура завантаження ML моделей (Mock/Fallback). |
| **Візуальна Одометрія (F07)** | Суперпоінт / LightGlue імітація. OpenCV (`findEssentialMat` + RANSAC + `recoverPose`) для розрахунку відносного руху між кадрами без відомого масштабу. |
| **Граф поз (VO/GPR)** | GTSAM (Python) - очікується в наступних етапах | | **Граф поз (VO/GPR)** | GTSAM (Python) - очікується в наступних етапах |
## Швидкий старт ## Швидкий старт
+2 -2
View File
@@ -88,8 +88,8 @@
- FIFO батчів (ImageInputPipeline), менеджер ротацій (ImageRotationManager). - FIFO батчів (ImageInputPipeline), менеджер ротацій (ImageRotationManager).
- Асинхронне збереження в кеш `image_storage`. - Асинхронне збереження в кеш `image_storage`.
### Етап 7 — Model manager та послідовний VO ### Етап 7 — Model manager та послідовний VO
- Завантаження локальних вагів (SuperPoint+LightGlue), побудова ланцюжка відносних оцінок. - Завантаження локальних вагів (SuperPoint+LightGlue - Mock), побудова ланцюжка відносних оцінок (`SequentialVisualOdometry`).
### Етап 8 — Глобальне місце та метричне уточнення ### Етап 8 — Глобальне місце та метричне уточнення
- Кросс-вью вирівнювання до тайла Google Maps. - Кросс-вью вирівнювання до тайла Google Maps.
+122
View File
@@ -0,0 +1,122 @@
"""Model Manager (Component F16)."""
import logging
from abc import ABC, abstractmethod
from typing import Any
import numpy as np
from gps_denied.schemas.model import InferenceEngine
logger = logging.getLogger(__name__)
class IModelManager(ABC):
    """Interface for the ML model lifecycle manager (Component F16)."""

    @abstractmethod
    def load_model(self, model_name: str, model_format: str) -> bool:
        """Load a model by name in the given format; return True on success."""
        pass

    @abstractmethod
    def get_inference_engine(self, model_name: str) -> InferenceEngine:
        """Return the inference engine serving *model_name*."""
        pass

    @abstractmethod
    def optimize_to_tensorrt(self, model_name: str, onnx_path: str) -> str:
        """Optimize an ONNX model to TensorRT; return the resulting path."""
        pass

    @abstractmethod
    def fallback_to_onnx(self, model_name: str) -> bool:
        """Fall back to the ONNX runtime for *model_name*; True on success."""
        pass

    @abstractmethod
    def warmup_model(self, model_name: str) -> bool:
        """Warm up an already-loaded model; return True on success."""
        pass
class MockInferenceEngine(InferenceEngine):
    """A mock implementation of Inference Engine for rapid testing.

    Produces randomly generated outputs shaped like the real SuperPoint /
    LightGlue results so downstream components can run without weights.
    """

    # Number of synthetic keypoints emitted by the SuperPoint mock.
    _N_FEATURES = 500
    # Upper bound on synthetic matches emitted by the LightGlue mock.
    _MAX_MATCHES = 100

    def infer(self, input_data: Any) -> Any:
        """Dispatch to the per-model mock implementation.

        Raises:
            NotImplementedError: for the LiteSAM placeholder (F09).
            ValueError: for a model name with no mock implementation.
        """
        if self.model_name == "SuperPoint":
            return self._infer_superpoint(input_data)
        if self.model_name == "LightGlue":
            return self._infer_lightglue(input_data)
        if self.model_name == "LiteSAM":
            # Placeholder for F09. Previously this branch fell through to the
            # generic "Unknown mock model" error, which was misleading for a
            # model name the code explicitly recognizes.
            raise NotImplementedError("LiteSAM mock is not implemented yet (F09)")
        raise ValueError(f"Unknown mock model: {self.model_name}")

    def _infer_superpoint(self, input_data: Any) -> dict:
        """Mock feature extraction: random keypoints, descriptors, scores."""
        n_features = self._N_FEATURES
        # Assuming input_data is an image of shape (H, W, 3); fall back to VGA.
        h, w = input_data.shape[:2] if hasattr(input_data, "shape") else (480, 640)
        keypoints = np.random.rand(n_features, 2) * [w, h]
        descriptors = np.random.rand(n_features, 256)
        scores = np.random.rand(n_features)
        return {
            "keypoints": keypoints,
            "descriptors": descriptors,
            "scores": scores
        }

    def _infer_lightglue(self, input_data: Any) -> dict:
        """Mock matching: random index pairs between the two feature sets."""
        # input_data expected to be a dict with two feature sets.
        f1, f2 = input_data["features1"], input_data["features2"]
        kp1 = f1.keypoints
        kp2 = f2.keypoints
        # Create up to _MAX_MATCHES random matches, capped by keypoint counts.
        n_matches = min(self._MAX_MATCHES, len(kp1), len(kp2))
        indices1 = np.random.choice(len(kp1), n_matches, replace=False)
        indices2 = np.random.choice(len(kp2), n_matches, replace=False)
        matches = np.stack([indices1, indices2], axis=1)
        scores = np.random.rand(n_matches)
        return {
            "matches": matches,
            "scores": scores,
            "keypoints1": kp1[indices1],
            "keypoints2": kp2[indices2]
        }
class ModelManager(IModelManager):
    """Manages ML models lifecycle and provisioning.

    Prototype implementation: every model is backed by a
    MockInferenceEngine regardless of the requested format.
    """

    def __init__(self):
        # Registry of loaded engines keyed by model name.
        self._loaded_models: dict[str, InferenceEngine] = {}

    def load_model(self, model_name: str, model_format: str) -> bool:
        """Loads a model (or mock) and warms it up; always returns True."""
        # Lazy %-style args skip formatting when the log level is disabled.
        logger.info("Loading %s in format %s", model_name, model_format)
        # For prototype, we strictly use Mock
        engine = MockInferenceEngine(model_name, model_format)
        self._loaded_models[model_name] = engine
        self.warmup_model(model_name)
        return True

    def get_inference_engine(self, model_name: str) -> InferenceEngine:
        """Gets an inference engine, auto-loading the model if needed."""
        if model_name not in self._loaded_models:
            # Auto load if not loaded
            self.load_model(model_name, "mock")
        return self._loaded_models[model_name]

    def optimize_to_tensorrt(self, model_name: str, onnx_path: str) -> str:
        """Placeholder for TensorRT optimization; returns the would-be path."""
        return f"{onnx_path}.trt"

    def fallback_to_onnx(self, model_name: str) -> bool:
        """Placeholder for fallback logic; always reports success."""
        logger.warning("Falling back to ONNX for %s", model_name)
        return True

    def warmup_model(self, model_name: str) -> bool:
        """Warms up a loaded model (no-op in the mock prototype)."""
        logger.info("Warming up %s", model_name)
        return True
+147
View File
@@ -0,0 +1,147 @@
"""Sequential Visual Odometry (Component F07)."""
import logging
from abc import ABC, abstractmethod
import cv2
import numpy as np
from gps_denied.core.models import IModelManager
from gps_denied.schemas.flight import CameraParameters
from gps_denied.schemas.vo import Features, Matches, Motion, RelativePose
logger = logging.getLogger(__name__)
class ISequentialVisualOdometry(ABC):
    """Interface for frame-to-frame visual odometry (Component F07)."""

    @abstractmethod
    def compute_relative_pose(
        self, prev_image: np.ndarray, curr_image: np.ndarray, camera_params: CameraParameters
    ) -> RelativePose | None:
        """Full pipeline: features -> matches -> motion; None on failure."""
        pass

    @abstractmethod
    def extract_features(self, image: np.ndarray) -> Features:
        """Extract keypoints and descriptors from a single image."""
        pass

    @abstractmethod
    def match_features(self, features1: Features, features2: Features) -> Matches:
        """Match two feature sets against each other."""
        pass

    @abstractmethod
    def estimate_motion(self, matches: Matches, camera_params: CameraParameters) -> Motion | None:
        """Estimate relative camera motion from matches; None on failure."""
        pass
class SequentialVisualOdometry(ISequentialVisualOdometry):
    """Frame-to-frame visual odometry using SuperPoint + LightGlue.

    Motion is recovered from the Essential matrix, so the translation is a
    unit direction only (scale_ambiguous=True on the resulting pose).
    """

    # Minimum correspondences required by the Essential-matrix solver.
    MIN_MATCHES = 8
    # Minimum RANSAC inliers to accept a motion estimate.
    MIN_INLIERS = 20
    # Inlier count above which tracking is considered good.
    GOOD_TRACKING_INLIERS = 50

    def __init__(self, model_manager: IModelManager):
        self.model_manager = model_manager

    def extract_features(self, image: np.ndarray) -> Features:
        """Extracts keypoints and descriptors using SuperPoint."""
        engine = self.model_manager.get_inference_engine("SuperPoint")
        result = engine.infer(image)
        return Features(
            keypoints=result["keypoints"],
            descriptors=result["descriptors"],
            scores=result["scores"]
        )

    def match_features(self, features1: Features, features2: Features) -> Matches:
        """Matches features using LightGlue."""
        engine = self.model_manager.get_inference_engine("LightGlue")
        result = engine.infer({
            "features1": features1,
            "features2": features2
        })
        return Matches(
            matches=result["matches"],
            scores=result["scores"],
            keypoints1=result["keypoints1"],
            keypoints2=result["keypoints2"]
        )

    @staticmethod
    def _build_camera_matrix(camera_params: CameraParameters) -> np.ndarray:
        """Builds the 3x3 intrinsic matrix K from physical camera parameters."""
        # Focal length in pixels via the sensor-width / resolution ratio.
        f_px = camera_params.focal_length * (
            camera_params.resolution_width / camera_params.sensor_width
        )
        if camera_params.principal_point:
            cx, cy = camera_params.principal_point
        else:
            # Default principal point: image center.
            cx = camera_params.resolution_width / 2.0
            cy = camera_params.resolution_height / 2.0
        return np.array([
            [f_px, 0, cx],
            [0, f_px, cy],
            [0, 0, 1]
        ], dtype=np.float64)

    def estimate_motion(self, matches: Matches, camera_params: CameraParameters) -> Motion | None:
        """Estimates camera motion using Essential Matrix (RANSAC).

        Returns None when there are too few matches, the Essential matrix
        cannot be found, or the inlier count is below MIN_INLIERS.
        """
        if len(matches.matches) < self.MIN_MATCHES:
            return None
        pts1 = np.ascontiguousarray(matches.keypoints1)
        pts2 = np.ascontiguousarray(matches.keypoints2)
        K = self._build_camera_matrix(camera_params)
        try:
            E, inliers = cv2.findEssentialMat(
                pts1, pts2, cameraMatrix=K, method=cv2.RANSAC, prob=0.999, threshold=1.0
            )
        except Exception as e:
            logger.error(f"Error finding essential matrix: {e}")
            return None
        if E is None or inliers is None:
            return None
        if E.shape != (3, 3):
            # findEssentialMat may return several candidate solutions stacked
            # vertically as a (3k, 3) matrix; previously any such result was
            # rejected outright. Take the first candidate instead.
            if E.shape[1] == 3 and E.shape[0] >= 3 and E.shape[0] % 3 == 0:
                E = E[:3, :]
            else:
                return None
        inliers_mask = inliers.flatten().astype(bool)
        # Cast numpy integer to plain int for the pydantic model field.
        inlier_count = int(np.sum(inliers_mask))
        if inlier_count < self.MIN_INLIERS:
            logger.warning(f"Insufficient inliers: {inlier_count} < {self.MIN_INLIERS}")
            return None
        # Recover pose (R, t) from E; t has unit norm (no metric scale).
        try:
            _, R, t, mask = cv2.recoverPose(E, pts1, pts2, cameraMatrix=K, mask=inliers)
        except Exception as e:
            logger.error(f"Error recovering pose: {e}")
            return None
        return Motion(
            translation=t.flatten(),
            rotation=R,
            inliers=inliers_mask,
            inlier_count=inlier_count
        )

    def compute_relative_pose(
        self, prev_image: np.ndarray, curr_image: np.ndarray, camera_params: CameraParameters
    ) -> RelativePose | None:
        """Computes relative pose between two frames; None when motion fails."""
        f1 = self.extract_features(prev_image)
        f2 = self.extract_features(curr_image)
        matches = self.match_features(f1, f2)
        motion = self.estimate_motion(matches, camera_params)
        if motion is None:
            return None
        tracking_good = motion.inlier_count > self.GOOD_TRACKING_INLIERS
        return RelativePose(
            translation=motion.translation,
            rotation=motion.rotation,
            # Fraction of matches surviving RANSAC; guarded against div-by-zero.
            confidence=float(motion.inlier_count / max(1, len(matches.matches))),
            inlier_count=motion.inlier_count,
            total_matches=len(matches.matches),
            tracking_good=tracking_good,
            scale_ambiguous=True
        )
+21
View File
@@ -0,0 +1,21 @@
"""Model Manager schemas (Component F16)."""
from typing import Any
from pydantic import BaseModel
class ModelConfig(BaseModel):
    """Configuration for an ML model."""
    # Fields named "model_*" collide with pydantic v2's protected "model_"
    # namespace and emit UserWarnings unless the namespace check is disabled.
    model_config = {"protected_namespaces": ()}
    model_name: str
    model_path: str
    format: str  # e.g. "onnx", "tensorrt", "mock"
    precision: str  # "fp16", "fp32"
    warmup_iterations: int = 3  # forward passes to run on warmup
class InferenceEngine:
    """Base definition for an inference engine.

    Concrete engines override :meth:`infer` with the actual forward pass.
    """

    def __init__(self, model_name: str, format_: str):
        # Identity of the model this engine serves and its storage format.
        self.model_name, self.format = model_name, format_

    def infer(self, input_data: Any) -> Any:
        """Run inference on *input_data*; the base class is abstract."""
        raise NotImplementedError
+49
View File
@@ -0,0 +1,49 @@
"""Sequential Visual Odometry schemas (Component F07)."""
from typing import Optional
import numpy as np
from pydantic import BaseModel
class Features(BaseModel):
    """Extracted image features (e.g., from SuperPoint)."""
    # Numpy arrays are not natively validated by pydantic; allow them as-is.
    model_config = {"arbitrary_types_allowed": True}
    keypoints: np.ndarray  # (N, 2) pixel coordinates
    descriptors: np.ndarray  # (N, 256) per-keypoint descriptor vectors
    scores: np.ndarray  # (N,) detection confidence per keypoint
class Matches(BaseModel):
    """Matches between two sets of features (e.g., from LightGlue)."""
    # Numpy arrays are not natively validated by pydantic; allow them as-is.
    model_config = {"arbitrary_types_allowed": True}
    matches: np.ndarray  # (M, 2) index pairs into the two keypoint sets
    scores: np.ndarray  # (M,) match confidence
    keypoints1: np.ndarray  # (M, 2) matched keypoints from the first image
    keypoints2: np.ndarray  # (M, 2) matched keypoints from the second image
class RelativePose(BaseModel):
    """Relative pose between two frames."""
    # Numpy arrays are not natively validated by pydantic; allow them as-is.
    model_config = {"arbitrary_types_allowed": True}
    translation: np.ndarray  # (3,) translation direction (unit scale)
    rotation: np.ndarray  # (3, 3) rotation matrix
    confidence: float  # inlier fraction of total matches
    inlier_count: int  # RANSAC inliers supporting the estimate
    total_matches: int  # matches fed into motion estimation
    tracking_good: bool  # True when inliers exceed the tracking threshold
    scale_ambiguous: bool = True  # Essential-matrix VO carries no metric scale
    chunk_id: Optional[str] = None  # optional link to the source image chunk
class Motion(BaseModel):
    """Motion estimate from OpenCV."""
    # Numpy arrays are not natively validated by pydantic; allow them as-is.
    model_config = {"arbitrary_types_allowed": True}
    translation: np.ndarray  # (3,) unit vector
    rotation: np.ndarray  # (3, 3) rotation matrix
    inliers: np.ndarray  # Boolean mask of inliers
    inlier_count: int  # number of True entries in `inliers`
+49
View File
@@ -0,0 +1,49 @@
"""Tests for Model Manager (F16)."""
import numpy as np
from gps_denied.core.models import ModelManager
def test_load_and_get_model():
    """Engine retrieval auto-loads unseen models; placeholders succeed."""
    manager = ModelManager()
    # Requesting an engine for a model that was never loaded must auto-load it.
    sp_engine = manager.get_inference_engine("SuperPoint")
    assert sp_engine.model_name == "SuperPoint"
    # Placeholder lifecycle hooks report success / the would-be artifact path.
    assert manager.fallback_to_onnx("SuperPoint") is True
    assert manager.optimize_to_tensorrt("SuperPoint", "path.onnx") == "path.onnx.trt"
def test_mock_superpoint():
    """SuperPoint mock returns 500 keypoints with 256-dim descriptors."""
    engine = ModelManager().get_inference_engine("SuperPoint")
    frame = np.zeros((480, 640, 3), dtype=np.uint8)
    output = engine.infer(frame)
    for key in ("keypoints", "descriptors", "scores"):
        assert key in output
    assert len(output["keypoints"]) == 500
    assert output["descriptors"].shape == (500, 256)
def test_mock_lightglue():
    """LightGlue mock caps matches at min(100, len(kp1), len(kp2))."""
    engine = ModelManager().get_inference_engine("LightGlue")

    # Minimal stand-in exposing only the .keypoints attribute the mock reads.
    class DummyF:
        def __init__(self, keypoints):
            self.keypoints = keypoints

    result = engine.infer({
        "features1": DummyF(np.random.rand(120, 2)),
        "features2": DummyF(np.random.rand(150, 2)),
    })
    assert "matches" in result
    assert len(result["matches"]) == 100  # min(100, 120, 150)
    assert result["keypoints1"].shape == (100, 2)
    assert result["keypoints2"].shape == (100, 2)
+102
View File
@@ -0,0 +1,102 @@
"""Tests for Sequential Visual Odometry (F07)."""
import numpy as np
import pytest
from gps_denied.core.models import ModelManager
from gps_denied.core.vo import SequentialVisualOdometry
from gps_denied.schemas.flight import CameraParameters
from gps_denied.schemas.vo import Features, Matches
@pytest.fixture
def vo():
    """VO instance wired to the mock model manager."""
    return SequentialVisualOdometry(ModelManager())
@pytest.fixture
def cam_params():
    """VGA camera with a 5 mm lens on a 6.4 x 4.8 mm sensor (f ~= 500 px)."""
    return CameraParameters(
        focal_length=5.0,
        sensor_width=6.4,
        sensor_height=4.8,
        resolution_width=640,
        resolution_height=480,
        principal_point=(320.0, 240.0),
    )
def test_extract_features(vo):
    """Feature extraction yields the mock's fixed 500-keypoint layout."""
    frame = np.zeros((480, 640, 3), dtype=np.uint8)
    result = vo.extract_features(frame)
    assert isinstance(result, Features)
    assert result.keypoints.shape == (500, 2)
    assert result.descriptors.shape == (500, 256)
def test_match_features(vo):
    """Matching two 100-feature sets yields a (100, 2) match array."""
    def make_features(n):
        # Random but validly-shaped feature set.
        return Features(
            keypoints=np.random.rand(n, 2),
            descriptors=np.random.rand(n, 256),
            scores=np.random.rand(n),
        )

    matches = vo.match_features(make_features(100), make_features(100))
    assert isinstance(matches, Matches)
    assert matches.matches.shape == (100, 2)
def test_estimate_motion_insufficient_matches(vo, cam_params):
    """Fewer than 8 correspondences must be rejected with None."""
    too_few = Matches(
        matches=np.zeros((5, 2)),
        scores=np.zeros(5),
        keypoints1=np.zeros((5, 2)),
        keypoints2=np.zeros((5, 2)),
    )
    assert vo.estimate_motion(too_few, cam_params) is None
def test_estimate_motion_synthetic(vo, cam_params):
    """Uniform 10 px rightward shift satisfies the epipolar constraint.

    Points translated identically are consistent with a pure sideways
    camera translation, so RANSAC should accept most of them as inliers.
    """
    n_pts = 100
    pts_prev = np.random.rand(n_pts, 2) * 400 + 100
    pts_curr = pts_prev + np.array([10.0, 0.0])  # moving 10 pixels right
    matches = Matches(
        matches=np.column_stack([np.arange(n_pts), np.arange(n_pts)]),
        scores=np.ones(n_pts),
        keypoints1=pts_prev,
        keypoints2=pts_curr,
    )
    motion = vo.estimate_motion(matches, cam_params)
    assert motion is not None
    assert motion.inlier_count > 20
    assert motion.translation.shape == (3,)
    assert motion.rotation.shape == (3, 3)
def test_compute_relative_pose(vo, cam_params):
    """Pipeline degrades gracefully on the mock's random correspondences."""
    blank_prev = np.zeros((480, 640, 3), dtype=np.uint8)
    blank_curr = np.zeros((480, 640, 3), dtype=np.uint8)
    # The mock matcher pairs random points, so findEssentialMat will likely
    # find few or no inliers; compute_relative_pose must return None or a
    # low-confidence pose rather than raise.
    pose = vo.compute_relative_pose(blank_prev, blank_curr, cam_params)
    if pose is not None:
        assert pose.translation.shape == (3,)
        assert pose.rotation.shape == (3, 3)
        # Random correspondences cannot yield enough inliers for good tracking.
        assert pose.tracking_good is False