feat: stage7 — Model Manager (F16) and Sequential VO (F07)

2026-06-22 17:31:13 +00:00 · 2026-03-22 22:59:55 +02:00
parent 9ef046d623
commit 058ed315dd
8 changed files with 494 additions and 2 deletions
@@ -0,0 +1,147 @@
+"""Sequential Visual Odometry (Component F07)."""
+
+import logging
+from abc import ABC, abstractmethod
+
+import cv2
+import numpy as np
+
+from gps_denied.core.models import IModelManager
+from gps_denied.schemas.flight import CameraParameters
+from gps_denied.schemas.vo import Features, Matches, Motion, RelativePose
+
+logger = logging.getLogger(__name__)
+
+
+class ISequentialVisualOdometry(ABC):
+    @abstractmethod
+    def compute_relative_pose(
+        self, prev_image: np.ndarray, curr_image: np.ndarray, camera_params: CameraParameters
+    ) -> RelativePose | None:
+        pass
+    
+    @abstractmethod
+    def extract_features(self, image: np.ndarray) -> Features:
+        pass
+    
+    @abstractmethod
+    def match_features(self, features1: Features, features2: Features) -> Matches:
+        pass
+    
+    @abstractmethod
+    def estimate_motion(self, matches: Matches, camera_params: CameraParameters) -> Motion | None:
+        pass
+
+
+class SequentialVisualOdometry(ISequentialVisualOdometry):
+    """Frame-to-frame visual odometry using SuperPoint + LightGlue."""
+
+    def __init__(self, model_manager: IModelManager):
+        self.model_manager = model_manager
+
+    def extract_features(self, image: np.ndarray) -> Features:
+        """Extracts keypoints and descriptors using SuperPoint."""
+        engine = self.model_manager.get_inference_engine("SuperPoint")
+        result = engine.infer(image)
+        
+        return Features(
+            keypoints=result["keypoints"],
+            descriptors=result["descriptors"],
+            scores=result["scores"]
+        )
+
+    def match_features(self, features1: Features, features2: Features) -> Matches:
+        """Matches features using LightGlue."""
+        engine = self.model_manager.get_inference_engine("LightGlue")
+        result = engine.infer({
+            "features1": features1,
+            "features2": features2
+        })
+        
+        return Matches(
+            matches=result["matches"],
+            scores=result["scores"],
+            keypoints1=result["keypoints1"],
+            keypoints2=result["keypoints2"]
+        )
+
+    def estimate_motion(self, matches: Matches, camera_params: CameraParameters) -> Motion | None:
+        """Estimates camera motion using Essential Matrix (RANSAC)."""
+        inlier_threshold = 20
+        if len(matches.matches) < 8:
+            return None
+            
+        pts1 = np.ascontiguousarray(matches.keypoints1)
+        pts2 = np.ascontiguousarray(matches.keypoints2)
+        
+        # Build camera matrix
+        f_px = camera_params.focal_length * (camera_params.resolution_width / camera_params.sensor_width)
+        if camera_params.principal_point:
+            cx, cy = camera_params.principal_point
+        else:
+            cx = camera_params.resolution_width / 2.0
+            cy = camera_params.resolution_height / 2.0
+            
+        K = np.array([
+            [f_px, 0, cx],
+            [0, f_px, cy],
+            [0, 0,  1]
+        ], dtype=np.float64)
+        
+        try:
+            E, inliers = cv2.findEssentialMat(
+                pts1, pts2, cameraMatrix=K, method=cv2.RANSAC, prob=0.999, threshold=1.0
+            )
+        except Exception as e:
+            logger.error(f"Error finding essential matrix: {e}")
+            return None
+            
+        if E is None or E.shape != (3, 3):
+            return None
+            
+        inliers_mask = inliers.flatten().astype(bool)
+        inlier_count = np.sum(inliers_mask)
+        
+        if inlier_count < inlier_threshold:
+            logger.warning(f"Insufficient inliers: {inlier_count} < {inlier_threshold}")
+            return None
+            
+        # Recover pose
+        try:
+            _, R, t, mask = cv2.recoverPose(E, pts1, pts2, cameraMatrix=K, mask=inliers)
+        except Exception as e:
+            logger.error(f"Error recovering pose: {e}")
+            return None
+            
+        return Motion(
+            translation=t.flatten(),
+            rotation=R,
+            inliers=inliers_mask,
+            inlier_count=inlier_count
+        )
+
+    def compute_relative_pose(
+        self, prev_image: np.ndarray, curr_image: np.ndarray, camera_params: CameraParameters
+    ) -> RelativePose | None:
+        """Computes relative pose between two frames."""
+        f1 = self.extract_features(prev_image)
+        f2 = self.extract_features(curr_image)
+        
+        matches = self.match_features(f1, f2)
+        
+        motion = self.estimate_motion(matches, camera_params)
+        
+        if motion is None:
+            return None
+            
+        tracking_good = motion.inlier_count > 50
+        
+        return RelativePose(
+            translation=motion.translation,
+            rotation=motion.rotation,
+            confidence=float(motion.inlier_count / max(1, len(matches.matches))),
+            inlier_count=motion.inlier_count,
+            total_matches=len(matches.matches),
+            tracking_good=tracking_good,
+            scale_ambiguous=True
+        )