mirror of
https://github.com/azaion/gps-denied-onboard.git
synced 2026-04-22 19:56:37 +00:00
Initial commit
This commit is contained in:
@@ -0,0 +1,274 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
import logging
|
||||
from typing import Optional, Tuple, Dict, Any
|
||||
from pydantic import BaseModel
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from f02_1_flight_lifecycle_manager import CameraParameters
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# --- Data Models ---
|
||||
|
||||
class Features(BaseModel):
    """Sparse features extracted from a single image.

    Attributes:
        keypoints: (N, 2) float array of (x, y) pixel coordinates.
        descriptors: (N, 256) float array of per-keypoint descriptors.
        scores: (N,) float array of detection confidence scores.
    """

    keypoints: np.ndarray
    descriptors: np.ndarray
    scores: np.ndarray

    # numpy arrays have no pydantic validator, so allow arbitrary types.
    model_config = {"arbitrary_types_allowed": True}
||||
class Matches(BaseModel):
    """Correspondences between two feature sets.

    Attributes:
        matches: (M, 2) integer array pairing indices into the two feature sets.
        scores: (M,) float array of per-match confidences.
        keypoints1: (M, 2) matched (x, y) coordinates in the first image.
        keypoints2: (M, 2) matched (x, y) coordinates in the second image.
    """

    matches: np.ndarray
    scores: np.ndarray
    keypoints1: np.ndarray
    keypoints2: np.ndarray

    # numpy arrays have no pydantic validator, so allow arbitrary types.
    model_config = {"arbitrary_types_allowed": True}
||||
class RelativePose(BaseModel):
    """Frame-to-frame camera motion estimate.

    Attributes:
        translation: (3,) unit direction vector (scale is not observable from
            a single essential-matrix decomposition).
        rotation: (3, 3) rotation matrix.
        confidence: tracking confidence in [0, 1].
        inlier_count: number of geometric inliers supporting the estimate.
        total_matches: total number of candidate correspondences.
        tracking_good: whether tracking quality is acceptable.
        scale_ambiguous: True while translation magnitude is unknown.
        chunk_id: optional identifier of the map chunk this pose belongs to.
    """

    translation: np.ndarray
    rotation: np.ndarray
    confidence: float
    inlier_count: int
    total_matches: int
    tracking_good: bool
    scale_ambiguous: bool = True
    chunk_id: Optional[str] = None

    # numpy arrays have no pydantic validator, so allow arbitrary types.
    model_config = {"arbitrary_types_allowed": True}
||||
class Motion(BaseModel):
    """Raw motion recovered from essential-matrix decomposition.

    Attributes:
        translation: (3, 1) unit translation direction.
        rotation: (3, 3) rotation matrix.
        inliers: (M,) boolean mask over the input matches.
        inlier_count: number of True entries in ``inliers``.
    """

    translation: np.ndarray
    rotation: np.ndarray
    inliers: np.ndarray
    inlier_count: int

    # numpy arrays have no pydantic validator, so allow arbitrary types.
    model_config = {"arbitrary_types_allowed": True}
||||
# --- Interface ---
|
||||
|
||||
class ISequentialVisualOdometry(ABC):
    """Abstract contract for frame-to-frame visual odometry pipelines."""

    @abstractmethod
    def compute_relative_pose(self, prev_image: np.ndarray, curr_image: np.ndarray) -> Optional[RelativePose]:
        """Estimate the relative pose between two consecutive frames."""
        pass

    @abstractmethod
    def extract_features(self, image: np.ndarray) -> Features:
        """Detect keypoints and compute descriptors for one image."""
        pass

    @abstractmethod
    def match_features(self, features1: Features, features2: Features) -> Matches:
        """Match two feature sets and return filtered correspondences."""
        pass

    @abstractmethod
    def estimate_motion(self, matches: Matches, camera_params: CameraParameters) -> Optional[Motion]:
        """Recover relative rotation/translation from matched keypoints."""
        pass
||||
# --- Implementation ---
|
||||
|
||||
class SequentialVisualOdometry(ISequentialVisualOdometry):
    """
    F07: Sequential Visual Odometry

    Performs frame-to-frame metric tracking, relying on SuperPoint for feature extraction
    and LightGlue for matching to handle low-overlap and low-texture scenarios.

    When no ``model_manager`` is supplied (or it lacks the relevant hooks) the class
    falls back to a classical SIFT + BFMatcher pipeline so it can run on real images
    without TensorRT.
    """

    def __init__(self, model_manager=None, match_confidence_threshold: float = 0.1,
                 max_keypoints: int = 2000):
        # model_manager (optional): exposes run_superpoint / run_lightglue hooks.
        self.model_manager = model_manager
        # NOTE: the LightGlue paper recommends ~0.9, but the SIFT/BFMatcher fallback
        # produces much lower pseudo-confidences (exp(-d/100)), so the default stays
        # permissive at 0.1.  Callers using real LightGlue scores can raise it.
        self.match_confidence_threshold = match_confidence_threshold
        # Upper bound on keypoints retained after (mock) NMS.
        self.max_keypoints = max_keypoints

    # --- Feature Extraction (07.01) ---

    def _preprocess_image(self, image: np.ndarray) -> np.ndarray:
        """Convert an image to single-channel float32 in [0, 1]."""
        if image.ndim == 3 and image.shape[2] == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        elif image.ndim == 3 and image.shape[2] == 4:
            # Fix: BGRA frames previously fell through and stayed 4-channel.
            gray = cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
        else:
            gray = image
        return gray.astype(np.float32) / 255.0

    def _run_superpoint_inference(self, preprocessed: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Return (keypoints (N,2), descriptors (N,256), scores (N,)) for one image.

        Uses the SuperPoint hook when available; otherwise falls back to SIFT.
        """
        if self.model_manager and hasattr(self.model_manager, 'run_superpoint'):
            return self.model_manager.run_superpoint(preprocessed)

        # Functional Classical CV Fallback (SIFT) for testing on real images without TensorRT
        sift = cv2.SIFT_create(nfeatures=2000)
        img_uint8 = (preprocessed * 255.0).astype(np.uint8)

        kpts, descs = sift.detectAndCompute(img_uint8, None)
        if kpts is None or len(kpts) == 0:
            # Fix: empty arrays now carry float32 like the non-empty path.
            return (np.empty((0, 2), dtype=np.float32),
                    np.empty((0, 256), dtype=np.float32),
                    np.empty((0,), dtype=np.float32))

        keypoints = np.array([k.pt for k in kpts]).astype(np.float32)
        scores = np.array([k.response for k in kpts]).astype(np.float32)

        # Pad SIFT's 128-dim descriptors to 256 to match the expected interface dimensions
        descs_padded = np.pad(descs, ((0, 0), (0, 128)), 'constant').astype(np.float32)

        return keypoints, descs_padded, scores

    def _apply_nms(self, keypoints: np.ndarray, scores: np.ndarray, nms_radius: int) -> np.ndarray:
        """Return indices of keypoints to keep.

        Simplified Mock NMS: sorts by score and keeps the top ``self.max_keypoints``.
        ``nms_radius`` is accepted for interface compatibility but currently unused —
        a real radius-based suppression would replace this body.
        """
        if len(scores) == 0:
            return np.array([], dtype=int)
        sorted_indices = np.argsort(scores)[::-1]
        return sorted_indices[:self.max_keypoints]

    def extract_features(self, image: np.ndarray) -> Features:
        """Detect keypoints and compute descriptors; empty Features for empty input."""
        if image is None or image.size == 0:
            return Features(keypoints=np.empty((0, 2)), descriptors=np.empty((0, 256)), scores=np.empty((0,)))

        preprocessed = self._preprocess_image(image)
        kpts, desc, scores = self._run_superpoint_inference(preprocessed)

        keep_indices = self._apply_nms(kpts, scores, nms_radius=4)

        return Features(
            keypoints=kpts[keep_indices],
            descriptors=desc[keep_indices],
            scores=scores[keep_indices]
        )

    # --- Feature Matching (07.02) ---

    def _prepare_features_for_lightglue(self, features: Features) -> Dict[str, Any]:
        """Package a Features object into the dict layout the matcher expects.

        In a real implementation, this would convert numpy arrays to torch tensors
        on the correct device (e.g., 'cuda').
        """
        return {
            'keypoints': features.keypoints,
            'descriptors': features.descriptors,
            'image_size': np.array([1920, 1080])  # Placeholder size
        }

    def _run_lightglue_inference(self, features1_dict: Dict, features2_dict: Dict) -> Tuple[np.ndarray, np.ndarray]:
        """Return (match index pairs (M,2), match scores (M,)).

        Uses the LightGlue hook when available; otherwise a BFMatcher fallback.
        """
        if self.model_manager and hasattr(self.model_manager, 'run_lightglue'):
            return self.model_manager.run_lightglue(features1_dict, features2_dict)

        # Functional Classical CV Fallback (BFMatcher)
        # Extract the original 128 dimensions (ignoring the padding added in the SIFT fallback)
        desc1 = features1_dict['descriptors'][:, :128].astype(np.float32)
        desc2 = features2_dict['descriptors'][:, :128].astype(np.float32)

        if len(desc1) == 0 or len(desc2) == 0:
            return np.empty((0, 2), dtype=int), np.empty((0,))

        matcher = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True)
        raw_matches = matcher.match(desc1, desc2)

        if not raw_matches:
            return np.empty((0, 2), dtype=int), np.empty((0,))

        match_indices = np.array([[m.queryIdx, m.trainIdx] for m in raw_matches])

        # Map L2 distances into a [0, 1] confidence score so our filter doesn't reject them
        distances = np.array([m.distance for m in raw_matches])
        scores = np.exp(-distances / 100.0).astype(np.float32)

        return match_indices, scores

    def _filter_matches_by_confidence(self, matches: np.ndarray, scores: np.ndarray, threshold: float) -> Tuple[np.ndarray, np.ndarray]:
        """Keep only matches whose confidence exceeds ``threshold``."""
        keep = scores > threshold
        return matches[keep], scores[keep]

    def _extract_matched_keypoints(self, features1: Features, features2: Features, match_indices: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Gather the (x, y) coordinates of both ends of each match."""
        kpts1 = features1.keypoints[match_indices[:, 0]]
        kpts2 = features2.keypoints[match_indices[:, 1]]
        return kpts1, kpts2

    def match_features(self, features1: Features, features2: Features) -> Matches:
        """Match two feature sets and return confidence-filtered correspondences."""
        f1_lg = self._prepare_features_for_lightglue(features1)
        f2_lg = self._prepare_features_for_lightglue(features2)

        raw_matches, raw_scores = self._run_lightglue_inference(f1_lg, f2_lg)

        # Fix: the old comment cited the LightGlue paper's ~0.9 threshold while the
        # code hard-coded 0.1; the threshold is now configurable (see __init__).
        filtered_matches, filtered_scores = self._filter_matches_by_confidence(
            raw_matches, raw_scores, self.match_confidence_threshold)

        kpts1, kpts2 = self._extract_matched_keypoints(features1, features2, filtered_matches)

        return Matches(matches=filtered_matches, scores=filtered_scores, keypoints1=kpts1, keypoints2=kpts2)

    # --- Relative Pose Computation (07.03) ---

    def _get_camera_matrix(self, camera_params: CameraParameters) -> np.ndarray:
        """Build a pinhole intrinsic matrix K from physical camera parameters."""
        w = camera_params.resolution.get("width", 1920)
        h = camera_params.resolution.get("height", 1080)
        f_mm = camera_params.focal_length_mm
        sw_mm = camera_params.sensor_width_mm
        # Guard against a zero sensor width; fall back to f_px == image width.
        f_px = (f_mm / sw_mm) * w if sw_mm > 0 else w
        return np.array([
            [f_px, 0.0, w / 2.0],
            [0.0, f_px, h / 2.0],
            [0.0, 0.0, 1.0]
        ], dtype=np.float64)

    def _normalize_keypoints(self, keypoints: np.ndarray, camera_params: CameraParameters) -> np.ndarray:
        """Map pixel coordinates to normalized image coordinates using K."""
        K = self._get_camera_matrix(camera_params)
        fx, fy = K[0, 0], K[1, 1]
        cx, cy = K[0, 2], K[1, 2]

        normalized = np.empty_like(keypoints, dtype=np.float64)
        if len(keypoints) > 0:
            normalized[:, 0] = (keypoints[:, 0] - cx) / fx
            normalized[:, 1] = (keypoints[:, 1] - cy) / fy
        return normalized

    def _estimate_essential_matrix(self, points1: np.ndarray, points2: np.ndarray, K: np.ndarray) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
        """RANSAC essential-matrix estimate; returns (E, inlier mask) or (None, None)."""
        if len(points1) < 8 or len(points2) < 8:
            return None, None
        E, mask = cv2.findEssentialMat(points1, points2, K, method=cv2.RANSAC, prob=0.999, threshold=1.0)
        return E, mask

    def _decompose_essential_matrix(self, E: np.ndarray, points1: np.ndarray, points2: np.ndarray, K: np.ndarray,
                                    ransac_mask: Optional[np.ndarray] = None) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], Optional[np.ndarray]]:
        """Recover (R, t, cheirality mask) from E, or (None, None, None) on failure.

        The shape check also rejects the stacked (3k, 3) result findEssentialMat
        can return when multiple models survive.
        """
        if E is None or E.shape != (3, 3):
            return None, None, None
        # Fix: feed the RANSAC inlier mask into recoverPose so the cheirality vote
        # only uses geometric inliers, and keep the refined mask it returns.
        # recoverPose mutates the mask argument, so pass a copy.
        mask_in = ransac_mask.copy() if ransac_mask is not None else None
        _, R, t, mask_out = cv2.recoverPose(E, points1, points2, K, mask=mask_in)
        return R, t, mask_out

    def _compute_tracking_quality(self, inlier_count: int, total_matches: int) -> Tuple[float, bool]:
        """Return (confidence in [0, 1], tracking_good flag) from inlier statistics."""
        if total_matches == 0:
            return 0.0, False

        inlier_ratio = inlier_count / total_matches
        confidence = min(1.0, inlier_ratio * (inlier_count / 100.0))

        if inlier_count > 50 and inlier_ratio > 0.5:
            return float(confidence), True
        elif inlier_count >= 20:
            return float(confidence * 0.5), True  # Degraded
        return 0.0, False  # Lost

    def _build_relative_pose(self, motion: Motion, matches: Matches) -> RelativePose:
        """Wrap a raw Motion into a RelativePose with quality assessment."""
        confidence, tracking_good = self._compute_tracking_quality(motion.inlier_count, len(matches.matches))
        return RelativePose(
            translation=motion.translation.flatten(),
            rotation=motion.rotation,
            confidence=confidence,
            inlier_count=motion.inlier_count,
            total_matches=len(matches.matches),
            tracking_good=tracking_good,
            scale_ambiguous=True
        )

    def estimate_motion(self, matches: Matches, camera_params: CameraParameters) -> Optional[Motion]:
        """Estimate relative R/t from matches; None when estimation is not possible."""
        if len(matches.matches) < 8:
            return None

        K = self._get_camera_matrix(camera_params)
        pts1, pts2 = matches.keypoints1, matches.keypoints2

        E, ransac_mask = self._estimate_essential_matrix(pts1, pts2, K)
        R, t, pose_mask = self._decompose_essential_matrix(E, pts1, pts2, K, ransac_mask)
        if R is None or t is None:
            return None

        # Prefer the cheirality-refined mask; fall back to the RANSAC mask.
        mask = pose_mask if pose_mask is not None else ransac_mask
        # Fix: recoverPose marks inliers with 255, so test non-zero, not == 1.
        inliers = mask.flatten() != 0 if mask is not None else np.zeros(len(pts1), dtype=bool)
        return Motion(translation=t, rotation=R, inliers=inliers, inlier_count=int(np.sum(inliers)))

    def compute_relative_pose(self, prev_image: np.ndarray, curr_image: np.ndarray, camera_params: Optional[CameraParameters] = None) -> Optional[RelativePose]:
        """Full pipeline: features -> matches -> motion -> RelativePose (or None).

        ``camera_params`` defaults to a generic full-frame 25mm camera at 1080p
        when the caller does not supply intrinsics.
        """
        if camera_params is None:
            camera_params = CameraParameters(focal_length_mm=25.0, sensor_width_mm=36.0, resolution={"width": 1920, "height": 1080})

        feat1 = self.extract_features(prev_image)
        feat2 = self.extract_features(curr_image)

        matches = self.match_features(feat1, feat2)
        motion = self.estimate_motion(matches, camera_params)

        if motion is None:
            return None
        return self._build_relative_pose(motion, matches)
|
||||
Reference in New Issue
Block a user