# =============================================================================
# Reconstructed from a collapsed git patch adding four new files under
# docs/02_components/07_sequential_visual_odometry/. The diff metadata (hunk
# headers, '+' prefixes, "\ No newline at end of file" markers) was formatting
# noise and has been removed; file boundaries are marked below.
# =============================================================================

# -----------------------------------------------------------------------------
# File: docs/02_components/07_sequential_visual_odometry/geolocator.py
# -----------------------------------------------------------------------------
import math

import numpy as np


class GeoTracker:
    """Project image pixels onto the ground plane and integrate camera motion
    into a latitude/longitude estimate (sequential visual odometry helper).

    Frames used throughout:
      * NED  -- North/East/Down world frame; the ground plane is z = 0 and the
                camera hovers at [0, 0, -altitude].
      * Body -- vehicle frame (x forward, y right, z down).
      * Cam  -- optical frame (z forward along the optical axis).
    """

    # Mean equatorial Earth radius (metres) for the flat-Earth
    # metres -> degrees conversion in calculate_movement().
    EARTH_RADIUS_M = 6378137.0

    def __init__(self, focal_length_mm, sensor_width_mm, image_width_px, image_height_px):
        """Build pinhole intrinsics from physical camera specs.

        Args:
            focal_length_mm: Lens focal length in millimetres.
            sensor_width_mm: Physical sensor width in millimetres.
            image_width_px: Image width in pixels.
            image_height_px: Image height in pixels.
        """
        # Focal length in pixels. Square pixels (fy == fx) and a principal
        # point at the exact image centre are assumed -- TODO confirm against
        # a real calibration if one is available.
        self.fx = (focal_length_mm / sensor_width_mm) * image_width_px
        self.fy = self.fx
        self.cx = image_width_px / 2.0
        self.cy = image_height_px / 2.0
        self.image_w = image_width_px
        self.image_h = image_height_px

        # Standard 3x3 camera matrix K and its inverse (pixel -> camera ray).
        self.K = np.array([
            [self.fx, 0, self.cx],
            [0, self.fy, self.cy],
            [0, 0, 1],
        ])
        self.K_inv = np.linalg.inv(self.K)

    def _get_rotation_matrix(self, yaw, pitch, roll):
        """Convert NED Euler angles (degrees) to a camera-to-NED rotation.

        Uses the ZYX (yaw-pitch-roll) convention for body-to-NED, composed
        with a fixed camera-to-body mount alignment for a forward-looking
        camera: Cam Z -> Body X, Cam X -> Body Y, Cam Y -> Body Z.

        Returns:
            3x3 np.ndarray rotating camera-frame vectors into the NED frame.
        """
        y, p, r = np.radians(yaw), np.radians(pitch), np.radians(roll)

        Rz = np.array([[np.cos(y), -np.sin(y), 0], [np.sin(y), np.cos(y), 0], [0, 0, 1]])
        Ry = np.array([[np.cos(p), 0, np.sin(p)], [0, 1, 0], [-np.sin(p), 0, np.cos(p)]])
        Rx = np.array([[1, 0, 0], [0, np.cos(r), -np.sin(r)], [0, np.sin(r), np.cos(r)]])

        R_body_to_ned = Rz @ Ry @ Rx

        # Mount alignment for a forward-looking camera. With gimbal
        # pitch = -90 (the default in calculate_movement) the optical axis
        # ends up pointing at the ground, i.e. a nadir view.
        T_cam_to_body = np.array([
            [0, 0, 1],
            [1, 0, 0],
            [0, 1, 0],
        ])

        return R_body_to_ned @ T_cam_to_body

    def pixel_to_ned_ground(self, u, v, altitude, R_cam_to_ned):
        """Ray/ground-plane intersection.

        Projects pixel (u, v) onto the ground plane (z = 0 in NED). The
        camera is at [0, 0, -altitude]; the ray is P = O + t * D and we
        solve for P.z = 0, i.e. t = altitude / ray_ned.z.

        Args:
            u, v: Pixel coordinates in the original image resolution.
            altitude: Height above ground in metres (positive up).
            R_cam_to_ned: Camera-to-NED rotation from _get_rotation_matrix().

        Returns:
            np.ndarray [north_m, east_m] relative to the camera's ground
            position, or None if the ray does not usefully hit the ground.
        """
        # 1. Pixel -> normalized camera ray.
        ray_cam = self.K_inv @ np.array([u, v, 1.0])

        # 2. Rotate the ray into the NED frame.
        ray_ned = R_cam_to_ned @ ray_cam

        # 3. Intersect with the ground plane.
        # FIX: reject near-horizontal rays (tiny positive z) as well as
        # upward-pointing ones. The original `<= 0` guard let z values of
        # e.g. 1e-12 through, producing enormous, meaningless ground offsets
        # via division by ~0.
        if ray_ned[2] <= 1e-6:
            return None

        t = altitude / ray_ned[2]

        # Ground point relative to the camera's horizontal position.
        return np.array([t * ray_ned[0], t * ray_ned[1]])

    def calculate_movement(self, kpts0, kpts1, altitude, yaw, start_lat, start_lon,
                           pitch=-90.0, roll=0.0):
        """Estimate drone motion from optical flow of matched ground points.

        Each (kpts0[i], kpts1[i]) pair is assumed to be the SAME static
        ground point seen from two camera poses, so the camera displacement
        is p0 - p1 (ground point relative to pose 0 minus relative to pose 1).

        Args:
            kpts0, kpts1: Matched keypoints (N, 2) in original-image pixels.
            altitude: Height above ground in metres.
            yaw: Heading in degrees (0 = North).
            start_lat, start_lon: Starting geodetic position in degrees.
            pitch, roll: Gimbal attitude in degrees. Defaults reproduce the
                previously hard-coded nadir view (pitch = -90, roll = 0).

        Returns:
            dict with move_north_m, move_east_m, new_lat, new_lon,
            ground_points; or None if no usable point pairs were found.
        """
        R = self._get_rotation_matrix(yaw, pitch, roll)

        shifts_n = []
        shifts_e = []

        # FIX: iterate only over indices valid in BOTH arrays; the original
        # ranged over len(kpts0) and raised IndexError when kpts1 was shorter.
        n_pts = min(len(kpts0), len(kpts1))

        # Subsample to roughly 50 pairs to keep this cheap.
        step = max(1, n_pts // 50)

        for i in range(0, n_pts, step):
            p0 = self.pixel_to_ned_ground(kpts0[i][0], kpts0[i][1], altitude, R)
            p1 = self.pixel_to_ned_ground(kpts1[i][0], kpts1[i][1], altitude, R)

            if p0 is not None and p1 is not None:
                # Same physical ground point from two poses:
                # camera movement = p0 - p1.
                diff = p0 - p1
                shifts_n.append(diff[0])
                shifts_e.append(diff[1])

        if not shifts_n:
            return None

        # Median is robust against outlier matches (moving objects, bad
        # correspondences) surviving the matcher.
        avg_n = float(np.median(shifts_n))
        avg_e = float(np.median(shifts_e))

        # Flat-Earth metres -> degrees conversion around start_lat.
        d_lat = (avg_n / self.EARTH_RADIUS_M) * (180 / math.pi)
        d_lon = (avg_e / (self.EARTH_RADIUS_M * math.cos(math.radians(start_lat)))) * (180 / math.pi)

        return {
            'move_north_m': avg_n,
            'move_east_m': avg_e,
            'new_lat': start_lat + d_lat,
            'new_lon': start_lon + d_lon,
            'ground_points': len(shifts_n),
        }


# -----------------------------------------------------------------------------
# File: docs/02_components/07_sequential_visual_odometry/gps_denied.py
# -----------------------------------------------------------------------------
import time

import cv2
import matplotlib.pyplot as plt
from lightglue import viz2d

from geolocator import GeoTracker
from lightglue_onnx import LightGlueMatcher

# --- CONFIGURATION ---
IMAGE_0_PATH = "gps-denied-ai-sandbox/docs/00_problem/input_data/AD000001.jpg"
IMAGE_1_PATH = "gps-denied-ai-sandbox/docs/00_problem/input_data/AD000002.jpg"
MODEL_PATH = "superpoint_lightglue_end2end_fused_gpu.onnx"

# Flight data (frame 0 telemetry).
ALTITUDE = 100.0   # metres above ground
HEADING = 0.0      # degrees (0 = North)
START_LAT = 48.275292
START_LON = 37.385220

# Camera specs.
FOCAL_MM = 25.0
SENSOR_MM = 23.5
W_PX = 1920
H_PX = 1280

# Long-edge size images are resized to for matching; keypoints are rescaled
# back to the original resolution inside the matcher, so the full-resolution
# intrinsics above remain consistent.
PROCESS_DIM = 1024


def main():
    """End-to-end demo: match two frames, estimate the shift, report and plot."""
    # 1. SETUP
    print("1. Initializing Systems...")
    matcher = LightGlueMatcher(MODEL_PATH, max_dimension=PROCESS_DIM)
    geo = GeoTracker(FOCAL_MM, SENSOR_MM, W_PX, H_PX)

    # 2. MATCHING -- matcher returns (F, mkpts0, mkpts1) already rescaled
    # to original image coordinates.
    print(f"2. Matching Images...")
    start_t = time.time()
    F, kpts0, kpts1 = matcher.match(IMAGE_0_PATH, IMAGE_1_PATH)
    dt = time.time() - start_t

    # Matcher returns empty arrays when fewer than 8 matches were found.
    if len(kpts0) == 0:
        print("[!] Not enough matches found.")
        return

    print(f" -> Success! Found {len(kpts0)} matches in {dt:.2f}s")

    # 3. GEOLOCATION CALCULATION
    print("3. Calculating Coordinates...")
    nav_result = geo.calculate_movement(
        kpts0, kpts1,
        altitude=ALTITUDE,
        yaw=HEADING,
        start_lat=START_LAT,
        start_lon=START_LON,
    )

    # 4. REPORTING
    title = "Calculation Failed"

    if nav_result:
        print("\n" + "=" * 45)
        print(" 🛰️ VISUAL ODOMETRY RESULT")
        print("=" * 45)
        print(f" Start Pos: {START_LAT:.6f}, {START_LON:.6f}")
        print("-" * 45)
        print(f" Detected Shift: North {nav_result['move_north_m']:.2f} m")
        print(f"                 East  {nav_result['move_east_m']:.2f} m")
        print("-" * 45)
        print(f" NEW LATITUDE:  {nav_result['new_lat']:.8f}")
        print(f" NEW LONGITUDE: {nav_result['new_lon']:.8f}")
        print("=" * 45 + "\n")

        title = (f"Shift: N {nav_result['move_north_m']:.1f}m, E {nav_result['move_east_m']:.1f}m\n"
                 f"Lat: {nav_result['new_lat']:.6f}, Lon: {nav_result['new_lon']:.6f}")
    else:
        print("[!] Math failed (likely bad geometry or points at infinity).")

    # 5. VISUALIZATION -- images loaded manually since the matcher does not
    # return them.
    img0 = cv2.imread(IMAGE_0_PATH)
    img1 = cv2.imread(IMAGE_1_PATH)
    # FIX: cv2.imread returns None on failure; previously this fell straight
    # into cvtColor and crashed with a cryptic OpenCV error.
    if img0 is None or img1 is None:
        print("[!] Could not load images for visualization.")
        return
    # BGR -> RGB for matplotlib.
    img0 = cv2.cvtColor(img0, cv2.COLOR_BGR2RGB)
    img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2RGB)

    viz2d.plot_images([img0, img1])
    viz2d.plot_matches(kpts0, kpts1, color='lime')
    plt.suptitle(title, fontsize=14, backgroundcolor='white')
    plt.show()


if __name__ == "__main__":
    main()


# -----------------------------------------------------------------------------
# File: docs/02_components/07_sequential_visual_odometry/lightglue_onnx.py
# -----------------------------------------------------------------------------
import os

import cv2
import numpy as np
import onnxruntime as ort


class LightGlueMatcher:
    """Thin wrapper around a fused SuperPoint+LightGlue end-to-end ONNX model.

    Handles image loading, resizing/normalization, inference, and rescaling
    of matched keypoints back to original image coordinates.
    """

    def __init__(self, onnx_model_path, max_dimension=512):
        """Initialize the ONNX Runtime session.

        Args:
            onnx_model_path: Path to the .onnx model file.
            max_dimension: Maximum edge length images are resized to.

        Raises:
            FileNotFoundError: If the model file does not exist.
        """
        self.max_dim = max_dimension

        if not os.path.exists(onnx_model_path):
            raise FileNotFoundError(f"Model not found at: {onnx_model_path}")

        # Prefer CUDA when the GPU provider is available, else fall back to CPU.
        if 'CUDAExecutionProvider' in ort.get_available_providers():
            self.providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
            print("LightGlueMatcher: Using CUDA (GPU).")
        else:
            self.providers = ['CPUExecutionProvider']
            print("LightGlueMatcher: Using CPU.")

        sess_options = ort.SessionOptions()
        self.session = ort.InferenceSession(onnx_model_path, sess_options, providers=self.providers)

        # Cache input/output tensor names once.
        self.input_names = [inp.name for inp in self.session.get_inputs()]
        self.output_names = [out.name for out in self.session.get_outputs()]

    def _preprocess(self, img_input):
        """Load, grayscale, resize, and normalize an image for the model.

        Accepts either a file path or a numpy array (BGR or grayscale).

        Returns:
            (tensor, scale): float32 tensor of shape (1, 1, H, W) in [0, 1],
            and the scale factor applied (1.0 if no downscaling occurred).

        Raises:
            FileNotFoundError: If a path is given but cannot be loaded.
            ValueError: If the input is neither a path nor a numpy array.
        """
        if isinstance(img_input, str):
            img_raw = cv2.imread(img_input, cv2.IMREAD_GRAYSCALE)
            if img_raw is None:
                raise FileNotFoundError(f"Could not load image at {img_input}")
        elif isinstance(img_input, np.ndarray):
            # 3-channel arrays are assumed BGR (OpenCV convention).
            if len(img_input.shape) == 3:
                img_raw = cv2.cvtColor(img_input, cv2.COLOR_BGR2GRAY)
            else:
                img_raw = img_input
        else:
            raise ValueError("Input must be an image path or numpy array.")

        h, w = img_raw.shape
        scale = self.max_dim / max(h, w)

        # Only downscale; never upscale small images.
        if scale < 1:
            h_new, w_new = int(round(h * scale)), int(round(w * scale))
            img_resized = cv2.resize(img_raw, (w_new, h_new), interpolation=cv2.INTER_AREA)
        else:
            scale = 1.0
            img_resized = img_raw

        # Normalize to [0, 1] and reshape to (1, 1, H, W).
        img_normalized = img_resized.astype(np.float32) / 255.0
        img_tensor = img_normalized[None, None]

        return img_tensor, scale

    def match(self, img0_input, img1_input):
        """Match two images; return the fundamental matrix and keypoints.

        Returns:
            F (np.ndarray | None): 3x3 fundamental matrix. None when fewer
                than 8 matches were found, or when cv2.findFundamentalMat
                fails on degenerate geometry -- callers must check.
            mkpts0 (np.ndarray): Matched keypoints in image 0, shape (N, 2),
                in ORIGINAL image coordinates.
            mkpts1 (np.ndarray): Matched keypoints in image 1, shape (N, 2).
        """
        tensor0, scale0 = self._preprocess(img0_input)
        tensor1, scale1 = self._preprocess(img1_input)

        inputs = {
            self.input_names[0]: tensor0,
            self.input_names[1]: tensor1,
        }
        outputs = self.session.run(None, inputs)
        outputs_dict = dict(zip(self.output_names, outputs))

        # Raw keypoints and match index pairs.
        # NOTE(review): assumes output names 'kpts0'/'kpts1'/'matches0' and
        # that matches0 is an (N, 2) index array -- confirm against the
        # exported model's output signature.
        kpts0 = outputs_dict['kpts0'].squeeze(0)
        kpts1 = outputs_dict['kpts1'].squeeze(0)
        matches0 = outputs_dict['matches0']

        # Need at least 8 correspondences for the fundamental matrix.
        if len(matches0) < 8:
            print("Not enough matches to compute matrix.")
            return None, np.array([]), np.array([])

        # Undo the preprocessing resize so keypoints are in original pixels.
        mkpts0 = kpts0[matches0[:, 0]] / scale0
        mkpts1 = kpts1[matches0[:, 1]] / scale1

        # Fundamental matrix via RANSAC (3 px threshold, 0.99 confidence).
        # findFundamentalMat may return F=None on degenerate configurations;
        # this is propagated to the caller as documented above.
        F, mask = cv2.findFundamentalMat(mkpts0, mkpts1, cv2.FM_RANSAC, 3, 0.99)

        return F, mkpts0, mkpts1


# -----------------------------------------------------------------------------
# File: docs/02_components/07_sequential_visual_odometry/requirements.txt
# (reproduced verbatim as a comment block; not Python code)
# -----------------------------------------------------------------------------
# coloredlogs==15.0.1
# contourpy==1.3.3
# cycler==0.12.1
# filelock==3.20.0
# flatbuffers==25.9.23
# fonttools==4.60.1
# fsspec==2025.10.0
# humanfriendly==10.0
# Jinja2==3.1.6
# kiwisolver==1.4.9
# kornia==0.8.2
# kornia_rs==0.1.10
# -e git+https://github.com/cvg/LightGlue/@746fac2c042e05d1865315b1413419f1c1e7ba55#egg=lightglue
# MarkupSafe==3.0.3
# matplotlib==3.10.7
# mpmath==1.3.0
# networkx==3.5
# numpy==2.2.6
# nvidia-cublas-cu12==12.8.4.1
# nvidia-cuda-cupti-cu12==12.8.90
# nvidia-cuda-nvrtc-cu12==12.8.93
# nvidia-cuda-runtime-cu12==12.8.90
# nvidia-cudnn-cu12==9.10.2.21
# nvidia-cufft-cu12==11.3.3.83
# nvidia-cufile-cu12==1.13.1.3
# nvidia-curand-cu12==10.3.9.90
# nvidia-cusolver-cu12==11.7.3.90
# nvidia-cusparse-cu12==12.5.8.93
# nvidia-cusparselt-cu12==0.7.1
# nvidia-nccl-cu12==2.27.5
# nvidia-nvjitlink-cu12==12.8.93
# nvidia-nvshmem-cu12==3.3.20
# nvidia-nvtx-cu12==12.8.90
# onnxruntime-gpu==1.23.2
# opencv-python==4.12.0.88
# packaging==25.0
# pillow==12.0.0
# protobuf==6.33.0
# pyparsing==3.2.5
# python-dateutil==2.9.0.post0
# setuptools==80.9.0
# six==1.17.0
# sympy==1.14.0
# torch==2.9.0
# torchvision==0.24.0
# triton==3.5.0
# typing_extensions==4.15.0