mirror of
https://github.com/azaion/gps-denied-desktop.git
synced 2026-04-22 09:16:37 +00:00
merge main
This commit is contained in:
@@ -0,0 +1,129 @@
|
||||
import numpy as np
|
||||
import math
|
||||
|
||||
class GeoTracker:
    """Turns pixel correspondences between two frames into a geodetic
    displacement estimate.

    A pinhole camera model is built from the lens/sensor description, image
    points are projected onto the ground plane in the NED frame, and the
    median per-point shift gives the drone's horizontal motion.
    """

    def __init__(self, focal_length_mm, sensor_width_mm, image_width_px, image_height_px):
        # Derive pixel-unit intrinsics from the physical lens/sensor specs.
        self.fx = (focal_length_mm / sensor_width_mm) * image_width_px
        self.fy = self.fx  # square pixels assumed
        self.cx = image_width_px / 2.0
        self.cy = image_height_px / 2.0
        self.image_w = image_width_px
        self.image_h = image_height_px

        # Pinhole camera matrix and its inverse (pixel -> normalized ray).
        self.K = np.array([
            [self.fx, 0, self.cx],
            [0, self.fy, self.cy],
            [0, 0, 1]
        ])
        self.K_inv = np.linalg.inv(self.K)

    def _get_rotation_matrix(self, yaw, pitch, roll):
        """Build the camera-to-NED rotation for Euler angles in degrees.

        Body-to-NED uses the ZYX (yaw, pitch, roll) convention; an axis swap
        then maps the optical frame onto the body frame.
        """
        yaw_r = np.radians(yaw)
        pitch_r = np.radians(pitch)
        roll_r = np.radians(roll)

        cos_y, sin_y = np.cos(yaw_r), np.sin(yaw_r)
        cos_p, sin_p = np.cos(pitch_r), np.sin(pitch_r)
        cos_r, sin_r = np.cos(roll_r), np.sin(roll_r)

        rot_z = np.array([[cos_y, -sin_y, 0.0], [sin_y, cos_y, 0.0], [0.0, 0.0, 1.0]])
        rot_y = np.array([[cos_p, 0.0, sin_p], [0.0, 1.0, 0.0], [-sin_p, 0.0, cos_p]])
        rot_x = np.array([[1.0, 0.0, 0.0], [0.0, cos_r, -sin_r], [0.0, sin_r, cos_r]])

        body_to_ned = rot_z @ rot_y @ rot_x

        # Optical-to-body axis swap for a forward-mounted camera:
        # camera Z (optical axis) -> body X, camera X -> body Y, camera Y -> body Z.
        cam_to_body = np.array([
            [0.0, 0.0, 1.0],
            [1.0, 0.0, 0.0],
            [0.0, 1.0, 0.0]
        ])

        return body_to_ned @ cam_to_body

    def pixel_to_ned_ground(self, u, v, altitude, R_cam_to_ned):
        """Ray/ground-plane intersection.

        Projects pixel (u, v) onto the ground plane (z = 0 in NED) and returns
        the [north, east] offset in meters relative to the camera's horizontal
        position, or None when the ray never reaches the ground.
        """
        # Pixel -> normalized camera ray -> NED-frame ray.
        ray_ned = R_cam_to_ned @ (self.K_inv @ np.array([u, v, 1.0]))

        # Camera sits at [0, 0, -altitude] in NED; solving P.z = 0 for the
        # ray P = O + t * D gives t = altitude / ray_z.
        if ray_ned[2] <= 0:
            # Ray points at or above the horizon.
            return None

        depth_t = altitude / ray_ned[2]

        # Ground hit relative to the camera's (0, 0) horizontal position.
        return np.array([depth_t * ray_ned[0], depth_t * ray_ned[1]])

    def calculate_movement(self, kpts0, kpts1, altitude, yaw, start_lat, start_lon):
        """Estimate drone displacement from matched ground keypoints.

        Assumes a nadir-looking camera (gimbal pitch of -90 degrees), which is
        the standard mapping configuration. Returns a dict with the shift in
        meters plus the updated lat/lon, or None when no valid points remain.
        """
        rotation = self._get_rotation_matrix(yaw, -90, 0)

        north_shifts = []
        east_shifts = []

        # Subsample to roughly 50 correspondences to bound the work.
        stride = max(1, len(kpts0) // 50)

        for idx in range(0, len(kpts0), stride):
            ground0 = self.pixel_to_ned_ground(kpts0[idx][0], kpts0[idx][1], altitude, rotation)
            ground1 = self.pixel_to_ned_ground(kpts1[idx][0], kpts1[idx][1], altitude, rotation)

            if ground0 is None or ground1 is None:
                continue

            # Both pixels image the SAME static ground point, so the camera
            # motion is (point w.r.t. camera 0) - (point w.r.t. camera 1).
            delta = ground0 - ground1
            north_shifts.append(delta[0])
            east_shifts.append(delta[1])

        if not north_shifts:
            return None

        # Median is robust to the occasional bad match.
        avg_n = np.median(north_shifts)
        avg_e = np.median(east_shifts)

        # Meters -> degrees on a spherical Earth (R = 6,378,137 m);
        # longitude is scaled by cos(latitude).
        d_lat = (avg_n / 6378137.0) * (180 / math.pi)
        d_lon = (avg_e / (6378137.0 * math.cos(math.radians(start_lat)))) * (180 / math.pi)

        return {
            'move_north_m': avg_n,
            'move_east_m': avg_e,
            'new_lat': start_lat + d_lat,
            'new_lon': start_lon + d_lon,
            'ground_points': len(north_shifts)
        }
|
||||
@@ -0,0 +1,100 @@
|
||||
import matplotlib.pyplot as plt
|
||||
from lightglue import viz2d
|
||||
import numpy as np
|
||||
import cv2
|
||||
import time
|
||||
|
||||
# --- IMPORTS ---
|
||||
# Assumes your matcher class is in 'lightglue_onnx.py'
|
||||
from lightglue_onnx import LightGlueMatcher
|
||||
from geolocator import GeoTracker
|
||||
|
||||
# --- CONFIGURATION ---
|
||||
# --- CONFIGURATION ---
# Input frame pair to match (frame 0 is the telemetry reference).
IMAGE_0_PATH = "gps-denied-ai-sandbox/docs/00_problem/input_data/AD000001.jpg"
IMAGE_1_PATH = "gps-denied-ai-sandbox/docs/00_problem/input_data/AD000002.jpg"
# Fused SuperPoint + LightGlue end-to-end ONNX model.
MODEL_PATH = "superpoint_lightglue_end2end_fused_gpu.onnx"

# FLIGHT DATA (Frame 0 Telemetry)
ALTITUDE = 100.0  # Meters above ground
HEADING = 0.0  # Degrees (0 = North)
START_LAT = 48.275292  # Degrees, WGS84
START_LON = 37.385220  # Degrees, WGS84

# CAMERA SPECS
FOCAL_MM = 25.0  # Lens focal length, millimeters
SENSOR_MM = 23.5  # Sensor width, millimeters
W_PX = 1920  # Image width, pixels
H_PX = 1280  # Image height, pixels

# Matcher Settings
PROCESS_DIM = 1024  # Size to resize images for processing (max edge length)
|
||||
|
||||
def main():
    """End-to-end demo: match two frames, estimate drone motion from the
    matches, print the result, and visualize the correspondences."""
    # 1. SETUP
    print("1. Initializing Systems...")
    matcher = LightGlueMatcher(MODEL_PATH, max_dimension=PROCESS_DIM)
    geo = GeoTracker(FOCAL_MM, SENSOR_MM, W_PX, H_PX)

    # 2. MATCHING
    print(f"2. Matching Images...")
    start_t = time.time()

    # The matcher returns (F, mkpts0, mkpts1) and handles the processing
    # resize internally, so keypoints are already in original pixel units.
    F, kpts0, kpts1 = matcher.match(IMAGE_0_PATH, IMAGE_1_PATH)

    dt = time.time() - start_t

    # The matcher returns empty arrays when fewer than 8 matches were found.
    if len(kpts0) == 0:
        print("[!] Not enough matches found.")
        return

    print(f" -> Success! Found {len(kpts0)} matches in {dt:.2f}s")

    # 3. GEOLOCATION CALCULATION
    print("3. Calculating Coordinates...")
    nav_result = geo.calculate_movement(
        kpts0, kpts1,
        altitude=ALTITUDE,
        yaw=HEADING,
        start_lat=START_LAT,
        start_lon=START_LON
    )

    # 4. REPORTING
    title = "Calculation Failed"

    if nav_result:
        print("\n" + "=" * 45)
        print(" 🛰️ VISUAL ODOMETRY RESULT")
        print("=" * 45)
        print(f" Start Pos: {START_LAT:.6f}, {START_LON:.6f}")
        print("-" * 45)
        print(f" Detected Shift: North {nav_result['move_north_m']:.2f} m")
        print(f" East {nav_result['move_east_m']:.2f} m")
        print("-" * 45)
        print(f" NEW LATITUDE: {nav_result['new_lat']:.8f}")  # <--- RESULT
        print(f" NEW LONGITUDE: {nav_result['new_lon']:.8f}")  # <--- RESULT
        print("=" * 45 + "\n")

        title = (f"Shift: N {nav_result['move_north_m']:.1f}m, E {nav_result['move_east_m']:.1f}m\n"
                 f"Lat: {nav_result['new_lat']:.6f}, Lon: {nav_result['new_lon']:.6f}")
    else:
        print("[!] Math failed (likely bad geometry or points at infinity).")

    # 5. VISUALIZATION
    # Load the full-resolution frames for display (the matcher does not return them).
    img0 = cv2.imread(IMAGE_0_PATH)
    img1 = cv2.imread(IMAGE_1_PATH)
    # FIX: cv2.imread returns None (no exception) on a missing/corrupt file,
    # which previously crashed with an opaque error inside cvtColor.
    if img0 is None or img1 is None:
        print("[!] Could not load images for visualization.")
        return
    # Convert BGR to RGB for matplotlib
    img0 = cv2.cvtColor(img0, cv2.COLOR_BGR2RGB)
    img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2RGB)

    viz2d.plot_images([img0, img1])
    viz2d.plot_matches(kpts0, kpts1, color='lime')
    plt.suptitle(title, fontsize=14, backgroundcolor='white')
    plt.show()


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,110 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
import onnxruntime as ort
|
||||
import os
|
||||
|
||||
class LightGlueMatcher:
    """Feature matcher wrapping a fused SuperPoint + LightGlue ONNX model."""

    def __init__(self, onnx_model_path, max_dimension=512):
        """Create the ONNX Runtime session.

        Args:
            onnx_model_path (str): Path to the .onnx model file.
            max_dimension (int): Maximum edge length for resizing.

        Raises:
            FileNotFoundError: If the model file does not exist.
        """
        self.max_dim = max_dimension

        if not os.path.exists(onnx_model_path):
            raise FileNotFoundError(f"Model not found at: {onnx_model_path}")

        # Prefer GPU execution whenever the CUDA provider is available.
        if 'CUDAExecutionProvider' in ort.get_available_providers():
            self.providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
            print("LightGlueMatcher: Using CUDA (GPU).")
        else:
            self.providers = ['CPUExecutionProvider']
            print("LightGlueMatcher: Using CPU.")

        sess_options = ort.SessionOptions()
        self.session = ort.InferenceSession(onnx_model_path, sess_options, providers=self.providers)

        # Cache tensor names once so match() can bind inputs/outputs cheaply.
        self.input_names = [inp.name for inp in self.session.get_inputs()]
        self.output_names = [out.name for out in self.session.get_outputs()]

    def _preprocess(self, img_input):
        """Load (if needed), grayscale, resize and normalize an image.

        Accepts either a file path or a numpy array. Returns the
        (1, 1, H, W) float32 tensor and the resize scale that was applied.
        """
        if isinstance(img_input, str):
            gray = cv2.imread(img_input, cv2.IMREAD_GRAYSCALE)
            if gray is None:
                raise FileNotFoundError(f"Could not load image at {img_input}")
        elif isinstance(img_input, np.ndarray):
            # Collapse color input to a single channel; pass grayscale through.
            gray = cv2.cvtColor(img_input, cv2.COLOR_BGR2GRAY) if len(img_input.shape) == 3 else img_input
        else:
            raise ValueError("Input must be an image path or numpy array.")

        h, w = gray.shape
        scale = self.max_dim / max(h, w)

        # Only shrink; images already within the budget keep scale 1.0.
        if scale < 1:
            target = (int(round(w * scale)), int(round(h * scale)))
            gray = cv2.resize(gray, target, interpolation=cv2.INTER_AREA)
        else:
            scale = 1.0

        # Scale pixels to [0, 1] and add batch + channel axes: (1, 1, H, W).
        tensor = (gray.astype(np.float32) / 255.0)[None, None]
        return tensor, scale

    def match(self, img0_input, img1_input):
        """Match two images and estimate their fundamental matrix.

        Returns:
            F (np.ndarray): The 3x3 Fundamental Matrix (or None if not enough matches).
            mkpts0 (np.ndarray): Matched keypoints in Image 0 (N, 2).
            mkpts1 (np.ndarray): Matched keypoints in Image 1 (N, 2).
        """
        tensor0, scale0 = self._preprocess(img0_input)
        tensor1, scale1 = self._preprocess(img1_input)

        # Run inference and label outputs by the cached tensor names.
        feed = {
            self.input_names[0]: tensor0,
            self.input_names[1]: tensor1
        }
        raw_outputs = self.session.run(None, feed)
        named = dict(zip(self.output_names, raw_outputs))

        kpts0 = named['kpts0'].squeeze(0)
        kpts1 = named['kpts1'].squeeze(0)
        matches0 = named['matches0']

        # The 8-point algorithm needs at least 8 correspondences.
        if len(matches0) < 8:
            print("Not enough matches to compute matrix.")
            return None, np.array([]), np.array([])

        # Undo the preprocessing resize so keypoints are in original pixels.
        mkpts0 = kpts0[matches0[:, 0]] / scale0
        mkpts1 = kpts1[matches0[:, 1]] / scale1

        # RANSAC-robust fundamental matrix -- the input to relative pose work.
        F, mask = cv2.findFundamentalMat(mkpts0, mkpts1, cv2.FM_RANSAC, 3, 0.99)

        return F, mkpts0, mkpts1
|
||||
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
coloredlogs==15.0.1
|
||||
contourpy==1.3.3
|
||||
cycler==0.12.1
|
||||
filelock==3.20.0
|
||||
flatbuffers==25.9.23
|
||||
fonttools==4.60.1
|
||||
fsspec==2025.10.0
|
||||
humanfriendly==10.0
|
||||
Jinja2==3.1.6
|
||||
kiwisolver==1.4.9
|
||||
kornia==0.8.2
|
||||
kornia_rs==0.1.10
|
||||
-e git+https://github.com/cvg/LightGlue/@746fac2c042e05d1865315b1413419f1c1e7ba55#egg=lightglue
|
||||
MarkupSafe==3.0.3
|
||||
matplotlib==3.10.7
|
||||
mpmath==1.3.0
|
||||
networkx==3.5
|
||||
numpy==2.2.6
|
||||
nvidia-cublas-cu12==12.8.4.1
|
||||
nvidia-cuda-cupti-cu12==12.8.90
|
||||
nvidia-cuda-nvrtc-cu12==12.8.93
|
||||
nvidia-cuda-runtime-cu12==12.8.90
|
||||
nvidia-cudnn-cu12==9.10.2.21
|
||||
nvidia-cufft-cu12==11.3.3.83
|
||||
nvidia-cufile-cu12==1.13.1.3
|
||||
nvidia-curand-cu12==10.3.9.90
|
||||
nvidia-cusolver-cu12==11.7.3.90
|
||||
nvidia-cusparse-cu12==12.5.8.93
|
||||
nvidia-cusparselt-cu12==0.7.1
|
||||
nvidia-nccl-cu12==2.27.5
|
||||
nvidia-nvjitlink-cu12==12.8.93
|
||||
nvidia-nvshmem-cu12==3.3.20
|
||||
nvidia-nvtx-cu12==12.8.90
|
||||
onnxruntime-gpu==1.23.2
|
||||
opencv-python==4.12.0.88
|
||||
packaging==25.0
|
||||
pillow==12.0.0
|
||||
protobuf==6.33.0
|
||||
pyparsing==3.2.5
|
||||
python-dateutil==2.9.0.post0
|
||||
setuptools==80.9.0
|
||||
six==1.17.0
|
||||
sympy==1.14.0
|
||||
torch==2.9.0
|
||||
torchvision==0.24.0
|
||||
triton==3.5.0
|
||||
typing_extensions==4.15.0
|
||||
Reference in New Issue
Block a user