mirror of
https://github.com/azaion/gps-denied-desktop.git
synced 2026-04-23 01:46:37 +00:00
component 07
This commit is contained in:
@@ -0,0 +1,129 @@
|
|||||||
|
import numpy as np
|
||||||
|
import math
|
||||||
|
|
||||||
|
class GeoTracker:
    """Estimates drone motion by projecting matched image points onto the ground.

    Uses a pinhole camera model plus a flat-earth assumption (ground plane at
    z = 0 in the NED frame): each matched keypoint is ray-cast onto the ground,
    and the median per-point ground displacement between two frames gives the
    camera (drone) translation in metres, which is then converted to a
    latitude/longitude delta.
    """

    # Rays whose downward (NED z) component is this small or less are rejected:
    # grazing rays intersect the ground at enormous, numerically unstable
    # distances and would dominate the shift estimate.
    _MIN_RAY_Z = 1e-6

    def __init__(self, focal_length_mm, sensor_width_mm, image_width_px, image_height_px):
        """Build the pinhole intrinsics from physical camera specs.

        Args:
            focal_length_mm: Lens focal length in millimetres.
            sensor_width_mm: Physical sensor width in millimetres.
            image_width_px: Image width in pixels.
            image_height_px: Image height in pixels.
        """
        # Intrinsics: focal length in pixels; square pixels assumed (fy == fx),
        # principal point assumed at the image centre.
        self.fx = (focal_length_mm / sensor_width_mm) * image_width_px
        self.fy = self.fx
        self.cx = image_width_px / 2.0
        self.cy = image_height_px / 2.0
        self.image_w = image_width_px
        self.image_h = image_height_px

        # Standard 3x3 camera matrix K and its inverse (pixel <-> ray mapping).
        self.K = np.array([
            [self.fx, 0, self.cx],
            [0, self.fy, self.cy],
            [0, 0, 1]
        ])
        self.K_inv = np.linalg.inv(self.K)

    def _get_rotation_matrix(self, yaw, pitch, roll):
        """Return the camera-to-NED rotation for NED Euler angles in degrees.

        Body-to-NED uses the ZYX (yaw-pitch-roll) convention. The result is
        post-multiplied by a fixed camera-to-body alignment for a forward
        looking camera: Cam Z -> Body X, Cam X -> Body Y, Cam Y -> Body Z.
        """
        y, p, r = np.radians(yaw), np.radians(pitch), np.radians(roll)

        Rz = np.array([[np.cos(y), -np.sin(y), 0], [np.sin(y), np.cos(y), 0], [0, 0, 1]])
        Ry = np.array([[np.cos(p), 0, np.sin(p)], [0, 1, 0], [-np.sin(p), 0, np.cos(p)]])
        Rx = np.array([[1, 0, 0], [0, np.cos(r), -np.sin(r)], [0, np.sin(r), np.cos(r)]])

        R_body_to_ned = Rz @ Ry @ Rx

        # Camera frame alignment (optical axis forward along Body X).
        T_cam_to_body = np.array([
            [0, 0, 1],
            [1, 0, 0],
            [0, 1, 0]
        ])

        return R_body_to_ned @ T_cam_to_body

    def pixel_to_ned_ground(self, u, v, altitude, R_cam_to_ned):
        """Ray-plane intersection: project pixel (u, v) to the ground plane.

        Args:
            u, v: Pixel coordinates in the original image.
            altitude: Camera height above the ground plane, metres.
            R_cam_to_ned: 3x3 camera-to-NED rotation matrix.

        Returns:
            np.array([north, east]) in metres relative to the camera's
            horizontal position, or None if the ray does not hit the ground.
        """
        # 1. Pixel -> normalized camera ray.
        pixel_homo = np.array([u, v, 1.0])
        ray_cam = self.K_inv @ pixel_homo

        # 2. Rotate ray into the NED frame.
        ray_ned = R_cam_to_ned @ ray_cam

        # 3. Intersect with ground (z = 0). Camera sits at [0, 0, -altitude]
        # in NED, so along P = O + t * D we need 0 = -altitude + t * ray_ned.z,
        # giving t = altitude / ray_ned.z.
        # Fix: besides up-pointing/parallel rays (z <= 0), also reject
        # near-horizontal rays whose tiny positive z would previously have
        # produced huge, unstable intersection distances.
        if ray_ned[2] <= self._MIN_RAY_Z:
            return None

        t = altitude / ray_ned[2]

        # Ground point relative to the camera position (horizontal origin).
        north = t * ray_ned[0]
        east = t * ray_ned[1]

        return np.array([north, east])

    def calculate_movement(self, kpts0, kpts1, altitude, yaw, start_lat, start_lon):
        """Calculate drone movement from matched ground keypoints.

        kpts0[i] and kpts1[i] are assumed to be the SAME static ground point
        seen from two camera positions, so the camera movement for each pair
        is p0 - p1 (ground point relative to camera 0 minus relative to
        camera 1).

        Args:
            kpts0, kpts1: Matched keypoint arrays, shape (N, 2) pixels.
            altitude: Camera height above ground, metres.
            yaw: Heading in degrees (0 = North).
            start_lat, start_lon: Position at frame 0, degrees.

        Returns:
            Dict with the shift in metres, the updated lat/lon and the number
            of ground points used, or None if no usable point pairs exist.
        """
        # Nadir (straight-down) mapping camera assumed: gimbal pitch = -90.
        R = self._get_rotation_matrix(yaw, -90, 0)

        shifts_n = []
        shifts_e = []

        # Fix: guard against mismatched keypoint array lengths (the original
        # indexed kpts1 with kpts0's indices unconditionally).
        n_pts = min(len(kpts0), len(kpts1))

        # Subsample to roughly 50 point pairs to save time.
        step = max(1, n_pts // 50)

        for i in range(0, n_pts, step):
            p0 = self.pixel_to_ned_ground(kpts0[i][0], kpts0[i][1], altitude, R)
            p1 = self.pixel_to_ned_ground(kpts1[i][0], kpts1[i][1], altitude, R)

            if p0 is not None and p1 is not None:
                diff = p0 - p1
                shifts_n.append(diff[0])
                shifts_e.append(diff[1])

        if not shifts_n:
            return None

        # Median rather than mean: robust to outlier matches.
        avg_n = np.median(shifts_n)
        avg_e = np.median(shifts_e)

        # Metres -> degrees on a spherical earth (radius 6,378,137 m);
        # longitude spacing shrinks by cos(latitude).
        d_lat = (avg_n / 6378137.0) * (180 / math.pi)
        d_lon = (avg_e / (6378137.0 * math.cos(math.radians(start_lat)))) * (180 / math.pi)

        return {
            'move_north_m': avg_n,
            'move_east_m': avg_e,
            'new_lat': start_lat + d_lat,
            'new_lon': start_lon + d_lon,
            'ground_points': len(shifts_n)
        }
|
||||||
@@ -0,0 +1,100 @@
|
|||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from lightglue import viz2d
|
||||||
|
import numpy as np
|
||||||
|
import cv2
|
||||||
|
import time
|
||||||
|
|
||||||
|
# --- IMPORTS ---
|
||||||
|
# Assumes your matcher class is in 'lightglue_onnx.py'
|
||||||
|
from lightglue_onnx import LightGlueMatcher
|
||||||
|
from geolocator import GeoTracker
|
||||||
|
|
||||||
|
# --- CONFIGURATION ---
# Input frame pair and the fused SuperPoint+LightGlue ONNX model file.
IMAGE_0_PATH = "gps-denied-ai-sandbox/docs/00_problem/input_data/AD000001.jpg"
IMAGE_1_PATH = "gps-denied-ai-sandbox/docs/00_problem/input_data/AD000002.jpg"
MODEL_PATH = "superpoint_lightglue_end2end_fused_gpu.onnx"

# FLIGHT DATA (Frame 0 Telemetry)
ALTITUDE = 100.0  # Meters above ground at frame 0
HEADING = 0.0     # Degrees (0 = North)
START_LAT = 48.275292  # Known latitude at frame 0, degrees
START_LON = 37.385220  # Known longitude at frame 0, degrees

# CAMERA SPECS (physical lens/sensor values used to derive intrinsics)
FOCAL_MM = 25.0   # Focal length, millimetres
SENSOR_MM = 23.5  # Sensor width, millimetres
W_PX = 1920       # Image width, pixels
H_PX = 1280       # Image height, pixels

# Matcher Settings
PROCESS_DIM = 1024  # Size to resize images for processing (max edge, pixels)
|
||||||
|
|
||||||
|
def main():
    """Run the full pipeline: match two frames, estimate motion, visualize."""
    # 1. SETUP
    print("1. Initializing Systems...")
    matcher = LightGlueMatcher(MODEL_PATH, max_dimension=PROCESS_DIM)
    geo = GeoTracker(FOCAL_MM, SENSOR_MM, W_PX, H_PX)

    # 2. MATCHING
    # (fixed: was a pointless f-string with no placeholders)
    print("2. Matching Images...")
    start_t = time.time()

    # The matcher returns (F, mkpts0, mkpts1) and handles scaling internally.
    F, kpts0, kpts1 = matcher.match(IMAGE_0_PATH, IMAGE_1_PATH)

    dt = time.time() - start_t

    # The matcher returns empty arrays when fewer than 8 matches are found.
    if len(kpts0) == 0:
        print("[!] Not enough matches found.")
        return

    print(f" -> Success! Found {len(kpts0)} matches in {dt:.2f}s")

    # 3. GEOLOCATION CALCULATION
    print("3. Calculating Coordinates...")
    nav_result = geo.calculate_movement(
        kpts0, kpts1,
        altitude=ALTITUDE,
        yaw=HEADING,
        start_lat=START_LAT,
        start_lon=START_LON
    )

    # 4. REPORTING
    title = "Calculation Failed"

    if nav_result:
        print("\n" + "=" * 45)
        print(" 🛰️ VISUAL ODOMETRY RESULT")
        print("=" * 45)
        print(f" Start Pos: {START_LAT:.6f}, {START_LON:.6f}")
        print("-" * 45)
        print(f" Detected Shift: North {nav_result['move_north_m']:.2f} m")
        print(f" East {nav_result['move_east_m']:.2f} m")
        print("-" * 45)
        print(f" NEW LATITUDE: {nav_result['new_lat']:.8f}")
        print(f" NEW LONGITUDE: {nav_result['new_lon']:.8f}")
        print("=" * 45 + "\n")

        title = (f"Shift: N {nav_result['move_north_m']:.1f}m, E {nav_result['move_east_m']:.1f}m\n"
                 f"Lat: {nav_result['new_lat']:.6f}, Lon: {nav_result['new_lon']:.6f}")
    else:
        print("[!] Math failed (likely bad geometry or points at infinity).")

    # 5. VISUALIZATION
    # Images are re-loaded here because the matcher does not return them.
    img0 = cv2.imread(IMAGE_0_PATH)
    img1 = cv2.imread(IMAGE_1_PATH)

    # Fix: cv2.imread returns None on a bad/missing path; bail out with a
    # clear message instead of crashing inside cvtColor with an opaque error.
    if img0 is None or img1 is None:
        print("[!] Could not reload images for visualization.")
        return

    # Convert BGR to RGB for matplotlib.
    img0 = cv2.cvtColor(img0, cv2.COLOR_BGR2RGB)
    img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2RGB)

    viz2d.plot_images([img0, img1])
    viz2d.plot_matches(kpts0, kpts1, color='lime')
    plt.suptitle(title, fontsize=14, backgroundcolor='white')
    plt.show()


if __name__ == "__main__":
    main()
|
||||||
@@ -0,0 +1,110 @@
|
|||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
import onnxruntime as ort
|
||||||
|
import os
|
||||||
|
|
||||||
|
class LightGlueMatcher:
    """ONNX-based SuperPoint + LightGlue feature matcher.

    Wraps an end-to-end ONNX model: preprocesses a pair of images, runs the
    inference session, rescales the matched keypoints back to original-image
    coordinates, and estimates the fundamental matrix with RANSAC.
    """

    def __init__(self, onnx_model_path, max_dimension=512):
        """Initializes the ONNX runtime session.

        Args:
            onnx_model_path (str): Path to the .onnx model file.
            max_dimension (int): Maximum edge length for resizing.
        """
        self.max_dim = max_dimension

        if not os.path.exists(onnx_model_path):
            raise FileNotFoundError(f"Model not found at: {onnx_model_path}")

        # Prefer the GPU provider whenever onnxruntime reports it available.
        if 'CUDAExecutionProvider' in ort.get_available_providers():
            self.providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
            print("LightGlueMatcher: Using CUDA (GPU).")
        else:
            self.providers = ['CPUExecutionProvider']
            print("LightGlueMatcher: Using CPU.")

        sess_options = ort.SessionOptions()
        self.session = ort.InferenceSession(onnx_model_path, sess_options, providers=self.providers)

        # Cache the graph's tensor names once so match() can build its feed dict.
        self.input_names = [node.name for node in self.session.get_inputs()]
        self.output_names = [node.name for node in self.session.get_outputs()]

    def _preprocess(self, img_input):
        """Load (if needed), grayscale, resize and normalize one image.

        Accepts either a file path or a numpy array; returns a float32
        (1, 1, H, W) tensor and the resize scale that was applied.
        """
        if isinstance(img_input, str):
            gray = cv2.imread(img_input, cv2.IMREAD_GRAYSCALE)
            if gray is None:
                raise FileNotFoundError(f"Could not load image at {img_input}")
        elif isinstance(img_input, np.ndarray):
            # Collapse a 3-channel array to grayscale; pass 2-D arrays through.
            gray = cv2.cvtColor(img_input, cv2.COLOR_BGR2GRAY) if len(img_input.shape) == 3 else img_input
        else:
            raise ValueError("Input must be an image path or numpy array.")

        h, w = gray.shape
        scale = self.max_dim / max(h, w)

        # Only ever downscale; smaller images are fed through unchanged.
        if scale < 1:
            target = (int(round(w * scale)), int(round(h * scale)))
            resized = cv2.resize(gray, target, interpolation=cv2.INTER_AREA)
        else:
            scale = 1.0
            resized = gray

        # [0, 1] float32, shaped (1, 1, H, W) as the ONNX graph expects.
        tensor = (resized.astype(np.float32) / 255.0)[None, None]

        return tensor, scale

    def match(self, img0_input, img1_input):
        """Matches two images and returns the Fundamental Matrix and Keypoints.

        Returns:
            F (np.ndarray): The 3x3 Fundamental Matrix (or None if not enough matches).
            mkpts0 (np.ndarray): Matched keypoints in Image 0 (N, 2).
            mkpts1 (np.ndarray): Matched keypoints in Image 1 (N, 2).
        """
        tensor0, scale0 = self._preprocess(img0_input)
        tensor1, scale1 = self._preprocess(img1_input)

        # Run the fused extractor+matcher graph on both tensors.
        feed = {
            self.input_names[0]: tensor0,
            self.input_names[1]: tensor1
        }
        raw_outputs = self.session.run(None, feed)
        named = dict(zip(self.output_names, raw_outputs))

        # Raw per-image keypoints and the match index pairs.
        kpts0 = named['kpts0'].squeeze(0)
        kpts1 = named['kpts1'].squeeze(0)
        matches0 = named['matches0']

        # Eight correspondences are the minimum for a fundamental matrix.
        if len(matches0) < 8:
            print("Not enough matches to compute matrix.")
            return None, np.array([]), np.array([])

        # Select matched keypoints and undo the preprocessing resize so the
        # coordinates refer to the original images.
        mkpts0 = kpts0[matches0[:, 0]] / scale0
        mkpts1 = kpts1[matches0[:, 1]] / scale1

        # RANSAC-estimated fundamental matrix relating the two views.
        F, mask = cv2.findFundamentalMat(mkpts0, mkpts1, cv2.FM_RANSAC, 3, 0.99)

        return F, mkpts0, mkpts1
|
||||||
|
|
||||||
|
|
||||||
@@ -0,0 +1,47 @@
|
|||||||
|
coloredlogs==15.0.1
|
||||||
|
contourpy==1.3.3
|
||||||
|
cycler==0.12.1
|
||||||
|
filelock==3.20.0
|
||||||
|
flatbuffers==25.9.23
|
||||||
|
fonttools==4.60.1
|
||||||
|
fsspec==2025.10.0
|
||||||
|
humanfriendly==10.0
|
||||||
|
Jinja2==3.1.6
|
||||||
|
kiwisolver==1.4.9
|
||||||
|
kornia==0.8.2
|
||||||
|
kornia_rs==0.1.10
|
||||||
|
-e git+https://github.com/cvg/LightGlue/@746fac2c042e05d1865315b1413419f1c1e7ba55#egg=lightglue
|
||||||
|
MarkupSafe==3.0.3
|
||||||
|
matplotlib==3.10.7
|
||||||
|
mpmath==1.3.0
|
||||||
|
networkx==3.5
|
||||||
|
numpy==2.2.6
|
||||||
|
nvidia-cublas-cu12==12.8.4.1
|
||||||
|
nvidia-cuda-cupti-cu12==12.8.90
|
||||||
|
nvidia-cuda-nvrtc-cu12==12.8.93
|
||||||
|
nvidia-cuda-runtime-cu12==12.8.90
|
||||||
|
nvidia-cudnn-cu12==9.10.2.21
|
||||||
|
nvidia-cufft-cu12==11.3.3.83
|
||||||
|
nvidia-cufile-cu12==1.13.1.3
|
||||||
|
nvidia-curand-cu12==10.3.9.90
|
||||||
|
nvidia-cusolver-cu12==11.7.3.90
|
||||||
|
nvidia-cusparse-cu12==12.5.8.93
|
||||||
|
nvidia-cusparselt-cu12==0.7.1
|
||||||
|
nvidia-nccl-cu12==2.27.5
|
||||||
|
nvidia-nvjitlink-cu12==12.8.93
|
||||||
|
nvidia-nvshmem-cu12==3.3.20
|
||||||
|
nvidia-nvtx-cu12==12.8.90
|
||||||
|
onnxruntime-gpu==1.23.2
|
||||||
|
opencv-python==4.12.0.88
|
||||||
|
packaging==25.0
|
||||||
|
pillow==12.0.0
|
||||||
|
protobuf==6.33.0
|
||||||
|
pyparsing==3.2.5
|
||||||
|
python-dateutil==2.9.0.post0
|
||||||
|
setuptools==80.9.0
|
||||||
|
six==1.17.0
|
||||||
|
sympy==1.14.0
|
||||||
|
torch==2.9.0
|
||||||
|
torchvision==0.24.0
|
||||||
|
triton==3.5.0
|
||||||
|
typing_extensions==4.15.0
|
||||||
Reference in New Issue
Block a user