feat(coordinates): implement real pixel-to-GPS projection chain (ESKF-06)

Replace FAKE Math stubs with:
- K^-1 unprojection (camera intrinsics)
- R_cam_body rotation (nadir-pointing camera)
- Quaternion body-to-ENU rotation
- Ray-ground intersection at altitude
- cv2.perspectiveTransform for homography

gps_to_pixel is the exact inverse. Backward compatible defaults.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Yuzviak
2026-04-01 23:48:22 +03:00
parent 9d5337ad72
commit dccadd4bf4
+175 -44
View File
@@ -2,9 +2,59 @@
import math
import cv2
import numpy as np
from gps_denied.schemas import CameraParameters, GPSPoint
# ---------------------------------------------------------------------------
# Module-level helpers
# ---------------------------------------------------------------------------
def _build_intrinsic_matrix(cam: CameraParameters) -> np.ndarray:
"""Build 3x3 camera intrinsic matrix K from CameraParameters."""
fx = cam.focal_length * cam.resolution_width / cam.sensor_width
fy = cam.focal_length * cam.resolution_height / cam.sensor_height
if cam.principal_point is not None:
cx, cy = cam.principal_point
else:
cx = cam.resolution_width / 2.0
cy = cam.resolution_height / 2.0
return np.array([
[fx, 0, cx],
[ 0, fy, cy],
[ 0, 0, 1],
], dtype=np.float64)
def _cam_to_body_rotation() -> np.ndarray:
"""Camera-to-body rotation for nadir-pointing camera.
Camera frame: Z forward (optical axis), X right, Y down (OpenCV convention).
Body frame: X forward (nose), Y right (starboard), Z down.
Camera points nadir: camera Z-axis aligns with body -Z (downward).
The rotation is Rx(180deg): flips Y and Z axes.
R_cam_body = [[1, 0, 0], [0, -1, 0], [0, 0, -1]]
"""
return np.array([
[1, 0, 0],
[0, -1, 0],
[0, 0, -1],
], dtype=np.float64)
def _quat_to_rotation_matrix(q: np.ndarray) -> np.ndarray:
"""Convert [w, x, y, z] quaternion to 3x3 rotation matrix."""
w, x, y, z = q
return np.array([
[1 - 2*(y*y + z*z), 2*(x*y - w*z), 2*(x*z + w*y)],
[2*(x*y + w*z), 1 - 2*(x*x + z*z), 2*(y*z - w*x)],
[2*(x*z - w*y), 2*(y*z + w*x), 1 - 2*(x*x + y*y)],
], dtype=np.float64)
class OriginNotSetError(Exception):
    """Raised when an ENU origin is required for a flight but has not been set."""
@@ -55,28 +105,62 @@ class CoordinateTransformer:
def pixel_to_gps(
    self,
    flight_id: str,
    pixel: tuple[float, float],
    frame_pose: dict,
    camera_params: "CameraParameters",
    altitude: float,
    quaternion: np.ndarray | None = None,
) -> "GPSPoint":
    """Unproject a pixel to a GPS coordinate via ray-ground intersection.

    Chain: pixel -> camera ray (K^-1) -> body (R_cam_body) -> ENU
    (quaternion) -> ray-ground intersection -> WGS84.

    Args:
        flight_id: Flight identifier for ENU origin lookup.
        pixel: (u, v) pixel coordinates.
        frame_pose: Dict with "position" key -> [east, north, up] in ENU meters.
        camera_params: Camera intrinsic parameters.
        altitude: UAV altitude above ground in meters (positive up).
        quaternion: [w, x, y, z] body-to-ENU rotation quaternion.
            If None, assumes identity (level flight, north-facing).

    Returns:
        GPSPoint of the ground location seen at ``pixel``.  Falls back to the
        UAV's own ground-projected position when the viewing ray does not hit
        the ground plane (parallel to it, or pointing upward).
    """
    frame_enu = frame_pose.get("position", [0, 0, 0])

    # Step 1: pixel -> ray direction in the camera frame.
    K = _build_intrinsic_matrix(camera_params)
    pixel_h = np.array([pixel[0], pixel[1], 1.0])
    ray_cam = np.linalg.inv(K) @ pixel_h

    # Step 2: camera frame -> body frame.
    ray_body = _cam_to_body_rotation() @ ray_cam

    # Step 3: body frame -> ENU (identity when no attitude is supplied).
    if quaternion is not None:
        ray_enu = _quat_to_rotation_matrix(quaternion) @ ray_body
    else:
        ray_enu = ray_body

    # Step 4: intersect the ray with the ground plane (Up = 0).
    # Ray: P = P_uav + t * ray_enu, with the UAV at Up = +altitude,
    # so t = -altitude / ray_enu[2].
    if abs(ray_enu[2]) < 1e-10:
        # Ray (nearly) parallel to the ground - no intersection.
        return self.enu_to_gps(flight_id, (frame_enu[0], frame_enu[1], 0.0))
    t = -altitude / ray_enu[2]
    if t < 0:
        # Ray points upward - intersection would be behind the camera.
        return self.enu_to_gps(flight_id, (frame_enu[0], frame_enu[1], 0.0))
    ground_east = frame_enu[0] + t * ray_enu[0]
    ground_north = frame_enu[1] + t * ray_enu[1]

    # Step 5: ENU -> WGS84.
    return self.enu_to_gps(flight_id, (ground_east, ground_north, 0.0))
def gps_to_pixel(
    self,
    flight_id: str,
    gps: "GPSPoint",
    frame_pose: dict,
    camera_params: "CameraParameters",
    altitude: float,
    quaternion: np.ndarray | None = None,
) -> tuple[float, float]:
    """Project a GPS coordinate to image pixel coordinates.

    Exact inverse of ``pixel_to_gps``: WGS84 -> ENU -> body -> camera ->
    pixel.

    Args:
        flight_id: Flight identifier for ENU origin lookup.
        gps: Target point in WGS84.
        frame_pose: Dict with "position" key -> [east, north, up] in ENU meters.
        camera_params: Camera intrinsic parameters.
        altitude: UAV altitude above ground in meters (positive up).
        quaternion: [w, x, y, z] body-to-ENU rotation quaternion; None means
            identity attitude.

    Returns:
        (u, v) pixel coordinates; the image centre when the point lies in
        the camera's focal plane and projection is undefined.
    """
    # Step 1: GPS -> ENU.
    enu = self.gps_to_enu(flight_id, gps)

    # Step 2: target position relative to the UAV, in ENU.
    frame_enu = frame_pose.get("position", [0, 0, 0])
    point_enu = np.array([
        enu[0] - frame_enu[0],
        enu[1] - frame_enu[1],
        -altitude,  # ground sits at -altitude relative to the UAV
    ])

    # Step 3: ENU -> body (inverse of a rotation is its transpose).
    if quaternion is not None:
        point_body = _quat_to_rotation_matrix(quaternion).T @ point_enu
    else:
        point_body = point_enu

    # Step 4: body -> camera.
    point_cam = _cam_to_body_rotation().T @ point_body

    # Step 5: perspective projection through K.
    if abs(point_cam[2]) < 1e-10:
        # Point lies in the camera plane; no finite projection exists.
        cx = camera_params.resolution_width / 2.0
        cy = camera_params.resolution_height / 2.0
        return (cx, cy)
    K = _build_intrinsic_matrix(camera_params)
    pixel_h = K @ (point_cam / point_cam[2])
    return (float(pixel_h[0]), float(pixel_h[1]))
def image_object_to_gps(
    self,
    flight_id: str,
    frame_id: int,
    object_pixel: tuple[float, float],
    frame_pose: dict | None = None,
    camera_params: "CameraParameters | None" = None,
    altitude: float = 100.0,
    quaternion: np.ndarray | None = None,
) -> "GPSPoint":
    """Convert object pixel coordinates to GPS using the full projection chain.

    If ``frame_pose`` or ``camera_params`` are not provided, defaults are
    used for backward compatibility (ENU origin, level flight, and a fixed
    camera profile).

    Args:
        flight_id: Flight identifier for ENU origin lookup.
        frame_id: Frame identifier (currently unused here; kept for API
            stability with existing callers).
        object_pixel: (u, v) pixel coordinates of the detected object.
        frame_pose: Optional pose dict with "position" -> ENU meters.
        camera_params: Optional camera intrinsics.
        altitude: UAV altitude above ground in meters.
        quaternion: Optional [w, x, y, z] body-to-ENU quaternion.
    """
    if frame_pose is None:
        frame_pose = {"position": [0, 0, 0]}
    if camera_params is None:
        # Backward-compatible default intrinsics.
        camera_params = CameraParameters(
            focal_length=16.0,
            sensor_width=23.2,
            sensor_height=15.4,
            resolution_width=5456,
            resolution_height=3632,
        )
    return self.pixel_to_gps(
        flight_id, object_pixel, frame_pose, camera_params, altitude, quaternion
    )
def transform_points(
    self,
    points: list[tuple[float, float]],
    transformation: list[list[float]],
) -> list[tuple[float, float]]:
    """Apply a 3x3 homography to 2D points using cv2.perspectiveTransform.

    Args:
        points: List of (x, y) points; may be empty.
        transformation: 3x3 homography matrix as nested lists.

    Returns:
        Transformed points as a list of (x, y) float tuples.

    Raises:
        ValueError: if ``transformation`` is not 3x3.
    """
    if not points:
        return []
    H = np.array(transformation, dtype=np.float64)
    # cv2.perspectiveTransform silently misbehaves on wrong shapes; fail loudly.
    if H.shape != (3, 3):
        raise ValueError(f"Expected a 3x3 homography, got shape {H.shape}")
    # OpenCV expects an (N, 1, 2) array of points.
    pts = np.array(points, dtype=np.float64).reshape(-1, 1, 2)
    warped = cv2.perspectiveTransform(pts, H)
    return [(float(x), float(y)) for x, y in warped.reshape(-1, 2)]