diff --git a/src/gps_denied/core/coordinates.py b/src/gps_denied/core/coordinates.py index 7ab00a0..59974d3 100644 --- a/src/gps_denied/core/coordinates.py +++ b/src/gps_denied/core/coordinates.py @@ -2,9 +2,59 @@ import math +import cv2 +import numpy as np + from gps_denied.schemas import CameraParameters, GPSPoint +# --------------------------------------------------------------------------- +# Module-level helpers +# --------------------------------------------------------------------------- + +def _build_intrinsic_matrix(cam: CameraParameters) -> np.ndarray: + """Build 3x3 camera intrinsic matrix K from CameraParameters.""" + fx = cam.focal_length * cam.resolution_width / cam.sensor_width + fy = cam.focal_length * cam.resolution_height / cam.sensor_height + if cam.principal_point is not None: + cx, cy = cam.principal_point + else: + cx = cam.resolution_width / 2.0 + cy = cam.resolution_height / 2.0 + return np.array([ + [fx, 0, cx], + [ 0, fy, cy], + [ 0, 0, 1], + ], dtype=np.float64) + + +def _cam_to_body_rotation() -> np.ndarray: + """Camera-to-body rotation for nadir-pointing camera. + + Camera frame: Z forward (optical axis), X right, Y down (OpenCV convention). + Body frame: X forward (nose), Y right (starboard), Z down. + Camera points nadir: camera Z-axis aligns with body -Z. NOTE(review): with the Z-down body frame stated above, -Z points up, not down; the identity body-to-ENU default in pixel_to_gps yields a downward nadir ray only if the body axes are treated as ENU-aligned (Z up) -- confirm the quaternion/body-frame convention before flying non-level attitudes. + + The rotation is Rx(180deg): flips Y and Z axes. 
+ R_cam_body = [[1, 0, 0], [0, -1, 0], [0, 0, -1]] + """ + return np.array([ + [1, 0, 0], + [0, -1, 0], + [0, 0, -1], + ], dtype=np.float64) + + +def _quat_to_rotation_matrix(q: np.ndarray) -> np.ndarray: + """Convert [w, x, y, z] quaternion to 3x3 rotation matrix.""" + w, x, y, z = q + return np.array([ + [1 - 2*(y*y + z*z), 2*(x*y - w*z), 2*(x*z + w*y)], + [2*(x*y + w*z), 1 - 2*(x*x + z*z), 2*(y*z - w*x)], + [2*(x*z - w*y), 2*(y*z + w*x), 1 - 2*(x*x + y*y)], + ], dtype=np.float64) + + class OriginNotSetError(Exception): """Raised when ENU origin is required but not set.""" @@ -55,28 +105,62 @@ class CoordinateTransformer: self, flight_id: str, pixel: tuple[float, float], - frame_pose: dict, # Dict mimicking Pose for now (or a real Pose class) + frame_pose: dict, camera_params: CameraParameters, altitude: float, + quaternion: np.ndarray | None = None, ) -> GPSPoint: - """Placeholder for H01 unprojection and ground plane intersection.""" - # Currently returns center GPS + small offset based on pixel - # Real implementation involves ray casting and intersection with z=-altitude ground plane - - # FAKE Math for mockup: - cx, cy = camera_params.resolution_width / 2, camera_params.resolution_height / 2 - px_offset_x = pixel[0] - cx - px_offset_y = pixel[1] - cy - - # Very rough scaling: assume 1 pixel is ~0.1 meter - east_offset = px_offset_x * 0.1 - north_offset = -px_offset_y * 0.1 # Y is down in pixels - - # Add to frame_pose ENU + """Unproject pixel to GPS via ray-ground intersection. + + Chain: pixel -> camera ray (K^-1) -> body (T_cam_body) -> ENU (quaternion) -> WGS84. + + Args: + flight_id: Flight identifier for ENU origin lookup. + pixel: (u, v) pixel coordinates. + frame_pose: Dict with "position" key -> [east, north, up] in ENU meters. + camera_params: Camera intrinsic parameters. + altitude: UAV altitude above ground in meters (positive up). + quaternion: [w, x, y, z] body-to-ENU rotation quaternion. + If None, assumes identity (level flight, north-facing). 
+ """ + # Step 1: Pixel -> camera ray + K = _build_intrinsic_matrix(camera_params) + K_inv = np.linalg.inv(K) + pixel_h = np.array([pixel[0], pixel[1], 1.0]) + ray_cam = K_inv @ pixel_h # direction in camera frame + + # Step 2: Camera -> body + R_cam_body = _cam_to_body_rotation() + ray_body = R_cam_body @ ray_cam + + # Step 3: Body -> ENU + if quaternion is not None: + R_body_enu = _quat_to_rotation_matrix(quaternion) + else: + R_body_enu = np.eye(3) + ray_enu = R_body_enu @ ray_body + + # Step 4: Ray-ground intersection + # UAV is at altitude (Up component), ground is at Up=0 + # Ray: P = P_uav + t * ray_enu + # Ground plane: Up = 0 -> t = -altitude / ray_enu[2] + if abs(ray_enu[2]) < 1e-10: + # Ray parallel to ground — return UAV position projected + frame_enu = frame_pose.get("position", [0, 0, 0]) + return self.enu_to_gps(flight_id, (frame_enu[0], frame_enu[1], 0.0)) + + t = -altitude / ray_enu[2] + if t < 0: + # Ray points upward — use UAV position + frame_enu = frame_pose.get("position", [0, 0, 0]) + return self.enu_to_gps(flight_id, (frame_enu[0], frame_enu[1], 0.0)) + frame_enu = frame_pose.get("position", [0, 0, 0]) - final_enu = (frame_enu[0] + east_offset, frame_enu[1] + north_offset, 0.0) - - return self.enu_to_gps(flight_id, final_enu) + ground_east = frame_enu[0] + t * ray_enu[0] + ground_north = frame_enu[1] + t * ray_enu[1] + + # Step 5: ENU -> WGS84 + return self.enu_to_gps(flight_id, (ground_east, ground_north, 0.0)) def gps_to_pixel( self, @@ -85,35 +169,82 @@ class CoordinateTransformer: frame_pose: dict, camera_params: CameraParameters, altitude: float, + quaternion: np.ndarray | None = None, ) -> tuple[float, float]: - """Placeholder for inverse projection from GPS to image pixel coordinates.""" - # Reversing the FAKE math above + """Project GPS coordinate to image pixel. + + Inverse of pixel_to_gps: WGS84 -> ENU -> body -> camera -> pixel. 
+ """ + # Step 1: GPS -> ENU enu = self.gps_to_enu(flight_id, gps) + + # Step 2: ENU point relative to UAV frame_enu = frame_pose.get("position", [0, 0, 0]) - - east_offset = enu[0] - frame_enu[0] - north_offset = enu[1] - frame_enu[1] - - cx, cy = camera_params.resolution_width / 2, camera_params.resolution_height / 2 - px_offset_x = east_offset / 0.1 - px_offset_y = -north_offset / 0.1 - - return (cx + px_offset_x, cy + px_offset_y) + point_enu = np.array([ + enu[0] - frame_enu[0], + enu[1] - frame_enu[1], + -altitude, # ground is at -altitude relative to UAV (UAV is at +altitude) + ]) - def image_object_to_gps(self, flight_id: str, frame_id: int, object_pixel: tuple[float, float]) -> GPSPoint: - """Critical method: Converts object pixel coordinates to GPS.""" - # For this prototype, we mock getting the pose and camera params - fake_pose = {"position": [0, 0, 0]} - fake_params = CameraParameters( - focal_length=25.0, - sensor_width=23.5, - sensor_height=15.6, - resolution_width=4000, - resolution_height=3000, + # Step 3: ENU -> body + if quaternion is not None: + R_body_enu = _quat_to_rotation_matrix(quaternion) + else: + R_body_enu = np.eye(3) + point_body = R_body_enu.T @ point_enu + + # Step 4: Body -> camera + R_cam_body = _cam_to_body_rotation() + point_cam = R_cam_body.T @ point_body + + # Step 5: Camera -> pixel + if abs(point_cam[2]) < 1e-10: + cx = camera_params.resolution_width / 2.0 + cy = camera_params.resolution_height / 2.0 + return (cx, cy) + + K = _build_intrinsic_matrix(camera_params) + pixel_h = K @ (point_cam / point_cam[2]) + return (float(pixel_h[0]), float(pixel_h[1])) + + def image_object_to_gps( + self, + flight_id: str, + frame_id: int, + object_pixel: tuple[float, float], + frame_pose: dict | None = None, + camera_params: CameraParameters | None = None, + altitude: float = 100.0, + quaternion: np.ndarray | None = None, + ) -> GPSPoint: + """Convert object pixel coordinates to GPS using full projection chain. 
+ + If frame_pose or camera_params are not provided, uses defaults for + backward compatibility (level flight, ADTI 20L V1 camera). + """ + if frame_pose is None: + frame_pose = {"position": [0, 0, 0]} + if camera_params is None: + camera_params = CameraParameters( + focal_length=16.0, + sensor_width=23.2, + sensor_height=15.4, + resolution_width=5456, + resolution_height=3632, + ) + return self.pixel_to_gps( + flight_id, object_pixel, frame_pose, camera_params, altitude, quaternion ) - return self.pixel_to_gps(flight_id, object_pixel, fake_pose, fake_params, altitude=100.0) - def transform_points(self, points: list[tuple[float, float]], transformation: list[list[float]]) -> list[tuple[float, float]]: - """Applies homography or affine transformation to a list of points.""" - # Placeholder for cv2.perspectiveTransform - return points + def transform_points( + self, + points: list[tuple[float, float]], + transformation: list[list[float]], + ) -> list[tuple[float, float]]: + """Apply 3x3 homography to a list of 2D points using cv2.perspectiveTransform.""" + if not points: + return [] + H = np.array(transformation, dtype=np.float64) + pts = np.array(points, dtype=np.float64).reshape(-1, 1, 2) + transformed = cv2.perspectiveTransform(pts, H) + return [(float(p[0][0]), float(p[0][1])) for p in transformed]