Merge branch 'dev' of https://github.com/azaion/detections into dev

2026-06-21 16:41:08 +00:00 · 2026-05-15 12:49:37 +03:00
parent 255ec36f8a c9aeed3dd9
commit a0ec2cd563
19 changed files with 282 additions and 48 deletions
@@ -12,6 +12,11 @@ cdef class AIRecognitionConfig:

    cdef public int model_batch_size

+    cdef public bint has_camera_config
+    cdef public double current_height
+    cdef public double current_zoom
+    cdef public double current_angle
+
    cdef public bint has_altitude
    cdef public double altitude
    cdef public double focal_length
@@ -9,9 +9,12 @@ cdef class AIRecognitionConfig:
                 tracking_intersection_threshold,
                 model_batch_size,
                 big_image_tile_overlap_percent,
+                 camera_config,
                 altitude,
                 focal_length,
-                 sensor_width
+                 sensor_width,
+                 current_zoom,
+                 current_angle
                 ):
        self.frame_period_recognition = frame_period_recognition
        self.frame_recognition_seconds = frame_recognition_seconds
@@ -25,10 +28,15 @@ cdef class AIRecognitionConfig:

        self.big_image_tile_overlap_percent = big_image_tile_overlap_percent

-        self.has_altitude = altitude is not None
-        self.altitude = 0.0 if altitude is None else float(altitude)
-        self.focal_length = focal_length
-        self.sensor_width = sensor_width
+        self.has_camera_config = camera_config is not None or altitude is not None
+        self.current_height = 0.0 if altitude is None else float(altitude)
+        self.current_zoom = float(current_zoom)
+        self.current_angle = float(current_angle)
+
+        self.has_altitude = self.has_camera_config
+        self.altitude = self.current_height
+        self.focal_length = float(focal_length)
+        self.sensor_width = float(sensor_width)

    def __str__(self):
        return (f'frame_seconds : {self.frame_recognition_seconds}, distance_confidence : {self.tracking_distance_confidence}, '
@@ -37,13 +45,57 @@ cdef class AIRecognitionConfig:
                f'frame_period_recognition : {self.frame_period_recognition}, '
                f'big_image_tile_overlap_percent: {self.big_image_tile_overlap_percent}, '
                f'model_batch_size: {self.model_batch_size}, '
-                f'altitude: {self.altitude if self.has_altitude else None}, '
+                f'camera_config: {self.has_camera_config}, '
+                f'current_height: {self.current_height if self.has_camera_config else None}, '
+                f'current_zoom: {self.current_zoom}, '
+                f'current_angle: {self.current_angle}, '
                f'focal_length: {self.focal_length}, '
                f'sensor_width: {self.sensor_width}'
                )

    @staticmethod
    cdef AIRecognitionConfig from_dict(dict data):
+        cdef object camera_config = data.get("camera_config", data.get("cameraConfig", None))
+        if camera_config is not None and not isinstance(camera_config, dict):
+            camera_config = None
+
+        cdef object altitude = data.get("altitude", None)
+        cdef object focal_length = data.get("focal_length", data.get("focalLength", 24))
+        cdef object sensor_width = data.get("sensor_width", data.get("sensorWidth", 23.5))
+        cdef object current_zoom = data.get("current_zoom", data.get("currentZoom", 1))
+        cdef object current_angle = data.get("current_angle", data.get("currentAngle", 90))
+
+        if camera_config is not None:
+            altitude = camera_config.get(
+                "current_height",
+                camera_config.get("currentHeight", camera_config.get("altitude", altitude)),
+            )
+            focal_length = camera_config.get(
+                "focal_length",
+                camera_config.get("focalLength", focal_length),
+            )
+            sensor_width = camera_config.get(
+                "sensor_width",
+                camera_config.get("sensorWidth", sensor_width),
+            )
+            current_zoom = camera_config.get(
+                "current_zoom",
+                camera_config.get("currentZoom", current_zoom),
+            )
+            current_angle = camera_config.get(
+                "current_angle",
+                camera_config.get("currentAngle", current_angle),
+            )
+
+        if focal_length is None:
+            focal_length = 24
+        if sensor_width is None:
+            sensor_width = 23.5
+        if current_zoom is None:
+            current_zoom = 1
+        if current_angle is None:
+            current_angle = 90
+
        return AIRecognitionConfig(
            data.get("frame_period_recognition", 4),
            data.get("frame_recognition_seconds", 2),
@@ -57,7 +109,10 @@ cdef class AIRecognitionConfig:

            data.get("big_image_tile_overlap_percent", 20),

-            data.get("altitude", None),
-            data.get("focal_length", 24),
-            data.get("sensor_width", 23.5)
+            camera_config,
+            altitude,
+            focal_length,
+            sensor_width,
+            current_zoom,
+            current_angle
        )
@@ -5,6 +5,7 @@ import av
 import cv2
 import numpy as np
 cimport constants_inf
+from libc.math cimport sin

 from ai_availability_status cimport AIAvailabilityEnum, AIAvailabilityStatus
 from annotation cimport Detection, Annotation
@@ -309,25 +310,42 @@ cdef class Inference:

    cdef _append_image_frame_entries(self, AIRecognitionConfig ai_config, list all_frame_data, frame, str original_media_name):
        cdef double ground_sampling_distance
+        cdef double angle_radians
+        cdef double angle_scale
+        cdef double effective_focal_length
        cdef int model_h, model_w
        cdef int img_h, img_w
        cdef bint has_gsd
        model_h, model_w = self.engine.get_input_shape()
        img_h, img_w, _ = frame.shape
-        has_gsd = ai_config.has_altitude and ai_config.focal_length > 0 and ai_config.sensor_width > 0 and img_w > 0
+        angle_radians = ai_config.current_angle * 3.141592653589793 / 180.0
+        angle_scale = sin(angle_radians)
+        effective_focal_length = ai_config.focal_length * ai_config.current_zoom
+        has_gsd = (
+            ai_config.has_camera_config
+            and ai_config.current_height > 0
+            and effective_focal_length > 0
+            and ai_config.sensor_width > 0
+            and angle_scale > 0
+            and img_w > 0
+        )
        ground_sampling_distance = 0.0
        if has_gsd:
-            ground_sampling_distance = ai_config.sensor_width * ai_config.altitude / (ai_config.focal_length * img_w)
+            ground_sampling_distance = (
+                ai_config.sensor_width
+                * ai_config.current_height
+                / (effective_focal_length * img_w * angle_scale)
+            )
            constants_inf.log(<str>f'ground sampling distance: {ground_sampling_distance}')
        else:
-            constants_inf.log(<str>'ground sampling distance: skipped (altitude unavailable)')
+            constants_inf.log(<str>'ground sampling distance: skipped (camera_config unavailable)')
        if img_h <= 1.5 * model_h and img_w <= 1.5 * model_w:
            all_frame_data.append((frame, original_media_name, f'{original_media_name}_000000', ground_sampling_distance))
        else:
            if not has_gsd:
                all_frame_data.append((frame, original_media_name, f'{original_media_name}_000000', ground_sampling_distance))
                return
-            tile_size = int(constants_inf.METERS_IN_TILE / ground_sampling_distance)
+            tile_size = max(1, int(constants_inf.METERS_IN_TILE / ground_sampling_distance))
            constants_inf.log(<str> f'calc tile size: {tile_size}')
            res = self.split_to_tiles(frame, original_media_name, tile_size, ai_config.big_image_tile_overlap_percent)
            for tile_frame, omn, tile_name in res:
@@ -362,8 +380,8 @@ cdef class Inference:
    cdef split_to_tiles(self, frame, str media_stem, tile_size, overlap_percent):
        constants_inf.log(<str>f'splitting image {media_stem} to tiles...')
        img_h, img_w, _ = frame.shape
-        stride_w = int(tile_size * (1 - overlap_percent / 100))
-        stride_h = int(tile_size * (1 - overlap_percent / 100))
+        stride_w = max(1, int(tile_size * (1 - overlap_percent / 100)))
+        stride_h = max(1, int(tile_size * (1 - overlap_percent / 100)))

        results = []
        original_media_name = media_stem
@@ -155,6 +155,14 @@ class HealthResponse(BaseModel):
    errorMessage: Optional[str] = None


+class CameraConfigDto(BaseModel):  # Nested camera parameters; exposed as the optional AIConfigDto.camera_config field.
+    focal_length: float = 24  # Lens focal length. Units not shown here; presumably the same unit as sensor_width (TODO confirm).
+    sensor_width: float = 23.5  # Sensor width. Presumably the same unit as focal_length (TODO confirm).
+    current_zoom: float = 1  # Zoom factor; the inference hunk in this commit multiplies focal_length by the config's current_zoom.
+    current_angle: float = 90  # Camera angle in degrees (inference converts it to radians and takes sin(); 90 leaves GSD unscaled).
+    current_height: Optional[float] = None  # Camera height; inference skips ground sampling distance unless the config's current_height is > 0.
+
+
 class AIConfigDto(BaseModel):
    frame_period_recognition: int = 4
    frame_recognition_seconds: int = 2
@@ -164,6 +172,7 @@ class AIConfigDto(BaseModel):
    tracking_intersection_threshold: float = 0.6
    model_batch_size: int = 8
    big_image_tile_overlap_percent: int = 20
+    camera_config: Optional[CameraConfigDto] = None
    altitude: Optional[float] = None
    focal_length: float = 24
    sensor_width: float = 23.5
@@ -218,9 +227,12 @@ _AI_SETTINGS_FIELD_KEYS = (
            "BigImageTileOverlapPercent",
        ),
    ),
+)
+
+_CAMERA_SETTINGS_FIELD_KEYS = (
    (
-        "altitude",
-        ("altitude", "Altitude"),
+        "current_height",
+        ("current_height", "currentHeight", "CurrentHeight", "altitude", "Altitude"),
    ),
    (
        "focal_length",
@@ -230,6 +242,14 @@ _AI_SETTINGS_FIELD_KEYS = (
        "sensor_width",
        ("sensor_width", "sensorWidth", "SensorWidth"),
    ),
+    (
+        "current_zoom",
+        ("current_zoom", "currentZoom", "CurrentZoom"),
+    ),
+    (
+        "current_angle",
+        ("current_angle", "currentAngle", "CurrentAngle"),
+    ),
 )


@@ -249,6 +269,21 @@ def _merged_annotation_settings_payload(raw: object) -> dict:
            if key in merged and merged[key] is not None:
                out[snake] = merged[key]
                break
+    camera_source = {}
+    for key in ("camera_config", "cameraConfig", "cameraSettings"):
+        value = raw.get(key)
+        if isinstance(value, dict):
+            camera_source.update(value)
+    camera_merged = dict(merged)
+    camera_merged.update(camera_source)
+    camera_config = {}
+    for snake, aliases in _CAMERA_SETTINGS_FIELD_KEYS:
+        for key in aliases:
+            if key in camera_merged and camera_merged[key] is not None:
+                camera_config[snake] = camera_merged[key]
+                break
+    if camera_config:
+        out["camera_config"] = camera_config
    return out


@@ -306,7 +341,13 @@ def _resolve_media_for_detect(
            cfg.update(_merged_annotation_settings_payload(raw))
    if override is not None:
        for k, v in override.model_dump(exclude_defaults=True).items():
-            cfg[k] = v
+            if k == "camera_config" and isinstance(v, dict):
+                existing = cfg.get("camera_config")
+                camera_cfg = dict(existing) if isinstance(existing, dict) else {}
+                camera_cfg.update(v)
+                cfg[k] = camera_cfg
+            else:
+                cfg[k] = v
    media_path = annotations_client.fetch_media_path(media_id, bearer)
    if not media_path:
        raise HTTPException(