From c9aeed3dd95f38573e3b6f289d880aa34f66f5c6 Mon Sep 17 00:00:00 2001 From: Roman Meshko Date: Thu, 14 May 2026 22:31:29 +0300 Subject: [PATCH] Added camera config --- _docs/00_problem/acceptance_criteria.md | 2 +- .../00_problem/input_data/data_parameters.md | 12 ++- _docs/01_solution/solution.md | 2 +- .../03_inference_pipeline/description.md | 2 +- _docs/02_document/data_model.md | 12 ++- _docs/02_document/modules/ai_config.md | 10 ++- _docs/02_document/modules/inference.md | 2 +- _docs/02_document/modules/main.md | 5 +- _docs/02_document/tests/blackbox-tests.md | 6 +- e2e/mocks/annotations/app.py | 10 ++- e2e/tests/test_performance.py | 12 ++- e2e/tests/test_single_image.py | 10 ++- e2e/tests/test_tiling.py | 10 ++- src/ai_config.pxd | 5 ++ src/ai_config.pyx | 73 ++++++++++++++++--- src/inference.pyx | 30 ++++++-- src/main.py | 47 +++++++++++- tests/test_ai_config_from_dict.py | 37 ++++++++++ tests/test_az174_db_driven_config.py | 43 +++++++++-- 19 files changed, 282 insertions(+), 48 deletions(-) diff --git a/_docs/00_problem/acceptance_criteria.md b/_docs/00_problem/acceptance_criteria.md index 586770b..2bb2f6d 100644 --- a/_docs/00_problem/acceptance_criteria.md +++ b/_docs/00_problem/acceptance_criteria.md @@ -17,7 +17,7 @@ - Images ≤ 1.5× model dimensions (1280×1280): processed as single frame. - Larger images: tiled based on ground sampling distance. Tile physical size: 25 meters (METERS_IN_TILE). Tile overlap: `big_image_tile_overlap_percent` (default: 20%). -- GSD calculation: `sensor_width * altitude / (focal_length * image_width)` when `altitude` is provided. +- GSD calculation: `sensor_width * current_height / (focal_length * current_zoom * image_width * sin(current_angle))` when `camera_config.current_height` and valid camera parameters are provided. `current_angle` is in degrees and defaults to 90. ## API diff --git a/_docs/00_problem/input_data/data_parameters.md b/_docs/00_problem/input_data/data_parameters.md index 0f4ca60..31b08a2 100644 --- a/_docs/00_problem/input_data/data_parameters.md +++ b/_docs/00_problem/input_data/data_parameters.md @@ -36,9 +36,19 @@ Media path is resolved from the Annotations service via `GET /api/media/{media_i | tracking_intersection_threshold | float | 0.6 | Overlap ratio for NMS deduplication | | model_batch_size | int | 8 | Inference batch size | | big_image_tile_overlap_percent | int | 20 | Tile overlap for large images (0-100%) | -| altitude | float | optional | Camera altitude in meters. When omitted, GSD-based size filtering and image tiling are skipped. | +| camera_config | object | null | Camera parameters for GSD. When omitted or missing height, GSD-based size filtering and image tiling are skipped. | + +### camera_config + +| Field | Type | Default | Range/Meaning | +|-------|------|---------|---------------| | focal_length | float | 24 | Camera focal length in mm | | sensor_width | float | 23.5 | Camera sensor width in mm | +| current_zoom | float | 1 | Optical zoom multiplier; effective focal length is `focal_length * current_zoom` | +| current_angle | float | 90 | Camera angle in degrees; 90 is nadir/downward | +| current_height | float | optional | Camera height in meters | + +Legacy flat `altitude`, `focal_length`, and `sensor_width` keys are still accepted for backward compatibility, but new clients should send `camera_config`. `paths` field was removed in AZ-174 — media paths are now resolved via the Annotations service. diff --git a/_docs/01_solution/solution.md b/_docs/01_solution/solution.md index 49ae146..9ec54dc 100644 --- a/_docs/01_solution/solution.md +++ b/_docs/01_solution/solution.md @@ -32,7 +32,7 @@ graph LR | Cython inference pipeline | Python 3, Cython 3.1.3, OpenCV 4.10 | Near-C performance for tight detection loops while retaining Python ecosystem | Build complexity, limited IDE/debug support | Compilation step via setup.py | N/A | Low (open-source) | High — critical for postprocessing throughput | | Dual engine strategy (TensorRT + ONNX) | TensorRT 10.11, ONNX Runtime 1.22 | Maximum GPU speed with CPU fallback; auto-conversion and caching | Two code paths; GPU-specific engine files not portable | NVIDIA GPU (CC ≥ 6.1) for TensorRT | N/A | TensorRT free for NVIDIA GPUs | High — balances performance and portability | | FastAPI HTTP service | FastAPI, Uvicorn, Pydantic | Async SSE, auto-generated docs, fast development | Sync inference offloaded to ThreadPoolExecutor (2 workers) | Python 3.8+ | Bearer token pass-through | Low (open-source) | High — fits async streaming + sync inference pattern | -| GSD-based image tiling | OpenCV, NumPy | Preserves small object detail in large aerial images | Complex tile dedup logic; overlap increases compute | Camera metadata (altitude, focal length, sensor width) | N/A | Compute cost scales with image size | High — essential for aerial imagery use case | +| GSD-based image tiling | OpenCV, NumPy | Preserves small object detail in large aerial images | Complex tile dedup logic; overlap increases compute | Camera metadata (`camera_config`: height, angle, zoom, focal length, sensor width) | N/A | Compute cost scales with image size | High — essential for aerial imagery use case | | Lazy engine initialization | pynvml, threading | Fast API startup; background model conversion | First request has high latency; engine may be unavailable | None | N/A | N/A | High — prevents blocking startup on slow model download/conversion | ## 3. Testing Strategy diff --git a/_docs/02_document/components/03_inference_pipeline/description.md b/_docs/02_document/components/03_inference_pipeline/description.md index 4b7c16c..6e1fcef 100644 --- a/_docs/02_document/components/03_inference_pipeline/description.md +++ b/_docs/02_document/components/03_inference_pipeline/description.md @@ -109,7 +109,7 @@ None — internal component, consumed by API layer. ### Large Image Tiling -- Ground Sampling Distance: `sensor_width * altitude / (focal_length * image_width)` +- Ground Sampling Distance: `sensor_width * current_height / (focal_length * current_zoom * image_width * sin(current_angle))` - Tile size: `METERS_IN_TILE / GSD` pixels - Overlap: configurable percentage - Tile deduplication: absolute-coordinate Detection equality across adjacent tiles diff --git a/_docs/02_document/data_model.md b/_docs/02_document/data_model.md index 496c570..fcabb0c 100644 --- a/_docs/02_document/data_model.md +++ b/_docs/02_document/data_model.md @@ -37,9 +37,13 @@ erDiagram double tracking_intersection_threshold int big_image_tile_overlap_percent int model_batch_size - double altitude + bool has_camera_config + double current_height + double current_zoom + double current_angle double focal_length double sensor_width + double altitude } AIAvailabilityStatus { @@ -107,7 +111,7 @@ Groups detections for a single frame or image tile. ### AIRecognitionConfig -Runtime configuration for inference behavior. Created from dict (API) or msgpack (internal). +Runtime configuration for inference behavior. Created from dict (API). Camera values are grouped under `camera_config` at the API boundary and expanded into `current_height`, `current_zoom`, `current_angle`, `focal_length`, and `sensor_width` internally. `altitude` remains as a legacy alias for `current_height`. ### AIAvailabilityStatus @@ -125,7 +129,7 @@ SSE event payload. Status values: AIProcessing, AIProcessed, Error. ### AIConfigDto -API input configuration. Same fields as AIRecognitionConfig with defaults. +API input configuration. Same inference fields as `AIRecognitionConfig` with defaults, plus nested `camera_config` for GSD and physical-size filtering. ### HealthResponse @@ -144,7 +148,7 @@ Annotation names encode media source and processing context: | Entity | Format | Usage | |--------|--------|-------| | Detection/Annotation | msgpack (compact keys) | `annotation.serialize()` | -| AIRecognitionConfig | msgpack (compact keys) | `from_msgpack()` | +| AIRecognitionConfig | Python dict | `AIRecognitionConfig.from_dict()` | | AIAvailabilityStatus | msgpack | `serialize()` | | DetectionDto/Event | JSON (Pydantic) | HTTP API responses, SSE | diff --git a/_docs/02_document/modules/ai_config.md b/_docs/02_document/modules/ai_config.md index f07757e..01e7895 100644 --- a/_docs/02_document/modules/ai_config.md +++ b/_docs/02_document/modules/ai_config.md @@ -20,9 +20,13 @@ Data class holding all AI recognition configuration parameters, with factory met | `tracking_intersection_threshold` | double | 0.6 | IoU threshold for overlapping detection removal | | `model_batch_size` | int | 1 | Batch size for inference | | `big_image_tile_overlap_percent` | int | 20 | Tile overlap percentage for large image splitting | -| `altitude` | double? | optional | Camera altitude in meters. When missing, GSD-based filtering is disabled | +| `has_camera_config` | bool | false | Whether camera parameters were supplied | +| `current_height` | double | 0.0 | Camera height in meters, from `camera_config.current_height` | +| `current_zoom` | double | 1.0 | Camera zoom multiplier | +| `current_angle` | double | 90.0 | Camera angle in degrees; 90 is nadir/downward | | `focal_length` | double | 24 | Camera focal length in mm | | `sensor_width` | double | 23.5 | Camera sensor width in mm | +| `altitude` / `has_altitude` | double / bool | legacy | Backward-compatible aliases for older flat camera config | #### Methods @@ -32,7 +36,7 @@ Data class holding all AI recognition configuration parameters, with factory met ## Internal Logic -`from_dict` applies defaults for missing keys using full descriptive key names. +`from_dict` applies defaults for missing keys using full descriptive key names. Camera parameters are read from nested `camera_config` first; legacy flat `altitude`, `focal_length`, and `sensor_width` keys remain supported for older clients. **Removed**: `paths` field and `file_data` field were removed as part of the distributed architecture shift (AZ-174). Media paths are now resolved via the Annotations service API, not passed in config. `from_msgpack()` was also removed as it was unused. @@ -51,7 +55,7 @@ Data class holding all AI recognition configuration parameters, with factory met ## Configuration -Camera/altitude parameters (`altitude`, `focal_length`, `sensor_width`) are used for ground sampling distance calculation in aerial image processing. If `altitude` is missing, the service skips GSD-based size filtering and does not tile large images by physical size. +Camera parameters (`camera_config.focal_length`, `camera_config.sensor_width`, `camera_config.current_zoom`, `camera_config.current_angle`, `camera_config.current_height`) are used for ground sampling distance calculation in aerial image processing. If `camera_config` is missing or height/optics are invalid, the service skips GSD-based size filtering and does not tile large images by physical size. ## External Integrations diff --git a/_docs/02_document/modules/inference.md b/_docs/02_document/modules/inference.md index 9577557..450ded5 100644 --- a/_docs/02_document/modules/inference.md +++ b/_docs/02_document/modules/inference.md @@ -90,7 +90,7 @@ Both `run_detect_image` and `run_detect_video` accept raw bytes instead of file ### Ground Sampling Distance (GSD) -`GSD = sensor_width * altitude / (focal_length * image_width)` — meters per pixel, used for physical size filtering of aerial detections. +`GSD = sensor_width * current_height / (focal_length * current_zoom * image_width * sin(current_angle))` — meters per pixel, used for physical size filtering of aerial detections. `current_angle` is configured in degrees and defaults to 90. ## Dependencies diff --git a/_docs/02_document/modules/main.md b/_docs/02_document/modules/main.md index 1c4f4e5..8007d9a 100644 --- a/_docs/02_document/modules/main.md +++ b/_docs/02_document/modules/main.md @@ -23,7 +23,8 @@ FastAPI application entry point — exposes HTTP API for object detection on ima | `DetectionDto` | centerX, centerY, width, height, classNum, label, confidence | Single detection result | | `DetectionEvent` | annotations (list[DetectionDto]), mediaId, mediaStatus, mediaPercent | SSE event payload | | `HealthResponse` | status, aiAvailability, engineType, errorMessage | Health check response | -| `AIConfigDto` | frame_period_recognition, frame_recognition_seconds, probability_threshold, tracking_*, model_batch_size, big_image_tile_overlap_percent, altitude, focal_length, sensor_width | Configuration input (no `paths` field — removed in AZ-174) | +| `CameraConfigDto` | focal_length, sensor_width, current_zoom, current_angle, current_height | Camera input used for GSD and physical-size filtering | +| `AIConfigDto` | frame_period_recognition, frame_recognition_seconds, probability_threshold, tracking_*, model_batch_size, big_image_tile_overlap_percent, camera_config | Configuration input (no `paths` field — removed in AZ-174) | ### Class: TokenManager @@ -37,7 +38,7 @@ FastAPI application entry point — exposes HTTP API for object detection on ima | Function | Signature | Description | |----------|-----------|-------------| -| `_merged_annotation_settings_payload` | `(raw: object) -> dict` | Merges nested AI settings from Annotations service response (handles `aiRecognitionSettings`, `cameraSettings` sub-objects and PascalCase/camelCase/snake_case aliases) | +| `_merged_annotation_settings_payload` | `(raw: object) -> dict` | Merges nested AI settings from Annotations service response (handles `aiRecognitionSettings`, `camera_config`/`cameraSettings` sub-objects and PascalCase/camelCase/snake_case aliases) | | `_resolve_media_for_detect` | `(media_id, token_mgr, override) -> tuple[dict, str]` | Fetches user AI settings + media path from Annotations service, merges with client overrides | | `_detect_upload_kind` | `(filename, data) -> tuple[str, str]` | Determines if upload is image or video by extension, falls back to content probing (cv2/PyAV) | | `_post_media_record` | `(payload, bearer) -> bool` | Creates media record via `POST /api/media` on Annotations service | diff --git a/_docs/02_document/tests/blackbox-tests.md b/_docs/02_document/tests/blackbox-tests.md index d091481..18ec4d6 100644 --- a/_docs/02_document/tests/blackbox-tests.md +++ b/_docs/02_document/tests/blackbox-tests.md @@ -83,7 +83,7 @@ **Preconditions**: - Engine is initialized -- Config includes altitude, focal_length, sensor_width for GSD calculation +- Config includes `camera_config` with `current_height`, `focal_length`, `sensor_width`, `current_zoom`, and `current_angle` for GSD calculation **Input data**: large-image (4000×3000) @@ -91,7 +91,7 @@ | Step | Consumer Action | Expected System Response | |------|----------------|------------------------| -| 1 | `POST /detect` with large-image and config `{"altitude": 400, "focal_length": 24, "sensor_width": 23.5}` | 200 OK | +| 1 | `POST /detect` with large-image and config `{"camera_config":{"current_height":400,"focal_length":24,"sensor_width":23.5,"current_zoom":1,"current_angle":90}}` | 200 OK | | 2 | Parse response JSON | Array of detections | | 3 | Verify detection coordinates | Bounding box coordinates are in 0.0–1.0 range relative to the full original image | @@ -167,7 +167,7 @@ | Step | Consumer Action | Expected System Response | |------|----------------|------------------------| -| 1 | `POST /detect` with small-image and config `{"altitude": 400, "focal_length": 24, "sensor_width": 23.5}` | 200 OK | +| 1 | `POST /detect` with small-image and config `{"camera_config":{"current_height":400,"focal_length":24,"sensor_width":23.5,"current_zoom":1,"current_angle":90}}` | 200 OK | | 2 | For each detection, compute physical size from bounding box + GSD | No detection's physical size exceeds the MaxSizeM defined for its class in classes.json | **Expected outcome**: All returned detections have plausible physical dimensions for their class. diff --git a/e2e/mocks/annotations/app.py b/e2e/mocks/annotations/app.py index 6a9b05d..19ab131 100644 --- a/e2e/mocks/annotations/app.py +++ b/e2e/mocks/annotations/app.py @@ -41,9 +41,13 @@ def user_ai_settings(user_id): "tracking_intersection_threshold": 0.6, "model_batch_size": 8, "big_image_tile_overlap_percent": 20, - "altitude": 400, - "focal_length": 24, - "sensor_width": 23.5, + "camera_config": { + "focal_length": 24, + "sensor_width": 23.5, + "current_zoom": 1, + "current_angle": 90, + "current_height": 400, + }, } diff --git a/e2e/tests/test_performance.py b/e2e/tests/test_performance.py index 89e456f..4eb7645 100644 --- a/e2e/tests/test_performance.py +++ b/e2e/tests/test_performance.py @@ -46,7 +46,17 @@ def test_nft_perf_03_tiling_overhead_large_image( _, small_ms = image_detect(image_small, "small.jpg", timeout=20) _, large_ms = image_detect( image_large, "large.jpg", - config=json.dumps({"altitude": 400, "focal_length": 24, "sensor_width": 23.5}), + config=json.dumps( + { + "camera_config": { + "focal_length": 24, + "sensor_width": 23.5, + "current_zoom": 1, + "current_angle": 90, + "current_height": 400, + } + } + ), timeout=20, ) assert large_ms < 30_000.0 diff --git a/e2e/tests/test_single_image.py b/e2e/tests/test_single_image.py index 44d9f31..2be9720 100644 --- a/e2e/tests/test_single_image.py +++ b/e2e/tests/test_single_image.py @@ -149,9 +149,13 @@ def test_ft_p_07_physical_size_filtering_ac4(image_detect, image_small, warm_eng gsd = (sensor_width * altitude) / (focal_length * image_width_px) cfg = json.dumps( { - "altitude": altitude, - "focal_length": focal_length, - "sensor_width": sensor_width, + "camera_config": { + "focal_length": focal_length, + "sensor_width": sensor_width, + "current_zoom": 1, + "current_angle": 90, + "current_height": altitude, + }, } ) body, _ = image_detect(image_small, "img.jpg", config=cfg, timeout=_DETECT_SLOW_TIMEOUT) diff --git a/e2e/tests/test_tiling.py b/e2e/tests/test_tiling.py index d027bbc..fc61f7f 100644 --- a/e2e/tests/test_tiling.py +++ b/e2e/tests/test_tiling.py @@ -3,7 +3,15 @@ import json import pytest _TILING_TIMEOUT = 120 -_GSD = {"altitude": 400, "focal_length": 24, "sensor_width": 23.5} +_GSD = { + "camera_config": { + "focal_length": 24, + "sensor_width": 23.5, + "current_zoom": 1, + "current_angle": 90, + "current_height": 400, + } +} _DUP_THRESHOLD = 0.01 diff --git a/src/ai_config.pxd b/src/ai_config.pxd index 9a77716..db6ae09 100644 --- a/src/ai_config.pxd +++ b/src/ai_config.pxd @@ -12,6 +12,11 @@ cdef class AIRecognitionConfig: cdef public int model_batch_size + cdef public bint has_camera_config + cdef public double current_height + cdef public double current_zoom + cdef public double current_angle + cdef public bint has_altitude cdef public double altitude cdef public double focal_length diff --git a/src/ai_config.pyx b/src/ai_config.pyx index 45dd5c6..2421bca 100644 --- a/src/ai_config.pyx +++ b/src/ai_config.pyx @@ -9,9 +9,12 @@ cdef class AIRecognitionConfig: tracking_intersection_threshold, model_batch_size, big_image_tile_overlap_percent, + camera_config, altitude, focal_length, - sensor_width + sensor_width, + current_zoom, + current_angle ): self.frame_period_recognition = frame_period_recognition self.frame_recognition_seconds = frame_recognition_seconds @@ -25,10 +28,15 @@ cdef class AIRecognitionConfig: self.big_image_tile_overlap_percent = big_image_tile_overlap_percent - self.has_altitude = altitude is not None - self.altitude = 0.0 if altitude is None else float(altitude) - self.focal_length = focal_length - self.sensor_width = sensor_width + self.has_camera_config = camera_config is not None or altitude is not None + self.current_height = 0.0 if altitude is None else float(altitude) + self.current_zoom = float(current_zoom) + self.current_angle = float(current_angle) + + self.has_altitude = self.has_camera_config + self.altitude = self.current_height + self.focal_length = float(focal_length) + self.sensor_width = float(sensor_width) def __str__(self): return (f'frame_seconds : {self.frame_recognition_seconds}, distance_confidence : {self.tracking_distance_confidence}, ' @@ -37,13 +45,57 @@ cdef class AIRecognitionConfig: f'frame_period_recognition : {self.frame_period_recognition}, ' f'big_image_tile_overlap_percent: {self.big_image_tile_overlap_percent}, ' f'model_batch_size: {self.model_batch_size}, ' - f'altitude: {self.altitude if self.has_altitude else None}, ' + f'camera_config: {self.has_camera_config}, ' + f'current_height: {self.current_height if self.has_camera_config else None}, ' + f'current_zoom: {self.current_zoom}, ' + f'current_angle: {self.current_angle}, ' f'focal_length: {self.focal_length}, ' f'sensor_width: {self.sensor_width}' ) @staticmethod cdef AIRecognitionConfig from_dict(dict data): + cdef object camera_config = data.get("camera_config", data.get("cameraConfig", None)) + if camera_config is not None and not isinstance(camera_config, dict): + camera_config = None + + cdef object altitude = data.get("altitude", None) + cdef object focal_length = data.get("focal_length", data.get("focalLength", 24)) + cdef object sensor_width = data.get("sensor_width", data.get("sensorWidth", 23.5)) + cdef object current_zoom = data.get("current_zoom", data.get("currentZoom", 1)) + cdef object current_angle = data.get("current_angle", data.get("currentAngle", 90)) + + if camera_config is not None: + altitude = camera_config.get( + "current_height", + camera_config.get("currentHeight", camera_config.get("altitude", altitude)), + ) + focal_length = camera_config.get( + "focal_length", + camera_config.get("focalLength", focal_length), + ) + sensor_width = camera_config.get( + "sensor_width", + camera_config.get("sensorWidth", sensor_width), + ) + current_zoom = camera_config.get( + "current_zoom", + camera_config.get("currentZoom", current_zoom), + ) + current_angle = camera_config.get( + "current_angle", + camera_config.get("currentAngle", current_angle), + ) + + if focal_length is None: + focal_length = 24 + if sensor_width is None: + sensor_width = 23.5 + if current_zoom is None: + current_zoom = 1 + if current_angle is None: + current_angle = 90 + return AIRecognitionConfig( data.get("frame_period_recognition", 4), data.get("frame_recognition_seconds", 2), @@ -57,7 +109,10 @@ cdef class AIRecognitionConfig: data.get("big_image_tile_overlap_percent", 20), - data.get("altitude", None), - data.get("focal_length", 24), - data.get("sensor_width", 23.5) + camera_config, + altitude, + focal_length, + sensor_width, + current_zoom, + current_angle ) diff --git a/src/inference.pyx b/src/inference.pyx index 7433bc5..27023ed 100644 --- a/src/inference.pyx +++ b/src/inference.pyx @@ -5,6 +5,7 @@ import av import cv2 import numpy as np cimport constants_inf +from libc.math cimport sin from ai_availability_status cimport AIAvailabilityEnum, AIAvailabilityStatus from annotation cimport Detection, Annotation @@ -309,25 +310,42 @@ cdef class Inference: cdef _append_image_frame_entries(self, AIRecognitionConfig ai_config, list all_frame_data, frame, str original_media_name): cdef double ground_sampling_distance + cdef double angle_radians + cdef double angle_scale + cdef double effective_focal_length cdef int model_h, model_w cdef int img_h, img_w cdef bint has_gsd model_h, model_w = self.engine.get_input_shape() img_h, img_w, _ = frame.shape - has_gsd = ai_config.has_altitude and ai_config.focal_length > 0 and ai_config.sensor_width > 0 and img_w > 0 + angle_radians = ai_config.current_angle * 3.141592653589793 / 180.0 + angle_scale = sin(angle_radians) + effective_focal_length = ai_config.focal_length * ai_config.current_zoom + has_gsd = ( + ai_config.has_camera_config + and ai_config.current_height > 0 + and effective_focal_length > 0 + and ai_config.sensor_width > 0 + and angle_scale > 0 + and img_w > 0 + ) ground_sampling_distance = 0.0 if has_gsd: - ground_sampling_distance = ai_config.sensor_width * ai_config.altitude / (ai_config.focal_length * img_w) + ground_sampling_distance = ( + ai_config.sensor_width + * ai_config.current_height + / (effective_focal_length * img_w * angle_scale) + ) constants_inf.log(f'ground sampling distance: {ground_sampling_distance}') else: - constants_inf.log('ground sampling distance: skipped (altitude unavailable)') + constants_inf.log('ground sampling distance: skipped (camera_config unavailable)') if img_h <= 1.5 * model_h and img_w <= 1.5 * model_w: all_frame_data.append((frame, original_media_name, f'{original_media_name}_000000', ground_sampling_distance)) else: if not has_gsd: all_frame_data.append((frame, original_media_name, f'{original_media_name}_000000', ground_sampling_distance)) return - tile_size = int(constants_inf.METERS_IN_TILE / ground_sampling_distance) + tile_size = max(1, int(constants_inf.METERS_IN_TILE / ground_sampling_distance)) constants_inf.log( f'calc tile size: {tile_size}') res = self.split_to_tiles(frame, original_media_name, tile_size, ai_config.big_image_tile_overlap_percent) for tile_frame, omn, tile_name in res: @@ -362,8 +380,8 @@ cdef class Inference: cdef split_to_tiles(self, frame, str media_stem, tile_size, overlap_percent): constants_inf.log(f'splitting image {media_stem} to tiles...') img_h, img_w, _ = frame.shape - stride_w = int(tile_size * (1 - overlap_percent / 100)) - stride_h = int(tile_size * (1 - overlap_percent / 100)) + stride_w = max(1, int(tile_size * (1 - overlap_percent / 100))) + stride_h = max(1, int(tile_size * (1 - overlap_percent / 100))) results = [] original_media_name = media_stem diff --git a/src/main.py b/src/main.py index f8470a5..7947114 100644 --- a/src/main.py +++ b/src/main.py @@ -155,6 +155,14 @@ class HealthResponse(BaseModel): errorMessage: Optional[str] = None +class CameraConfigDto(BaseModel): + focal_length: float = 24 + sensor_width: float = 23.5 + current_zoom: float = 1 + current_angle: float = 90 + current_height: Optional[float] = None + + class AIConfigDto(BaseModel): frame_period_recognition: int = 4 frame_recognition_seconds: int = 2 @@ -164,6 +172,7 @@ class AIConfigDto(BaseModel): tracking_intersection_threshold: float = 0.6 model_batch_size: int = 8 big_image_tile_overlap_percent: int = 20 + camera_config: Optional[CameraConfigDto] = None altitude: Optional[float] = None focal_length: float = 24 sensor_width: float = 23.5 @@ -218,9 +227,12 @@ _AI_SETTINGS_FIELD_KEYS = ( "BigImageTileOverlapPercent", ), ), +) + +_CAMERA_SETTINGS_FIELD_KEYS = ( ( - "altitude", - ("altitude", "Altitude"), + "current_height", + ("current_height", "currentHeight", "CurrentHeight", "altitude", "Altitude"), ), ( "focal_length", @@ -230,6 +242,14 @@ _AI_SETTINGS_FIELD_KEYS = ( "sensor_width", ("sensor_width", "sensorWidth", "SensorWidth"), ), + ( + "current_zoom", + ("current_zoom", "currentZoom", "CurrentZoom"), + ), + ( + "current_angle", + ("current_angle", "currentAngle", "CurrentAngle"), + ), ) @@ -249,6 +269,21 @@ def _merged_annotation_settings_payload(raw: object) -> dict: if key in merged and merged[key] is not None: out[snake] = merged[key] break + camera_source = {} + for key in ("camera_config", "cameraConfig", "cameraSettings"): + value = raw.get(key) + if isinstance(value, dict): + camera_source.update(value) + camera_merged = dict(merged) + camera_merged.update(camera_source) + camera_config = {} + for snake, aliases in _CAMERA_SETTINGS_FIELD_KEYS: + for key in aliases: + if key in camera_merged and camera_merged[key] is not None: + camera_config[snake] = camera_merged[key] + break + if camera_config: + out["camera_config"] = camera_config return out @@ -306,7 +341,13 @@ def _resolve_media_for_detect( cfg.update(_merged_annotation_settings_payload(raw)) if override is not None: for k, v in override.model_dump(exclude_defaults=True).items(): - cfg[k] = v + if k == "camera_config" and isinstance(v, dict): + existing = cfg.get("camera_config") + camera_cfg = dict(existing) if isinstance(existing, dict) else {} + camera_cfg.update(v) + cfg[k] = camera_cfg + else: + cfg[k] = v media_path = annotations_client.fetch_media_path(media_id, bearer) if not media_path: raise HTTPException( diff --git a/tests/test_ai_config_from_dict.py b/tests/test_ai_config_from_dict.py index 8d8f66b..be61de4 100644 --- a/tests/test_ai_config_from_dict.py +++ b/tests/test_ai_config_from_dict.py @@ -1,24 +1,61 @@ def test_ai_config_from_dict_defaults(): + # Arrange from inference import ai_config_from_dict + # Act cfg = ai_config_from_dict({}) + # Assert assert cfg.model_batch_size == 8 assert cfg.frame_period_recognition == 4 assert cfg.frame_recognition_seconds == 2 + assert cfg.has_camera_config is False assert cfg.has_altitude is False def test_ai_config_from_dict_altitude_override_sets_flag(): + # Arrange from inference import ai_config_from_dict + # Act cfg = ai_config_from_dict({"altitude": 400}) + # Assert + assert cfg.has_camera_config is True assert cfg.has_altitude is True assert cfg.altitude == 400 + assert cfg.current_height == 400 def test_ai_config_from_dict_overrides(): + # Arrange from inference import ai_config_from_dict + # Act cfg = ai_config_from_dict({"model_batch_size": 4, "probability_threshold": 0.5}) + # Assert assert cfg.model_batch_size == 4 assert cfg.probability_threshold == 0.5 + + +def test_ai_config_from_dict_camera_config_sets_physical_filter_fields(): + # Arrange + from inference import ai_config_from_dict + + # Act + cfg = ai_config_from_dict( + { + "camera_config": { + "focal_length": 35, + "sensor_width": 36, + "current_zoom": 2, + "current_angle": 80, + "current_height": 300, + } + } + ) + # Assert + assert cfg.has_camera_config is True + assert cfg.current_height == 300 + assert cfg.focal_length == 35 + assert cfg.sensor_width == 36 + assert cfg.current_zoom == 2 + assert cfg.current_angle == 80 diff --git a/tests/test_az174_db_driven_config.py b/tests/test_az174_db_driven_config.py index 955738a..50afa3b 100644 --- a/tests/test_az174_db_driven_config.py +++ b/tests/test_az174_db_driven_config.py @@ -61,7 +61,9 @@ def test_merged_annotation_settings_pascal_case(): # Assert assert out["frame_period_recognition"] == 5 assert out["probability_threshold"] == 0.4 - assert out["altitude"] == 300 + assert out["camera_config"]["current_height"] == 300 + assert out["camera_config"]["focal_length"] == 35 + assert out["camera_config"]["sensor_width"] == 36 def test_merged_annotation_nested_sections(): @@ -76,7 +78,7 @@ def test_merged_annotation_nested_sections(): out = _merged_annotation_settings_payload(raw) # Assert assert out["model_batch_size"] == 4 - assert out["altitude"] == 100 + assert out["camera_config"]["current_height"] == 100 def test_resolve_media_for_detect_uses_api_path_and_defaults_when_api_empty(): @@ -105,7 +107,7 @@ def test_resolve_media_for_detect_override_wins(): mock_ann = MagicMock() mock_ann.fetch_user_ai_settings.return_value = { "probabilityThreshold": 0.2, - "altitude": 500, + "camera_config": {"current_height": 500}, } mock_ann.fetch_media_path.return_value = "/m/v.mp4" with patch("main.annotations_client", mock_ann): @@ -113,11 +115,42 @@ def test_resolve_media_for_detect_override_wins(): cfg, path = main._resolve_media_for_detect("vid-1", tm, override) # Assert assert cfg["probability_threshold"] == 0.99 - assert cfg["altitude"] == 500 + assert cfg["camera_config"]["current_height"] == 500 assert path == "/m/v.mp4" assert "paths" not in cfg +def test_resolve_media_for_detect_merges_camera_config_override(): + # Arrange + import main + + tm = main.TokenManager(_access_jwt(), "") + override = main.AIConfigDto( + camera_config=main.CameraConfigDto(current_height=500) + ) + mock_ann = MagicMock() + mock_ann.fetch_user_ai_settings.return_value = { + "camera_config": { + "focal_length": 35, + "sensor_width": 36, + "current_zoom": 2, + "current_angle": 80, + "current_height": 300, + } + } + mock_ann.fetch_media_path.return_value = "/m/v.mp4" + with patch("main.annotations_client", mock_ann): + # Act + cfg, path = main._resolve_media_for_detect("vid-1", tm, override) + # Assert + assert cfg["camera_config"]["current_height"] == 500 + assert cfg["camera_config"]["focal_length"] == 35 + assert cfg["camera_config"]["sensor_width"] == 36 + assert cfg["camera_config"]["current_zoom"] == 2 + assert cfg["camera_config"]["current_angle"] == 80 + assert path == "/m/v.mp4" + + def test_resolve_media_for_detect_omits_altitude_when_not_provided(): # Arrange import main @@ -130,7 +163,7 @@ def test_resolve_media_for_detect_omits_altitude_when_not_provided(): # Act cfg, path = main._resolve_media_for_detect("vid-2", tm, None) # Assert - assert "altitude" not in cfg + assert "camera_config" not in cfg assert cfg["probability_threshold"] == 0.2 assert path == "/m/v.mp4"