mirror of
https://github.com/azaion/detections.git
synced 2026-04-22 22:26:33 +00:00
Compare commits
2 Commits
3b61dc4564
..
dev
| Author | SHA1 | Date | |
|---|---|---|---|
| 00164d9e54 | |||
| 5cfcdb5fd5 |
@@ -5,7 +5,7 @@
|
|||||||
- Detections with confidence below `probability_threshold` (default: 0.25) are filtered out.
|
- Detections with confidence below `probability_threshold` (default: 0.25) are filtered out.
|
||||||
- Overlapping detections with containment ratio > `tracking_intersection_threshold` (default: 0.6) are deduplicated, keeping the higher-confidence detection.
|
- Overlapping detections with containment ratio > `tracking_intersection_threshold` (default: 0.6) are deduplicated, keeping the higher-confidence detection.
|
||||||
- Tile duplicate detections are identified when all bounding box coordinates differ by less than 0.01 (TILE_DUPLICATE_CONFIDENCE_THRESHOLD).
|
- Tile duplicate detections are identified when all bounding box coordinates differ by less than 0.01 (TILE_DUPLICATE_CONFIDENCE_THRESHOLD).
|
||||||
- Physical size filtering: detections exceeding `max_object_size_meters` for their class (defined in classes.json, range 2–20 meters) are removed.
|
- Physical size filtering: detections exceeding `max_object_size_meters` for their class (defined in classes.json, range 2–20 meters) are removed when ground sampling distance can be computed from camera settings.
|
||||||
|
|
||||||
## Video Processing
|
## Video Processing
|
||||||
|
|
||||||
@@ -17,7 +17,7 @@
|
|||||||
|
|
||||||
- Images ≤ 1.5× model dimensions (1280×1280): processed as single frame.
|
- Images ≤ 1.5× model dimensions (1280×1280): processed as single frame.
|
||||||
- Larger images: tiled based on ground sampling distance. Tile physical size: 25 meters (METERS_IN_TILE). Tile overlap: `big_image_tile_overlap_percent` (default: 20%).
|
- Larger images: tiled based on ground sampling distance. Tile physical size: 25 meters (METERS_IN_TILE). Tile overlap: `big_image_tile_overlap_percent` (default: 20%).
|
||||||
- GSD calculation: `sensor_width * altitude / (focal_length * image_width)`.
|
- GSD calculation: `sensor_width * altitude / (focal_length * image_width)`, computed only when `altitude` is provided and `focal_length`, `sensor_width`, and the image width are all greater than zero.
|
||||||
|
|
||||||
## API
|
## API
|
||||||
|
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ Media path is resolved from the Annotations service via `GET /api/media/{media_i
|
|||||||
| tracking_intersection_threshold | float | 0.6 | Overlap ratio for NMS deduplication |
|
| tracking_intersection_threshold | float | 0.6 | Overlap ratio for NMS deduplication |
|
||||||
| model_batch_size | int | 8 | Inference batch size |
|
| model_batch_size | int | 8 | Inference batch size |
|
||||||
| big_image_tile_overlap_percent | int | 20 | Tile overlap for large images (0-100%) |
|
| big_image_tile_overlap_percent | int | 20 | Tile overlap for large images (0-100%) |
|
||||||
| altitude | float | 400 | Camera altitude in meters |
|
| altitude | float | optional | Camera altitude in meters. When omitted, GSD-based size filtering and image tiling are skipped. |
|
||||||
| focal_length | float | 24 | Camera focal length in mm |
|
| focal_length | float | 24 | Camera focal length in mm |
|
||||||
| sensor_width | float | 23.5 | Camera sensor width in mm |
|
| sensor_width | float | 23.5 | Camera sensor width in mm |
|
||||||
|
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ Data class holding all AI recognition configuration parameters, with factory met
|
|||||||
| `tracking_intersection_threshold` | double | 0.6 | IoU threshold for overlapping detection removal |
|
| `tracking_intersection_threshold` | double | 0.6 | IoU threshold for overlapping detection removal |
|
||||||
| `model_batch_size` | int | 1 | Batch size for inference |
|
| `model_batch_size` | int | 1 | Batch size for inference |
|
||||||
| `big_image_tile_overlap_percent` | int | 20 | Tile overlap percentage for large image splitting |
|
| `big_image_tile_overlap_percent` | int | 20 | Tile overlap percentage for large image splitting |
|
||||||
| `altitude` | double | 400 | Camera altitude in meters |
|
| `altitude` | double? | optional | Camera altitude in meters. When missing, GSD-based size filtering and physical-size image tiling are skipped. |
|
||||||
| `focal_length` | double | 24 | Camera focal length in mm |
|
| `focal_length` | double | 24 | Camera focal length in mm |
|
||||||
| `sensor_width` | double | 23.5 | Camera sensor width in mm |
|
| `sensor_width` | double | 23.5 | Camera sensor width in mm |
|
||||||
|
|
||||||
@@ -51,7 +51,7 @@ Data class holding all AI recognition configuration parameters, with factory met
|
|||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
Camera/altitude parameters (`altitude`, `focal_length`, `sensor_width`) are used for ground sampling distance calculation in aerial image processing.
|
Camera/altitude parameters (`altitude`, `focal_length`, `sensor_width`) are used for ground sampling distance calculation in aerial image processing. If `altitude` is missing, the service skips GSD-based size filtering and does not tile large images by physical size.
|
||||||
|
|
||||||
## External Integrations
|
## External Integrations
|
||||||
|
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ cdef class AIRecognitionConfig:
|
|||||||
|
|
||||||
cdef public int model_batch_size
|
cdef public int model_batch_size
|
||||||
|
|
||||||
|
cdef public bint has_altitude
|
||||||
cdef public double altitude
|
cdef public double altitude
|
||||||
cdef public double focal_length
|
cdef public double focal_length
|
||||||
cdef public double sensor_width
|
cdef public double sensor_width
|
||||||
|
|||||||
+4
-3
@@ -25,7 +25,8 @@ cdef class AIRecognitionConfig:
|
|||||||
|
|
||||||
self.big_image_tile_overlap_percent = big_image_tile_overlap_percent
|
self.big_image_tile_overlap_percent = big_image_tile_overlap_percent
|
||||||
|
|
||||||
self.altitude = altitude
|
self.has_altitude = altitude is not None
|
||||||
|
self.altitude = 0.0 if altitude is None else float(altitude)
|
||||||
self.focal_length = focal_length
|
self.focal_length = focal_length
|
||||||
self.sensor_width = sensor_width
|
self.sensor_width = sensor_width
|
||||||
|
|
||||||
@@ -36,7 +37,7 @@ cdef class AIRecognitionConfig:
|
|||||||
f'frame_period_recognition : {self.frame_period_recognition}, '
|
f'frame_period_recognition : {self.frame_period_recognition}, '
|
||||||
f'big_image_tile_overlap_percent: {self.big_image_tile_overlap_percent}, '
|
f'big_image_tile_overlap_percent: {self.big_image_tile_overlap_percent}, '
|
||||||
f'model_batch_size: {self.model_batch_size}, '
|
f'model_batch_size: {self.model_batch_size}, '
|
||||||
f'altitude: {self.altitude}, '
|
f'altitude: {self.altitude if self.has_altitude else None}, '
|
||||||
f'focal_length: {self.focal_length}, '
|
f'focal_length: {self.focal_length}, '
|
||||||
f'sensor_width: {self.sensor_width}'
|
f'sensor_width: {self.sensor_width}'
|
||||||
)
|
)
|
||||||
@@ -56,7 +57,7 @@ cdef class AIRecognitionConfig:
|
|||||||
|
|
||||||
data.get("big_image_tile_overlap_percent", 20),
|
data.get("big_image_tile_overlap_percent", 20),
|
||||||
|
|
||||||
data.get("altitude", 400),
|
data.get("altitude", None),
|
||||||
data.get("focal_length", 24),
|
data.get("focal_length", 24),
|
||||||
data.get("sensor_width", 23.5)
|
data.get("sensor_width", 23.5)
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -311,13 +311,22 @@ cdef class Inference:
|
|||||||
cdef double ground_sampling_distance
|
cdef double ground_sampling_distance
|
||||||
cdef int model_h, model_w
|
cdef int model_h, model_w
|
||||||
cdef int img_h, img_w
|
cdef int img_h, img_w
|
||||||
|
cdef bint has_gsd
|
||||||
model_h, model_w = self.engine.get_input_shape()
|
model_h, model_w = self.engine.get_input_shape()
|
||||||
img_h, img_w, _ = frame.shape
|
img_h, img_w, _ = frame.shape
|
||||||
|
has_gsd = ai_config.has_altitude and ai_config.focal_length > 0 and ai_config.sensor_width > 0 and img_w > 0
|
||||||
|
ground_sampling_distance = 0.0
|
||||||
|
if has_gsd:
|
||||||
ground_sampling_distance = ai_config.sensor_width * ai_config.altitude / (ai_config.focal_length * img_w)
|
ground_sampling_distance = ai_config.sensor_width * ai_config.altitude / (ai_config.focal_length * img_w)
|
||||||
constants_inf.log(<str>f'ground sampling distance: {ground_sampling_distance}')
|
constants_inf.log(<str>f'ground sampling distance: {ground_sampling_distance}')
|
||||||
|
else:
|
||||||
|
constants_inf.log(<str>'ground sampling distance: skipped (altitude unavailable)')
|
||||||
if img_h <= 1.5 * model_h and img_w <= 1.5 * model_w:
|
if img_h <= 1.5 * model_h and img_w <= 1.5 * model_w:
|
||||||
all_frame_data.append((frame, original_media_name, f'{original_media_name}_000000', ground_sampling_distance))
|
all_frame_data.append((frame, original_media_name, f'{original_media_name}_000000', ground_sampling_distance))
|
||||||
else:
|
else:
|
||||||
|
if not has_gsd:
|
||||||
|
all_frame_data.append((frame, original_media_name, f'{original_media_name}_000000', ground_sampling_distance))
|
||||||
|
return
|
||||||
tile_size = int(constants_inf.METERS_IN_TILE / ground_sampling_distance)
|
tile_size = int(constants_inf.METERS_IN_TILE / ground_sampling_distance)
|
||||||
constants_inf.log(<str> f'calc tile size: {tile_size}')
|
constants_inf.log(<str> f'calc tile size: {tile_size}')
|
||||||
res = self.split_to_tiles(frame, original_media_name, tile_size, ai_config.big_image_tile_overlap_percent)
|
res = self.split_to_tiles(frame, original_media_name, tile_size, ai_config.big_image_tile_overlap_percent)
|
||||||
@@ -410,6 +419,12 @@ cdef class Inference:
|
|||||||
if annotation.detections:
|
if annotation.detections:
|
||||||
constants_inf.log(<str> f'Initial ann: {annotation}')
|
constants_inf.log(<str> f'Initial ann: {annotation}')
|
||||||
|
|
||||||
|
if ground_sampling_distance <= 0:
|
||||||
|
if not annotation.detections:
|
||||||
|
return <bint>False
|
||||||
|
constants_inf.log(<str>'Skipping physical-size filtering (ground sampling distance unavailable)')
|
||||||
|
return <bint>True
|
||||||
|
|
||||||
cdef list[Detection] valid_detections = []
|
cdef list[Detection] valid_detections = []
|
||||||
for det in annotation.detections:
|
for det in annotation.detections:
|
||||||
m_w = det.w * img_w * ground_sampling_distance
|
m_w = det.w * img_w * ground_sampling_distance
|
||||||
|
|||||||
+1
-1
@@ -163,7 +163,7 @@ class AIConfigDto(BaseModel):
|
|||||||
tracking_intersection_threshold: float = 0.6
|
tracking_intersection_threshold: float = 0.6
|
||||||
model_batch_size: int = 8
|
model_batch_size: int = 8
|
||||||
big_image_tile_overlap_percent: int = 20
|
big_image_tile_overlap_percent: int = 20
|
||||||
altitude: float = 400
|
altitude: Optional[float] = None
|
||||||
focal_length: float = 24
|
focal_length: float = 24
|
||||||
sensor_width: float = 23.5
|
sensor_width: float = 23.5
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,15 @@ def test_ai_config_from_dict_defaults():
|
|||||||
assert cfg.model_batch_size == 8
|
assert cfg.model_batch_size == 8
|
||||||
assert cfg.frame_period_recognition == 4
|
assert cfg.frame_period_recognition == 4
|
||||||
assert cfg.frame_recognition_seconds == 2
|
assert cfg.frame_recognition_seconds == 2
|
||||||
|
assert cfg.has_altitude is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_ai_config_from_dict_altitude_override_sets_flag():
|
||||||
|
from inference import ai_config_from_dict
|
||||||
|
|
||||||
|
cfg = ai_config_from_dict({"altitude": 400})
|
||||||
|
assert cfg.has_altitude is True
|
||||||
|
assert cfg.altitude == 400
|
||||||
|
|
||||||
|
|
||||||
def test_ai_config_from_dict_overrides():
|
def test_ai_config_from_dict_overrides():
|
||||||
|
|||||||
@@ -118,6 +118,23 @@ def test_resolve_media_for_detect_override_wins():
|
|||||||
assert "paths" not in cfg
|
assert "paths" not in cfg
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_media_for_detect_omits_altitude_when_not_provided():
|
||||||
|
# Arrange
|
||||||
|
import main
|
||||||
|
|
||||||
|
tm = main.TokenManager(_access_jwt(), "")
|
||||||
|
mock_ann = MagicMock()
|
||||||
|
mock_ann.fetch_user_ai_settings.return_value = {"probabilityThreshold": 0.2}
|
||||||
|
mock_ann.fetch_media_path.return_value = "/m/v.mp4"
|
||||||
|
with patch("main.annotations_client", mock_ann):
|
||||||
|
# Act
|
||||||
|
cfg, path = main._resolve_media_for_detect("vid-2", tm, None)
|
||||||
|
# Assert
|
||||||
|
assert "altitude" not in cfg
|
||||||
|
assert cfg["probability_threshold"] == 0.2
|
||||||
|
assert path == "/m/v.mp4"
|
||||||
|
|
||||||
|
|
||||||
def test_resolve_media_for_detect_raises_when_no_media_path():
|
def test_resolve_media_for_detect_raises_when_no_media_path():
|
||||||
# Arrange
|
# Arrange
|
||||||
import main
|
import main
|
||||||
|
|||||||
Reference in New Issue
Block a user