Add AIAvailabilityStatus and AIRecognitionConfig classes for AI model management

- Introduced `AIAvailabilityStatus` class to manage the availability status of AI models, including methods for setting status and logging messages.
- Added `AIRecognitionConfig` class to encapsulate configuration parameters for AI recognition, with a static method for creating instances from dictionaries.
- Implemented enums for AI availability states to enhance clarity and maintainability.
- Updated related Cython files to support the new classes and ensure proper type handling.

These changes aim to improve the structure and functionality of the AI model management system, facilitating better status tracking and configuration handling.
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-03-31 05:49:51 +03:00
parent fc57d677b4
commit 8ce40a9385
43 changed files with 1190 additions and 462 deletions
+17
View File
@@ -0,0 +1,17 @@
# Lifecycle states of the AI model. Gaps between values leave room for
# future intermediate states; values >= 200 describe outcomes.
cdef enum AIAvailabilityEnum:
    NONE = 0          # no model activity yet
    DOWNLOADING = 10  # fetching model/engine bytes from the loader service
    CONVERTING = 20   # converting the source model to an engine format
    UPLOADING = 30    # uploading the converted engine back to storage
    ENABLED = 200     # engine ready for inference
    WARNING = 300     # usable, but a non-fatal problem occurred
    ERROR = 500       # model could not be made available
from cython cimport pymutex
cdef class AIAvailabilityStatus:
    # Thread-safe holder for the current AI model availability state.
    cdef int status            # current AIAvailabilityEnum value
    cdef str error_message     # last error text, "" when none
    cdef pymutex _lock         # guards status/error_message
    cdef set_status(self, int status, str error_message=*)
+37
View File
@@ -0,0 +1,37 @@
cimport cython
cimport constants_inf
# Human-readable labels for AIAvailabilityEnum values, used by
# AIAvailabilityStatus when composing __str__ output and log lines.
AIStatus2Text = {
    AIAvailabilityEnum.NONE: "None",
    AIAvailabilityEnum.DOWNLOADING: "Downloading",
    AIAvailabilityEnum.CONVERTING: "Converting",
    AIAvailabilityEnum.UPLOADING: "Uploading",
    AIAvailabilityEnum.ENABLED: "Enabled",
    AIAvailabilityEnum.WARNING: "Warning",
    AIAvailabilityEnum.ERROR: "Error",
}
cdef class AIAvailabilityStatus:
    """Thread-safe container for the AI model availability state.

    All reads and writes of ``status``/``error_message`` happen under
    ``_lock`` so concurrent download/convert threads and readers stay
    consistent.
    """

    def __init__(self):
        self.status = AIAvailabilityEnum.NONE
        self.error_message = ""

    def __str__(self):
        with self._lock:
            status_text = AIStatus2Text.get(self.status, "Unknown")
            error_text = self.error_message if self.error_message else ""
        return f"{status_text} {error_text}"

    cdef set_status(self, int status, str error_message=""):
        """Atomically update the state and log the transition.

        Error transitions go through logerror, others through log. The log
        line always carries the status text: previously the error branch
        logged only the bare error message, losing which state failed.
        """
        log_message = ""
        with self._lock:
            self.status = status
            self.error_message = error_message
            status_text = AIStatus2Text.get(self.status, "Unknown")
            error_text = self.error_message if self.error_message else ""
            log_message = f"{status_text} {error_text}"
        # Log outside the lock to keep the critical section small.
        if error_message:
            constants_inf.logerror(<str>log_message)
        else:
            constants_inf.log(<str>log_message)
+21
View File
@@ -0,0 +1,21 @@
cdef class AIRecognitionConfig:
    # Recognition parameters; see from_dict for dictionary keys and defaults.
    cdef public double frame_recognition_seconds
    cdef public int frame_period_recognition        # process every Nth frame
    cdef public double probability_threshold        # minimum detection confidence
    cdef public double tracking_distance_confidence
    cdef public double tracking_probability_increase
    cdef public double tracking_intersection_threshold  # overlap-removal ratio
    cdef public int big_image_tile_overlap_percent
    cdef public list[str] paths                     # media files to process
    cdef public int model_batch_size
    cdef public double altitude                     # used for ground sampling distance — units presumably meters, confirm
    cdef public double focal_length                 # presumably millimeters — confirm
    cdef public double sensor_width                 # presumably millimeters — confirm
    @staticmethod
    cdef AIRecognitionConfig from_dict(dict data)
+66
View File
@@ -0,0 +1,66 @@
cdef class AIRecognitionConfig:
    """Bag of recognition parameters, normally built via from_dict."""

    def __init__(self,
                 frame_period_recognition,
                 frame_recognition_seconds,
                 probability_threshold,
                 tracking_distance_confidence,
                 tracking_probability_increase,
                 tracking_intersection_threshold,
                 paths,
                 model_batch_size,
                 big_image_tile_overlap_percent,
                 altitude,
                 focal_length,
                 sensor_width
                 ):
        self.frame_period_recognition = frame_period_recognition
        self.frame_recognition_seconds = frame_recognition_seconds
        self.probability_threshold = probability_threshold
        self.tracking_distance_confidence = tracking_distance_confidence
        self.tracking_probability_increase = tracking_probability_increase
        self.tracking_intersection_threshold = tracking_intersection_threshold
        self.paths = paths
        self.model_batch_size = model_batch_size
        self.big_image_tile_overlap_percent = big_image_tile_overlap_percent
        self.altitude = altitude
        self.focal_length = focal_length
        self.sensor_width = sensor_width

    def __str__(self):
        parts = [
            f'frame_seconds : {self.frame_recognition_seconds}',
            f'distance_confidence : {self.tracking_distance_confidence}',
            f'probability_increase : {self.tracking_probability_increase}',
            f'intersection_threshold : {self.tracking_intersection_threshold}',
            f'frame_period_recognition : {self.frame_period_recognition}',
            f'big_image_tile_overlap_percent: {self.big_image_tile_overlap_percent}',
            f'paths: {self.paths}',
            f'model_batch_size: {self.model_batch_size}',
            f'altitude: {self.altitude}',
            f'focal_length: {self.focal_length}',
            f'sensor_width: {self.sensor_width}',
        ]
        return ', '.join(parts)

    @staticmethod
    cdef AIRecognitionConfig from_dict(dict data):
        """Build a config from *data*, falling back to a default per key.

        Keyword arguments make the mapping explicit and immune to the
        positional-order of __init__.
        """
        return AIRecognitionConfig(
            frame_period_recognition=data.get("frame_period_recognition", 4),
            frame_recognition_seconds=data.get("frame_recognition_seconds", 2),
            probability_threshold=data.get("probability_threshold", 0.25),
            tracking_distance_confidence=data.get("tracking_distance_confidence", 0.0),
            tracking_probability_increase=data.get("tracking_probability_increase", 0.0),
            tracking_intersection_threshold=data.get("tracking_intersection_threshold", 0.6),
            paths=data.get("paths", []),
            model_batch_size=data.get("model_batch_size", 8),
            big_image_tile_overlap_percent=data.get("big_image_tile_overlap_percent", 20),
            altitude=data.get("altitude", 400),
            focal_length=data.get("focal_length", 24),
            sensor_width=data.get("sensor_width", 23.5)
        )
+12
View File
@@ -0,0 +1,12 @@
cdef class Detection:
    # One detection box; x/y are the normalized center, w/h the normalized size.
    cdef public double x, y, w, h, confidence
    cdef public int cls
    cdef bint overlaps(self, Detection det2, float confidence_threshold)
cdef class Annotation:
    # A named group of detections for one frame or image.
    cdef public str name
    cdef public str original_media_name
    cdef long time                        # timestamp in milliseconds
    cdef public list[Detection] detections
    cdef public bytes image               # optional JPEG bytes of the frame
+50
View File
@@ -0,0 +1,50 @@
cimport constants_inf
cdef class Detection:
    """One detected box in normalized center-x/center-y/width/height form."""

    def __init__(self, double x, double y, double w, double h, int cls, double confidence):
        self.x = x
        self.y = y
        self.w = w
        self.h = h
        self.cls = cls
        self.confidence = confidence

    def __str__(self):
        return f'{self.cls}: {self.x:.2f} {self.y:.2f} {self.w:.2f} {self.h:.2f}, prob: {(self.confidence*100):.1f}%'

    def __eq__(self, other):
        # Detections are "equal" when every coordinate differs by no more
        # than TILE_DUPLICATE_CONFIDENCE_THRESHOLD.
        # NOTE(review): class id and confidence are deliberately ignored, and
        # a confidence-named constant is used as a spatial tolerance — confirm
        # this is the intended tile-duplicate semantics.
        if not isinstance(other, Detection):
            return False
        if max(abs(self.x - other.x),
               abs(self.y - other.y),
               abs(self.w - other.w),
               abs(self.h - other.h)) > constants_inf.TILE_DUPLICATE_CONFIDENCE_THRESHOLD:
            return False
        return True

    cdef bint overlaps(self, Detection det2, float confidence_threshold):
        """Return True when the intersection area exceeds *confidence_threshold*
        times the smaller box's area."""
        cdef double overlap_x = 0.5 * (self.w + det2.w) - abs(self.x - det2.x)
        cdef double overlap_y = 0.5 * (self.h + det2.h) - abs(self.y - det2.y)
        cdef double overlap_area = <double>(max(0.0, overlap_x) * max(0.0, overlap_y))
        cdef double min_area = min(self.w * self.h, det2.w * det2.h)
        # Guard degenerate (zero-area) boxes: previously this divided by zero
        # and raised ZeroDivisionError; treat them as non-overlapping.
        if min_area <= 0.0:
            return <bint>False
        return <bint>(overlap_area / min_area > confidence_threshold)
cdef class Annotation:
    """A named set of detections for one frame/image, plus an optional JPEG."""

    def __init__(self, str name, str original_media_name, long ms, list[Detection] detections):
        self.name = name
        self.original_media_name = original_media_name
        self.time = ms
        self.image = b''
        if detections is None:
            self.detections = []
        else:
            self.detections = detections

    def __str__(self):
        if not self.detections:
            return f"{self.name}: No detections"
        parts = [
            f"class: {d.cls} {d.confidence * 100:.1f}% ({d.x:.2f}, {d.y:.2f}) ({d.w:.2f}, {d.h:.2f})"
            for d in self.detections
        ]
        joined = ", ".join(parts)
        return f"{self.name}: {joined}"
+29
View File
@@ -0,0 +1,29 @@
# Shared constants, logging helpers and class-metadata declarations used
# across the inference extension modules.
cdef str CONFIG_FILE
cdef str AI_ONNX_MODEL_FILE
cdef str CDN_CONFIG
cdef str MODELS_FOLDER
cdef int SMALL_SIZE_KB
cdef str SPLIT_SUFFIX
cdef double TILE_DUPLICATE_CONFIDENCE_THRESHOLD
cdef int METERS_IN_TILE
cdef log(str log_message)    # info-level log helper
cdef logerror(str error)     # error-level log helper
cdef format_time(long ms)    # fixed-width HMMSSF timestamp string
cdef dict[int, AnnotationClass] annotations_dict  # class id -> metadata
cdef class AnnotationClass:
    # Metadata for one detection class.
    cdef public int id
    cdef public str name
    cdef public str color
    cdef public int max_object_size_meters
cdef enum WeatherMode:
    # Class-id offsets for weather-specific variants of each class.
    Norm = 0
    Wint = 20
    Night = 40
+95
View File
@@ -0,0 +1,95 @@
import json
import os
import sys
from loguru import logger
# File names and tunables shared by the inference modules.
cdef str CONFIG_FILE = "config.yaml"
cdef str AI_ONNX_MODEL_FILE = "azaion.onnx"   # source ONNX model file name
cdef str CDN_CONFIG = "cdn.yaml"
cdef str MODELS_FOLDER = "models"             # models folder in remote storage
cdef int SMALL_SIZE_KB = 3
cdef str SPLIT_SUFFIX = "!split!"
# Spatial tolerance used by Detection.__eq__ when matching tile duplicates.
cdef double TILE_DUPLICATE_CONFIDENCE_THRESHOLD = <double>0.01
cdef int METERS_IN_TILE = 25
cdef class AnnotationClass:
    """Metadata for one detection class: id, display name, color, max size."""

    def __init__(self, id, name, color, max_object_size_meters):
        self.id = id
        self.name = name
        self.color = color
        self.max_object_size_meters = max_object_size_meters

    def __str__(self):
        fields = (self.id, self.name, self.color, self.max_object_size_meters)
        return " ".join(str(field) for field in fields)
# Each weather mode shifts class ids by this amount (matches WeatherMode).
cdef int weather_switcher_increase = 20
WEATHER_MODE_NAMES = {
    Norm: "Norm",
    Wint: "Wint",
    Night: "Night"
}
# Load class metadata from classes.json (path overridable via env var) and
# build annotations_dict with one entry per class per weather mode.
_classes_path = os.environ.get("CLASSES_JSON_PATH", "classes.json")
with open(_classes_path, 'r', encoding='utf-8') as f:
    j = json.loads(f.read())
annotations_dict = {}
for i in range(0, weather_switcher_increase * 3, weather_switcher_increase):
    for cl in j:
        # Weather variants get an offset id and a "Name(Mode)" display name;
        # the base (Norm, offset 0) keeps the plain name.
        id = i + cl['Id']
        mode_name = WEATHER_MODE_NAMES.get(i, "Unknown")
        name = cl['Name'] if i == 0 else f'{cl["Name"]}({mode_name})'
        annotations_dict[id] = AnnotationClass(id, name, cl['Color'], cl['MaxSizeM'])
# Configure loguru: a daily-rotated file log plus split stdout/stderr sinks.
_log_dir = os.environ.get("LOG_DIR", "Logs")
os.makedirs(_log_dir, exist_ok=True)
logger.remove()  # drop loguru's default sink before installing ours
log_format = "[{time:HH:mm:ss} {level}] {message}"
logger.add(
    sink=f"{_log_dir}/log_inference_{{time:YYYYMMDD}}.txt",
    level="INFO",
    format=log_format,
    enqueue=True,       # queued writes, safe across threads
    rotation="1 day",
    retention="30 days",
)
# stdout carries informational levels only; warnings and errors go to stderr.
logger.add(
    sys.stdout,
    level="DEBUG",
    format=log_format,
    filter=lambda record: record["level"].name in ("INFO", "DEBUG", "SUCCESS"),
    colorize=True
)
logger.add(
    sys.stderr,
    level="WARNING",
    format=log_format,
    colorize=True
)
def get_annotation_name(int cls_id):
    """Return the display name for *cls_id*, or "" when the id is unknown."""
    annotation = annotations_dict.get(cls_id)
    if annotation is None:
        return ""
    return (<AnnotationClass>annotation).name
# Thin wrappers so extension modules log through one configured logger.
cdef log(str log_message):
    logger.info(log_message)
cdef logerror(str error):
    logger.error(error)
cdef format_time(long ms):
    """Format *ms* as a fixed-width 6-char HMMSSF string.

    H is hours modulo 10 (single digit, wraps every 10 hours), MM minutes,
    SS seconds, F hundreds of milliseconds.
    """
    hours, remainder = divmod(ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    seconds, remainder = divmod(remainder, 1000)
    tenths = remainder // 100
    return f"{hours % 10}{minutes:02}{seconds:02}{tenths}"
+52
View File
@@ -0,0 +1,52 @@
import platform
import sys
def _check_tensor_gpu_index():
try:
import pynvml
pynvml.nvmlInit()
device_count = pynvml.nvmlDeviceGetCount()
if device_count == 0:
return -1
for i in range(device_count):
handle = pynvml.nvmlDeviceGetHandleByIndex(i)
major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle)
if major > 6 or (major == 6 and minor >= 1):
return i
return -1
except Exception:
return -1
finally:
try:
import pynvml
pynvml.nvmlShutdown()
except Exception:
pass
def _is_apple_silicon():
if sys.platform != "darwin" or platform.machine() != "arm64":
return False
try:
import coremltools
return True
except ImportError:
return False
# Index of a TensorRT-capable GPU, or -1 when none is available.
tensor_gpu_index = _check_tensor_gpu_index()
def _select_engine_class():
    """Pick the inference backend for this machine.

    Preference order: TensorRT (CUDA GPU found) > CoreML (Apple Silicon
    with coremltools installed) > ONNX Runtime fallback. Imports are done
    lazily so unused backends' dependencies are never loaded.
    """
    if tensor_gpu_index > -1:
        from engines.tensorrt_engine import TensorRTEngine  # pyright: ignore[reportMissingImports]
        return TensorRTEngine
    if _is_apple_silicon():
        from engines.coreml_engine import CoreMLEngine
        return CoreMLEngine
    from engines.onnx_engine import OnnxEngine
    return OnnxEngine
# Resolved once at import time; the rest of the code uses EngineClass.
EngineClass = _select_engine_class()
+13
View File
@@ -0,0 +1,13 @@
from engines.inference_engine cimport InferenceEngine
cdef class CoreMLEngine(InferenceEngine):
    # CoreML-backed inference engine (Apple Silicon).
    cdef object model        # coremltools MLModel instance
    cdef int img_width       # model input image width
    cdef int img_height      # model input image height
    cdef tuple get_input_shape(self)
    cdef run(self, input_data)
    cdef preprocess(self, list frames)
    cdef list postprocess(self, output, object ai_config)
+100
View File
@@ -0,0 +1,100 @@
from engines.inference_engine cimport InferenceEngine
from annotation cimport Detection
cimport constants_inf
import numpy as np
from PIL import Image
import cv2
import io
import os
import tempfile
import zipfile
cdef class CoreMLEngine(InferenceEngine):
    """Inference backend that runs a CoreML model (Apple Silicon)."""

    def __init__(self, model_bytes: bytes, max_batch_size: int = 1, **kwargs):
        """Load a CoreML model from zipped .mlpackage/.mlmodel bytes.

        kwargs may carry 'model_path' to bypass the zip-extraction step.
        """
        InferenceEngine.__init__(self, model_bytes, max_batch_size, engine_name="coreml")
        import coremltools as ct
        model_path = kwargs.get('model_path')
        if model_path is None:
            model_path = self._extract_from_zip(model_bytes)
        self.model = ct.models.MLModel(
            model_path, compute_units=ct.ComputeUnit.ALL)
        # Read the expected input image size from the model spec.
        spec = self.model.get_spec()
        img_input = spec.description.input[0]
        self.img_width = int(img_input.type.imageType.width)
        self.img_height = int(img_input.type.imageType.height)
        constants_inf.log(<str>f'CoreML model: {self.img_width}x{self.img_height}')

    @staticmethod
    def get_engine_filename():
        """Name of the pre-built CoreML engine archive in remote storage."""
        return "azaion_coreml.zip"

    @staticmethod
    def _extract_from_zip(model_bytes):
        """Unzip *model_bytes* into a temp dir; return the model's path.

        NOTE(review): the temp directory is never removed, and extractall
        on untrusted archives is path-traversal-prone — confirm model
        archives come only from trusted storage.
        """
        tmpdir = tempfile.mkdtemp()
        buf = io.BytesIO(model_bytes)
        with zipfile.ZipFile(buf, 'r') as zf:
            zf.extractall(tmpdir)
        for item in os.listdir(tmpdir):
            if item.endswith('.mlpackage') or item.endswith('.mlmodel'):
                return os.path.join(tmpdir, item)
        raise ValueError("No .mlpackage or .mlmodel found in zip")

    cdef tuple get_input_shape(self):
        """Return (height, width) expected by the model."""
        return <tuple>(self.img_height, self.img_width)

    cdef preprocess(self, list frames):
        """Convert the first BGR frame into a resized RGB PIL image.

        NOTE(review): only frames[0] is consumed — this path appears to be
        single-image per call; confirm callers batch accordingly.
        """
        frame = frames[0]
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        resized = cv2.resize(rgb, (self.img_width, self.img_height))
        return Image.fromarray(resized)

    cdef run(self, input_data):
        """Run a CoreML prediction with fixed NMS thresholds."""
        predict = getattr(self.model, 'predict')
        return predict({
            'image': input_data,
            'iouThreshold': 0.45,
            'confidenceThreshold': 0.25,
        })

    cdef list postprocess(self, output, object ai_config):
        """Convert the CoreML output dict into a one-frame Detection list.

        'coordinates' carries normalized cx/cy/w/h rows, 'confidence'
        per-class scores. Detections below probability_threshold are
        dropped, then overlapping boxes are filtered.
        """
        cdef int w = self.img_width
        cdef int h = self.img_height
        coords = output.get('coordinates', np.empty((0, 4), dtype=np.float32))
        confs = output.get('confidence', np.empty((0, 80), dtype=np.float32))
        cdef list[Detection] detections = []
        if coords.size == 0:
            return [detections]
        cx, cy, bw, bh = coords[:, 0], coords[:, 1], coords[:, 2], coords[:, 3]
        # Corner coordinates in pixels; re-normalized per detection below.
        x1 = (cx - bw / 2) * w
        y1 = (cy - bh / 2) * h
        x2 = (cx + bw / 2) * w
        y2 = (cy + bh / 2) * h
        class_ids = np.argmax(confs, axis=1)
        conf_values = np.max(confs, axis=1)
        for i in range(len(conf_values)):
            conf = round(float(conf_values[i]), 2)
            if conf < ai_config.probability_threshold:
                continue
            det_x1 = float(x1[i]) / w
            det_y1 = float(y1[i]) / h
            det_x2 = float(x2[i]) / w
            det_y2 = float(y2[i]) / h
            det_cx = (det_x1 + det_x2) / 2
            det_cy = (det_y1 + det_y2) / 2
            det_w = det_x2 - det_x1
            det_h = det_y2 - det_y1
            detections.append(Detection(det_cx, det_cy, det_w, det_h, int(class_ids[i]), conf))
        filtered = self.remove_overlapping(detections, ai_config.tracking_intersection_threshold)
        return [filtered]
+12
View File
@@ -0,0 +1,12 @@
from annotation cimport Detection
cdef class InferenceEngine:
    # Abstract base for model backends (ONNX / TensorRT / CoreML).
    cdef public int max_batch_size   # largest batch the engine accepts
    cdef public str engine_name      # backend identifier, e.g. "onnx"
    cdef tuple get_input_shape(self)
    cdef run(self, input_data)
    cdef preprocess(self, list frames)
    cdef list postprocess(self, output, object ai_config)
    cdef list remove_overlapping(self, list[Detection] detections, float threshold)
    cpdef list process_frames(self, list frames, object ai_config)
+106
View File
@@ -0,0 +1,106 @@
import cv2
import numpy as np
from annotation cimport Detection
cdef class InferenceEngine:
    """Base class for inference backends; provides shared pre/postprocess,
    overlap filtering and batched frame processing."""

    def __init__(self, model_bytes: bytes, max_batch_size: int = 8, **kwargs):
        self.max_batch_size = max_batch_size
        self.engine_name = <str>kwargs.get('engine_name', "onnx")

    @staticmethod
    def get_engine_filename():
        """Remote filename of a pre-built engine; None means none exists."""
        return None

    @staticmethod
    def get_source_filename():
        """Remote filename of a convertible source model, if any."""
        return None

    @staticmethod
    def convert_from_source(bytes source_bytes):
        """Convert source-model bytes to engine bytes (identity by default)."""
        return source_bytes

    cdef tuple get_input_shape(self):
        raise NotImplementedError("Subclass must implement get_input_shape")

    cdef run(self, input_data):
        raise NotImplementedError("Subclass must implement run")

    cdef preprocess(self, list frames):
        """Stack frames into one NCHW float blob scaled to [0, 1], BGR->RGB."""
        cdef int h, w
        h, w = self.get_input_shape()
        blobs = [cv2.dnn.blobFromImage(frame,
                                       scalefactor=1.0 / 255.0,
                                       size=(w, h),
                                       mean=(0, 0, 0),
                                       swapRB=True,
                                       crop=False)
                 for frame in frames]
        return np.vstack(blobs)

    cdef list postprocess(self, output, object ai_config):
        """Turn raw output rows (x1, y1, x2, y2, conf, cls) into per-frame
        lists of normalized Detections above the probability threshold,
        with overlapping boxes removed."""
        cdef list[Detection] detections
        cdef int ann_index
        cdef float x1, y1, x2, y2, conf
        cdef int class_id
        cdef list results = []
        cdef int h, w
        h, w = self.get_input_shape()
        for ann_index in range(len(output[0])):
            detections = []
            for det in output[0][ann_index]:
                # A zero confidence marks the end of valid rows (padding).
                if det[4] == 0:
                    break
                x1 = det[0] / w
                y1 = det[1] / h
                x2 = det[2] / w
                y2 = det[3] / h
                conf = round(det[4], 2)
                class_id = int(det[5])
                x = (x1 + x2) / 2
                y = (y1 + y2) / 2
                bw = x2 - x1
                bh = y2 - y1
                if conf >= ai_config.probability_threshold:
                    detections.append(Detection(x, y, bw, bh, class_id, conf))
            filtered = self.remove_overlapping(detections, ai_config.tracking_intersection_threshold)
            results.append(filtered)
        return results

    cdef list remove_overlapping(self, list[Detection] detections, float threshold):
        """Greedy duplicate suppression: among overlapping pairs, keep the
        higher-confidence detection (ties broken by the lower class id).

        The surviving index is appended to filtered_out_indexes at the end
        of each outer pass so later passes skip it; the logic depends on
        exact iteration order.
        """
        cdef Detection det1, det2
        filtered_output = []
        filtered_out_indexes = []
        for det1_index in range(len(detections)):
            if det1_index in filtered_out_indexes:
                continue
            det1 = detections[det1_index]
            res = det1_index
            for det2_index in range(det1_index + 1, len(detections)):
                det2 = detections[det2_index]
                if det1.overlaps(det2, threshold):
                    if det1.confidence > det2.confidence or (
                            det1.confidence == det2.confidence and det1.cls < det2.cls):
                        filtered_out_indexes.append(det2_index)
                    else:
                        # det2 wins; the previously kept index is discarded.
                        filtered_out_indexes.append(res)
                        res = det2_index
            filtered_output.append(detections[res])
            filtered_out_indexes.append(res)
        return filtered_output

    cpdef list process_frames(self, list frames, object ai_config):
        """Run preprocess/run/postprocess over *frames* in engine-sized
        chunks; return one detection list per input frame."""
        cdef int effective_batch = min(self.max_batch_size, ai_config.model_batch_size)
        if effective_batch < 1:
            effective_batch = 1
        cdef list all_detections = []
        cdef int i
        for i in range(0, len(frames), effective_batch):
            chunk = frames[i:i + effective_batch]
            input_blob = self.preprocess(chunk)
            raw_output = self.run(input_blob)
            batch_dets = self.postprocess(raw_output, ai_config)
            all_detections.extend(batch_dets)
        return all_detections
+13
View File
@@ -0,0 +1,13 @@
from engines.inference_engine cimport InferenceEngine
cdef class OnnxEngine(InferenceEngine):
    # ONNX Runtime backend with an optional CPU fallback session.
    cdef public object session    # primary onnxruntime InferenceSession
    cdef object _cpu_session      # CPU-only fallback, or None
    cdef object model_inputs
    cdef str input_name
    cdef object input_shape       # NCHW input shape from the model
    cdef tuple get_input_shape(self)
    cdef run(self, input_data)
+48
View File
@@ -0,0 +1,48 @@
from engines.inference_engine cimport InferenceEngine
import onnxruntime as onnx
cimport constants_inf
import os
def _select_providers():
    """Choose ONNX Runtime execution providers in preference order
    (CoreML > CUDA > CPU), optionally skipping CoreML via SKIP_COREML."""
    available = set(onnx.get_available_providers())
    preferred = ["CoreMLExecutionProvider", "CUDAExecutionProvider", "CPUExecutionProvider"]
    if os.environ.get("SKIP_COREML", "").lower() in ("1", "true", "yes"):
        preferred.remove("CoreMLExecutionProvider")
    selected = [provider for provider in preferred if provider in available]
    if not selected:
        return ["CPUExecutionProvider"]
    return selected
cdef class OnnxEngine(InferenceEngine):
    """ONNX Runtime backend; retries on a CPU-only session when a run on a
    CoreML provider fails."""

    def __init__(self, model_bytes: bytes, max_batch_size: int = 8, **kwargs):
        InferenceEngine.__init__(self, model_bytes, max_batch_size)
        providers = _select_providers()
        constants_inf.log(<str>f'ONNX providers: {providers}')
        self.session = onnx.InferenceSession(model_bytes, providers=providers)
        self.model_inputs = self.session.get_inputs()
        self.input_name = self.model_inputs[0].name
        self.input_shape = self.model_inputs[0].shape
        # A concrete leading dimension fixes the batch size; -1/None/"N"
        # are treated as a dynamic batch axis.
        if self.input_shape[0] not in (-1, None, "N"):
            self.max_batch_size = self.input_shape[0]
        constants_inf.log(f'AI detection model input: {self.model_inputs} {self.input_shape}')
        model_meta = self.session.get_modelmeta()
        constants_inf.log(f"Metadata: {model_meta.custom_metadata_map}")
        self._cpu_session = None
        if any("CoreML" in p for p in self.session.get_providers()):
            # Keep a CPU session ready as a fallback — presumably because
            # CoreML runs can fail at runtime (see run()); confirm.
            constants_inf.log(<str>'CoreML active — creating CPU fallback session')
            self._cpu_session = onnx.InferenceSession(
                model_bytes, providers=["CPUExecutionProvider"])

    cdef tuple get_input_shape(self):
        """Return (height, width) taken from the NCHW input shape."""
        shape = self.input_shape
        return <tuple>(shape[2], shape[3])

    cdef run(self, input_data):
        """Run the session; on any failure retry once on the CPU session."""
        try:
            return self.session.run(None, {self.input_name: input_data})
        except Exception:
            if self._cpu_session is not None:
                return self._cpu_session.run(None, {self.input_name: input_data})
            raise
+20
View File
@@ -0,0 +1,20 @@
from engines.inference_engine cimport InferenceEngine
cdef class TensorRTEngine(InferenceEngine):
    # TensorRT backend with pre-allocated device buffers and one CUDA stream.
    cdef public object context   # TensorRT execution context
    cdef public object d_input   # device-side input buffer
    cdef public object d_output  # device-side output buffer
    cdef str input_name
    cdef list input_shape        # [batch, C, H, W]
    cdef object h_output         # page-locked host output buffer
    cdef str output_name
    cdef list output_shape       # [batch, boxes, values-per-box]
    cdef object stream           # CUDA stream for async copies/execution
    cdef tuple get_input_shape(self)
    cdef run(self, input_data)
+169
View File
@@ -0,0 +1,169 @@
from engines.inference_engine cimport InferenceEngine
import tensorrt as trt # pyright: ignore[reportMissingImports]
import pycuda.driver as cuda # pyright: ignore[reportMissingImports]
import pycuda.autoinit # pyright: ignore[reportMissingImports]
import pynvml
import numpy as np
cimport constants_inf
# Fraction of total GPU memory assumed usable when sizing batches.
GPU_MEMORY_FRACTION = 0.8
cdef class TensorRTEngine(InferenceEngine):
    """TensorRT backend: deserializes a CUDA engine, pre-allocates device
    buffers for a fixed max batch, and runs async inference on one stream."""

    def __init__(self, model_bytes: bytes, max_batch_size: int = 8, **kwargs):
        InferenceEngine.__init__(self, model_bytes, max_batch_size, engine_name="tensorrt")
        try:
            logger = trt.Logger(trt.Logger.WARNING)
            runtime = trt.Runtime(logger)
            engine = runtime.deserialize_cuda_engine(model_bytes)
            if engine is None:
                raise RuntimeError("Failed to load TensorRT engine from bytes")
            self.context = engine.create_execution_context()
            self.input_name = engine.get_tensor_name(0)
            engine_input_shape = engine.get_tensor_shape(self.input_name)
            # -1 marks a dynamic dimension; H/W fall back to 1280.
            C = engine_input_shape[1]
            H = 1280 if engine_input_shape[2] == -1 else engine_input_shape[2]
            W = 1280 if engine_input_shape[3] == -1 else engine_input_shape[3]
            if engine_input_shape[0] == -1:
                # Dynamic batch axis: size the batch from available GPU memory.
                gpu_mem = TensorRTEngine.get_gpu_memory_bytes(0)
                self.max_batch_size = TensorRTEngine.calculate_max_batch_size(gpu_mem, H, W)
            else:
                self.max_batch_size = engine_input_shape[0]
            self.input_shape = [self.max_batch_size, C, H, W]
            self.context.set_input_shape(self.input_name, self.input_shape)
            input_size = trt.volume(self.input_shape) * np.dtype(np.float32).itemsize
            self.d_input = cuda.mem_alloc(input_size)
            self.output_name = engine.get_tensor_name(1)
            engine_output_shape = tuple(engine.get_tensor_shape(self.output_name))
            # Dynamic output dims default to 300 boxes x 6 values per box.
            self.output_shape = [
                self.max_batch_size,
                300 if engine_output_shape[1] == -1 else engine_output_shape[1],
                6 if engine_output_shape[2] == -1 else engine_output_shape[2],
            ]
            self.h_output = cuda.pagelocked_empty(tuple(self.output_shape), dtype=np.float32)
            self.d_output = cuda.mem_alloc(self.h_output.nbytes)
            self.stream = cuda.Stream()
        except Exception as e:
            raise RuntimeError(f"Failed to initialize TensorRT engine: {str(e)}")

    @staticmethod
    def calculate_max_batch_size(gpu_memory_bytes, int input_h, int input_w):
        """Estimate a safe batch size from GPU memory, capped at 32.

        Heuristic: assume ~12x the float32 input footprint per frame.
        """
        frame_input_bytes = 3 * input_h * input_w * 4
        estimated_per_frame = frame_input_bytes * 12
        available = gpu_memory_bytes * GPU_MEMORY_FRACTION
        calculated = max(1, int(available / estimated_per_frame))
        return min(calculated, 32)

    @staticmethod
    def get_gpu_memory_bytes(int device_id):
        """Total memory of *device_id* via NVML; assume 2 GiB when unknown."""
        total_memory = None
        try:
            pynvml.nvmlInit()
            handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)
            mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            total_memory = mem_info.total
        except pynvml.NVMLError:
            total_memory = None
        finally:
            try:
                pynvml.nvmlShutdown()
            except pynvml.NVMLError:
                pass
        return 2 * 1024 * 1024 * 1024 if total_memory is None else total_memory

    @staticmethod
    def get_engine_filename():
        """Engine file name keyed by compute capability and SM count, so
        each GPU model gets its own cached build; None when CUDA fails."""
        try:
            from engines import tensor_gpu_index
            device = cuda.Device(max(tensor_gpu_index, 0))
            sm_count = device.multiprocessor_count
            cc_major, cc_minor = device.compute_capability()
            return f"azaion.cc_{cc_major}.{cc_minor}_sm_{sm_count}.engine"
        except Exception:
            return None

    @staticmethod
    def get_source_filename():
        """The ONNX model is the conversion source for TensorRT."""
        import constants_inf
        return constants_inf.AI_ONNX_MODEL_FILE

    @staticmethod
    def convert_from_source(bytes onnx_model):
        """Build a serialized TensorRT engine from ONNX model bytes.

        Enables FP16 when the platform supports it. Returns None when
        parsing or building fails.
        """
        gpu_mem = TensorRTEngine.get_gpu_memory_bytes(0)
        workspace_bytes = int(gpu_mem * 0.9)
        explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        trt_logger = trt.Logger(trt.Logger.WARNING)
        with trt.Builder(trt_logger) as builder, \
                builder.create_network(explicit_batch_flag) as network, \
                trt.OnnxParser(network, trt_logger) as parser, \
                builder.create_builder_config() as config:
            config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes)
            if not parser.parse(onnx_model):
                return None
            input_tensor = network.get_input(0)
            shape = input_tensor.shape
            C = shape[1]
            H = max(shape[2], 1280) if shape[2] != -1 else 1280
            W = max(shape[3], 1280) if shape[3] != -1 else 1280
            if shape[0] == -1:
                # Dynamic batch: optimization profile from 1 up to the
                # memory-derived maximum.
                max_batch = TensorRTEngine.calculate_max_batch_size(gpu_mem, H, W)
                profile = builder.create_optimization_profile()
                profile.set_shape(
                    input_tensor.name,
                    (1, C, H, W),
                    (max_batch, C, H, W),
                    (max_batch, C, H, W),
                )
                config.add_optimization_profile(profile)
            if builder.platform_has_fast_fp16:
                constants_inf.log(<str>'Converting to supported fp16')
                config.set_flag(trt.BuilderFlag.FP16)
            else:
                constants_inf.log(<str>'Converting to supported fp32. (fp16 is not supported)')
            plan = builder.build_serialized_network(network, config)
            if plan is None:
                constants_inf.logerror(<str>'Conversion failed.')
                return None
            constants_inf.log('conversion done!')
            return bytes(plan)

    cdef tuple get_input_shape(self):
        """Return (H, W) of the engine input."""
        return <tuple>(self.input_shape[2], self.input_shape[3])

    cdef run(self, input_data):
        """Copy the batch to the device, execute, and return the host output
        trimmed and reshaped to the actual batch size."""
        try:
            actual_batch = input_data.shape[0]
            if actual_batch != self.input_shape[0]:
                # A smaller final batch: tell the context the real shape.
                actual_shape = [actual_batch, self.input_shape[1], self.input_shape[2], self.input_shape[3]]
                self.context.set_input_shape(self.input_name, actual_shape)
            cuda.memcpy_htod_async(self.d_input, input_data, self.stream)
            self.context.set_tensor_address(self.input_name, int(self.d_input))
            self.context.set_tensor_address(self.output_name, int(self.d_output))
            self.context.execute_async_v3(stream_handle=self.stream.handle)
            self.stream.synchronize()
            cuda.memcpy_dtoh(self.h_output, self.d_output)
            output_shape = [actual_batch, self.output_shape[1], self.output_shape[2]]
            output = self.h_output[:actual_batch].reshape(output_shape)
            return [output]
        except Exception as e:
            raise RuntimeError(f"Failed to run TensorRT inference: {str(e)}")
+426
View File
@@ -0,0 +1,426 @@
import mimetypes
from pathlib import Path
import cv2
cimport constants_inf
from ai_availability_status cimport AIAvailabilityEnum, AIAvailabilityStatus
from annotation cimport Detection, Annotation
from ai_config cimport AIRecognitionConfig
from engines.inference_engine cimport InferenceEngine
from loader_http_client cimport LoaderHttpClient
from threading import Thread
from engines import EngineClass
cdef class Inference:
    # Orchestrates model download/conversion, engine construction, and
    # running detection over images and videos.
    cdef LoaderHttpClient loader_client           # resource download/upload client
    cdef InferenceEngine engine                   # active backend; None until ready
    cdef object _annotation_callback              # called with (Annotation, percent)
    cdef object _status_callback
    cdef Annotation _previous_annotation
    cdef dict[str, list[Detection]] _tile_detections
    cdef dict[str, int] detection_counts          # media name -> annotation count
    cdef AIRecognitionConfig ai_config
    cdef bint stop_signal                         # set to abort video processing
    cdef public AIAvailabilityStatus ai_availability_status
    cdef str model_input
    cdef bytes _converted_model_bytes             # engine bytes pending after a conversion
    cdef bint is_building_engine                  # True while a conversion thread runs
    def __init__(self, loader_client):
        """Store the loader client and immediately attempt AI initialization."""
        self.loader_client = loader_client
        self._annotation_callback = None
        self._status_callback = None
        self.stop_signal = <bint>False
        self.model_input = <str>None
        self.detection_counts = {}
        self.engine = <InferenceEngine>None
        self.is_building_engine = <bint>False
        self.ai_availability_status = AIAvailabilityStatus()
        self._converted_model_bytes = <bytes>None
        # May spawn a background conversion thread; see init_ai.
        self.init_ai()
@property
def is_engine_ready(self):
return self.engine is not None
@property
def engine_name(self):
if self.engine is not None:
return self.engine.engine_name
return None
    cdef bytes download_model(self, str filename):
        """Download *filename* from the models folder; raise on failure."""
        models_dir = constants_inf.MODELS_FOLDER
        self.ai_availability_status.set_status(AIAvailabilityEnum.DOWNLOADING)
        res = self.loader_client.load_big_small_resource(filename, models_dir)
        if res.err is not None:
            raise Exception(res.err)
        return <bytes>res.data
    cdef convert_and_upload_model(self, bytes source_bytes, str engine_filename):
        """Convert the source model to the engine format and upload the result.

        Runs on a background thread (started by init_ai). Keeps the converted
        bytes in _converted_model_bytes so the next init_ai call can build the
        engine from them.
        """
        try:
            self.ai_availability_status.set_status(AIAvailabilityEnum.CONVERTING)
            models_dir = constants_inf.MODELS_FOLDER
            model_bytes = EngineClass.convert_from_source(source_bytes)
            self.ai_availability_status.set_status(AIAvailabilityEnum.UPLOADING)
            res = self.loader_client.upload_big_small_resource(model_bytes, engine_filename, models_dir)
            if res.err is not None:
                # NOTE(review): this WARNING is immediately overwritten by
                # ENABLED below — presumably intentional (the local model is
                # still usable even if the upload failed); confirm.
                self.ai_availability_status.set_status(AIAvailabilityEnum.WARNING, <str>f"Failed to upload converted model: {res.err}")
            self._converted_model_bytes = model_bytes
            self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
        except Exception as e:
            self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str> str(e))
            self._converted_model_bytes = <bytes>None
        finally:
            self.is_building_engine = <bint>False
    cdef init_ai(self):
        """Ensure an inference engine exists, kicking off download/conversion.

        Flow: reuse bytes from a finished background conversion -> try to
        download a pre-built, machine-specific engine -> otherwise download
        the source model and convert it on a daemon thread (engine stays
        None meanwhile). Safe to call repeatedly.
        """
        constants_inf.log(<str> 'init AI...')
        try:
            if self.engine is not None:
                return
            if self.is_building_engine:
                return
            # A background conversion finished earlier: build from its bytes.
            if self._converted_model_bytes is not None:
                try:
                    self.engine = EngineClass(self._converted_model_bytes)
                    self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
                except Exception as e:
                    self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str> str(e))
                finally:
                    self._converted_model_bytes = <bytes>None
                return
            models_dir = constants_inf.MODELS_FOLDER
            engine_filename = EngineClass.get_engine_filename()
            if engine_filename is not None:
                try:
                    # Prefer a pre-built engine matching this machine.
                    self.ai_availability_status.set_status(AIAvailabilityEnum.DOWNLOADING)
                    res = self.loader_client.load_big_small_resource(engine_filename, models_dir)
                    if res.err is not None:
                        raise Exception(res.err)
                    self.engine = EngineClass(res.data)
                    self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
                except Exception as e:
                    source_filename = EngineClass.get_source_filename()
                    if source_filename is None:
                        self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str>f"Pre-built engine not found: {str(e)}")
                        return
                    # Fall back to converting the source model in background.
                    self.ai_availability_status.set_status(AIAvailabilityEnum.WARNING, <str>str(e))
                    source_bytes = self.download_model(source_filename)
                    self.is_building_engine = <bint>True
                    thread = Thread(target=self.convert_and_upload_model, args=(source_bytes, engine_filename))
                    thread.daemon = True
                    thread.start()
                    return
            else:
                # Backends without a machine-specific build load ONNX directly.
                self.engine = EngineClass(<bytes>self.download_model(constants_inf.AI_ONNX_MODEL_FILE))
                self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
            self.is_building_engine = <bint>False
        except Exception as e:
            self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str>str(e))
            self.is_building_engine = <bint>False
cdef bint is_video(self, str filepath):
mime_type, _ = mimetypes.guess_type(<str>filepath)
return <bint>(mime_type and mime_type.startswith("video"))
    cpdef run_detect(self, dict config_dict, object annotation_callback, object status_callback=None):
        """Run detection over the media paths listed in *config_dict*.

        Splits paths into images and videos, (re)initializes the engine if
        needed, and invokes annotation_callback for each annotation produced.
        Returns early when the engine is still being built.
        """
        cdef list[str] videos = []
        cdef list[str] images = []
        cdef AIRecognitionConfig ai_config = AIRecognitionConfig.from_dict(config_dict)
        if ai_config is None:
            raise Exception('ai recognition config is empty')
        self._annotation_callback = annotation_callback
        self._status_callback = status_callback
        self.stop_signal = <bint>False
        self.init_ai()
        if self.engine is None:
            # Conversion may still be running on a background thread.
            constants_inf.log(<str> "AI engine not available. Conversion may be in progress. Skipping inference.")
            return
        self.detection_counts = {}
        for p in ai_config.paths:
            # Media are keyed by their space-stripped file stem.
            media_name = Path(<str>p).stem.replace(" ", "")
            self.detection_counts[media_name] = 0
            if self.is_video(p):
                videos.append(p)
            else:
                images.append(p)
        if len(images) > 0:
            constants_inf.log(<str>f'run inference on {" ".join(images)}...')
            self._process_images(ai_config, images)
        if len(videos) > 0:
            for v in videos:
                constants_inf.log(<str>f'run inference on {v}...')
                self._process_video(ai_config, v)
cdef _process_video(self, AIRecognitionConfig ai_config, str video_name):
cdef int frame_count = 0
cdef int batch_count = 0
cdef list batch_frames = []
cdef list[long] batch_timestamps = []
cdef Annotation annotation
cdef int model_h, model_w
self._previous_annotation = <Annotation>None
model_h, model_w = self.engine.get_input_shape()
v_input = cv2.VideoCapture(<str>video_name)
if not v_input.isOpened():
constants_inf.logerror(<str>f'Failed to open video: {video_name}')
return
total_frames = int(v_input.get(cv2.CAP_PROP_FRAME_COUNT))
fps = v_input.get(cv2.CAP_PROP_FPS)
width = int(v_input.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(v_input.get(cv2.CAP_PROP_FRAME_HEIGHT))
constants_inf.log(<str>f'Video: {total_frames} frames, {fps:.1f} fps, {width}x{height}')
cdef int effective_batch = min(self.engine.max_batch_size, ai_config.model_batch_size)
if effective_batch < 1:
effective_batch = 1
while v_input.isOpened() and not self.stop_signal:
ret, frame = v_input.read()
if not ret or frame is None:
break
frame_count += 1
if frame_count % ai_config.frame_period_recognition == 0:
batch_frames.append(frame)
batch_timestamps.append(<long>v_input.get(cv2.CAP_PROP_POS_MSEC))
if len(batch_frames) >= effective_batch:
batch_count += 1
constants_inf.log(<str>f'Video batch {batch_count}: frame {frame_count}/{total_frames} ({frame_count*100//total_frames}%)')
self._process_video_batch(ai_config, batch_frames, batch_timestamps, video_name, frame_count, total_frames, model_w)
batch_frames = []
batch_timestamps = []
if batch_frames:
batch_count += 1
constants_inf.log(<str>f'Video batch {batch_count} (flush): {len(batch_frames)} remaining frames')
self._process_video_batch(ai_config, batch_frames, batch_timestamps, video_name, frame_count, total_frames, model_w)
v_input.release()
constants_inf.log(<str>f'Video done: {frame_count} frames read, {batch_count} batches processed')
self.send_detection_status()
    cdef _process_video_batch(self, AIRecognitionConfig ai_config, list batch_frames,
                              list batch_timestamps, str video_name,
                              int frame_count, int total_frames, int model_w):
        """Run the engine on one batch of frames and emit valid annotations.

        Valid annotations get a JPEG of the source frame attached and are
        forwarded through on_annotation. Frames without detections are still
        passed to is_valid_video_annotation — presumably for its side
        effects; confirm against its implementation.
        """
        cdef Annotation annotation
        list_detections = self.engine.process_frames(batch_frames, ai_config)
        total_dets = sum(len(d) for d in list_detections)
        if total_dets > 0:
            constants_inf.log(<str>f'Video batch: {total_dets} detections from postprocess')
        for i in range(len(list_detections)):
            detections = list_detections[i]
            # Annotation names combine the media stem with a fixed-width
            # timestamp (see constants_inf.format_time).
            original_media_name = Path(<str>video_name).stem.replace(" ", "")
            name = f'{original_media_name}_{constants_inf.format_time(batch_timestamps[i])}'
            annotation = Annotation(name, original_media_name, batch_timestamps[i], detections)
            if detections:
                valid = self.is_valid_video_annotation(annotation, ai_config, model_w)
                constants_inf.log(<str>f'Video frame {name}: {len(detections)} dets, valid={valid}')
                if valid:
                    _, image = cv2.imencode('.jpg', batch_frames[i])
                    annotation.image = image.tobytes()
                    self._previous_annotation = annotation
                    self.on_annotation(annotation, frame_count, total_frames)
            else:
                self.is_valid_video_annotation(annotation, ai_config, model_w)
cdef on_annotation(self, Annotation annotation, int frame_count=0, int total_frames=0):
self.detection_counts[annotation.original_media_name] = self.detection_counts.get(annotation.original_media_name, 0) + 1
if self._annotation_callback is not None:
percent = int(frame_count * 100 / total_frames) if total_frames > 0 else 0
cb = self._annotation_callback
cb(annotation, percent)
    cdef _process_images(self, AIRecognitionConfig ai_config, list[str] image_paths):
        """Run detection on still images, tiling any image much larger than
        the model input.

        For each readable image the ground sampling distance is derived from
        the camera parameters in ai_config; images larger than 1.5x the model
        input are split into overlapping tiles sized from METERS_IN_TILE.
        All frames/tiles are batched through the engine and valid annotations
        are forwarded via on_annotation().
        """
        cdef list all_frame_data = []
        cdef double ground_sampling_distance
        cdef int model_h, model_w
        model_h, model_w = self.engine.get_input_shape()
        # Reset cross-tile duplicate tracking for this run.
        self._tile_detections = {}
        for path in image_paths:
            frame = cv2.imread(<str>path)
            if frame is None:
                constants_inf.logerror(<str>f'Failed to read image {path}')
                continue
            img_h, img_w, _ = frame.shape
            original_media_name = Path(<str> path).stem.replace(" ", "")
            # GSD = sensor_width * altitude / (focal_length * image_width);
            # presumably meters-per-pixel — TODO confirm the config units.
            ground_sampling_distance = ai_config.sensor_width * ai_config.altitude / (ai_config.focal_length * img_w)
            constants_inf.log(<str>f'ground sampling distance: {ground_sampling_distance}')
            if img_h <= 1.5 * model_h and img_w <= 1.5 * model_w:
                # Small enough to process whole; synthetic _000000 time suffix.
                all_frame_data.append((frame, original_media_name, f'{original_media_name}_000000', ground_sampling_distance))
            else:
                tile_size = int(constants_inf.METERS_IN_TILE / ground_sampling_distance)
                constants_inf.log(<str> f'calc tile size: {tile_size}')
                res = self.split_to_tiles(frame, path, tile_size, ai_config.big_image_tile_overlap_percent)
                for tile_frame, omn, tile_name in res:
                    all_frame_data.append((tile_frame, omn, tile_name, ground_sampling_distance))
        if not all_frame_data:
            return
        # Single batched engine call over every frame and tile collected above.
        frames = [fd[0] for fd in all_frame_data]
        all_dets = self.engine.process_frames(frames, ai_config)
        for i in range(len(all_dets)):
            frame_entry = all_frame_data[i]
            f = frame_entry[0]
            original_media_name = frame_entry[1]
            name = frame_entry[2]
            gsd = frame_entry[3]
            annotation = Annotation(name, original_media_name, 0, all_dets[i])
            if self.is_valid_image_annotation(annotation, gsd, f.shape):
                constants_inf.log(<str> f'Detected {annotation}')
                _, image = cv2.imencode('.jpg', f)
                annotation.image = image.tobytes()
                self.on_annotation(annotation)
        self.send_detection_status()
cdef send_detection_status(self):
if self._status_callback is not None:
cb = self._status_callback
for media_name in self.detection_counts.keys():
cb(media_name, self.detection_counts[media_name])
self.detection_counts.clear()
    cdef split_to_tiles(self, frame, path, tile_size, overlap_percent):
        """Split *frame* into square tiles of *tile_size* pixels with the
        given percentage of overlap between neighbouring tiles.

        Returns a list of (tile, original_media_name, tile_name) tuples where
        tile_name encodes the tile size and x/y origin so detections can be
        mapped back to absolute coordinates (see remove_tiled_duplicates).
        """
        constants_inf.log(<str>f'splitting image {path} to tiles...')
        img_h, img_w, _ = frame.shape
        stride_w = int(tile_size * (1 - overlap_percent / 100))
        stride_h = int(tile_size * (1 - overlap_percent / 100))
        results = []
        original_media_name = Path(<str> path).stem.replace(" ", "")
        for y in range(0, img_h, stride_h):
            for x in range(0, img_w, stride_w):
                x_end = min(x + tile_size, img_w)
                y_end = min(y + tile_size, img_h)
                if x_end - x < tile_size:
                    # Partial tile at the right edge: skip it when the previous
                    # tile already covered the remainder; otherwise snap the
                    # origin left so a full-size edge tile is extracted.
                    # (Rebinding x does not affect range() iteration.)
                    if img_w - (x - stride_w) <= tile_size:
                        continue
                    x = img_w - tile_size
                if y_end - y < tile_size:
                    # Same snap/skip logic for the bottom edge.
                    if img_h - (y - stride_h) <= tile_size:
                        continue
                    y = img_h - tile_size
                tile = frame[y:y_end, x:x_end]
                # Name embeds SPLIT_SUFFIX + 'size_x_y!' for downstream parsing.
                name = f'{original_media_name}{constants_inf.SPLIT_SUFFIX}{tile_size:04d}_{x:04d}_{y:04d}!_000000'
                results.append((tile, original_media_name, name))
        return results
cpdef stop(self):
self.stop_signal = <bint>True
    cdef remove_tiled_duplicates(self, Annotation annotation):
        """Drop detections already reported by an overlapping tile of the
        same media.

        The tile size and x/y origin are parsed back out of the annotation
        name (format written by split_to_tiles); each detection is converted
        to absolute pixel coordinates and compared against the per-media list
        of previously seen absolute detections.  annotation.detections is
        replaced in place with only the unseen ones.
        """
        right = annotation.name.rindex('!')
        left = annotation.name.index(constants_inf.SPLIT_SUFFIX) + len(constants_inf.SPLIT_SUFFIX)
        tile_size_str, x_str, y_str = annotation.name[left:right].split('_')
        tile_size = int(tile_size_str)
        x = int(x_str)
        y = int(y_str)
        cdef list[Detection] unique_detections = []
        existing_abs_detections = self._tile_detections.setdefault(annotation.original_media_name, [])
        for det in annotation.detections:
            # det.x/det.y appear to be tile-relative fractions — scaling by
            # tile_size yields pixel offsets within the tile. TODO confirm.
            x1 = det.x * tile_size
            y1 = det.y * tile_size
            det_abs = Detection(x + x1, y + y1, det.w * tile_size, det.h * tile_size, det.cls, det.confidence)
            # Relies on Detection equality; list membership is O(n) per det.
            if det_abs not in existing_abs_detections:
                unique_detections.append(det)
                existing_abs_detections.append(det_abs)
        annotation.detections = unique_detections
    cdef bint is_valid_image_annotation(self, Annotation annotation, double ground_sampling_distance, frame_shape):
        """Return True when the annotation still has detections after tile
        de-duplication and physical-size filtering.

        Each detection's real-world width/height (normalized size * image
        pixels * GSD) must not exceed max_object_size_meters for its class;
        oversized detections are removed from annotation.detections in place.
        """
        if constants_inf.SPLIT_SUFFIX in annotation.name:
            self.remove_tiled_duplicates(annotation)
        img_h, img_w, _ = frame_shape
        if annotation.detections:
            constants_inf.log(<str> f'Initial ann: {annotation}')
        cdef list[Detection] valid_detections = []
        for det in annotation.detections:
            # Convert normalized box size to meters via the image dimensions
            # and the ground sampling distance.
            m_w = det.w * img_w * ground_sampling_distance
            m_h = det.h * img_h * ground_sampling_distance
            max_size = constants_inf.annotations_dict[det.cls].max_object_size_meters
            if m_w <= max_size and m_h <= max_size:
                valid_detections.append(det)
                constants_inf.log(<str> f'Kept ({m_w} {m_h}) <= {max_size}. class: {constants_inf.annotations_dict[det.cls].name}')
            else:
                constants_inf.log(<str> f'Removed ({m_w} {m_h}) > {max_size}. class: {constants_inf.annotations_dict[det.cls].name}')
        annotation.detections = valid_detections
        if not annotation.detections:
            return <bint>False
        return <bint>True
    cdef bint is_valid_video_annotation(self, Annotation annotation, AIRecognitionConfig ai_config, int model_w):
        """Decide whether a video-frame annotation is novel enough to emit.

        Returns True when: it is the first annotation of the video, enough
        time has passed since the previously emitted one, it contains more
        detections, some detection moved farther than the tracking distance
        from every previous detection, or a detection's confidence rose by at
        least tracking_probability_increase over its nearest previous one.
        """
        if constants_inf.SPLIT_SUFFIX in annotation.name:
            self.remove_tiled_duplicates(annotation)
        if not annotation.detections:
            return <bint>False
        if self._previous_annotation is None:
            return <bint>True
        # Time-based refresh: always emit after frame_recognition_seconds.
        if annotation.time >= self._previous_annotation.time + <long>(ai_config.frame_recognition_seconds * 1000):
            return <bint>True
        if len(annotation.detections) > len(self._previous_annotation.detections):
            return <bint>True
        cdef:
            Detection current_det, prev_det
            double dx, dy, distance_sq, min_distance_sq
            Detection closest_det
        for current_det in annotation.detections:
            # Find the nearest previous detection (squared distances, no sqrt).
            # closest_det cannot stay None here: the earlier length check
            # guarantees _previous_annotation.detections is non-empty.
            min_distance_sq = <double>1e18
            closest_det = <Detection>None
            for prev_det in self._previous_annotation.detections:
                dx = current_det.x - prev_det.x
                dy = current_det.y - prev_det.y
                distance_sq = dx * dx + dy * dy
                if distance_sq < min_distance_sq:
                    min_distance_sq = distance_sq
                    closest_det = prev_det
            # tracking_distance_confidence is a fraction of the model width;
            # dist_px is loop-invariant and could be hoisted.
            dist_px = ai_config.tracking_distance_confidence * model_w
            dist_px_sq = dist_px * dist_px
            if min_distance_sq > dist_px_sq:
                return <bint>True
            if current_det.confidence >= closest_det.confidence + ai_config.tracking_probability_increase:
                return <bint>True
        return <bint>False
+8
View File
@@ -0,0 +1,8 @@
# Outcome of a loader HTTP call: err is None on success (error text
# otherwise); data carries the downloaded payload when applicable.
cdef class LoadResult:
    cdef public object err
    cdef public object data
# Thin HTTP client for the loader service: download and upload of
# big/small resources, both returning a LoadResult.
cdef class LoaderHttpClient:
    cdef str base_url
    cdef LoadResult load_big_small_resource(self, str filename, str directory)
    cdef LoadResult upload_big_small_resource(self, bytes content, str filename, str directory)
+43
View File
@@ -0,0 +1,43 @@
import requests
from loguru import logger
HTTP_TIMEOUT = 120
# Result wrapper for loader HTTP calls (attributes declared in the .pxd):
# err is None on success, otherwise a string describing the failure;
# data holds the downloaded bytes when applicable.
cdef class LoadResult:
    def __init__(self, err, data=None):
        self.err = err
        self.data = data
cdef class LoaderHttpClient:
    """HTTP client for the loader service: download and upload resources,
    returning LoadResult instead of raising."""

    def __init__(self, base_url: str):
        # Normalise so endpoint paths can always be appended after one '/'.
        self.base_url = base_url.rstrip("/")

    cdef LoadResult load_big_small_resource(self, str filename, str directory):
        """Download *filename* from *directory*; raw bytes go in ``data`` on
        success, the error text in ``err`` on failure."""
        # NOTE(review): the literal '(unknown)' path segment looks like a
        # templating artifact — confirm the intended endpoint path.
        url = f"{self.base_url}/load/(unknown)"
        payload = {"filename": filename, "folder": directory}
        try:
            response = requests.post(url, json=payload, stream=True, timeout=HTTP_TIMEOUT)
            response.raise_for_status()
            # .content drains the stream inside the try so read errors are
            # reported the same way as request errors.
            return LoadResult(None, response.content)
        except Exception as e:
            logger.error(f"LoaderHttpClient.load_big_small_resource failed: {e}")
            return LoadResult(str(e))

    cdef LoadResult upload_big_small_resource(self, bytes content, str filename, str directory):
        """Upload *content* as *filename* into *directory*; ``err`` is None
        on success."""
        url = f"{self.base_url}/upload/(unknown)"
        form_files = {"data": (filename, content)}
        form_fields = {"folder": directory}
        try:
            response = requests.post(url, files=form_files, data=form_fields, timeout=HTTP_TIMEOUT)
            response.raise_for_status()
            return LoadResult(None)
        except Exception as e:
            logger.error(f"LoaderHttpClient.upload_big_small_resource failed: {e}")
            return LoadResult(str(e))
+305
View File
@@ -0,0 +1,305 @@
import asyncio
import base64
import json
import os
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Optional
import requests as http_requests
from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from loader_http_client import LoaderHttpClient, LoadResult
app = FastAPI(title="Azaion.Detections")
# Worker pool that runs blocking inference calls off the event loop.
executor = ThreadPoolExecutor(max_workers=2)
LOADER_URL = os.environ.get("LOADER_URL", "http://loader:8080")
ANNOTATIONS_URL = os.environ.get("ANNOTATIONS_URL", "http://annotations:8080")
loader_client = LoaderHttpClient(LOADER_URL)
# Lazily constructed Inference singleton; see get_inference().
inference = None
# One SSE subscriber queue per open /detect/stream connection.
_event_queues: list[asyncio.Queue] = []
# In-flight detection tasks keyed by media id (guards duplicate starts).
_active_detections: dict[str, asyncio.Task] = {}
class TokenManager:
    """Holds a JWT access/refresh token pair and refreshes the access token
    shortly before it expires."""

    def __init__(self, access_token: str, refresh_token: str):
        self.access_token = access_token
        self.refresh_token = refresh_token

    def get_valid_token(self) -> str:
        """Return the access token, refreshing it first when it expires
        within the next 60 seconds (or its expiry cannot wait)."""
        expires_at = self._decode_exp(self.access_token)
        if expires_at and expires_at - time.time() < 60:
            self._refresh()
        return self.access_token

    def _refresh(self):
        # Best effort: on any failure keep the (possibly stale) token.
        try:
            resp = http_requests.post(
                f"{ANNOTATIONS_URL}/auth/refresh",
                json={"refreshToken": self.refresh_token},
                timeout=10,
            )
            if resp.status_code == 200:
                self.access_token = resp.json()["token"]
        except Exception:
            pass

    @staticmethod
    def _decode_exp(token: str) -> Optional[float]:
        """Extract the ``exp`` claim from a JWT without verifying it;
        returns None when the token cannot be parsed."""
        try:
            segment = token.split(".")[1]
            remainder = len(segment) % 4
            if remainder:
                segment += "=" * (4 - remainder)
            claims = json.loads(base64.urlsafe_b64decode(segment))
            return float(claims.get("exp", 0))
        except Exception:
            return None
def get_inference():
    """Return the process-wide Inference singleton, constructing it on first
    use (the import is deferred so module import stays cheap)."""
    global inference
    if inference is not None:
        return inference
    from inference import Inference
    inference = Inference(loader_client)
    return inference
class DetectionDto(BaseModel):
    """Wire format for one detection; values are passed through from the
    engine Detection (presumably normalized coordinates — confirm against
    the engine contract)."""
    centerX: float
    centerY: float
    width: float
    height: float
    classNum: int
    label: str
    confidence: float
class DetectionEvent(BaseModel):
    """SSE payload describing detection progress for one media item."""
    annotations: list[DetectionDto]
    mediaId: str
    # "AIProcessing" | "AIProcessed" | "Error" (values emitted by detect_media)
    mediaStatus: str
    mediaPercent: int
class HealthResponse(BaseModel):
    """/health response: service liveness plus AI engine availability."""
    status: str
    aiAvailability: str
    engineType: Optional[str] = None
    errorMessage: Optional[str] = None
class AIConfigDto(BaseModel):
    """Client-tunable recognition parameters (mirrors AIRecognitionConfig)."""
    frame_period_recognition: int = 4      # sample every N-th video frame
    frame_recognition_seconds: int = 2     # min seconds between emitted video annotations
    probability_threshold: float = 0.25
    tracking_distance_confidence: float = 0.0   # fraction of model input width
    tracking_probability_increase: float = 0.0
    tracking_intersection_threshold: float = 0.6
    model_batch_size: int = 8
    big_image_tile_overlap_percent: int = 20
    # Camera parameters used for ground-sampling-distance computation.
    altitude: float = 400
    focal_length: float = 24
    sensor_width: float = 23.5
    # Mutable default is safe here: pydantic copies defaults per instance.
    paths: list[str] = []
def detection_to_dto(det) -> DetectionDto:
    """Map an engine Detection onto the API wire format, resolving the class
    number to its human-readable label."""
    import constants_inf

    return DetectionDto(
        centerX=det.x,
        centerY=det.y,
        width=det.w,
        height=det.h,
        classNum=det.cls,
        label=constants_inf.get_annotation_name(det.cls),
        confidence=det.confidence,
    )
@app.get("/health")
def health() -> HealthResponse:
    """Liveness probe that also reports AI engine availability.

    Always returns status "healthy"; engine problems surface via the
    aiAvailability / errorMessage fields instead of an error response.
    """
    if inference is None:
        # Engine never initialized — service is up but no model is loaded yet.
        return HealthResponse(status="healthy", aiAvailability="None")
    try:
        availability = inference.ai_availability_status
        text = str(availability)
        # First word of the status string is the availability state name.
        status_str = text.split()[0] if text.strip() else "None"
        error_msg = getattr(availability, "error_message", None)
        return HealthResponse(
            status="healthy",
            aiAvailability=status_str,
            engineType=inference.engine_name,
            errorMessage=error_msg,
        )
    except Exception as e:
        return HealthResponse(
            status="healthy",
            aiAvailability="None",
            errorMessage=str(e),
        )
@app.post("/detect")
async def detect_image(
    file: UploadFile = File(...),
    config: Optional[str] = Form(None),
):
    """Run detection on a single uploaded image and return the detections.

    The upload is validated as a decodable image, written to a temporary
    file (the inference pipeline reads from paths), processed in the worker
    executor, and the collected detections are returned as DTOs.

    Raises HTTPException 400 for bad input, 503 when the engine is
    unavailable, 422 for other runtime failures.
    """
    import tempfile
    import cv2
    import numpy as np

    image_bytes = await file.read()
    if not image_bytes:
        raise HTTPException(status_code=400, detail="Image is empty")
    arr = np.frombuffer(image_bytes, dtype=np.uint8)
    if cv2.imdecode(arr, cv2.IMREAD_COLOR) is None:
        raise HTTPException(status_code=400, detail="Invalid image data")
    config_dict = {}
    if config:
        try:
            config_dict = json.loads(config)
        except json.JSONDecodeError as e:
            # BUGFIX: malformed client config previously escaped as an
            # unhandled 500; report it as a 400 like other bad input.
            raise HTTPException(status_code=400, detail=f"Invalid config JSON: {e}")
    suffix = os.path.splitext(file.filename or "upload.jpg")[1] or ".jpg"
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
    try:
        tmp.write(image_bytes)
        tmp.close()
        config_dict["paths"] = [tmp.name]
        # get_running_loop() is the non-deprecated call inside a coroutine.
        loop = asyncio.get_running_loop()
        inf = get_inference()
        results = []

        def on_annotation(annotation, percent):
            # run_detect invokes this from the executor thread before
            # run_in_executor returns, so appending here is safe.
            results.extend(annotation.detections)

        await loop.run_in_executor(executor, inf.run_detect, config_dict, on_annotation)
        return [detection_to_dto(d) for d in results]
    except RuntimeError as e:
        # Engine-unavailable errors map to 503; other runtime failures to 422.
        if "not available" in str(e):
            raise HTTPException(status_code=503, detail=str(e))
        raise HTTPException(status_code=422, detail=str(e))
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    finally:
        os.unlink(tmp.name)
def _format_video_time(time_ms) -> str:
    """Format a millisecond offset as HH:MM:SS ("00:00:00" for falsy input)."""
    total_seconds = int(time_ms // 1000) if time_ms else 0
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"


def _post_annotation_to_service(token_mgr: TokenManager, media_id: str,
                                annotation, dtos: list[DetectionDto]):
    """Best-effort push of one annotation (detections plus optional JPEG) to
    the annotations service; all failures are swallowed so detection
    processing is never interrupted by the mirror."""
    try:
        token = token_mgr.get_valid_token()
        image_b64 = base64.b64encode(annotation.image).decode() if annotation.image else None
        payload = {
            "mediaId": media_id,
            "source": 0,
            # BUGFIX: was f"00:00:{ms // 1000:02d}", which overflows past
            # 59 seconds (90 s rendered as "00:00:90"); format the offset
            # as a proper HH:MM:SS instead.
            "videoTime": _format_video_time(annotation.time),
            "detections": [d.model_dump() for d in dtos],
        }
        if image_b64:
            payload["image"] = image_b64
        http_requests.post(
            f"{ANNOTATIONS_URL}/annotations",
            json=payload,
            headers={"Authorization": f"Bearer {token}"},
            timeout=30,
        )
    except Exception:
        # NOTE(review): intentionally silent (best-effort mirror); consider
        # at least debug-logging the failure.
        pass
@app.post("/detect/{media_id}")
async def detect_media(media_id: str, request: Request, config: Optional[AIConfigDto] = None):
    """Kick off asynchronous detection for *media_id* and return immediately.

    Progress and results are broadcast as DetectionEvents to /detect/stream
    subscribers; when auth headers were supplied, annotations are also
    mirrored to the annotations service.  Responds 409 when a detection for
    the same media is already running.
    """
    existing = _active_detections.get(media_id)
    if existing is not None and not existing.done():
        raise HTTPException(status_code=409, detail="Detection already in progress for this media")
    # Tokens are optional: without them results are only streamed, not stored.
    auth_header = request.headers.get("authorization", "")
    access_token = auth_header.removeprefix("Bearer ").strip() if auth_header else ""
    refresh_token = request.headers.get("x-refresh-token", "")
    token_mgr = TokenManager(access_token, refresh_token) if access_token else None
    cfg = config or AIConfigDto()
    config_dict = cfg.model_dump()
    async def run_detection():
        # NOTE(review): asyncio.get_running_loop() is preferred inside a
        # coroutine; get_event_loop() is deprecated here since Python 3.10.
        loop = asyncio.get_event_loop()
        def _enqueue(event):
            # Fan out to every SSE subscriber; events for slow consumers
            # (full queues) are dropped rather than blocking.
            for q in _event_queues:
                try:
                    q.put_nowait(event)
                except asyncio.QueueFull:
                    pass
        try:
            inf = get_inference()
            if not inf.is_engine_ready:
                raise RuntimeError("Detection service unavailable")
            def on_annotation(annotation, percent):
                # Runs on the executor thread: marshal the event back onto
                # the loop, then mirror it to the annotations service.
                dtos = [detection_to_dto(d) for d in annotation.detections]
                event = DetectionEvent(
                    annotations=dtos,
                    mediaId=media_id,
                    mediaStatus="AIProcessing",
                    mediaPercent=percent,
                )
                loop.call_soon_threadsafe(_enqueue, event)
                if token_mgr and dtos:
                    _post_annotation_to_service(token_mgr, media_id, annotation, dtos)
            def on_status(media_name, count):
                # Per-media completion signal; media_name/count are unused
                # here — the event always reports this request's media_id.
                event = DetectionEvent(
                    annotations=[],
                    mediaId=media_id,
                    mediaStatus="AIProcessed",
                    mediaPercent=100,
                )
                loop.call_soon_threadsafe(_enqueue, event)
            await loop.run_in_executor(
                executor, inf.run_detect, config_dict, on_annotation, on_status
            )
        except Exception:
            # Any failure is reported to subscribers as a generic Error event.
            error_event = DetectionEvent(
                annotations=[],
                mediaId=media_id,
                mediaStatus="Error",
                mediaPercent=0,
            )
            _enqueue(error_event)
        finally:
            _active_detections.pop(media_id, None)
    _active_detections[media_id] = asyncio.create_task(run_detection())
    return {"status": "started", "mediaId": media_id}
@app.get("/detect/stream")
async def detect_stream():
    """Server-Sent Events endpoint: streams DetectionEvent JSON payloads to
    the client until it disconnects; the subscriber queue is removed on
    disconnect so broadcasts stop fanning out to it."""
    subscriber: asyncio.Queue = asyncio.Queue(maxsize=100)
    _event_queues.append(subscriber)

    async def event_generator():
        try:
            while True:
                event = await subscriber.get()
                yield f"data: {event.model_dump_json()}\n\n"
        except asyncio.CancelledError:
            # Client went away; fall through to unsubscribe.
            pass
        finally:
            _event_queues.remove(subscriber)

    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
    )