Add AIAvailabilityStatus and AIRecognitionConfig classes for AI model management

- Introduced `AIAvailabilityStatus` class to manage the availability status of AI models, including methods for setting status and logging messages.
- Added `AIRecognitionConfig` class to encapsulate configuration parameters for AI recognition, with a static method for creating instances from dictionaries.
- Implemented enums for AI availability states to enhance clarity and maintainability.
- Updated related Cython files to support the new classes and ensure proper type handling.

These changes aim to improve the structure and functionality of the AI model management system, facilitating better status tracking and configuration handling.
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-03-31 05:49:51 +03:00
parent fc57d677b4
commit 8ce40a9385
43 changed files with 1190 additions and 462 deletions
+17
View File
@@ -0,0 +1,17 @@
# Lifecycle states of the AI model. Gaps between values leave room for
# future intermediate states; values >= 200 describe outcomes.
cdef enum AIAvailabilityEnum:
    NONE = 0          # no model activity yet
    DOWNLOADING = 10  # fetching model/engine bytes from the loader service
    CONVERTING = 20   # converting the source model to an engine format
    UPLOADING = 30    # uploading the converted engine back to storage
    ENABLED = 200     # engine ready for inference
    WARNING = 300     # usable, but a non-fatal problem occurred
    ERROR = 500       # model could not be made available
from cython cimport pymutex
cdef class AIAvailabilityStatus:
    # Thread-safe holder for the current AI model availability state.
    cdef int status            # current AIAvailabilityEnum value
    cdef str error_message     # last error text, "" when none
    cdef pymutex _lock         # guards status/error_message
    cdef set_status(self, int status, str error_message=*)
+37
View File
@@ -0,0 +1,37 @@
cimport cython
cimport constants_inf
# Human-readable labels for AIAvailabilityEnum values, used by
# AIAvailabilityStatus when composing __str__ output and log lines.
AIStatus2Text = {
    AIAvailabilityEnum.NONE: "None",
    AIAvailabilityEnum.DOWNLOADING: "Downloading",
    AIAvailabilityEnum.CONVERTING: "Converting",
    AIAvailabilityEnum.UPLOADING: "Uploading",
    AIAvailabilityEnum.ENABLED: "Enabled",
    AIAvailabilityEnum.WARNING: "Warning",
    AIAvailabilityEnum.ERROR: "Error",
}
cdef class AIAvailabilityStatus:
    """Thread-safe container for the AI model availability state.

    All reads and writes of ``status``/``error_message`` happen under
    ``_lock`` so concurrent download/convert threads and readers stay
    consistent.
    """

    def __init__(self):
        self.status = AIAvailabilityEnum.NONE
        self.error_message = ""

    def __str__(self):
        with self._lock:
            status_text = AIStatus2Text.get(self.status, "Unknown")
            error_text = self.error_message if self.error_message else ""
        return f"{status_text} {error_text}"

    cdef set_status(self, int status, str error_message=""):
        """Atomically update the state and log the transition.

        Error transitions go through logerror, others through log. The log
        line always carries the status text: previously the error branch
        logged only the bare error message, losing which state failed.
        """
        log_message = ""
        with self._lock:
            self.status = status
            self.error_message = error_message
            status_text = AIStatus2Text.get(self.status, "Unknown")
            error_text = self.error_message if self.error_message else ""
            log_message = f"{status_text} {error_text}"
        # Log outside the lock to keep the critical section small.
        if error_message:
            constants_inf.logerror(<str>log_message)
        else:
            constants_inf.log(<str>log_message)
+21
View File
@@ -0,0 +1,21 @@
cdef class AIRecognitionConfig:
    # Recognition parameters; see from_dict for dictionary keys and defaults.
    cdef public double frame_recognition_seconds
    cdef public int frame_period_recognition        # process every Nth frame
    cdef public double probability_threshold        # minimum detection confidence
    cdef public double tracking_distance_confidence
    cdef public double tracking_probability_increase
    cdef public double tracking_intersection_threshold  # overlap-removal ratio
    cdef public int big_image_tile_overlap_percent
    cdef public list[str] paths                     # media files to process
    cdef public int model_batch_size
    cdef public double altitude                     # used for ground sampling distance — units presumably meters, confirm
    cdef public double focal_length                 # presumably millimeters — confirm
    cdef public double sensor_width                 # presumably millimeters — confirm
    @staticmethod
    cdef AIRecognitionConfig from_dict(dict data)
+66
View File
@@ -0,0 +1,66 @@
cdef class AIRecognitionConfig:
    """Bag of recognition parameters, normally built via from_dict."""

    def __init__(self,
                 frame_period_recognition,
                 frame_recognition_seconds,
                 probability_threshold,
                 tracking_distance_confidence,
                 tracking_probability_increase,
                 tracking_intersection_threshold,
                 paths,
                 model_batch_size,
                 big_image_tile_overlap_percent,
                 altitude,
                 focal_length,
                 sensor_width
                 ):
        self.frame_period_recognition = frame_period_recognition
        self.frame_recognition_seconds = frame_recognition_seconds
        self.probability_threshold = probability_threshold
        self.tracking_distance_confidence = tracking_distance_confidence
        self.tracking_probability_increase = tracking_probability_increase
        self.tracking_intersection_threshold = tracking_intersection_threshold
        self.paths = paths
        self.model_batch_size = model_batch_size
        self.big_image_tile_overlap_percent = big_image_tile_overlap_percent
        self.altitude = altitude
        self.focal_length = focal_length
        self.sensor_width = sensor_width

    def __str__(self):
        parts = [
            f'frame_seconds : {self.frame_recognition_seconds}',
            f'distance_confidence : {self.tracking_distance_confidence}',
            f'probability_increase : {self.tracking_probability_increase}',
            f'intersection_threshold : {self.tracking_intersection_threshold}',
            f'frame_period_recognition : {self.frame_period_recognition}',
            f'big_image_tile_overlap_percent: {self.big_image_tile_overlap_percent}',
            f'paths: {self.paths}',
            f'model_batch_size: {self.model_batch_size}',
            f'altitude: {self.altitude}',
            f'focal_length: {self.focal_length}',
            f'sensor_width: {self.sensor_width}',
        ]
        return ', '.join(parts)

    @staticmethod
    cdef AIRecognitionConfig from_dict(dict data):
        """Build a config from *data*, falling back to a default per key.

        Keyword arguments make the mapping explicit and immune to the
        positional-order of __init__.
        """
        return AIRecognitionConfig(
            frame_period_recognition=data.get("frame_period_recognition", 4),
            frame_recognition_seconds=data.get("frame_recognition_seconds", 2),
            probability_threshold=data.get("probability_threshold", 0.25),
            tracking_distance_confidence=data.get("tracking_distance_confidence", 0.0),
            tracking_probability_increase=data.get("tracking_probability_increase", 0.0),
            tracking_intersection_threshold=data.get("tracking_intersection_threshold", 0.6),
            paths=data.get("paths", []),
            model_batch_size=data.get("model_batch_size", 8),
            big_image_tile_overlap_percent=data.get("big_image_tile_overlap_percent", 20),
            altitude=data.get("altitude", 400),
            focal_length=data.get("focal_length", 24),
            sensor_width=data.get("sensor_width", 23.5)
        )
+12
View File
@@ -0,0 +1,12 @@
cdef class Detection:
    # One detection box; x/y are the normalized center, w/h the normalized size.
    cdef public double x, y, w, h, confidence
    cdef public int cls
    cdef bint overlaps(self, Detection det2, float confidence_threshold)
cdef class Annotation:
    # A named group of detections for one frame or image.
    cdef public str name
    cdef public str original_media_name
    cdef long time                        # timestamp in milliseconds
    cdef public list[Detection] detections
    cdef public bytes image               # optional JPEG bytes of the frame
+50
View File
@@ -0,0 +1,50 @@
cimport constants_inf
cdef class Detection:
    """One detected box in normalized center-x/center-y/width/height form."""

    def __init__(self, double x, double y, double w, double h, int cls, double confidence):
        self.x = x
        self.y = y
        self.w = w
        self.h = h
        self.cls = cls
        self.confidence = confidence

    def __str__(self):
        return f'{self.cls}: {self.x:.2f} {self.y:.2f} {self.w:.2f} {self.h:.2f}, prob: {(self.confidence*100):.1f}%'

    def __eq__(self, other):
        # Detections are "equal" when every coordinate differs by no more
        # than TILE_DUPLICATE_CONFIDENCE_THRESHOLD.
        # NOTE(review): class id and confidence are deliberately ignored, and
        # a confidence-named constant is used as a spatial tolerance — confirm
        # this is the intended tile-duplicate semantics.
        if not isinstance(other, Detection):
            return False
        if max(abs(self.x - other.x),
               abs(self.y - other.y),
               abs(self.w - other.w),
               abs(self.h - other.h)) > constants_inf.TILE_DUPLICATE_CONFIDENCE_THRESHOLD:
            return False
        return True

    cdef bint overlaps(self, Detection det2, float confidence_threshold):
        """Return True when the intersection area exceeds *confidence_threshold*
        times the smaller box's area."""
        cdef double overlap_x = 0.5 * (self.w + det2.w) - abs(self.x - det2.x)
        cdef double overlap_y = 0.5 * (self.h + det2.h) - abs(self.y - det2.y)
        cdef double overlap_area = <double>(max(0.0, overlap_x) * max(0.0, overlap_y))
        cdef double min_area = min(self.w * self.h, det2.w * det2.h)
        # Guard degenerate (zero-area) boxes: previously this divided by zero
        # and raised ZeroDivisionError; treat them as non-overlapping.
        if min_area <= 0.0:
            return <bint>False
        return <bint>(overlap_area / min_area > confidence_threshold)
cdef class Annotation:
    """A named set of detections for one frame/image, plus an optional JPEG."""

    def __init__(self, str name, str original_media_name, long ms, list[Detection] detections):
        self.name = name
        self.original_media_name = original_media_name
        self.time = ms
        self.image = b''
        if detections is None:
            self.detections = []
        else:
            self.detections = detections

    def __str__(self):
        if not self.detections:
            return f"{self.name}: No detections"
        parts = [
            f"class: {d.cls} {d.confidence * 100:.1f}% ({d.x:.2f}, {d.y:.2f}) ({d.w:.2f}, {d.h:.2f})"
            for d in self.detections
        ]
        joined = ", ".join(parts)
        return f"{self.name}: {joined}"
+29
View File
@@ -0,0 +1,29 @@
# Shared constants, logging helpers and class-metadata declarations used
# across the inference extension modules.
cdef str CONFIG_FILE
cdef str AI_ONNX_MODEL_FILE
cdef str CDN_CONFIG
cdef str MODELS_FOLDER
cdef int SMALL_SIZE_KB
cdef str SPLIT_SUFFIX
cdef double TILE_DUPLICATE_CONFIDENCE_THRESHOLD
cdef int METERS_IN_TILE
cdef log(str log_message)    # info-level log helper
cdef logerror(str error)     # error-level log helper
cdef format_time(long ms)    # fixed-width HMMSSF timestamp string
cdef dict[int, AnnotationClass] annotations_dict  # class id -> metadata
cdef class AnnotationClass:
    # Metadata for one detection class.
    cdef public int id
    cdef public str name
    cdef public str color
    cdef public int max_object_size_meters
cdef enum WeatherMode:
    # Class-id offsets for weather-specific variants of each class.
    Norm = 0
    Wint = 20
    Night = 40
+95
View File
@@ -0,0 +1,95 @@
import json
import os
import sys
from loguru import logger
# File names and tunables shared by the inference modules.
cdef str CONFIG_FILE = "config.yaml"
cdef str AI_ONNX_MODEL_FILE = "azaion.onnx"   # source ONNX model file name
cdef str CDN_CONFIG = "cdn.yaml"
cdef str MODELS_FOLDER = "models"             # models folder in remote storage
cdef int SMALL_SIZE_KB = 3
cdef str SPLIT_SUFFIX = "!split!"
# Spatial tolerance used by Detection.__eq__ when matching tile duplicates.
cdef double TILE_DUPLICATE_CONFIDENCE_THRESHOLD = <double>0.01
cdef int METERS_IN_TILE = 25
cdef class AnnotationClass:
    """Metadata for one detection class: id, display name, color, max size."""

    def __init__(self, id, name, color, max_object_size_meters):
        self.id = id
        self.name = name
        self.color = color
        self.max_object_size_meters = max_object_size_meters

    def __str__(self):
        fields = (self.id, self.name, self.color, self.max_object_size_meters)
        return " ".join(str(field) for field in fields)
# Each weather mode shifts class ids by this amount (matches WeatherMode).
cdef int weather_switcher_increase = 20
WEATHER_MODE_NAMES = {
    Norm: "Norm",
    Wint: "Wint",
    Night: "Night"
}
# Load class metadata from classes.json (path overridable via env var) and
# build annotations_dict with one entry per class per weather mode.
_classes_path = os.environ.get("CLASSES_JSON_PATH", "classes.json")
with open(_classes_path, 'r', encoding='utf-8') as f:
    j = json.loads(f.read())
annotations_dict = {}
for i in range(0, weather_switcher_increase * 3, weather_switcher_increase):
    for cl in j:
        # Weather variants get an offset id and a "Name(Mode)" display name;
        # the base (Norm, offset 0) keeps the plain name.
        id = i + cl['Id']
        mode_name = WEATHER_MODE_NAMES.get(i, "Unknown")
        name = cl['Name'] if i == 0 else f'{cl["Name"]}({mode_name})'
        annotations_dict[id] = AnnotationClass(id, name, cl['Color'], cl['MaxSizeM'])
# Configure loguru: a daily-rotated file log plus split stdout/stderr sinks.
_log_dir = os.environ.get("LOG_DIR", "Logs")
os.makedirs(_log_dir, exist_ok=True)
logger.remove()  # drop loguru's default sink before installing ours
log_format = "[{time:HH:mm:ss} {level}] {message}"
logger.add(
    sink=f"{_log_dir}/log_inference_{{time:YYYYMMDD}}.txt",
    level="INFO",
    format=log_format,
    enqueue=True,       # queued writes, safe across threads
    rotation="1 day",
    retention="30 days",
)
# stdout carries informational levels only; warnings and errors go to stderr.
logger.add(
    sys.stdout,
    level="DEBUG",
    format=log_format,
    filter=lambda record: record["level"].name in ("INFO", "DEBUG", "SUCCESS"),
    colorize=True
)
logger.add(
    sys.stderr,
    level="WARNING",
    format=log_format,
    colorize=True
)
def get_annotation_name(int cls_id):
    """Return the display name for *cls_id*, or "" when the id is unknown."""
    annotation = annotations_dict.get(cls_id)
    if annotation is None:
        return ""
    return (<AnnotationClass>annotation).name
# Thin wrappers so extension modules log through one configured logger.
cdef log(str log_message):
    logger.info(log_message)
cdef logerror(str error):
    logger.error(error)
cdef format_time(long ms):
    """Format *ms* as a fixed-width 6-char HMMSSF string.

    H is hours modulo 10 (single digit, wraps every 10 hours), MM minutes,
    SS seconds, F hundreds of milliseconds.
    """
    hours, remainder = divmod(ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    seconds, remainder = divmod(remainder, 1000)
    tenths = remainder // 100
    return f"{hours % 10}{minutes:02}{seconds:02}{tenths}"
+52
View File
@@ -0,0 +1,52 @@
import platform
import sys
def _check_tensor_gpu_index():
try:
import pynvml
pynvml.nvmlInit()
device_count = pynvml.nvmlDeviceGetCount()
if device_count == 0:
return -1
for i in range(device_count):
handle = pynvml.nvmlDeviceGetHandleByIndex(i)
major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle)
if major > 6 or (major == 6 and minor >= 1):
return i
return -1
except Exception:
return -1
finally:
try:
import pynvml
pynvml.nvmlShutdown()
except Exception:
pass
def _is_apple_silicon():
if sys.platform != "darwin" or platform.machine() != "arm64":
return False
try:
import coremltools
return True
except ImportError:
return False
# Index of a TensorRT-capable GPU, or -1 when none is available.
tensor_gpu_index = _check_tensor_gpu_index()
def _select_engine_class():
    """Pick the inference backend for this machine.

    Preference order: TensorRT (CUDA GPU found) > CoreML (Apple Silicon
    with coremltools installed) > ONNX Runtime fallback. Imports are done
    lazily so unused backends' dependencies are never loaded.
    """
    if tensor_gpu_index > -1:
        from engines.tensorrt_engine import TensorRTEngine  # pyright: ignore[reportMissingImports]
        return TensorRTEngine
    if _is_apple_silicon():
        from engines.coreml_engine import CoreMLEngine
        return CoreMLEngine
    from engines.onnx_engine import OnnxEngine
    return OnnxEngine
# Resolved once at import time; the rest of the code uses EngineClass.
EngineClass = _select_engine_class()
+13
View File
@@ -0,0 +1,13 @@
from engines.inference_engine cimport InferenceEngine
cdef class CoreMLEngine(InferenceEngine):
    # CoreML-backed inference engine (Apple Silicon).
    cdef object model        # coremltools MLModel instance
    cdef int img_width       # model input image width
    cdef int img_height      # model input image height
    cdef tuple get_input_shape(self)
    cdef run(self, input_data)
    cdef preprocess(self, list frames)
    cdef list postprocess(self, output, object ai_config)
+100
View File
@@ -0,0 +1,100 @@
from engines.inference_engine cimport InferenceEngine
from annotation cimport Detection
cimport constants_inf
import numpy as np
from PIL import Image
import cv2
import io
import os
import tempfile
import zipfile
cdef class CoreMLEngine(InferenceEngine):
    """Inference backend that runs a CoreML model (Apple Silicon)."""

    def __init__(self, model_bytes: bytes, max_batch_size: int = 1, **kwargs):
        """Load a CoreML model from zipped .mlpackage/.mlmodel bytes.

        kwargs may carry 'model_path' to bypass the zip-extraction step.
        """
        InferenceEngine.__init__(self, model_bytes, max_batch_size, engine_name="coreml")
        import coremltools as ct
        model_path = kwargs.get('model_path')
        if model_path is None:
            model_path = self._extract_from_zip(model_bytes)
        self.model = ct.models.MLModel(
            model_path, compute_units=ct.ComputeUnit.ALL)
        # Read the expected input image size from the model spec.
        spec = self.model.get_spec()
        img_input = spec.description.input[0]
        self.img_width = int(img_input.type.imageType.width)
        self.img_height = int(img_input.type.imageType.height)
        constants_inf.log(<str>f'CoreML model: {self.img_width}x{self.img_height}')

    @staticmethod
    def get_engine_filename():
        """Name of the pre-built CoreML engine archive in remote storage."""
        return "azaion_coreml.zip"

    @staticmethod
    def _extract_from_zip(model_bytes):
        """Unzip *model_bytes* into a temp dir; return the model's path.

        NOTE(review): the temp directory is never removed, and extractall
        on untrusted archives is path-traversal-prone — confirm model
        archives come only from trusted storage.
        """
        tmpdir = tempfile.mkdtemp()
        buf = io.BytesIO(model_bytes)
        with zipfile.ZipFile(buf, 'r') as zf:
            zf.extractall(tmpdir)
        for item in os.listdir(tmpdir):
            if item.endswith('.mlpackage') or item.endswith('.mlmodel'):
                return os.path.join(tmpdir, item)
        raise ValueError("No .mlpackage or .mlmodel found in zip")

    cdef tuple get_input_shape(self):
        """Return (height, width) expected by the model."""
        return <tuple>(self.img_height, self.img_width)

    cdef preprocess(self, list frames):
        """Convert the first BGR frame into a resized RGB PIL image.

        NOTE(review): only frames[0] is consumed — this path appears to be
        single-image per call; confirm callers batch accordingly.
        """
        frame = frames[0]
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        resized = cv2.resize(rgb, (self.img_width, self.img_height))
        return Image.fromarray(resized)

    cdef run(self, input_data):
        """Run a CoreML prediction with fixed NMS thresholds."""
        predict = getattr(self.model, 'predict')
        return predict({
            'image': input_data,
            'iouThreshold': 0.45,
            'confidenceThreshold': 0.25,
        })

    cdef list postprocess(self, output, object ai_config):
        """Convert the CoreML output dict into a one-frame Detection list.

        'coordinates' carries normalized cx/cy/w/h rows, 'confidence'
        per-class scores. Detections below probability_threshold are
        dropped, then overlapping boxes are filtered.
        """
        cdef int w = self.img_width
        cdef int h = self.img_height
        coords = output.get('coordinates', np.empty((0, 4), dtype=np.float32))
        confs = output.get('confidence', np.empty((0, 80), dtype=np.float32))
        cdef list[Detection] detections = []
        if coords.size == 0:
            return [detections]
        cx, cy, bw, bh = coords[:, 0], coords[:, 1], coords[:, 2], coords[:, 3]
        # Corner coordinates in pixels; re-normalized per detection below.
        x1 = (cx - bw / 2) * w
        y1 = (cy - bh / 2) * h
        x2 = (cx + bw / 2) * w
        y2 = (cy + bh / 2) * h
        class_ids = np.argmax(confs, axis=1)
        conf_values = np.max(confs, axis=1)
        for i in range(len(conf_values)):
            conf = round(float(conf_values[i]), 2)
            if conf < ai_config.probability_threshold:
                continue
            det_x1 = float(x1[i]) / w
            det_y1 = float(y1[i]) / h
            det_x2 = float(x2[i]) / w
            det_y2 = float(y2[i]) / h
            det_cx = (det_x1 + det_x2) / 2
            det_cy = (det_y1 + det_y2) / 2
            det_w = det_x2 - det_x1
            det_h = det_y2 - det_y1
            detections.append(Detection(det_cx, det_cy, det_w, det_h, int(class_ids[i]), conf))
        filtered = self.remove_overlapping(detections, ai_config.tracking_intersection_threshold)
        return [filtered]
+12
View File
@@ -0,0 +1,12 @@
from annotation cimport Detection
cdef class InferenceEngine:
    # Abstract base for model backends (ONNX / TensorRT / CoreML).
    cdef public int max_batch_size   # largest batch the engine accepts
    cdef public str engine_name      # backend identifier, e.g. "onnx"
    cdef tuple get_input_shape(self)
    cdef run(self, input_data)
    cdef preprocess(self, list frames)
    cdef list postprocess(self, output, object ai_config)
    cdef list remove_overlapping(self, list[Detection] detections, float threshold)
    cpdef list process_frames(self, list frames, object ai_config)
+106
View File
@@ -0,0 +1,106 @@
import cv2
import numpy as np
from annotation cimport Detection
cdef class InferenceEngine:
    """Base class for inference backends; provides shared pre/postprocess,
    overlap filtering and batched frame processing."""

    def __init__(self, model_bytes: bytes, max_batch_size: int = 8, **kwargs):
        self.max_batch_size = max_batch_size
        self.engine_name = <str>kwargs.get('engine_name', "onnx")

    @staticmethod
    def get_engine_filename():
        """Remote filename of a pre-built engine; None means none exists."""
        return None

    @staticmethod
    def get_source_filename():
        """Remote filename of a convertible source model, if any."""
        return None

    @staticmethod
    def convert_from_source(bytes source_bytes):
        """Convert source-model bytes to engine bytes (identity by default)."""
        return source_bytes

    cdef tuple get_input_shape(self):
        raise NotImplementedError("Subclass must implement get_input_shape")

    cdef run(self, input_data):
        raise NotImplementedError("Subclass must implement run")

    cdef preprocess(self, list frames):
        """Stack frames into one NCHW float blob scaled to [0, 1], BGR->RGB."""
        cdef int h, w
        h, w = self.get_input_shape()
        blobs = [cv2.dnn.blobFromImage(frame,
                                       scalefactor=1.0 / 255.0,
                                       size=(w, h),
                                       mean=(0, 0, 0),
                                       swapRB=True,
                                       crop=False)
                 for frame in frames]
        return np.vstack(blobs)

    cdef list postprocess(self, output, object ai_config):
        """Turn raw output rows (x1, y1, x2, y2, conf, cls) into per-frame
        lists of normalized Detections above the probability threshold,
        with overlapping boxes removed."""
        cdef list[Detection] detections
        cdef int ann_index
        cdef float x1, y1, x2, y2, conf
        cdef int class_id
        cdef list results = []
        cdef int h, w
        h, w = self.get_input_shape()
        for ann_index in range(len(output[0])):
            detections = []
            for det in output[0][ann_index]:
                # A zero confidence marks the end of valid rows (padding).
                if det[4] == 0:
                    break
                x1 = det[0] / w
                y1 = det[1] / h
                x2 = det[2] / w
                y2 = det[3] / h
                conf = round(det[4], 2)
                class_id = int(det[5])
                x = (x1 + x2) / 2
                y = (y1 + y2) / 2
                bw = x2 - x1
                bh = y2 - y1
                if conf >= ai_config.probability_threshold:
                    detections.append(Detection(x, y, bw, bh, class_id, conf))
            filtered = self.remove_overlapping(detections, ai_config.tracking_intersection_threshold)
            results.append(filtered)
        return results

    cdef list remove_overlapping(self, list[Detection] detections, float threshold):
        """Greedy duplicate suppression: among overlapping pairs, keep the
        higher-confidence detection (ties broken by the lower class id).

        The surviving index is appended to filtered_out_indexes at the end
        of each outer pass so later passes skip it; the logic depends on
        exact iteration order.
        """
        cdef Detection det1, det2
        filtered_output = []
        filtered_out_indexes = []
        for det1_index in range(len(detections)):
            if det1_index in filtered_out_indexes:
                continue
            det1 = detections[det1_index]
            res = det1_index
            for det2_index in range(det1_index + 1, len(detections)):
                det2 = detections[det2_index]
                if det1.overlaps(det2, threshold):
                    if det1.confidence > det2.confidence or (
                            det1.confidence == det2.confidence and det1.cls < det2.cls):
                        filtered_out_indexes.append(det2_index)
                    else:
                        # det2 wins; the previously kept index is discarded.
                        filtered_out_indexes.append(res)
                        res = det2_index
            filtered_output.append(detections[res])
            filtered_out_indexes.append(res)
        return filtered_output

    cpdef list process_frames(self, list frames, object ai_config):
        """Run preprocess/run/postprocess over *frames* in engine-sized
        chunks; return one detection list per input frame."""
        cdef int effective_batch = min(self.max_batch_size, ai_config.model_batch_size)
        if effective_batch < 1:
            effective_batch = 1
        cdef list all_detections = []
        cdef int i
        for i in range(0, len(frames), effective_batch):
            chunk = frames[i:i + effective_batch]
            input_blob = self.preprocess(chunk)
            raw_output = self.run(input_blob)
            batch_dets = self.postprocess(raw_output, ai_config)
            all_detections.extend(batch_dets)
        return all_detections
+13
View File
@@ -0,0 +1,13 @@
from engines.inference_engine cimport InferenceEngine
cdef class OnnxEngine(InferenceEngine):
    # ONNX Runtime backend with an optional CPU fallback session.
    cdef public object session    # primary onnxruntime InferenceSession
    cdef object _cpu_session      # CPU-only fallback, or None
    cdef object model_inputs
    cdef str input_name
    cdef object input_shape       # NCHW input shape from the model
    cdef tuple get_input_shape(self)
    cdef run(self, input_data)
+48
View File
@@ -0,0 +1,48 @@
from engines.inference_engine cimport InferenceEngine
import onnxruntime as onnx
cimport constants_inf
import os
def _select_providers():
    """Choose ONNX Runtime execution providers in preference order
    (CoreML > CUDA > CPU), optionally skipping CoreML via SKIP_COREML."""
    available = set(onnx.get_available_providers())
    preferred = ["CoreMLExecutionProvider", "CUDAExecutionProvider", "CPUExecutionProvider"]
    if os.environ.get("SKIP_COREML", "").lower() in ("1", "true", "yes"):
        preferred.remove("CoreMLExecutionProvider")
    selected = [provider for provider in preferred if provider in available]
    if not selected:
        return ["CPUExecutionProvider"]
    return selected
cdef class OnnxEngine(InferenceEngine):
    """ONNX Runtime backend; retries on a CPU-only session when a run on a
    CoreML provider fails."""

    def __init__(self, model_bytes: bytes, max_batch_size: int = 8, **kwargs):
        InferenceEngine.__init__(self, model_bytes, max_batch_size)
        providers = _select_providers()
        constants_inf.log(<str>f'ONNX providers: {providers}')
        self.session = onnx.InferenceSession(model_bytes, providers=providers)
        self.model_inputs = self.session.get_inputs()
        self.input_name = self.model_inputs[0].name
        self.input_shape = self.model_inputs[0].shape
        # A concrete leading dimension fixes the batch size; -1/None/"N"
        # are treated as a dynamic batch axis.
        if self.input_shape[0] not in (-1, None, "N"):
            self.max_batch_size = self.input_shape[0]
        constants_inf.log(f'AI detection model input: {self.model_inputs} {self.input_shape}')
        model_meta = self.session.get_modelmeta()
        constants_inf.log(f"Metadata: {model_meta.custom_metadata_map}")
        self._cpu_session = None
        if any("CoreML" in p for p in self.session.get_providers()):
            # Keep a CPU session ready as a fallback — presumably because
            # CoreML runs can fail at runtime (see run()); confirm.
            constants_inf.log(<str>'CoreML active — creating CPU fallback session')
            self._cpu_session = onnx.InferenceSession(
                model_bytes, providers=["CPUExecutionProvider"])

    cdef tuple get_input_shape(self):
        """Return (height, width) taken from the NCHW input shape."""
        shape = self.input_shape
        return <tuple>(shape[2], shape[3])

    cdef run(self, input_data):
        """Run the session; on any failure retry once on the CPU session."""
        try:
            return self.session.run(None, {self.input_name: input_data})
        except Exception:
            if self._cpu_session is not None:
                return self._cpu_session.run(None, {self.input_name: input_data})
            raise
+20
View File
@@ -0,0 +1,20 @@
from engines.inference_engine cimport InferenceEngine
cdef class TensorRTEngine(InferenceEngine):
    # TensorRT backend with pre-allocated device buffers and one CUDA stream.
    cdef public object context   # TensorRT execution context
    cdef public object d_input   # device-side input buffer
    cdef public object d_output  # device-side output buffer
    cdef str input_name
    cdef list input_shape        # [batch, C, H, W]
    cdef object h_output         # page-locked host output buffer
    cdef str output_name
    cdef list output_shape       # [batch, boxes, values-per-box]
    cdef object stream           # CUDA stream for async copies/execution
    cdef tuple get_input_shape(self)
    cdef run(self, input_data)
+169
View File
@@ -0,0 +1,169 @@
from engines.inference_engine cimport InferenceEngine
import tensorrt as trt # pyright: ignore[reportMissingImports]
import pycuda.driver as cuda # pyright: ignore[reportMissingImports]
import pycuda.autoinit # pyright: ignore[reportMissingImports]
import pynvml
import numpy as np
cimport constants_inf
# Fraction of total GPU memory assumed usable when sizing batches.
GPU_MEMORY_FRACTION = 0.8
cdef class TensorRTEngine(InferenceEngine):
    """TensorRT backend: deserializes a CUDA engine, pre-allocates device
    buffers for a fixed max batch, and runs async inference on one stream."""

    def __init__(self, model_bytes: bytes, max_batch_size: int = 8, **kwargs):
        InferenceEngine.__init__(self, model_bytes, max_batch_size, engine_name="tensorrt")
        try:
            logger = trt.Logger(trt.Logger.WARNING)
            runtime = trt.Runtime(logger)
            engine = runtime.deserialize_cuda_engine(model_bytes)
            if engine is None:
                raise RuntimeError("Failed to load TensorRT engine from bytes")
            self.context = engine.create_execution_context()
            self.input_name = engine.get_tensor_name(0)
            engine_input_shape = engine.get_tensor_shape(self.input_name)
            # -1 marks a dynamic dimension; H/W fall back to 1280.
            C = engine_input_shape[1]
            H = 1280 if engine_input_shape[2] == -1 else engine_input_shape[2]
            W = 1280 if engine_input_shape[3] == -1 else engine_input_shape[3]
            if engine_input_shape[0] == -1:
                # Dynamic batch axis: size the batch from available GPU memory.
                gpu_mem = TensorRTEngine.get_gpu_memory_bytes(0)
                self.max_batch_size = TensorRTEngine.calculate_max_batch_size(gpu_mem, H, W)
            else:
                self.max_batch_size = engine_input_shape[0]
            self.input_shape = [self.max_batch_size, C, H, W]
            self.context.set_input_shape(self.input_name, self.input_shape)
            input_size = trt.volume(self.input_shape) * np.dtype(np.float32).itemsize
            self.d_input = cuda.mem_alloc(input_size)
            self.output_name = engine.get_tensor_name(1)
            engine_output_shape = tuple(engine.get_tensor_shape(self.output_name))
            # Dynamic output dims default to 300 boxes x 6 values per box.
            self.output_shape = [
                self.max_batch_size,
                300 if engine_output_shape[1] == -1 else engine_output_shape[1],
                6 if engine_output_shape[2] == -1 else engine_output_shape[2],
            ]
            self.h_output = cuda.pagelocked_empty(tuple(self.output_shape), dtype=np.float32)
            self.d_output = cuda.mem_alloc(self.h_output.nbytes)
            self.stream = cuda.Stream()
        except Exception as e:
            raise RuntimeError(f"Failed to initialize TensorRT engine: {str(e)}")

    @staticmethod
    def calculate_max_batch_size(gpu_memory_bytes, int input_h, int input_w):
        """Estimate a safe batch size from GPU memory, capped at 32.

        Heuristic: assume ~12x the float32 input footprint per frame.
        """
        frame_input_bytes = 3 * input_h * input_w * 4
        estimated_per_frame = frame_input_bytes * 12
        available = gpu_memory_bytes * GPU_MEMORY_FRACTION
        calculated = max(1, int(available / estimated_per_frame))
        return min(calculated, 32)

    @staticmethod
    def get_gpu_memory_bytes(int device_id):
        """Total memory of *device_id* via NVML; assume 2 GiB when unknown."""
        total_memory = None
        try:
            pynvml.nvmlInit()
            handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)
            mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            total_memory = mem_info.total
        except pynvml.NVMLError:
            total_memory = None
        finally:
            try:
                pynvml.nvmlShutdown()
            except pynvml.NVMLError:
                pass
        return 2 * 1024 * 1024 * 1024 if total_memory is None else total_memory

    @staticmethod
    def get_engine_filename():
        """Engine file name keyed by compute capability and SM count, so
        each GPU model gets its own cached build; None when CUDA fails."""
        try:
            from engines import tensor_gpu_index
            device = cuda.Device(max(tensor_gpu_index, 0))
            sm_count = device.multiprocessor_count
            cc_major, cc_minor = device.compute_capability()
            return f"azaion.cc_{cc_major}.{cc_minor}_sm_{sm_count}.engine"
        except Exception:
            return None

    @staticmethod
    def get_source_filename():
        """The ONNX model is the conversion source for TensorRT."""
        import constants_inf
        return constants_inf.AI_ONNX_MODEL_FILE

    @staticmethod
    def convert_from_source(bytes onnx_model):
        """Build a serialized TensorRT engine from ONNX model bytes.

        Enables FP16 when the platform supports it. Returns None when
        parsing or building fails.
        """
        gpu_mem = TensorRTEngine.get_gpu_memory_bytes(0)
        workspace_bytes = int(gpu_mem * 0.9)
        explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        trt_logger = trt.Logger(trt.Logger.WARNING)
        with trt.Builder(trt_logger) as builder, \
                builder.create_network(explicit_batch_flag) as network, \
                trt.OnnxParser(network, trt_logger) as parser, \
                builder.create_builder_config() as config:
            config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes)
            if not parser.parse(onnx_model):
                return None
            input_tensor = network.get_input(0)
            shape = input_tensor.shape
            C = shape[1]
            H = max(shape[2], 1280) if shape[2] != -1 else 1280
            W = max(shape[3], 1280) if shape[3] != -1 else 1280
            if shape[0] == -1:
                # Dynamic batch: optimization profile from 1 up to the
                # memory-derived maximum.
                max_batch = TensorRTEngine.calculate_max_batch_size(gpu_mem, H, W)
                profile = builder.create_optimization_profile()
                profile.set_shape(
                    input_tensor.name,
                    (1, C, H, W),
                    (max_batch, C, H, W),
                    (max_batch, C, H, W),
                )
                config.add_optimization_profile(profile)
            if builder.platform_has_fast_fp16:
                constants_inf.log(<str>'Converting to supported fp16')
                config.set_flag(trt.BuilderFlag.FP16)
            else:
                constants_inf.log(<str>'Converting to supported fp32. (fp16 is not supported)')
            plan = builder.build_serialized_network(network, config)
            if plan is None:
                constants_inf.logerror(<str>'Conversion failed.')
                return None
            constants_inf.log('conversion done!')
            return bytes(plan)

    cdef tuple get_input_shape(self):
        """Return (H, W) of the engine input."""
        return <tuple>(self.input_shape[2], self.input_shape[3])

    cdef run(self, input_data):
        """Copy the batch to the device, execute, and return the host output
        trimmed and reshaped to the actual batch size."""
        try:
            actual_batch = input_data.shape[0]
            if actual_batch != self.input_shape[0]:
                # A smaller final batch: tell the context the real shape.
                actual_shape = [actual_batch, self.input_shape[1], self.input_shape[2], self.input_shape[3]]
                self.context.set_input_shape(self.input_name, actual_shape)
            cuda.memcpy_htod_async(self.d_input, input_data, self.stream)
            self.context.set_tensor_address(self.input_name, int(self.d_input))
            self.context.set_tensor_address(self.output_name, int(self.d_output))
            self.context.execute_async_v3(stream_handle=self.stream.handle)
            self.stream.synchronize()
            cuda.memcpy_dtoh(self.h_output, self.d_output)
            output_shape = [actual_batch, self.output_shape[1], self.output_shape[2]]
            output = self.h_output[:actual_batch].reshape(output_shape)
            return [output]
        except Exception as e:
            raise RuntimeError(f"Failed to run TensorRT inference: {str(e)}")
+426
View File
@@ -0,0 +1,426 @@
import mimetypes
from pathlib import Path
import cv2
cimport constants_inf
from ai_availability_status cimport AIAvailabilityEnum, AIAvailabilityStatus
from annotation cimport Detection, Annotation
from ai_config cimport AIRecognitionConfig
from engines.inference_engine cimport InferenceEngine
from loader_http_client cimport LoaderHttpClient
from threading import Thread
from engines import EngineClass
cdef class Inference:
    # Orchestrates model download/conversion, engine construction, and
    # running detection over images and videos.
    cdef LoaderHttpClient loader_client           # resource download/upload client
    cdef InferenceEngine engine                   # active backend; None until ready
    cdef object _annotation_callback              # called with (Annotation, percent)
    cdef object _status_callback
    cdef Annotation _previous_annotation
    cdef dict[str, list[Detection]] _tile_detections
    cdef dict[str, int] detection_counts          # media name -> annotation count
    cdef AIRecognitionConfig ai_config
    cdef bint stop_signal                         # set to abort video processing
    cdef public AIAvailabilityStatus ai_availability_status
    cdef str model_input
    cdef bytes _converted_model_bytes             # engine bytes pending after a conversion
    cdef bint is_building_engine                  # True while a conversion thread runs
    def __init__(self, loader_client):
        """Store the loader client and immediately attempt AI initialization."""
        self.loader_client = loader_client
        self._annotation_callback = None
        self._status_callback = None
        self.stop_signal = <bint>False
        self.model_input = <str>None
        self.detection_counts = {}
        self.engine = <InferenceEngine>None
        self.is_building_engine = <bint>False
        self.ai_availability_status = AIAvailabilityStatus()
        self._converted_model_bytes = <bytes>None
        # May spawn a background conversion thread; see init_ai.
        self.init_ai()
@property
def is_engine_ready(self):
return self.engine is not None
@property
def engine_name(self):
if self.engine is not None:
return self.engine.engine_name
return None
    cdef bytes download_model(self, str filename):
        """Download *filename* from the models folder; raise on failure."""
        models_dir = constants_inf.MODELS_FOLDER
        self.ai_availability_status.set_status(AIAvailabilityEnum.DOWNLOADING)
        res = self.loader_client.load_big_small_resource(filename, models_dir)
        if res.err is not None:
            raise Exception(res.err)
        return <bytes>res.data
    cdef convert_and_upload_model(self, bytes source_bytes, str engine_filename):
        """Convert the source model to the engine format and upload the result.

        Runs on a background thread (started by init_ai). Keeps the converted
        bytes in _converted_model_bytes so the next init_ai call can build the
        engine from them.
        """
        try:
            self.ai_availability_status.set_status(AIAvailabilityEnum.CONVERTING)
            models_dir = constants_inf.MODELS_FOLDER
            model_bytes = EngineClass.convert_from_source(source_bytes)
            self.ai_availability_status.set_status(AIAvailabilityEnum.UPLOADING)
            res = self.loader_client.upload_big_small_resource(model_bytes, engine_filename, models_dir)
            if res.err is not None:
                # NOTE(review): this WARNING is immediately overwritten by
                # ENABLED below — presumably intentional (the local model is
                # still usable even if the upload failed); confirm.
                self.ai_availability_status.set_status(AIAvailabilityEnum.WARNING, <str>f"Failed to upload converted model: {res.err}")
            self._converted_model_bytes = model_bytes
            self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
        except Exception as e:
            self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str> str(e))
            self._converted_model_bytes = <bytes>None
        finally:
            self.is_building_engine = <bint>False
    cdef init_ai(self):
        """Ensure an inference engine exists, kicking off download/conversion.

        Flow: reuse bytes from a finished background conversion -> try to
        download a pre-built, machine-specific engine -> otherwise download
        the source model and convert it on a daemon thread (engine stays
        None meanwhile). Safe to call repeatedly.
        """
        constants_inf.log(<str> 'init AI...')
        try:
            if self.engine is not None:
                return
            if self.is_building_engine:
                return
            # A background conversion finished earlier: build from its bytes.
            if self._converted_model_bytes is not None:
                try:
                    self.engine = EngineClass(self._converted_model_bytes)
                    self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
                except Exception as e:
                    self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str> str(e))
                finally:
                    self._converted_model_bytes = <bytes>None
                return
            models_dir = constants_inf.MODELS_FOLDER
            engine_filename = EngineClass.get_engine_filename()
            if engine_filename is not None:
                try:
                    # Prefer a pre-built engine matching this machine.
                    self.ai_availability_status.set_status(AIAvailabilityEnum.DOWNLOADING)
                    res = self.loader_client.load_big_small_resource(engine_filename, models_dir)
                    if res.err is not None:
                        raise Exception(res.err)
                    self.engine = EngineClass(res.data)
                    self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
                except Exception as e:
                    source_filename = EngineClass.get_source_filename()
                    if source_filename is None:
                        self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str>f"Pre-built engine not found: {str(e)}")
                        return
                    # Fall back to converting the source model in background.
                    self.ai_availability_status.set_status(AIAvailabilityEnum.WARNING, <str>str(e))
                    source_bytes = self.download_model(source_filename)
                    self.is_building_engine = <bint>True
                    thread = Thread(target=self.convert_and_upload_model, args=(source_bytes, engine_filename))
                    thread.daemon = True
                    thread.start()
                    return
            else:
                # Backends without a machine-specific build load ONNX directly.
                self.engine = EngineClass(<bytes>self.download_model(constants_inf.AI_ONNX_MODEL_FILE))
                self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
            self.is_building_engine = <bint>False
        except Exception as e:
            self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str>str(e))
            self.is_building_engine = <bint>False
cdef bint is_video(self, str filepath):
mime_type, _ = mimetypes.guess_type(<str>filepath)
return <bint>(mime_type and mime_type.startswith("video"))
    cpdef run_detect(self, dict config_dict, object annotation_callback, object status_callback=None):
        """Run detection over the media paths listed in *config_dict*.

        Splits paths into images and videos, (re)initializes the engine if
        needed, and invokes annotation_callback for each annotation produced.
        Returns early when the engine is still being built.
        """
        cdef list[str] videos = []
        cdef list[str] images = []
        cdef AIRecognitionConfig ai_config = AIRecognitionConfig.from_dict(config_dict)
        if ai_config is None:
            raise Exception('ai recognition config is empty')
        self._annotation_callback = annotation_callback
        self._status_callback = status_callback
        self.stop_signal = <bint>False
        self.init_ai()
        if self.engine is None:
            # Conversion may still be running on a background thread.
            constants_inf.log(<str> "AI engine not available. Conversion may be in progress. Skipping inference.")
            return
        self.detection_counts = {}
        for p in ai_config.paths:
            # Media are keyed by their space-stripped file stem.
            media_name = Path(<str>p).stem.replace(" ", "")
            self.detection_counts[media_name] = 0
            if self.is_video(p):
                videos.append(p)
            else:
                images.append(p)
        if len(images) > 0:
            constants_inf.log(<str>f'run inference on {" ".join(images)}...')
            self._process_images(ai_config, images)
        if len(videos) > 0:
            for v in videos:
                constants_inf.log(<str>f'run inference on {v}...')
                self._process_video(ai_config, v)
cdef _process_video(self, AIRecognitionConfig ai_config, str video_name):
cdef int frame_count = 0
cdef int batch_count = 0
cdef list batch_frames = []
cdef list[long] batch_timestamps = []
cdef Annotation annotation
cdef int model_h, model_w
self._previous_annotation = <Annotation>None
model_h, model_w = self.engine.get_input_shape()
v_input = cv2.VideoCapture(<str>video_name)
if not v_input.isOpened():
constants_inf.logerror(<str>f'Failed to open video: {video_name}')
return
total_frames = int(v_input.get(cv2.CAP_PROP_FRAME_COUNT))
fps = v_input.get(cv2.CAP_PROP_FPS)
width = int(v_input.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(v_input.get(cv2.CAP_PROP_FRAME_HEIGHT))
constants_inf.log(<str>f'Video: {total_frames} frames, {fps:.1f} fps, {width}x{height}')
cdef int effective_batch = min(self.engine.max_batch_size, ai_config.model_batch_size)
if effective_batch < 1:
effective_batch = 1
while v_input.isOpened() and not self.stop_signal:
ret, frame = v_input.read()
if not ret or frame is None:
break
frame_count += 1
if frame_count % ai_config.frame_period_recognition == 0:
batch_frames.append(frame)
batch_timestamps.append(<long>v_input.get(cv2.CAP_PROP_POS_MSEC))
if len(batch_frames) >= effective_batch:
batch_count += 1
constants_inf.log(<str>f'Video batch {batch_count}: frame {frame_count}/{total_frames} ({frame_count*100//total_frames}%)')
self._process_video_batch(ai_config, batch_frames, batch_timestamps, video_name, frame_count, total_frames, model_w)
batch_frames = []
batch_timestamps = []
if batch_frames:
batch_count += 1
constants_inf.log(<str>f'Video batch {batch_count} (flush): {len(batch_frames)} remaining frames')
self._process_video_batch(ai_config, batch_frames, batch_timestamps, video_name, frame_count, total_frames, model_w)
v_input.release()
constants_inf.log(<str>f'Video done: {frame_count} frames read, {batch_count} batches processed')
self.send_detection_status()
    cdef _process_video_batch(self, AIRecognitionConfig ai_config, list batch_frames,
                              list batch_timestamps, str video_name,
                              int frame_count, int total_frames, int model_w):
        """Run the engine on one batch of frames and emit valid annotations.

        Valid annotations get a JPEG of the source frame attached and are
        forwarded through on_annotation. Frames without detections are still
        passed to is_valid_video_annotation — presumably for its side
        effects; confirm against its implementation.
        """
        cdef Annotation annotation
        list_detections = self.engine.process_frames(batch_frames, ai_config)
        total_dets = sum(len(d) for d in list_detections)
        if total_dets > 0:
            constants_inf.log(<str>f'Video batch: {total_dets} detections from postprocess')
        for i in range(len(list_detections)):
            detections = list_detections[i]
            # Annotation names combine the media stem with a fixed-width
            # timestamp (see constants_inf.format_time).
            original_media_name = Path(<str>video_name).stem.replace(" ", "")
            name = f'{original_media_name}_{constants_inf.format_time(batch_timestamps[i])}'
            annotation = Annotation(name, original_media_name, batch_timestamps[i], detections)
            if detections:
                valid = self.is_valid_video_annotation(annotation, ai_config, model_w)
                constants_inf.log(<str>f'Video frame {name}: {len(detections)} dets, valid={valid}')
                if valid:
                    _, image = cv2.imencode('.jpg', batch_frames[i])
                    annotation.image = image.tobytes()
                    self._previous_annotation = annotation
                    self.on_annotation(annotation, frame_count, total_frames)
            else:
                self.is_valid_video_annotation(annotation, ai_config, model_w)
cdef on_annotation(self, Annotation annotation, int frame_count=0, int total_frames=0):
self.detection_counts[annotation.original_media_name] = self.detection_counts.get(annotation.original_media_name, 0) + 1
if self._annotation_callback is not None:
percent = int(frame_count * 100 / total_frames) if total_frames > 0 else 0
cb = self._annotation_callback
cb(annotation, percent)
    cdef _process_images(self, AIRecognitionConfig ai_config, list[str] image_paths):
        """Run detection on still images, tiling any image much larger than
        the model input.

        For each readable image the ground sampling distance is derived from
        the camera parameters in ai_config; images larger than 1.5x the model
        input are split into overlapping tiles sized from METERS_IN_TILE.
        All frames/tiles are batched through the engine and valid annotations
        are forwarded via on_annotation().
        """
        cdef list all_frame_data = []
        cdef double ground_sampling_distance
        cdef int model_h, model_w
        model_h, model_w = self.engine.get_input_shape()
        # Reset cross-tile duplicate tracking for this run.
        self._tile_detections = {}
        for path in image_paths:
            frame = cv2.imread(<str>path)
            if frame is None:
                constants_inf.logerror(<str>f'Failed to read image {path}')
                continue
            img_h, img_w, _ = frame.shape
            original_media_name = Path(<str> path).stem.replace(" ", "")
            # GSD = sensor_width * altitude / (focal_length * image_width);
            # presumably meters-per-pixel — TODO confirm the config units.
            ground_sampling_distance = ai_config.sensor_width * ai_config.altitude / (ai_config.focal_length * img_w)
            constants_inf.log(<str>f'ground sampling distance: {ground_sampling_distance}')
            if img_h <= 1.5 * model_h and img_w <= 1.5 * model_w:
                # Small enough to process whole; synthetic _000000 time suffix.
                all_frame_data.append((frame, original_media_name, f'{original_media_name}_000000', ground_sampling_distance))
            else:
                tile_size = int(constants_inf.METERS_IN_TILE / ground_sampling_distance)
                constants_inf.log(<str> f'calc tile size: {tile_size}')
                res = self.split_to_tiles(frame, path, tile_size, ai_config.big_image_tile_overlap_percent)
                for tile_frame, omn, tile_name in res:
                    all_frame_data.append((tile_frame, omn, tile_name, ground_sampling_distance))
        if not all_frame_data:
            return
        # Single batched engine call over every frame and tile collected above.
        frames = [fd[0] for fd in all_frame_data]
        all_dets = self.engine.process_frames(frames, ai_config)
        for i in range(len(all_dets)):
            frame_entry = all_frame_data[i]
            f = frame_entry[0]
            original_media_name = frame_entry[1]
            name = frame_entry[2]
            gsd = frame_entry[3]
            annotation = Annotation(name, original_media_name, 0, all_dets[i])
            if self.is_valid_image_annotation(annotation, gsd, f.shape):
                constants_inf.log(<str> f'Detected {annotation}')
                _, image = cv2.imencode('.jpg', f)
                annotation.image = image.tobytes()
                self.on_annotation(annotation)
        self.send_detection_status()
cdef send_detection_status(self):
if self._status_callback is not None:
cb = self._status_callback
for media_name in self.detection_counts.keys():
cb(media_name, self.detection_counts[media_name])
self.detection_counts.clear()
    cdef split_to_tiles(self, frame, path, tile_size, overlap_percent):
        """Split *frame* into square tiles of *tile_size* pixels with the
        given percentage of overlap between neighbouring tiles.

        Returns a list of (tile, original_media_name, tile_name) tuples where
        tile_name encodes the tile size and x/y origin so detections can be
        mapped back to absolute coordinates (see remove_tiled_duplicates).
        """
        constants_inf.log(<str>f'splitting image {path} to tiles...')
        img_h, img_w, _ = frame.shape
        stride_w = int(tile_size * (1 - overlap_percent / 100))
        stride_h = int(tile_size * (1 - overlap_percent / 100))
        results = []
        original_media_name = Path(<str> path).stem.replace(" ", "")
        for y in range(0, img_h, stride_h):
            for x in range(0, img_w, stride_w):
                x_end = min(x + tile_size, img_w)
                y_end = min(y + tile_size, img_h)
                if x_end - x < tile_size:
                    # Partial tile at the right edge: skip it when the previous
                    # tile already covered the remainder; otherwise snap the
                    # origin left so a full-size edge tile is extracted.
                    # (Rebinding x does not affect range() iteration.)
                    if img_w - (x - stride_w) <= tile_size:
                        continue
                    x = img_w - tile_size
                if y_end - y < tile_size:
                    # Same snap/skip logic for the bottom edge.
                    if img_h - (y - stride_h) <= tile_size:
                        continue
                    y = img_h - tile_size
                tile = frame[y:y_end, x:x_end]
                # Name embeds SPLIT_SUFFIX + 'size_x_y!' for downstream parsing.
                name = f'{original_media_name}{constants_inf.SPLIT_SUFFIX}{tile_size:04d}_{x:04d}_{y:04d}!_000000'
                results.append((tile, original_media_name, name))
        return results
cpdef stop(self):
self.stop_signal = <bint>True
    cdef remove_tiled_duplicates(self, Annotation annotation):
        """Drop detections already reported by an overlapping tile of the
        same media.

        The tile size and x/y origin are parsed back out of the annotation
        name (format written by split_to_tiles); each detection is converted
        to absolute pixel coordinates and compared against the per-media list
        of previously seen absolute detections.  annotation.detections is
        replaced in place with only the unseen ones.
        """
        right = annotation.name.rindex('!')
        left = annotation.name.index(constants_inf.SPLIT_SUFFIX) + len(constants_inf.SPLIT_SUFFIX)
        tile_size_str, x_str, y_str = annotation.name[left:right].split('_')
        tile_size = int(tile_size_str)
        x = int(x_str)
        y = int(y_str)
        cdef list[Detection] unique_detections = []
        existing_abs_detections = self._tile_detections.setdefault(annotation.original_media_name, [])
        for det in annotation.detections:
            # det.x/det.y appear to be tile-relative fractions — scaling by
            # tile_size yields pixel offsets within the tile. TODO confirm.
            x1 = det.x * tile_size
            y1 = det.y * tile_size
            det_abs = Detection(x + x1, y + y1, det.w * tile_size, det.h * tile_size, det.cls, det.confidence)
            # Relies on Detection equality; list membership is O(n) per det.
            if det_abs not in existing_abs_detections:
                unique_detections.append(det)
                existing_abs_detections.append(det_abs)
        annotation.detections = unique_detections
    cdef bint is_valid_image_annotation(self, Annotation annotation, double ground_sampling_distance, frame_shape):
        """Return True when the annotation still has detections after tile
        de-duplication and physical-size filtering.

        Each detection's real-world width/height (normalized size * image
        pixels * GSD) must not exceed max_object_size_meters for its class;
        oversized detections are removed from annotation.detections in place.
        """
        if constants_inf.SPLIT_SUFFIX in annotation.name:
            self.remove_tiled_duplicates(annotation)
        img_h, img_w, _ = frame_shape
        if annotation.detections:
            constants_inf.log(<str> f'Initial ann: {annotation}')
        cdef list[Detection] valid_detections = []
        for det in annotation.detections:
            # Convert normalized box size to meters via the image dimensions
            # and the ground sampling distance.
            m_w = det.w * img_w * ground_sampling_distance
            m_h = det.h * img_h * ground_sampling_distance
            max_size = constants_inf.annotations_dict[det.cls].max_object_size_meters
            if m_w <= max_size and m_h <= max_size:
                valid_detections.append(det)
                constants_inf.log(<str> f'Kept ({m_w} {m_h}) <= {max_size}. class: {constants_inf.annotations_dict[det.cls].name}')
            else:
                constants_inf.log(<str> f'Removed ({m_w} {m_h}) > {max_size}. class: {constants_inf.annotations_dict[det.cls].name}')
        annotation.detections = valid_detections
        if not annotation.detections:
            return <bint>False
        return <bint>True
    cdef bint is_valid_video_annotation(self, Annotation annotation, AIRecognitionConfig ai_config, int model_w):
        """Decide whether a video-frame annotation is novel enough to emit.

        Returns True when: it is the first annotation of the video, enough
        time has passed since the previously emitted one, it contains more
        detections, some detection moved farther than the tracking distance
        from every previous detection, or a detection's confidence rose by at
        least tracking_probability_increase over its nearest previous one.
        """
        if constants_inf.SPLIT_SUFFIX in annotation.name:
            self.remove_tiled_duplicates(annotation)
        if not annotation.detections:
            return <bint>False
        if self._previous_annotation is None:
            return <bint>True
        # Time-based refresh: always emit after frame_recognition_seconds.
        if annotation.time >= self._previous_annotation.time + <long>(ai_config.frame_recognition_seconds * 1000):
            return <bint>True
        if len(annotation.detections) > len(self._previous_annotation.detections):
            return <bint>True
        cdef:
            Detection current_det, prev_det
            double dx, dy, distance_sq, min_distance_sq
            Detection closest_det
        for current_det in annotation.detections:
            # Find the nearest previous detection (squared distances, no sqrt).
            # closest_det cannot stay None here: the earlier length check
            # guarantees _previous_annotation.detections is non-empty.
            min_distance_sq = <double>1e18
            closest_det = <Detection>None
            for prev_det in self._previous_annotation.detections:
                dx = current_det.x - prev_det.x
                dy = current_det.y - prev_det.y
                distance_sq = dx * dx + dy * dy
                if distance_sq < min_distance_sq:
                    min_distance_sq = distance_sq
                    closest_det = prev_det
            # tracking_distance_confidence is a fraction of the model width;
            # dist_px is loop-invariant and could be hoisted.
            dist_px = ai_config.tracking_distance_confidence * model_w
            dist_px_sq = dist_px * dist_px
            if min_distance_sq > dist_px_sq:
                return <bint>True
            if current_det.confidence >= closest_det.confidence + ai_config.tracking_probability_increase:
                return <bint>True
        return <bint>False
+8
View File
@@ -0,0 +1,8 @@
# Outcome of a loader HTTP call: err is None on success (error text
# otherwise); data carries the downloaded payload when applicable.
cdef class LoadResult:
    cdef public object err
    cdef public object data
# Thin HTTP client for the loader service: download and upload of
# big/small resources, both returning a LoadResult.
cdef class LoaderHttpClient:
    cdef str base_url
    cdef LoadResult load_big_small_resource(self, str filename, str directory)
    cdef LoadResult upload_big_small_resource(self, bytes content, str filename, str directory)
+43
View File
@@ -0,0 +1,43 @@
import requests
from loguru import logger
HTTP_TIMEOUT = 120
# Result wrapper for loader HTTP calls (attributes declared in the .pxd):
# err is None on success, otherwise a string describing the failure;
# data holds the downloaded bytes when applicable.
cdef class LoadResult:
    def __init__(self, err, data=None):
        self.err = err
        self.data = data
cdef class LoaderHttpClient:
    """HTTP client for the loader service: download and upload resources,
    returning LoadResult instead of raising."""

    def __init__(self, base_url: str):
        # Normalise so endpoint paths can always be appended after one '/'.
        self.base_url = base_url.rstrip("/")

    cdef LoadResult load_big_small_resource(self, str filename, str directory):
        """Download *filename* from *directory*; raw bytes go in ``data`` on
        success, the error text in ``err`` on failure."""
        # NOTE(review): the literal '(unknown)' path segment looks like a
        # templating artifact — confirm the intended endpoint path.
        url = f"{self.base_url}/load/(unknown)"
        payload = {"filename": filename, "folder": directory}
        try:
            response = requests.post(url, json=payload, stream=True, timeout=HTTP_TIMEOUT)
            response.raise_for_status()
            # .content drains the stream inside the try so read errors are
            # reported the same way as request errors.
            return LoadResult(None, response.content)
        except Exception as e:
            logger.error(f"LoaderHttpClient.load_big_small_resource failed: {e}")
            return LoadResult(str(e))

    cdef LoadResult upload_big_small_resource(self, bytes content, str filename, str directory):
        """Upload *content* as *filename* into *directory*; ``err`` is None
        on success."""
        url = f"{self.base_url}/upload/(unknown)"
        form_files = {"data": (filename, content)}
        form_fields = {"folder": directory}
        try:
            response = requests.post(url, files=form_files, data=form_fields, timeout=HTTP_TIMEOUT)
            response.raise_for_status()
            return LoadResult(None)
        except Exception as e:
            logger.error(f"LoaderHttpClient.upload_big_small_resource failed: {e}")
            return LoadResult(str(e))
+305
View File
@@ -0,0 +1,305 @@
import asyncio
import base64
import json
import os
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Optional
import requests as http_requests
from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from loader_http_client import LoaderHttpClient, LoadResult
app = FastAPI(title="Azaion.Detections")
# Worker pool that runs blocking inference calls off the event loop.
executor = ThreadPoolExecutor(max_workers=2)
LOADER_URL = os.environ.get("LOADER_URL", "http://loader:8080")
ANNOTATIONS_URL = os.environ.get("ANNOTATIONS_URL", "http://annotations:8080")
loader_client = LoaderHttpClient(LOADER_URL)
# Lazily constructed Inference singleton; see get_inference().
inference = None
# One SSE subscriber queue per open /detect/stream connection.
_event_queues: list[asyncio.Queue] = []
# In-flight detection tasks keyed by media id (guards duplicate starts).
_active_detections: dict[str, asyncio.Task] = {}
class TokenManager:
    """Holds a JWT access/refresh token pair and refreshes the access token
    shortly before it expires."""

    def __init__(self, access_token: str, refresh_token: str):
        self.access_token = access_token
        self.refresh_token = refresh_token

    def get_valid_token(self) -> str:
        """Return the access token, refreshing it first when it expires
        within the next 60 seconds (or its expiry cannot wait)."""
        expires_at = self._decode_exp(self.access_token)
        if expires_at and expires_at - time.time() < 60:
            self._refresh()
        return self.access_token

    def _refresh(self):
        # Best effort: on any failure keep the (possibly stale) token.
        try:
            resp = http_requests.post(
                f"{ANNOTATIONS_URL}/auth/refresh",
                json={"refreshToken": self.refresh_token},
                timeout=10,
            )
            if resp.status_code == 200:
                self.access_token = resp.json()["token"]
        except Exception:
            pass

    @staticmethod
    def _decode_exp(token: str) -> Optional[float]:
        """Extract the ``exp`` claim from a JWT without verifying it;
        returns None when the token cannot be parsed."""
        try:
            segment = token.split(".")[1]
            remainder = len(segment) % 4
            if remainder:
                segment += "=" * (4 - remainder)
            claims = json.loads(base64.urlsafe_b64decode(segment))
            return float(claims.get("exp", 0))
        except Exception:
            return None
def get_inference():
    """Return the process-wide Inference singleton, constructing it on first
    use (the import is deferred so module import stays cheap)."""
    global inference
    if inference is not None:
        return inference
    from inference import Inference
    inference = Inference(loader_client)
    return inference
class DetectionDto(BaseModel):
    """Wire format for one detection; values are passed through from the
    engine Detection (presumably normalized coordinates — confirm against
    the engine contract)."""
    centerX: float
    centerY: float
    width: float
    height: float
    classNum: int
    label: str
    confidence: float
class DetectionEvent(BaseModel):
    """SSE payload describing detection progress for one media item."""
    annotations: list[DetectionDto]
    mediaId: str
    # "AIProcessing" | "AIProcessed" | "Error" (values emitted by detect_media)
    mediaStatus: str
    mediaPercent: int
class HealthResponse(BaseModel):
    """/health response: service liveness plus AI engine availability."""
    status: str
    aiAvailability: str
    engineType: Optional[str] = None
    errorMessage: Optional[str] = None
class AIConfigDto(BaseModel):
    """Client-tunable recognition parameters (mirrors AIRecognitionConfig)."""
    frame_period_recognition: int = 4      # sample every N-th video frame
    frame_recognition_seconds: int = 2     # min seconds between emitted video annotations
    probability_threshold: float = 0.25
    tracking_distance_confidence: float = 0.0   # fraction of model input width
    tracking_probability_increase: float = 0.0
    tracking_intersection_threshold: float = 0.6
    model_batch_size: int = 8
    big_image_tile_overlap_percent: int = 20
    # Camera parameters used for ground-sampling-distance computation.
    altitude: float = 400
    focal_length: float = 24
    sensor_width: float = 23.5
    # Mutable default is safe here: pydantic copies defaults per instance.
    paths: list[str] = []
def detection_to_dto(det) -> DetectionDto:
    """Map an engine Detection onto the API wire format, resolving the class
    number to its human-readable label."""
    import constants_inf

    return DetectionDto(
        centerX=det.x,
        centerY=det.y,
        width=det.w,
        height=det.h,
        classNum=det.cls,
        label=constants_inf.get_annotation_name(det.cls),
        confidence=det.confidence,
    )
@app.get("/health")
def health() -> HealthResponse:
    """Liveness probe that also reports AI engine availability.

    Always returns status "healthy"; engine problems surface via the
    aiAvailability / errorMessage fields instead of an error response.
    """
    if inference is None:
        # Engine never initialized — service is up but no model is loaded yet.
        return HealthResponse(status="healthy", aiAvailability="None")
    try:
        availability = inference.ai_availability_status
        text = str(availability)
        # First word of the status string is the availability state name.
        status_str = text.split()[0] if text.strip() else "None"
        error_msg = getattr(availability, "error_message", None)
        return HealthResponse(
            status="healthy",
            aiAvailability=status_str,
            engineType=inference.engine_name,
            errorMessage=error_msg,
        )
    except Exception as e:
        return HealthResponse(
            status="healthy",
            aiAvailability="None",
            errorMessage=str(e),
        )
@app.post("/detect")
async def detect_image(
    file: UploadFile = File(...),
    config: Optional[str] = Form(None),
):
    """Run detection on a single uploaded image and return the detections.

    The upload is validated as a decodable image, written to a temporary
    file (the inference pipeline reads from paths), processed in the worker
    executor, and the collected detections are returned as DTOs.

    Raises HTTPException 400 for bad input, 503 when the engine is
    unavailable, 422 for other runtime failures.
    """
    import tempfile
    import cv2
    import numpy as np

    image_bytes = await file.read()
    if not image_bytes:
        raise HTTPException(status_code=400, detail="Image is empty")
    arr = np.frombuffer(image_bytes, dtype=np.uint8)
    if cv2.imdecode(arr, cv2.IMREAD_COLOR) is None:
        raise HTTPException(status_code=400, detail="Invalid image data")
    config_dict = {}
    if config:
        try:
            config_dict = json.loads(config)
        except json.JSONDecodeError as e:
            # BUGFIX: malformed client config previously escaped as an
            # unhandled 500; report it as a 400 like other bad input.
            raise HTTPException(status_code=400, detail=f"Invalid config JSON: {e}")
    suffix = os.path.splitext(file.filename or "upload.jpg")[1] or ".jpg"
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
    try:
        tmp.write(image_bytes)
        tmp.close()
        config_dict["paths"] = [tmp.name]
        # get_running_loop() is the non-deprecated call inside a coroutine.
        loop = asyncio.get_running_loop()
        inf = get_inference()
        results = []

        def on_annotation(annotation, percent):
            # run_detect invokes this from the executor thread before
            # run_in_executor returns, so appending here is safe.
            results.extend(annotation.detections)

        await loop.run_in_executor(executor, inf.run_detect, config_dict, on_annotation)
        return [detection_to_dto(d) for d in results]
    except RuntimeError as e:
        # Engine-unavailable errors map to 503; other runtime failures to 422.
        if "not available" in str(e):
            raise HTTPException(status_code=503, detail=str(e))
        raise HTTPException(status_code=422, detail=str(e))
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    finally:
        os.unlink(tmp.name)
def _format_video_time(time_ms) -> str:
    """Format a millisecond offset as HH:MM:SS ("00:00:00" for falsy input)."""
    total_seconds = int(time_ms // 1000) if time_ms else 0
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"


def _post_annotation_to_service(token_mgr: TokenManager, media_id: str,
                                annotation, dtos: list[DetectionDto]):
    """Best-effort push of one annotation (detections plus optional JPEG) to
    the annotations service; all failures are swallowed so detection
    processing is never interrupted by the mirror."""
    try:
        token = token_mgr.get_valid_token()
        image_b64 = base64.b64encode(annotation.image).decode() if annotation.image else None
        payload = {
            "mediaId": media_id,
            "source": 0,
            # BUGFIX: was f"00:00:{ms // 1000:02d}", which overflows past
            # 59 seconds (90 s rendered as "00:00:90"); format the offset
            # as a proper HH:MM:SS instead.
            "videoTime": _format_video_time(annotation.time),
            "detections": [d.model_dump() for d in dtos],
        }
        if image_b64:
            payload["image"] = image_b64
        http_requests.post(
            f"{ANNOTATIONS_URL}/annotations",
            json=payload,
            headers={"Authorization": f"Bearer {token}"},
            timeout=30,
        )
    except Exception:
        # NOTE(review): intentionally silent (best-effort mirror); consider
        # at least debug-logging the failure.
        pass
@app.post("/detect/{media_id}")
async def detect_media(media_id: str, request: Request, config: Optional[AIConfigDto] = None):
    """Kick off asynchronous detection for *media_id* and return immediately.

    Progress and results are broadcast as DetectionEvents to /detect/stream
    subscribers; when auth headers were supplied, annotations are also
    mirrored to the annotations service.  Responds 409 when a detection for
    the same media is already running.
    """
    existing = _active_detections.get(media_id)
    if existing is not None and not existing.done():
        raise HTTPException(status_code=409, detail="Detection already in progress for this media")
    # Tokens are optional: without them results are only streamed, not stored.
    auth_header = request.headers.get("authorization", "")
    access_token = auth_header.removeprefix("Bearer ").strip() if auth_header else ""
    refresh_token = request.headers.get("x-refresh-token", "")
    token_mgr = TokenManager(access_token, refresh_token) if access_token else None
    cfg = config or AIConfigDto()
    config_dict = cfg.model_dump()
    async def run_detection():
        # NOTE(review): asyncio.get_running_loop() is preferred inside a
        # coroutine; get_event_loop() is deprecated here since Python 3.10.
        loop = asyncio.get_event_loop()
        def _enqueue(event):
            # Fan out to every SSE subscriber; events for slow consumers
            # (full queues) are dropped rather than blocking.
            for q in _event_queues:
                try:
                    q.put_nowait(event)
                except asyncio.QueueFull:
                    pass
        try:
            inf = get_inference()
            if not inf.is_engine_ready:
                raise RuntimeError("Detection service unavailable")
            def on_annotation(annotation, percent):
                # Runs on the executor thread: marshal the event back onto
                # the loop, then mirror it to the annotations service.
                dtos = [detection_to_dto(d) for d in annotation.detections]
                event = DetectionEvent(
                    annotations=dtos,
                    mediaId=media_id,
                    mediaStatus="AIProcessing",
                    mediaPercent=percent,
                )
                loop.call_soon_threadsafe(_enqueue, event)
                if token_mgr and dtos:
                    _post_annotation_to_service(token_mgr, media_id, annotation, dtos)
            def on_status(media_name, count):
                # Per-media completion signal; media_name/count are unused
                # here — the event always reports this request's media_id.
                event = DetectionEvent(
                    annotations=[],
                    mediaId=media_id,
                    mediaStatus="AIProcessed",
                    mediaPercent=100,
                )
                loop.call_soon_threadsafe(_enqueue, event)
            await loop.run_in_executor(
                executor, inf.run_detect, config_dict, on_annotation, on_status
            )
        except Exception:
            # Any failure is reported to subscribers as a generic Error event.
            error_event = DetectionEvent(
                annotations=[],
                mediaId=media_id,
                mediaStatus="Error",
                mediaPercent=0,
            )
            _enqueue(error_event)
        finally:
            _active_detections.pop(media_id, None)
    _active_detections[media_id] = asyncio.create_task(run_detection())
    return {"status": "started", "mediaId": media_id}
@app.get("/detect/stream")
async def detect_stream():
    """Server-Sent Events endpoint: streams DetectionEvent JSON payloads to
    the client until it disconnects; the subscriber queue is removed on
    disconnect so broadcasts stop fanning out to it."""
    subscriber: asyncio.Queue = asyncio.Queue(maxsize=100)
    _event_queues.append(subscriber)

    async def event_generator():
        try:
            while True:
                event = await subscriber.get()
                yield f"data: {event.model_dump_json()}\n\n"
        except asyncio.CancelledError:
            # Client went away; fall through to unsubscribe.
            pass
        finally:
            _event_queues.remove(subscriber)

    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
    )