Add AIAvailabilityStatus and AIRecognitionConfig classes for AI model management

- Introduced `AIAvailabilityStatus` class to manage the availability status of AI models, including methods for setting status and logging messages.
- Added `AIRecognitionConfig` class to encapsulate configuration parameters for AI recognition, with a static method for creating instances from dictionaries.
- Implemented enums for AI availability states to enhance clarity and maintainability.
- Updated related Cython files to support the new classes and ensure proper type handling.

These changes aim to improve the structure and functionality of the AI model management system, facilitating better status tracking and configuration handling.
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-03-31 05:49:51 +03:00
parent fc57d677b4
commit 8ce40a9385
43 changed files with 1190 additions and 462 deletions
+52
View File
@@ -0,0 +1,52 @@
import platform
import sys
def _check_tensor_gpu_index():
    """Return the index of the first NVIDIA GPU suitable for TensorRT.

    A GPU qualifies when its CUDA compute capability is >= 6.1 (Pascal
    or newer). Returns -1 when pynvml is not installed, NVML cannot be
    initialised, or no device meets the requirement.
    """
    try:
        import pynvml
    except Exception:
        # pynvml not installed on this machine — no NVIDIA GPU path.
        return -1
    initialized = False
    try:
        pynvml.nvmlInit()
        initialized = True
        for i in range(pynvml.nvmlDeviceGetCount()):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle)
            # Equivalent to: major > 6 or (major == 6 and minor >= 1).
            if (major, minor) >= (6, 1):
                return i
        return -1
    except Exception:
        # Any NVML failure (no driver, no device, ...) means "no usable GPU".
        return -1
    finally:
        # Only shut down NVML if nvmlInit() actually succeeded; the
        # original unconditionally re-imported pynvml and called
        # nvmlShutdown() even when initialisation never happened.
        if initialized:
            try:
                pynvml.nvmlShutdown()
            except Exception:
                pass
def _is_apple_silicon():
    """Return True on macOS/arm64 machines where coremltools is importable."""
    on_mac_arm = sys.platform == "darwin" and platform.machine() == "arm64"
    if not on_mac_arm:
        return False
    try:
        import coremltools  # noqa: F401 -- availability probe only
    except ImportError:
        return False
    return True
# Probe once at import time; engines.tensorrt_engine reads this value too.
tensor_gpu_index = _check_tensor_gpu_index()


def _select_engine_class():
    """Return the inference-engine class best suited to this machine.

    Priority: TensorRT (capable NVIDIA GPU) > CoreML (Apple Silicon)
    > ONNX Runtime (portable fallback). Backend imports are deferred so
    unavailable backends are never loaded.
    """
    if tensor_gpu_index > -1:
        from engines.tensorrt_engine import TensorRTEngine  # pyright: ignore[reportMissingImports]
        return TensorRTEngine
    elif _is_apple_silicon():
        from engines.coreml_engine import CoreMLEngine
        return CoreMLEngine
    else:
        from engines.onnx_engine import OnnxEngine
        return OnnxEngine


EngineClass = _select_engine_class()
+13
View File
@@ -0,0 +1,13 @@
from engines.inference_engine cimport InferenceEngine
cdef class CoreMLEngine(InferenceEngine):
    # Loaded coremltools MLModel (held as a generic Python object).
    cdef object model
    # Input image dimensions read from the CoreML model spec.
    cdef int img_width
    cdef int img_height
    cdef tuple get_input_shape(self)
    cdef run(self, input_data)
    cdef preprocess(self, list frames)
    cdef list postprocess(self, output, object ai_config)
+100
View File
@@ -0,0 +1,100 @@
from engines.inference_engine cimport InferenceEngine
from annotation cimport Detection
cimport constants_inf
import numpy as np
from PIL import Image
import cv2
import io
import os
import tempfile
import zipfile
cdef class CoreMLEngine(InferenceEngine):
    """Inference backend running the model through coremltools on macOS.

    The model is shipped as a zip containing a .mlpackage/.mlmodel.
    preprocess() only uses frames[0], so this backend effectively runs
    with a batch size of 1 regardless of max_batch_size.
    """
    def __init__(self, model_bytes: bytes, max_batch_size: int = 1, **kwargs):
        InferenceEngine.__init__(self, model_bytes, max_batch_size, engine_name="coreml")
        import coremltools as ct
        model_path = kwargs.get('model_path')
        if model_path is None:
            # No on-disk path supplied: unpack the zipped model bytes into
            # a temp dir and load from there.
            model_path = self._extract_from_zip(model_bytes)
        self.model = ct.models.MLModel(
            model_path, compute_units=ct.ComputeUnit.ALL)
        # Read the expected input image size from the model spec.
        spec = self.model.get_spec()
        img_input = spec.description.input[0]
        self.img_width = int(img_input.type.imageType.width)
        self.img_height = int(img_input.type.imageType.height)
        constants_inf.log(<str>f'CoreML model: {self.img_width}x{self.img_height}')
    @staticmethod
    def get_engine_filename():
        # Filename of the packaged CoreML model artifact.
        return "azaion_coreml.zip"
    @staticmethod
    def _extract_from_zip(model_bytes):
        """Unpack the zipped model bytes and return the path of the first
        .mlpackage/.mlmodel entry; raise ValueError when none is present.

        NOTE(review): the temp dir is never removed — presumably the
        extracted model must stay on disk for the process lifetime;
        confirm cleanup is handled elsewhere.
        """
        tmpdir = tempfile.mkdtemp()
        buf = io.BytesIO(model_bytes)
        with zipfile.ZipFile(buf, 'r') as zf:
            zf.extractall(tmpdir)
        for item in os.listdir(tmpdir):
            if item.endswith('.mlpackage') or item.endswith('.mlmodel'):
                return os.path.join(tmpdir, item)
        raise ValueError("No .mlpackage or .mlmodel found in zip")
    cdef tuple get_input_shape(self):
        # (height, width) expected by the model.
        return <tuple>(self.img_height, self.img_width)
    cdef preprocess(self, list frames):
        # Only the first frame is used: prediction here is single-image.
        frame = frames[0]
        # BGR (OpenCV) -> RGB, resized to the model's input size.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        resized = cv2.resize(rgb, (self.img_width, self.img_height))
        return Image.fromarray(resized)
    cdef run(self, input_data):
        # getattr avoids Cython's static attribute lookup on the opaque
        # MLModel object.
        predict = getattr(self.model, 'predict')
        return predict({
            'image': input_data,
            'iouThreshold': 0.45,
            'confidenceThreshold': 0.25,
        })
    cdef list postprocess(self, output, object ai_config):
        """Convert CoreML output into a one-frame list of Detection lists.

        'coordinates' holds normalised (cx, cy, w, h) boxes; 'confidence'
        holds per-box, per-class scores. Returns [detections] — a single
        inner list, matching the single-frame batch of preprocess().
        """
        cdef int w = self.img_width
        cdef int h = self.img_height
        # Missing keys degrade to empty arrays (no detections).
        coords = output.get('coordinates', np.empty((0, 4), dtype=np.float32))
        confs = output.get('confidence', np.empty((0, 80), dtype=np.float32))
        cdef list[Detection] detections = []
        if coords.size == 0:
            return [detections]
        # Scale normalised centre/size boxes to pixel corner coordinates...
        cx, cy, bw, bh = coords[:, 0], coords[:, 1], coords[:, 2], coords[:, 3]
        x1 = (cx - bw / 2) * w
        y1 = (cy - bh / 2) * h
        x2 = (cx + bw / 2) * w
        y2 = (cy + bh / 2) * h
        # Best class and its score per box.
        class_ids = np.argmax(confs, axis=1)
        conf_values = np.max(confs, axis=1)
        for i in range(len(conf_values)):
            conf = round(float(conf_values[i]), 2)
            if conf < ai_config.probability_threshold:
                continue
            # ...then re-normalise back to [0, 1] for Detection.
            det_x1 = float(x1[i]) / w
            det_y1 = float(y1[i]) / h
            det_x2 = float(x2[i]) / w
            det_y2 = float(y2[i]) / h
            det_cx = (det_x1 + det_x2) / 2
            det_cy = (det_y1 + det_y2) / 2
            det_w = det_x2 - det_x1
            det_h = det_y2 - det_y1
            detections.append(Detection(det_cx, det_cy, det_w, det_h, int(class_ids[i]), conf))
        # Apply the shared overlap filter from the base class.
        filtered = self.remove_overlapping(detections, ai_config.tracking_intersection_threshold)
        return [filtered]
+12
View File
@@ -0,0 +1,12 @@
from annotation cimport Detection
cdef class InferenceEngine:
    # Base interface for all inference backends (ONNX, CoreML, TensorRT).
    cdef public int max_batch_size      # largest number of frames per run()
    cdef public str engine_name         # backend id: "onnx", "coreml" or "tensorrt"
    cdef tuple get_input_shape(self)    # -> (height, width) expected by the model
    cdef run(self, input_data)          # execute the model on preprocessed input
    cdef preprocess(self, list frames)  # frames -> model input blob
    cdef list postprocess(self, output, object ai_config)  # raw output -> detections per frame
    cdef list remove_overlapping(self, list[Detection] detections, float threshold)
    cpdef list process_frames(self, list frames, object ai_config)  # public entry point
+106
View File
@@ -0,0 +1,106 @@
import cv2
import numpy as np
from annotation cimport Detection
cdef class InferenceEngine:
    """Base class for detection inference backends.

    Subclasses must implement get_input_shape() and run(); preprocess()
    and postprocess() have ONNX-style defaults that subclasses may
    override. process_frames() is the public entry point.
    """
    def __init__(self, model_bytes: bytes, max_batch_size: int = 8, **kwargs):
        self.max_batch_size = max_batch_size
        self.engine_name = <str>kwargs.get('engine_name', "onnx")
    @staticmethod
    def get_engine_filename():
        # Backend-specific engine artifact name; None means "no cached engine".
        return None
    @staticmethod
    def get_source_filename():
        # Name of the source model to convert from; None means "not needed".
        return None
    @staticmethod
    def convert_from_source(bytes source_bytes):
        # Default: the source bytes are already the runnable model.
        return source_bytes
    cdef tuple get_input_shape(self):
        raise NotImplementedError("Subclass must implement get_input_shape")
    cdef run(self, input_data):
        raise NotImplementedError("Subclass must implement run")
    cdef preprocess(self, list frames):
        """Convert BGR frames to a stacked NCHW float blob scaled to [0, 1]."""
        cdef int h, w
        h, w = self.get_input_shape()
        # swapRB converts OpenCV BGR to the RGB order the model expects.
        blobs = [cv2.dnn.blobFromImage(frame,
                                       scalefactor=1.0 / 255.0,
                                       size=(w, h),
                                       mean=(0, 0, 0),
                                       swapRB=True,
                                       crop=False)
                 for frame in frames]
        return np.vstack(blobs)
    cdef list postprocess(self, output, object ai_config):
        """Turn raw model output into per-frame lists of Detection.

        Expects output[0] to be a batch of per-frame detection arrays
        where each row is (x1, y1, x2, y2, conf, class_id) in input-pixel
        coordinates. Rows with conf == 0 mark the end of valid detections
        for a frame (padding) — presumably zero-filled by the model's NMS;
        confirm with the exporter.
        """
        cdef list[Detection] detections
        cdef int ann_index
        cdef float x1, y1, x2, y2, conf
        cdef int class_id
        cdef list results = []
        cdef int h, w
        h, w = self.get_input_shape()
        for ann_index in range(len(output[0])):
            detections = []
            for det in output[0][ann_index]:
                if det[4] == 0:
                    # Zero confidence: remaining rows are padding.
                    break
                # Normalise corner coords to [0, 1].
                x1 = det[0] / w
                y1 = det[1] / h
                x2 = det[2] / w
                y2 = det[3] / h
                conf = round(det[4], 2)
                class_id = int(det[5])
                # Convert corners to centre + size for Detection.
                x = (x1 + x2) / 2
                y = (y1 + y2) / 2
                bw = x2 - x1
                bh = y2 - y1
                if conf >= ai_config.probability_threshold:
                    detections.append(Detection(x, y, bw, bh, class_id, conf))
            filtered = self.remove_overlapping(detections, ai_config.tracking_intersection_threshold)
            results.append(filtered)
        return results
    cdef list remove_overlapping(self, list[Detection] detections, float threshold):
        """Greedy overlap suppression: among overlapping detections, keep
        the one with the higher confidence (ties broken by lower class id).

        `res` tracks the index of the current winner for det1's group;
        indexes appended to filtered_out_indexes are skipped when later
        considered as det1 (including the kept winner itself, appended
        after the group is resolved).
        """
        cdef Detection det1, det2
        filtered_output = []
        filtered_out_indexes = []
        for det1_index in range(len(detections)):
            if det1_index in filtered_out_indexes:
                continue
            det1 = detections[det1_index]
            res = det1_index
            for det2_index in range(det1_index + 1, len(detections)):
                det2 = detections[det2_index]
                if det1.overlaps(det2, threshold):
                    if det1.confidence > det2.confidence or (
                            det1.confidence == det2.confidence and det1.cls < det2.cls):
                        filtered_out_indexes.append(det2_index)
                    else:
                        # det2 wins: drop the previous winner and promote det2.
                        filtered_out_indexes.append(res)
                        res = det2_index
            filtered_output.append(detections[res])
            filtered_out_indexes.append(res)
        return filtered_output
    cpdef list process_frames(self, list frames, object ai_config):
        """Run the full pipeline on `frames`, chunked to the effective batch.

        Returns one list of Detection per input frame, preprocessing and
        running the model in chunks of min(max_batch_size,
        ai_config.model_batch_size), clamped to at least 1.
        """
        cdef int effective_batch = min(self.max_batch_size, ai_config.model_batch_size)
        if effective_batch < 1:
            effective_batch = 1
        cdef list all_detections = []
        cdef int i
        for i in range(0, len(frames), effective_batch):
            chunk = frames[i:i + effective_batch]
            input_blob = self.preprocess(chunk)
            raw_output = self.run(input_blob)
            batch_dets = self.postprocess(raw_output, ai_config)
            all_detections.extend(batch_dets)
        return all_detections
+13
View File
@@ -0,0 +1,13 @@
from engines.inference_engine cimport InferenceEngine
cdef class OnnxEngine(InferenceEngine):
    # Primary onnxruntime.InferenceSession.
    cdef public object session
    # CPU-only fallback session (created only when CoreML provider is active).
    cdef object _cpu_session
    cdef object model_inputs   # session.get_inputs() result
    cdef str input_name        # name of input tensor 0
    cdef object input_shape    # declared shape of input tensor 0 (may hold symbolic dims)
    cdef tuple get_input_shape(self)
    cdef run(self, input_data)
+48
View File
@@ -0,0 +1,48 @@
from engines.inference_engine cimport InferenceEngine
import onnxruntime as onnx
cimport constants_inf
import os
def _select_providers():
    """Pick ONNX Runtime execution providers in preference order.

    Preference is CoreML > CUDA > CPU, filtered to what this onnxruntime
    build actually offers. Setting the SKIP_COREML env var to 1/true/yes
    removes CoreML from consideration. Always returns a non-empty list
    (CPU as the last resort).
    """
    available = set(onnx.get_available_providers())
    preferred = ["CoreMLExecutionProvider", "CUDAExecutionProvider", "CPUExecutionProvider"]
    if os.environ.get("SKIP_COREML", "").lower() in ("1", "true", "yes"):
        preferred.remove("CoreMLExecutionProvider")
    chosen = [name for name in preferred if name in available]
    return chosen if chosen else ["CPUExecutionProvider"]
cdef class OnnxEngine(InferenceEngine):
    """ONNX Runtime inference backend.

    Providers are chosen by _select_providers(); when the CoreML provider
    ends up active, a CPU-only fallback session is also created and used
    if the primary session raises at run time.
    """
    def __init__(self, model_bytes: bytes, max_batch_size: int = 8, **kwargs):
        InferenceEngine.__init__(self, model_bytes, max_batch_size)
        providers = _select_providers()
        constants_inf.log(<str>f'ONNX providers: {providers}')
        self.session = onnx.InferenceSession(model_bytes, providers=providers)
        self.model_inputs = self.session.get_inputs()
        self.input_name = self.model_inputs[0].name
        self.input_shape = self.model_inputs[0].shape
        # A dynamic batch dimension is reported by ONNX Runtime as -1,
        # None, or a symbolic *name* such as "N" or "batch". Only a
        # concrete positive int may override max_batch_size; the previous
        # check (`not in (-1, None, "N")`) let other symbolic names
        # through and crashed assigning a str to the int attribute.
        batch_dim = self.input_shape[0]
        if isinstance(batch_dim, int) and batch_dim > 0:
            self.max_batch_size = batch_dim
        constants_inf.log(f'AI detection model input: {self.model_inputs} {self.input_shape}')
        model_meta = self.session.get_modelmeta()
        constants_inf.log(f"Metadata: {model_meta.custom_metadata_map}")
        self._cpu_session = None
        if any("CoreML" in p for p in self.session.get_providers()):
            constants_inf.log(<str>'CoreML active — creating CPU fallback session')
            self._cpu_session = onnx.InferenceSession(
                model_bytes, providers=["CPUExecutionProvider"])
    cdef tuple get_input_shape(self):
        # (height, width) taken from the NCHW input shape.
        shape = self.input_shape
        return <tuple>(shape[2], shape[3])
    cdef run(self, input_data):
        """Run inference; fall back to the CPU session when the primary
        (e.g. CoreML-backed) session raises and a fallback exists."""
        try:
            return self.session.run(None, {self.input_name: input_data})
        except Exception:
            if self._cpu_session is not None:
                return self._cpu_session.run(None, {self.input_name: input_data})
            raise
+20
View File
@@ -0,0 +1,20 @@
from engines.inference_engine cimport InferenceEngine
cdef class TensorRTEngine(InferenceEngine):
    cdef public object context   # TensorRT execution context
    cdef public object d_input   # device buffer for model input
    cdef public object d_output  # device buffer for model output
    cdef str input_name          # engine tensor 0 name
    cdef list input_shape        # [batch, C, H, W] fixed at init
    cdef object h_output         # page-locked host output buffer
    cdef str output_name         # engine tensor 1 name
    cdef list output_shape       # [batch, det_count, values_per_det]
    cdef object stream           # CUDA stream for async copies/execution
    cdef tuple get_input_shape(self)
    cdef run(self, input_data)
+169
View File
@@ -0,0 +1,169 @@
from engines.inference_engine cimport InferenceEngine
import tensorrt as trt # pyright: ignore[reportMissingImports]
import pycuda.driver as cuda # pyright: ignore[reportMissingImports]
import pycuda.autoinit # pyright: ignore[reportMissingImports]
import pynvml
import numpy as np
cimport constants_inf
# Fraction of total GPU memory assumed usable for inference buffers.
GPU_MEMORY_FRACTION = 0.8
cdef class TensorRTEngine(InferenceEngine):
    """TensorRT inference backend for NVIDIA GPUs.

    Deserialises a prebuilt engine, resolves dynamic dims to concrete
    values, allocates device/host buffers once at maximum batch size,
    and runs async inference on a dedicated CUDA stream.
    """
    def __init__(self, model_bytes: bytes, max_batch_size: int = 8, **kwargs):
        InferenceEngine.__init__(self, model_bytes, max_batch_size, engine_name="tensorrt")
        try:
            logger = trt.Logger(trt.Logger.WARNING)
            runtime = trt.Runtime(logger)
            engine = runtime.deserialize_cuda_engine(model_bytes)
            if engine is None:
                raise RuntimeError("Failed to load TensorRT engine from bytes")
            self.context = engine.create_execution_context()
            # Tensor 0 is the input; dynamic (-1) spatial dims default to 1280.
            self.input_name = engine.get_tensor_name(0)
            engine_input_shape = engine.get_tensor_shape(self.input_name)
            C = engine_input_shape[1]
            H = 1280 if engine_input_shape[2] == -1 else engine_input_shape[2]
            W = 1280 if engine_input_shape[3] == -1 else engine_input_shape[3]
            if engine_input_shape[0] == -1:
                # Dynamic batch: size it from this GPU's memory.
                gpu_mem = TensorRTEngine.get_gpu_memory_bytes(0)
                self.max_batch_size = TensorRTEngine.calculate_max_batch_size(gpu_mem, H, W)
            else:
                self.max_batch_size = engine_input_shape[0]
            self.input_shape = [self.max_batch_size, C, H, W]
            self.context.set_input_shape(self.input_name, self.input_shape)
            # Allocate the device input buffer once, at maximum batch size.
            input_size = trt.volume(self.input_shape) * np.dtype(np.float32).itemsize
            self.d_input = cuda.mem_alloc(input_size)
            # Tensor 1 is the output; dynamic dims default to 300 detections
            # of 6 values each.
            self.output_name = engine.get_tensor_name(1)
            engine_output_shape = tuple(engine.get_tensor_shape(self.output_name))
            self.output_shape = [
                self.max_batch_size,
                300 if engine_output_shape[1] == -1 else engine_output_shape[1],
                6 if engine_output_shape[2] == -1 else engine_output_shape[2],
            ]
            # Page-locked host memory enables fast device->host copies.
            self.h_output = cuda.pagelocked_empty(tuple(self.output_shape), dtype=np.float32)
            self.d_output = cuda.mem_alloc(self.h_output.nbytes)
            self.stream = cuda.Stream()
        except Exception as e:
            # NOTE(review): consider `raise ... from e` to keep the
            # original traceback chained.
            raise RuntimeError(f"Failed to initialize TensorRT engine: {str(e)}")
    @staticmethod
    def calculate_max_batch_size(gpu_memory_bytes, int input_h, int input_w):
        """Estimate a safe batch size from GPU memory, clamped to [1, 32].

        frame_input_bytes is 3 channels x H x W x 4 bytes (float32). The
        x12 multiplier is a per-frame overhead estimate (activations etc.)
        — empirical factor, TODO confirm.
        """
        frame_input_bytes = 3 * input_h * input_w * 4
        estimated_per_frame = frame_input_bytes * 12
        available = gpu_memory_bytes * GPU_MEMORY_FRACTION
        calculated = max(1, int(available / estimated_per_frame))
        return min(calculated, 32)
    @staticmethod
    def get_gpu_memory_bytes(int device_id):
        """Total memory of GPU `device_id` via NVML; 2 GiB fallback on failure."""
        total_memory = None
        try:
            pynvml.nvmlInit()
            handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)
            mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            total_memory = mem_info.total
        except pynvml.NVMLError:
            total_memory = None
        finally:
            try:
                pynvml.nvmlShutdown()
            except pynvml.NVMLError:
                pass
        return 2 * 1024 * 1024 * 1024 if total_memory is None else total_memory
    @staticmethod
    def get_engine_filename():
        """Engine cache filename keyed by compute capability and SM count,
        or None when the GPU cannot be queried."""
        try:
            from engines import tensor_gpu_index
            device = cuda.Device(max(tensor_gpu_index, 0))
            sm_count = device.multiprocessor_count
            cc_major, cc_minor = device.compute_capability()
            return f"azaion.cc_{cc_major}.{cc_minor}_sm_{sm_count}.engine"
        except Exception:
            return None
    @staticmethod
    def get_source_filename():
        # The TensorRT engine is built from the shipped ONNX model file.
        import constants_inf
        return constants_inf.AI_ONNX_MODEL_FILE
    @staticmethod
    def convert_from_source(bytes onnx_model):
        """Build a serialised TensorRT engine from ONNX model bytes.

        Returns the engine plan bytes, or None when parsing or building
        fails. Enables FP16 when the platform supports it, and adds an
        optimisation profile sized from GPU memory for dynamic-batch models.
        """
        gpu_mem = TensorRTEngine.get_gpu_memory_bytes(0)
        workspace_bytes = int(gpu_mem * 0.9)
        explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        trt_logger = trt.Logger(trt.Logger.WARNING)
        with trt.Builder(trt_logger) as builder, \
                builder.create_network(explicit_batch_flag) as network, \
                trt.OnnxParser(network, trt_logger) as parser, \
                builder.create_builder_config() as config:
            config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes)
            if not parser.parse(onnx_model):
                return None
            input_tensor = network.get_input(0)
            shape = input_tensor.shape
            C = shape[1]
            # NOTE(review): max(shape[i], 1280) forces at least 1280 even
            # for smaller *fixed* dims — confirm the upscaling is intended.
            H = max(shape[2], 1280) if shape[2] != -1 else 1280
            W = max(shape[3], 1280) if shape[3] != -1 else 1280
            if shape[0] == -1:
                # Dynamic batch: profile with min=1, opt=max=computed batch.
                max_batch = TensorRTEngine.calculate_max_batch_size(gpu_mem, H, W)
                profile = builder.create_optimization_profile()
                profile.set_shape(
                    input_tensor.name,
                    (1, C, H, W),
                    (max_batch, C, H, W),
                    (max_batch, C, H, W),
                )
                config.add_optimization_profile(profile)
            if builder.platform_has_fast_fp16:
                constants_inf.log(<str>'Converting to supported fp16')
                config.set_flag(trt.BuilderFlag.FP16)
            else:
                constants_inf.log(<str>'Converting to supported fp32. (fp16 is not supported)')
            plan = builder.build_serialized_network(network, config)
            if plan is None:
                constants_inf.logerror(<str>'Conversion failed.')
                return None
            constants_inf.log('conversion done!')
            return bytes(plan)
    cdef tuple get_input_shape(self):
        # (H, W) of the fixed input shape chosen at init.
        return <tuple>(self.input_shape[2], self.input_shape[3])
    cdef run(self, input_data):
        """Copy input to the device, execute, and return [output] trimmed
        to the actual batch size of input_data."""
        try:
            actual_batch = input_data.shape[0]
            if actual_batch != self.input_shape[0]:
                # Smaller final chunk: shrink the context's input shape.
                actual_shape = [actual_batch, self.input_shape[1], self.input_shape[2], self.input_shape[3]]
                self.context.set_input_shape(self.input_name, actual_shape)
            cuda.memcpy_htod_async(self.d_input, input_data, self.stream)
            self.context.set_tensor_address(self.input_name, int(self.d_input))
            self.context.set_tensor_address(self.output_name, int(self.d_output))
            self.context.execute_async_v3(stream_handle=self.stream.handle)
            self.stream.synchronize()
            cuda.memcpy_dtoh(self.h_output, self.d_output)
            # Only the first actual_batch entries of the full-size host
            # buffer are valid for this call.
            output_shape = [actual_batch, self.output_shape[1], self.output_shape[2]]
            output = self.h_output[:actual_batch].reshape(output_shape)
            return [output]
        except Exception as e:
            raise RuntimeError(f"Failed to run TensorRT inference: {str(e)}")