[AZ-180] Refactor detection event handling and improve SSE support

- Updated the detection image endpoint to require a channel ID for event streaming.
- Introduced a new endpoint for streaming detection events, allowing clients to receive real-time updates.
- Enhanced the internal buffering mechanism for detection events to manage multiple channels.
- Refactored the inference module to support the new event handling structure.

Made-with: Cursor
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-04-03 02:42:05 +03:00
parent 2c35e59a77
commit 8baa96978b
26 changed files with 819 additions and 413 deletions
+29 -8
View File
@@ -1,6 +1,16 @@
import os
import platform
import sys
from loguru import logger
from engines.engine_factory import (
EngineFactory,
OnnxEngineFactory,
CoreMLEngineFactory,
TensorRTEngineFactory,
JetsonTensorRTEngineFactory,
)
def _check_tensor_gpu_index():
try:
@@ -35,18 +45,29 @@ def _is_apple_silicon():
return False
def _is_jetson():
    """Report whether the host passes all three Jetson checks.

    The checks run in this order and stop at the first failure: the machine
    architecture is ``aarch64``, the module-level ``tensor_gpu_index`` is
    greater than -1, and the file ``/etc/nv_tegra_release`` exists.
    """
    if platform.machine() != "aarch64":
        return False
    if not tensor_gpu_index > -1:
        return False
    return os.path.isfile("/etc/nv_tegra_release")
tensor_gpu_index = _check_tensor_gpu_index()
def _select_engine_class():
def _create_engine_factory() -> EngineFactory:
if _is_jetson():
logger.info("Engine factory: JetsonTensorRTEngineFactory")
return JetsonTensorRTEngineFactory()
if tensor_gpu_index > -1:
from engines.tensorrt_engine import TensorRTEngine # pyright: ignore[reportMissingImports]
return TensorRTEngine
logger.info("Engine factory: TensorRTEngineFactory")
return TensorRTEngineFactory()
if _is_apple_silicon():
from engines.coreml_engine import CoreMLEngine
return CoreMLEngine
from engines.onnx_engine import OnnxEngine
return OnnxEngine
logger.info("Engine factory: CoreMLEngineFactory")
return CoreMLEngineFactory()
logger.info("Engine factory: OnnxEngineFactory")
return OnnxEngineFactory()
EngineClass = _select_engine_class()
engine_factory = _create_engine_factory()
-4
View File
@@ -30,10 +30,6 @@ cdef class CoreMLEngine(InferenceEngine):
constants_inf.log(<str>f'CoreML model: {self.img_width}x{self.img_height}')
@staticmethod
def get_engine_filename():
    """Return the file name used for the CoreML engine artifact."""
    engine_archive_name = "azaion_coreml.zip"
    return engine_archive_name
@staticmethod
def _extract_from_zip(model_bytes):
tmpdir = tempfile.mkdtemp()
+109
View File
@@ -0,0 +1,109 @@
import os
import tempfile
class EngineFactory:
has_build_step = False
def create(self, model_bytes: bytes):
raise NotImplementedError
def load_engine(self, loader_client, models_dir: str):
filename = self._get_ai_engine_filename()
if filename is None:
return None
try:
res = loader_client.load_big_small_resource(filename, models_dir)
if res.err is None:
return self.create(res.data)
except Exception:
pass
return None
def _get_ai_engine_filename(self) -> str | None:
return None
def get_source_filename(self) -> str | None:
return None
def build_from_source(self, onnx_bytes: bytes, loader_client, models_dir: str):
raise NotImplementedError(f"{type(self).__name__} does not support building from source")
class OnnxEngineFactory(EngineFactory):
    """Factory for the ONNX backend.

    Only :meth:`create` and :meth:`get_source_filename` are overridden; every
    other hook keeps the base-class default.
    """

    def create(self, model_bytes: bytes):
        """Build an ``OnnxEngine`` from the given model bytes."""
        # Function-scope import: the backend module is imported only when
        # this factory is actually asked for an engine.
        from engines.onnx_engine import OnnxEngine as engine_cls

        return engine_cls(model_bytes)

    def get_source_filename(self) -> str:
        """Return ``constants_inf.AI_ONNX_MODEL_FILE``."""
        from constants_inf import AI_ONNX_MODEL_FILE as source_name

        return source_name
class CoreMLEngineFactory(EngineFactory):
    """Factory for the CoreML backend.

    It names a prebuilt engine file and creates ``CoreMLEngine`` objects; the
    source-model and build hooks are left at the base-class defaults.
    """

    def create(self, model_bytes: bytes):
        """Build a ``CoreMLEngine`` from the given model bytes."""
        # Function-scope import: the backend module is imported only when
        # this factory is actually asked for an engine.
        from engines.coreml_engine import CoreMLEngine as engine_cls

        return engine_cls(model_bytes)

    def _get_ai_engine_filename(self) -> str:
        """Return the file name of the prebuilt CoreML engine."""
        archive_name = "azaion_coreml.zip"
        return archive_name
class TensorRTEngineFactory(EngineFactory):
    """Factory for the TensorRT backend, including conversion from ONNX."""

    # This factory implements build_from_source.
    has_build_step = True

    def create(self, model_bytes: bytes):
        """Build a ``TensorRTEngine`` from the given engine bytes."""
        from engines.tensorrt_engine import TensorRTEngine as engine_cls

        return engine_cls(model_bytes)

    def _get_ai_engine_filename(self) -> str | None:
        """Return ``TensorRTEngine.get_engine_filename()``."""
        from engines.tensorrt_engine import TensorRTEngine as engine_cls

        engine_name = engine_cls.get_engine_filename()
        return engine_name

    def get_source_filename(self) -> str:
        """Return ``constants_inf.AI_ONNX_MODEL_FILE``."""
        from constants_inf import AI_ONNX_MODEL_FILE as source_name

        return source_name

    def build_from_source(self, onnx_bytes: bytes, loader_client, models_dir: str):
        """Convert ONNX bytes to a TensorRT engine.

        ``loader_client`` and ``models_dir`` are accepted for interface
        compatibility and are not used here; conversion is invoked with no
        calibration cache path.

        Returns:
            A ``(engine_bytes, engine_filename)`` tuple.
        """
        from engines.tensorrt_engine import TensorRTEngine as engine_cls

        # Convert first, then resolve the name (same order as before).
        built_bytes = engine_cls.convert_from_source(onnx_bytes, None)
        target_name = engine_cls.get_engine_filename()
        return built_bytes, target_name
class JetsonTensorRTEngineFactory(TensorRTEngineFactory):
    """TensorRT factory variant that produces ``JetsonTensorRTEngine`` objects.

    It asks ``TensorRTEngine.get_engine_filename`` for the ``"int8"`` name and,
    when building from source, passes a downloaded calibration cache file (if
    one could be fetched) to the conversion.
    """

    def create(self, model_bytes: bytes):
        """Build a ``JetsonTensorRTEngine`` from the given engine bytes."""
        from engines.jetson_tensorrt_engine import JetsonTensorRTEngine
        return JetsonTensorRTEngine(model_bytes)

    def _get_ai_engine_filename(self) -> str | None:
        """Return ``TensorRTEngine.get_engine_filename("int8")``."""
        from engines.tensorrt_engine import TensorRTEngine
        return TensorRTEngine.get_engine_filename("int8")

    def build_from_source(self, onnx_bytes: bytes, loader_client, models_dir: str):
        """Convert ONNX bytes to an engine, using a calibration cache if available.

        Args:
            onnx_bytes: source model bytes handed to the converter.
            loader_client: loader used to fetch the calibration cache.
            models_dir: directory argument forwarded to the loader.

        Returns:
            A ``(engine_bytes, engine_filename)`` tuple.
        """
        from engines.tensorrt_engine import TensorRTEngine
        calib_cache_path = self._download_calib_cache(loader_client, models_dir)
        try:
            engine_bytes = TensorRTEngine.convert_from_source(onnx_bytes, calib_cache_path)
            return engine_bytes, TensorRTEngine.get_engine_filename("int8")
        finally:
            # The cache file is only needed during conversion; remove it on
            # success and on failure alike.
            if calib_cache_path is not None:
                try:
                    os.unlink(calib_cache_path)
                except OSError:
                    # Best-effort cleanup must not mask the conversion result.
                    pass

    def _download_calib_cache(self, loader_client, models_dir: str) -> str | None:
        """Fetch the INT8 calibration cache into a temporary file.

        Returns:
            Path of the temporary ``.cache`` file, or ``None`` when the loader
            reports an error or anything raises. The caller owns the returned
            file and must delete it.
        """
        import constants_inf
        path = None
        try:
            res = loader_client.load_big_small_resource(constants_inf.INT8_CALIB_CACHE_FILE, models_dir)
            if res.err is not None:
                constants_inf.log(f"INT8 calibration cache not available: {res.err}")
                return None
            fd, path = tempfile.mkstemp(suffix=".cache")
            with os.fdopen(fd, "wb") as f:
                f.write(res.data)
            constants_inf.log("INT8 calibration cache downloaded")
            return path
        except Exception as e:
            # mkstemp never deletes its file; if we got as far as creating it
            # and then failed, remove the partial file so it does not leak.
            if path is not None:
                try:
                    os.unlink(path)
                except OSError:
                    pass
            constants_inf.log(f"INT8 calibration cache download failed: {str(e)}")
            return None
+5
View File
@@ -0,0 +1,5 @@
from engines.tensorrt_engine cimport TensorRTEngine
# Distinct engine type for Jetson; it adds no members of its own and relies
# entirely on what TensorRTEngine provides.
cdef class JetsonTensorRTEngine(TensorRTEngine):
    pass
+5
View File
@@ -0,0 +1,5 @@
from engines.tensorrt_engine cimport TensorRTEngine
# Distinct engine type for Jetson; it adds no members of its own and relies
# entirely on what TensorRTEngine provides.
cdef class JetsonTensorRTEngine(TensorRTEngine):
    pass
+1 -1
View File
@@ -23,7 +23,7 @@ cdef class OnnxEngine(InferenceEngine):
self.model_inputs = self.session.get_inputs()
self.input_name = self.model_inputs[0].name
self.input_shape = self.model_inputs[0].shape
if self.input_shape[0] not in (-1, None, "N"):
if isinstance(self.input_shape[0], int) and self.input_shape[0] > 0:
self.max_batch_size = self.input_shape[0]
constants_inf.log(f'AI detection model input: {self.model_inputs} {self.input_shape}')
model_meta = self.session.get_modelmeta()
-5
View File
@@ -113,11 +113,6 @@ cdef class TensorRTEngine(InferenceEngine):
except Exception:
return None
@staticmethod
def get_source_filename():
    """Return ``constants_inf.AI_ONNX_MODEL_FILE``."""
    from constants_inf import AI_ONNX_MODEL_FILE as onnx_source_name
    return onnx_source_name
@staticmethod
def convert_from_source(bytes onnx_model, str calib_cache_path=None):
gpu_mem = TensorRTEngine.get_gpu_memory_bytes(0)