[AZ-180] Enhance setup and improve inference logging

- Added a new Cython extension for the engine factory to the setup configuration.
- Updated the inference module to invoke the annotation callback with a progress percentage after each video batch (including the final flush batch), alongside the existing batch logging.
- Refactored test cases to standardize the detection endpoint responses and include channel IDs in headers for better event handling.
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-04-03 05:58:55 +03:00
parent 8baa96978b
commit 834f846dc8
9 changed files with 64 additions and 38 deletions
@@ -1,5 +1,6 @@
import os
import tempfile
from loader_http_client cimport LoaderHttpClient, LoadResult
class EngineFactory:
@@ -8,7 +9,9 @@ class EngineFactory:
def create(self, model_bytes: bytes):
raise NotImplementedError
def load_engine(self, loader_client, models_dir: str):
def load_engine(self, LoaderHttpClient loader_client, str models_dir):
cdef str filename
cdef LoadResult res
filename = self._get_ai_engine_filename()
if filename is None:
return None
@@ -20,13 +23,13 @@ class EngineFactory:
pass
return None
def _get_ai_engine_filename(self) -> str | None:
def _get_ai_engine_filename(self):
return None
def get_source_filename(self) -> str | None:
def get_source_filename(self):
return None
def build_from_source(self, onnx_bytes: bytes, loader_client, models_dir: str):
def build_from_source(self, onnx_bytes, loader_client, models_dir):
raise NotImplementedError(f"{type(self).__name__} does not support building from source")
@@ -35,7 +38,7 @@ class OnnxEngineFactory(EngineFactory):
from engines.onnx_engine import OnnxEngine
return OnnxEngine(model_bytes)
def get_source_filename(self) -> str:
def get_source_filename(self):
import constants_inf
return constants_inf.AI_ONNX_MODEL_FILE
@@ -45,7 +48,7 @@ class CoreMLEngineFactory(EngineFactory):
from engines.coreml_engine import CoreMLEngine
return CoreMLEngine(model_bytes)
def _get_ai_engine_filename(self) -> str:
def _get_ai_engine_filename(self):
return "azaion_coreml.zip"
@@ -56,15 +59,15 @@ class TensorRTEngineFactory(EngineFactory):
from engines.tensorrt_engine import TensorRTEngine
return TensorRTEngine(model_bytes)
def _get_ai_engine_filename(self) -> str | None:
def _get_ai_engine_filename(self):
from engines.tensorrt_engine import TensorRTEngine
return TensorRTEngine.get_engine_filename()
def get_source_filename(self) -> str:
def get_source_filename(self):
import constants_inf
return constants_inf.AI_ONNX_MODEL_FILE
def build_from_source(self, onnx_bytes: bytes, loader_client, models_dir: str):
def build_from_source(self, onnx_bytes, loader_client, models_dir):
from engines.tensorrt_engine import TensorRTEngine
engine_bytes = TensorRTEngine.convert_from_source(onnx_bytes, None)
return engine_bytes, TensorRTEngine.get_engine_filename()
@@ -75,11 +78,12 @@ class JetsonTensorRTEngineFactory(TensorRTEngineFactory):
from engines.jetson_tensorrt_engine import JetsonTensorRTEngine
return JetsonTensorRTEngine(model_bytes)
def _get_ai_engine_filename(self) -> str | None:
def _get_ai_engine_filename(self):
from engines.tensorrt_engine import TensorRTEngine
return TensorRTEngine.get_engine_filename("int8")
def build_from_source(self, onnx_bytes: bytes, loader_client, models_dir: str):
def build_from_source(self, onnx_bytes, LoaderHttpClient loader_client, str models_dir):
cdef str calib_cache_path
from engines.tensorrt_engine import TensorRTEngine
calib_cache_path = self._download_calib_cache(loader_client, models_dir)
try:
@@ -92,10 +96,13 @@ class JetsonTensorRTEngineFactory(TensorRTEngineFactory):
except Exception:
pass
def _download_calib_cache(self, loader_client, models_dir: str) -> str | None:
def _download_calib_cache(self, LoaderHttpClient loader_client, str models_dir):
cdef LoadResult res
import constants_inf
try:
res = loader_client.load_big_small_resource(constants_inf.INT8_CALIB_CACHE_FILE, models_dir)
res = loader_client.load_big_small_resource(
constants_inf.INT8_CALIB_CACHE_FILE, models_dir
)
if res.err is not None:
constants_inf.log(f"INT8 calibration cache not available: {res.err}")
return None
+10
View File
@@ -268,14 +268,24 @@ cdef class Inference:
batch_count += 1
tf = total_frames if total_frames > 0 else max(frame_count, 1)
constants_inf.log(<str>f'Video batch {batch_count}: frame {frame_count}/{tf} ({frame_count*100//tf}%)')
last_ts = batch_timestamps[len(batch_timestamps) - 1] if batch_timestamps else 0
self._process_video_batch(ai_config, batch_frames, batch_timestamps, original_media_name, frame_count, tf, model_w)
if self._annotation_callback is not None:
pann = Annotation(original_media_name, original_media_name, last_ts, [])
cb = self._annotation_callback
cb(pann, int(frame_count * 100 / tf))
batch_frames = []
batch_timestamps = []
if batch_frames:
batch_count += 1
tf = total_frames if total_frames > 0 else max(frame_count, 1)
constants_inf.log(<str>f'Video batch {batch_count} (flush): {len(batch_frames)} remaining frames')
last_ts = batch_timestamps[len(batch_timestamps) - 1] if batch_timestamps else 0
self._process_video_batch(ai_config, batch_frames, batch_timestamps, original_media_name, frame_count, tf, model_w)
if self._annotation_callback is not None:
pann = Annotation(original_media_name, original_media_name, last_ts, [])
cb = self._annotation_callback
cb(pann, 100)
constants_inf.log(<str>f'Video done: {frame_count} frames read, {batch_count} batches processed')
self.send_detection_status()
+3 -3
View File
@@ -645,6 +645,7 @@ async def detect_video_upload(
content_hash, _MEDIA_STATUS_AI_PROCESSED,
token_mgr.get_valid_token(),
)
await asyncio.sleep(0.01)
_enqueue(channel_id, DetectionEvent(
annotations=[],
mediaId=content_hash,
@@ -681,8 +682,7 @@ async def detect_media(
config: Annotated[Optional[AIConfigDto], Body()] = None,
user_id: str = Depends(require_auth),
):
existing = _active_detections.get(media_id)
if existing is not None and not existing.done():
if media_id in _active_detections:
raise HTTPException(status_code=409, detail="Detection already in progress for this media")
channel_id = request.headers.get("x-channel-id", "")
@@ -779,7 +779,7 @@ async def detect_media(
)
_enqueue(channel_id, error_event)
finally:
_active_detections.pop(media_id, None)
loop.call_later(5.0, lambda: _active_detections.pop(media_id, None))
loop.call_later(10.0, _cleanup_channel, channel_id)
_active_detections[media_id] = asyncio.create_task(run_detection())