Read CDN YAML config from the API

Automate TensorRT model conversion when no prebuilt engine exists for the user's GPU
This commit is contained in:
Alex Bezdieniezhnykh
2025-04-23 23:20:08 +03:00
parent c68c293448
commit e798af470b
23 changed files with 265 additions and 93 deletions
+35 -7
View File
@@ -1,14 +1,19 @@
import json
import mimetypes
import os
import subprocess
import time
import cv2
import numpy as np
cimport constants
from remote_command cimport RemoteCommand
from annotation cimport Detection, Annotation
from ai_config cimport AIRecognitionConfig
from inference_engine cimport OnnxEngine, TensorRTEngine
from hardware_service cimport HardwareService
from security cimport Security
cdef class Inference:
def __init__(self, api_client, on_annotation):
@@ -20,18 +25,41 @@ cdef class Inference:
self.model_height = 0
self.engine = None
self.class_names = None
self.is_building_engine = False
def init_ai(self):
cdef build_tensor_engine(self):
    """Build and upload a TensorRT engine for this host's NVIDIA GPU.

    If the host has an NVIDIA GPU and no engine artifact for it exists yet
    in the models folder, download the encrypted ONNX model via the API,
    convert it to a TensorRT engine, and upload the result back so it can
    be reused later (presumably by hosts with the same GPU — confirm).
    No-op on non-NVIDIA hosts or when the engine file is already cached.
    """
    if not HardwareService.has_nvidia_gpu():
        # TensorRT is NVIDIA-only; other hosts use the ONNX path in init_ai().
        return
    engine_filename = TensorRTEngine.get_engine_filename()
    key = Security.get_model_encryption_key()
    models_dir = constants.MODELS_FOLDER
    # The '.big' file marks an already-converted engine; skip the
    # expensive ONNX -> TensorRT conversion when it is present.
    if os.path.exists(os.path.join(<str> models_dir, f'{engine_filename}.big')):
        return
    self.is_building_engine = True
    try:
        onnx_model = self.api_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir, key)
        model_bytes = TensorRTEngine.convert_from_onnx(onnx_model)
        self.api_client.upload_big_small_resource(model_bytes, <str> engine_filename, models_dir, key)
    finally:
        # Always clear the flag: init_ai() busy-waits on it, so leaving it
        # True after a failed conversion would block model loading forever.
        self.is_building_engine = False
cdef init_ai(self):
    """Lazily initialise the inference engine and cached model metadata.

    Idempotent: returns immediately once an engine exists. On NVIDIA hosts,
    waits for any in-flight build_tensor_engine() conversion to finish, then
    downloads the GPU-specific TensorRT engine; otherwise downloads the
    generic ONNX model. Also caches the model input shape and class names.
    """
    if self.engine is not None:
        # Engine already created by a previous call — nothing to do.
        return
    is_nvidia = HardwareService.has_nvidia_gpu()
    key = Security.get_model_encryption_key()
    models_dir = constants.MODELS_FOLDER
    if is_nvidia:
        # A concurrent build_tensor_engine() may still be converting the
        # model; wait so the freshly built engine can be fetched below.
        # NOTE(review): no timeout — a stuck build would block here forever.
        while self.is_building_engine:
            time.sleep(1)
        engine_filename = TensorRTEngine.get_engine_filename()
        model_bytes = self.api_client.load_big_small_resource(engine_filename, models_dir, key)
        self.engine = TensorRTEngine(model_bytes)
    else:
        model_bytes = self.api_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir, key)
        self.engine = OnnxEngine(model_bytes)
    # Cache input geometry and labels once; used by preprocessing/postprocessing.
    self.model_height, self.model_width = self.engine.get_input_shape()
    self.class_names = self.engine.get_class_names()
@@ -135,7 +163,7 @@ cdef class Inference:
images.append(m)
# images first, it's faster
if len(images) > 0:
for chunk in self.split_list_extend(images, ai_config.model_batch_size):
for chunk in self.split_list_extend(images, self.engine.get_input_shape()):
print(f'run inference on {" ".join(chunk)}...')
self._process_images(cmd, ai_config, chunk)
if len(videos) > 0:
@@ -161,7 +189,7 @@ cdef class Inference:
batch_frames.append(frame)
batch_timestamps.append(int(v_input.get(cv2.CAP_PROP_POS_MSEC)))
if len(batch_frames) == ai_config.model_batch_size:
if len(batch_frames) == self.engine.get_input_shape():
input_blob = self.preprocess(batch_frames)
outputs = self.engine.run(input_blob)