Separate the load functionality from the inference client into a loader client. Call the loader client from inference to get the model.

Remove dummy DLLs; remove the resource loader from the C# code.

TODO: load DLLs separately via the Loader UI and the loader client.

WIP
This commit is contained in:
Alex Bezdieniezhnykh
2025-06-06 20:04:03 +03:00
parent 500db31142
commit 7750025631
54 changed files with 353 additions and 571 deletions
+45 -22
View File
@@ -1,29 +1,54 @@
import json
import mimetypes
import os
import subprocess
import sys
import time
import cv2
import numpy as np
cimport constants
from remote_command cimport RemoteCommand
from annotation cimport Detection, Annotation
from ai_config cimport AIRecognitionConfig
from hardware_service cimport HardwareService
from security cimport Security
import pynvml
# Probe NVIDIA GPUs once at import time and pick the inference backend:
# TensorRT when a capable GPU exists, ONNX Runtime otherwise.
# (The dangling old-version line `if HardwareService.has_nvidia_gpu():`
# left over from the diff is removed; the check is GPU-index based now.)
cdef int tensor_gpu_index

cdef int check_tensor_gpu_index():
    """Return the index of the first NVIDIA GPU with CUDA compute capability
    >= 6.1 (the minimum this code accepts for TensorRT), or -1 when no GPU
    qualifies or NVML is unavailable."""
    try:
        pynvml.nvmlInit()
        device_count = pynvml.nvmlDeviceGetCount()
        if device_count == 0:
            print('No NVIDIA GPUs found.')
            return -1
        for i in range(device_count):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle)
            if major > 6 or (major == 6 and minor >= 1):
                print('found NVIDIA GPU!')
                return i
        print('NVIDIA GPU doesnt support TensorRT!')
        return -1
    except pynvml.NVMLError:
        # No driver / NVML failure: fall back to the non-TensorRT path.
        return -1
    finally:
        # Narrowed from a bare `except:`; nvmlShutdown only fails when
        # nvmlInit never succeeded (e.g. no NVIDIA GPU present).
        try:
            pynvml.nvmlShutdown()
        except pynvml.NVMLError:
            print('Failed to shutdown pynvml cause probably no NVidia GPU')

tensor_gpu_index = check_tensor_gpu_index()

if tensor_gpu_index > -1:
    from tensorrt_engine import TensorRTEngine
else:
    from onnx_engine import OnnxEngine
cdef class Inference:
    """Runs model inference; model binaries are obtained via the loader client."""

    def __init__(self, loader_client, on_annotation):
        # The diff interleaved the removed api_client __init__ with the new
        # loader_client one; only the loader_client version is kept, matching
        # the rest of this commit (all resource loading goes through it).
        self.loader_client = loader_client
        self.on_annotation = on_annotation  # callback invoked with produced annotations
        self.stop_signal = False            # set True elsewhere to stop processing
        self.model_input = None             # lazily initialised model input buffer
@@ -33,27 +58,26 @@ cdef class Inference:
self.is_building_engine = False
cdef build_tensor_engine(self, object updater_callback):
is_nvidia = HardwareService.has_nvidia_gpu()
if not is_nvidia:
if not tensor_gpu_index == -1:
return
engine_filename = TensorRTEngine.get_engine_filename(0)
key = Security.get_model_encryption_key()
models_dir = constants.MODELS_FOLDER
self.is_building_engine = True
updater_callback('downloading')
if self.api_client.load_big_small_resource(engine_filename, models_dir, key):
if self.loader_client.load_big_small_resource(engine_filename, models_dir):
print('tensor rt engine is here, no need to build')
self.is_building_engine = False
return
# time.sleep(8) # prevent simultaneously loading dll and models
updater_callback('converting')
onnx_model = self.api_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir, key)
onnx_model = self.loader_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir)
model_bytes = TensorRTEngine.convert_from_onnx(onnx_model)
updater_callback('uploading')
self.api_client.upload_big_small_resource(model_bytes, <str> engine_filename, models_dir, key)
self.loader_client.upload_big_small_resource(model_bytes, <str> engine_filename, models_dir)
print(f'uploaded {engine_filename} to CDN and API')
self.is_building_engine = False
@@ -61,17 +85,16 @@ cdef class Inference:
if self.engine is not None:
return
is_nvidia = HardwareService.has_nvidia_gpu()
key = Security.get_model_encryption_key()
models_dir = constants.MODELS_FOLDER
if is_nvidia:
if tensor_gpu_index > -1:
while self.is_building_engine:
time.sleep(1)
engine_filename = TensorRTEngine.get_engine_filename(0)
model_bytes = self.api_client.load_big_small_resource(engine_filename, models_dir, key)
model_bytes = self.loader_client.load_big_small_resource(engine_filename, models_dir)
self.engine = TensorRTEngine(model_bytes)
else:
model_bytes = self.api_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir, key)
model_bytes = self.loader_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir)
self.engine = OnnxEngine(model_bytes)
self.model_height, self.model_width = self.engine.get_input_shape()