mirror of
https://github.com/azaion/annotations.git
synced 2026-04-22 10:36:30 +00:00
Separate the load functionality from the inference client into a loader client. Call the loader client from inference to get the model.
Remove dummy DLLs; remove the resource loader from C#. TODO (WIP): load DLLs separately via the Loader UI and the loader client.
This commit is contained in:
@@ -1,29 +1,54 @@
|
||||
import json
|
||||
import mimetypes
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
cimport constants
|
||||
from remote_command cimport RemoteCommand
|
||||
from annotation cimport Detection, Annotation
|
||||
from ai_config cimport AIRecognitionConfig
|
||||
from hardware_service cimport HardwareService
|
||||
from security cimport Security
|
||||
import pynvml
|
||||
|
||||
if HardwareService.has_nvidia_gpu():
|
||||
cdef int tensor_gpu_index
|
||||
|
||||
cdef int check_tensor_gpu_index():
    """Return the index of the first NVIDIA GPU deemed usable for TensorRT.

    NVML is initialised, every device is queried for its CUDA compute
    capability, and the first device whose capability is at least 6.1 wins.

    Returns:
        The zero-based NVML device index of the first qualifying GPU, or -1
        when NVML reports no devices, when no device qualifies, or when an
        NVML call raises ``pynvml.NVMLError``.
    """
    try:
        pynvml.nvmlInit()
        device_count = pynvml.nvmlDeviceGetCount()

        if device_count == 0:
            print('No NVIDIA GPUs found.')
            return -1

        for i in range(device_count):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle)

            # Tuple comparison is lexicographic: capability >= 6.1.
            if (major, minor) >= (6, 1):
                print('found NVIDIA GPU!')
                return i

        print('NVIDIA GPU doesnt support TensorRT!')
        return -1

    except pynvml.NVMLError:
        return -1
    finally:
        # Best-effort cleanup: shutdown also fails when nvmlInit() itself
        # failed. Only NVML errors are swallowed so that KeyboardInterrupt /
        # SystemExit are no longer silently eaten here.
        try:
            pynvml.nvmlShutdown()
        except pynvml.NVMLError:
            print('Failed to shutdown pynvml cause probably no NVidia GPU')
|
||||
|
||||
tensor_gpu_index = check_tensor_gpu_index()
|
||||
if tensor_gpu_index > -1:
|
||||
from tensorrt_engine import TensorRTEngine
|
||||
else:
|
||||
from onnx_engine import OnnxEngine
|
||||
|
||||
|
||||
cdef class Inference:
|
||||
def __init__(self, api_client, on_annotation):
|
||||
self.api_client = api_client
|
||||
def __init__(self, loader_client, on_annotation):
|
||||
self.loader_client = loader_client
|
||||
self.on_annotation = on_annotation
|
||||
self.stop_signal = False
|
||||
self.model_input = None
|
||||
@@ -33,27 +58,26 @@ cdef class Inference:
|
||||
self.is_building_engine = False
|
||||
|
||||
cdef build_tensor_engine(self, object updater_callback):
    """Ensure a TensorRT engine file is available, converting from ONNX if needed.

    Does nothing when no TensorRT-capable GPU was detected
    (``tensor_gpu_index == -1``). Otherwise it first asks the loader client
    for an existing engine file; if that call returns a falsy value, the ONNX
    model is loaded, converted with ``TensorRTEngine.convert_from_onnx`` and
    the result is uploaded through the loader client.

    Args:
        updater_callback: callable invoked with the stage names
            'downloading', 'converting' and 'uploading' as the work proceeds.

    ``self.is_building_engine`` is True for the duration of the work and is
    always reset to False on exit, including when a step raises, because
    other code in this class waits in a loop while the flag is set.
    """
    # The guard used to read `if not tensor_gpu_index == -1`, which returned
    # exactly when a capable GPU WAS present and fell through when there was
    # none (in which case TensorRTEngine is not even imported).
    if tensor_gpu_index == -1:
        return

    # NOTE(review): the device index passed here is hard-coded to 0 rather
    # than tensor_gpu_index - confirm this is intended.
    engine_filename = TensorRTEngine.get_engine_filename(0)
    models_dir = constants.MODELS_FOLDER

    self.is_building_engine = True
    try:
        updater_callback('downloading')

        if self.loader_client.load_big_small_resource(engine_filename, models_dir):
            print('tensor rt engine is here, no need to build')
            return

        # time.sleep(8) # prevent simultaneously loading dll and models
        updater_callback('converting')
        onnx_model = self.loader_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir)
        model_bytes = TensorRTEngine.convert_from_onnx(onnx_model)
        updater_callback('uploading')
        self.loader_client.upload_big_small_resource(model_bytes, <str> engine_filename, models_dir)
        print(f'uploaded {engine_filename} to CDN and API')
    finally:
        # Always clear the flag so waiters are released even after a failure.
        self.is_building_engine = False
|
||||
|
||||
@@ -61,17 +85,16 @@ cdef class Inference:
|
||||
if self.engine is not None:
|
||||
return
|
||||
|
||||
is_nvidia = HardwareService.has_nvidia_gpu()
|
||||
key = Security.get_model_encryption_key()
|
||||
models_dir = constants.MODELS_FOLDER
|
||||
if is_nvidia:
|
||||
if tensor_gpu_index > -1:
|
||||
while self.is_building_engine:
|
||||
time.sleep(1)
|
||||
engine_filename = TensorRTEngine.get_engine_filename(0)
|
||||
model_bytes = self.api_client.load_big_small_resource(engine_filename, models_dir, key)
|
||||
|
||||
model_bytes = self.loader_client.load_big_small_resource(engine_filename, models_dir)
|
||||
self.engine = TensorRTEngine(model_bytes)
|
||||
else:
|
||||
model_bytes = self.api_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir, key)
|
||||
model_bytes = self.loader_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir)
|
||||
self.engine = OnnxEngine(model_bytes)
|
||||
|
||||
self.model_height, self.model_width = self.engine.get_input_shape()
|
||||
|
||||
Reference in New Issue
Block a user