Reapply "import Tensorrt not in compile time in order to dynamically load tensorrt only if nvidia gpu is present"
This reverts commit cf01e5d952.
@@ -16,7 +16,7 @@ from hardware_service cimport HardwareService
 from security cimport Security
 
 if HardwareService.has_nvidia_gpu():
-    from tensorrt_engine cimport TensorRTEngine
+    from tensorrt_engine import TensorRTEngine
 else:
     from onnx_engine import OnnxEngine
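This hunk is the change the commit title describes: a `cimport` binds at compile time, so every machine loading the extension must be able to import TensorRT, while a Python-level `import` inside the `if` branch executes only when `HardwareService.has_nvidia_gpu()` is true at module load. A minimal sketch of the same guarded-import pattern; the pynvml probe below is an assumption, not the project's `HardwareService`:

def has_nvidia_gpu():
    # Hypothetical probe; the project's HardwareService.has_nvidia_gpu()
    # may be implemented differently.
    try:
        import pynvml
        pynvml.nvmlInit()
        count = pynvml.nvmlDeviceGetCount()
        pynvml.nvmlShutdown()
        return count > 0
    except Exception:
        return False

if has_nvidia_gpu():
    from tensorrt_engine import TensorRTEngine as Engine  # tensorrt is loaded only now
else:
    from onnx_engine import OnnxEngine as Engine          # CPU fallback, no tensorrt needed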
@@ -14,11 +14,11 @@ cdef class OnnxEngine(InferenceEngine):
         model_meta = self.session.get_modelmeta()
         print("Metadata:", model_meta.custom_metadata_map)
 
-    cdef tuple get_input_shape(self):
+    cpdef tuple get_input_shape(self):
         shape = self.input_shape
         return shape[2], shape[3]
 
-    cdef int get_batch_size(self):
+    cpdef int get_batch_size(self):
         return self.batch_size
 
     cpdef run(self, input_data):
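The `cdef` to `cpdef` switch is what makes these methods reachable from Python: a `cdef` method exists only at the C level, while `cpdef` generates both a fast C entry point and a Python-callable wrapper. A standalone illustration, not the project's code:

cdef class Example:
    cdef int c_only(self):       # visible to Cython callers only
        return 1

    cpdef int dual(self):        # C-speed call from Cython, plus a Python wrapper
        return 2

# From Python: Example().dual() returns 2, but Example().c_only()
# raises AttributeError because no Python wrapper is generated for it.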
@@ -16,17 +16,9 @@ cdef class TensorRTEngine(InferenceEngine):
     cdef object stream
 
-    @staticmethod
-    cdef get_gpu_memory_bytes(int device_id)
-
-    @staticmethod
-    cdef get_engine_filename(int device_id)
+    cpdef tuple get_input_shape(self)
 
-    @staticmethod
-    cdef convert_from_onnx(bytes onnx_model)
+    cpdef int get_batch_size(self)
 
-    cdef tuple get_input_shape(self)
-
-    cdef int get_batch_size(self)
-
-    cdef run(self, input_data)
+    cpdef run(self, input_data)
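Two things happen in this declaration file: the instance methods become `cpdef` so Python code can call them, and the `@staticmethod cdef` declarations disappear because the implementations below turn into plain `def` functions, which live entirely in the .pyx and need no .pxd declaration. Roughly, with file names assumed from the import above:

# tensorrt_engine.pxd -- only C-level state and cpdef signatures remain
cdef class TensorRTEngine(InferenceEngine):
    cdef object stream
    cpdef tuple get_input_shape(self)
    cpdef int get_batch_size(self)
    cpdef run(self, input_data)

# tensorrt_engine.pyx -- a plain 'def' staticmethod is declared nowhere else
cdef class TensorRTEngine(InferenceEngine):
    @staticmethod
    def convert_from_onnx(bytes onnx_model):
        ...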
@@ -56,7 +56,7 @@ cdef class TensorRTEngine(InferenceEngine):
             raise RuntimeError(f"Failed to initialize TensorRT engine: {str(e)}")
 
     @staticmethod
-    cdef get_gpu_memory_bytes(int device_id):
+    def get_gpu_memory_bytes(int device_id):
         total_memory = None
         try:
             pynvml.nvmlInit()
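The body of `get_gpu_memory_bytes` is cut off in this hunk; a typical pynvml query of the kind it starts looks like the sketch below. The handle lookup and shutdown calls are assumptions beyond the visible lines:

import pynvml

def gpu_memory_bytes(int device_id):
    total_memory = None
    try:
        pynvml.nvmlInit()
        handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)
        total_memory = pynvml.nvmlDeviceGetMemoryInfo(handle).total  # bytes
        pynvml.nvmlShutdown()
    except pynvml.NVMLError:
        pass
    # Same fallback as the diff: 2 GiB when the query fails.
    return 2 * 1024 * 1024 * 1024 if total_memory is None else total_memory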
@@ -73,7 +73,7 @@ cdef class TensorRTEngine(InferenceEngine):
         return 2 * 1024 * 1024 * 1024 if total_memory is None else total_memory  # default 2 Gb
 
     @staticmethod
-    cdef get_engine_filename(int device_id):
+    def get_engine_filename(int device_id):
         try:
             device = cuda.Device(device_id)
             sm_count = device.multiprocessor_count
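Serialized TensorRT plans are not portable across GPU models, so keying the engine file to the device (here via the streaming-multiprocessor count from pycuda) lets each GPU cache its own plan. A hypothetical completion; only the pycuda calls come from the diff, and the naming scheme is a guess:

import pycuda.autoinit  # initialises the CUDA driver context
import pycuda.driver as cuda

def engine_filename(int device_id):
    try:
        device = cuda.Device(device_id)
        sm_count = device.multiprocessor_count       # pycuda exposes device attributes as properties
        major, minor = device.compute_capability()
        return f"model_sm{major}{minor}_{sm_count}.engine"  # hypothetical scheme
    except Exception:
        return None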
@@ -83,7 +83,7 @@ cdef class TensorRTEngine(InferenceEngine):
             return None
 
     @staticmethod
-    cdef convert_from_onnx(bytes onnx_model):
+    def convert_from_onnx(bytes onnx_model):
         workspace_bytes = int(TensorRTEngine.get_gpu_memory_bytes(0) * 0.9)
 
         explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
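The visible lines match the standard TensorRT build flow: size the workspace from GPU memory, create an explicit-batch network, parse the ONNX bytes, and serialize a plan. A sketch of that flow under the TensorRT 8.x Python API; everything past the two visible lines is an assumption:

import tensorrt as trt

def convert_from_onnx(bytes onnx_model, workspace_bytes):
    logger = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(logger)
    explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(explicit_batch_flag)
    parser = trt.OnnxParser(network, logger)
    if not parser.parse(onnx_model):
        raise RuntimeError("failed to parse ONNX model")
    config = builder.create_builder_config()
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes)
    plan = builder.build_serialized_network(network, config)  # IHostMemory
    return bytes(plan)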
@@ -112,13 +112,13 @@ cdef class TensorRTEngine(InferenceEngine):
         constants.log('conversion done!')
         return bytes(plan)
 
-    cdef tuple get_input_shape(self):
+    cpdef tuple get_input_shape(self):
         return self.input_shape[2], self.input_shape[3]
 
-    cdef int get_batch_size(self):
+    cpdef int get_batch_size(self):
         return self.batch_size
 
-    cdef run(self, input_data):
+    cpdef run(self, input_data):
         try:
             cuda.memcpy_htod_async(self.d_input, input_data, self.stream)
             self.context.set_tensor_address(self.input_name, int(self.d_input))  # input buffer
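`run` ends where the hunk is cut off; the two visible calls are the first half of TensorRT's tensor-address execution pattern (set addresses, enqueue with execute_async_v3, copy results back). A plausible remainder, where the output buffer names and fields are assumptions:

# ...continuing inside run(); self.d_output / self.h_output are assumed fields:
self.context.set_tensor_address(self.output_name, int(self.d_output))  # output buffer
self.context.execute_async_v3(self.stream.handle)                      # enqueue inference
cuda.memcpy_dtoh_async(self.h_output, self.d_output, self.stream)      # device -> host
self.stream.synchronize()
return self.h_output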