Import TensorRT at runtime instead of at compile time, so that tensorrt is dynamically loaded only when an NVIDIA GPU is present

This commit is contained in:
Alex Bezdieniezhnykh
2025-04-30 23:08:53 +03:00
parent ae83bc8542
commit 1c4bdabfb5
4 changed files with 12 additions and 20 deletions
+1 -1
View File
@@ -16,7 +16,7 @@ from hardware_service cimport HardwareService
from security cimport Security from security cimport Security
if HardwareService.has_nvidia_gpu(): if HardwareService.has_nvidia_gpu():
from tensorrt_engine cimport TensorRTEngine from tensorrt_engine import TensorRTEngine
else: else:
from onnx_engine import OnnxEngine from onnx_engine import OnnxEngine
+2 -2
View File
@@ -14,11 +14,11 @@ cdef class OnnxEngine(InferenceEngine):
model_meta = self.session.get_modelmeta() model_meta = self.session.get_modelmeta()
print("Metadata:", model_meta.custom_metadata_map) print("Metadata:", model_meta.custom_metadata_map)
cdef tuple get_input_shape(self): cpdef tuple get_input_shape(self):
shape = self.input_shape shape = self.input_shape
return shape[2], shape[3] return shape[2], shape[3]
cdef int get_batch_size(self): cpdef int get_batch_size(self):
return self.batch_size return self.batch_size
cpdef run(self, input_data): cpdef run(self, input_data):
+3 -11
View File
@@ -16,17 +16,9 @@ cdef class TensorRTEngine(InferenceEngine):
cdef object stream cdef object stream
@staticmethod
cdef get_gpu_memory_bytes(int device_id)
@staticmethod cpdef tuple get_input_shape(self)
cdef get_engine_filename(int device_id)
@staticmethod cpdef int get_batch_size(self)
cdef convert_from_onnx(bytes onnx_model)
cdef tuple get_input_shape(self) cpdef run(self, input_data)
cdef int get_batch_size(self)
cdef run(self, input_data)
+6 -6
View File
@@ -56,7 +56,7 @@ cdef class TensorRTEngine(InferenceEngine):
raise RuntimeError(f"Failed to initialize TensorRT engine: {str(e)}") raise RuntimeError(f"Failed to initialize TensorRT engine: {str(e)}")
@staticmethod @staticmethod
cdef get_gpu_memory_bytes(int device_id): def get_gpu_memory_bytes(int device_id):
total_memory = None total_memory = None
try: try:
pynvml.nvmlInit() pynvml.nvmlInit()
@@ -73,7 +73,7 @@ cdef class TensorRTEngine(InferenceEngine):
return 2 * 1024 * 1024 * 1024 if total_memory is None else total_memory # default 2 Gb return 2 * 1024 * 1024 * 1024 if total_memory is None else total_memory # default 2 Gb
@staticmethod @staticmethod
cdef get_engine_filename(int device_id): def get_engine_filename(int device_id):
try: try:
device = cuda.Device(device_id) device = cuda.Device(device_id)
sm_count = device.multiprocessor_count sm_count = device.multiprocessor_count
@@ -83,7 +83,7 @@ cdef class TensorRTEngine(InferenceEngine):
return None return None
@staticmethod @staticmethod
cdef convert_from_onnx(bytes onnx_model): def convert_from_onnx(bytes onnx_model):
workspace_bytes = int(TensorRTEngine.get_gpu_memory_bytes(0) * 0.9) workspace_bytes = int(TensorRTEngine.get_gpu_memory_bytes(0) * 0.9)
explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
@@ -112,13 +112,13 @@ cdef class TensorRTEngine(InferenceEngine):
constants.log('conversion done!') constants.log('conversion done!')
return bytes(plan) return bytes(plan)
cdef tuple get_input_shape(self): cpdef tuple get_input_shape(self):
return self.input_shape[2], self.input_shape[3] return self.input_shape[2], self.input_shape[3]
cdef int get_batch_size(self): cpdef int get_batch_size(self):
return self.batch_size return self.batch_size
cdef run(self, input_data): cpdef run(self, input_data):
try: try:
cuda.memcpy_htod_async(self.d_input, input_data, self.stream) cuda.memcpy_htod_async(self.d_input, input_data, self.stream)
self.context.set_tensor_address(self.input_name, int(self.d_input)) # input buffer self.context.set_tensor_address(self.input_name, int(self.d_input)) # input buffer