Auto-convert the TensorRT engine from ONNX for the specific CUDA GPU

This commit is contained in:
Alex Bezdieniezhnykh
2025-04-24 16:30:21 +03:00
parent e798af470b
commit e9a44e368d
14 changed files with 25 additions and 44 deletions
+6 -12
View File
@@ -22,9 +22,6 @@ cdef class InferenceEngine:
cpdef run(self, input_data):
raise NotImplementedError("Subclass must implement run")
cdef get_class_names(self):
raise NotImplementedError("Subclass must implement get_class_names")
cdef class OnnxEngine(InferenceEngine):
def __init__(self, model_bytes: bytes, batch_size: int = 1, **kwargs):
@@ -39,7 +36,6 @@ cdef class OnnxEngine(InferenceEngine):
print(f'AI detection model input: {self.model_inputs} {self.input_shape}')
model_meta = self.session.get_modelmeta()
print("Metadata:", model_meta.custom_metadata_map)
self.class_names = eval(model_meta.custom_metadata_map["names"])
cdef tuple get_input_shape(self):
shape = self.input_shape
@@ -48,9 +44,6 @@ cdef class OnnxEngine(InferenceEngine):
cdef int get_batch_size(self):
return self.batch_size
cdef get_class_names(self):
return self.class_names
cpdef run(self, input_data):
return self.session.run(None, {self.input_name: input_data})
@@ -69,10 +62,14 @@ cdef class TensorRTEngine(InferenceEngine):
raise RuntimeError(f"Failed to load TensorRT engine from bytes")
self.context = engine.create_execution_context()
# input
self.input_name = engine.get_tensor_name(0)
engine_input_shape = engine.get_tensor_shape(self.input_name)
self.batch_size = self.input_shape[0] if self.input_shape[0] != -1 else batch_size
if engine_input_shape[0] != -1:
self.batch_size = engine_input_shape[0]
else:
self.batch_size = batch_size
self.input_shape = [
self.batch_size,
@@ -154,7 +151,7 @@ cdef class TensorRTEngine(InferenceEngine):
if plan is None:
print('Conversion failed.')
return None
print('conversion done!')
return bytes(plan)
cdef tuple get_input_shape(self):
@@ -163,9 +160,6 @@ cdef class TensorRTEngine(InferenceEngine):
cdef int get_batch_size(self):
return self.batch_size
cdef get_class_names(self):
return self.class_names
cpdef run(self, input_data):
try:
cuda.memcpy_htod_async(self.d_input, input_data, self.stream)