diff --git a/Azaion.Inference/inference.pyx b/Azaion.Inference/inference.pyx index 48925de..d505e60 100644 --- a/Azaion.Inference/inference.pyx +++ b/Azaion.Inference/inference.pyx @@ -16,7 +16,7 @@ from hardware_service cimport HardwareService from security cimport Security if HardwareService.has_nvidia_gpu(): - from tensorrt_engine import TensorRTEngine + from tensorrt_engine cimport TensorRTEngine else: from onnx_engine import OnnxEngine diff --git a/Azaion.Inference/onnx_engine.pyx b/Azaion.Inference/onnx_engine.pyx index de68d73..ed8cdeb 100644 --- a/Azaion.Inference/onnx_engine.pyx +++ b/Azaion.Inference/onnx_engine.pyx @@ -14,11 +14,11 @@ cdef class OnnxEngine(InferenceEngine): model_meta = self.session.get_modelmeta() print("Metadata:", model_meta.custom_metadata_map) - cpdef tuple get_input_shape(self): + cdef tuple get_input_shape(self): shape = self.input_shape return shape[2], shape[3] - cpdef int get_batch_size(self): + cdef int get_batch_size(self): return self.batch_size cpdef run(self, input_data): diff --git a/Azaion.Inference/tensorrt_engine.pxd b/Azaion.Inference/tensorrt_engine.pxd index 6fc31bd..90d8b79 100644 --- a/Azaion.Inference/tensorrt_engine.pxd +++ b/Azaion.Inference/tensorrt_engine.pxd @@ -16,9 +16,17 @@ cdef class TensorRTEngine(InferenceEngine): cdef object stream + @staticmethod + cdef get_gpu_memory_bytes(int device_id) - cpdef tuple get_input_shape(self) + @staticmethod + cdef get_engine_filename(int device_id) - cpdef int get_batch_size(self) + @staticmethod + cdef convert_from_onnx(bytes onnx_model) - cpdef run(self, input_data) + cdef tuple get_input_shape(self) + + cdef int get_batch_size(self) + + cdef run(self, input_data) diff --git a/Azaion.Inference/tensorrt_engine.pyx b/Azaion.Inference/tensorrt_engine.pyx index d8c4189..c7cf1a5 100644 --- a/Azaion.Inference/tensorrt_engine.pyx +++ b/Azaion.Inference/tensorrt_engine.pyx @@ -56,7 +56,7 @@ cdef class TensorRTEngine(InferenceEngine): raise RuntimeError(f"Failed to initialize TensorRT engine: {str(e)}") @staticmethod - def get_gpu_memory_bytes(int device_id): + cdef get_gpu_memory_bytes(int device_id): total_memory = None try: pynvml.nvmlInit() @@ -73,7 +73,7 @@ cdef class TensorRTEngine(InferenceEngine): return 2 * 1024 * 1024 * 1024 if total_memory is None else total_memory # default 2 Gb @staticmethod - def get_engine_filename(int device_id): + cdef get_engine_filename(int device_id): try: device = cuda.Device(device_id) sm_count = device.multiprocessor_count @@ -83,7 +83,7 @@ cdef class TensorRTEngine(InferenceEngine): return None @staticmethod - def convert_from_onnx(bytes onnx_model): + cdef convert_from_onnx(bytes onnx_model): workspace_bytes = int(TensorRTEngine.get_gpu_memory_bytes(0) * 0.9) explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) @@ -112,13 +112,13 @@ cdef class TensorRTEngine(InferenceEngine): constants.log('conversion done!') return bytes(plan) - cpdef tuple get_input_shape(self): + cdef tuple get_input_shape(self): return self.input_shape[2], self.input_shape[3] - cpdef int get_batch_size(self): + cdef int get_batch_size(self): return self.batch_size - cpdef run(self, input_data): + cdef run(self, input_data): try: cuda.memcpy_htod_async(self.d_input, input_data, self.stream) self.context.set_tensor_address(self.input_name, int(self.d_input)) # input buffer