Refactor type casting in Cython files for improved clarity and consistency

- Updated various Cython files to explicitly cast types, enhancing type safety and readability.
- Changed `engine_name` in `InferenceEngine` and its subclasses from a `@property` getter to an attribute assigned directly in the constructor.
- Modified the `request` method in `_SessionWithBase` to accept `*args` for better flexibility.
- Ensured proper type casting for return values in methods across multiple classes, including `Inference`, `CoreMLEngine`, and `TensorRTEngine`.

These changes aim to streamline the codebase and improve maintainability by enforcing consistent type usage.
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-03-30 06:17:16 +03:00
parent 3b30a17e11
commit fc57d677b4
16 changed files with 676 additions and 63 deletions
+11 -15
View File
@@ -1,7 +1,7 @@
from engines.inference_engine cimport InferenceEngine
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit # required for automatically initialize CUDA, do not remove.
import tensorrt as trt # pyright: ignore[reportMissingImports]
import pycuda.driver as cuda # pyright: ignore[reportMissingImports]
import pycuda.autoinit # pyright: ignore[reportMissingImports] # required for automatically initialize CUDA, do not remove.
import pynvml
import numpy as np
cimport constants_inf
@@ -54,6 +54,7 @@ cdef class TensorRTEngine(InferenceEngine):
except Exception as e:
raise RuntimeError(f"Failed to initialize TensorRT engine: {str(e)}")
self.engine_name = <str>"tensorrt"
@staticmethod
def get_gpu_memory_bytes(int device_id):
@@ -72,10 +73,6 @@ cdef class TensorRTEngine(InferenceEngine):
pass
return 2 * 1024 * 1024 * 1024 if total_memory is None else total_memory # default 2 Gb
@property
def engine_name(self):
return "tensorrt"
@staticmethod
def get_engine_filename():
try:
@@ -123,23 +120,22 @@ cdef class TensorRTEngine(InferenceEngine):
return bytes(plan)
cdef tuple get_input_shape(self):
return self.input_shape[2], self.input_shape[3]
return <tuple>(self.input_shape[2], self.input_shape[3])
cdef int get_batch_size(self):
return self.batch_size
return <int>self.batch_size
cdef run(self, input_data):
try:
cuda.memcpy_htod_async(self.d_input, input_data, self.stream)
self.context.set_tensor_address(self.input_name, int(self.d_input)) # input buffer
self.context.set_tensor_address(self.output_name, int(self.d_output)) # output buffer
self.context.set_tensor_address(self.input_name, int(self.d_input)) # type: ignore
self.context.set_tensor_address(self.output_name, int(self.d_output)) # type: ignore
self.context.execute_async_v3(stream_handle=self.stream.handle)
self.stream.synchronize()
self.context.execute_async_v3(stream_handle=self.stream.handle) # type: ignore
self.stream.synchronize() # type: ignore
# Fix: Remove the stream parameter from memcpy_dtoh
cuda.memcpy_dtoh(self.h_output, self.d_output)
output = self.h_output.reshape(self.output_shape)
output = self.h_output.reshape(self.output_shape) # type: ignore
return [output]
except Exception as e: