Refactor type casting in Cython files for improved clarity and consistency

- Updated various Cython files to explicitly cast types, enhancing type safety and readability.
- Adjusted the `engine_name` property in `InferenceEngine` and its subclasses to be set directly in the constructor.
- Modified the `request` method in `_SessionWithBase` to accept `*args` for better flexibility.
- Ensured proper type casting for return values in methods across multiple classes, including `InferenceEngine`, `CoreMLEngine`, and `TensorRTEngine`.

These changes aim to streamline the codebase and improve maintainability by enforcing consistent type usage.
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-03-30 06:17:16 +03:00
parent 3b30a17e11
commit fc57d677b4
16 changed files with 676 additions and 63 deletions
+1 -1
View File
@@ -40,7 +40,7 @@ tensor_gpu_index = _check_tensor_gpu_index()
def _select_engine_class():
if tensor_gpu_index > -1:
from engines.tensorrt_engine import TensorRTEngine
from engines.tensorrt_engine import TensorRTEngine # pyright: ignore[reportMissingImports]
return TensorRTEngine
if _is_apple_silicon():
from engines.coreml_engine import CoreMLEngine
+3 -6
View File
@@ -28,10 +28,7 @@ cdef class CoreMLEngine(InferenceEngine):
self.batch_size = 1
constants_inf.log(<str>f'CoreML model: {self.img_width}x{self.img_height}')
@property
def engine_name(self):
return "coreml"
self.engine_name = <str>"coreml"
@staticmethod
def get_engine_filename():
@@ -49,10 +46,10 @@ cdef class CoreMLEngine(InferenceEngine):
raise ValueError("No .mlpackage or .mlmodel found in zip")
cdef tuple get_input_shape(self):
return self.img_height, self.img_width
return <tuple>(self.img_height, self.img_width)
cdef int get_batch_size(self):
return 1
return <int>1
cdef run(self, input_data):
cdef int w = self.img_width
+4 -3
View File
@@ -4,6 +4,7 @@ import numpy as np
cdef class InferenceEngine:
cdef public int batch_size
cdef tuple get_input_shape(self)
cdef int get_batch_size(self)
cdef run(self, input_data)
cdef public str engine_name
cdef tuple get_input_shape(self) # type: ignore
cdef int get_batch_size(self) # type: ignore
cdef run(self, input_data) # type: ignore
+2 -5
View File
@@ -1,10 +1,7 @@
cdef class InferenceEngine:
def __init__(self, model_bytes: bytes, batch_size: int = 1, **kwargs):
self.batch_size = batch_size
@property
def engine_name(self):
return "onnx"
self.engine_name = <str>"onnx"
@staticmethod
def get_engine_filename():
@@ -22,7 +19,7 @@ cdef class InferenceEngine:
raise NotImplementedError("Subclass must implement get_input_shape")
cdef int get_batch_size(self):
return self.batch_size
return <int>self.batch_size
cdef run(self, input_data):
raise NotImplementedError("Subclass must implement run")
+4 -4
View File
@@ -36,15 +36,15 @@ cdef class OnnxEngine(InferenceEngine):
cdef tuple get_input_shape(self):
shape = self.input_shape
return shape[2], shape[3]
return <tuple>(shape[2], shape[3])
cdef int get_batch_size(self):
return self.batch_size
return <int>self.batch_size
cdef run(self, input_data):
try:
return self.session.run(None, {self.input_name: input_data})
return self.session.run(None, {self.input_name: input_data}) # type: ignore[attr-defined]
except Exception:
if self._cpu_session is not None:
return self._cpu_session.run(None, {self.input_name: input_data})
return self._cpu_session.run(None, {self.input_name: input_data}) # type: ignore[attr-defined]
raise
+1 -1
View File
@@ -8,7 +8,7 @@ cdef class TensorRTEngine(InferenceEngine):
cdef public object d_input
cdef public object d_output
cdef str input_name
cdef object input_shape
cdef list input_shape
cdef object h_output
cdef str output_name
+11 -15
View File
@@ -1,7 +1,7 @@
from engines.inference_engine cimport InferenceEngine
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit # required for automatically initialize CUDA, do not remove.
import tensorrt as trt # pyright: ignore[reportMissingImports]
import pycuda.driver as cuda # pyright: ignore[reportMissingImports]
import pycuda.autoinit # pyright: ignore[reportMissingImports] # required for automatically initialize CUDA, do not remove.
import pynvml
import numpy as np
cimport constants_inf
@@ -54,6 +54,7 @@ cdef class TensorRTEngine(InferenceEngine):
except Exception as e:
raise RuntimeError(f"Failed to initialize TensorRT engine: {str(e)}")
self.engine_name = <str>"tensorrt"
@staticmethod
def get_gpu_memory_bytes(int device_id):
@@ -72,10 +73,6 @@ cdef class TensorRTEngine(InferenceEngine):
pass
return 2 * 1024 * 1024 * 1024 if total_memory is None else total_memory # default 2 Gb
@property
def engine_name(self):
return "tensorrt"
@staticmethod
def get_engine_filename():
try:
@@ -123,23 +120,22 @@ cdef class TensorRTEngine(InferenceEngine):
return bytes(plan)
cdef tuple get_input_shape(self):
return self.input_shape[2], self.input_shape[3]
return <tuple>(self.input_shape[2], self.input_shape[3])
cdef int get_batch_size(self):
return self.batch_size
return <int>self.batch_size
cdef run(self, input_data):
try:
cuda.memcpy_htod_async(self.d_input, input_data, self.stream)
self.context.set_tensor_address(self.input_name, int(self.d_input)) # input buffer
self.context.set_tensor_address(self.output_name, int(self.d_output)) # output buffer
self.context.set_tensor_address(self.input_name, int(self.d_input)) # type: ignore
self.context.set_tensor_address(self.output_name, int(self.d_output)) # type: ignore
self.context.execute_async_v3(stream_handle=self.stream.handle)
self.stream.synchronize()
self.context.execute_async_v3(stream_handle=self.stream.handle) # type: ignore
self.stream.synchronize() # type: ignore
# Fix: Remove the stream parameter from memcpy_dtoh
cuda.memcpy_dtoh(self.h_output, self.d_output)
output = self.h_output.reshape(self.output_shape)
output = self.h_output.reshape(self.output_shape) # type: ignore
return [output]
except Exception as e: