import ast
import json
import struct

import numpy as np
import onnxruntime as ort
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit  # required to automatically initialize CUDA; do not remove


cdef class InferenceEngine:
    # Attribute declarations are required on cdef classes; instances have no __dict__.
    cdef public int batch_size

    def __init__(self, model_bytes: bytes, batch_size: int = 1, **kwargs):
        self.batch_size = batch_size

    cdef tuple get_input_shape(self):
        raise NotImplementedError("Subclass must implement get_input_shape")

    cdef int get_batch_size(self):
        return self.batch_size

    cpdef run(self, input_data):
        raise NotImplementedError("Subclass must implement run")

    cdef get_class_names(self):
        raise NotImplementedError("Subclass must implement get_class_names")


cdef class OnnxEngine(InferenceEngine):
    cdef object session
    cdef object model_inputs
    cdef str input_name
    cdef object input_shape
    cdef object class_names

    def __init__(self, model_bytes: bytes, batch_size: int = 1, **kwargs):
        super().__init__(model_bytes, batch_size)
        self.session = ort.InferenceSession(
            model_bytes,
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
        )
        self.model_inputs = self.session.get_inputs()
        self.input_name = self.model_inputs[0].name
        self.input_shape = self.model_inputs[0].shape
        # A fixed first dimension overrides the requested batch size; dynamic
        # batch dimensions appear as -1 or as a symbolic (string) name.
        if isinstance(self.input_shape[0], int) and self.input_shape[0] != -1:
            self.batch_size = self.input_shape[0]
        print(f"AI detection model input: {self.model_inputs} {self.input_shape}")
        model_meta = self.session.get_modelmeta()
        print("Metadata:", model_meta.custom_metadata_map)
        # ast.literal_eval safely parses the serialized class-name mapping
        # (eval would execute arbitrary code embedded in the metadata).
        self.class_names = ast.literal_eval(model_meta.custom_metadata_map["names"])

    cdef tuple get_input_shape(self):
        # NCHW layout: return (height, width).
        return self.input_shape[2], self.input_shape[3]

    cdef int get_batch_size(self):
        return self.batch_size

    cdef get_class_names(self):
        return self.class_names

    cpdef run(self, input_data):
        return self.session.run(None, {self.input_name: input_data})


cdef class TensorRTEngine(InferenceEngine):
    cdef object engine
    cdef object context
    cdef object stream
    cdef list inputs
    cdef list outputs
    cdef list bindings
    cdef object class_names
    cdef tuple input_shape

    def __init__(self, model_bytes: bytes, batch_size: int = 4, **kwargs):
        super().__init__(model_bytes, batch_size)
        print("Initializing TensorRT engine")
        try:
            logger = trt.Logger(trt.Logger.WARNING)
            # Assumes an Ultralytics-style export: a 4-byte little-endian
            # length prefix, a JSON metadata blob, then the serialized engine.
            metadata_len = struct.unpack("<i", model_bytes[:4])[0]
            metadata = json.loads(model_bytes[4:4 + metadata_len].decode("utf-8"))
            print("Metadata:", metadata)
            self.class_names = metadata.get("names")
            runtime = trt.Runtime(logger)
            self.engine = runtime.deserialize_cuda_engine(model_bytes[4 + metadata_len:])
            self.context = self.engine.create_execution_context()
            # Allocate page-locked host and device buffers for every binding;
            # uses the TensorRT 8.x bindings API and assumes static shapes.
            self.inputs, self.outputs, self.bindings = [], [], []
            self.stream = cuda.Stream()
            for binding in self.engine:
                shape = tuple(self.engine.get_binding_shape(binding))
                dtype = trt.nptype(self.engine.get_binding_dtype(binding))
                host_mem = cuda.pagelocked_empty(trt.volume(shape), dtype)
                device_mem = cuda.mem_alloc(host_mem.nbytes)
                self.bindings.append(int(device_mem))
                if self.engine.binding_is_input(binding):
                    self.input_shape = shape
                    if shape[0] > 0:
                        self.batch_size = shape[0]
                    self.inputs.append({"host": host_mem, "device": device_mem})
                else:
                    self.outputs.append({"host": host_mem, "device": device_mem, "shape": shape})
        except Exception as e:
            print(f"Failed to initialize TensorRT engine: {e}")
            raise

    cdef tuple get_input_shape(self):
        # NCHW layout: return (height, width).
        return self.input_shape[2], self.input_shape[3]

    cdef int get_batch_size(self):
        return self.batch_size

    cdef get_class_names(self):
        return self.class_names

    cpdef run(self, input_data):
        # Stage the input in the page-locked buffer, run the engine
        # asynchronously, then copy every output back to the host.
        np.copyto(self.inputs[0]["host"], np.ascontiguousarray(input_data).ravel())
        cuda.memcpy_htod_async(self.inputs[0]["device"], self.inputs[0]["host"], self.stream)
        self.context.execute_async_v2(bindings=self.bindings, stream_handle=self.stream.handle)
        for out in self.outputs:
            cuda.memcpy_dtoh_async(out["host"], out["device"], self.stream)
        self.stream.synchronize()
        return [out["host"].reshape(out["shape"]) for out in self.outputs]
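

# ---------------------------------------------------------------------------
# Usage sketch (a non-authoritative example): drives the ONNX backend through
# the shared interface. The file name "model.onnx" and the 640x640 zero input
# are hypothetical, chosen only for illustration. Note that from Python only
# the def/cpdef members (the constructors and run) are visible; the cdef
# accessors such as get_input_shape are callable only from other Cython code.
#
#     with open("model.onnx", "rb") as f:
#         engine = OnnxEngine(f.read(), batch_size=1)
#     dummy = np.zeros((1, 3, 640, 640), dtype=np.float32)
#     outputs = engine.run(dummy)
# ---------------------------------------------------------------------------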