import json
import struct
from pathlib import Path
from typing import List, Tuple

import numpy as np
import pycuda.driver as cuda
import tensorrt as trt

from inference.onnx_engine import InferenceEngine

import pycuda.autoinit  # noqa: F401 -- initializes the CUDA context on import; do not remove


class TensorRTEngine(InferenceEngine):
    def __init__(self, model_bytes: bytes, batch_size: int = 4, **kwargs):
        self.batch_size = batch_size
        try:
            logger = trt.Logger(trt.Logger.WARNING)

            # Blob layout: a 4-byte little-endian metadata length, the
            # metadata JSON, then the serialized engine plan.
            metadata_len = struct.unpack("<i", model_bytes[:4])[0]
            self.metadata = json.loads(model_bytes[4:4 + metadata_len].decode("utf-8"))
            engine_bytes = model_bytes[4 + metadata_len:]

            runtime = trt.Runtime(logger)
            self.engine = runtime.deserialize_cuda_engine(engine_bytes)
            self.context = self.engine.create_execution_context()

            # Assumes a single-input, single-output network (tensors 0 and 1).
            self.input_name = self.engine.get_tensor_name(0)
            self.output_name = self.engine.get_tensor_name(1)

            self.input_shape = tuple(self.engine.get_tensor_shape(self.input_name))
            if self.input_shape[0] == -1:
                # Dynamic batch dimension: pin it to the requested batch size.
                self.input_shape = (self.batch_size, *self.input_shape[1:])
                self.context.set_input_shape(self.input_name, self.input_shape)
            self.output_shape = tuple(self.context.get_tensor_shape(self.output_name))

            # Device buffers for input/output, plus a page-locked host buffer
            # for the asynchronous copies in run().
            itemsize = np.dtype(np.float32).itemsize
            self.d_input = cuda.mem_alloc(int(np.prod(self.input_shape)) * itemsize)
            self.h_output = cuda.pagelocked_empty(int(np.prod(self.output_shape)), dtype=np.float32)
            self.d_output = cuda.mem_alloc(self.h_output.nbytes)
            self.stream = cuda.Stream()
        except Exception as e:
            raise RuntimeError(f"Failed to load TensorRT engine: {str(e)}")

    def get_input_shape(self) -> Tuple[int, int]:
        # Name reconstructed from context; returns (height, width) of the NCHW input.
        return self.input_shape[2], self.input_shape[3]

    def get_batch_size(self) -> int:
        return self.batch_size

    def run(self, input_data: np.ndarray) -> List[np.ndarray]:
        try:
            cuda.memcpy_htod_async(self.d_input, input_data, self.stream)
            self.context.set_tensor_address(self.input_name, int(self.d_input))    # input buffer
            self.context.set_tensor_address(self.output_name, int(self.d_output))  # output buffer
            self.context.execute_async_v3(stream_handle=self.stream.handle)
            self.stream.synchronize()
            # memcpy_dtoh is synchronous and takes no stream argument; the
            # stream has already been synchronized above.
            cuda.memcpy_dtoh(self.h_output, self.d_output)
            output = self.h_output.reshape(self.output_shape)
            return [output]
        except Exception as e:
            raise RuntimeError(f"Failed to run TensorRT inference: {str(e)}")
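
# Minimal usage sketch. The model path below is hypothetical, and the random
# NCHW tensor stands in for real preprocessed input; any engine serialized
# with the length-prefixed JSON metadata layout parsed above should load
# the same way.
if __name__ == "__main__":
    model_bytes = Path("models/detector.engine").read_bytes()  # hypothetical path
    engine = TensorRTEngine(model_bytes, batch_size=1)

    # Dummy float32 input matching the engine's expected NCHW shape.
    dummy = np.ascontiguousarray(
        np.random.rand(*engine.input_shape).astype(np.float32)
    )
    outputs = engine.run(dummy)
    print([o.shape for o in outputs])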