Fix TensorRT engine

This commit is contained in:
zxsanny
2025-03-28 14:50:43 +02:00
committed by Alex Bezdieniezhnykh
parent 5b89a21b36
commit 06a23525a6
16 changed files with 272 additions and 94 deletions
View File
+1
View File
@@ -33,6 +33,7 @@ class WeatherMode(Enum):
Wint = 20
Night = 40
class AnnotationClass:
def __init__(self, id, name, color):
self.id = id
+2 -3
View File
@@ -1,8 +1,7 @@
import cv2
import numpy as np
from onnx_engine import InferenceEngine
from dto import AnnotationClass, Annotation, Detection
from inference.dto import Annotation, Detection, AnnotationClass
from inference.onnx_engine import InferenceEngine
class Inference:
+8 -4
View File
@@ -22,15 +22,19 @@ class InferenceEngine(abc.ABC):
pass
class OnnxEngine(InferenceEngine):
def __init__(self, model_path: str, batch_size: int = 1, **kwargs):
self.model_path = model_path
def __init__(self, model_bytes, batch_size: int = 1, **kwargs):
self.batch_size = batch_size
self.session = onnx.InferenceSession(model_path, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
self.session = onnx.InferenceSession(model_bytes, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
self.model_inputs = self.session.get_inputs()
self.input_name = self.model_inputs[0].name
self.input_shape = self.model_inputs[0].shape
if self.input_shape[0] != -1:
self.batch_size = self.input_shape[0]
model_meta = self.session.get_modelmeta()
print("Metadata:", model_meta.custom_metadata_map)
self.class_names = eval(model_meta.custom_metadata_map["names"])
pass
def get_input_shape(self) -> Tuple[int, int]:
shape = self.input_shape
-20
View File
@@ -1,20 +0,0 @@
# Ad-hoc benchmark / demo entry point for the detection inference engines
# (file deleted in this commit; preserved here as rendered in the diff).
# Runs TensorRT inference on a sample video; the commented-out variants
# below record earlier benchmark results and are kept for reference.
from onnx_engine import OnnxEngine
from tensorrt_engine import TensorRTEngine
from inference import Inference

if __name__ == "__main__":
    # Inference(OnnxEngine('azaion-2025-03-10.onnx', batch_size=4),
    #     confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4')
    # detection for the first 200 sec of video:
    #   onnxInference: 81 sec, 6.3 Gb VRAM
    #   tensorrt:      54 sec, 3.7 Gb VRAM
    # Inference(TensorRTEngine('azaion-2025-03-10_int8.engine', batch_size=16),
    #     confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4')
    # INT8 for 200 sec: 54 sec, 3.7 Gb
    # Inference(TensorRTEngine('azaion-2025-03-10_batch8.engine', batch_size=8),
    #     confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4')
    # Current configuration: FP16 ("half") engine, batch size 4.
    Inference(TensorRTEngine('azaion-2025-03-10-half_batch4.engine', batch_size=4),
              confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4')
+18 -16
View File
@@ -1,46 +1,48 @@
import re
import struct
import subprocess
from pathlib import Path
from typing import List, Tuple
import json
import numpy as np
import tensorrt as trt
import pycuda.driver as cuda
from inference.onnx_engine import InferenceEngine
import pycuda.autoinit # required for automatically initialize CUDA, do not remove.
from onnx_engine import InferenceEngine
class TensorRTEngine(InferenceEngine):
def __init__(self, model_path: str, batch_size: int = 4, **kwargs):
self.model_path = model_path
def __init__(self, model_bytes: bytes, batch_size: int = 4, **kwargs):
self.batch_size = batch_size
try:
logger = trt.Logger(trt.Logger.WARNING)
with open(model_path, 'rb') as f:
metadata_len = int.from_bytes(f.read(4), byteorder='little', signed=True)
metadata_bytes = f.read(metadata_len)
try:
self.metadata = json.loads(metadata_bytes)
print(f"Model metadata: {json.dumps(self.metadata, indent=2)}")
except json.JSONDecodeError:
print(f"Failed to parse metadata: {metadata_bytes}")
self.metadata = {}
engine_data = f.read()
metadata_len = struct.unpack("<I", model_bytes[:4])[0]
try:
self.metadata = json.loads(model_bytes[4:4 + metadata_len])
self.class_names = self.metadata['names']
print(f"Model metadata: {json.dumps(self.metadata, indent=2)}")
except json.JSONDecodeError as err:
print(f"Failed to parse metadata")
return
engine_data = model_bytes[4 + metadata_len:]
runtime = trt.Runtime(logger)
self.engine = runtime.deserialize_cuda_engine(engine_data)
if self.engine is None:
raise RuntimeError(f"Failed to load TensorRT engine from {model_path}")
raise RuntimeError(f"Failed to load TensorRT engine!")
self.context = self.engine.create_execution_context()
# input
self.input_name = self.engine.get_tensor_name(0)
engine_input_shape = self.engine.get_tensor_shape(self.input_name)
if engine_input_shape[0] != -1:
self.batch_size = engine_input_shape[0]
self.input_shape = [
batch_size if engine_input_shape[0] == -1 else engine_input_shape[0],
self.batch_size,
engine_input_shape[1], # Channels (usually fixed at 3 for RGB)
1280 if engine_input_shape[2] == -1 else engine_input_shape[2], # Height
1280 if engine_input_shape[3] == -1 else engine_input_shape[3] # Width