Fix TensorRT engine

This commit is contained in:
zxsanny
2025-03-28 14:50:43 +02:00
committed by Alex Bezdieniezhnykh
parent 5b89a21b36
commit 06a23525a6
16 changed files with 272 additions and 94 deletions
View File
+1
View File
@@ -33,6 +33,7 @@ class WeatherMode(Enum):
Wint = 20
Night = 40
class AnnotationClass:
def __init__(self, id, name, color):
self.id = id
+2 -3
View File
@@ -1,8 +1,7 @@
import cv2
import numpy as np
from onnx_engine import InferenceEngine
from dto import AnnotationClass, Annotation, Detection
from inference.dto import Annotation, Detection, AnnotationClass
from inference.onnx_engine import InferenceEngine
class Inference:
+8 -4
View File
@@ -22,15 +22,19 @@ class InferenceEngine(abc.ABC):
pass
class OnnxEngine(InferenceEngine):
def __init__(self, model_path: str, batch_size: int = 1, **kwargs):
self.model_path = model_path
def __init__(self, model_bytes, batch_size: int = 1, **kwargs):
self.batch_size = batch_size
self.session = onnx.InferenceSession(model_path, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
self.session = onnx.InferenceSession(model_bytes, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
self.model_inputs = self.session.get_inputs()
self.input_name = self.model_inputs[0].name
self.input_shape = self.model_inputs[0].shape
if self.input_shape[0] != -1:
self.batch_size = self.input_shape[0]
model_meta = self.session.get_modelmeta()
print("Metadata:", model_meta.custom_metadata_map)
self.class_names = eval(model_meta.custom_metadata_map["names"])
pass
def get_input_shape(self) -> Tuple[int, int]:
shape = self.input_shape
-20
View File
@@ -1,20 +0,0 @@
# Ad-hoc benchmark / demo entry point for the detection inference engines
# (file deleted in this commit; preserved here as rendered in the diff).
# Runs TensorRT inference on a sample video; the commented-out variants
# below record earlier benchmark results and are kept for reference.
from onnx_engine import OnnxEngine
from tensorrt_engine import TensorRTEngine
from inference import Inference

if __name__ == "__main__":
    # Inference(OnnxEngine('azaion-2025-03-10.onnx', batch_size=4),
    #     confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4')
    # detection for the first 200 sec of video:
    #   onnxInference: 81 sec, 6.3 Gb VRAM
    #   tensorrt:      54 sec, 3.7 Gb VRAM
    # Inference(TensorRTEngine('azaion-2025-03-10_int8.engine', batch_size=16),
    #     confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4')
    # INT8 for 200 sec: 54 sec, 3.7 Gb
    # Inference(TensorRTEngine('azaion-2025-03-10_batch8.engine', batch_size=8),
    #     confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4')
    # Current configuration: FP16 ("half") engine, batch size 4.
    Inference(TensorRTEngine('azaion-2025-03-10-half_batch4.engine', batch_size=4),
              confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4')
+18 -16
View File
@@ -1,46 +1,48 @@
import re
import struct
import subprocess
from pathlib import Path
from typing import List, Tuple
import json
import numpy as np
import tensorrt as trt
import pycuda.driver as cuda
from inference.onnx_engine import InferenceEngine
import pycuda.autoinit # required for automatically initialize CUDA, do not remove.
from onnx_engine import InferenceEngine
class TensorRTEngine(InferenceEngine):
def __init__(self, model_path: str, batch_size: int = 4, **kwargs):
self.model_path = model_path
def __init__(self, model_bytes: bytes, batch_size: int = 4, **kwargs):
self.batch_size = batch_size
try:
logger = trt.Logger(trt.Logger.WARNING)
with open(model_path, 'rb') as f:
metadata_len = int.from_bytes(f.read(4), byteorder='little', signed=True)
metadata_bytes = f.read(metadata_len)
try:
self.metadata = json.loads(metadata_bytes)
print(f"Model metadata: {json.dumps(self.metadata, indent=2)}")
except json.JSONDecodeError:
print(f"Failed to parse metadata: {metadata_bytes}")
self.metadata = {}
engine_data = f.read()
metadata_len = struct.unpack("<I", model_bytes[:4])[0]
try:
self.metadata = json.loads(model_bytes[4:4 + metadata_len])
self.class_names = self.metadata['names']
print(f"Model metadata: {json.dumps(self.metadata, indent=2)}")
except json.JSONDecodeError as err:
print(f"Failed to parse metadata")
return
engine_data = model_bytes[4 + metadata_len:]
runtime = trt.Runtime(logger)
self.engine = runtime.deserialize_cuda_engine(engine_data)
if self.engine is None:
raise RuntimeError(f"Failed to load TensorRT engine from {model_path}")
raise RuntimeError(f"Failed to load TensorRT engine!")
self.context = self.engine.create_execution_context()
# input
self.input_name = self.engine.get_tensor_name(0)
engine_input_shape = self.engine.get_tensor_shape(self.input_name)
if engine_input_shape[0] != -1:
self.batch_size = engine_input_shape[0]
self.input_shape = [
batch_size if engine_input_shape[0] == -1 else engine_input_shape[0],
self.batch_size,
engine_input_shape[1], # Channels (usually fixed at 3 for RGB)
1280 if engine_input_shape[2] == -1 else engine_input_shape[2], # Height
1280 if engine_input_shape[3] == -1 else engine_input_shape[3] # Width