Changed to prepare onnx

2026-06-23 06:21:09 +00:00 · 2026-04-26 23:14:08 +03:00
parent 4ec9633902
commit 73c9d57827
5 changed files with 158 additions and 5 deletions
@@ -6,6 +6,7 @@ python-multipart==0.0.22
 Cython==3.2.4
 opencv-python==4.10.0.84
 numpy==1.26.4
 onnx==1.17.0
 pynvml==12.0.0
 requests==2.32.4
 loguru==0.7.3
@@ -44,6 +44,10 @@ class EngineFactory:
    def build_and_cache(self, bytes source_bytes, LoaderHttpClient loader_client, str models_dir):
        cdef LoadResult res
        engine_bytes, engine_filename = self.build_from_source(source_bytes, loader_client, models_dir)
        if engine_bytes is None:
            raise RuntimeError("TensorRT conversion failed: no engine bytes produced")
        if engine_filename is None:
            raise RuntimeError("TensorRT conversion failed: engine filename could not be resolved")
        res = loader_client.upload_big_small_resource(engine_bytes, engine_filename, models_dir)
        if res.err is not None:
            constants_inf.log(f"Failed to upload converted model: {res.err}")
@@ -93,6 +97,22 @@ class JetsonTensorRTEngineFactory(TensorRTEngineFactory):
        from engines.jetson_tensorrt_engine import JetsonTensorRTEngine
        return JetsonTensorRTEngine(model_bytes)
    def load_engine(self, LoaderHttpClient loader_client, str models_dir):
        """Load a prebuilt engine through ``loader_client``, trying INT8 before FP16.

        For each precision the engine filename is resolved with
        ``TensorRTEngine.get_engine_filename``; precisions for which no
        filename is resolved are skipped.

        Returns the engine built by ``self.create`` from the first resource
        that loads without an error, or ``None`` when neither precision
        yields a usable engine.
        """
        cdef str filename
        cdef LoadResult res
        from engines.tensorrt_engine import TensorRTEngine
        for precision in ("int8", "fp16"):
            filename = TensorRTEngine.get_engine_filename(precision)
            if filename is None:
                continue
            try:
                res = loader_client.load_big_small_resource(filename, models_dir)
                if res.err is None:
                    return self.create(res.data)
            except Exception as e:
                # Best effort: a failure for one precision must not prevent
                # trying the next one, but it should leave a trace in the log
                # instead of disappearing silently.
                constants_inf.log(f"Failed to load {precision} engine '{filename}': {e}")
        return None
    def _get_ai_engine_filename(self):
        """Return the filename ``TensorRTEngine`` resolves for the INT8 engine."""
        # Imported lazily, matching the other methods of this factory.
        from engines.tensorrt_engine import TensorRTEngine
        int8_filename = TensorRTEngine.get_engine_filename("int8")
        return int8_filename
@@ -100,5 +120,5 @@ class JetsonTensorRTEngineFactory(TensorRTEngineFactory):
    def build_from_source(self, onnx_bytes, LoaderHttpClient loader_client, str models_dir):
        from engines.jetson_tensorrt_engine import JetsonTensorRTEngine
        from engines.tensorrt_engine import TensorRTEngine
-        engine_bytes = JetsonTensorRTEngine.convert_from_source(onnx_bytes, loader_client, models_dir)
+        engine_bytes, precision = JetsonTensorRTEngine.convert_from_source_with_precision(onnx_bytes, loader_client, models_dir)
-        return engine_bytes, TensorRTEngine.get_engine_filename("int8")
+        return engine_bytes, TensorRTEngine.get_engine_filename(precision)
@@ -8,10 +8,19 @@ from loader_http_client cimport LoaderHttpClient, LoadResult
 cdef class JetsonTensorRTEngine(TensorRTEngine):
    @staticmethod
    def convert_from_source(bytes onnx_model, LoaderHttpClient loader_client, str models_dir):
        """Convert ``onnx_model`` and return only the resulting engine bytes.

        Delegates to ``convert_from_source_with_precision`` and discards the
        precision it reports.
        """
        converted = JetsonTensorRTEngine.convert_from_source_with_precision(
            onnx_model, loader_client, models_dir
        )
        engine_bytes, _precision = converted
        return engine_bytes
    @staticmethod
    def convert_from_source_with_precision(bytes onnx_model, LoaderHttpClient loader_client, str models_dir):
        cdef str calib_cache_path
        calib_cache_path = JetsonTensorRTEngine._download_calib_cache(loader_client, models_dir)
        try:
-            return TensorRTEngine.convert_from_source(onnx_model, calib_cache_path)
+            engine_bytes = TensorRTEngine.convert_from_source(onnx_model, calib_cache_path, True)
            precision = "int8" if calib_cache_path is not None else "fp16"
            return engine_bytes, precision
        finally:
            if calib_cache_path is not None:
                try:
@@ -0,0 +1,111 @@
 import ast
 import io
 import onnx
 from onnx import helper, numpy_helper
 # Reduce operator types inspected by `_rewrite_reduce_axes_inputs`: for nodes
 # of these types, a constant second input (the axes) is rewritten into an
 # `axes` attribute.
 _REDUCE_OPS_WITH_AXES_INPUT = {
    "ReduceL1",
    "ReduceL2",
    "ReduceLogSum",
    "ReduceLogSumExp",
    "ReduceMax",
    "ReduceMean",
    "ReduceMin",
    "ReduceProd",
    "ReduceSum",
    "ReduceSumSquare",
 }
 def _metadata(model):
    return {p.key: p.value for p in model.metadata_props}
 def _input_size(model):
    """Return the ``(height, width)`` stored in the model's ``imgsz`` metadata.

    The metadata value is parsed as a Python literal and must be a list or
    tuple of two positive integers. Anything else (missing key, unparsable
    text, wrong length, non-positive values) yields the ``(1280, 1280)``
    fallback; this lookup is best effort and never raises.
    """
    fallback = (1280, 1280)
    try:
        raw = _metadata(model).get("imgsz")
        size = ast.literal_eval(raw)
        if not isinstance(size, (list, tuple)) or len(size) != 2:
            return fallback
        height = int(size[0])
        width = int(size[1])
    except Exception:
        return fallback
    if height > 0 and width > 0:
        return height, width
    return fallback
 def _constant_values(graph):
    values = {init.name: numpy_helper.to_array(init) for init in graph.initializer}
    for node in graph.node:
        if node.op_type != "Constant" or not node.output:
            continue
        for attr in node.attribute:
            if attr.name == "value":
                values[node.output[0]] = numpy_helper.to_array(attr.t)
                break
    return values
 def _as_int_list(value):
    if value is None:
        return None
    if getattr(value, "shape", ()) == ():
        return [int(value)]
    return [int(v) for v in value.reshape(-1).tolist()]
 def _set_static_input_shape(model, batch=1):
    """Pin the first 4-D float graph input to ``(batch, 3, height, width)``.

    Height and width come from ``_input_size(model)``. Inputs that are not
    FLOAT or do not have exactly four dimensions are skipped.

    Returns True when an input was rewritten, False when none qualified.
    """
    height, width = _input_size(model)
    target_shape = (batch, 3, height, width)
    for candidate in model.graph.input:
        info = candidate.type.tensor_type
        is_float = info.elem_type == onnx.TensorProto.FLOAT
        if is_float and len(info.shape.dim) == 4:
            for axis, extent in enumerate(target_shape):
                info.shape.dim[axis].dim_value = extent
            # Only the first qualifying input is modified.
            return True
    return False
 def _rewrite_reduce_axes_inputs(model):
    """Move constant ``axes`` inputs of Reduce* nodes into an ``axes`` attribute.

    A node is rewritten when its op type is in ``_REDUCE_OPS_WITH_AXES_INPUT``,
    it has at least two inputs, and its second input resolves to a constant
    (initializer or ``Constant`` node output) with at least one element. Any
    existing ``axes`` attribute is replaced and every input after the first
    is removed. The constant itself is left in the graph.

    Nodes whose axes are not constant, or whose constant axes tensor is
    empty, are left untouched.

    Returns True when at least one node was rewritten.
    """
    constants = _constant_values(model.graph)
    changed = False
    for node in model.graph.node:
        if node.op_type not in _REDUCE_OPS_WITH_AXES_INPUT or len(node.input) < 2:
            continue
        axes = _as_int_list(constants.get(node.input[1]))
        # `None` means the axes are not statically known. An empty list cannot
        # be turned into an attribute: `helper.make_attribute` cannot infer a
        # type from an empty iterable and raises ValueError, which would abort
        # the whole preparation. Leave such nodes as they are.
        if not axes:
            continue
        kept_attrs = [attr for attr in node.attribute if attr.name != "axes"]
        del node.attribute[:]
        node.attribute.extend(kept_attrs)
        node.attribute.append(helper.make_attribute("axes", axes))
        del node.input[1:]
        changed = True
    return changed
 def _cap_default_opset(model, max_opset=17):
    for opset in model.opset_import:
        if opset.domain in ("", "ai.onnx") and opset.version > max_opset:
            opset.version = max_opset
            return True
    return False
 def prepare_for_tensorrt(model_bytes):
    """Return ONNX model bytes adjusted for a static TensorRT build.

    The model is parsed and three passes are applied in order: pinning the
    input shape, rewriting constant Reduce* ``axes`` inputs into attributes,
    and capping the default opset version. When no pass changed anything the
    original ``model_bytes`` object is returned as is; otherwise the modified
    model is serialized again.
    """
    model = onnx.load_model_from_string(model_bytes)
    # Every pass must run, so evaluate them all before combining the results.
    outcomes = [
        _set_static_input_shape(model),
        _rewrite_reduce_axes_inputs(model),
        _cap_default_opset(model),
    ]
    if any(outcomes):
        sink = io.BytesIO()
        onnx.save_model(model, sink)
        return sink.getvalue()
    return model_bytes
@@ -114,13 +114,21 @@ cdef class TensorRTEngine(InferenceEngine):
            return None
    @staticmethod
-    def convert_from_source(bytes onnx_model, str calib_cache_path=None):
+    def convert_from_source(bytes onnx_model, str calib_cache_path=None, bint force_static_input=False):
        gpu_mem = TensorRTEngine.get_gpu_memory_bytes(0)
        workspace_bytes = int(gpu_mem * 0.9)
        explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        trt_logger = trt.Logger(trt.Logger.WARNING)
        if force_static_input:
            try:
                from engines.onnx_tensorrt_compat import prepare_for_tensorrt
                onnx_model = prepare_for_tensorrt(onnx_model)
                constants_inf.log(<str>'Prepared ONNX model for TensorRT static Jetson build')
            except Exception as e:
                constants_inf.logerror(<str>f'ONNX TensorRT compatibility preparation failed: {str(e)}')
        with trt.Builder(trt_logger) as builder, \
                builder.create_network(explicit_batch_flag) as network, \
                trt.OnnxParser(network, trt_logger) as parser, \
@@ -129,6 +137,8 @@ cdef class TensorRTEngine(InferenceEngine):
            config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes)
            if not parser.parse(onnx_model):
                for i in range(parser.num_errors):
                    constants_inf.logerror(<str>f'TensorRT ONNX parser error: {parser.get_error(i)}')
                return None
            input_tensor = network.get_input(0)
@@ -137,7 +147,9 @@ cdef class TensorRTEngine(InferenceEngine):
            H = max(shape[2], 1280) if shape[2] != -1 else 1280
            W = max(shape[3], 1280) if shape[3] != -1 else 1280
-            if shape[0] == -1:
+            if force_static_input:
                input_tensor.shape = (1, C, H, W)
            elif shape[0] == -1 or shape[2] == -1 or shape[3] == -1:
                max_batch = TensorRTEngine.calculate_max_batch_size(gpu_mem, H, W)
                profile = builder.create_optimization_profile()
                profile.set_shape(