From 73c9d57827217b479db10cf930af218283944adf Mon Sep 17 00:00:00 2001 From: Roman Meshko Date: Sun, 26 Apr 2026 23:14:08 +0300 Subject: [PATCH] Changed to prepare onnx --- requirements-jetson.txt | 1 + src/engines/engine_factory.pyx | 24 +++++- src/engines/jetson_tensorrt_engine.pyx | 11 ++- src/engines/onnx_tensorrt_compat.py | 111 +++++++++++++++++++++++++ src/engines/tensorrt_engine.pyx | 16 +++- 5 files changed, 158 insertions(+), 5 deletions(-) create mode 100644 src/engines/onnx_tensorrt_compat.py diff --git a/requirements-jetson.txt b/requirements-jetson.txt index 03e58da..79fb685 100644 --- a/requirements-jetson.txt +++ b/requirements-jetson.txt @@ -6,6 +6,7 @@ python-multipart==0.0.22 Cython==3.2.4 opencv-python==4.10.0.84 numpy==1.26.4 +onnx==1.17.0 pynvml==12.0.0 requests==2.32.4 loguru==0.7.3 diff --git a/src/engines/engine_factory.pyx b/src/engines/engine_factory.pyx index df01aa6..beb921c 100644 --- a/src/engines/engine_factory.pyx +++ b/src/engines/engine_factory.pyx @@ -44,6 +44,10 @@ class EngineFactory: def build_and_cache(self, bytes source_bytes, LoaderHttpClient loader_client, str models_dir): cdef LoadResult res engine_bytes, engine_filename = self.build_from_source(source_bytes, loader_client, models_dir) + if engine_bytes is None: + raise RuntimeError("TensorRT conversion failed: no engine bytes produced") + if engine_filename is None: + raise RuntimeError("TensorRT conversion failed: engine filename could not be resolved") res = loader_client.upload_big_small_resource(engine_bytes, engine_filename, models_dir) if res.err is not None: constants_inf.log(f"Failed to upload converted model: {res.err}") @@ -93,6 +97,22 @@ class JetsonTensorRTEngineFactory(TensorRTEngineFactory): from engines.jetson_tensorrt_engine import JetsonTensorRTEngine return JetsonTensorRTEngine(model_bytes) + def load_engine(self, LoaderHttpClient loader_client, str models_dir): + cdef str filename + cdef LoadResult res + from engines.tensorrt_engine import TensorRTEngine + for precision in ("int8", "fp16"): + filename = TensorRTEngine.get_engine_filename(precision) + if filename is None: + continue + try: + res = loader_client.load_big_small_resource(filename, models_dir) + if res.err is None: + return self.create(res.data) + except Exception: + pass + return None + def _get_ai_engine_filename(self): from engines.tensorrt_engine import TensorRTEngine return TensorRTEngine.get_engine_filename("int8") @@ -100,5 +120,5 @@ class JetsonTensorRTEngineFactory(TensorRTEngineFactory): def build_from_source(self, onnx_bytes, LoaderHttpClient loader_client, str models_dir): from engines.jetson_tensorrt_engine import JetsonTensorRTEngine from engines.tensorrt_engine import TensorRTEngine - engine_bytes = JetsonTensorRTEngine.convert_from_source(onnx_bytes, loader_client, models_dir) - return engine_bytes, TensorRTEngine.get_engine_filename("int8") + engine_bytes, precision = JetsonTensorRTEngine.convert_from_source_with_precision(onnx_bytes, loader_client, models_dir) + return engine_bytes, TensorRTEngine.get_engine_filename(precision) diff --git a/src/engines/jetson_tensorrt_engine.pyx b/src/engines/jetson_tensorrt_engine.pyx index 0826f32..a554d12 100644 --- a/src/engines/jetson_tensorrt_engine.pyx +++ b/src/engines/jetson_tensorrt_engine.pyx @@ -8,10 +8,19 @@ from loader_http_client cimport LoaderHttpClient, LoadResult cdef class JetsonTensorRTEngine(TensorRTEngine): @staticmethod def convert_from_source(bytes onnx_model, LoaderHttpClient loader_client, str models_dir): + engine_bytes, precision = JetsonTensorRTEngine.convert_from_source_with_precision( + onnx_model, loader_client, models_dir + ) + return engine_bytes + + @staticmethod + def convert_from_source_with_precision(bytes onnx_model, LoaderHttpClient loader_client, str models_dir): cdef str calib_cache_path calib_cache_path = JetsonTensorRTEngine._download_calib_cache(loader_client, models_dir) try: - return TensorRTEngine.convert_from_source(onnx_model, calib_cache_path) + engine_bytes = TensorRTEngine.convert_from_source(onnx_model, calib_cache_path, True) + precision = "int8" if calib_cache_path is not None else "fp16" + return engine_bytes, precision finally: if calib_cache_path is not None: try: diff --git a/src/engines/onnx_tensorrt_compat.py b/src/engines/onnx_tensorrt_compat.py new file mode 100644 index 0000000..5db5ac5 --- /dev/null +++ b/src/engines/onnx_tensorrt_compat.py @@ -0,0 +1,111 @@ +import ast +import io + +import onnx +from onnx import helper, numpy_helper + + +_REDUCE_OPS_WITH_AXES_INPUT = { + "ReduceL1", + "ReduceL2", + "ReduceLogSum", + "ReduceLogSumExp", + "ReduceMax", + "ReduceMean", + "ReduceMin", + "ReduceProd", + "ReduceSum", + "ReduceSumSquare", +} + + +def _metadata(model): + return {p.key: p.value for p in model.metadata_props} + + +def _input_size(model): + try: + imgsz = _metadata(model).get("imgsz") + parsed = ast.literal_eval(imgsz) + if isinstance(parsed, (list, tuple)) and len(parsed) == 2: + h, w = int(parsed[0]), int(parsed[1]) + if h > 0 and w > 0: + return h, w + except Exception: + pass + return 1280, 1280 + + +def _constant_values(graph): + values = {init.name: numpy_helper.to_array(init) for init in graph.initializer} + for node in graph.node: + if node.op_type != "Constant" or not node.output: + continue + for attr in node.attribute: + if attr.name == "value": + values[node.output[0]] = numpy_helper.to_array(attr.t) + break + return values + + +def _as_int_list(value): + if value is None: + return None + if getattr(value, "shape", ()) == (): + return [int(value)] + return [int(v) for v in value.reshape(-1).tolist()] + + +def _set_static_input_shape(model, batch=1): + h, w = _input_size(model) + for graph_input in model.graph.input: + tensor_type = graph_input.type.tensor_type + if tensor_type.elem_type != onnx.TensorProto.FLOAT: + continue + dims = tensor_type.shape.dim + if len(dims) != 4: + continue + for dim, value in zip(dims, (batch, 3, h, w)): + dim.dim_value = value + return True + return False + + +def _rewrite_reduce_axes_inputs(model): + constants = _constant_values(model.graph) + changed = False + for node in model.graph.node: + if node.op_type not in _REDUCE_OPS_WITH_AXES_INPUT or len(node.input) < 2: + continue + axes = _as_int_list(constants.get(node.input[1])) + if axes is None: + continue + kept_attrs = [attr for attr in node.attribute if attr.name != "axes"] + del node.attribute[:] + node.attribute.extend(kept_attrs) + node.attribute.extend([helper.make_attribute("axes", axes)]) + del node.input[1:] + changed = True + return changed + + +def _cap_default_opset(model, max_opset=17): + for opset in model.opset_import: + if opset.domain in ("", "ai.onnx") and opset.version > max_opset: + opset.version = max_opset + return True + return False + + +def prepare_for_tensorrt(model_bytes): + model = onnx.load_model_from_string(model_bytes) + changed = False + changed = _set_static_input_shape(model) or changed + changed = _rewrite_reduce_axes_inputs(model) or changed + changed = _cap_default_opset(model) or changed + if not changed: + return model_bytes + + buffer = io.BytesIO() + onnx.save_model(model, buffer) + return buffer.getvalue() diff --git a/src/engines/tensorrt_engine.pyx b/src/engines/tensorrt_engine.pyx index 3d05cd6..cad0b3d 100644 --- a/src/engines/tensorrt_engine.pyx +++ b/src/engines/tensorrt_engine.pyx @@ -114,13 +114,21 @@ cdef class TensorRTEngine(InferenceEngine): return None @staticmethod - def convert_from_source(bytes onnx_model, str calib_cache_path=None): + def convert_from_source(bytes onnx_model, str calib_cache_path=None, bint force_static_input=False): gpu_mem = TensorRTEngine.get_gpu_memory_bytes(0) workspace_bytes = int(gpu_mem * 0.9) explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) trt_logger = trt.Logger(trt.Logger.WARNING) + if force_static_input: + try: + from engines.onnx_tensorrt_compat import prepare_for_tensorrt + onnx_model = prepare_for_tensorrt(onnx_model) + constants_inf.log('Prepared ONNX model for TensorRT static Jetson build') + except Exception as e: + constants_inf.logerror(f'ONNX TensorRT compatibility preparation failed: {str(e)}') + with trt.Builder(trt_logger) as builder, \ builder.create_network(explicit_batch_flag) as network, \ trt.OnnxParser(network, trt_logger) as parser, \ @@ -129,6 +137,8 @@ cdef class TensorRTEngine(InferenceEngine): config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes) if not parser.parse(onnx_model): + for i in range(parser.num_errors): + constants_inf.logerror(f'TensorRT ONNX parser error: {parser.get_error(i)}') return None input_tensor = network.get_input(0) @@ -137,7 +147,9 @@ cdef class TensorRTEngine(InferenceEngine): H = max(shape[2], 1280) if shape[2] != -1 else 1280 W = max(shape[3], 1280) if shape[3] != -1 else 1280 - if shape[0] == -1: + if force_static_input: + input_tensor.shape = (1, C, H, W) + elif shape[0] == -1 or shape[2] == -1 or shape[3] == -1: max_batch = TensorRTEngine.calculate_max_batch_size(gpu_mem, H, W) profile = builder.create_optimization_profile() profile.set_shape(