Changed to prepare onnx
ci/woodpecker/manual/e2e-smoke-jetson Pipeline was successful

This commit is contained in:
Roman Meshko
2026-04-26 23:14:08 +03:00
parent 4ec9633902
commit 73c9d57827
5 changed files with 158 additions and 5 deletions
+1
View File
@@ -6,6 +6,7 @@ python-multipart==0.0.22
Cython==3.2.4 Cython==3.2.4
opencv-python==4.10.0.84 opencv-python==4.10.0.84
numpy==1.26.4 numpy==1.26.4
onnx==1.17.0
pynvml==12.0.0 pynvml==12.0.0
requests==2.32.4 requests==2.32.4
loguru==0.7.3 loguru==0.7.3
+22 -2
View File
@@ -44,6 +44,10 @@ class EngineFactory:
def build_and_cache(self, bytes source_bytes, LoaderHttpClient loader_client, str models_dir): def build_and_cache(self, bytes source_bytes, LoaderHttpClient loader_client, str models_dir):
cdef LoadResult res cdef LoadResult res
engine_bytes, engine_filename = self.build_from_source(source_bytes, loader_client, models_dir) engine_bytes, engine_filename = self.build_from_source(source_bytes, loader_client, models_dir)
if engine_bytes is None:
raise RuntimeError("TensorRT conversion failed: no engine bytes produced")
if engine_filename is None:
raise RuntimeError("TensorRT conversion failed: engine filename could not be resolved")
res = loader_client.upload_big_small_resource(engine_bytes, engine_filename, models_dir) res = loader_client.upload_big_small_resource(engine_bytes, engine_filename, models_dir)
if res.err is not None: if res.err is not None:
constants_inf.log(f"Failed to upload converted model: {res.err}") constants_inf.log(f"Failed to upload converted model: {res.err}")
@@ -93,6 +97,22 @@ class JetsonTensorRTEngineFactory(TensorRTEngineFactory):
from engines.jetson_tensorrt_engine import JetsonTensorRTEngine from engines.jetson_tensorrt_engine import JetsonTensorRTEngine
return JetsonTensorRTEngine(model_bytes) return JetsonTensorRTEngine(model_bytes)
def load_engine(self, LoaderHttpClient loader_client, str models_dir):
    """Load a previously built engine, trying "int8" first and then "fp16".

    For each precision the engine filename is resolved through
    ``TensorRTEngine.get_engine_filename``; precisions without a filename
    are skipped. The first resource that loads without an error is handed
    to ``self.create`` and the resulting engine is returned.

    Returns ``None`` when no precision yields a loadable engine.
    """
    cdef str filename
    cdef LoadResult res
    from engines.tensorrt_engine import TensorRTEngine
    for precision in ("int8", "fp16"):
        filename = TensorRTEngine.get_engine_filename(precision)
        if filename is None:
            continue
        try:
            res = loader_client.load_big_small_resource(filename, models_dir)
            if res.err is None:
                return self.create(res.data)
        except Exception as e:
            # Still best-effort (fall through to the next precision), but
            # leave a trace so a broken cached engine is diagnosable.
            constants_inf.log(f"Failed to load {precision} engine '{filename}': {str(e)}")
    return None
def _get_ai_engine_filename(self): def _get_ai_engine_filename(self):
from engines.tensorrt_engine import TensorRTEngine from engines.tensorrt_engine import TensorRTEngine
return TensorRTEngine.get_engine_filename("int8") return TensorRTEngine.get_engine_filename("int8")
@@ -100,5 +120,5 @@ class JetsonTensorRTEngineFactory(TensorRTEngineFactory):
def build_from_source(self, onnx_bytes, LoaderHttpClient loader_client, str models_dir): def build_from_source(self, onnx_bytes, LoaderHttpClient loader_client, str models_dir):
from engines.jetson_tensorrt_engine import JetsonTensorRTEngine from engines.jetson_tensorrt_engine import JetsonTensorRTEngine
from engines.tensorrt_engine import TensorRTEngine from engines.tensorrt_engine import TensorRTEngine
engine_bytes = JetsonTensorRTEngine.convert_from_source(onnx_bytes, loader_client, models_dir) engine_bytes, precision = JetsonTensorRTEngine.convert_from_source_with_precision(onnx_bytes, loader_client, models_dir)
return engine_bytes, TensorRTEngine.get_engine_filename("int8") return engine_bytes, TensorRTEngine.get_engine_filename(precision)
+10 -1
View File
@@ -8,10 +8,19 @@ from loader_http_client cimport LoaderHttpClient, LoadResult
cdef class JetsonTensorRTEngine(TensorRTEngine): cdef class JetsonTensorRTEngine(TensorRTEngine):
@staticmethod
def convert_from_source(bytes onnx_model, LoaderHttpClient loader_client, str models_dir):
    """Convert the ONNX model and return only the engine bytes.

    Thin wrapper over ``convert_from_source_with_precision`` that discards
    the precision element of the returned pair.
    """
    converted = JetsonTensorRTEngine.convert_from_source_with_precision(
        onnx_model, loader_client, models_dir
    )
    engine_bytes, _precision = converted
    return engine_bytes
@staticmethod
def convert_from_source_with_precision(bytes onnx_model, LoaderHttpClient loader_client, str models_dir):
cdef str calib_cache_path cdef str calib_cache_path
calib_cache_path = JetsonTensorRTEngine._download_calib_cache(loader_client, models_dir) calib_cache_path = JetsonTensorRTEngine._download_calib_cache(loader_client, models_dir)
try: try:
return TensorRTEngine.convert_from_source(onnx_model, calib_cache_path) engine_bytes = TensorRTEngine.convert_from_source(onnx_model, calib_cache_path, True)
precision = "int8" if calib_cache_path is not None else "fp16"
return engine_bytes, precision
finally: finally:
if calib_cache_path is not None: if calib_cache_path is not None:
try: try:
+111
View File
@@ -0,0 +1,111 @@
import ast
import io
import onnx
from onnx import helper, numpy_helper
# Reduce* operator types examined by _rewrite_reduce_axes_inputs: when such a
# node has a second input that resolves to a known constant, that input is
# folded into an "axes" attribute on the node.
_REDUCE_OPS_WITH_AXES_INPUT = {
"ReduceL1",
"ReduceL2",
"ReduceLogSum",
"ReduceLogSumExp",
"ReduceMax",
"ReduceMean",
"ReduceMin",
"ReduceProd",
"ReduceSum",
"ReduceSumSquare",
}
def _metadata(model):
return {p.key: p.value for p in model.metadata_props}
def _input_size(model):
    """Return the ``(height, width)`` recorded in the ``imgsz`` metadata entry.

    The entry is parsed with ``ast.literal_eval`` and must be a two-element
    list or tuple of positive integers. Any missing, malformed or
    non-positive value falls back to ``(1280, 1280)``.
    """
    default_size = (1280, 1280)
    try:
        raw = _metadata(model).get("imgsz")
        size = ast.literal_eval(raw)
        if not isinstance(size, (list, tuple)) or len(size) != 2:
            return default_size
        height = int(size[0])
        width = int(size[1])
    except Exception:
        # Best-effort parse: any failure means "use the default size".
        return default_size
    if height > 0 and width > 0:
        return height, width
    return default_size
def _constant_values(graph):
values = {init.name: numpy_helper.to_array(init) for init in graph.initializer}
for node in graph.node:
if node.op_type != "Constant" or not node.output:
continue
for attr in node.attribute:
if attr.name == "value":
values[node.output[0]] = numpy_helper.to_array(attr.t)
break
return values
def _as_int_list(value):
if value is None:
return None
if getattr(value, "shape", ()) == ():
return [int(value)]
return [int(v) for v in value.reshape(-1).tolist()]
def _set_static_input_shape(model, batch=1):
    """Pin the first 4-D float graph input to ``(batch, 3, height, width)``.

    Height and width come from ``_input_size``. Inputs that are not of
    element type FLOAT, or that do not have exactly four dimensions, are
    skipped. Only the first matching input is rewritten.

    Returns True when an input was rewritten, False when none matched.
    """
    height, width = _input_size(model)
    target_shape = (batch, 3, height, width)
    for graph_input in model.graph.input:
        tensor_type = graph_input.type.tensor_type
        if tensor_type.elem_type == onnx.TensorProto.FLOAT:
            dims = tensor_type.shape.dim
            if len(dims) == 4:
                for index, size in enumerate(target_shape):
                    dims[index].dim_value = size
                return True
    return False
def _rewrite_reduce_axes_inputs(model):
    """Fold constant ``axes`` inputs of Reduce* nodes into an ``axes`` attribute.

    For every node whose op type is in ``_REDUCE_OPS_WITH_AXES_INPUT`` and
    that has at least two inputs, the second input is looked up among the
    graph's constants. When it resolves, the node is rewritten to the
    attribute form: any existing ``axes`` attribute is replaced and every
    input after the first is removed. Nodes whose axes are not a known
    constant are left untouched.

    An empty constant axes list is handled explicitly, because
    ``helper.make_attribute`` cannot infer a type from an empty iterable and
    would raise, aborting the whole pass:

    * with a non-zero ``noop_with_empty_axes`` attribute the node is a no-op
      that the attribute form cannot express, so it is left untouched;
    * otherwise empty axes means "reduce over all axes", which in attribute
      form is expressed by having no ``axes`` attribute at all.

    Returns True when at least one node was rewritten.
    """
    constants = _constant_values(model.graph)
    changed = False
    for node in model.graph.node:
        if node.op_type not in _REDUCE_OPS_WITH_AXES_INPUT or len(node.input) < 2:
            continue
        axes = _as_int_list(constants.get(node.input[1]))
        if axes is None:
            continue
        if not axes and any(
            attr.name == "noop_with_empty_axes" and attr.i != 0
            for attr in node.attribute
        ):
            continue
        kept_attrs = [attr for attr in node.attribute if attr.name != "axes"]
        del node.attribute[:]
        node.attribute.extend(kept_attrs)
        if axes:
            node.attribute.extend([helper.make_attribute("axes", axes)])
        del node.input[1:]
        changed = True
    return changed
def _cap_default_opset(model, max_opset=17):
for opset in model.opset_import:
if opset.domain in ("", "ai.onnx") and opset.version > max_opset:
opset.version = max_opset
return True
return False
def prepare_for_tensorrt(model_bytes):
    """Apply the TensorRT compatibility passes to a serialized ONNX model.

    Runs, in order: static input shape pinning, Reduce* axes input-to-attribute
    rewriting, and default opset capping. When no pass reports a change the
    original ``model_bytes`` object is returned as-is; otherwise the modified
    model is re-serialized and the new bytes are returned.
    """
    model = onnx.load_model_from_string(model_bytes)
    # Build a list (not a lazy any() over a generator) so every pass runs
    # even after an earlier one has already reported a change.
    pass_results = [
        _set_static_input_shape(model),
        _rewrite_reduce_axes_inputs(model),
        _cap_default_opset(model),
    ]
    if not any(pass_results):
        return model_bytes
    buffer = io.BytesIO()
    onnx.save_model(model, buffer)
    return buffer.getvalue()
+14 -2
View File
@@ -114,13 +114,21 @@ cdef class TensorRTEngine(InferenceEngine):
return None return None
@staticmethod @staticmethod
def convert_from_source(bytes onnx_model, str calib_cache_path=None): def convert_from_source(bytes onnx_model, str calib_cache_path=None, bint force_static_input=False):
gpu_mem = TensorRTEngine.get_gpu_memory_bytes(0) gpu_mem = TensorRTEngine.get_gpu_memory_bytes(0)
workspace_bytes = int(gpu_mem * 0.9) workspace_bytes = int(gpu_mem * 0.9)
explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
trt_logger = trt.Logger(trt.Logger.WARNING) trt_logger = trt.Logger(trt.Logger.WARNING)
if force_static_input:
try:
from engines.onnx_tensorrt_compat import prepare_for_tensorrt
onnx_model = prepare_for_tensorrt(onnx_model)
constants_inf.log(<str>'Prepared ONNX model for TensorRT static Jetson build')
except Exception as e:
constants_inf.logerror(<str>f'ONNX TensorRT compatibility preparation failed: {str(e)}')
with trt.Builder(trt_logger) as builder, \ with trt.Builder(trt_logger) as builder, \
builder.create_network(explicit_batch_flag) as network, \ builder.create_network(explicit_batch_flag) as network, \
trt.OnnxParser(network, trt_logger) as parser, \ trt.OnnxParser(network, trt_logger) as parser, \
@@ -129,6 +137,8 @@ cdef class TensorRTEngine(InferenceEngine):
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes) config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes)
if not parser.parse(onnx_model): if not parser.parse(onnx_model):
for i in range(parser.num_errors):
constants_inf.logerror(<str>f'TensorRT ONNX parser error: {parser.get_error(i)}')
return None return None
input_tensor = network.get_input(0) input_tensor = network.get_input(0)
@@ -137,7 +147,9 @@ cdef class TensorRTEngine(InferenceEngine):
H = max(shape[2], 1280) if shape[2] != -1 else 1280 H = max(shape[2], 1280) if shape[2] != -1 else 1280
W = max(shape[3], 1280) if shape[3] != -1 else 1280 W = max(shape[3], 1280) if shape[3] != -1 else 1280
if shape[0] == -1: if force_static_input:
input_tensor.shape = (1, C, H, W)
elif shape[0] == -1 or shape[2] == -1 or shape[3] == -1:
max_batch = TensorRTEngine.calculate_max_batch_size(gpu_mem, H, W) max_batch = TensorRTEngine.calculate_max_batch_size(gpu_mem, H, W)
profile = builder.create_optimization_profile() profile = builder.create_optimization_profile()
profile.set_shape( profile.set_shape(