mirror of
https://github.com/azaion/detections.git
synced 2026-06-23 05:41:09 +00:00
This commit is contained in:
@@ -6,6 +6,7 @@ python-multipart==0.0.22
|
|||||||
Cython==3.2.4
|
Cython==3.2.4
|
||||||
opencv-python==4.10.0.84
|
opencv-python==4.10.0.84
|
||||||
numpy==1.26.4
|
numpy==1.26.4
|
||||||
|
onnx==1.17.0
|
||||||
pynvml==12.0.0
|
pynvml==12.0.0
|
||||||
requests==2.32.4
|
requests==2.32.4
|
||||||
loguru==0.7.3
|
loguru==0.7.3
|
||||||
|
|||||||
@@ -44,6 +44,10 @@ class EngineFactory:
|
|||||||
def build_and_cache(self, bytes source_bytes, LoaderHttpClient loader_client, str models_dir):
|
def build_and_cache(self, bytes source_bytes, LoaderHttpClient loader_client, str models_dir):
|
||||||
cdef LoadResult res
|
cdef LoadResult res
|
||||||
engine_bytes, engine_filename = self.build_from_source(source_bytes, loader_client, models_dir)
|
engine_bytes, engine_filename = self.build_from_source(source_bytes, loader_client, models_dir)
|
||||||
|
if engine_bytes is None:
|
||||||
|
raise RuntimeError("TensorRT conversion failed: no engine bytes produced")
|
||||||
|
if engine_filename is None:
|
||||||
|
raise RuntimeError("TensorRT conversion failed: engine filename could not be resolved")
|
||||||
res = loader_client.upload_big_small_resource(engine_bytes, engine_filename, models_dir)
|
res = loader_client.upload_big_small_resource(engine_bytes, engine_filename, models_dir)
|
||||||
if res.err is not None:
|
if res.err is not None:
|
||||||
constants_inf.log(f"Failed to upload converted model: {res.err}")
|
constants_inf.log(f"Failed to upload converted model: {res.err}")
|
||||||
@@ -93,6 +97,22 @@ class JetsonTensorRTEngineFactory(TensorRTEngineFactory):
|
|||||||
from engines.jetson_tensorrt_engine import JetsonTensorRTEngine
|
from engines.jetson_tensorrt_engine import JetsonTensorRTEngine
|
||||||
return JetsonTensorRTEngine(model_bytes)
|
return JetsonTensorRTEngine(model_bytes)
|
||||||
|
|
||||||
|
def load_engine(self, LoaderHttpClient loader_client, str models_dir):
    """Load a previously built engine, preferring int8 over fp16.

    For each precision in turn, the engine filename is resolved through
    ``TensorRTEngine.get_engine_filename`` and fetched with
    ``loader_client.load_big_small_resource``. The first resource that loads
    without an error is passed to ``self.create`` and returned.

    Failures are logged and the next precision is tried, so a missing or
    broken int8 engine does not prevent the fp16 one from being used.

    Returns:
        The engine produced by ``self.create``, or ``None`` when no
        precision could be loaded.
    """
    cdef str filename
    cdef LoadResult res
    from engines.tensorrt_engine import TensorRTEngine
    for precision in ("int8", "fp16"):
        filename = TensorRTEngine.get_engine_filename(precision)
        if filename is None:
            continue
        try:
            res = loader_client.load_big_small_resource(filename, models_dir)
            if res.err is None:
                return self.create(res.data)
            # Record why this precision was skipped instead of dropping it silently.
            constants_inf.log(f"Could not load {precision} engine {filename}: {res.err}")
        except Exception as e:
            # Best effort: keep going, but leave a trace of what went wrong.
            constants_inf.log(f"Failed to load {precision} engine {filename}: {e}")
    return None
|
||||||
|
|
||||||
def _get_ai_engine_filename(self):
|
def _get_ai_engine_filename(self):
|
||||||
from engines.tensorrt_engine import TensorRTEngine
|
from engines.tensorrt_engine import TensorRTEngine
|
||||||
return TensorRTEngine.get_engine_filename("int8")
|
return TensorRTEngine.get_engine_filename("int8")
|
||||||
@@ -100,5 +120,5 @@ class JetsonTensorRTEngineFactory(TensorRTEngineFactory):
|
|||||||
def build_from_source(self, onnx_bytes, LoaderHttpClient loader_client, str models_dir):
|
def build_from_source(self, onnx_bytes, LoaderHttpClient loader_client, str models_dir):
|
||||||
from engines.jetson_tensorrt_engine import JetsonTensorRTEngine
|
from engines.jetson_tensorrt_engine import JetsonTensorRTEngine
|
||||||
from engines.tensorrt_engine import TensorRTEngine
|
from engines.tensorrt_engine import TensorRTEngine
|
||||||
engine_bytes = JetsonTensorRTEngine.convert_from_source(onnx_bytes, loader_client, models_dir)
|
engine_bytes, precision = JetsonTensorRTEngine.convert_from_source_with_precision(onnx_bytes, loader_client, models_dir)
|
||||||
return engine_bytes, TensorRTEngine.get_engine_filename("int8")
|
return engine_bytes, TensorRTEngine.get_engine_filename(precision)
|
||||||
|
|||||||
@@ -8,10 +8,19 @@ from loader_http_client cimport LoaderHttpClient, LoadResult
|
|||||||
cdef class JetsonTensorRTEngine(TensorRTEngine):
|
cdef class JetsonTensorRTEngine(TensorRTEngine):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def convert_from_source(bytes onnx_model, LoaderHttpClient loader_client, str models_dir):
|
def convert_from_source(bytes onnx_model, LoaderHttpClient loader_client, str models_dir):
|
||||||
|
engine_bytes, precision = JetsonTensorRTEngine.convert_from_source_with_precision(
|
||||||
|
onnx_model, loader_client, models_dir
|
||||||
|
)
|
||||||
|
return engine_bytes
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def convert_from_source_with_precision(bytes onnx_model, LoaderHttpClient loader_client, str models_dir):
|
||||||
cdef str calib_cache_path
|
cdef str calib_cache_path
|
||||||
calib_cache_path = JetsonTensorRTEngine._download_calib_cache(loader_client, models_dir)
|
calib_cache_path = JetsonTensorRTEngine._download_calib_cache(loader_client, models_dir)
|
||||||
try:
|
try:
|
||||||
return TensorRTEngine.convert_from_source(onnx_model, calib_cache_path)
|
engine_bytes = TensorRTEngine.convert_from_source(onnx_model, calib_cache_path, True)
|
||||||
|
precision = "int8" if calib_cache_path is not None else "fp16"
|
||||||
|
return engine_bytes, precision
|
||||||
finally:
|
finally:
|
||||||
if calib_cache_path is not None:
|
if calib_cache_path is not None:
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -0,0 +1,111 @@
|
|||||||
|
import ast
|
||||||
|
import io
|
||||||
|
|
||||||
|
import onnx
|
||||||
|
from onnx import helper, numpy_helper
|
||||||
|
|
||||||
|
|
||||||
|
# Reduce operator types whose second input (the axes) is folded back into an
# ``axes`` attribute by ``_rewrite_reduce_axes_inputs``.
_REDUCE_OPS_WITH_AXES_INPUT = {
    "ReduceL1", "ReduceL2",
    "ReduceLogSum", "ReduceLogSumExp",
    "ReduceMax", "ReduceMean", "ReduceMin",
    "ReduceProd",
    "ReduceSum", "ReduceSumSquare",
}
|
||||||
|
|
||||||
|
|
||||||
|
def _metadata(model):
    """Return the model's ``metadata_props`` entries as a ``{key: value}`` dict.

    When a key occurs more than once, the last entry wins.
    """
    props = {}
    for entry in model.metadata_props:
        props[entry.key] = entry.value
    return props
|
||||||
|
|
||||||
|
|
||||||
|
def _input_size(model):
    """Return the ``(height, width)`` stored under the ``imgsz`` metadata key.

    The raw value is parsed with ``ast.literal_eval`` and accepted only when
    it is a two-element list or tuple whose items convert to positive
    integers. In every other case -- key missing, value unparsable, wrong
    length, non-positive size, or any exception raised along the way -- the
    fallback ``(1280, 1280)`` is returned.
    """
    fallback = (1280, 1280)
    try:
        raw = _metadata(model).get("imgsz")
        size = ast.literal_eval(raw)
        if not isinstance(size, (list, tuple)) or len(size) != 2:
            return fallback
        height = int(size[0])
        width = int(size[1])
    except Exception:
        # Best effort: a lookup or parse failure must not abort the caller.
        return fallback
    if height > 0 and width > 0:
        return height, width
    return fallback
|
||||||
|
|
||||||
|
|
||||||
|
def _constant_values(graph):
    """Map tensor names to the arrays that ``graph`` defines statically.

    Two sources are collected, in this order:

    * every entry of ``graph.initializer``, keyed by its name;
    * every ``Constant`` node that has at least one output and carries a
      ``value`` attribute, keyed by its first output name.

    A ``Constant`` node output therefore overrides an initializer of the same
    name. ``Constant`` nodes without a ``value`` attribute are ignored.
    """
    known = {}
    for initializer in graph.initializer:
        known[initializer.name] = numpy_helper.to_array(initializer)

    constant_nodes = (
        node for node in graph.node
        if node.op_type == "Constant" and node.output
    )
    for const in constant_nodes:
        # Only the first attribute named "value" is considered.
        tensor_attr = next(
            (attr for attr in const.attribute if attr.name == "value"),
            None,
        )
        if tensor_attr is not None:
            known[const.output[0]] = numpy_helper.to_array(tensor_attr.t)
    return known
|
||||||
|
|
||||||
|
|
||||||
|
def _as_int_list(value):
    """Convert a constant value into a flat list of Python ints.

    ``None`` is passed through unchanged. A value whose ``shape`` is ``()``
    -- or which has no ``shape`` attribute at all -- is treated as a scalar
    and wrapped in a one-element list. Anything else is flattened with
    ``reshape(-1)`` before each element is converted with ``int``.
    """
    if value is None:
        return None
    is_scalar = getattr(value, "shape", ()) == ()
    if is_scalar:
        return [int(value)]
    flat = value.reshape(-1).tolist()
    return list(map(int, flat))
|
||||||
|
|
||||||
|
|
||||||
|
def _set_static_input_shape(model, batch=1):
    """Pin the first 4-D float graph input to ``(batch, 3, height, width)``.

    Height and width come from ``_input_size(model)``. Graph inputs are
    scanned in order; those whose element type is not ``FLOAT`` or whose shape
    does not have exactly four dimensions are skipped. Only the first
    matching input is modified.

    Returns:
        ``True`` when an input was rewritten, ``False`` when none matched.
    """
    height, width = _input_size(model)
    target = (batch, 3, height, width)
    for candidate in model.graph.input:
        tensor = candidate.type.tensor_type
        is_float = tensor.elem_type == onnx.TensorProto.FLOAT
        if not is_float or len(tensor.shape.dim) != 4:
            continue
        for axis, extent in enumerate(target):
            tensor.shape.dim[axis].dim_value = extent
        # Stop at the first match; later inputs are left untouched.
        return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def _rewrite_reduce_axes_inputs(model):
    """Move constant ``axes`` inputs of reduce nodes into an ``axes`` attribute.

    A node is rewritten when its op type is listed in
    ``_REDUCE_OPS_WITH_AXES_INPUT``, it has at least two inputs, and its
    second input resolves to a value in ``_constant_values(model.graph)``.
    For such a node any existing ``axes`` attribute is replaced by one built
    from the constant, and every input after the first is removed.

    Returns:
        ``True`` when at least one node was rewritten, otherwise ``False``.
    """
    # NOTE(review): the rewrite is applied regardless of the model's opset --
    # confirm the attribute form is accepted for every op listed above.
    known = _constant_values(model.graph)
    rewritten = 0
    for node in model.graph.node:
        if len(node.input) < 2:
            continue
        if node.op_type not in _REDUCE_OPS_WITH_AXES_INPUT:
            continue
        axes = _as_int_list(known.get(node.input[1]))
        if axes is None:
            # Axes are not statically known; leave the node untouched.
            continue
        retained = [attr for attr in node.attribute if attr.name != "axes"]
        del node.attribute[:]
        node.attribute.extend(retained)
        node.attribute.extend([helper.make_attribute("axes", axes)])
        del node.input[1:]
        rewritten += 1
    return rewritten > 0
|
||||||
|
|
||||||
|
|
||||||
|
def _cap_default_opset(model, max_opset=17):
    """Lower the declared default-domain opset version to ``max_opset``.

    The first ``opset_import`` entry whose domain is ``""`` or ``"ai.onnx"``
    and whose version exceeds ``max_opset`` has its version overwritten.
    Only that one entry is changed, and only the declared number: the graph's
    nodes are not touched by this function.

    Returns:
        ``True`` when an entry was lowered, ``False`` otherwise.
    """
    default_domains = ("", "ai.onnx")
    too_new = next(
        (
            entry
            for entry in model.opset_import
            if entry.domain in default_domains and entry.version > max_opset
        ),
        None,
    )
    if too_new is None:
        return False
    too_new.version = max_opset
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def prepare_for_tensorrt(model_bytes):
    """Apply the TensorRT compatibility passes to a serialized ONNX model.

    The model is parsed from ``model_bytes`` and three passes run in order:
    ``_set_static_input_shape``, ``_rewrite_reduce_axes_inputs`` and
    ``_cap_default_opset``.

    Returns:
        The original ``model_bytes`` object when no pass reported a change,
        otherwise the re-serialized model as ``bytes``.
    """
    model = onnx.load_model_from_string(model_bytes)
    # Build the list eagerly so every pass runs even after one reports a change.
    outcomes = [
        _set_static_input_shape(model),
        _rewrite_reduce_axes_inputs(model),
        _cap_default_opset(model),
    ]
    if not any(outcomes):
        return model_bytes

    with io.BytesIO() as sink:
        onnx.save_model(model, sink)
        return sink.getvalue()
|
||||||
@@ -114,13 +114,21 @@ cdef class TensorRTEngine(InferenceEngine):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def convert_from_source(bytes onnx_model, str calib_cache_path=None):
|
def convert_from_source(bytes onnx_model, str calib_cache_path=None, bint force_static_input=False):
|
||||||
gpu_mem = TensorRTEngine.get_gpu_memory_bytes(0)
|
gpu_mem = TensorRTEngine.get_gpu_memory_bytes(0)
|
||||||
workspace_bytes = int(gpu_mem * 0.9)
|
workspace_bytes = int(gpu_mem * 0.9)
|
||||||
|
|
||||||
explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
|
explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
|
||||||
trt_logger = trt.Logger(trt.Logger.WARNING)
|
trt_logger = trt.Logger(trt.Logger.WARNING)
|
||||||
|
|
||||||
|
if force_static_input:
|
||||||
|
try:
|
||||||
|
from engines.onnx_tensorrt_compat import prepare_for_tensorrt
|
||||||
|
onnx_model = prepare_for_tensorrt(onnx_model)
|
||||||
|
constants_inf.log(<str>'Prepared ONNX model for TensorRT static Jetson build')
|
||||||
|
except Exception as e:
|
||||||
|
constants_inf.logerror(<str>f'ONNX TensorRT compatibility preparation failed: {str(e)}')
|
||||||
|
|
||||||
with trt.Builder(trt_logger) as builder, \
|
with trt.Builder(trt_logger) as builder, \
|
||||||
builder.create_network(explicit_batch_flag) as network, \
|
builder.create_network(explicit_batch_flag) as network, \
|
||||||
trt.OnnxParser(network, trt_logger) as parser, \
|
trt.OnnxParser(network, trt_logger) as parser, \
|
||||||
@@ -129,6 +137,8 @@ cdef class TensorRTEngine(InferenceEngine):
|
|||||||
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes)
|
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes)
|
||||||
|
|
||||||
if not parser.parse(onnx_model):
|
if not parser.parse(onnx_model):
|
||||||
|
for i in range(parser.num_errors):
|
||||||
|
constants_inf.logerror(<str>f'TensorRT ONNX parser error: {parser.get_error(i)}')
|
||||||
return None
|
return None
|
||||||
|
|
||||||
input_tensor = network.get_input(0)
|
input_tensor = network.get_input(0)
|
||||||
@@ -137,7 +147,9 @@ cdef class TensorRTEngine(InferenceEngine):
|
|||||||
H = max(shape[2], 1280) if shape[2] != -1 else 1280
|
H = max(shape[2], 1280) if shape[2] != -1 else 1280
|
||||||
W = max(shape[3], 1280) if shape[3] != -1 else 1280
|
W = max(shape[3], 1280) if shape[3] != -1 else 1280
|
||||||
|
|
||||||
if shape[0] == -1:
|
if force_static_input:
|
||||||
|
input_tensor.shape = (1, C, H, W)
|
||||||
|
elif shape[0] == -1 or shape[2] == -1 or shape[3] == -1:
|
||||||
max_batch = TensorRTEngine.calculate_max_batch_size(gpu_mem, H, W)
|
max_batch = TensorRTEngine.calculate_max_batch_size(gpu_mem, H, W)
|
||||||
profile = builder.create_optimization_profile()
|
profile = builder.create_optimization_profile()
|
||||||
profile.set_shape(
|
profile.set_shape(
|
||||||
|
|||||||
Reference in New Issue
Block a user