Fix TensorRT engine

This commit is contained in:
zxsanny
2025-03-28 14:50:43 +02:00
committed by Alex Bezdieniezhnykh
parent 5b89a21b36
commit 06a23525a6
16 changed files with 272 additions and 94 deletions
+4 -1
View File
@@ -26,12 +26,15 @@ Linux
python -m venv env
env\Scripts\activate
pip install -r requirements.txt
pip uninstall -y torch
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
```
**3. Fix possible problems**
* cv2.error: OpenCV(4.10.0) ...\window.cpp:1301: error: (-2:Unspecified error)
```
pip uninstall opencv-python
pip uninstall -y opencv-python
pip install opencv-python
```
* fbgemm.dll error (Windows specific)
+58 -1
View File
@@ -1,6 +1,13 @@
import io
import json
from http import HTTPStatus
import requests
import constants
from hardware_service import get_hardware_info
from security import Security
class ApiCredentials:
def __init__(self, url, email, password, folder):
@@ -34,4 +41,54 @@ class Api:
r.raise_for_status()
print(f"Upload {len(file_bytes)} bytes ({filename}) to {self.credentials.url}. Result: {r.status_code}")
except Exception as e:
print(f"Upload fail: {e}")
print(f"Upload fail: {e}")
def load_bytes(self, filename, folder = None):
    """Download a resource from the API and return it decrypted.

    The request carries the account password, a description of the local
    hardware and the requested file name. The response body is decrypted
    with a key derived from the credentials and the hardware hash.

    Args:
        filename: Name of the resource to fetch.
        folder: Remote folder; falls back to ``self.credentials.folder``
            when falsy.

    Returns:
        Whatever ``Security.decrypt_to`` returns for the downloaded body.

    Raises:
        requests.HTTPError: If the server still answers with an error
            status after the optional re-login attempt.
    """
    folder = folder or self.credentials.folder
    hardware = get_hardware_info()

    if self.token is None:
        self.login()
    url = f"{self.credentials.url}/resources/get/{folder}"
    payload = json.dumps(
        {
            "password": self.credentials.password,
            "hardware": hardware.to_json_object(),
            "fileName": filename
        }, indent=4)

    def post_request():
        # Headers are rebuilt on every call so that a token refreshed by
        # self.login() is picked up (assumes login() updates self.token).
        headers = {
            "Authorization": f"Bearer {self.token}",
            "Content-Type": "application/json"
        }
        return requests.post(url, data=payload, headers=headers, stream=True)

    response = post_request()
    if response.status_code in (HTTPStatus.UNAUTHORIZED, HTTPStatus.FORBIDDEN):
        # Release the rejected streamed response before retrying once.
        response.close()
        self.login()
        response = post_request()

    try:
        if response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR:
            print('500!')
        # Fail loudly instead of trying to decrypt an error page.
        response.raise_for_status()
        resp_bytes = response.raw.read()
    finally:
        response.close()

    hw_hash = Security.get_hw_hash(hardware)
    key = Security.get_api_encryption_key(self.credentials, hw_hash)

    data = Security.decrypt_to(resp_bytes, key)
    print(f'Downloaded file: {filename}, {len(data)} bytes')
    return data
def load_resource(self, big_part, small_part):
    """Reassemble and decrypt a model that was split into two parts.

    Args:
        big_part: Path of the local file holding the large part.
        small_part: Resource name of the small part, fetched through
            ``self.load_bytes``.

    Returns:
        The result of ``Security.decrypt_to`` applied to the small part
        followed by the big part, using the model encryption key.
    """
    # Read the local part first, exactly as before, so a missing file
    # fails before any network traffic happens.
    with open(big_part, 'rb') as local_file:
        tail_bytes = local_file.read()

    head_bytes = self.load_bytes(small_part)

    # The small part is the prefix of the encrypted stream.
    return Security.decrypt_to(head_bytes + tail_bytes,
                               Security.get_model_encryption_key())
+5 -3
View File
@@ -5,12 +5,14 @@
{ "Id": 3, "Name": "Artillery", "Color": "#80FFFF00" },
{ "Id": 4, "Name": "Shadow", "Color": "#80FF00FF" },
{ "Id": 5, "Name": "Trenches", "Color": "#8000FFFF" },
{ "Id": 6, "Name": "Military-men", "Color": "#80000000" },
{ "Id": 6, "Name": "Military-men", "Color": "#80188021" },
{ "Id": 7, "Name": "Tyre-tracks", "Color": "#80800000" },
{ "Id": 8, "Name": "Additional-armored-tank", "Color": "#80008000" },
{ "Id": 9, "Name": "Smoke", "Color": "#80000080" },
{ "Id": 10, "Name": "Plane", "Color": "#80000080" },
{ "Id": 11, "Name": "Moto", "Color": "#80808000" },
{ "Id": 12, "Name": "Camouflage-net", "Color": "#80800080" },
{ "Id": 13, "Name": "Camouflage-branches", "Color": "#80008080" }
]
{ "Id": 13, "Name": "Camouflage-branches", "Color": "#80008080" },
{ "Id": 14, "Name": "Roof", "Color": "#80008080" },
{ "Id": 15, "Name": "Building", "Color": "#80008080" }
]
+6 -2
View File
@@ -1,5 +1,4 @@
from os import path
from dto.annotationClass import AnnotationClass
azaion = '/azaion'
prefix = 'azaion-'
@@ -26,7 +25,6 @@ datasets_dir = path.join(azaion, 'datasets')
models_dir = path.join(azaion, 'models')
annotation_classes = AnnotationClass.read_json()
date_format = '%Y-%m-%d'
checkpoint_file = 'checkpoint.txt'
checkpoint_date_format = '%Y-%m-%d %H:%M:%S'
@@ -35,3 +33,9 @@ CONFIG_FILE = 'config.yaml'
ANNOTATIONS_QUEUE = 'azaion-annotations'
ANNOTATIONS_CONFIRMED_QUEUE = 'azaion-annotations-confirm'
OFFSET_FILE = 'offset.yaml'
AI_ONNX_MODEL_FILE_BIG = "azaion.onnx.big"
AI_ONNX_MODEL_FILE_SMALL = "azaion.onnx.small"
AI_TENSOR_MODEL_FILE_BIG = "azaion.engine.big"
AI_TENSOR_MODEL_FILE_SMALL = "azaion.engine.small"
+28 -5
View File
@@ -2,12 +2,16 @@ import shutil
from os import path, scandir, makedirs
from pathlib import Path
import random
import netron
import yaml
from ultralytics import YOLO
import constants
from azaion_api import Api, ApiCredentials
from cdn_manager import CDNManager, CDNCredentials
from constants import datasets_dir, processed_images_dir
from security import Security
from utils import Dotdict
def export_rknn(model_path):
@@ -40,6 +44,7 @@ def export_tensorrt(model_path):
nms=True
)
def form_data_sample(destination_path, size=500, write_txt_log=False):
images = []
with scandir(processed_images_dir) as imd:
@@ -62,11 +67,29 @@ def form_data_sample(destination_path, size=500, write_txt_log=False):
with open(path.join(destination_path, 'azaion_subset.txt'), 'w', encoding='utf-8') as f:
f.writelines([f'{line}\n' for line in lines])
def show_model(model: str = None):
    """Hand ``model`` over to ``netron.start``."""
    netron.start(model)
if __name__ == '__main__':
export_tensorrt('azaion-2025-03-10.pt')
# export_rknn('azaion-2025-03-10.pt')
# export_onnx('azaion-2025-03-10.pt')
def upload_model(model_path: str, filename: str, size_small_in_kb: int=3):
    """Encrypt a model file, split it in two and upload both parts.

    The big part goes to the CDN bucket as ``<filename>.big``; the small
    part goes to the API as ``<filename>.small``.

    Args:
        model_path: Path of the model file to read.
        filename: Base name used for the two uploaded parts.
        size_small_in_kb: Desired size of the small part in KiB.
    """
    with open(model_path, 'rb') as model_file:
        plain_bytes = model_file.read()

    encrypted = Security.encrypt_to(plain_bytes, Security.get_model_encryption_key())

    # The small part is capped at 90% of the encrypted payload.
    split_at = min(size_small_in_kb * 1024, int(0.9 * len(encrypted)))
    small_chunk, big_chunk = encrypted[:split_at], encrypted[split_at:]

    with open(constants.CONFIG_FILE, "r") as config_file:
        raw_config = yaml.safe_load(config_file)

    config = Dotdict(raw_config)
    cdn_cfg = Dotdict(config.cdn)
    api_cfg = Dotdict(config.api)

    cdn_credentials = CDNCredentials(cdn_cfg.host, cdn_cfg.access_key, cdn_cfg.secret_key)
    CDNManager(cdn_credentials).upload(cdn_cfg.bucket, f'{filename}.big', big_chunk)

    api_credentials = ApiCredentials(api_cfg.url, api_cfg.user, api_cfg.pw, api_cfg.folder)
    Api(api_credentials).upload_file(f'{filename}.small', small_chunk)
+66
View File
@@ -0,0 +1,66 @@
import os
import subprocess
import psutil
class HardwareInfo:
    """Plain holder for the hardware description of the current machine."""

    def __init__(self, cpu, gpu, memory, mac_address):
        """Store the four hardware descriptors unchanged."""
        self.cpu, self.gpu = cpu, gpu
        self.memory, self.mac_address = memory, mac_address

    def to_json_object(self):
        """Return the descriptors as a dict keyed for JSON serialization."""
        payload = {}
        payload["CPU"] = self.cpu
        payload["GPU"] = self.gpu
        payload["MacAddress"] = self.mac_address
        payload["Memory"] = self.memory
        return payload

    def __str__(self):
        """Return a one-line human-readable summary."""
        return (
            f'CPU: {self.cpu}. '
            f'GPU: {self.gpu}. '
            f'Memory: {self.memory}. '
            f'MAC Address: {self.mac_address}'
        )
def get_mac_address(interface="Ethernet"):
    """Return the link-layer address of ``interface`` with hyphens removed.

    Args:
        interface: Exact interface name as reported by
            ``psutil.net_if_addrs()``.

    Returns:
        The first ``AF_LINK`` address of that interface with every ``-``
        stripped, or ``None`` when the interface is unknown or has no
        link-layer address.
    """
    # NOTE(review): only '-' separators are stripped; presumably this targets
    # the Windows address format. Confirm before relying on it elsewhere.
    candidates = psutil.net_if_addrs().get(interface, ())
    for entry in candidates:
        if entry.family == psutil.AF_LINK:
            return entry.address.replace('-', '')
    return None
def get_hardware_info():
    """Collect CPU, GPU, memory and MAC address of the current machine.

    On Windows the values come from PowerShell ``Get-CimInstance`` queries;
    elsewhere from ``lscpu``, ``lspci`` and ``free -g`` run through bash.
    The first three non-empty output lines are taken as CPU, GPU and memory
    in that order.

    Returns:
        HardwareInfo: The collected descriptors.

    Raises:
        subprocess.CalledProcessError: If the query command exits non-zero.
        RuntimeError: If the command produced fewer than three non-empty
            lines, so CPU/GPU/memory cannot all be determined.
    """
    # os.name is sufficient; running `ver` to double-check raised
    # CalledProcessError on every non-Windows host.
    is_windows = os.name == 'nt'

    if is_windows:
        os_command = (
            "powershell -Command \""
            "Get-CimInstance -ClassName Win32_Processor | Select-Object -ExpandProperty Name | Write-Output; "
            "Get-CimInstance -ClassName Win32_VideoController | Select-Object -ExpandProperty Name | Write-Output; "
            "Get-CimInstance -ClassName Win32_OperatingSystem | Select-Object -ExpandProperty TotalVisibleMemorySize | Write-Output"
            "\""
        )
        raw_output = subprocess.check_output(os_command, shell=True)
    else:
        # Passed as an argument list (no outer shell) so that awk's `$2`
        # reaches awk untouched. The dangling `&&` that made bash reject
        # the script has been dropped.
        bash_script = (
            "lscpu | grep 'Model name:' | cut -d':' -f2 && "
            "lspci | grep VGA | cut -d':' -f3 && "
            "free -g | grep Mem: | awk '{print $2}'"
        )
        raw_output = subprocess.check_output(["/bin/bash", "-c", bash_script])

    result = raw_output.decode('utf-8')
    lines = [line.strip() for line in result.splitlines() if line.strip()]
    if len(lines) < 3:
        raise RuntimeError(
            f'Expected CPU, GPU and memory lines from the hardware query, got {len(lines)}: {lines!r}'
        )

    # NOTE(review): as written, replace(" ", " ") is a no-op. Presumably it
    # was meant to collapse double spaces; confirm against the repository
    # before changing it, since these values feed the hardware hash.
    # NOTE(review): with several CPUs/GPUs the positional mapping below may
    # pick a second device name instead of the memory line; verify.
    cpu = lines[0].replace("Name=", "").replace(" ", " ")
    gpu = lines[1].replace("Name=", "").replace(" ", " ")
    memory = lines[2].replace("TotalVisibleMemorySize=", "").replace(" ", " ")

    mac_address = get_mac_address()
    return HardwareInfo(cpu, gpu, memory, mac_address)
View File
+1
View File
@@ -33,6 +33,7 @@ class WeatherMode(Enum):
Wint = 20
Night = 40
class AnnotationClass:
def __init__(self, id, name, color):
self.id = id
+2 -3
View File
@@ -1,8 +1,7 @@
import cv2
import numpy as np
from onnx_engine import InferenceEngine
from dto import AnnotationClass, Annotation, Detection
from inference.dto import Annotation, Detection, AnnotationClass
from inference.onnx_engine import InferenceEngine
class Inference:
+8 -4
View File
@@ -22,15 +22,19 @@ class InferenceEngine(abc.ABC):
pass
class OnnxEngine(InferenceEngine):
def __init__(self, model_path: str, batch_size: int = 1, **kwargs):
self.model_path = model_path
def __init__(self, model_bytes, batch_size: int = 1, **kwargs):
self.batch_size = batch_size
self.session = onnx.InferenceSession(model_path, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
self.session = onnx.InferenceSession(model_bytes, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
self.model_inputs = self.session.get_inputs()
self.input_name = self.model_inputs[0].name
self.input_shape = self.model_inputs[0].shape
if self.input_shape[0] != -1:
self.batch_size = self.input_shape[0]
model_meta = self.session.get_modelmeta()
print("Metadata:", model_meta.custom_metadata_map)
self.class_names = eval(model_meta.custom_metadata_map["names"])
pass
def get_input_shape(self) -> Tuple[int, int]:
shape = self.input_shape
-20
View File
@@ -1,20 +0,0 @@
from onnx_engine import OnnxEngine
from tensorrt_engine import TensorRTEngine
from inference import Inference
if __name__ == "__main__":
# Inference(OnnxEngine('azaion-2025-03-10.onnx', batch_size=4),
# confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4')
# detection for the first 200sec of video:
# onnxInference: 81 sec, 6.3Gb VRAM
# tensorrt: 54 sec, 3.7Gb VRAM
# Inference(TensorRTEngine('azaion-2025-03-10_int8.engine', batch_size=16),
# confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4')
# INT8 for 200sec: 54 sec 3.7Gb
# Inference(TensorRTEngine('azaion-2025-03-10_batch8.engine', batch_size=8),
# confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4')
Inference(TensorRTEngine('azaion-2025-03-10-half_batch4.engine', batch_size=4),
confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4')
+18 -16
View File
@@ -1,46 +1,48 @@
import re
import struct
import subprocess
from pathlib import Path
from typing import List, Tuple
import json
import numpy as np
import tensorrt as trt
import pycuda.driver as cuda
from inference.onnx_engine import InferenceEngine
import pycuda.autoinit # required for automatically initialize CUDA, do not remove.
from onnx_engine import InferenceEngine
class TensorRTEngine(InferenceEngine):
def __init__(self, model_path: str, batch_size: int = 4, **kwargs):
self.model_path = model_path
def __init__(self, model_bytes: bytes, batch_size: int = 4, **kwargs):
self.batch_size = batch_size
try:
logger = trt.Logger(trt.Logger.WARNING)
with open(model_path, 'rb') as f:
metadata_len = int.from_bytes(f.read(4), byteorder='little', signed=True)
metadata_bytes = f.read(metadata_len)
try:
self.metadata = json.loads(metadata_bytes)
print(f"Model metadata: {json.dumps(self.metadata, indent=2)}")
except json.JSONDecodeError:
print(f"Failed to parse metadata: {metadata_bytes}")
self.metadata = {}
engine_data = f.read()
metadata_len = struct.unpack("<I", model_bytes[:4])[0]
try:
self.metadata = json.loads(model_bytes[4:4 + metadata_len])
self.class_names = self.metadata['names']
print(f"Model metadata: {json.dumps(self.metadata, indent=2)}")
except json.JSONDecodeError as err:
print(f"Failed to parse metadata")
return
engine_data = model_bytes[4 + metadata_len:]
runtime = trt.Runtime(logger)
self.engine = runtime.deserialize_cuda_engine(engine_data)
if self.engine is None:
raise RuntimeError(f"Failed to load TensorRT engine from {model_path}")
raise RuntimeError(f"Failed to load TensorRT engine!")
self.context = self.engine.create_execution_context()
# input
self.input_name = self.engine.get_tensor_name(0)
engine_input_shape = self.engine.get_tensor_shape(self.input_name)
if engine_input_shape[0] != -1:
self.batch_size = engine_input_shape[0]
self.input_shape = [
batch_size if engine_input_shape[0] == -1 else engine_input_shape[0],
self.batch_size,
engine_input_shape[1], # Channels (usually fixed at 3 for RGB)
1280 if engine_input_shape[2] == -1 else engine_input_shape[2], # Height
1280 if engine_input_shape[3] == -1 else engine_input_shape[3] # Width
+2 -5
View File
@@ -1,10 +1,6 @@
huggingface_hub
torch
torchvision
torchaudio
ultralytics
albumentations
opencv-python
matplotlib
PyYAML
@@ -17,4 +13,5 @@ msgpack
rstream
onnxruntime-gpu
netron
pycuda
pycuda
tensorrt
+15 -5
View File
@@ -45,14 +45,24 @@ class Security:
return bytes(plaintext_bytes)
@staticmethod
def get_model_encryption_key():
key = '-#%@AzaionKey@%#---234sdfklgvhjbnn'
return Security.calc_hash(key)
@staticmethod
def calc_hash(key):
str_bytes = key.encode('utf-8')
hash_bytes = hashlib.sha384(str_bytes).digest()
h = base64.b64encode(hash_bytes).decode('utf-8')
return h
@staticmethod
def get_hw_hash(hardware):
    """Hash the MAC address, CPU and GPU of ``hardware`` into one string.

    ``hardware`` must expose ``mac_address``, ``cpu`` and ``gpu``
    attributes; the memory value is not part of the hash.
    """
    return Security.calc_hash(
        f'Azaion_{hardware.mac_address}_{hardware.cpu}_{hardware.gpu}'
    )
@staticmethod
def get_api_encryption_key(creds, hardware_hash):
    """Derive the key used for API downloads.

    The key is the hash of the account email, the password, the given
    hardware hash and a fixed suffix, joined with ``-``.
    """
    return Security.calc_hash(
        f'{creds.email}-{creds.password}-{hardware_hash}-#%@AzaionKey@%#---'
    )
@staticmethod
def get_model_encryption_key():
    """Return the hash of the fixed model passphrase."""
    # NOTE(review): the passphrase is hard-coded in source, so anyone with
    # the code can derive the model key. Consider moving it to configuration.
    return Security.calc_hash('-#%@AzaionKey@%#---234sdfklgvhjbnn')
+49
View File
@@ -0,0 +1,49 @@
import re
import yaml
import constants
from azaion_api import Api, ApiCredentials
from inference.inference import Inference
from inference.onnx_engine import OnnxEngine
from inference.tensorrt_engine import TensorRTEngine
from utils import Dotdict
if __name__ == "__main__":
    # Local import: only the class-name parsing check below needs it.
    import ast

    # Benchmark notes kept from earlier runs:
    # Inference(OnnxEngine('azaion-2025-03-10.onnx', batch_size=4),
    #           confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4')
    # detection for the first 200sec of video:
    #   onnxInference: 81 sec, 6.3Gb VRAM
    #   tensorrt:      54 sec, 3.7Gb VRAM

    # Inference(TensorRTEngine('azaion-2025-03-10_int8.engine', batch_size=16),
    #           confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4')
    # INT8 for 200sec: 54 sec 3.7Gb

    # Inference(TensorRTEngine('azaion-2025-03-10_batch8.engine', batch_size=8),
    #           confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4')

    with open(constants.CONFIG_FILE, "r") as f:
        config_dict = yaml.safe_load(f)
    d_config = Dotdict(config_dict)
    api_c = Dotdict(d_config.api)
    api_client = Api(ApiCredentials(api_c.url, api_c.user, api_c.pw, api_c.folder))

    # Each model is the local ".big" file combined with the ".small" part
    # downloaded from the API.
    tensor_model_bytes = api_client.load_resource(constants.AI_TENSOR_MODEL_FILE_BIG, constants.AI_TENSOR_MODEL_FILE_SMALL)
    # NOTE(review): the ONNX model is only used by the commented-out run
    # below; the download is kept so the script's side effects are unchanged.
    onnx_model_bytes = api_client.load_resource(constants.AI_ONNX_MODEL_FILE_BIG, constants.AI_ONNX_MODEL_FILE_SMALL)

    # Debug check that a class-name mapping in this textual form parses.
    # ast.literal_eval accepts only Python literals, unlike eval().
    input_string3 = "{'0': 'Armored-Vehicle', '1': 'Truck', '2': 'Vehicle', '3': 'Artillery', '4': 'Shadow', '5': 'Trenches', '6': 'Military-men', '7': 'Tyre-tracks', '8': 'Additional-armored-tank', '9': 'Smoke', '10': 'Plane', '11': 'Moto', '12': 'Camouflage-net', '13': 'Camouflage-branches', '14': 'Class-15', '15': 'Class-16', '16': 'Class-17', '17': 'Class-18', '18': 'Class-19', '19': 'Class-20', '20': 'Armored-Vehicle(Wint)', '21': 'Truck(Wint)', '22': 'Vehicle(Wint)', '23': 'Artillery(Wint)', '24': 'Shadow(Wint)', '25': 'Trenches(Wint)', '26': 'Military-men(Wint)', '27': 'Tyre-tracks(Wint)', '28': 'Additional-armored-tank(Wint)', '29': 'Smoke(Wint)', '30': 'Plane(Wint)', '31': 'Moto(Wint)', '32': 'Camouflage-net(Wint)', '33': 'Camouflage-branches(Wint)', '34': 'Class-35', '35': 'Class-36', '36': 'Class-37', '37': 'Class-38', '38': 'Class-39', '39': 'Class-40', '40': 'Armored-Vehicle(Night)', '41': 'Truck(Night)', '42': 'Vehicle(Night)', '43': 'Artillery(Night)', '44': 'Shadow(Night)', '45': 'Trenches(Night)', '46': 'Military-men(Night)', '47': 'Tyre-tracks(Night)', '48': 'Additional-armored-tank(Night)', '49': 'Smoke(Night)', '50': 'Plane(Night)', '51': 'Moto(Night)', '52': 'Camouflage-net(Night)', '53': 'Camouflage-branches(Night)', '54': 'Class-55', '55': 'Class-56', '56': 'Class-57', '57': 'Class-58', '58': 'Class-59', '59': 'Class-60', '60': 'Class-61', '61': 'Class-62', '62': 'Class-63', '63': 'Class-64', '64': 'Class-65', '65': 'Class-66', '66': 'Class-67', '67': 'Class-68', '68': 'Class-69', '69': 'Class-70', '70': 'Class-71', '71': 'Class-72', '72': 'Class-73', '73': 'Class-74', '74': 'Class-75', '75': 'Class-76', '76': 'Class-77', '77': 'Class-78', '78': 'Class-79', '79': 'Class-80'}"
    try:
        result_dict3 = ast.literal_eval(input_string3)
        print(result_dict3)
    except (ValueError, SyntaxError) as e:
        print(e)

    # Inference(OnnxEngine(onnx_model_bytes, batch_size=4),
    #           confidence_threshold=0.5, iou_threshold=0.3).process('tests/ForAI_test.mp4')

    Inference(TensorRTEngine(tensor_model_bytes, batch_size=4),
              confidence_threshold=0.5, iou_threshold=0.3).process('tests/ForAI_test.mp4')
+10 -29
View File
@@ -16,13 +16,14 @@ from azaion_api import ApiCredentials, Api
from cdn_manager import CDNCredentials, CDNManager
from constants import (processed_images_dir,
processed_labels_dir,
annotation_classes,
prefix, date_format,
datasets_dir, models_dir,
corrupted_images_dir, corrupted_labels_dir, sample_dir)
from exports.export import form_data_sample
from dto.annotationClass import AnnotationClass
from security import Security
from utils import Dotdict
from exports import export_tensorrt, upload_model
today_folder = f'{prefix}{datetime.now():{date_format}}'
today_dataset = path.join(datasets_dir, today_folder)
@@ -120,6 +121,7 @@ def check_label(label_path):
def create_yaml():
print('creating yaml...')
lines = ['names:']
annotation_classes = AnnotationClass.read_json()
for i in range(DEFAULT_CLASS_NUM):
if i in annotation_classes:
lines.append(f'- {annotation_classes[i].name}')
@@ -217,36 +219,15 @@ def validate(model_path):
pass
def upload_model(model_path: str, size_small_in_kb: int=3):
# model = YOLO(model_path)
# model.export(format="onnx", imgsz=1280, nms=True, batch=4)
onnx_model = path.dirname(model_path) + Path(model_path).stem + '.onnx'
with open(onnx_model, 'rb') as f_in:
onnx_bytes = f_in.read()
key = Security.get_model_encryption_key()
onnx_encrypted = Security.encrypt_to(onnx_bytes, key)
part1_size = min(size_small_in_kb * 1024, int(0.9 * len(onnx_encrypted)))
onnx_part_small = onnx_encrypted[:part1_size] # slice bytes for part1
onnx_part_big = onnx_encrypted[part1_size:]
with open(constants.CONFIG_FILE, "r") as f:
config_dict = yaml.safe_load(f)
d_config = Dotdict(config_dict)
cdn_c = Dotdict(d_config.cdn)
api_c = Dotdict(d_config.api)
cdn_manager = CDNManager(CDNCredentials(cdn_c.host, cdn_c.access_key, cdn_c.secret_key))
cdn_manager.upload(cdn_c.bucket, 'azaion.onnx.big', onnx_part_big)
api = Api(ApiCredentials(api_c.url, api_c.user, api_c.pw, api_c.folder))
api.upload_file('azaion.onnx.small', onnx_part_small)
if __name__ == '__main__':
# model_path = train_dataset(from_scratch=True)
# validate(path.join('runs', 'detect', 'train7', 'weights', 'best.pt'))
# form_data_sample(500)
# convert2rknn()
model_path = 'azaion.pt'
export_tensorrt(model_path)
engine_model_path = f'{Path(model_path).stem}.engine'
upload_model(engine_model_path, engine_model_path)
upload_model('azaion-2024-10-26.onnx')
onnx_model_path = f'{Path(model_path).stem}.onnx'
upload_model(onnx_model_path, onnx_model_path)