Add downloading of the big engine file to the CDN manager

Revise the ONNX export process
fixes
This commit is contained in:
Alex Bezdieniezhnykh
2025-04-24 16:23:39 +03:00
parent c4732cec8b
commit 26eb3b991d
9 changed files with 187 additions and 86 deletions
+47 -16
View File
@@ -1,26 +1,43 @@
import io import io
import json import json
from http import HTTPStatus from http import HTTPStatus
from os import path
import requests import requests
import yaml
import constants import constants
from cdn_manager import CDNCredentials, CDNManager
from hardware_service import get_hardware_info from hardware_service import get_hardware_info
from security import Security from security import Security
class ApiCredentials: class ApiCredentials:
def __init__(self, url, email, password, folder): def __init__(self, url, email, password):
self.url = url self.url = url
self.email = email self.email = email
self.password = password self.password = password
self.folder = folder
class Api: class ApiClient:
def __init__(self, credentials): def __init__(self):
self.token = None self.token = None
self.credentials = credentials with open(constants.CONFIG_FILE, "r") as f:
config_dict = yaml.safe_load(f)
api_c = config_dict["api"]
self.credentials = ApiCredentials(api_c["url"],
api_c["email"],
api_c["password"])
yaml_bytes = self.load_bytes(constants.CDN_CONFIG, '')
data = yaml.safe_load(io.BytesIO(yaml_bytes))
creds = CDNCredentials(data["host"],
data["downloader_access_key"],
data["downloader_access_secret"],
data["uploader_access_key"],
data["uploader_access_secret"])
self.cdn_manager = CDNManager(creds)
def login(self): def login(self):
response = requests.post(f'{self.credentials.url}/login', response = requests.post(f'{self.credentials.url}/login',
@@ -29,8 +46,7 @@ class Api:
token = response.json()["token"] token = response.json()["token"]
self.token = token self.token = token
def upload_file(self, filename: str, file_bytes: bytearray): def upload_file(self, filename: str, file_bytes: bytearray, folder):
folder = self.credentials.folder
if self.token is None: if self.token is None:
self.login() self.login()
url = f"{self.credentials.url}/resources/{folder}" url = f"{self.credentials.url}/resources/{folder}"
@@ -43,9 +59,7 @@ class Api:
except Exception as e: except Exception as e:
print(f"Upload fail: {e}") print(f"Upload fail: {e}")
def load_bytes(self, filename, folder = None): def load_bytes(self, filename, folder):
folder = folder or self.credentials.folder
hardware = get_hardware_info() hardware = get_hardware_info()
if self.token is None: if self.token is None:
@@ -82,13 +96,30 @@ class Api:
print(f'Downloaded file: {filename}, {len(data)} bytes') print(f'Downloaded file: {filename}, {len(data)} bytes')
return data return data
def load_resource(self, big_part, small_part): def load_big_small_resource(self, resource_name, folder, key):
big_part = path.join(folder, f'{resource_name}.big')
small_part = f'{resource_name}.small'
with open(big_part, 'rb') as binary_file: with open(big_part, 'rb') as binary_file:
encrypted_bytes_big = binary_file.read() encrypted_bytes_big = binary_file.read()
encrypted_bytes_small = self.load_bytes(small_part) encrypted_bytes_small = self.load_bytes(small_part, folder)
encrypted_model_bytes = encrypted_bytes_small + encrypted_bytes_big encrypted_bytes = encrypted_bytes_small + encrypted_bytes_big
key = Security.get_model_encryption_key()
model_bytes = Security.decrypt_to(encrypted_model_bytes, key) result = Security.decrypt_to(encrypted_bytes, key)
return model_bytes return result
def upload_big_small_resource(self, resource, resource_name, folder, key):
    """Encrypt a resource, split it in two parts and publish both parts.

    The encrypted payload is cut after
    ``min(constants.SMALL_SIZE_KB * 1024, 20% of the payload length)`` bytes.
    The leading slice is sent as ``<resource_name>.small`` through
    ``upload_file``; the remainder is sent as ``<resource_name>.big`` through
    ``self.cdn_manager`` and is also written to the local directory
    ``folder``. Both remote uploads target ``constants.MODELS_FOLDER``.

    Args:
        resource: Payload handed to ``Security.encrypt_to`` (presumably raw
            bytes -- confirm against callers).
        resource_name: Base name used for the ``.big`` / ``.small`` parts.
        folder: Local directory that receives a copy of the big part.
        key: Encryption key handed to ``Security.encrypt_to``.
    """
    # Local import: the module only imports ``path`` from ``os``.
    from os import makedirs

    big_part_name = f'{resource_name}.big'
    small_part_name = f'{resource_name}.small'

    resource_encrypted = Security.encrypt_to(resource, key)
    part_small_size = min(constants.SMALL_SIZE_KB * 1024, int(0.2 * len(resource_encrypted)))
    part_small = resource_encrypted[:part_small_size]  # leading slice -> small part
    part_big = resource_encrypted[part_small_size:]

    # Make sure the local target directory exists before anything is
    # uploaded; otherwise the open() below fails after the CDN upload and the
    # small part is never sent, leaving a half-published resource.
    if folder:
        makedirs(folder, exist_ok=True)

    self.cdn_manager.upload(constants.MODELS_FOLDER, big_part_name, part_big)
    with open(path.join(folder, big_part_name), 'wb') as f:
        f.write(part_big)
    self.upload_file(small_part_name, part_small, constants.MODELS_FOLDER)
+2 -11
View File
@@ -1,16 +1,7 @@
cdn:
host: 'https://cdnapi.azaion.com/'
downloader_access_key: '8ynZ0rrMLL00GLBopklw'
downloader_access_secret: 'McNgEKhAJUxoa3t4WDDbCbhYPg4Qhe7FNQEKrtbk'
uploader_access_key: 'YhdHtKaq8DmvrYohetu6'
uploader_access_secret: 'nlOtjo1c4UWiMiJOjcIpR0aJFPitIhcwU6zFev7H'
bucket: 'models'
api: api:
url: 'https://api.azaion.com' url: 'https://api.azaion.com'
user: 'admin@azaion.com' email: 'uploader@azaion.com'
pw: 'Az@1on1000Odm$n' password: 'Az@1on_10Upl0@der'
folder: ''
queue: queue:
host: '188.245.120.247' host: '188.245.120.247'
+4
View File
@@ -39,3 +39,7 @@ AI_ONNX_MODEL_FILE_SMALL = "azaion.onnx.small"
AI_TENSOR_MODEL_FILE_BIG = "azaion.engine.big" AI_TENSOR_MODEL_FILE_BIG = "azaion.engine.big"
AI_TENSOR_MODEL_FILE_SMALL = "azaion.engine.small" AI_TENSOR_MODEL_FILE_SMALL = "azaion.engine.small"
SMALL_SIZE_KB = 3
CDN_CONFIG = 'cdn.yaml'
MODELS_FOLDER = 'models'
+27 -11
View File
@@ -1,13 +1,15 @@
import os
import shutil import shutil
from os import path, scandir, makedirs from os import path, scandir, makedirs
from pathlib import Path from pathlib import Path
import random import random
import netron import netron
import yaml import yaml
from ultralytics import YOLO from ultralytics import YOLO
import constants import constants
from azaion_api import Api, ApiCredentials from api_client import ApiClient, ApiCredentials
from cdn_manager import CDNManager, CDNCredentials from cdn_manager import CDNManager, CDNCredentials
from constants import datasets_dir, processed_images_dir from constants import datasets_dir, processed_images_dir
from security import Security from security import Security
@@ -24,15 +26,21 @@ def export_rknn(model_path):
pass pass
def export_onnx(model_path): def export_onnx(model_path, batch_size=4):
model = YOLO(model_path) model = YOLO(model_path)
onnx_path = Path(model_path).stem + '.onnx'
if path.exists(onnx_path):
os.remove(onnx_path)
model.export( model.export(
format="onnx", format="onnx",
imgsz=1280, imgsz=1280,
batch=2, batch=batch_size,
simplify=True, simplify=True,
nms=True) nms=True,
return Path(model_path).stem + '.onnx' device=0
)
return onnx_path
def export_tensorrt(model_path): def export_tensorrt(model_path):
@@ -79,17 +87,25 @@ def upload_model(model_path: str, filename: str, size_small_in_kb: int=3):
key = Security.get_model_encryption_key() key = Security.get_model_encryption_key()
model_encrypted = Security.encrypt_to(model_bytes, key) model_encrypted = Security.encrypt_to(model_bytes, key)
part1_size = min(size_small_in_kb * 1024, int(0.9 * len(model_encrypted))) part1_size = min(size_small_in_kb * 1024, int(0.3 * len(model_encrypted)))
model_part_small = model_encrypted[:part1_size] # slice bytes for part1 model_part_small = model_encrypted[:part1_size] # slice bytes for part1
model_part_big = model_encrypted[part1_size:] model_part_big = model_encrypted[part1_size:]
with open(constants.CONFIG_FILE, "r") as f: with open(constants.CONFIG_FILE, "r") as f:
config_dict = yaml.safe_load(f) config_dict = yaml.safe_load(f)
d_config = Dotdict(config_dict) d_config = Dotdict(config_dict)
cdn_c = Dotdict(d_config.cdn)
api_c = Dotdict(d_config.api) api_c = Dotdict(d_config.api)
cdn_manager = CDNManager(CDNCredentials(cdn_c.host, cdn_c.access_key, cdn_c.secret_key)) api = ApiClient(ApiCredentials(api_c.url, api_c.user, api_c.pw, api_c.folder))
cdn_manager.upload(cdn_c.bucket, f'{filename}.big', model_part_big)
api = Api(ApiCredentials(api_c.url, api_c.user, api_c.pw, api_c.folder)) yaml_bytes = api.load_bytes(constants.CDN_CONFIG, '')
api.upload_file(f'{filename}.small', model_part_small) data = yaml.safe_load(yaml_bytes)
creds = CDNCredentials(data["host"],
data["downloader_access_key"],
data["downloader_access_secret"],
data["uploader_access_key"],
data["uploader_access_secret"])
cdn_manager = CDNManager(creds)
api.upload_file(f'{filename}.small', model_part_small, constants.MODELS_FOLDER)
cdn_manager.upload(constants.MODELS_FOLDER, f'{filename}.big', model_part_big)
-1
View File
@@ -34,7 +34,6 @@ def get_mac_address(interface="Ethernet"):
def get_hardware_info(): def get_hardware_info():
is_windows = os.name == 'nt'
res = subprocess.check_output("ver", shell=True).decode('utf-8') res = subprocess.check_output("ver", shell=True).decode('utf-8')
if "Microsoft Windows" in res: if "Microsoft Windows" in res:
is_windows = True is_windows = True
+73 -18
View File
@@ -8,28 +8,29 @@ import numpy as np
import tensorrt as trt import tensorrt as trt
import pycuda.driver as cuda import pycuda.driver as cuda
from inference.onnx_engine import InferenceEngine from inference.onnx_engine import InferenceEngine
import pycuda.autoinit # required for automatically initialize CUDA, do not remove. # required for automatically initialize CUDA, do not remove.
import pycuda.autoinit
import pynvml
# TODO: 2. Convert onnx model with 4 batch and make sure it is working
class TensorRTEngine(InferenceEngine): class TensorRTEngine(InferenceEngine):
def __init__(self, model_bytes: bytes, batch_size: int = 4, **kwargs): TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
self.batch_size = batch_size
def __init__(self, model_bytes: bytes, **kwargs):
try: try:
logger = trt.Logger(trt.Logger.WARNING) # metadata_len = struct.unpack("<I", model_bytes[:4])[0]
# try:
# self.metadata = json.loads(model_bytes[4:4 + metadata_len])
# self.class_names = self.metadata['names']
# print(f"Model metadata: {json.dumps(self.metadata, indent=2)}")
# except json.JSONDecodeError as err:
# print(f"Failed to parse metadata")
# return
# engine_data = model_bytes[4 + metadata_len:]
metadata_len = struct.unpack("<I", model_bytes[:4])[0] runtime = trt.Runtime(self.TRT_LOGGER)
try: self.engine = runtime.deserialize_cuda_engine(model_bytes)
self.metadata = json.loads(model_bytes[4:4 + metadata_len])
self.class_names = self.metadata['names']
print(f"Model metadata: {json.dumps(self.metadata, indent=2)}")
except json.JSONDecodeError as err:
print(f"Failed to parse metadata")
return
engine_data = model_bytes[4 + metadata_len:]
runtime = trt.Runtime(logger)
self.engine = runtime.deserialize_cuda_engine(engine_data)
if self.engine is None: if self.engine is None:
raise RuntimeError(f"Failed to load TensorRT engine!") raise RuntimeError(f"Failed to load TensorRT engine!")
@@ -55,7 +56,7 @@ class TensorRTEngine(InferenceEngine):
self.output_name = self.engine.get_tensor_name(1) self.output_name = self.engine.get_tensor_name(1)
engine_output_shape = tuple(self.engine.get_tensor_shape(self.output_name)) engine_output_shape = tuple(self.engine.get_tensor_shape(self.output_name))
self.output_shape = [ self.output_shape = [
batch_size if self.input_shape[0] == -1 else self.input_shape[0], 4 if self.input_shape[0] == -1 else self.input_shape[0], # by default, batch size is 4
300 if engine_output_shape[1] == -1 else engine_output_shape[1], # max detections number 300 if engine_output_shape[1] == -1 else engine_output_shape[1], # max detections number
6 if engine_output_shape[2] == -1 else engine_output_shape[2] # x1 y1 x2 y2 conf cls 6 if engine_output_shape[2] == -1 else engine_output_shape[2] # x1 y1 x2 y2 conf cls
] ]
@@ -73,7 +74,61 @@ class TensorRTEngine(InferenceEngine):
def get_batch_size(self) -> int: def get_batch_size(self) -> int:
return self.batch_size return self.batch_size
# In tensorrt_engine.py, modify the run method: @staticmethod
def get_gpu_memory_bytes(device_id=0) -> int:
    """Return the total memory of a GPU in bytes, as reported by NVML.

    Args:
        device_id: NVML index of the device to query.

    Returns:
        ``total`` from the NVML memory info of the device, or 2 GiB when the
        NVML query raises ``pynvml.NVMLError``.
    """
    fallback_bytes = 2 * 1024 * 1024 * 1024  # default 2 Gb
    detected_bytes = None
    try:
        pynvml.nvmlInit()
        device_handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)
        detected_bytes = pynvml.nvmlDeviceGetMemoryInfo(device_handle).total
    except pynvml.NVMLError:
        detected_bytes = None
    finally:
        # Shutdown is attempted even when init failed; NVML errors raised
        # here are deliberately ignored.
        try:
            pynvml.nvmlShutdown()
        except pynvml.NVMLError:
            pass

    if detected_bytes is None:
        return fallback_bytes
    return detected_bytes
@staticmethod
def get_engine_filename(device_id=0) -> str | None:
    """Build the engine file name for the given CUDA device.

    The name embeds the device's compute capability (major.minor) and its
    multiprocessor count.

    Args:
        device_id: Index of the CUDA device to inspect.

    Returns:
        The file name, or ``None`` if anything raised while querying the
        device.
    """
    try:
        gpu = cuda.Device(device_id)
        cc_major, cc_minor = gpu.compute_capability()
        sm_count = gpu.multiprocessor_count
        return f"azaion.cc_{cc_major}.{cc_minor}_sm_{sm_count}.engine"
    except Exception:
        # Any failure is reported to the caller as None.
        return None
@staticmethod
def convert_from_onnx(onnx_model: bytes) -> bytes | None:
    """Build a serialized TensorRT engine from an in-memory ONNX model.

    The builder workspace is limited to 90% of the value returned by
    ``TensorRTEngine.get_gpu_memory_bytes()``. FP16 is enabled when the
    builder reports fast FP16 support; otherwise the default precision is
    used.

    Args:
        onnx_model: Serialized ONNX model.

    Returns:
        The serialized engine as ``bytes``, or ``None`` when the ONNX model
        cannot be parsed or the engine build fails.
    """
    workspace_bytes = int(TensorRTEngine.get_gpu_memory_bytes() * 0.9)

    explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

    with trt.Builder(TensorRTEngine.TRT_LOGGER) as builder, \
            builder.create_network(explicit_batch_flag) as network, \
            trt.OnnxParser(network, TensorRTEngine.TRT_LOGGER) as parser, \
            builder.create_builder_config() as config:

        config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes)

        if not parser.parse(onnx_model):
            # Surface the parser diagnostics instead of failing silently, so
            # the caller can see why the model was rejected.
            print('Failed to parse ONNX model.')
            for error_index in range(parser.num_errors):
                print(parser.get_error(error_index))
            return None

        if builder.platform_has_fast_fp16:
            print('Converting to supported fp16')
            config.set_flag(trt.BuilderFlag.FP16)
        else:
            print('Converting to supported fp32. (fp16 is not supported)')

        plan = builder.build_serialized_network(network, config)

    if plan is None:
        print('Conversion failed.')
        return None

    return bytes(plan)
def run(self, input_data: np.ndarray) -> List[np.ndarray]: def run(self, input_data: np.ndarray) -> List[np.ndarray]:
try: try:
+1 -1
View File
@@ -9,7 +9,7 @@ opencv-python
matplotlib matplotlib
PyYAML PyYAML
cryptography cryptography
numpy numpy==1.26.4
requests requests
pyyaml pyyaml
boto3 boto3
+22 -20
View File
@@ -1,14 +1,23 @@
import re import pycuda.driver as cuda
import yaml import yaml
import constants import constants
from azaion_api import Api, ApiCredentials from api_client import ApiClient, ApiCredentials
from cdn_manager import CDNManager, CDNCredentials from cdn_manager import CDNManager, CDNCredentials
from inference.inference import Inference from inference.inference import Inference
from inference.onnx_engine import OnnxEngine
from inference.tensorrt_engine import TensorRTEngine from inference.tensorrt_engine import TensorRTEngine
from security import Security
from utils import Dotdict from utils import Dotdict
def get_engine_filename(device_id=0):
    """Build the engine file name for the given CUDA device.

    The name embeds the device's compute capability (major.minor) and its
    multiprocessor count. Returns ``None`` if anything raised while querying
    the device.
    """
    try:
        gpu = cuda.Device(device_id)
        cc_major, cc_minor = gpu.compute_capability()
        sm_count = gpu.multiprocessor_count
        return f"azaion.cc_{cc_major}.{cc_minor}_sm_{sm_count}.engine"
    except Exception:
        # Any failure is reported to the caller as None.
        return None
if __name__ == "__main__": if __name__ == "__main__":
# Inference(OnnxEngine('azaion-2025-03-10.onnx', batch_size=4), # Inference(OnnxEngine('azaion-2025-03-10.onnx', batch_size=4),
# confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4') # confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4')
@@ -23,26 +32,19 @@ if __name__ == "__main__":
# Inference(TensorRTEngine('azaion-2025-03-10_batch8.engine', batch_size=8), # Inference(TensorRTEngine('azaion-2025-03-10_batch8.engine', batch_size=8),
# confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4') # confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4')
with open(constants.CONFIG_FILE, "r") as f: api_client = ApiClient()
config_dict = yaml.safe_load(f) key = Security.get_model_encryption_key()
d_config = Dotdict(config_dict) engine_filename = TensorRTEngine.get_engine_filename()
cdn_c = Dotdict(d_config.cdn) model_bytes = api_client.load_big_small_resource(engine_filename, 'models', key)
api_c = Dotdict(d_config.api)
cdn_manager = CDNManager(CDNCredentials(cdn_c.host, Inference(TensorRTEngine(model_bytes),
cdn_c.downloader_access_key, cdn_c.downloader_access_secret, confidence_threshold=0.5, iou_threshold=0.3).process('tests/ForAI_test.mp4')
cdn_c.uploader_access_key, cdn_c.uploader_access_secret))
cdn_manager.download(cdn_c.bucket, constants.AI_TENSOR_MODEL_FILE_BIG)
cdn_manager.download(cdn_c.bucket, constants.AI_ONNX_MODEL_FILE_BIG)
api_client = Api(ApiCredentials(api_c.url, api_c.user, api_c.pw, api_c.folder))
tensor_model_bytes = api_client.load_resource(constants.AI_TENSOR_MODEL_FILE_BIG, constants.AI_TENSOR_MODEL_FILE_SMALL)
onxx_model_bytes = api_client.load_resource(constants.AI_ONNX_MODEL_FILE_BIG, constants.AI_ONNX_MODEL_FILE_SMALL)
# cdn_manager.download(cdn_c.bucket, constants.AI_TENSOR_MODEL_FILE_BIG)
# tensor_model_bytes = api_client.load_resource(constants.AI_TENSOR_MODEL_FILE_BIG, constants.AI_TENSOR_MODEL_FILE_SMALL)
# Inference(OnnxEngine(onxx_model_bytes, batch_size=4), # Inference(OnnxEngine(onxx_model_bytes, batch_size=4),
# confidence_threshold=0.5, iou_threshold=0.3).process('tests/ForAI_test.mp4') # confidence_threshold=0.5, iou_threshold=0.3).process('tests/ForAI_test.mp4')
Inference(TensorRTEngine(tensor_model_bytes, batch_size=4),
confidence_threshold=0.5, iou_threshold=0.3).process('tests/ForAI_test.mp4')
+11 -8
View File
@@ -7,12 +7,13 @@ from datetime import datetime
from os import path, replace, listdir, makedirs, scandir from os import path, replace, listdir, makedirs, scandir
from os.path import abspath from os.path import abspath
from pathlib import Path from pathlib import Path
from time import sleep
import yaml import yaml
from ultralytics import YOLO from ultralytics import YOLO
import constants import constants
from azaion_api import ApiCredentials, Api from api_client import ApiCredentials, ApiClient
from cdn_manager import CDNCredentials, CDNManager from cdn_manager import CDNCredentials, CDNManager
from constants import (processed_images_dir, from constants import (processed_images_dir,
processed_labels_dir, processed_labels_dir,
@@ -20,10 +21,11 @@ from constants import (processed_images_dir,
datasets_dir, models_dir, datasets_dir, models_dir,
corrupted_images_dir, corrupted_labels_dir, sample_dir) corrupted_images_dir, corrupted_labels_dir, sample_dir)
from dto.annotationClass import AnnotationClass from dto.annotationClass import AnnotationClass
from inference.onnx_engine import OnnxEngine
from security import Security from security import Security
from utils import Dotdict from utils import Dotdict
from exports import export_tensorrt, upload_model from exports import export_tensorrt, upload_model, export_onnx
today_folder = f'{prefix}{datetime.now():{date_format}}' today_folder = f'{prefix}{datetime.now():{date_format}}'
today_dataset = path.join(datasets_dir, today_folder) today_dataset = path.join(datasets_dir, today_folder)
@@ -224,10 +226,11 @@ if __name__ == '__main__':
# validate(path.join('runs', 'detect', 'train7', 'weights', 'best.pt')) # validate(path.join('runs', 'detect', 'train7', 'weights', 'best.pt'))
# form_data_sample(500) # form_data_sample(500)
# convert2rknn() # convert2rknn()
model_path = 'azaion.pt' api_client = ApiClient()
export_tensorrt(model_path) onnx_path = export_onnx('azaion.pt')
engine_model_path = f'{Path(model_path).stem}.engine'
upload_model(engine_model_path, engine_model_path)
onnx_model_path = f'{Path(model_path).stem}.onnx' with open(onnx_path, 'rb') as binary_file:
upload_model(onnx_model_path, onnx_model_path) onnx_bytes = binary_file.read()
key = Security.get_model_encryption_key()
api_client.upload_big_small_resource(onnx_bytes, onnx_path, constants.MODELS_FOLDER, key)