From e798af470b64c49465fd98bf81606cb4acf798f8 Mon Sep 17 00:00:00 2001 From: Alex Bezdieniezhnykh Date: Wed, 23 Apr 2025 23:20:08 +0300 Subject: [PATCH] read cdn yaml config from api automate tensorrt model conversion in case of no existing one for user's gpu --- .../Services/AuthProvider.cs | 6 +- .../Services/InferenceClient.cs | 2 +- Azaion.Inference/api_client.pxd | 9 +- Azaion.Inference/api_client.pyx | 64 +++++++++----- Azaion.Inference/build.cmd | 1 + Azaion.Inference/cdn_manager.pxd | 14 ++++ Azaion.Inference/cdn_manager.pyx | 41 +++++++++ Azaion.Inference/constants.pxd | 8 +- Azaion.Inference/constants.pyx | 9 +- Azaion.Inference/credentials.pxd | 1 - Azaion.Inference/credentials.pyx | 6 +- Azaion.Inference/inference.pxd | 4 + Azaion.Inference/inference.pyx | 42 ++++++++-- Azaion.Inference/inference_engine.pxd | 11 ++- Azaion.Inference/inference_engine.pyx | 84 ++++++++++++++----- Azaion.Inference/main.pyx | 5 +- Azaion.Inference/requirements.txt | 4 +- Azaion.Inference/setup.py | 1 + Azaion.Suite/App.xaml.cs | 9 +- Azaion.Suite/Login.xaml | 4 +- build/build_downloader.cmd | 7 -- build/cdn_manager.py | 22 ++++- build/publish.cmd | 4 +- 23 files changed, 265 insertions(+), 93 deletions(-) create mode 100644 Azaion.Inference/cdn_manager.pxd create mode 100644 Azaion.Inference/cdn_manager.pyx delete mode 100644 build/build_downloader.cmd diff --git a/Azaion.CommonSecurity/Services/AuthProvider.cs b/Azaion.CommonSecurity/Services/AuthProvider.cs index ca0ade1..c345927 100644 --- a/Azaion.CommonSecurity/Services/AuthProvider.cs +++ b/Azaion.CommonSecurity/Services/AuthProvider.cs @@ -11,7 +11,7 @@ public interface IAzaionApi ApiCredentials Credentials { get; } User CurrentUser { get; } void UpdateOffsets(UserQueueOffsets offsets); - Stream GetResource(string filename); + Stream GetResource(string filename, string folder); } public class AzaionApi(HttpClient client, ICache cache, ApiCredentials credentials, IHardwareService hardwareService) : IAzaionApi @@ -32,11 +32,11 @@ public class AzaionApi(HttpClient client, ICache cache, ApiCredentials credentia } } - public Stream GetResource(string filename) + public Stream GetResource(string filename, string folder) { var hardware = cache.GetFromCache(SecurityConstants.HARDWARE_INFO_KEY, hardwareService.GetHardware); - var response = Send(new HttpRequestMessage(HttpMethod.Post, $"/resources/get/{credentials.Folder}") + var response = Send(new HttpRequestMessage(HttpMethod.Post, $"/resources/get/{folder}") { Content = new StringContent(JsonConvert.SerializeObject(new { filename, credentials.Password, hardware }), Encoding.UTF8, APP_JSON) }); diff --git a/Azaion.CommonSecurity/Services/InferenceClient.cs b/Azaion.CommonSecurity/Services/InferenceClient.cs index 7fc307f..da75cff 100644 --- a/Azaion.CommonSecurity/Services/InferenceClient.cs +++ b/Azaion.CommonSecurity/Services/InferenceClient.cs @@ -45,7 +45,7 @@ public class InferenceClient : IInferenceClient process.OutputDataReceived += (_, e) => { if (e.Data != null) Console.WriteLine(e.Data); }; process.ErrorDataReceived += (_, e) => { if (e.Data != null) Console.WriteLine(e.Data); }; - process.Start(); + //process.Start(); } catch (Exception e) { diff --git a/Azaion.Inference/api_client.pxd b/Azaion.Inference/api_client.pxd index b53f75b..d05f072 100644 --- a/Azaion.Inference/api_client.pxd +++ b/Azaion.Inference/api_client.pxd @@ -1,9 +1,11 @@ from user cimport User from credentials cimport Credentials +from cdn_manager cimport CDNManager cdef class ApiClient: cdef Credentials credentials + cdef CDNManager cdn_manager cdef str token, folder, api_url cdef User user @@ -12,6 +14,7 @@ cdef class ApiClient: cdef set_token(self, str token) cdef get_user(self) - cdef load_bytes(self, str filename, str folder=*) - cdef upload_file(self, str filename, str folder=*) - cdef load_ai_model(self, bint is_tensor=*) + cdef load_bytes(self, str filename, str folder) + cdef upload_file(self, str filename, bytes resource, str folder) + cdef load_big_small_resource(self, str resource_name, str folder, str key) + cdef upload_big_small_resource(self, bytes resource, str resource_name, str folder, str key) diff --git a/Azaion.Inference/api_client.pyx b/Azaion.Inference/api_client.pyx index 6f3637d..c933ae5 100644 --- a/Azaion.Inference/api_client.pyx +++ b/Azaion.Inference/api_client.pyx @@ -1,12 +1,15 @@ import json from http import HTTPStatus +from os import path from uuid import UUID import jwt import requests cimport constants +import yaml + +from cdn_manager cimport CDNManager, CDNCredentials from hardware_service cimport HardwareService, HardwareInfo from security cimport Security -from io import BytesIO from user cimport User, RoleEnum cdef class ApiClient: @@ -15,9 +18,19 @@ cdef class ApiClient: self.credentials = None self.user = None self.token = None + self.cdn_manager = None cdef set_credentials(self, Credentials credentials): self.credentials = credentials + yaml_bytes = self.load_bytes(constants.CDN_CONFIG, '') + yaml_config = yaml.safe_load(yaml_bytes) + creds = CDNCredentials(yaml_config["host"], + yaml_config["downloader_access_key"], + yaml_config["downloader_access_secret"], + yaml_config["uploader_access_key"], + yaml_config["uploader_access_secret"]) + + self.cdn_manager = CDNManager(creds) cdef login(self): response = requests.post(f"{constants.API_URL}/login", @@ -57,13 +70,12 @@ cdef class ApiClient: self.login() return self.user - cdef upload_file(self, str filename, str folder=None): - folder = folder or self.credentials.folder + cdef upload_file(self, str filename, bytes resource, str folder): if self.token is None: self.login() url = f"{constants.API_URL}/resources/{folder}" headers = { "Authorization": f"Bearer {self.token}" } - files = dict(data=open(filename, 'rb')) + files = {'data': (filename, resource)} try: r = requests.post(url, headers=headers, files=files, allow_redirects=True) r.raise_for_status() @@ -71,9 +83,7 @@ cdef class ApiClient: except Exception as e: print(f"Upload fail: {e}") - cdef load_bytes(self, str filename, str folder=None): - folder = folder or self.credentials.folder - + cdef load_bytes(self, str filename, str folder): hardware_service = HardwareService() cdef HardwareInfo hardware = hardware_service.get_hardware_info() @@ -111,22 +121,30 @@ cdef class ApiClient: constants.log(f'Downloaded file: {filename}, {len(data)} bytes') return data - cdef load_ai_model(self, bint is_tensor=False): - if is_tensor: - big_file = constants.AI_TENSOR_MODEL_FILE_BIG - small_file = constants.AI_TENSOR_MODEL_FILE_SMALL - else: - big_file = constants.AI_ONNX_MODEL_FILE_BIG - small_file = constants.AI_ONNX_MODEL_FILE_SMALL + cdef load_big_small_resource(self, str resource_name, str folder, str key): + cdef str big_part = path.join(folder, f'{resource_name}.big') + cdef str small_part = f'{resource_name}.small' - with open(big_file, 'rb') as binary_file: + with open(big_part, 'rb') as binary_file: encrypted_bytes_big = binary_file.read() - print('read encrypted big file') - print(f'small file: {small_file}') - encrypted_bytes_small = self.load_bytes(small_file) - print('read encrypted small file') - encrypted_model_bytes = encrypted_bytes_small + encrypted_bytes_big - key = Security.get_model_encryption_key() - model_bytes = Security.decrypt_to(encrypted_model_bytes, key) - return model_bytes + encrypted_bytes_small = self.load_bytes(small_part, folder) + + encrypted_bytes = encrypted_bytes_small + encrypted_bytes_big + result = Security.decrypt_to(encrypted_bytes, key) + return result + + cdef upload_big_small_resource(self, bytes resource, str resource_name, str folder, str key): + cdef str big_part_name = f'{resource_name}.big' + cdef str small_part_name = f'{resource_name}.small' + + resource_encrypted = Security.encrypt_to(resource, key) + part_small_size = min(constants.SMALL_SIZE_KB * 1024, int(0.3 * len(resource_encrypted))) + part_small = resource_encrypted[:part_small_size] # slice bytes for part1 + + part_big = resource_encrypted[part_small_size:] + + self.cdn_manager.upload(constants.MODELS_FOLDER, big_part_name, part_big) + with open(path.join(folder, big_part_name), 'wb') as f: + f.write(part_big) + self.upload_file(small_part_name, part_small, constants.MODELS_FOLDER) \ No newline at end of file diff --git a/Azaion.Inference/build.cmd b/Azaion.Inference/build.cmd index 80d5a8e..b9e6857 100644 --- a/Azaion.Inference/build.cmd +++ b/Azaion.Inference/build.cmd @@ -10,6 +10,7 @@ pyinstaller --name=azaion-inference ^ --collect-all onnxruntime ^ --collect-all tensorrt ^ --collect-all pycuda ^ +--collect-all pynvml ^ --collect-all re ^ --hidden-import constants ^ --hidden-import annotation ^ diff --git a/Azaion.Inference/cdn_manager.pxd b/Azaion.Inference/cdn_manager.pxd new file mode 100644 index 0000000..028c26d --- /dev/null +++ b/Azaion.Inference/cdn_manager.pxd @@ -0,0 +1,14 @@ +cdef class CDNCredentials: + cdef str host + cdef str downloader_access_key + cdef str downloader_access_secret + cdef str uploader_access_key + cdef str uploader_access_secret + +cdef class CDNManager: + cdef CDNCredentials creds + cdef object download_client + cdef object upload_client + + cdef upload(self, str bucket, str filename, bytes file_bytes) + cdef download(self, str bucket, str filename) \ No newline at end of file diff --git a/Azaion.Inference/cdn_manager.pyx b/Azaion.Inference/cdn_manager.pyx new file mode 100644 index 0000000..92b4d3f --- /dev/null +++ b/Azaion.Inference/cdn_manager.pyx @@ -0,0 +1,41 @@ +import io +import boto3 + + +cdef class CDNCredentials: + def __init__(self, host, downloader_access_key, downloader_access_secret, uploader_access_key, uploader_access_secret): + self.host = host + self.downloader_access_key = downloader_access_key + self.downloader_access_secret = downloader_access_secret + self.uploader_access_key = uploader_access_key + self.uploader_access_secret = uploader_access_secret + + +cdef class CDNManager: + def __init__(self, CDNCredentials credentials): + + self.creds = credentials + self.download_client = boto3.client('s3', endpoint_url=self.creds.host, + aws_access_key_id=self.creds.downloader_access_key, + aws_secret_access_key=self.creds.downloader_access_secret) + self.upload_client = boto3.client('s3', endpoint_url=self.creds.host, + aws_access_key_id=self.creds.uploader_access_key, + aws_secret_access_key=self.creds.uploader_access_secret) + + cdef upload(self, str bucket, str filename, bytes file_bytes): + try: + self.upload_client.upload_fileobj(io.BytesIO(file_bytes), bucket, filename) + print(f'uploaded {filename} ({len(file_bytes)} bytes) to the {bucket}') + return True + except Exception as e: + print(e) + return False + + cdef download(self, str bucket, str filename): + try: + self.download_client.download_file(bucket, filename, filename) + print(f'downloaded {filename} from the {bucket} to current folder') + return True + except Exception as e: + print(e) + return False diff --git a/Azaion.Inference/constants.pxd b/Azaion.Inference/constants.pxd index ebdae29..660c97e 100644 --- a/Azaion.Inference/constants.pxd +++ b/Azaion.Inference/constants.pxd @@ -7,12 +7,12 @@ cdef str ANNOTATIONS_QUEUE # Name of the annotations queue in rabbit cdef str API_URL # Base URL for the external API cdef str QUEUE_CONFIG_FILENAME # queue config filename to load from api -cdef str AI_ONNX_MODEL_FILE_BIG -cdef str AI_ONNX_MODEL_FILE_SMALL +cdef str AI_ONNX_MODEL_FILE -cdef str AI_TENSOR_MODEL_FILE_BIG -cdef str AI_TENSOR_MODEL_FILE_SMALL +cdef str CDN_CONFIG +cdef str MODELS_FOLDER +cdef int SMALL_SIZE_KB cdef bytes DONE_SIGNAL diff --git a/Azaion.Inference/constants.pyx b/Azaion.Inference/constants.pyx index ec4554a..a8f384c 100644 --- a/Azaion.Inference/constants.pyx +++ b/Azaion.Inference/constants.pyx @@ -9,11 +9,12 @@ cdef str ANNOTATIONS_QUEUE = "azaion-annotations" cdef str API_URL = "https://api.azaion.com" # Base URL for the external API cdef str QUEUE_CONFIG_FILENAME = "secured-config.json" -cdef str AI_ONNX_MODEL_FILE_BIG = "azaion.onnx.big" -cdef str AI_ONNX_MODEL_FILE_SMALL = "azaion.onnx.small" +cdef str AI_ONNX_MODEL_FILE = "azaion.onnx" -cdef str AI_TENSOR_MODEL_FILE_BIG = "azaion.engine.big" -cdef str AI_TENSOR_MODEL_FILE_SMALL = "azaion.engine.small" +cdef str CDN_CONFIG = "cdn.yaml" +cdef str MODELS_FOLDER = "models" + +cdef int SMALL_SIZE_KB = 3 cdef log(str log_message, bytes client_id=None): local_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()) diff --git a/Azaion.Inference/credentials.pxd b/Azaion.Inference/credentials.pxd index cd6c090..74bc016 100644 --- a/Azaion.Inference/credentials.pxd +++ b/Azaion.Inference/credentials.pxd @@ -1,7 +1,6 @@ cdef class Credentials: cdef public str email cdef public str password - cdef public str folder @staticmethod cdef from_msgpack(bytes data) \ No newline at end of file diff --git a/Azaion.Inference/credentials.pyx b/Azaion.Inference/credentials.pyx index 2eb020c..ae0e17e 100644 --- a/Azaion.Inference/credentials.pyx +++ b/Azaion.Inference/credentials.pyx @@ -2,16 +2,14 @@ from msgpack import unpackb cdef class Credentials: - def __init__(self, str email, str password, str folder): + def __init__(self, str email, str password): self.email = email self.password = password - self.folder = folder @staticmethod cdef from_msgpack(bytes data): unpacked = unpackb(data, strict_map_key=False) return Credentials( unpacked.get("Email"), - unpacked.get("Password"), - unpacked.get("Folder")) + unpacked.get("Password")) diff --git a/Azaion.Inference/inference.pxd b/Azaion.Inference/inference.pxd index c85793c..ec2f2e0 100644 --- a/Azaion.Inference/inference.pxd +++ b/Azaion.Inference/inference.pxd @@ -17,7 +17,11 @@ cdef class Inference: cdef int model_width cdef int model_height + cdef build_tensor_engine(self) + cdef init_ai(self) + cdef bint is_building_engine cdef bint is_video(self, str filepath) + cdef run_inference(self, RemoteCommand cmd) cdef _process_video(self, RemoteCommand cmd, AIRecognitionConfig ai_config, str video_name) cdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths) diff --git a/Azaion.Inference/inference.pyx b/Azaion.Inference/inference.pyx index 9ebe14d..c32249a 100644 --- a/Azaion.Inference/inference.pyx +++ b/Azaion.Inference/inference.pyx @@ -1,14 +1,19 @@ import json import mimetypes +import os import subprocess +import time import cv2 import numpy as np + +cimport constants from remote_command cimport RemoteCommand from annotation cimport Detection, Annotation from ai_config cimport AIRecognitionConfig from inference_engine cimport OnnxEngine, TensorRTEngine from hardware_service cimport HardwareService +from security cimport Security cdef class Inference: def __init__(self, api_client, on_annotation): @@ -20,18 +25,41 @@ cdef class Inference: self.model_height = 0 self.engine = None self.class_names = None + self.is_building_engine = False - def init_ai(self): + cdef build_tensor_engine(self): + is_nvidia = HardwareService.has_nvidia_gpu() + if not is_nvidia: + return + + engine_filename = TensorRTEngine.get_engine_filename() + key = Security.get_model_encryption_key() + models_dir = constants.MODELS_FOLDER + if not os.path.exists(os.path.join( models_dir, f'{engine_filename}.big')): + self.is_building_engine = True + onnx_model = self.api_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir, key) + model_bytes = TensorRTEngine.convert_from_onnx(onnx_model) + self.api_client.upload_big_small_resource(model_bytes, engine_filename, models_dir, key) + self.is_building_engine = False + + + cdef init_ai(self): if self.engine is not None: return is_nvidia = HardwareService.has_nvidia_gpu() + key = Security.get_model_encryption_key() + models_dir = constants.MODELS_FOLDER if is_nvidia: - model_bytes = self.api_client.load_ai_model(is_tensor=True) - self.engine = TensorRTEngine(model_bytes, batch_size=4) + while self.is_building_engine: + time.sleep(1) + engine_filename = TensorRTEngine.get_engine_filename() + model_bytes = self.api_client.load_big_small_resource(engine_filename, models_dir, key) + self.engine = TensorRTEngine(model_bytes) + else: - model_bytes = self.api_client.load_ai_model() - self.engine = OnnxEngine(model_bytes, batch_size=4) + model_bytes = self.api_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir, key) + self.engine = OnnxEngine(model_bytes) self.model_height, self.model_width = self.engine.get_input_shape() self.class_names = self.engine.get_class_names() @@ -135,7 +163,7 @@ cdef class Inference: images.append(m) # images first, it's faster if len(images) > 0: - for chunk in self.split_list_extend(images, ai_config.model_batch_size): + for chunk in self.split_list_extend(images, self.engine.get_input_shape()): print(f'run inference on {" ".join(chunk)}...') self._process_images(cmd, ai_config, chunk) if len(videos) > 0: @@ -161,7 +189,7 @@ cdef class Inference: batch_frames.append(frame) batch_timestamps.append(int(v_input.get(cv2.CAP_PROP_POS_MSEC))) - if len(batch_frames) == ai_config.model_batch_size: + if len(batch_frames) == self.engine.get_input_shape(): input_blob = self.preprocess(batch_frames) outputs = self.engine.run(input_blob) diff --git a/Azaion.Inference/inference_engine.pxd b/Azaion.Inference/inference_engine.pxd index 1f76d05..0e74a46 100644 --- a/Azaion.Inference/inference_engine.pxd +++ b/Azaion.Inference/inference_engine.pxd @@ -26,4 +26,13 @@ cdef class TensorRTEngine(InferenceEngine): cdef object input_shape cdef object output_shape cdef object h_output - cdef object class_names \ No newline at end of file + cdef object class_names + + @staticmethod + cdef bytes convert_from_onnx(bytes onnx_model) + + @staticmethod + cdef unsigned long long get_gpu_memory_bytes(device_id=?) + + @staticmethod + cdef str get_engine_filename(device_id=?) \ No newline at end of file diff --git a/Azaion.Inference/inference_engine.pyx b/Azaion.Inference/inference_engine.pyx index 8785a68..6c61138 100644 --- a/Azaion.Inference/inference_engine.pyx +++ b/Azaion.Inference/inference_engine.pyx @@ -6,6 +6,7 @@ import onnxruntime as onnx import tensorrt as trt import pycuda.driver as cuda import pycuda.autoinit # required for automatically initialize CUDA, do not remove. +import pynvml cdef class InferenceEngine: @@ -28,13 +29,13 @@ cdef class InferenceEngine: cdef class OnnxEngine(InferenceEngine): def __init__(self, model_bytes: bytes, batch_size: int = 1, **kwargs): super().__init__(model_bytes, batch_size) - self.batch_size = batch_size + + self.session = onnx.InferenceSession(model_bytes, providers=["CUDAExecutionProvider", "CPUExecutionProvider"]) self.model_inputs = self.session.get_inputs() self.input_name = self.model_inputs[0].name self.input_shape = self.model_inputs[0].shape - if self.input_shape[0] != -1: - self.batch_size = self.input_shape[0] + self.batch_size = self.input_shape[0] if self.input_shape[0] != -1 else batch_size print(f'AI detection model input: {self.model_inputs} {self.input_shape}') model_meta = self.session.get_modelmeta() print("Metadata:", model_meta.custom_metadata_map) @@ -57,25 +58,12 @@ cdef class OnnxEngine(InferenceEngine): cdef class TensorRTEngine(InferenceEngine): def __init__(self, model_bytes: bytes, batch_size: int = 4, **kwargs): super().__init__(model_bytes, batch_size) - self.batch_size = batch_size print('Enter init TensorRT') try: logger = trt.Logger(trt.Logger.WARNING) - metadata_len = struct.unpack("(secureAppConfig.InferenceClientConfig)); _resourceLoader = new ResourceLoader(_inferenceClient); var login = new Login(); login.CredentialsEntered += async (_, credentials) => { - credentials.Folder = secureAppConfig.DirectoriesConfig.ApiResourcesDirectory; - _inferenceClient.Send(RemoteCommand.Create(CommandType.Login, credentials)); _azaionApi = new AzaionApi(new HttpClient { BaseAddress = new Uri(SecurityConstants.API_URL) }, _cache, credentials, _hardwareService); try { - _securedConfig = _resourceLoader.LoadFile("config.secured.json"); - _systemConfig = _resourceLoader.LoadFile("config.system.json"); + _securedConfig = _resourceLoader.LoadFile("config.secured.json", apiDir); + _systemConfig = _resourceLoader.LoadFile("config.system.json", apiDir); } catch (Exception e) { @@ -125,7 +124,7 @@ public partial class App { try { - var stream = _resourceLoader.LoadFile($"{assemblyName}.dll"); + var stream = _resourceLoader.LoadFile($"{assemblyName}.dll", apiDir); return Assembly.Load(stream.ToArray()); } catch (Exception e) diff --git a/Azaion.Suite/Login.xaml b/Azaion.Suite/Login.xaml index 8b419d6..64b3bce 100644 --- a/Azaion.Suite/Login.xaml +++ b/Azaion.Suite/Login.xaml @@ -74,7 +74,7 @@ BorderBrush="DimGray" BorderThickness="0,0,0,1" HorizontalAlignment="Left" - Text="" + Text="admin@azaion.com" /> + Password="Az@1on1000Odm$n"/>