read cdn yaml config from api

Automate TensorRT model conversion when no prebuilt engine exists for the user's GPU
This commit is contained in:
Alex Bezdieniezhnykh
2025-04-23 23:20:08 +03:00
parent c68c293448
commit e798af470b
23 changed files with 265 additions and 93 deletions
@@ -11,7 +11,7 @@ public interface IAzaionApi
ApiCredentials Credentials { get; } ApiCredentials Credentials { get; }
User CurrentUser { get; } User CurrentUser { get; }
void UpdateOffsets(UserQueueOffsets offsets); void UpdateOffsets(UserQueueOffsets offsets);
Stream GetResource(string filename); Stream GetResource(string filename, string folder);
} }
public class AzaionApi(HttpClient client, ICache cache, ApiCredentials credentials, IHardwareService hardwareService) : IAzaionApi public class AzaionApi(HttpClient client, ICache cache, ApiCredentials credentials, IHardwareService hardwareService) : IAzaionApi
@@ -32,11 +32,11 @@ public class AzaionApi(HttpClient client, ICache cache, ApiCredentials credentia
} }
} }
public Stream GetResource(string filename) public Stream GetResource(string filename, string folder)
{ {
var hardware = cache.GetFromCache(SecurityConstants.HARDWARE_INFO_KEY, hardwareService.GetHardware); var hardware = cache.GetFromCache(SecurityConstants.HARDWARE_INFO_KEY, hardwareService.GetHardware);
var response = Send(new HttpRequestMessage(HttpMethod.Post, $"/resources/get/{credentials.Folder}") var response = Send(new HttpRequestMessage(HttpMethod.Post, $"/resources/get/{folder}")
{ {
Content = new StringContent(JsonConvert.SerializeObject(new { filename, credentials.Password, hardware }), Encoding.UTF8, APP_JSON) Content = new StringContent(JsonConvert.SerializeObject(new { filename, credentials.Password, hardware }), Encoding.UTF8, APP_JSON)
}); });
@@ -45,7 +45,7 @@ public class InferenceClient : IInferenceClient
process.OutputDataReceived += (_, e) => { if (e.Data != null) Console.WriteLine(e.Data); }; process.OutputDataReceived += (_, e) => { if (e.Data != null) Console.WriteLine(e.Data); };
process.ErrorDataReceived += (_, e) => { if (e.Data != null) Console.WriteLine(e.Data); }; process.ErrorDataReceived += (_, e) => { if (e.Data != null) Console.WriteLine(e.Data); };
process.Start(); //process.Start();
} }
catch (Exception e) catch (Exception e)
{ {
+6 -3
View File
@@ -1,9 +1,11 @@
from user cimport User from user cimport User
from credentials cimport Credentials from credentials cimport Credentials
from cdn_manager cimport CDNManager
cdef class ApiClient: cdef class ApiClient:
cdef Credentials credentials cdef Credentials credentials
cdef CDNManager cdn_manager
cdef str token, folder, api_url cdef str token, folder, api_url
cdef User user cdef User user
@@ -12,6 +14,7 @@ cdef class ApiClient:
cdef set_token(self, str token) cdef set_token(self, str token)
cdef get_user(self) cdef get_user(self)
cdef load_bytes(self, str filename, str folder=*) cdef load_bytes(self, str filename, str folder)
cdef upload_file(self, str filename, str folder=*) cdef upload_file(self, str filename, bytes resource, str folder)
cdef load_ai_model(self, bint is_tensor=*) cdef load_big_small_resource(self, str resource_name, str folder, str key)
cdef upload_big_small_resource(self, bytes resource, str resource_name, str folder, str key)
+41 -23
View File
@@ -1,12 +1,15 @@
import json import json
from http import HTTPStatus from http import HTTPStatus
from os import path
from uuid import UUID from uuid import UUID
import jwt import jwt
import requests import requests
cimport constants cimport constants
import yaml
from cdn_manager cimport CDNManager, CDNCredentials
from hardware_service cimport HardwareService, HardwareInfo from hardware_service cimport HardwareService, HardwareInfo
from security cimport Security from security cimport Security
from io import BytesIO
from user cimport User, RoleEnum from user cimport User, RoleEnum
cdef class ApiClient: cdef class ApiClient:
@@ -15,9 +18,19 @@ cdef class ApiClient:
self.credentials = None self.credentials = None
self.user = None self.user = None
self.token = None self.token = None
self.cdn_manager = None
cdef set_credentials(self, Credentials credentials): cdef set_credentials(self, Credentials credentials):
self.credentials = credentials self.credentials = credentials
yaml_bytes = self.load_bytes(constants.CDN_CONFIG, <str>'')
yaml_config = yaml.safe_load(yaml_bytes)
creds = CDNCredentials(yaml_config["host"],
yaml_config["downloader_access_key"],
yaml_config["downloader_access_secret"],
yaml_config["uploader_access_key"],
yaml_config["uploader_access_secret"])
self.cdn_manager = CDNManager(creds)
cdef login(self): cdef login(self):
response = requests.post(f"{constants.API_URL}/login", response = requests.post(f"{constants.API_URL}/login",
@@ -57,13 +70,12 @@ cdef class ApiClient:
self.login() self.login()
return self.user return self.user
cdef upload_file(self, str filename, str folder=None): cdef upload_file(self, str filename, bytes resource, str folder):
folder = folder or self.credentials.folder
if self.token is None: if self.token is None:
self.login() self.login()
url = f"{constants.API_URL}/resources/{folder}" url = f"{constants.API_URL}/resources/{folder}"
headers = { "Authorization": f"Bearer {self.token}" } headers = { "Authorization": f"Bearer {self.token}" }
files = dict(data=open(<str>filename, 'rb')) files = {'data': (filename, resource)}
try: try:
r = requests.post(url, headers=headers, files=files, allow_redirects=True) r = requests.post(url, headers=headers, files=files, allow_redirects=True)
r.raise_for_status() r.raise_for_status()
@@ -71,9 +83,7 @@ cdef class ApiClient:
except Exception as e: except Exception as e:
print(f"Upload fail: {e}") print(f"Upload fail: {e}")
cdef load_bytes(self, str filename, str folder=None): cdef load_bytes(self, str filename, str folder):
folder = folder or self.credentials.folder
hardware_service = HardwareService() hardware_service = HardwareService()
cdef HardwareInfo hardware = hardware_service.get_hardware_info() cdef HardwareInfo hardware = hardware_service.get_hardware_info()
@@ -111,22 +121,30 @@ cdef class ApiClient:
constants.log(<str>f'Downloaded file: {filename}, {len(data)} bytes') constants.log(<str>f'Downloaded file: {filename}, {len(data)} bytes')
return data return data
cdef load_ai_model(self, bint is_tensor=False): cdef load_big_small_resource(self, str resource_name, str folder, str key):
if is_tensor: cdef str big_part = path.join(<str>folder, f'{resource_name}.big')
big_file = <str> constants.AI_TENSOR_MODEL_FILE_BIG cdef str small_part = f'{resource_name}.small'
small_file = <str> constants.AI_TENSOR_MODEL_FILE_SMALL
else:
big_file = <str>constants.AI_ONNX_MODEL_FILE_BIG
small_file = <str> constants.AI_ONNX_MODEL_FILE_SMALL
with open(big_file, 'rb') as binary_file: with open(<str>big_part, 'rb') as binary_file:
encrypted_bytes_big = binary_file.read() encrypted_bytes_big = binary_file.read()
print('read encrypted big file')
print(f'small file: {small_file}')
encrypted_bytes_small = self.load_bytes(small_file)
print('read encrypted small file')
encrypted_model_bytes = encrypted_bytes_small + encrypted_bytes_big
key = Security.get_model_encryption_key()
model_bytes = Security.decrypt_to(encrypted_model_bytes, key) encrypted_bytes_small = self.load_bytes(small_part, folder)
return model_bytes
encrypted_bytes = encrypted_bytes_small + encrypted_bytes_big
result = Security.decrypt_to(encrypted_bytes, key)
return result
cdef upload_big_small_resource(self, bytes resource, str resource_name, str folder, str key):
    """Encrypt a resource, split it into a small and a big part and store both.

    The resource is encrypted with ``Security.encrypt_to(resource, key)``.
    The leading slice (the "small" part) is sent through ``upload_file``;
    the remainder (the "big" part) is uploaded through the CDN manager and
    also written to ``<folder>/<resource_name>.big`` on the local disk.

    :param resource: raw bytes to store.
    :param resource_name: base name; ``.small`` / ``.big`` suffixes are appended.
    :param folder: local directory that receives the big part.
    :param key: encryption key handed to ``Security.encrypt_to``.
    """
    cdef str small_name = f'{resource_name}.small'
    cdef str big_name = f'{resource_name}.big'

    encrypted = Security.encrypt_to(<bytes>resource, key)

    # The small part is capped both by SMALL_SIZE_KB and by 30% of the payload.
    small_limit = constants.SMALL_SIZE_KB * 1024
    split_at = min(small_limit, int(0.3 * len(encrypted)))
    head = encrypted[:split_at]
    tail = encrypted[split_at:]

    # Big part: CDN first, then a local copy next to the other model files.
    self.cdn_manager.upload(<str>constants.MODELS_FOLDER, <str>big_name, tail)
    local_big_path = path.join(<str>folder, <str>big_name)
    with open(local_big_path, 'wb') as out:
        out.write(tail)

    # Small part goes through the API.
    self.upload_file(small_name, head, constants.MODELS_FOLDER)
+1
View File
@@ -10,6 +10,7 @@ pyinstaller --name=azaion-inference ^
--collect-all onnxruntime ^ --collect-all onnxruntime ^
--collect-all tensorrt ^ --collect-all tensorrt ^
--collect-all pycuda ^ --collect-all pycuda ^
--collect-all pynvml ^
--collect-all re ^ --collect-all re ^
--hidden-import constants ^ --hidden-import constants ^
--hidden-import annotation ^ --hidden-import annotation ^
+14
View File
@@ -0,0 +1,14 @@
# Declaration of the credential bundle used to build the CDN (S3) clients.
cdef class CDNCredentials:
# Endpoint passed as endpoint_url when the boto3 clients are created.
cdef str host
# Access key / secret pair used by the download client.
cdef str downloader_access_key
cdef str downloader_access_secret
# Access key / secret pair used by the upload client.
cdef str uploader_access_key
cdef str uploader_access_secret
# Declaration of the CDN helper that wraps separate download and upload clients.
cdef class CDNManager:
# Credentials the two clients were created from.
cdef CDNCredentials creds
# Client objects created in __init__ (boto3 S3 clients in cdn_manager.pyx).
cdef object download_client
cdef object upload_client
# Upload file_bytes under the name filename into bucket.
cdef upload(self, str bucket, str filename, bytes file_bytes)
# Download filename from bucket.
cdef download(self, str bucket, str filename)
+41
View File
@@ -0,0 +1,41 @@
import io
import boto3
# Plain holder for the CDN endpoint and the downloader / uploader key pairs.
cdef class CDNCredentials:
# Stores every argument unchanged on the instance; no validation is performed.
def __init__(self, host, downloader_access_key, downloader_access_secret, uploader_access_key, uploader_access_secret):
self.host = host
self.downloader_access_key = downloader_access_key
self.downloader_access_secret = downloader_access_secret
self.uploader_access_key = uploader_access_key
self.uploader_access_secret = uploader_access_secret
cdef class CDNManager:
    """Thin wrapper around two boto3 S3 clients: one for downloads, one for uploads.

    Both clients talk to the same endpoint (``credentials.host``) but use
    different access key pairs.
    """

    def __init__(self, CDNCredentials credentials):
        self.creds = credentials
        endpoint = self.creds.host
        self.download_client = boto3.client(
            's3',
            endpoint_url=endpoint,
            aws_access_key_id=self.creds.downloader_access_key,
            aws_secret_access_key=self.creds.downloader_access_secret)
        self.upload_client = boto3.client(
            's3',
            endpoint_url=endpoint,
            aws_access_key_id=self.creds.uploader_access_key,
            aws_secret_access_key=self.creds.uploader_access_secret)

    cdef upload(self, str bucket, str filename, bytes file_bytes):
        """Upload ``file_bytes`` as ``filename`` into ``bucket``.

        Returns True on success; any exception is printed and False is returned.
        """
        try:
            payload = io.BytesIO(file_bytes)
            self.upload_client.upload_fileobj(payload, bucket, filename)
            print(f'uploaded {filename} ({len(file_bytes)} bytes) to the {bucket}')
        except Exception as err:
            print(err)
            return False
        return True

    cdef download(self, str bucket, str filename):
        """Download ``filename`` from ``bucket`` to a local file of the same name.

        Returns True on success; any exception is printed and False is returned.
        """
        try:
            # The object key doubles as the local destination path.
            self.download_client.download_file(bucket, filename, filename)
            print(f'downloaded {filename} from the {bucket} to current folder')
        except Exception as err:
            print(err)
            return False
        return True
+4 -4
View File
@@ -7,12 +7,12 @@ cdef str ANNOTATIONS_QUEUE # Name of the annotations queue in rabbit
cdef str API_URL # Base URL for the external API cdef str API_URL # Base URL for the external API
cdef str QUEUE_CONFIG_FILENAME # queue config filename to load from api cdef str QUEUE_CONFIG_FILENAME # queue config filename to load from api
cdef str AI_ONNX_MODEL_FILE_BIG cdef str AI_ONNX_MODEL_FILE
cdef str AI_ONNX_MODEL_FILE_SMALL
cdef str AI_TENSOR_MODEL_FILE_BIG cdef str CDN_CONFIG
cdef str AI_TENSOR_MODEL_FILE_SMALL cdef str MODELS_FOLDER
cdef int SMALL_SIZE_KB
cdef bytes DONE_SIGNAL cdef bytes DONE_SIGNAL
+5 -4
View File
@@ -9,11 +9,12 @@ cdef str ANNOTATIONS_QUEUE = "azaion-annotations"
cdef str API_URL = "https://api.azaion.com" # Base URL for the external API cdef str API_URL = "https://api.azaion.com" # Base URL for the external API
cdef str QUEUE_CONFIG_FILENAME = "secured-config.json" cdef str QUEUE_CONFIG_FILENAME = "secured-config.json"
cdef str AI_ONNX_MODEL_FILE_BIG = "azaion.onnx.big" cdef str AI_ONNX_MODEL_FILE = "azaion.onnx"
cdef str AI_ONNX_MODEL_FILE_SMALL = "azaion.onnx.small"
cdef str AI_TENSOR_MODEL_FILE_BIG = "azaion.engine.big" cdef str CDN_CONFIG = "cdn.yaml"
cdef str AI_TENSOR_MODEL_FILE_SMALL = "azaion.engine.small" cdef str MODELS_FOLDER = "models"
cdef int SMALL_SIZE_KB = 3
cdef log(str log_message, bytes client_id=None): cdef log(str log_message, bytes client_id=None):
local_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()) local_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
-1
View File
@@ -1,7 +1,6 @@
cdef class Credentials: cdef class Credentials:
cdef public str email cdef public str email
cdef public str password cdef public str password
cdef public str folder
@staticmethod @staticmethod
cdef from_msgpack(bytes data) cdef from_msgpack(bytes data)
+2 -4
View File
@@ -2,16 +2,14 @@ from msgpack import unpackb
cdef class Credentials: cdef class Credentials:
def __init__(self, str email, str password, str folder): def __init__(self, str email, str password):
self.email = email self.email = email
self.password = password self.password = password
self.folder = folder
@staticmethod @staticmethod
cdef from_msgpack(bytes data): cdef from_msgpack(bytes data):
unpacked = unpackb(data, strict_map_key=False) unpacked = unpackb(data, strict_map_key=False)
return Credentials( return Credentials(
unpacked.get("Email"), unpacked.get("Email"),
unpacked.get("Password"), unpacked.get("Password"))
unpacked.get("Folder"))
+4
View File
@@ -17,7 +17,11 @@ cdef class Inference:
cdef int model_width cdef int model_width
cdef int model_height cdef int model_height
cdef build_tensor_engine(self)
cdef init_ai(self)
cdef bint is_building_engine
cdef bint is_video(self, str filepath) cdef bint is_video(self, str filepath)
cdef run_inference(self, RemoteCommand cmd) cdef run_inference(self, RemoteCommand cmd)
cdef _process_video(self, RemoteCommand cmd, AIRecognitionConfig ai_config, str video_name) cdef _process_video(self, RemoteCommand cmd, AIRecognitionConfig ai_config, str video_name)
cdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths) cdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths)
+35 -7
View File
@@ -1,14 +1,19 @@
import json import json
import mimetypes import mimetypes
import os
import subprocess import subprocess
import time
import cv2 import cv2
import numpy as np import numpy as np
cimport constants
from remote_command cimport RemoteCommand from remote_command cimport RemoteCommand
from annotation cimport Detection, Annotation from annotation cimport Detection, Annotation
from ai_config cimport AIRecognitionConfig from ai_config cimport AIRecognitionConfig
from inference_engine cimport OnnxEngine, TensorRTEngine from inference_engine cimport OnnxEngine, TensorRTEngine
from hardware_service cimport HardwareService from hardware_service cimport HardwareService
from security cimport Security
cdef class Inference: cdef class Inference:
def __init__(self, api_client, on_annotation): def __init__(self, api_client, on_annotation):
@@ -20,18 +25,41 @@ cdef class Inference:
self.model_height = 0 self.model_height = 0
self.engine = None self.engine = None
self.class_names = None self.class_names = None
self.is_building_engine = False
cdef build_tensor_engine(self):
    """Build and publish a TensorRT engine for this GPU when none exists locally.

    Does nothing when no NVIDIA GPU is detected, when the engine file name
    cannot be determined, or when ``<MODELS_FOLDER>/<engine>.big`` is already
    present. Otherwise the ONNX model is loaded, converted with
    ``TensorRTEngine.convert_from_onnx`` and stored through
    ``upload_big_small_resource``.

    ``is_building_engine`` is raised for the duration of the build and is
    always lowered again, even when a step fails: ``init_ai`` polls this flag
    in a sleep loop and would otherwise wait forever.
    """
    if not HardwareService.has_nvidia_gpu():
        return

    engine_filename = TensorRTEngine.get_engine_filename()
    if engine_filename is None:
        # get_engine_filename returns None when the CUDA device query fails;
        # without a name there is nothing sensible to look up or upload.
        print('Cannot determine TensorRT engine filename, skipping engine build')
        return

    key = Security.get_model_encryption_key()
    models_dir = constants.MODELS_FOLDER
    if os.path.exists(os.path.join(<str> models_dir, f'{engine_filename}.big')):
        return

    self.is_building_engine = True
    try:
        onnx_model = self.api_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir, key)
        model_bytes = TensorRTEngine.convert_from_onnx(onnx_model)
        if model_bytes is None:
            # convert_from_onnx reports failure by returning None.
            print('TensorRT conversion failed, engine was not uploaded')
            return
        self.api_client.upload_big_small_resource(model_bytes, <str> engine_filename, models_dir, key)
    finally:
        # Always release waiters in init_ai, whether the build succeeded or not.
        self.is_building_engine = False
cdef init_ai(self):
if self.engine is not None: if self.engine is not None:
return return
is_nvidia = HardwareService.has_nvidia_gpu() is_nvidia = HardwareService.has_nvidia_gpu()
key = Security.get_model_encryption_key()
models_dir = constants.MODELS_FOLDER
if is_nvidia: if is_nvidia:
model_bytes = self.api_client.load_ai_model(is_tensor=True) while self.is_building_engine:
self.engine = TensorRTEngine(model_bytes, batch_size=4) time.sleep(1)
engine_filename = TensorRTEngine.get_engine_filename()
model_bytes = self.api_client.load_big_small_resource(engine_filename, models_dir, key)
self.engine = TensorRTEngine(model_bytes)
else: else:
model_bytes = self.api_client.load_ai_model() model_bytes = self.api_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir, key)
self.engine = OnnxEngine(model_bytes, batch_size=4) self.engine = OnnxEngine(model_bytes)
self.model_height, self.model_width = self.engine.get_input_shape() self.model_height, self.model_width = self.engine.get_input_shape()
self.class_names = self.engine.get_class_names() self.class_names = self.engine.get_class_names()
@@ -135,7 +163,7 @@ cdef class Inference:
images.append(m) images.append(m)
# images first, it's faster # images first, it's faster
if len(images) > 0: if len(images) > 0:
for chunk in self.split_list_extend(images, ai_config.model_batch_size): for chunk in self.split_list_extend(images, self.engine.get_input_shape()):
print(f'run inference on {" ".join(chunk)}...') print(f'run inference on {" ".join(chunk)}...')
self._process_images(cmd, ai_config, chunk) self._process_images(cmd, ai_config, chunk)
if len(videos) > 0: if len(videos) > 0:
@@ -161,7 +189,7 @@ cdef class Inference:
batch_frames.append(frame) batch_frames.append(frame)
batch_timestamps.append(int(v_input.get(cv2.CAP_PROP_POS_MSEC))) batch_timestamps.append(int(v_input.get(cv2.CAP_PROP_POS_MSEC)))
if len(batch_frames) == ai_config.model_batch_size: if len(batch_frames) == self.engine.get_input_shape():
input_blob = self.preprocess(batch_frames) input_blob = self.preprocess(batch_frames)
outputs = self.engine.run(input_blob) outputs = self.engine.run(input_blob)
+9
View File
@@ -27,3 +27,12 @@ cdef class TensorRTEngine(InferenceEngine):
cdef object output_shape cdef object output_shape
cdef object h_output cdef object h_output
cdef object class_names cdef object class_names
@staticmethod
cdef bytes convert_from_onnx(bytes onnx_model)
@staticmethod
cdef unsigned long long get_gpu_memory_bytes(device_id=?)
@staticmethod
cdef str get_engine_filename(device_id=?)
+64 -20
View File
@@ -6,6 +6,7 @@ import onnxruntime as onnx
import tensorrt as trt import tensorrt as trt
import pycuda.driver as cuda import pycuda.driver as cuda
import pycuda.autoinit # required for automatically initialize CUDA, do not remove. import pycuda.autoinit # required for automatically initialize CUDA, do not remove.
import pynvml
cdef class InferenceEngine: cdef class InferenceEngine:
@@ -28,13 +29,13 @@ cdef class InferenceEngine:
cdef class OnnxEngine(InferenceEngine): cdef class OnnxEngine(InferenceEngine):
def __init__(self, model_bytes: bytes, batch_size: int = 1, **kwargs): def __init__(self, model_bytes: bytes, batch_size: int = 1, **kwargs):
super().__init__(model_bytes, batch_size) super().__init__(model_bytes, batch_size)
self.batch_size = batch_size
self.session = onnx.InferenceSession(model_bytes, providers=["CUDAExecutionProvider", "CPUExecutionProvider"]) self.session = onnx.InferenceSession(model_bytes, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
self.model_inputs = self.session.get_inputs() self.model_inputs = self.session.get_inputs()
self.input_name = self.model_inputs[0].name self.input_name = self.model_inputs[0].name
self.input_shape = self.model_inputs[0].shape self.input_shape = self.model_inputs[0].shape
if self.input_shape[0] != -1: self.batch_size = self.input_shape[0] if self.input_shape[0] != -1 else batch_size
self.batch_size = self.input_shape[0]
print(f'AI detection model input: {self.model_inputs} {self.input_shape}') print(f'AI detection model input: {self.model_inputs} {self.input_shape}')
model_meta = self.session.get_modelmeta() model_meta = self.session.get_modelmeta()
print("Metadata:", model_meta.custom_metadata_map) print("Metadata:", model_meta.custom_metadata_map)
@@ -57,25 +58,12 @@ cdef class OnnxEngine(InferenceEngine):
cdef class TensorRTEngine(InferenceEngine): cdef class TensorRTEngine(InferenceEngine):
def __init__(self, model_bytes: bytes, batch_size: int = 4, **kwargs): def __init__(self, model_bytes: bytes, batch_size: int = 4, **kwargs):
super().__init__(model_bytes, batch_size) super().__init__(model_bytes, batch_size)
self.batch_size = batch_size
print('Enter init TensorRT') print('Enter init TensorRT')
try: try:
logger = trt.Logger(trt.Logger.WARNING) logger = trt.Logger(trt.Logger.WARNING)
metadata_len = struct.unpack("<I", model_bytes[:4])[0]
try:
metadata = json.loads(model_bytes[4:4 + metadata_len])
print(f"Model metadata: {json.dumps(metadata, indent=2)}")
string_dict = metadata['names']
self.class_names = {int(k): v for k, v in string_dict.items()}
except json.JSONDecodeError:
print(f"Failed to parse metadata")
return
engine_data = model_bytes[4 + metadata_len:]
runtime = trt.Runtime(logger) runtime = trt.Runtime(logger)
engine = runtime.deserialize_cuda_engine(engine_data) engine = runtime.deserialize_cuda_engine(model_bytes)
if engine is None: if engine is None:
raise RuntimeError(f"Failed to load TensorRT engine from bytes") raise RuntimeError(f"Failed to load TensorRT engine from bytes")
@@ -84,8 +72,7 @@ cdef class TensorRTEngine(InferenceEngine):
# input # input
self.input_name = engine.get_tensor_name(0) self.input_name = engine.get_tensor_name(0)
engine_input_shape = engine.get_tensor_shape(self.input_name) engine_input_shape = engine.get_tensor_shape(self.input_name)
if engine_input_shape[0] != -1: self.batch_size = self.input_shape[0] if self.input_shape[0] != -1 else batch_size
self.batch_size = engine_input_shape[0]
self.input_shape = [ self.input_shape = [
self.batch_size, self.batch_size,
@@ -101,7 +88,7 @@ cdef class TensorRTEngine(InferenceEngine):
self.output_name = engine.get_tensor_name(1) self.output_name = engine.get_tensor_name(1)
engine_output_shape = tuple(engine.get_tensor_shape(self.output_name)) engine_output_shape = tuple(engine.get_tensor_shape(self.output_name))
self.output_shape = [ self.output_shape = [
batch_size if self.input_shape[0] == -1 else self.input_shape[0], self.batch_size,
300 if engine_output_shape[1] == -1 else engine_output_shape[1], # max detections number 300 if engine_output_shape[1] == -1 else engine_output_shape[1], # max detections number
6 if engine_output_shape[2] == -1 else engine_output_shape[2] # x1 y1 x2 y2 conf cls 6 if engine_output_shape[2] == -1 else engine_output_shape[2] # x1 y1 x2 y2 conf cls
] ]
@@ -113,6 +100,63 @@ cdef class TensorRTEngine(InferenceEngine):
except Exception as e: except Exception as e:
raise RuntimeError(f"Failed to initialize TensorRT engine: {str(e)}") raise RuntimeError(f"Failed to initialize TensorRT engine: {str(e)}")
@staticmethod
cdef unsigned long long get_gpu_memory_bytes(device_id=0):
    """Return the total memory of GPU ``device_id`` in bytes as reported by NVML.

    Falls back to 2 GiB when NVML raises ``pynvml.NVMLError``.
    """
    fallback_bytes = 2 * 1024 * 1024 * 1024  # default 2 Gb
    detected = None
    try:
        pynvml.nvmlInit()
        gpu_handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)
        detected = pynvml.nvmlDeviceGetMemoryInfo(gpu_handle).total
    except pynvml.NVMLError:
        detected = None
    finally:
        # Shutdown may itself fail (e.g. when init never succeeded); ignore that.
        try:
            pynvml.nvmlShutdown()
        except pynvml.NVMLError:
            pass

    if detected is None:
        return fallback_bytes
    return detected
@staticmethod
cdef str get_engine_filename(device_id=0):
    """Return the engine file name for CUDA device ``device_id``.

    The name encodes the device's compute capability and multiprocessor
    count. Returns None when querying the device raises.
    """
    try:
        gpu = cuda.Device(device_id)
        multiprocessors = gpu.multiprocessor_count
        major, minor = gpu.compute_capability()
    except Exception:
        return None
    return f"azaion.cc_{major}.{minor}_sm_{multiprocessors}.engine"
@staticmethod
cdef bytes convert_from_onnx(bytes onnx_model):
    """Convert a serialized ONNX model into a serialized TensorRT engine.

    The builder workspace is limited to 90% of the value returned by
    ``get_gpu_memory_bytes()``. FP16 is enabled when the builder reports
    fast FP16 support on this platform.

    :param onnx_model: serialized ONNX model bytes.
    :return: serialized engine bytes, or None when the input is empty,
             the ONNX parser rejects the model, or the build fails.
    """
    cdef unsigned long long workspace_bytes

    if not onnx_model:
        print('Conversion failed: ONNX model is empty.')
        return None

    workspace_bytes = int(TensorRTEngine.get_gpu_memory_bytes() * 0.9)

    explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    trt_logger = trt.Logger(trt.Logger.WARNING)

    with trt.Builder(trt_logger) as builder, \
            builder.create_network(explicit_batch_flag) as network, \
            trt.OnnxParser(network, trt_logger) as parser, \
            builder.create_builder_config() as config:

        config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes)

        if not parser.parse(onnx_model):
            # Surface the parser diagnostics instead of failing silently.
            print('Conversion failed: cannot parse ONNX model.')
            for error_index in range(parser.num_errors):
                print(parser.get_error(error_index))
            return None

        if builder.platform_has_fast_fp16:
            print('Converting to supported fp16')
            config.set_flag(trt.BuilderFlag.FP16)
        else:
            print('Converting to supported fp32. (fp16 is not supported)')

        plan = builder.build_serialized_network(network, config)
        if plan is None:
            print('Conversion failed.')
            return None

        return bytes(plan)
cdef tuple get_input_shape(self): cdef tuple get_input_shape(self):
return self.input_shape[2], self.input_shape[3] return self.input_shape[2], self.input_shape[3]
+3 -2
View File
@@ -4,6 +4,8 @@ from queue import Queue
cimport constants cimport constants
from threading import Thread from threading import Thread
import yaml
from api_client cimport ApiClient from api_client cimport ApiClient
from annotation cimport Annotation from annotation cimport Annotation
from inference cimport Inference from inference cimport Inference
@@ -59,9 +61,8 @@ cdef class CommandProcessor:
print(f"Error handling client: {e}") print(f"Error handling client: {e}")
cdef login(self, RemoteCommand command): cdef login(self, RemoteCommand command):
cdef User user
self.api_client.set_credentials(Credentials.from_msgpack(command.data)) self.api_client.set_credentials(Credentials.from_msgpack(command.data))
user = self.api_client.get_user() Thread(target=self.inference.build_tensor_engine).start() # build AI engine in non-blocking thread
cdef load_file(self, RemoteCommand command): cdef load_file(self, RemoteCommand command):
cdef FileData file_data = FileData.from_msgpack(command.data) cdef FileData file_data = FileData.from_msgpack(command.data)
+2
View File
@@ -12,3 +12,5 @@ requests
pyyaml pyyaml
pycuda pycuda
tensorrt tensorrt
pynvml
boto3
+1
View File
@@ -12,6 +12,7 @@ extensions = [
Extension('remote_command', ['remote_command.pyx']), Extension('remote_command', ['remote_command.pyx']),
Extension('remote_command_handler', ['remote_command_handler.pyx']), Extension('remote_command_handler', ['remote_command_handler.pyx']),
Extension('user', ['user.pyx']), Extension('user', ['user.pyx']),
Extension('cdn_manager', ['cdn_manager.pyx']),
Extension('api_client', ['api_client.pyx']), Extension('api_client', ['api_client.pyx']),
Extension('ai_config', ['ai_config.pyx']), Extension('ai_config', ['ai_config.pyx']),
Extension('inference_engine', ['inference_engine.pyx'], include_dirs=[np.get_include()]), Extension('inference_engine', ['inference_engine.pyx'], include_dirs=[np.get_include()]),
+4 -5
View File
@@ -89,21 +89,20 @@ public partial class App
{ {
new ConfigUpdater().CheckConfig(); new ConfigUpdater().CheckConfig();
var secureAppConfig = ReadSecureAppConfig(); var secureAppConfig = ReadSecureAppConfig();
var apiDir = secureAppConfig.DirectoriesConfig.ApiResourcesDirectory;
_inferenceClient = new InferenceClient(new OptionsWrapper<InferenceClientConfig>(secureAppConfig.InferenceClientConfig)); _inferenceClient = new InferenceClient(new OptionsWrapper<InferenceClientConfig>(secureAppConfig.InferenceClientConfig));
_resourceLoader = new ResourceLoader(_inferenceClient); _resourceLoader = new ResourceLoader(_inferenceClient);
var login = new Login(); var login = new Login();
login.CredentialsEntered += async (_, credentials) => login.CredentialsEntered += async (_, credentials) =>
{ {
credentials.Folder = secureAppConfig.DirectoriesConfig.ApiResourcesDirectory;
_inferenceClient.Send(RemoteCommand.Create(CommandType.Login, credentials)); _inferenceClient.Send(RemoteCommand.Create(CommandType.Login, credentials));
_azaionApi = new AzaionApi(new HttpClient { BaseAddress = new Uri(SecurityConstants.API_URL) }, _cache, credentials, _hardwareService); _azaionApi = new AzaionApi(new HttpClient { BaseAddress = new Uri(SecurityConstants.API_URL) }, _cache, credentials, _hardwareService);
try try
{ {
_securedConfig = _resourceLoader.LoadFile("config.secured.json"); _securedConfig = _resourceLoader.LoadFile("config.secured.json", apiDir);
_systemConfig = _resourceLoader.LoadFile("config.system.json"); _systemConfig = _resourceLoader.LoadFile("config.system.json", apiDir);
} }
catch (Exception e) catch (Exception e)
{ {
@@ -125,7 +124,7 @@ public partial class App
{ {
try try
{ {
var stream = _resourceLoader.LoadFile($"{assemblyName}.dll"); var stream = _resourceLoader.LoadFile($"{assemblyName}.dll", apiDir);
return Assembly.Load(stream.ToArray()); return Assembly.Load(stream.ToArray());
} }
catch (Exception e) catch (Exception e)
+2 -2
View File
@@ -74,7 +74,7 @@
BorderBrush="DimGray" BorderBrush="DimGray"
BorderThickness="0,0,0,1" BorderThickness="0,0,0,1"
HorizontalAlignment="Left" HorizontalAlignment="Left"
Text="" Text="admin@azaion.com"
/> />
<TextBlock Text="Пароль" <TextBlock Text="Пароль"
Grid.Row="2" Grid.Row="2"
@@ -89,7 +89,7 @@
Width="300" Width="300"
BorderThickness="0,0,0,1" BorderThickness="0,0,0,1"
HorizontalAlignment="Left" HorizontalAlignment="Left"
Password=""/> Password="Az@1on1000Odm$n"/>
</Grid> </Grid>
<Button x:Name="LoginBtn" <Button x:Name="LoginBtn"
Content="Вхід" Content="Вхід"
-7
View File
@@ -1,7 +0,0 @@
python -m venv venv
venv\Scripts\pip install -r requirements.txt
venv\Scripts\pyinstaller --onefile --collect-all boto3 cdn_manager.py
move dist\cdn_manager.exe .\cdn_manager.exe
rmdir /s /q dist
rmdir /s /q build
+19 -3
View File
@@ -36,8 +36,24 @@ class CDNManager:
def download(self, bucket: str, filename: str): def download(self, bucket: str, filename: str):
try: try:
self.download_client.download_file(bucket, filename, filename) if filename is not None:
print(f'downloaded {filename} from the {bucket} to current folder') self.download_client.download_file(bucket, filename, os.path.join(bucket, filename))
print(f'downloaded {filename} from the {bucket} to current folder')
else:
response = self.download_client.list_objects_v2(Bucket=bucket)
if 'Contents' in response:
for obj in response['Contents']:
object_key = obj['Key']
local_filepath = os.path.join(bucket, object_key)
local_dir = os.path.dirname(local_filepath)
if local_dir:
os.makedirs(local_dir, exist_ok=True)
if not object_key.endswith('/'):
try:
self.download_client.download_file(bucket, object_key, local_filepath)
except Exception as e_file:
all_successful = False # Mark as failed if any file fails
return True return True
except Exception as e: except Exception as e:
print(e) print(e)
@@ -58,7 +74,7 @@ cdn_manager = CDNManager(CDNCredentials(
input_action = sys.argv[1] input_action = sys.argv[1]
input_bucket = sys.argv[2] input_bucket = sys.argv[2]
input_filename = sys.argv[3] input_filename = sys.argv[3] if len(sys.argv) > 3 else None
if len(sys.argv) > 4: # 0 is this script's path, hence 5 args is max if len(sys.argv) > 4: # 0 is this script's path, hence 5 args is max
input_path = sys.argv[4] input_path = sys.argv[4]
+2 -2
View File
@@ -43,6 +43,7 @@ venv\Scripts\pyinstaller --name=azaion-inference ^
--collect-all onnxruntime ^ --collect-all onnxruntime ^
--collect-all tensorrt ^ --collect-all tensorrt ^
--collect-all pycuda ^ --collect-all pycuda ^
--collect-all pynvml ^
--collect-all re ^ --collect-all re ^
--hidden-import constants ^ --hidden-import constants ^
--hidden-import annotation ^ --hidden-import annotation ^
@@ -71,8 +72,7 @@ cd..
echo Download onnx model echo Download onnx model
cd build cd build
call cdn_manager.exe download models azaion.onnx.big call cdn_manager.exe download models
call cdn_manager.exe download models azaion.engine.big
move azaion.* ..\dist\ move azaion.* ..\dist\
cd.. cd..