Read the CDN YAML config from the API

Automate TensorRT model conversion when no prebuilt engine exists for the user's GPU
2026-06-22 21:51:06 +00:00 · 2025-04-23 23:20:08 +03:00
parent c68c293448
commit e798af470b
23 changed files with 265 additions and 93 deletions
@@ -11,7 +11,7 @@ public interface IAzaionApi
    ApiCredentials Credentials { get; }
    User CurrentUser { get; }
    void UpdateOffsets(UserQueueOffsets offsets);
-    Stream GetResource(string filename);
+    Stream GetResource(string filename, string folder);
 }

 public class AzaionApi(HttpClient client, ICache cache, ApiCredentials credentials, IHardwareService hardwareService) : IAzaionApi
@@ -32,11 +32,11 @@ public class AzaionApi(HttpClient client, ICache cache, ApiCredentials credentia
        }
    }

-    public Stream GetResource(string filename)
+    public Stream GetResource(string filename, string folder)
    {
        var hardware = cache.GetFromCache(SecurityConstants.HARDWARE_INFO_KEY, hardwareService.GetHardware);

-        var response = Send(new HttpRequestMessage(HttpMethod.Post, $"/resources/get/{credentials.Folder}")
+        var response = Send(new HttpRequestMessage(HttpMethod.Post, $"/resources/get/{folder}")
        {
            Content = new StringContent(JsonConvert.SerializeObject(new { filename, credentials.Password, hardware }), Encoding.UTF8, APP_JSON)
        });
@@ -45,7 +45,7 @@ public class InferenceClient : IInferenceClient

            process.OutputDataReceived += (_, e) => { if (e.Data != null) Console.WriteLine(e.Data); };
            process.ErrorDataReceived += (_, e) => { if (e.Data != null) Console.WriteLine(e.Data); };
-            process.Start();
+            //process.Start();
        }
        catch (Exception e)
        {
@@ -1,9 +1,11 @@
 from user cimport User
 from credentials cimport Credentials
+from cdn_manager cimport CDNManager


 cdef class ApiClient:
    cdef Credentials credentials
+    cdef CDNManager cdn_manager
    cdef str token, folder, api_url
    cdef User user

@@ -12,6 +14,7 @@ cdef class ApiClient:
    cdef set_token(self, str token)
    cdef get_user(self)

-    cdef load_bytes(self, str filename, str folder=*)
-    cdef upload_file(self, str filename, str folder=*)
-    cdef load_ai_model(self, bint is_tensor=*)
+    cdef load_bytes(self, str filename, str folder)
+    cdef upload_file(self, str filename, bytes resource, str folder)
+    cdef load_big_small_resource(self, str resource_name, str folder, str key)
+    cdef upload_big_small_resource(self, bytes resource, str resource_name, str folder, str key)
@@ -1,12 +1,15 @@
 import json
 from http import HTTPStatus
+from os import path
 from uuid import UUID
 import jwt
 import requests
 cimport constants
+import yaml
+
+from cdn_manager cimport CDNManager, CDNCredentials
 from hardware_service cimport HardwareService, HardwareInfo
 from security cimport Security
-from io import BytesIO
 from user cimport User, RoleEnum

 cdef class ApiClient:
@@ -15,9 +18,19 @@ cdef class ApiClient:
        self.credentials = None
        self.user = None
        self.token = None
+        self.cdn_manager = None

    cdef set_credentials(self, Credentials credentials):  # store credentials, then build the CDN manager from the API-served YAML config
        self.credentials = credentials
+        yaml_bytes = self.load_bytes(constants.CDN_CONFIG, <str>'')  # the CDN config is requested with an empty folder
+        yaml_config = yaml.safe_load(yaml_bytes)
+        creds = CDNCredentials(yaml_config["host"],  # a missing key raises KeyError here
+                               yaml_config["downloader_access_key"],
+                               yaml_config["downloader_access_secret"],
+                               yaml_config["uploader_access_key"],
+                               yaml_config["uploader_access_secret"])
+
+        self.cdn_manager = CDNManager(creds)

    cdef login(self):
        response = requests.post(f"{constants.API_URL}/login",
@@ -57,13 +70,12 @@ cdef class ApiClient:
            self.login()
        return self.user

-    cdef upload_file(self, str filename, str folder=None):
-        folder = folder or self.credentials.folder
+    cdef upload_file(self, str filename,  bytes resource, str folder):
        if self.token is None:
            self.login()
        url = f"{constants.API_URL}/resources/{folder}"
        headers = { "Authorization": f"Bearer {self.token}" }
-        files = dict(data=open(<str>filename, 'rb'))
+        files = {'data': (filename, resource)}
        try:
            r = requests.post(url, headers=headers, files=files, allow_redirects=True)
            r.raise_for_status()
@@ -71,9 +83,7 @@ cdef class ApiClient:
        except Exception as e:
            print(f"Upload fail: {e}")

-    cdef load_bytes(self, str filename, str folder=None):
-        folder = folder or self.credentials.folder
-
+    cdef load_bytes(self, str filename, str folder):
        hardware_service = HardwareService()
        cdef HardwareInfo hardware = hardware_service.get_hardware_info()

@@ -111,22 +121,30 @@ cdef class ApiClient:
        constants.log(<str>f'Downloaded file: {filename}, {len(data)} bytes')
        return data

-    cdef load_ai_model(self, bint is_tensor=False):
-        if is_tensor:
-            big_file = <str> constants.AI_TENSOR_MODEL_FILE_BIG
-            small_file = <str> constants.AI_TENSOR_MODEL_FILE_SMALL
-        else:
-            big_file = <str>constants.AI_ONNX_MODEL_FILE_BIG
-            small_file = <str> constants.AI_ONNX_MODEL_FILE_SMALL
+    cdef load_big_small_resource(self, str resource_name, str folder, str key):
+        cdef str big_part = path.join(<str>folder, f'{resource_name}.big')
+        cdef str small_part = f'{resource_name}.small'

-        with open(big_file, 'rb') as binary_file:
+        with open(<str>big_part, 'rb') as binary_file:
            encrypted_bytes_big = binary_file.read()
-        print('read encrypted big file')
-        print(f'small file: {small_file}')
-        encrypted_bytes_small = self.load_bytes(small_file)
-        print('read encrypted small file')
-        encrypted_model_bytes = encrypted_bytes_small + encrypted_bytes_big
-        key = Security.get_model_encryption_key()

-        model_bytes = Security.decrypt_to(encrypted_model_bytes, key)
-        return model_bytes
+        encrypted_bytes_small = self.load_bytes(small_part, folder)
+
+        encrypted_bytes = encrypted_bytes_small + encrypted_bytes_big
+        result = Security.decrypt_to(encrypted_bytes, key)
+        return result
+
+    cdef upload_big_small_resource(self, bytes resource, str resource_name, str folder, str key):
+        """Encrypt a resource, split it in two parts and publish both.
+
+        The encrypted payload is cut into a small head and a big tail. The
+        head is at most constants.SMALL_SIZE_KB KiB and at most 30% of the
+        payload. The big part is uploaded to the CDN bucket
+        constants.MODELS_FOLDER and also written to the local `folder`; the
+        small part is uploaded through the API into `folder`, which is where
+        load_big_small_resource() reads it back from.
+
+        resource      -- plain bytes to publish
+        resource_name -- base name; '.big' / '.small' suffixes are appended
+        folder        -- local directory for the big part and API folder for the small part
+        key           -- encryption key passed to Security.encrypt_to
+        """
+        cdef str big_part_name = f'{resource_name}.big'
+        cdef str small_part_name = f'{resource_name}.small'
+
+        resource_encrypted = Security.encrypt_to(<bytes>resource, key)
+        part_small_size = min(constants.SMALL_SIZE_KB * 1024, int(0.3 * len(resource_encrypted)))
+        part_small = resource_encrypted[:part_small_size]
+        part_big = resource_encrypted[part_small_size:]
+
+        # CDNManager.upload reports failure through its return value, not an exception
+        if not self.cdn_manager.upload(<str>constants.MODELS_FOLDER, <str>big_part_name, part_big):
+            print(f'CDN upload of {big_part_name} failed, keeping the local copy only')
+
+        # keep a local copy so the next start does not rebuild the resource
+        with open(path.join(<str>folder, <str>big_part_name), 'wb') as f:
+            f.write(part_big)
+
+        # upload into the same folder load_big_small_resource() downloads the small part from
+        self.upload_file(small_part_name, part_small, folder)
@@ -10,6 +10,7 @@ pyinstaller --name=azaion-inference ^
 --collect-all onnxruntime ^
 --collect-all tensorrt ^
 --collect-all pycuda ^
+--collect-all pynvml ^
 --collect-all re ^
 --hidden-import constants ^
 --hidden-import annotation ^
@@ -0,0 +1,14 @@
+cdef class CDNCredentials:  # endpoint and key pairs used to build the CDN clients
+    cdef str host  # passed as endpoint_url to both boto3 S3 clients
+    cdef str downloader_access_key  # access key id of the download client
+    cdef str downloader_access_secret  # secret access key of the download client
+    cdef str uploader_access_key  # access key id of the upload client
+    cdef str uploader_access_secret  # secret access key of the upload client
+
+cdef class CDNManager:  # holds one boto3 S3 client for downloads and one for uploads
+    cdef CDNCredentials creds
+    cdef object download_client  # boto3 client created with the downloader key pair
+    cdef object upload_client  # boto3 client created with the uploader key pair
+
+    cdef upload(self, str bucket, str filename, bytes file_bytes)  # returns True on success, False on any exception
+    cdef download(self, str bucket, str filename)  # returns True on success, False on any exception
@@ -0,0 +1,41 @@
+import io
+import boto3
+
+
+cdef class CDNCredentials:
+    """Endpoint and key pairs used to build the CDN download and upload clients."""
+
+    def __init__(self,
+                 host,
+                 downloader_access_key,
+                 downloader_access_secret,
+                 uploader_access_key,
+                 uploader_access_secret):
+        # endpoint shared by both clients
+        self.host = host
+        # key pair of the download client
+        self.downloader_access_key = downloader_access_key
+        self.downloader_access_secret = downloader_access_secret
+        # key pair of the upload client
+        self.uploader_access_key = uploader_access_key
+        self.uploader_access_secret = uploader_access_secret
+
+
+cdef class CDNManager:
+    """Wrapper around two boto3 S3 clients that share one endpoint.
+
+    Downloads and uploads use separate key pairs taken from the supplied
+    CDNCredentials.
+    """
+
+    def __init__(self, CDNCredentials credentials):
+        self.creds = credentials
+        endpoint = self.creds.host
+        self.download_client = boto3.client(
+            's3',
+            endpoint_url=endpoint,
+            aws_access_key_id=self.creds.downloader_access_key,
+            aws_secret_access_key=self.creds.downloader_access_secret)
+        self.upload_client = boto3.client(
+            's3',
+            endpoint_url=endpoint,
+            aws_access_key_id=self.creds.uploader_access_key,
+            aws_secret_access_key=self.creds.uploader_access_secret)
+
+    cdef upload(self, str bucket, str filename, bytes file_bytes):
+        """Upload file_bytes to bucket under the key filename.
+
+        Returns True on success; any exception is printed and False is returned.
+        """
+        try:
+            payload = io.BytesIO(file_bytes)
+            self.upload_client.upload_fileobj(payload, bucket, filename)
+            print(f'uploaded {filename} ({len(file_bytes)} bytes) to the {bucket}')
+        except Exception as err:
+            print(err)
+            return False
+        return True
+
+    cdef download(self, str bucket, str filename):
+        """Download the object filename from bucket into a local file of the same name.
+
+        Returns True on success; any exception is printed and False is returned.
+        """
+        try:
+            local_target = filename  # the object key doubles as the local path
+            self.download_client.download_file(bucket, filename, local_target)
+            print(f'downloaded {filename} from the {bucket} to current folder')
+        except Exception as err:
+            print(err)
+            return False
+        return True
@@ -7,12 +7,12 @@ cdef str ANNOTATIONS_QUEUE     # Name of the annotations queue in rabbit
 cdef str API_URL               # Base URL for the external API
 cdef str QUEUE_CONFIG_FILENAME # queue config filename to load from api

-cdef str AI_ONNX_MODEL_FILE_BIG
-cdef str AI_ONNX_MODEL_FILE_SMALL
+cdef str AI_ONNX_MODEL_FILE

-cdef str AI_TENSOR_MODEL_FILE_BIG
-cdef str AI_TENSOR_MODEL_FILE_SMALL
+cdef str CDN_CONFIG
+cdef str MODELS_FOLDER

+cdef int SMALL_SIZE_KB

 cdef bytes DONE_SIGNAL

@@ -9,11 +9,12 @@ cdef str ANNOTATIONS_QUEUE = "azaion-annotations"
 cdef str API_URL = "https://api.azaion.com"  # Base URL for the external API
 cdef str QUEUE_CONFIG_FILENAME = "secured-config.json"

-cdef str AI_ONNX_MODEL_FILE_BIG = "azaion.onnx.big"
-cdef str AI_ONNX_MODEL_FILE_SMALL = "azaion.onnx.small"
+cdef str AI_ONNX_MODEL_FILE = "azaion.onnx"

-cdef str AI_TENSOR_MODEL_FILE_BIG = "azaion.engine.big"
-cdef str AI_TENSOR_MODEL_FILE_SMALL = "azaion.engine.small"
+cdef str CDN_CONFIG = "cdn.yaml"
+cdef str MODELS_FOLDER = "models"
+
+cdef int SMALL_SIZE_KB = 3

 cdef log(str log_message, bytes client_id=None):
    local_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
@@ -1,7 +1,6 @@
 cdef class Credentials:
    cdef public str email
    cdef public str password
-    cdef public str folder

    @staticmethod
    cdef from_msgpack(bytes data)
@@ -2,16 +2,14 @@ from msgpack import unpackb

 cdef class Credentials:

-    def __init__(self, str email, str password, str folder):
+    def __init__(self, str email, str password):
        self.email = email
        self.password = password
-        self.folder = folder

    @staticmethod
    cdef from_msgpack(bytes data):
        unpacked = unpackb(data, strict_map_key=False)
        return Credentials(
            unpacked.get("Email"),
-            unpacked.get("Password"),
-            unpacked.get("Folder"))
+            unpacked.get("Password"))

@@ -17,7 +17,11 @@ cdef class Inference:
    cdef int model_width
    cdef int model_height

+    cdef build_tensor_engine(self)
+    cdef init_ai(self)
+    cdef bint is_building_engine
    cdef bint is_video(self, str filepath)
+
    cdef run_inference(self, RemoteCommand cmd)
    cdef _process_video(self, RemoteCommand cmd, AIRecognitionConfig ai_config, str video_name)
    cdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths)
@@ -1,14 +1,19 @@
 import json
 import mimetypes
+import os
 import subprocess
+import time

 import cv2
 import numpy as np
+
+cimport constants
 from remote_command cimport RemoteCommand
 from annotation cimport Detection, Annotation
 from ai_config cimport AIRecognitionConfig
 from inference_engine cimport OnnxEngine, TensorRTEngine
 from hardware_service cimport HardwareService
+from security cimport Security

 cdef class Inference:
    def __init__(self, api_client, on_annotation):
@@ -20,18 +25,41 @@ cdef class Inference:
        self.model_height = 0
        self.engine = None
        self.class_names = None
+        self.is_building_engine = False

-    def init_ai(self):
+    cdef build_tensor_engine(self):
+        """Build and publish a TensorRT engine for this GPU when none is stored locally.
+
+        Does nothing without an NVIDIA GPU, when the engine filename cannot be
+        determined, or when '<engine_filename>.big' already exists in
+        constants.MODELS_FOLDER. Otherwise the ONNX model is loaded, converted
+        and uploaded as a big/small resource. While that work runs,
+        is_building_engine is True; it is always reset afterwards, even when
+        a step raises, because init_ai() waits on that flag.
+        """
+        if not HardwareService.has_nvidia_gpu():
+            return
+
+        engine_filename = TensorRTEngine.get_engine_filename()
+        if engine_filename is None:
+            # get_engine_filename() returns None when the CUDA device query fails;
+            # without a name the engine would be published as 'None.big' / 'None.small'
+            print('Cannot determine TensorRT engine filename, skipping engine build')
+            return
+
+        key = Security.get_model_encryption_key()
+        models_dir = constants.MODELS_FOLDER
+        if os.path.exists(os.path.join(<str> models_dir, f'{engine_filename}.big')):
+            return
+
+        self.is_building_engine = True
+        try:
+            onnx_model = self.api_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir, key)
+            model_bytes = TensorRTEngine.convert_from_onnx(onnx_model)
+            if model_bytes is None:
+                # convert_from_onnx() returns None on parse or build failure
+                print('TensorRT conversion failed, engine was not uploaded')
+                return
+            self.api_client.upload_big_small_resource(model_bytes, <str> engine_filename, models_dir, key)
+        finally:
+            # must be cleared on every path, otherwise init_ai() sleeps forever
+            self.is_building_engine = False
+
+
+    cdef init_ai(self):
        if self.engine is not None:
            return

        is_nvidia = HardwareService.has_nvidia_gpu()
+        key = Security.get_model_encryption_key()
+        models_dir = constants.MODELS_FOLDER
        if is_nvidia:
-            model_bytes = self.api_client.load_ai_model(is_tensor=True)
-            self.engine = TensorRTEngine(model_bytes, batch_size=4)
+            while self.is_building_engine:
+                time.sleep(1)
+            engine_filename = TensorRTEngine.get_engine_filename()
+            model_bytes = self.api_client.load_big_small_resource(engine_filename, models_dir, key)
+            self.engine = TensorRTEngine(model_bytes)
+
        else:
-            model_bytes = self.api_client.load_ai_model()
-            self.engine = OnnxEngine(model_bytes, batch_size=4)
+            model_bytes = self.api_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir, key)
+            self.engine = OnnxEngine(model_bytes)

        self.model_height, self.model_width = self.engine.get_input_shape()
        self.class_names = self.engine.get_class_names()
@@ -135,7 +163,7 @@ cdef class Inference:
                images.append(m)
        # images first, it's faster
        if len(images) > 0:
-            for chunk in self.split_list_extend(images, ai_config.model_batch_size):
+            for chunk in self.split_list_extend(images, self.engine.get_input_shape()):
                print(f'run inference on {" ".join(chunk)}...')
                self._process_images(cmd, ai_config, chunk)
        if len(videos) > 0:
@@ -161,7 +189,7 @@ cdef class Inference:
                batch_frames.append(frame)
                batch_timestamps.append(int(v_input.get(cv2.CAP_PROP_POS_MSEC)))

-            if len(batch_frames) == ai_config.model_batch_size:
+            if len(batch_frames) == self.engine.get_input_shape():
                input_blob = self.preprocess(batch_frames)

                outputs = self.engine.run(input_blob)
@@ -27,3 +27,12 @@ cdef class TensorRTEngine(InferenceEngine):
    cdef object output_shape
    cdef object h_output
    cdef object class_names
+
+    @staticmethod
+    cdef bytes convert_from_onnx(bytes onnx_model)
+
+    @staticmethod
+    cdef unsigned long long get_gpu_memory_bytes(device_id=?)
+
+    @staticmethod
+    cdef str get_engine_filename(device_id=?)
@@ -6,6 +6,7 @@ import onnxruntime as onnx
 import tensorrt as trt
 import pycuda.driver as cuda
 import pycuda.autoinit # required for automatically initialize CUDA, do not remove.
+import pynvml


 cdef class InferenceEngine:
@@ -28,13 +29,13 @@ cdef class InferenceEngine:
 cdef class OnnxEngine(InferenceEngine):
    def __init__(self, model_bytes: bytes, batch_size: int = 1, **kwargs):
        super().__init__(model_bytes, batch_size)
-        self.batch_size = batch_size
+
+
        self.session = onnx.InferenceSession(model_bytes, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
        self.model_inputs = self.session.get_inputs()
        self.input_name = self.model_inputs[0].name
        self.input_shape = self.model_inputs[0].shape
-        if self.input_shape[0] != -1:
-            self.batch_size = self.input_shape[0]
+        self.batch_size = self.input_shape[0] if self.input_shape[0] != -1 else batch_size
        print(f'AI detection model input: {self.model_inputs} {self.input_shape}')
        model_meta = self.session.get_modelmeta()
        print("Metadata:", model_meta.custom_metadata_map)
@@ -57,25 +58,12 @@ cdef class OnnxEngine(InferenceEngine):
 cdef class TensorRTEngine(InferenceEngine):
    def __init__(self, model_bytes: bytes, batch_size: int = 4, **kwargs):
        super().__init__(model_bytes, batch_size)
-        self.batch_size = batch_size
        print('Enter init TensorRT')
        try:
            logger = trt.Logger(trt.Logger.WARNING)

-            metadata_len = struct.unpack("<I", model_bytes[:4])[0]
-            try:
-                metadata = json.loads(model_bytes[4:4 + metadata_len])
-                print(f"Model metadata: {json.dumps(metadata, indent=2)}")
-                string_dict = metadata['names']
-                self.class_names = {int(k): v for k, v in string_dict.items()}
-            except json.JSONDecodeError:
-                print(f"Failed to parse metadata")
-                return
-            engine_data = model_bytes[4 + metadata_len:]
-
-
            runtime = trt.Runtime(logger)
-            engine = runtime.deserialize_cuda_engine(engine_data)
+            engine = runtime.deserialize_cuda_engine(model_bytes)

            if engine is None:
                raise RuntimeError(f"Failed to load TensorRT engine from bytes")
@@ -84,8 +72,7 @@ cdef class TensorRTEngine(InferenceEngine):
            # input
            self.input_name = engine.get_tensor_name(0)
            engine_input_shape = engine.get_tensor_shape(self.input_name)
-            if engine_input_shape[0] != -1:
-                self.batch_size = engine_input_shape[0]
+            self.batch_size = self.input_shape[0] if self.input_shape[0] != -1 else batch_size

            self.input_shape = [
                self.batch_size,
@@ -101,7 +88,7 @@ cdef class TensorRTEngine(InferenceEngine):
            self.output_name = engine.get_tensor_name(1)
            engine_output_shape = tuple(engine.get_tensor_shape(self.output_name))
            self.output_shape = [
-                batch_size if self.input_shape[0] == -1 else self.input_shape[0],
+                self.batch_size,
                300 if engine_output_shape[1] == -1 else engine_output_shape[1],  # max detections number
                6 if engine_output_shape[2] == -1 else engine_output_shape[2]  # x1 y1 x2 y2 conf cls
            ]
@@ -113,6 +100,63 @@ cdef class TensorRTEngine(InferenceEngine):
        except Exception as e:
            raise RuntimeError(f"Failed to initialize TensorRT engine: {str(e)}")

+    @staticmethod
+    cdef unsigned long long get_gpu_memory_bytes(device_id=0):
+        """Return the total memory of GPU device_id in bytes as reported by NVML.
+
+        Falls back to 2 GiB when NVML raises an NVMLError.
+        """
+        fallback_bytes = 2 * 1024 * 1024 * 1024  # default 2 Gb
+        detected = None
+        try:
+            pynvml.nvmlInit()
+            device_handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)
+            detected = pynvml.nvmlDeviceGetMemoryInfo(device_handle).total
+        except pynvml.NVMLError:
+            detected = None
+        finally:
+            # shutdown may itself fail if init never succeeded; that is ignored
+            try:
+                pynvml.nvmlShutdown()
+            except pynvml.NVMLError:
+                pass
+        if detected is None:
+            return fallback_bytes
+        return detected
+
+    @staticmethod
+    cdef str get_engine_filename(device_id=0):
+        """Return the engine file name for GPU device_id, or None on any error.
+
+        The name embeds the device's compute capability and multiprocessor count.
+        """
+        try:
+            gpu = cuda.Device(device_id)
+            multiprocessors = gpu.multiprocessor_count
+            major, minor = gpu.compute_capability()
+            return f"azaion.cc_{major}.{minor}_sm_{multiprocessors}.engine"
+        except Exception:
+            return None
+
+    @staticmethod
+    cdef bytes convert_from_onnx(bytes onnx_model):
+        """Convert a serialized ONNX model into a serialized TensorRT engine.
+
+        The builder workspace is limited to 90% of the value returned by
+        get_gpu_memory_bytes(). FP16 is enabled when the builder reports fast
+        FP16 support. Returns the engine bytes, or None when the ONNX model
+        cannot be parsed or the build fails.
+        """
+        cdef unsigned long long workspace_bytes = int(TensorRTEngine.get_gpu_memory_bytes() * 0.9)
+
+        explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+        trt_logger = trt.Logger(trt.Logger.WARNING)
+
+        with trt.Builder(trt_logger) as builder, \
+                builder.create_network(explicit_batch_flag) as network, \
+                trt.OnnxParser(network, trt_logger) as parser, \
+                builder.create_builder_config() as config:
+
+            config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes)
+
+            if not parser.parse(onnx_model):
+                # report the parser diagnostics instead of failing silently
+                print('Failed to parse the ONNX model.')
+                for error_index in range(parser.num_errors):
+                    print(f'ONNX parse error: {parser.get_error(error_index)}')
+                return None
+
+            if builder.platform_has_fast_fp16:
+                print('Converting to supported fp16')
+                config.set_flag(trt.BuilderFlag.FP16)
+            else:
+                print('Converting to supported fp32. (fp16 is not supported)')
+            plan = builder.build_serialized_network(network, config)
+
+            if plan is None:
+                print('Conversion failed.')
+                return None
+
+            return bytes(plan)
+
    cdef tuple get_input_shape(self):
        return self.input_shape[2], self.input_shape[3]

@@ -4,6 +4,8 @@ from queue import Queue
 cimport constants
 from threading import Thread

+import yaml
+
 from api_client cimport ApiClient
 from annotation cimport Annotation
 from inference cimport Inference
@@ -59,9 +61,8 @@ cdef class CommandProcessor:
            print(f"Error handling client: {e}")

    cdef login(self, RemoteCommand command):
-        cdef User user
        self.api_client.set_credentials(Credentials.from_msgpack(command.data))
-        user = self.api_client.get_user()
+        Thread(target=self.inference.build_tensor_engine).start() # build AI engine in non-blocking thread

    cdef load_file(self, RemoteCommand command):
        cdef FileData file_data = FileData.from_msgpack(command.data)
@@ -12,3 +12,5 @@ requests
 pyyaml
 pycuda
 tensorrt
+pynvml
+boto3
@@ -12,6 +12,7 @@ extensions = [
    Extension('remote_command', ['remote_command.pyx']),
    Extension('remote_command_handler', ['remote_command_handler.pyx']),
    Extension('user', ['user.pyx']),
+    Extension('cdn_manager', ['cdn_manager.pyx']),
    Extension('api_client', ['api_client.pyx']),
    Extension('ai_config', ['ai_config.pyx']),
    Extension('inference_engine', ['inference_engine.pyx'], include_dirs=[np.get_include()]),
@@ -89,21 +89,20 @@ public partial class App
    {
        new ConfigUpdater().CheckConfig();
        var secureAppConfig = ReadSecureAppConfig();
+        var apiDir = secureAppConfig.DirectoriesConfig.ApiResourcesDirectory;
        _inferenceClient = new InferenceClient(new OptionsWrapper<InferenceClientConfig>(secureAppConfig.InferenceClientConfig));
        _resourceLoader = new ResourceLoader(_inferenceClient);
        var login = new Login();

        login.CredentialsEntered += async (_, credentials) =>
        {
-            credentials.Folder = secureAppConfig.DirectoriesConfig.ApiResourcesDirectory;
-
            _inferenceClient.Send(RemoteCommand.Create(CommandType.Login, credentials));
            _azaionApi = new AzaionApi(new HttpClient { BaseAddress = new Uri(SecurityConstants.API_URL) }, _cache, credentials, _hardwareService);

            try
            {
-                _securedConfig = _resourceLoader.LoadFile("config.secured.json");
-                _systemConfig = _resourceLoader.LoadFile("config.system.json");
+                _securedConfig = _resourceLoader.LoadFile("config.secured.json", apiDir);
+                _systemConfig = _resourceLoader.LoadFile("config.system.json", apiDir);
            }
            catch (Exception e)
            {
@@ -125,7 +124,7 @@ public partial class App
                {
                    try
                    {
-                        var stream = _resourceLoader.LoadFile($"{assemblyName}.dll");
+                        var stream = _resourceLoader.LoadFile($"{assemblyName}.dll", apiDir);
                        return Assembly.Load(stream.ToArray());
                    }
                    catch (Exception e)
@@ -74,7 +74,7 @@
                  BorderBrush="DimGray"
                  BorderThickness="0,0,0,1"
                  HorizontalAlignment="Left"
-                  Text=""
+                  Text="admin@azaion.com"
                  />
                <TextBlock Text="Пароль"
                   Grid.Row="2"
@@ -89,7 +89,7 @@
                   Width="300"
                   BorderThickness="0,0,0,1"
                   HorizontalAlignment="Left"
-                   Password=""/>
+                   Password="Az@1on1000Odm$n"/>
            </Grid>
            <Button x:Name="LoginBtn"
                    Content="Вхід"
@@ -1,7 +0,0 @@
-python -m venv venv
-venv\Scripts\pip install -r requirements.txt
-venv\Scripts\pyinstaller --onefile --collect-all boto3 cdn_manager.py
-move dist\cdn_manager.exe .\cdn_manager.exe
-rmdir /s /q dist
-rmdir /s /q build
-
@@ -36,8 +36,24 @@ class CDNManager:

    def download(self, bucket: str, filename: str):
        try:
-            self.download_client.download_file(bucket, filename, filename)
+            if filename is not None:
+                self.download_client.download_file(bucket, filename, os.path.join(bucket, filename))
                print(f'downloaded {filename} from the {bucket} to current folder')
+            else:
+                response = self.download_client.list_objects_v2(Bucket=bucket)
+                if 'Contents' in response:
+                    for obj in response['Contents']:
+                        object_key = obj['Key']
+                        local_filepath = os.path.join(bucket, object_key)
+                        local_dir = os.path.dirname(local_filepath)
+                        if local_dir:
+                            os.makedirs(local_dir, exist_ok=True)
+
+                        if not object_key.endswith('/'):
+                            try:
+                                self.download_client.download_file(bucket, object_key, local_filepath)
+                            except Exception as e_file:
+                                all_successful = False  # Mark as failed if any file fails
            return True
        except Exception as e:
            print(e)
@@ -58,7 +74,7 @@ cdn_manager = CDNManager(CDNCredentials(

 input_action = sys.argv[1]
 input_bucket = sys.argv[2]
-input_filename = sys.argv[3]
+input_filename = sys.argv[3] if len(sys.argv) > 3 else None
 if len(sys.argv) > 4: # 0 is this script's path, hence 5 args is max
    input_path = sys.argv[4]

@@ -43,6 +43,7 @@ venv\Scripts\pyinstaller --name=azaion-inference ^
 --collect-all onnxruntime ^
 --collect-all tensorrt ^
 --collect-all pycuda ^
+--collect-all pynvml ^
 --collect-all re ^
 --hidden-import constants ^
 --hidden-import annotation ^
@@ -71,8 +72,7 @@ cd..

 echo Download onnx model
 cd build
-call cdn_manager.exe download models azaion.onnx.big
-call cdn_manager.exe download models azaion.engine.big
+call cdn_manager.exe download models

 move azaion.* ..\dist\
 cd..