Separate load functionality from the inference client into a loader client. Call the loader client from inference to get the model.

Remove dummy DLLs; remove the resource loader from C#.

TODO: load DLLs separately via the Loader UI and the loader client

WIP
Alex Bezdieniezhnykh
2025-06-06 20:04:03 +03:00
parent 500db31142
commit 7750025631
54 changed files with 353 additions and 571 deletions
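
In short: resource loading moves out of the inference process into the standalone loader app, and the two now talk over ZeroMQ. A minimal sketch of the resulting wiring, using the class names from this diff and the loader defaults from run.py below (the on_annotation callback is illustrative; this is a sketch, not the app's actual startup code):

# hypothetical wiring of the pieces introduced in this commit
from loader_client import LoaderClient
from inference import Inference

loader = LoaderClient('127.0.0.1', 5025)            # connects a ZMQ DEALER socket to the loader app
inference = Inference(loader, on_annotation=print)  # Inference now pulls model bytes through the loader
# model files are fetched lazily, e.g. when build_tensor_engine or engine initialization runs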
-9
@@ -23,7 +23,6 @@ echo install azaion-inference
venv\Scripts\pyinstaller --name=azaion-inference ^
--collect-submodules cv2 ^
--add-data "venv\Lib\site-packages\cv2;cv2" ^
--collect-all requests ^
--collect-all psutil ^
--collect-all msgpack ^
--collect-all zmq ^
@@ -36,14 +35,6 @@ venv\Scripts\pyinstaller --name=azaion-inference ^
--collect-all jwt ^
--hidden-import constants ^
--hidden-import annotation ^
--hidden-import credentials ^
--hidden-import file_data ^
--hidden-import user ^
--hidden-import security ^
--hidden-import secure_model ^
--hidden-import cdn_manager ^
--hidden-import api_client ^
--hidden-import hardware_service ^
--hidden-import remote_command ^
--hidden-import ai_config ^
--hidden-import tensorrt_engine ^
+16
@@ -0,0 +1,16 @@
cdef class FileData:
    cdef public str folder
    cdef public str filename
    @staticmethod
    cdef from_msgpack(bytes data)
    cdef bytes serialize(self)

cdef class UploadFileData(FileData):
    cdef public bytes resource
    @staticmethod
    cdef from_msgpack(bytes data)
    cdef bytes serialize(self)
+40
@@ -0,0 +1,40 @@
from msgpack import unpackb, packb

cdef class FileData:
    def __init__(self, str folder, str filename):
        self.folder = folder
        self.filename = filename

    @staticmethod
    cdef from_msgpack(bytes data):
        unpacked = unpackb(data, strict_map_key=False)
        return FileData(
            unpacked.get("Folder"),
            unpacked.get("Filename"))

    cdef bytes serialize(self):
        return packb({
            "Folder": self.folder,
            "Filename": self.filename
        })

cdef class UploadFileData(FileData):
    def __init__(self, bytes resource, str folder, str filename):
        super().__init__(folder, filename)
        self.resource = resource

    @staticmethod
    cdef from_msgpack(bytes data):
        unpacked = unpackb(data, strict_map_key=False)
        return UploadFileData(
            unpacked.get("Resource"),
            unpacked.get("Folder"),
            unpacked.get("Filename"))

    cdef bytes serialize(self):
        return packb({
            "Resource": self.resource,
            "Folder": self.folder,
            "Filename": self.filename
        })
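
The wire format here is a plain msgpack map with PascalCase keys (presumably to match the C# peer). A quick round-trip in plain Python to illustrate what serialize() emits and from_msgpack() expects (illustrative only, not part of the commit):

from msgpack import packb, unpackb

payload = packb({'Folder': 'models', 'Filename': 'model.onnx'})  # same shape FileData.serialize() produces
assert unpackb(payload, strict_map_key=False) == {'Folder': 'models', 'Filename': 'model.onnx'}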
+2 -2
@@ -1,11 +1,11 @@
from remote_command cimport RemoteCommand
from annotation cimport Annotation, Detection
from ai_config cimport AIRecognitionConfig
from api_client cimport ApiClient
from loader_client cimport LoaderClient
from inference_engine cimport InferenceEngine

cdef class Inference:
    cdef ApiClient api_client
    cdef LoaderClient loader_client
    cdef InferenceEngine engine
    cdef object on_annotation
    cdef Annotation _previous_annotation
+45 -22
@@ -1,29 +1,54 @@
import json
import mimetypes
import os
import subprocess
import sys
import time
import cv2
import numpy as np
cimport constants
from remote_command cimport RemoteCommand
from annotation cimport Detection, Annotation
from ai_config cimport AIRecognitionConfig
from hardware_service cimport HardwareService
from security cimport Security
import pynvml

if HardwareService.has_nvidia_gpu():
cdef int tensor_gpu_index

cdef int check_tensor_gpu_index():
    try:
        pynvml.nvmlInit()
        deviceCount = pynvml.nvmlDeviceGetCount()
        if deviceCount == 0:
            print('No NVIDIA GPUs found.')
            return -1
        for i in range(deviceCount):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle)
            if major > 6 or (major == 6 and minor >= 1):
                print('found NVIDIA GPU!')
                return i
        print("NVIDIA GPU doesn't support TensorRT!")
        return -1
    except pynvml.NVMLError:
        return -1
    finally:
        try:
            pynvml.nvmlShutdown()
        except Exception:
            print('Failed to shut down pynvml; probably no NVIDIA GPU present')

tensor_gpu_index = check_tensor_gpu_index()
if tensor_gpu_index > -1:
    from tensorrt_engine import TensorRTEngine
else:
    from onnx_engine import OnnxEngine

cdef class Inference:
    def __init__(self, api_client, on_annotation):
        self.api_client = api_client
    def __init__(self, loader_client, on_annotation):
        self.loader_client = loader_client
        self.on_annotation = on_annotation
        self.stop_signal = False
        self.model_input = None
@@ -33,27 +58,26 @@ cdef class Inference:
        self.is_building_engine = False

    cdef build_tensor_engine(self, object updater_callback):
        is_nvidia = HardwareService.has_nvidia_gpu()
        if not is_nvidia:
        if tensor_gpu_index == -1:
            return
        engine_filename = TensorRTEngine.get_engine_filename(0)
        key = Security.get_model_encryption_key()
        models_dir = constants.MODELS_FOLDER
        self.is_building_engine = True
        updater_callback('downloading')
        if self.api_client.load_big_small_resource(engine_filename, models_dir, key):
        if self.loader_client.load_big_small_resource(engine_filename, models_dir):
            print('tensor rt engine is here, no need to build')
            self.is_building_engine = False
            return
        # time.sleep(8)  # prevent loading DLLs and models simultaneously
        updater_callback('converting')
        onnx_model = self.api_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir, key)
        onnx_model = self.loader_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir)
        model_bytes = TensorRTEngine.convert_from_onnx(onnx_model)
        updater_callback('uploading')
        self.api_client.upload_big_small_resource(model_bytes, <str> engine_filename, models_dir, key)
        self.loader_client.upload_big_small_resource(model_bytes, <str> engine_filename, models_dir)
        print(f'uploaded {engine_filename} to CDN and API')
        self.is_building_engine = False
@@ -61,17 +85,16 @@ cdef class Inference:
        if self.engine is not None:
            return
        is_nvidia = HardwareService.has_nvidia_gpu()
        key = Security.get_model_encryption_key()
        models_dir = constants.MODELS_FOLDER
        if is_nvidia:
        if tensor_gpu_index > -1:
            while self.is_building_engine:
                time.sleep(1)
            engine_filename = TensorRTEngine.get_engine_filename(0)
            model_bytes = self.api_client.load_big_small_resource(engine_filename, models_dir, key)
            model_bytes = self.loader_client.load_big_small_resource(engine_filename, models_dir)
            self.engine = TensorRTEngine(model_bytes)
        else:
            model_bytes = self.api_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir, key)
            model_bytes = self.loader_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir)
            self.engine = OnnxEngine(model_bytes)
        self.model_height, self.model_width = self.engine.get_input_shape()
+13
@@ -0,0 +1,13 @@
from remote_command cimport RemoteCommand

cdef class LoaderClient:
    cdef object _context
    cdef object _socket
    cdef RemoteCommand _send_receive_command(self, RemoteCommand command)
    cdef load_big_small_resource(self, str filename, str directory)
    cdef upload_big_small_resource(self, bytes content, str filename, str directory)
    cdef close(self)
+39
@@ -0,0 +1,39 @@
import zmq
from remote_command cimport RemoteCommand, CommandType
from file_data cimport FileData, UploadFileData

cdef class LoaderClient:
    def __init__(self, str zmq_host, int zmq_port):
        self._context = zmq.Context.instance()
        self._socket = self._context.socket(zmq.DEALER)
        self._socket.connect(f'tcp://{zmq_host}:{zmq_port}')

    cdef RemoteCommand _send_receive_command(self, RemoteCommand command):
        self._socket.send(command.serialize())
        return RemoteCommand.from_msgpack(self._socket.recv())

    cdef load_big_small_resource(self, str filename, str directory):
        cdef FileData file_data = FileData(folder=directory, filename=filename)
        cdef RemoteCommand response = self._send_receive_command(RemoteCommand(CommandType.LOAD_BIG_SMALL, data=file_data.serialize()))
        if response.command_type == CommandType.DATA_BYTES:
            return response.data
        elif response.command_type == CommandType.ERROR:
            raise Exception(f"Error from server: {response.message}")
        else:
            raise Exception(f"Unexpected response command type: {response.command_type}")

    cdef upload_big_small_resource(self, bytes content, str filename, str directory):
        # UploadFileData takes (resource, folder, filename), so the directory goes second
        cdef UploadFileData upload_file_data = UploadFileData(content, directory, filename)
        cdef RemoteCommand upload_resp = self._send_receive_command(RemoteCommand(CommandType.UPLOAD_BIG_SMALL, data=upload_file_data.serialize()))
        if upload_resp.command_type == CommandType.OK:
            return
        elif upload_resp.command_type == CommandType.ERROR:
            raise Exception(f"Error from server: {upload_resp.message}")
        else:
            raise Exception(f"Unexpected response command type: {upload_resp.command_type}")

    cdef close(self):
        if self._socket and not self._socket.closed:
            self._socket.close()
        if self._context and not self._context.closed:
            self._context.term()
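
The loader-side peer is not part of this diff (per the TODO in main.pyx it is the external azaion_loader app). Purely to illustrate the protocol shape this DEALER client expects, a toy responder sketch with a ROUTER socket; parse_remote_command(), handle(), and everything around them are hypothetical:

import zmq

ctx = zmq.Context.instance()
socket = ctx.socket(zmq.ROUTER)
socket.bind('tcp://127.0.0.1:5025')
while True:
    identity, raw = socket.recv_multipart()   # ROUTER prepends the DEALER peer's identity frame
    command = parse_remote_command(raw)       # hypothetical: decode the msgpack-framed RemoteCommand
    reply = handle(command)                   # hypothetical: answer LOAD_BIG_SMALL with DATA_BYTES, UPLOAD_BIG_SMALL with OK
    socket.send_multipart([identity, reply])  # reply is a serialized RemoteCommand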
+9 -29
@@ -4,10 +4,9 @@ from queue import Queue
cimport constants
from threading import Thread
import yaml
from annotation cimport Annotation
from inference cimport Inference
from loader_client cimport LoaderClient
from remote_command cimport RemoteCommand, CommandType
from remote_command_handler cimport RemoteCommandHandler
@@ -17,14 +16,16 @@ cdef class CommandProcessor:
    cdef object inference_queue
    cdef bint running
    cdef Inference inference
    cdef LoaderClient loader_client
    def __init__(self, int zmq_port, str api_url):
    def __init__(self, int zmq_port, str loader_zmq_host, int loader_zmq_port, str api_url):
        self.remote_handler = RemoteCommandHandler(zmq_port, self.on_command)
        self.inference_queue = Queue(maxsize=constants.QUEUE_MAXSIZE)
        self.remote_handler.start()
        self.running = True
        self.loader_client = LoaderClient(loader_zmq_host, loader_zmq_port)
        # TODO: replace api_client with a call to azaion_loader.exe
        self.inference = Inference(self.api_client, self.on_annotation)
        self.inference = Inference(self.loader_client, self.on_annotation)

    def start(self):
        while self.running:
@@ -41,14 +42,12 @@ cdef class CommandProcessor:
    cdef on_command(self, RemoteCommand command):
        try:
            if command.command_type == CommandType.LOGIN:
                self.api_client.set_credentials(Credentials.from_msgpack(command.data))
            elif command.command_type == CommandType.LOAD:
                self.load_file(command)
            elif command.command_type == CommandType.INFERENCE:
            if command.command_type == CommandType.INFERENCE:
                self.inference_queue.put(command)
            elif command.command_type == CommandType.AI_AVAILABILITY_CHECK:
                self.build_tensor_engine(command.client_id)
                self.inference.build_tensor_engine(lambda status: self.remote_handler.send(
                    command.client_id, RemoteCommand(CommandType.AI_AVAILABILITY_RESULT, None, status).serialize()))
                self.remote_handler.send(command.client_id, RemoteCommand(CommandType.AI_AVAILABILITY_RESULT, None, 'enabled').serialize())
            elif command.command_type == CommandType.STOP_INFERENCE:
                self.inference.stop()
            elif command.command_type == CommandType.EXIT:
@@ -59,25 +58,6 @@ cdef class CommandProcessor:
        except Exception as e:
            print(f"Error handling client: {e}")

    cdef build_tensor_engine(self, client_id):
        self.inference.build_tensor_engine(lambda status: self.build_tensor_status_updater(client_id, status))
        self.remote_handler.send(client_id, RemoteCommand(CommandType.AI_AVAILABILITY_RESULT, None, 'enabled').serialize())

    cdef build_tensor_status_updater(self, bytes client_id, str status):
        self.remote_handler.send(client_id, RemoteCommand(CommandType.AI_AVAILABILITY_RESULT, None, status).serialize())

    cdef load_file(self, RemoteCommand command):
        cdef RemoteCommand response
        cdef FileData file_data
        cdef bytes file_bytes
        try:
            file_data = FileData.from_msgpack(command.data)
            file_bytes = self.api_client.load_bytes(file_data.filename, file_data.folder)
            response = RemoteCommand(CommandType.DATA_BYTES, file_bytes)
        except Exception as e:
            response = RemoteCommand(CommandType.DATA_BYTES, None, str(e))
        self.remote_handler.send(command.client_id, response.serialize())

    cdef on_annotation(self, RemoteCommand cmd, Annotation annotation):
        cdef RemoteCommand response = RemoteCommand(CommandType.INFERENCE_DATA, annotation.serialize())
        self.remote_handler.send(cmd.client_id, response.serialize())
+3
@@ -1,6 +1,9 @@
cdef enum CommandType:
    OK = 3
    LOGIN = 10
    LOAD = 20
    LOAD_BIG_SMALL = 22
    UPLOAD_BIG_SMALL = 24
    DATA_BYTES = 25
    INFERENCE = 30
    INFERENCE_DATA = 35
+3
@@ -8,8 +8,11 @@ cdef class RemoteCommand:
    def __str__(self):
        command_type_names = {
            3: "OK",
            10: "LOGIN",
            20: "LOAD",
            22: "LOAD_BIG_SMALL",
            24: "UPLOAD_BIG_SMALL",
            25: "DATA_BYTES",
            30: "INFERENCE",
            35: "INFERENCE_DATA",
+1 -6
@@ -5,15 +5,10 @@ import numpy as np
extensions = [
    Extension('constants', ['constants.pyx']),
    Extension('annotation', ['annotation.pyx']),
    Extension('credentials', ['credentials.pyx']),
    Extension('file_data', ['file_data.pyx']),
    Extension('hardware_service', ['hardware_service.pyx'], extra_compile_args=["-g"], extra_link_args=["-g"]),
    Extension('security', ['security.pyx']),
    Extension('loader_client', ['loader_client.pyx']),
    Extension('remote_command', ['remote_command.pyx']),
    Extension('remote_command_handler', ['remote_command_handler.pyx']),
    Extension('user', ['user.pyx']),
    Extension('cdn_manager', ['cdn_manager.pyx']),
    Extension('api_client', ['api_client.pyx']),
    Extension('ai_config', ['ai_config.pyx']),
    Extension('tensorrt_engine', ['tensorrt_engine.pyx'], include_dirs=[np.get_include()]),
    Extension('onnx_engine', ['onnx_engine.pyx'], include_dirs=[np.get_include()]),
+9 -11
@@ -2,18 +2,16 @@ from main import CommandProcessor
import argparse
def start(zmq_port, api_url):
    processor = CommandProcessor(zmq_port, api_url)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", "--port", type=int, default=5127, help="zero mq port")
    parser.add_argument("-lh", "--loader-host", type=str, default="127.0.0.1", help="zero mq loader app host")
    parser.add_argument("-lp", "--loader-port", type=int, default=5025, help="zero mq loader app port")
    parser.add_argument("-a", "--api", type=str, default="https://api.azaion.com", help="api url")
    args = parser.parse_args()
    processor = CommandProcessor(args.port, args.loader_host, args.loader_port, args.api)
    try:
        processor.start()
    except KeyboardInterrupt:
        processor.stop()
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", "--port", type=str, default="5127", help="zero mq port")
    parser.add_argument("-a", "--api", type=str, default="https://api.azaion.com", help="api url")
    args = parser.parse_args()
    start(int(args.port), args.api)
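
With the new arguments, a typical launch of the packaged app would look something like the following (executable name per the PyInstaller script at the top; all values are the defaults above):

azaion-inference.exe -p 5127 -lh 127.0.0.1 -lp 5025 -a https://api.azaion.com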