Separate load functionality from the inference client into a loader client. Call the loader client from inference to get the model.

Remove dummy DLLs; remove the resource loader from C#.

TODO: load DLLs separately via the Loader UI and the loader client

WIP
Alex Bezdieniezhnykh
2025-06-06 20:04:03 +03:00
parent 500db31142
commit 7750025631
54 changed files with 353 additions and 571 deletions
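
In short: resource loading moves out of the inference process into the standalone loader app, and the two now talk over ZeroMQ. A minimal sketch of the resulting wiring, using the class names from this diff and the loader defaults from run.py below (the on_annotation callback is illustrative; this is a sketch, not the app's actual startup code):

# hypothetical wiring of the pieces introduced in this commit
from loader_client import LoaderClient
from inference import Inference

loader = LoaderClient('127.0.0.1', 5025)            # connects a ZMQ DEALER socket to the loader app
inference = Inference(loader, on_annotation=print)  # Inference now pulls model bytes through the loader
# model files are fetched lazily, e.g. when build_tensor_engine or engine initialization runs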
-9
@@ -23,7 +23,6 @@ echo install azaion-inference
venv\Scripts\pyinstaller --name=azaion-inference ^
--collect-submodules cv2 ^
--add-data "venv\Lib\site-packages\cv2;cv2" ^
--collect-all requests ^
--collect-all psutil ^
--collect-all msgpack ^
--collect-all zmq ^
@@ -36,14 +35,6 @@ venv\Scripts\pyinstaller --name=azaion-inference ^
--collect-all jwt ^
--hidden-import constants ^
--hidden-import annotation ^
--hidden-import credentials ^
--hidden-import file_data ^
--hidden-import user ^
--hidden-import security ^
--hidden-import secure_model ^
--hidden-import cdn_manager ^
--hidden-import api_client ^
--hidden-import hardware_service ^
--hidden-import remote_command ^
--hidden-import ai_config ^
--hidden-import tensorrt_engine ^
+16
@@ -0,0 +1,16 @@
cdef class FileData:
    cdef public str folder
    cdef public str filename
    @staticmethod
    cdef from_msgpack(bytes data)
    cdef bytes serialize(self)

cdef class UploadFileData(FileData):
    cdef public bytes resource
    @staticmethod
    cdef from_msgpack(bytes data)
    cdef bytes serialize(self)
+40
@@ -0,0 +1,40 @@
from msgpack import unpackb, packb

cdef class FileData:
    def __init__(self, str folder, str filename):
        self.folder = folder
        self.filename = filename

    @staticmethod
    cdef from_msgpack(bytes data):
        unpacked = unpackb(data, strict_map_key=False)
        return FileData(
            unpacked.get("Folder"),
            unpacked.get("Filename"))

    cdef bytes serialize(self):
        return packb({
            "Folder": self.folder,
            "Filename": self.filename
        })

cdef class UploadFileData(FileData):
    def __init__(self, bytes resource, str folder, str filename):
        super().__init__(folder, filename)
        self.resource = resource

    @staticmethod
    cdef from_msgpack(bytes data):
        unpacked = unpackb(data, strict_map_key=False)
        return UploadFileData(
            unpacked.get("Resource"),
            unpacked.get("Folder"),
            unpacked.get("Filename"))

    cdef bytes serialize(self):
        return packb({
            "Resource": self.resource,
            "Folder": self.folder,
            "Filename": self.filename
        })
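
The wire format here is a plain msgpack map with PascalCase keys (presumably to match the C# peer). A quick round-trip in plain Python to illustrate what serialize() emits and from_msgpack() expects (illustrative only, not part of the commit):

from msgpack import packb, unpackb

payload = packb({'Folder': 'models', 'Filename': 'model.onnx'})  # same shape FileData.serialize() produces
assert unpackb(payload, strict_map_key=False) == {'Folder': 'models', 'Filename': 'model.onnx'}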
+2 -2
@@ -1,11 +1,11 @@
from remote_command cimport RemoteCommand
from annotation cimport Annotation, Detection
from ai_config cimport AIRecognitionConfig
from api_client cimport ApiClient
from loader_client cimport LoaderClient
from inference_engine cimport InferenceEngine

cdef class Inference:
    cdef ApiClient api_client
    cdef LoaderClient loader_client
    cdef InferenceEngine engine
    cdef object on_annotation
    cdef Annotation _previous_annotation
+45 -22
@@ -1,29 +1,54 @@
import json
import mimetypes
import os
import subprocess
import sys
import time
import cv2
import numpy as np
cimport constants
from remote_command cimport RemoteCommand
from annotation cimport Detection, Annotation
from ai_config cimport AIRecognitionConfig
from hardware_service cimport HardwareService
from security cimport Security
import pynvml

if HardwareService.has_nvidia_gpu():
cdef int tensor_gpu_index

cdef int check_tensor_gpu_index():
    try:
        pynvml.nvmlInit()
        deviceCount = pynvml.nvmlDeviceGetCount()
        if deviceCount == 0:
            print('No NVIDIA GPUs found.')
            return -1
        for i in range(deviceCount):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle)
            if major > 6 or (major == 6 and minor >= 1):
                print('found NVIDIA GPU!')
                return i
        print("NVIDIA GPU doesn't support TensorRT!")
        return -1
    except pynvml.NVMLError:
        return -1
    finally:
        try:
            pynvml.nvmlShutdown()
        except Exception:
            print('Failed to shut down pynvml; probably no NVIDIA GPU present')

tensor_gpu_index = check_tensor_gpu_index()
if tensor_gpu_index > -1:
    from tensorrt_engine import TensorRTEngine
else:
    from onnx_engine import OnnxEngine

cdef class Inference:
    def __init__(self, api_client, on_annotation):
        self.api_client = api_client
    def __init__(self, loader_client, on_annotation):
        self.loader_client = loader_client
        self.on_annotation = on_annotation
        self.stop_signal = False
        self.model_input = None
@@ -33,27 +58,26 @@ cdef class Inference:
        self.is_building_engine = False

    cdef build_tensor_engine(self, object updater_callback):
        is_nvidia = HardwareService.has_nvidia_gpu()
        if not is_nvidia:
        if tensor_gpu_index == -1:
            return
        engine_filename = TensorRTEngine.get_engine_filename(0)
        key = Security.get_model_encryption_key()
        models_dir = constants.MODELS_FOLDER
        self.is_building_engine = True
        updater_callback('downloading')
        if self.api_client.load_big_small_resource(engine_filename, models_dir, key):
        if self.loader_client.load_big_small_resource(engine_filename, models_dir):
            print('tensor rt engine is here, no need to build')
            self.is_building_engine = False
            return
        # time.sleep(8)  # prevent loading DLLs and models simultaneously
        updater_callback('converting')
        onnx_model = self.api_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir, key)
        onnx_model = self.loader_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir)
        model_bytes = TensorRTEngine.convert_from_onnx(onnx_model)
        updater_callback('uploading')
        self.api_client.upload_big_small_resource(model_bytes, <str> engine_filename, models_dir, key)
        self.loader_client.upload_big_small_resource(model_bytes, <str> engine_filename, models_dir)
        print(f'uploaded {engine_filename} to CDN and API')
        self.is_building_engine = False
@@ -61,17 +85,16 @@ cdef class Inference:
        if self.engine is not None:
            return
        is_nvidia = HardwareService.has_nvidia_gpu()
        key = Security.get_model_encryption_key()
        models_dir = constants.MODELS_FOLDER
        if is_nvidia:
        if tensor_gpu_index > -1:
            while self.is_building_engine:
                time.sleep(1)
            engine_filename = TensorRTEngine.get_engine_filename(0)
            model_bytes = self.api_client.load_big_small_resource(engine_filename, models_dir, key)
            model_bytes = self.loader_client.load_big_small_resource(engine_filename, models_dir)
            self.engine = TensorRTEngine(model_bytes)
        else:
            model_bytes = self.api_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir, key)
            model_bytes = self.loader_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir)
            self.engine = OnnxEngine(model_bytes)
        self.model_height, self.model_width = self.engine.get_input_shape()
+13
@@ -0,0 +1,13 @@
from remote_command cimport RemoteCommand

cdef class LoaderClient:
    cdef object _context
    cdef object _socket
    cdef RemoteCommand _send_receive_command(self, RemoteCommand command)
    cdef load_big_small_resource(self, str filename, str directory)
    cdef upload_big_small_resource(self, bytes content, str filename, str directory)
    cdef close(self)
+39
@@ -0,0 +1,39 @@
import zmq
from remote_command cimport RemoteCommand, CommandType
from file_data cimport FileData, UploadFileData

cdef class LoaderClient:
    def __init__(self, str zmq_host, int zmq_port):
        self._context = zmq.Context.instance()
        self._socket = self._context.socket(zmq.DEALER)
        self._socket.connect(f'tcp://{zmq_host}:{zmq_port}')

    cdef RemoteCommand _send_receive_command(self, RemoteCommand command):
        self._socket.send(command.serialize())
        return RemoteCommand.from_msgpack(self._socket.recv())

    cdef load_big_small_resource(self, str filename, str directory):
        cdef FileData file_data = FileData(folder=directory, filename=filename)
        cdef RemoteCommand response = self._send_receive_command(RemoteCommand(CommandType.LOAD_BIG_SMALL, data=file_data.serialize()))
        if response.command_type == CommandType.DATA_BYTES:
            return response.data
        elif response.command_type == CommandType.ERROR:
            raise Exception(f"Error from server: {response.message}")
        else:
            raise Exception(f"Unexpected response command type: {response.command_type}")

    cdef upload_big_small_resource(self, bytes content, str filename, str directory):
        # UploadFileData takes (resource, folder, filename), so the directory goes second
        cdef UploadFileData upload_file_data = UploadFileData(content, directory, filename)
        cdef RemoteCommand upload_resp = self._send_receive_command(RemoteCommand(CommandType.UPLOAD_BIG_SMALL, data=upload_file_data.serialize()))
        if upload_resp.command_type == CommandType.OK:
            return
        elif upload_resp.command_type == CommandType.ERROR:
            raise Exception(f"Error from server: {upload_resp.message}")
        else:
            raise Exception(f"Unexpected response command type: {upload_resp.command_type}")

    cdef close(self):
        if self._socket and not self._socket.closed:
            self._socket.close()
        if self._context and not self._context.closed:
            self._context.term()
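
The loader-side peer is not part of this diff (per the TODO in main.pyx it is the external azaion_loader app). Purely to illustrate the protocol shape this DEALER client expects, a toy responder sketch with a ROUTER socket; parse_remote_command(), handle(), and everything around them are hypothetical:

import zmq

ctx = zmq.Context.instance()
socket = ctx.socket(zmq.ROUTER)
socket.bind('tcp://127.0.0.1:5025')
while True:
    identity, raw = socket.recv_multipart()   # ROUTER prepends the DEALER peer's identity frame
    command = parse_remote_command(raw)       # hypothetical: decode the msgpack-framed RemoteCommand
    reply = handle(command)                   # hypothetical: answer LOAD_BIG_SMALL with DATA_BYTES, UPLOAD_BIG_SMALL with OK
    socket.send_multipart([identity, reply])  # reply is a serialized RemoteCommand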
+9 -29
@@ -4,10 +4,9 @@ from queue import Queue
cimport constants
from threading import Thread
import yaml
from annotation cimport Annotation
from inference cimport Inference
from loader_client cimport LoaderClient
from remote_command cimport RemoteCommand, CommandType
from remote_command_handler cimport RemoteCommandHandler
@@ -17,14 +16,16 @@ cdef class CommandProcessor:
    cdef object inference_queue
    cdef bint running
    cdef Inference inference
    cdef LoaderClient loader_client
    def __init__(self, int zmq_port, str api_url):
    def __init__(self, int zmq_port, str loader_zmq_host, int loader_zmq_port, str api_url):
        self.remote_handler = RemoteCommandHandler(zmq_port, self.on_command)
        self.inference_queue = Queue(maxsize=constants.QUEUE_MAXSIZE)
        self.remote_handler.start()
        self.running = True
        self.loader_client = LoaderClient(loader_zmq_host, loader_zmq_port)
        # TODO: replace api_client with a call to azaion_loader.exe
        self.inference = Inference(self.api_client, self.on_annotation)
        self.inference = Inference(self.loader_client, self.on_annotation)

    def start(self):
        while self.running:
@@ -41,14 +42,12 @@ cdef class CommandProcessor:
    cdef on_command(self, RemoteCommand command):
        try:
            if command.command_type == CommandType.LOGIN:
                self.api_client.set_credentials(Credentials.from_msgpack(command.data))
            elif command.command_type == CommandType.LOAD:
                self.load_file(command)
            elif command.command_type == CommandType.INFERENCE:
            if command.command_type == CommandType.INFERENCE:
                self.inference_queue.put(command)
            elif command.command_type == CommandType.AI_AVAILABILITY_CHECK:
                self.build_tensor_engine(command.client_id)
                self.inference.build_tensor_engine(lambda status: self.remote_handler.send(
                    command.client_id, RemoteCommand(CommandType.AI_AVAILABILITY_RESULT, None, status).serialize()))
                self.remote_handler.send(command.client_id, RemoteCommand(CommandType.AI_AVAILABILITY_RESULT, None, 'enabled').serialize())
            elif command.command_type == CommandType.STOP_INFERENCE:
                self.inference.stop()
            elif command.command_type == CommandType.EXIT:
@@ -59,25 +58,6 @@ cdef class CommandProcessor:
        except Exception as e:
            print(f"Error handling client: {e}")

    cdef build_tensor_engine(self, client_id):
        self.inference.build_tensor_engine(lambda status: self.build_tensor_status_updater(client_id, status))
        self.remote_handler.send(client_id, RemoteCommand(CommandType.AI_AVAILABILITY_RESULT, None, 'enabled').serialize())

    cdef build_tensor_status_updater(self, bytes client_id, str status):
        self.remote_handler.send(client_id, RemoteCommand(CommandType.AI_AVAILABILITY_RESULT, None, status).serialize())

    cdef load_file(self, RemoteCommand command):
        cdef RemoteCommand response
        cdef FileData file_data
        cdef bytes file_bytes
        try:
            file_data = FileData.from_msgpack(command.data)
            file_bytes = self.api_client.load_bytes(file_data.filename, file_data.folder)
            response = RemoteCommand(CommandType.DATA_BYTES, file_bytes)
        except Exception as e:
            response = RemoteCommand(CommandType.DATA_BYTES, None, str(e))
        self.remote_handler.send(command.client_id, response.serialize())

    cdef on_annotation(self, RemoteCommand cmd, Annotation annotation):
        cdef RemoteCommand response = RemoteCommand(CommandType.INFERENCE_DATA, annotation.serialize())
        self.remote_handler.send(cmd.client_id, response.serialize())
+3
@@ -1,6 +1,9 @@
cdef enum CommandType:
    OK = 3
    LOGIN = 10
    LOAD = 20
    LOAD_BIG_SMALL = 22
    UPLOAD_BIG_SMALL = 24
    DATA_BYTES = 25
    INFERENCE = 30
    INFERENCE_DATA = 35
+3
@@ -8,8 +8,11 @@ cdef class RemoteCommand:
    def __str__(self):
        command_type_names = {
            3: "OK",
            10: "LOGIN",
            20: "LOAD",
            22: "LOAD_BIG_SMALL",
            24: "UPLOAD_BIG_SMALL",
            25: "DATA_BYTES",
            30: "INFERENCE",
            35: "INFERENCE_DATA",
+1 -6
@@ -5,15 +5,10 @@ import numpy as np
extensions = [
    Extension('constants', ['constants.pyx']),
    Extension('annotation', ['annotation.pyx']),
    Extension('credentials', ['credentials.pyx']),
    Extension('file_data', ['file_data.pyx']),
    Extension('hardware_service', ['hardware_service.pyx'], extra_compile_args=["-g"], extra_link_args=["-g"]),
    Extension('security', ['security.pyx']),
    Extension('loader_client', ['loader_client.pyx']),
    Extension('remote_command', ['remote_command.pyx']),
    Extension('remote_command_handler', ['remote_command_handler.pyx']),
    Extension('user', ['user.pyx']),
    Extension('cdn_manager', ['cdn_manager.pyx']),
    Extension('api_client', ['api_client.pyx']),
    Extension('ai_config', ['ai_config.pyx']),
    Extension('tensorrt_engine', ['tensorrt_engine.pyx'], include_dirs=[np.get_include()]),
    Extension('onnx_engine', ['onnx_engine.pyx'], include_dirs=[np.get_include()]),
+9 -11
@@ -2,18 +2,16 @@ from main import CommandProcessor
import argparse
def start(zmq_port, api_url):
    processor = CommandProcessor(zmq_port, api_url)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", "--port", type=int, default=5127, help="zero mq port")
    parser.add_argument("-lh", "--loader-host", type=str, default="127.0.0.1", help="zero mq loader app host")
    parser.add_argument("-lp", "--loader-port", type=int, default=5025, help="zero mq loader app port")
    parser.add_argument("-a", "--api", type=str, default="https://api.azaion.com", help="api url")
    args = parser.parse_args()
    processor = CommandProcessor(args.port, args.loader_host, args.loader_port, args.api)
    try:
        processor.start()
    except KeyboardInterrupt:
        processor.stop()
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", "--port", type=str, default="5127", help="zero mq port")
    parser.add_argument("-a", "--api", type=str, default="https://api.azaion.com", help="api url")
    args = parser.parse_args()
    start(int(args.port), args.api)
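
With the new arguments, a typical launch of the packaged app would look something like the following (executable name per the PyInstaller script at the top; all values are the defaults above):

azaion-inference.exe -p 5127 -lh 127.0.0.1 -lp 5025 -a https://api.azaion.com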