Merge remote-tracking branch 'origin/dev' into dev

2026-06-22 06:11:07 +00:00 · 2025-05-01 01:18:24 +03:00
parent a1aedd7332 28069f63f9
commit 1b4901d568
7 changed files with 38 additions and 28 deletions
@@ -4,7 +4,7 @@ from PyInstaller.utils.hooks import collect_all
 datas = [('venv\\Lib\\site-packages\\cv2', 'cv2')]
 binaries = []
-hiddenimports = ['constants', 'annotation', 'credentials', 'file_data', 'user', 'security', 'secure_model', 'cdn_manager', 'api_client', 'hardware_service', 'remote_command', 'ai_config', 'inference_engine', 'inference', 'remote_command_handler']
+hiddenimports = ['constants', 'annotation', 'credentials', 'file_data', 'user', 'security', 'secure_model', 'cdn_manager', 'api_client', 'hardware_service', 'remote_command', 'ai_config', 'tensorrt_engine', 'onnx_engine', 'inference_engine', 'inference', 'remote_command_handler']
 hiddenimports += collect_submodules('cv2')
 tmp_ret = collect_all('requests')
 datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
@@ -28,6 +28,10 @@ tmp_ret = collect_all('pynvml')
 datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
 tmp_ret = collect_all('boto3')
 datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
 tmp_ret = collect_all('re')
 datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
 tmp_ret = collect_all('jwt')
 datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
 a = Analysis(
@@ -45,6 +45,8 @@ venv\Scripts\pyinstaller --name=azaion-inference ^
 --hidden-import hardware_service ^
 --hidden-import remote_command ^
 --hidden-import ai_config ^
 --hidden-import tensorrt_engine ^
 --hidden-import onnx_engine ^
 --hidden-import inference_engine ^
 --hidden-import inference ^
 --hidden-import remote_command_handler ^
@@ -1,6 +1,7 @@
 import re
 import subprocess
 import psutil
 import pynvml
 cdef class HardwareInfo:
    def __init__(self, str cpu, str gpu, str memory, str mac_address):
@@ -46,14 +47,25 @@ cdef class HardwareService:
    @staticmethod
    cdef has_nvidia_gpu():
        try:
-            output = subprocess.check_output(['nvidia-smi']).decode()
+            pynvml.nvmlInit()
-            match = re.search(r'CUDA Version:\s*([\d.]+)', output)
+            device_count = pynvml.nvmlDeviceGetCount()
-            if match:
+
-                return float(match.group(1)) > 11
+            if device_count > 0:
                print(f"Found NVIDIA GPU(s).")
                return True
            else:
                print("No NVIDIA GPUs found by NVML.")
                return False
-        except Exception as e:
+
-            print(e)
+        except pynvml.NVMLError as error:
            print(f"Failed to find NVIDIA GPU")
            return False
        finally:
            try:
                pynvml.nvmlShutdown()
            except:
                print('Failed to shutdown pynvml cause probably no NVidia GPU')
                pass
    cdef HardwareInfo get_hardware_info(self):
        if self.is_windows:
@@ -16,7 +16,7 @@ from hardware_service cimport HardwareService
 from security cimport Security
 if HardwareService.has_nvidia_gpu():
-    from tensorrt_engine cimport TensorRTEngine
+    from tensorrt_engine import TensorRTEngine
 else:
    from onnx_engine import OnnxEngine
@@ -14,11 +14,11 @@ cdef class OnnxEngine(InferenceEngine):
        model_meta = self.session.get_modelmeta()
        print("Metadata:", model_meta.custom_metadata_map)
-    cdef tuple get_input_shape(self):
+    cpdef tuple get_input_shape(self):
        shape = self.input_shape
        return shape[2], shape[3]
-    cdef int get_batch_size(self):
+    cpdef int get_batch_size(self):
        return self.batch_size
    cpdef run(self, input_data):
@@ -16,17 +16,9 @@ cdef class TensorRTEngine(InferenceEngine):
    cdef object stream
    @staticmethod
    cdef get_gpu_memory_bytes(int device_id)
-    @staticmethod
+    cpdef tuple get_input_shape(self)
    cdef get_engine_filename(int device_id)
-    @staticmethod
+    cpdef int get_batch_size(self)
    cdef convert_from_onnx(bytes onnx_model)
-    cdef tuple get_input_shape(self)
+    cpdef run(self, input_data)
    cdef int get_batch_size(self)
    cdef run(self, input_data)
@@ -56,7 +56,7 @@ cdef class TensorRTEngine(InferenceEngine):
            raise RuntimeError(f"Failed to initialize TensorRT engine: {str(e)}")
    @staticmethod
-    cdef get_gpu_memory_bytes(int device_id):
+    def get_gpu_memory_bytes(int device_id):
        total_memory = None
        try:
            pynvml.nvmlInit()
@@ -73,7 +73,7 @@ cdef class TensorRTEngine(InferenceEngine):
        return 2 * 1024 * 1024 * 1024 if total_memory is None else total_memory # default 2 Gb
    @staticmethod
-    cdef get_engine_filename(int device_id):
+    def get_engine_filename(int device_id):
        try:
            device = cuda.Device(device_id)
            sm_count = device.multiprocessor_count
@@ -83,7 +83,7 @@ cdef class TensorRTEngine(InferenceEngine):
            return None
    @staticmethod
-    cdef convert_from_onnx(bytes onnx_model):
+    def convert_from_onnx(bytes onnx_model):
        workspace_bytes = int(TensorRTEngine.get_gpu_memory_bytes(0) * 0.9)
        explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
@@ -112,13 +112,13 @@ cdef class TensorRTEngine(InferenceEngine):
            constants.log('conversion done!')
            return bytes(plan)
-    cdef tuple get_input_shape(self):
+    cpdef tuple get_input_shape(self):
        return self.input_shape[2], self.input_shape[3]
-    cdef int get_batch_size(self):
+    cpdef int get_batch_size(self):
        return self.batch_size
-    cdef run(self, input_data):
+    cpdef run(self, input_data):
        try:
            cuda.memcpy_htod_async(self.d_input, input_data, self.stream)
            self.context.set_tensor_address(self.input_name, int(self.d_input))  # input buffer