From b3db108f59db738a30b1dae5bed62a16092bab0f Mon Sep 17 00:00:00 2001 From: Alex Bezdieniezhnykh Date: Wed, 30 Apr 2025 21:43:36 +0300 Subject: [PATCH 1/5] add missing packages to build script --- Azaion.Inference/azaion-inference.spec | 6 +++++- Azaion.Inference/build.cmd | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Azaion.Inference/azaion-inference.spec b/Azaion.Inference/azaion-inference.spec index 80ecf15..5a83791 100644 --- a/Azaion.Inference/azaion-inference.spec +++ b/Azaion.Inference/azaion-inference.spec @@ -4,7 +4,7 @@ from PyInstaller.utils.hooks import collect_all datas = [('venv\\Lib\\site-packages\\cv2', 'cv2')] binaries = [] -hiddenimports = ['constants', 'annotation', 'credentials', 'file_data', 'user', 'security', 'secure_model', 'cdn_manager', 'api_client', 'hardware_service', 'remote_command', 'ai_config', 'inference_engine', 'inference', 'remote_command_handler'] +hiddenimports = ['constants', 'annotation', 'credentials', 'file_data', 'user', 'security', 'secure_model', 'cdn_manager', 'api_client', 'hardware_service', 'remote_command', 'ai_config', 'tensorrt_engine', 'onnx_engine', 'inference_engine', 'inference', 'remote_command_handler'] hiddenimports += collect_submodules('cv2') tmp_ret = collect_all('requests') datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2] @@ -28,6 +28,10 @@ tmp_ret = collect_all('pynvml') datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2] tmp_ret = collect_all('boto3') datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2] +tmp_ret = collect_all('re') +datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2] +tmp_ret = collect_all('jwt') +datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2] a = Analysis( diff --git a/Azaion.Inference/build.cmd b/Azaion.Inference/build.cmd index ee58445..6b451e1 100644 --- a/Azaion.Inference/build.cmd +++ b/Azaion.Inference/build.cmd @@ -44,6 +44,8 @@ venv\Scripts\pyinstaller --name=azaion-inference ^ --hidden-import hardware_service ^ --hidden-import remote_command ^ --hidden-import ai_config ^ +--hidden-import tensorrt_engine ^ +--hidden-import onnx_engine ^ --hidden-import inference_engine ^ --hidden-import inference ^ --hidden-import remote_command_handler ^ From ae83bc8542cf1b9c712a4aba8b904d723c4f8803 Mon Sep 17 00:00:00 2001 From: Alex Bezdieniezhnykh Date: Wed, 30 Apr 2025 22:19:42 +0300 Subject: [PATCH 2/5] fix checking nvidia gpu --- Azaion.Inference/hardware_service.pyx | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/Azaion.Inference/hardware_service.pyx b/Azaion.Inference/hardware_service.pyx index 7a1e97c..9782b21 100644 --- a/Azaion.Inference/hardware_service.pyx +++ b/Azaion.Inference/hardware_service.pyx @@ -1,6 +1,7 @@ import re import subprocess import psutil +import pynvml cdef class HardwareInfo: def __init__(self, str cpu, str gpu, str memory, str mac_address): @@ -46,14 +47,25 @@ cdef class HardwareService: @staticmethod cdef has_nvidia_gpu(): try: - output = subprocess.check_output(['nvidia-smi']).decode() - match = re.search(r'CUDA Version:\s*([\d.]+)', output) - if match: - return float(match.group(1)) > 11 - return False - except Exception as e: - print(e) + pynvml.nvmlInit() + device_count = pynvml.nvmlDeviceGetCount() + + if device_count > 0: + print(f"Found NVIDIA GPU(s).") + return True + else: + print("No NVIDIA GPUs found by NVML.") + return False + + except pynvml.NVMLError as error: + print(f"Failed to find NVIDIA GPU") return False + finally: + try: + pynvml.nvmlShutdown() + except: + print('Failed to shutdown pynvml cause probably no NVidia GPU') + pass cdef HardwareInfo get_hardware_info(self): if self.is_windows: From 1c4bdabfb51a8462d20e98af7ed62748d8b8e0e8 Mon Sep 17 00:00:00 2001 From: Alex Bezdieniezhnykh Date: Wed, 30 Apr 2025 23:08:53 +0300 Subject: [PATCH 3/5] import Tensorrt not in compile time in order to dynamically load tensorrt only if nvidia gpu is present --- Azaion.Inference/inference.pyx | 2 +- Azaion.Inference/onnx_engine.pyx | 4 ++-- Azaion.Inference/tensorrt_engine.pxd | 14 +++----------- Azaion.Inference/tensorrt_engine.pyx | 12 ++++++------ 4 files changed, 12 insertions(+), 20 deletions(-) diff --git a/Azaion.Inference/inference.pyx b/Azaion.Inference/inference.pyx index d505e60..48925de 100644 --- a/Azaion.Inference/inference.pyx +++ b/Azaion.Inference/inference.pyx @@ -16,7 +16,7 @@ from hardware_service cimport HardwareService from security cimport Security if HardwareService.has_nvidia_gpu(): - from tensorrt_engine cimport TensorRTEngine + from tensorrt_engine import TensorRTEngine else: from onnx_engine import OnnxEngine diff --git a/Azaion.Inference/onnx_engine.pyx b/Azaion.Inference/onnx_engine.pyx index ed8cdeb..de68d73 100644 --- a/Azaion.Inference/onnx_engine.pyx +++ b/Azaion.Inference/onnx_engine.pyx @@ -14,11 +14,11 @@ cdef class OnnxEngine(InferenceEngine): model_meta = self.session.get_modelmeta() print("Metadata:", model_meta.custom_metadata_map) - cdef tuple get_input_shape(self): + cpdef tuple get_input_shape(self): shape = self.input_shape return shape[2], shape[3] - cdef int get_batch_size(self): + cpdef int get_batch_size(self): return self.batch_size cpdef run(self, input_data): diff --git a/Azaion.Inference/tensorrt_engine.pxd b/Azaion.Inference/tensorrt_engine.pxd index 90d8b79..6fc31bd 100644 --- a/Azaion.Inference/tensorrt_engine.pxd +++ b/Azaion.Inference/tensorrt_engine.pxd @@ -16,17 +16,9 @@ cdef class TensorRTEngine(InferenceEngine): cdef object stream - @staticmethod - cdef get_gpu_memory_bytes(int device_id) - @staticmethod - cdef get_engine_filename(int device_id) + cpdef tuple get_input_shape(self) - @staticmethod - cdef convert_from_onnx(bytes onnx_model) + cpdef int get_batch_size(self) - cdef tuple get_input_shape(self) - - cdef int get_batch_size(self) - - cdef run(self, input_data) + cpdef run(self, input_data) diff --git a/Azaion.Inference/tensorrt_engine.pyx b/Azaion.Inference/tensorrt_engine.pyx index c7cf1a5..d8c4189 100644 --- a/Azaion.Inference/tensorrt_engine.pyx +++ b/Azaion.Inference/tensorrt_engine.pyx @@ -56,7 +56,7 @@ cdef class TensorRTEngine(InferenceEngine): raise RuntimeError(f"Failed to initialize TensorRT engine: {str(e)}") @staticmethod - cdef get_gpu_memory_bytes(int device_id): + def get_gpu_memory_bytes(int device_id): total_memory = None try: pynvml.nvmlInit() @@ -73,7 +73,7 @@ cdef class TensorRTEngine(InferenceEngine): return 2 * 1024 * 1024 * 1024 if total_memory is None else total_memory # default 2 Gb @staticmethod - cdef get_engine_filename(int device_id): + def get_engine_filename(int device_id): try: device = cuda.Device(device_id) sm_count = device.multiprocessor_count @@ -83,7 +83,7 @@ cdef class TensorRTEngine(InferenceEngine): return None @staticmethod - cdef convert_from_onnx(bytes onnx_model): + def convert_from_onnx(bytes onnx_model): workspace_bytes = int(TensorRTEngine.get_gpu_memory_bytes(0) * 0.9) explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) @@ -112,13 +112,13 @@ cdef class TensorRTEngine(InferenceEngine): constants.log('conversion done!') return bytes(plan) - cdef tuple get_input_shape(self): + cpdef tuple get_input_shape(self): return self.input_shape[2], self.input_shape[3] - cdef int get_batch_size(self): + cpdef int get_batch_size(self): return self.batch_size - cdef run(self, input_data): + cpdef run(self, input_data): try: cuda.memcpy_htod_async(self.d_input, input_data, self.stream) self.context.set_tensor_address(self.input_name, int(self.d_input)) # input buffer From cf01e5d95212062e35d78982a73f0fc9b11444ea Mon Sep 17 00:00:00 2001 From: Alex Bezdieniezhnykh Date: Wed, 30 Apr 2025 23:32:03 +0300 Subject: [PATCH 4/5] Revert "import Tensorrt not in compile time in order to dynamically load tensorrt only if nvidia gpu is present" This reverts commit 1c4bdabfb51a8462d20e98af7ed62748d8b8e0e8. --- Azaion.Inference/inference.pyx | 2 +- Azaion.Inference/onnx_engine.pyx | 4 ++-- Azaion.Inference/tensorrt_engine.pxd | 14 +++++++++++--- Azaion.Inference/tensorrt_engine.pyx | 12 ++++++------ 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/Azaion.Inference/inference.pyx b/Azaion.Inference/inference.pyx index 48925de..d505e60 100644 --- a/Azaion.Inference/inference.pyx +++ b/Azaion.Inference/inference.pyx @@ -16,7 +16,7 @@ from hardware_service cimport HardwareService from security cimport Security if HardwareService.has_nvidia_gpu(): - from tensorrt_engine import TensorRTEngine + from tensorrt_engine cimport TensorRTEngine else: from onnx_engine import OnnxEngine diff --git a/Azaion.Inference/onnx_engine.pyx b/Azaion.Inference/onnx_engine.pyx index de68d73..ed8cdeb 100644 --- a/Azaion.Inference/onnx_engine.pyx +++ b/Azaion.Inference/onnx_engine.pyx @@ -14,11 +14,11 @@ cdef class OnnxEngine(InferenceEngine): model_meta = self.session.get_modelmeta() print("Metadata:", model_meta.custom_metadata_map) - cpdef tuple get_input_shape(self): + cdef tuple get_input_shape(self): shape = self.input_shape return shape[2], shape[3] - cpdef int get_batch_size(self): + cdef int get_batch_size(self): return self.batch_size cpdef run(self, input_data): diff --git a/Azaion.Inference/tensorrt_engine.pxd b/Azaion.Inference/tensorrt_engine.pxd index 6fc31bd..90d8b79 100644 --- a/Azaion.Inference/tensorrt_engine.pxd +++ b/Azaion.Inference/tensorrt_engine.pxd @@ -16,9 +16,17 @@ cdef class TensorRTEngine(InferenceEngine): cdef object stream + @staticmethod + cdef get_gpu_memory_bytes(int device_id) - cpdef tuple get_input_shape(self) + @staticmethod + cdef get_engine_filename(int device_id) - cpdef int get_batch_size(self) + @staticmethod + cdef convert_from_onnx(bytes onnx_model) - cpdef run(self, input_data) + cdef tuple get_input_shape(self) + + cdef int get_batch_size(self) + + cdef run(self, input_data) diff --git a/Azaion.Inference/tensorrt_engine.pyx b/Azaion.Inference/tensorrt_engine.pyx index d8c4189..c7cf1a5 100644 --- a/Azaion.Inference/tensorrt_engine.pyx +++ b/Azaion.Inference/tensorrt_engine.pyx @@ -56,7 +56,7 @@ cdef class TensorRTEngine(InferenceEngine): raise RuntimeError(f"Failed to initialize TensorRT engine: {str(e)}") @staticmethod - def get_gpu_memory_bytes(int device_id): + cdef get_gpu_memory_bytes(int device_id): total_memory = None try: pynvml.nvmlInit() @@ -73,7 +73,7 @@ cdef class TensorRTEngine(InferenceEngine): return 2 * 1024 * 1024 * 1024 if total_memory is None else total_memory # default 2 Gb @staticmethod - def get_engine_filename(int device_id): + cdef get_engine_filename(int device_id): try: device = cuda.Device(device_id) sm_count = device.multiprocessor_count @@ -83,7 +83,7 @@ cdef class TensorRTEngine(InferenceEngine): return None @staticmethod - def convert_from_onnx(bytes onnx_model): + cdef convert_from_onnx(bytes onnx_model): workspace_bytes = int(TensorRTEngine.get_gpu_memory_bytes(0) * 0.9) explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) @@ -112,13 +112,13 @@ cdef class TensorRTEngine(InferenceEngine): constants.log('conversion done!') return bytes(plan) - cpdef tuple get_input_shape(self): + cdef tuple get_input_shape(self): return self.input_shape[2], self.input_shape[3] - cpdef int get_batch_size(self): + cdef int get_batch_size(self): return self.batch_size - cpdef run(self, input_data): + cdef run(self, input_data): try: cuda.memcpy_htod_async(self.d_input, input_data, self.stream) self.context.set_tensor_address(self.input_name, int(self.d_input)) # input buffer From 28069f63f991e22a3b9b36184629e17e9e7303bc Mon Sep 17 00:00:00 2001 From: Alex Bezdieniezhnykh Date: Wed, 30 Apr 2025 23:47:46 +0300 Subject: [PATCH 5/5] Reapply "import Tensorrt not in compile time in order to dynamically load tensorrt only if nvidia gpu is present" This reverts commit cf01e5d95212062e35d78982a73f0fc9b11444ea. --- Azaion.Inference/inference.pyx | 2 +- Azaion.Inference/onnx_engine.pyx | 4 ++-- Azaion.Inference/tensorrt_engine.pxd | 14 +++----------- Azaion.Inference/tensorrt_engine.pyx | 12 ++++++------ 4 files changed, 12 insertions(+), 20 deletions(-) diff --git a/Azaion.Inference/inference.pyx b/Azaion.Inference/inference.pyx index d505e60..48925de 100644 --- a/Azaion.Inference/inference.pyx +++ b/Azaion.Inference/inference.pyx @@ -16,7 +16,7 @@ from hardware_service cimport HardwareService from security cimport Security if HardwareService.has_nvidia_gpu(): - from tensorrt_engine cimport TensorRTEngine + from tensorrt_engine import TensorRTEngine else: from onnx_engine import OnnxEngine diff --git a/Azaion.Inference/onnx_engine.pyx b/Azaion.Inference/onnx_engine.pyx index ed8cdeb..de68d73 100644 --- a/Azaion.Inference/onnx_engine.pyx +++ b/Azaion.Inference/onnx_engine.pyx @@ -14,11 +14,11 @@ cdef class OnnxEngine(InferenceEngine): model_meta = self.session.get_modelmeta() print("Metadata:", model_meta.custom_metadata_map) - cdef tuple get_input_shape(self): + cpdef tuple get_input_shape(self): shape = self.input_shape return shape[2], shape[3] - cdef int get_batch_size(self): + cpdef int get_batch_size(self): return self.batch_size cpdef run(self, input_data): diff --git a/Azaion.Inference/tensorrt_engine.pxd b/Azaion.Inference/tensorrt_engine.pxd index 90d8b79..6fc31bd 100644 --- a/Azaion.Inference/tensorrt_engine.pxd +++ b/Azaion.Inference/tensorrt_engine.pxd @@ -16,17 +16,9 @@ cdef class TensorRTEngine(InferenceEngine): cdef object stream - @staticmethod - cdef get_gpu_memory_bytes(int device_id) - @staticmethod - cdef get_engine_filename(int device_id) + cpdef tuple get_input_shape(self) - @staticmethod - cdef convert_from_onnx(bytes onnx_model) + cpdef int get_batch_size(self) - cdef tuple get_input_shape(self) - - cdef int get_batch_size(self) - - cdef run(self, input_data) + cpdef run(self, input_data) diff --git a/Azaion.Inference/tensorrt_engine.pyx b/Azaion.Inference/tensorrt_engine.pyx index c7cf1a5..d8c4189 100644 --- a/Azaion.Inference/tensorrt_engine.pyx +++ b/Azaion.Inference/tensorrt_engine.pyx @@ -56,7 +56,7 @@ cdef class TensorRTEngine(InferenceEngine): raise RuntimeError(f"Failed to initialize TensorRT engine: {str(e)}") @staticmethod - cdef get_gpu_memory_bytes(int device_id): + def get_gpu_memory_bytes(int device_id): total_memory = None try: pynvml.nvmlInit() @@ -73,7 +73,7 @@ cdef class TensorRTEngine(InferenceEngine): return 2 * 1024 * 1024 * 1024 if total_memory is None else total_memory # default 2 Gb @staticmethod - cdef get_engine_filename(int device_id): + def get_engine_filename(int device_id): try: device = cuda.Device(device_id) sm_count = device.multiprocessor_count @@ -83,7 +83,7 @@ cdef class TensorRTEngine(InferenceEngine): return None @staticmethod - cdef convert_from_onnx(bytes onnx_model): + def convert_from_onnx(bytes onnx_model): workspace_bytes = int(TensorRTEngine.get_gpu_memory_bytes(0) * 0.9) explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) @@ -112,13 +112,13 @@ cdef class TensorRTEngine(InferenceEngine): constants.log('conversion done!') return bytes(plan) - cdef tuple get_input_shape(self): + cpdef tuple get_input_shape(self): return self.input_shape[2], self.input_shape[3] - cdef int get_batch_size(self): + cpdef int get_batch_size(self): return self.batch_size - cdef run(self, input_data): + cpdef run(self, input_data): try: cuda.memcpy_htod_async(self.d_input, input_data, self.stream) self.context.set_tensor_address(self.input_name, int(self.d_input)) # input buffer