Separate the load functionality from the inference client into a loader client. Call the loader client from inference to get the model.

Remove dummy DLLs; remove the resource loader from the C# code.

TODO: load DLLs separately via the Loader UI and the loader client.

WIP
This commit is contained in:
Alex Bezdieniezhnykh
2025-06-06 20:04:03 +03:00
parent 500db31142
commit 7750025631
54 changed files with 353 additions and 571 deletions
+45 -22
View File
@@ -1,29 +1,54 @@
import json
import mimetypes
import os
import subprocess
import sys
import time
import cv2
import numpy as np
cimport constants
from remote_command cimport RemoteCommand
from annotation cimport Detection, Annotation
from ai_config cimport AIRecognitionConfig
from hardware_service cimport HardwareService
from security cimport Security
import pynvml
# Probe NVIDIA GPUs once at import time and pick the inference backend:
# TensorRT when a capable GPU exists, ONNX Runtime otherwise.
# (The dangling old-version line `if HardwareService.has_nvidia_gpu():`
# left over from the diff is removed; the check is GPU-index based now.)
cdef int tensor_gpu_index

cdef int check_tensor_gpu_index():
    """Return the index of the first NVIDIA GPU with CUDA compute capability
    >= 6.1 (the minimum this code accepts for TensorRT), or -1 when no GPU
    qualifies or NVML is unavailable."""
    try:
        pynvml.nvmlInit()
        device_count = pynvml.nvmlDeviceGetCount()
        if device_count == 0:
            print('No NVIDIA GPUs found.')
            return -1
        for i in range(device_count):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle)
            if major > 6 or (major == 6 and minor >= 1):
                print('found NVIDIA GPU!')
                return i
        print('NVIDIA GPU doesnt support TensorRT!')
        return -1
    except pynvml.NVMLError:
        # No driver / NVML failure: fall back to the non-TensorRT path.
        return -1
    finally:
        # Narrowed from a bare `except:`; nvmlShutdown only fails when
        # nvmlInit never succeeded (e.g. no NVIDIA GPU present).
        try:
            pynvml.nvmlShutdown()
        except pynvml.NVMLError:
            print('Failed to shutdown pynvml cause probably no NVidia GPU')

tensor_gpu_index = check_tensor_gpu_index()

if tensor_gpu_index > -1:
    from tensorrt_engine import TensorRTEngine
else:
    from onnx_engine import OnnxEngine
cdef class Inference:
    """Runs model inference; model binaries are obtained via the loader client."""

    def __init__(self, loader_client, on_annotation):
        # The diff interleaved the removed api_client __init__ with the new
        # loader_client one; only the loader_client version is kept, matching
        # the rest of this commit (all resource loading goes through it).
        self.loader_client = loader_client
        self.on_annotation = on_annotation  # callback invoked with produced annotations
        self.stop_signal = False            # set True elsewhere to stop processing
        self.model_input = None             # lazily initialised model input buffer
@@ -33,27 +58,26 @@ cdef class Inference:
self.is_building_engine = False
cdef build_tensor_engine(self, object updater_callback):
is_nvidia = HardwareService.has_nvidia_gpu()
if not is_nvidia:
if not tensor_gpu_index == -1:
return
engine_filename = TensorRTEngine.get_engine_filename(0)
key = Security.get_model_encryption_key()
models_dir = constants.MODELS_FOLDER
self.is_building_engine = True
updater_callback('downloading')
if self.api_client.load_big_small_resource(engine_filename, models_dir, key):
if self.loader_client.load_big_small_resource(engine_filename, models_dir):
print('tensor rt engine is here, no need to build')
self.is_building_engine = False
return
# time.sleep(8) # prevent simultaneously loading dll and models
updater_callback('converting')
onnx_model = self.api_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir, key)
onnx_model = self.loader_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir)
model_bytes = TensorRTEngine.convert_from_onnx(onnx_model)
updater_callback('uploading')
self.api_client.upload_big_small_resource(model_bytes, <str> engine_filename, models_dir, key)
self.loader_client.upload_big_small_resource(model_bytes, <str> engine_filename, models_dir)
print(f'uploaded {engine_filename} to CDN and API')
self.is_building_engine = False
@@ -61,17 +85,16 @@ cdef class Inference:
if self.engine is not None:
return
is_nvidia = HardwareService.has_nvidia_gpu()
key = Security.get_model_encryption_key()
models_dir = constants.MODELS_FOLDER
if is_nvidia:
if tensor_gpu_index > -1:
while self.is_building_engine:
time.sleep(1)
engine_filename = TensorRTEngine.get_engine_filename(0)
model_bytes = self.api_client.load_big_small_resource(engine_filename, models_dir, key)
model_bytes = self.loader_client.load_big_small_resource(engine_filename, models_dir)
self.engine = TensorRTEngine(model_bytes)
else:
model_bytes = self.api_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir, key)
model_bytes = self.loader_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir)
self.engine = OnnxEngine(model_bytes)
self.model_height, self.model_width = self.engine.get_input_shape()