Read CDN YAML config from the API

Automate TensorRT model conversion when no prebuilt engine exists for the user's GPU
This commit is contained in:
Alex Bezdieniezhnykh
2025-04-23 23:20:08 +03:00
parent c68c293448
commit e798af470b
23 changed files with 265 additions and 93 deletions
+35 -7
View File
@@ -1,14 +1,19 @@
import json
import mimetypes
import os
import subprocess
import time
import cv2
import numpy as np
cimport constants
from remote_command cimport RemoteCommand
from annotation cimport Detection, Annotation
from ai_config cimport AIRecognitionConfig
from inference_engine cimport OnnxEngine, TensorRTEngine
from hardware_service cimport HardwareService
from security cimport Security
cdef class Inference:
def __init__(self, api_client, on_annotation):
@@ -20,18 +25,41 @@ cdef class Inference:
self.model_height = 0
self.engine = None
self.class_names = None
self.is_building_engine = False
def init_ai(self):
cdef build_tensor_engine(self):
    """Build and upload a TensorRT engine for this host's NVIDIA GPU.

    If the host has an NVIDIA GPU and no engine artifact for it exists yet
    in the models folder, download the encrypted ONNX model via the API,
    convert it to a TensorRT engine, and upload the result back so it can
    be reused later (presumably by hosts with the same GPU — confirm).
    No-op on non-NVIDIA hosts or when the engine file is already cached.
    """
    if not HardwareService.has_nvidia_gpu():
        # TensorRT is NVIDIA-only; other hosts use the ONNX path in init_ai().
        return
    engine_filename = TensorRTEngine.get_engine_filename()
    key = Security.get_model_encryption_key()
    models_dir = constants.MODELS_FOLDER
    # The '.big' file marks an already-converted engine; skip the
    # expensive ONNX -> TensorRT conversion when it is present.
    if os.path.exists(os.path.join(<str> models_dir, f'{engine_filename}.big')):
        return
    self.is_building_engine = True
    try:
        onnx_model = self.api_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir, key)
        model_bytes = TensorRTEngine.convert_from_onnx(onnx_model)
        self.api_client.upload_big_small_resource(model_bytes, <str> engine_filename, models_dir, key)
    finally:
        # Always clear the flag: init_ai() busy-waits on it, so leaving it
        # True after a failed conversion would block model loading forever.
        self.is_building_engine = False
cdef init_ai(self):
    """Lazily initialise the inference engine and cached model metadata.

    Idempotent: returns immediately once an engine exists. On NVIDIA hosts,
    waits for any in-flight build_tensor_engine() conversion to finish, then
    downloads the GPU-specific TensorRT engine; otherwise downloads the
    generic ONNX model. Also caches the model input shape and class names.
    """
    if self.engine is not None:
        # Engine already created by a previous call — nothing to do.
        return
    is_nvidia = HardwareService.has_nvidia_gpu()
    key = Security.get_model_encryption_key()
    models_dir = constants.MODELS_FOLDER
    if is_nvidia:
        # A concurrent build_tensor_engine() may still be converting the
        # model; wait so the freshly built engine can be fetched below.
        # NOTE(review): no timeout — a stuck build would block here forever.
        while self.is_building_engine:
            time.sleep(1)
        engine_filename = TensorRTEngine.get_engine_filename()
        model_bytes = self.api_client.load_big_small_resource(engine_filename, models_dir, key)
        self.engine = TensorRTEngine(model_bytes)
    else:
        model_bytes = self.api_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir, key)
        self.engine = OnnxEngine(model_bytes)
    # Cache input geometry and labels once; used by preprocessing/postprocessing.
    self.model_height, self.model_width = self.engine.get_input_shape()
    self.class_names = self.engine.get_class_names()
@@ -135,7 +163,7 @@ cdef class Inference:
images.append(m)
# images first, it's faster
if len(images) > 0:
for chunk in self.split_list_extend(images, ai_config.model_batch_size):
for chunk in self.split_list_extend(images, self.engine.get_input_shape()):
print(f'run inference on {" ".join(chunk)}...')
self._process_images(cmd, ai_config, chunk)
if len(videos) > 0:
@@ -161,7 +189,7 @@ cdef class Inference:
batch_frames.append(frame)
batch_timestamps.append(int(v_input.get(cv2.CAP_PROP_POS_MSEC)))
if len(batch_frames) == ai_config.model_batch_size:
if len(batch_frames) == self.engine.get_input_shape():
input_blob = self.preprocess(batch_frames)
outputs = self.engine.run(input_blob)