Refactor inference engine and task management: Remove obsolete inference engine and ONNX engine files, update inference processing to utilize batch handling, and enhance task management structure in documentation. Adjust paths for task specifications to align with new directory organization.

This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-03-28 01:04:28 +02:00
parent 1e4ef299f9
commit 5be53739cd
60 changed files with 111875 additions and 208 deletions
+32
View File
@@ -0,0 +1,32 @@
def _check_tensor_gpu_index():
try:
import pynvml
pynvml.nvmlInit()
device_count = pynvml.nvmlDeviceGetCount()
if device_count == 0:
return -1
for i in range(device_count):
handle = pynvml.nvmlDeviceGetHandleByIndex(i)
major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle)
if major > 6 or (major == 6 and minor >= 1):
return i
return -1
except Exception:
return -1
finally:
try:
import pynvml
pynvml.nvmlShutdown()
except Exception:
pass
# Probed once at import time: index of a TensorRT-capable GPU, or -1 when
# none was detected (see _check_tensor_gpu_index above).
tensor_gpu_index = _check_tensor_gpu_index()
def create_engine(model_bytes: bytes, batch_size: int = 1):
    """Build an inference engine for the given serialized model.

    Selects TensorRT when a compatible GPU was found at import time
    (``tensor_gpu_index`` >= 0), otherwise falls back to ONNX Runtime.
    """
    if tensor_gpu_index < 0:
        # No suitable GPU detected: CPU-side ONNX backend.
        from engines.onnx_engine import OnnxEngine
        return OnnxEngine(model_bytes, batch_size)
    from engines.tensorrt_engine import TensorRTEngine
    return TensorRTEngine(model_bytes, batch_size)