Refactor inference engine and task management: Remove obsolete inference engine and ONNX engine files, update inference processing to utilize batch handling, and enhance task management structure in documentation. Adjust paths for task specifications to align with new directory organization.

This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-03-28 01:04:28 +02:00
parent 1e4ef299f9
commit 5be53739cd
60 changed files with 111875 additions and 208 deletions
+32
View File
@@ -0,0 +1,32 @@
def _check_tensor_gpu_index():
try:
import pynvml
pynvml.nvmlInit()
device_count = pynvml.nvmlDeviceGetCount()
if device_count == 0:
return -1
for i in range(device_count):
handle = pynvml.nvmlDeviceGetHandleByIndex(i)
major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle)
if major > 6 or (major == 6 and minor >= 1):
return i
return -1
except Exception:
return -1
finally:
try:
import pynvml
pynvml.nvmlShutdown()
except Exception:
pass
# Probed once at import time: index of a TensorRT-capable GPU, or -1 when
# none was detected (see _check_tensor_gpu_index above).
tensor_gpu_index = _check_tensor_gpu_index()
def create_engine(model_bytes: bytes, batch_size: int = 1):
    """Build an inference engine for the given serialized model.

    Selects TensorRT when a compatible GPU was found at import time
    (``tensor_gpu_index`` >= 0), otherwise falls back to ONNX Runtime.
    """
    if tensor_gpu_index < 0:
        # No suitable GPU detected: CPU-side ONNX backend.
        from engines.onnx_engine import OnnxEngine
        return OnnxEngine(model_bytes, batch_size)
    from engines.tensorrt_engine import TensorRTEngine
    return TensorRTEngine(model_bytes, batch_size)