Add AIAvailabilityStatus and AIRecognitionConfig classes for AI model management

- Introduced `AIAvailabilityStatus` class to manage the availability status of AI models, including methods for setting status and logging messages.
- Added `AIRecognitionConfig` class to encapsulate configuration parameters for AI recognition, with a static method for creating instances from dictionaries.
- Implemented enums for AI availability states to enhance clarity and maintainability.
- Updated related Cython files to support the new classes and ensure proper type handling.

These changes aim to improve the structure and functionality of the AI model management system, facilitating better status tracking and configuration handling.
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-03-31 05:49:51 +03:00
parent fc57d677b4
commit 8ce40a9385
43 changed files with 1190 additions and 462 deletions
+52
View File
@@ -0,0 +1,52 @@
import platform
import sys
def _check_tensor_gpu_index():
    """Return the index of the first NVIDIA GPU suitable for TensorRT.

    A GPU qualifies when its CUDA compute capability is >= 6.1 (Pascal
    or newer). Returns -1 when pynvml is not installed, NVML cannot be
    initialised, or no device meets the requirement.
    """
    try:
        import pynvml
    except Exception:
        # pynvml not installed on this machine — no NVIDIA GPU path.
        return -1
    initialized = False
    try:
        pynvml.nvmlInit()
        initialized = True
        for i in range(pynvml.nvmlDeviceGetCount()):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle)
            # Equivalent to: major > 6 or (major == 6 and minor >= 1).
            if (major, minor) >= (6, 1):
                return i
        return -1
    except Exception:
        # Any NVML failure (no driver, no device, ...) means "no usable GPU".
        return -1
    finally:
        # Only shut down NVML if nvmlInit() actually succeeded; the
        # original unconditionally re-imported pynvml and called
        # nvmlShutdown() even when initialisation never happened.
        if initialized:
            try:
                pynvml.nvmlShutdown()
            except Exception:
                pass
def _is_apple_silicon():
    """Return True on macOS/arm64 machines where coremltools is importable."""
    on_mac_arm = sys.platform == "darwin" and platform.machine() == "arm64"
    if not on_mac_arm:
        return False
    try:
        import coremltools  # noqa: F401 -- availability probe only
    except ImportError:
        return False
    return True
# Probe once at import time; engines.tensorrt_engine reads this value too.
tensor_gpu_index = _check_tensor_gpu_index()


def _select_engine_class():
    """Return the inference-engine class best suited to this machine.

    Priority: TensorRT (capable NVIDIA GPU) > CoreML (Apple Silicon)
    > ONNX Runtime (portable fallback). Backend imports are deferred so
    unavailable backends are never loaded.
    """
    if tensor_gpu_index > -1:
        from engines.tensorrt_engine import TensorRTEngine  # pyright: ignore[reportMissingImports]
        return TensorRTEngine
    elif _is_apple_silicon():
        from engines.coreml_engine import CoreMLEngine
        return CoreMLEngine
    else:
        from engines.onnx_engine import OnnxEngine
        return OnnxEngine


EngineClass = _select_engine_class()
+13
View File
@@ -0,0 +1,13 @@
from engines.inference_engine cimport InferenceEngine
cdef class CoreMLEngine(InferenceEngine):
    # Loaded coremltools MLModel (held as a generic Python object).
    cdef object model
    # Input image dimensions read from the CoreML model spec.
    cdef int img_width
    cdef int img_height
    cdef tuple get_input_shape(self)
    cdef run(self, input_data)
    cdef preprocess(self, list frames)
    cdef list postprocess(self, output, object ai_config)
+100
View File
@@ -0,0 +1,100 @@
from engines.inference_engine cimport InferenceEngine
from annotation cimport Detection
cimport constants_inf
import numpy as np
from PIL import Image
import cv2
import io
import os
import tempfile
import zipfile
cdef class CoreMLEngine(InferenceEngine):
    """Inference backend running the model through coremltools on macOS.

    The model is shipped as a zip containing a .mlpackage/.mlmodel.
    preprocess() only uses frames[0], so this backend effectively runs
    with a batch size of 1 regardless of max_batch_size.
    """
    def __init__(self, model_bytes: bytes, max_batch_size: int = 1, **kwargs):
        InferenceEngine.__init__(self, model_bytes, max_batch_size, engine_name="coreml")
        import coremltools as ct
        model_path = kwargs.get('model_path')
        if model_path is None:
            # No on-disk path supplied: unpack the zipped model bytes into
            # a temp dir and load from there.
            model_path = self._extract_from_zip(model_bytes)
        self.model = ct.models.MLModel(
            model_path, compute_units=ct.ComputeUnit.ALL)
        # Read the expected input image size from the model spec.
        spec = self.model.get_spec()
        img_input = spec.description.input[0]
        self.img_width = int(img_input.type.imageType.width)
        self.img_height = int(img_input.type.imageType.height)
        constants_inf.log(<str>f'CoreML model: {self.img_width}x{self.img_height}')
    @staticmethod
    def get_engine_filename():
        # Filename of the packaged CoreML model artifact.
        return "azaion_coreml.zip"
    @staticmethod
    def _extract_from_zip(model_bytes):
        """Unpack the zipped model bytes and return the path of the first
        .mlpackage/.mlmodel entry; raise ValueError when none is present.

        NOTE(review): the temp dir is never removed — presumably the
        extracted model must stay on disk for the process lifetime;
        confirm cleanup is handled elsewhere.
        """
        tmpdir = tempfile.mkdtemp()
        buf = io.BytesIO(model_bytes)
        with zipfile.ZipFile(buf, 'r') as zf:
            zf.extractall(tmpdir)
        for item in os.listdir(tmpdir):
            if item.endswith('.mlpackage') or item.endswith('.mlmodel'):
                return os.path.join(tmpdir, item)
        raise ValueError("No .mlpackage or .mlmodel found in zip")
    cdef tuple get_input_shape(self):
        # (height, width) expected by the model.
        return <tuple>(self.img_height, self.img_width)
    cdef preprocess(self, list frames):
        # Only the first frame is used: prediction here is single-image.
        frame = frames[0]
        # BGR (OpenCV) -> RGB, resized to the model's input size.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        resized = cv2.resize(rgb, (self.img_width, self.img_height))
        return Image.fromarray(resized)
    cdef run(self, input_data):
        # getattr avoids Cython's static attribute lookup on the opaque
        # MLModel object.
        predict = getattr(self.model, 'predict')
        return predict({
            'image': input_data,
            'iouThreshold': 0.45,
            'confidenceThreshold': 0.25,
        })
    cdef list postprocess(self, output, object ai_config):
        """Convert CoreML output into a one-frame list of Detection lists.

        'coordinates' holds normalised (cx, cy, w, h) boxes; 'confidence'
        holds per-box, per-class scores. Returns [detections] — a single
        inner list, matching the single-frame batch of preprocess().
        """
        cdef int w = self.img_width
        cdef int h = self.img_height
        # Missing keys degrade to empty arrays (no detections).
        coords = output.get('coordinates', np.empty((0, 4), dtype=np.float32))
        confs = output.get('confidence', np.empty((0, 80), dtype=np.float32))
        cdef list[Detection] detections = []
        if coords.size == 0:
            return [detections]
        # Scale normalised centre/size boxes to pixel corner coordinates...
        cx, cy, bw, bh = coords[:, 0], coords[:, 1], coords[:, 2], coords[:, 3]
        x1 = (cx - bw / 2) * w
        y1 = (cy - bh / 2) * h
        x2 = (cx + bw / 2) * w
        y2 = (cy + bh / 2) * h
        # Best class and its score per box.
        class_ids = np.argmax(confs, axis=1)
        conf_values = np.max(confs, axis=1)
        for i in range(len(conf_values)):
            conf = round(float(conf_values[i]), 2)
            if conf < ai_config.probability_threshold:
                continue
            # ...then re-normalise back to [0, 1] for Detection.
            det_x1 = float(x1[i]) / w
            det_y1 = float(y1[i]) / h
            det_x2 = float(x2[i]) / w
            det_y2 = float(y2[i]) / h
            det_cx = (det_x1 + det_x2) / 2
            det_cy = (det_y1 + det_y2) / 2
            det_w = det_x2 - det_x1
            det_h = det_y2 - det_y1
            detections.append(Detection(det_cx, det_cy, det_w, det_h, int(class_ids[i]), conf))
        # Apply the shared overlap filter from the base class.
        filtered = self.remove_overlapping(detections, ai_config.tracking_intersection_threshold)
        return [filtered]
+12
View File
@@ -0,0 +1,12 @@
from annotation cimport Detection
cdef class InferenceEngine:
    # Base interface for all inference backends (ONNX, CoreML, TensorRT).
    cdef public int max_batch_size      # largest number of frames per run()
    cdef public str engine_name         # backend id: "onnx", "coreml" or "tensorrt"
    cdef tuple get_input_shape(self)    # -> (height, width) expected by the model
    cdef run(self, input_data)          # execute the model on preprocessed input
    cdef preprocess(self, list frames)  # frames -> model input blob
    cdef list postprocess(self, output, object ai_config)  # raw output -> detections per frame
    cdef list remove_overlapping(self, list[Detection] detections, float threshold)
    cpdef list process_frames(self, list frames, object ai_config)  # public entry point
+106
View File
@@ -0,0 +1,106 @@
import cv2
import numpy as np
from annotation cimport Detection
cdef class InferenceEngine:
    """Base class for detection inference backends.

    Subclasses must implement get_input_shape() and run(); preprocess()
    and postprocess() have ONNX-style defaults that subclasses may
    override. process_frames() is the public entry point.
    """
    def __init__(self, model_bytes: bytes, max_batch_size: int = 8, **kwargs):
        self.max_batch_size = max_batch_size
        self.engine_name = <str>kwargs.get('engine_name', "onnx")
    @staticmethod
    def get_engine_filename():
        # Backend-specific engine artifact name; None means "no cached engine".
        return None
    @staticmethod
    def get_source_filename():
        # Name of the source model to convert from; None means "not needed".
        return None
    @staticmethod
    def convert_from_source(bytes source_bytes):
        # Default: the source bytes are already the runnable model.
        return source_bytes
    cdef tuple get_input_shape(self):
        raise NotImplementedError("Subclass must implement get_input_shape")
    cdef run(self, input_data):
        raise NotImplementedError("Subclass must implement run")
    cdef preprocess(self, list frames):
        """Convert BGR frames to a stacked NCHW float blob scaled to [0, 1]."""
        cdef int h, w
        h, w = self.get_input_shape()
        # swapRB converts OpenCV BGR to the RGB order the model expects.
        blobs = [cv2.dnn.blobFromImage(frame,
                                       scalefactor=1.0 / 255.0,
                                       size=(w, h),
                                       mean=(0, 0, 0),
                                       swapRB=True,
                                       crop=False)
                 for frame in frames]
        return np.vstack(blobs)
    cdef list postprocess(self, output, object ai_config):
        """Turn raw model output into per-frame lists of Detection.

        Expects output[0] to be a batch of per-frame detection arrays
        where each row is (x1, y1, x2, y2, conf, class_id) in input-pixel
        coordinates. Rows with conf == 0 mark the end of valid detections
        for a frame (padding) — presumably zero-filled by the model's NMS;
        confirm with the exporter.
        """
        cdef list[Detection] detections
        cdef int ann_index
        cdef float x1, y1, x2, y2, conf
        cdef int class_id
        cdef list results = []
        cdef int h, w
        h, w = self.get_input_shape()
        for ann_index in range(len(output[0])):
            detections = []
            for det in output[0][ann_index]:
                if det[4] == 0:
                    # Zero confidence: remaining rows are padding.
                    break
                # Normalise corner coords to [0, 1].
                x1 = det[0] / w
                y1 = det[1] / h
                x2 = det[2] / w
                y2 = det[3] / h
                conf = round(det[4], 2)
                class_id = int(det[5])
                # Convert corners to centre + size for Detection.
                x = (x1 + x2) / 2
                y = (y1 + y2) / 2
                bw = x2 - x1
                bh = y2 - y1
                if conf >= ai_config.probability_threshold:
                    detections.append(Detection(x, y, bw, bh, class_id, conf))
            filtered = self.remove_overlapping(detections, ai_config.tracking_intersection_threshold)
            results.append(filtered)
        return results
    cdef list remove_overlapping(self, list[Detection] detections, float threshold):
        """Greedy overlap suppression: among overlapping detections, keep
        the one with the higher confidence (ties broken by lower class id).

        `res` tracks the index of the current winner for det1's group;
        indexes appended to filtered_out_indexes are skipped when later
        considered as det1 (including the kept winner itself, appended
        after the group is resolved).
        """
        cdef Detection det1, det2
        filtered_output = []
        filtered_out_indexes = []
        for det1_index in range(len(detections)):
            if det1_index in filtered_out_indexes:
                continue
            det1 = detections[det1_index]
            res = det1_index
            for det2_index in range(det1_index + 1, len(detections)):
                det2 = detections[det2_index]
                if det1.overlaps(det2, threshold):
                    if det1.confidence > det2.confidence or (
                            det1.confidence == det2.confidence and det1.cls < det2.cls):
                        filtered_out_indexes.append(det2_index)
                    else:
                        # det2 wins: drop the previous winner and promote det2.
                        filtered_out_indexes.append(res)
                        res = det2_index
            filtered_output.append(detections[res])
            filtered_out_indexes.append(res)
        return filtered_output
    cpdef list process_frames(self, list frames, object ai_config):
        """Run the full pipeline on `frames`, chunked to the effective batch.

        Returns one list of Detection per input frame, preprocessing and
        running the model in chunks of min(max_batch_size,
        ai_config.model_batch_size), clamped to at least 1.
        """
        cdef int effective_batch = min(self.max_batch_size, ai_config.model_batch_size)
        if effective_batch < 1:
            effective_batch = 1
        cdef list all_detections = []
        cdef int i
        for i in range(0, len(frames), effective_batch):
            chunk = frames[i:i + effective_batch]
            input_blob = self.preprocess(chunk)
            raw_output = self.run(input_blob)
            batch_dets = self.postprocess(raw_output, ai_config)
            all_detections.extend(batch_dets)
        return all_detections
+13
View File
@@ -0,0 +1,13 @@
from engines.inference_engine cimport InferenceEngine
cdef class OnnxEngine(InferenceEngine):
    # Primary onnxruntime.InferenceSession.
    cdef public object session
    # CPU-only fallback session (created only when CoreML provider is active).
    cdef object _cpu_session
    cdef object model_inputs   # session.get_inputs() result
    cdef str input_name        # name of input tensor 0
    cdef object input_shape    # declared shape of input tensor 0 (may hold symbolic dims)
    cdef tuple get_input_shape(self)
    cdef run(self, input_data)
+48
View File
@@ -0,0 +1,48 @@
from engines.inference_engine cimport InferenceEngine
import onnxruntime as onnx
cimport constants_inf
import os
def _select_providers():
    """Pick ONNX Runtime execution providers in preference order.

    Preference is CoreML > CUDA > CPU, filtered to what this onnxruntime
    build actually offers. Setting the SKIP_COREML env var to 1/true/yes
    removes CoreML from consideration. Always returns a non-empty list
    (CPU as the last resort).
    """
    available = set(onnx.get_available_providers())
    preferred = ["CoreMLExecutionProvider", "CUDAExecutionProvider", "CPUExecutionProvider"]
    if os.environ.get("SKIP_COREML", "").lower() in ("1", "true", "yes"):
        preferred.remove("CoreMLExecutionProvider")
    chosen = [name for name in preferred if name in available]
    return chosen if chosen else ["CPUExecutionProvider"]
cdef class OnnxEngine(InferenceEngine):
    """ONNX Runtime inference backend.

    Providers are chosen by _select_providers(); when the CoreML provider
    ends up active, a CPU-only fallback session is also created and used
    if the primary session raises at run time.
    """
    def __init__(self, model_bytes: bytes, max_batch_size: int = 8, **kwargs):
        InferenceEngine.__init__(self, model_bytes, max_batch_size)
        providers = _select_providers()
        constants_inf.log(<str>f'ONNX providers: {providers}')
        self.session = onnx.InferenceSession(model_bytes, providers=providers)
        self.model_inputs = self.session.get_inputs()
        self.input_name = self.model_inputs[0].name
        self.input_shape = self.model_inputs[0].shape
        # A dynamic batch dimension is reported by ONNX Runtime as -1,
        # None, or a symbolic *name* such as "N" or "batch". Only a
        # concrete positive int may override max_batch_size; the previous
        # check (`not in (-1, None, "N")`) let other symbolic names
        # through and crashed assigning a str to the int attribute.
        batch_dim = self.input_shape[0]
        if isinstance(batch_dim, int) and batch_dim > 0:
            self.max_batch_size = batch_dim
        constants_inf.log(f'AI detection model input: {self.model_inputs} {self.input_shape}')
        model_meta = self.session.get_modelmeta()
        constants_inf.log(f"Metadata: {model_meta.custom_metadata_map}")
        self._cpu_session = None
        if any("CoreML" in p for p in self.session.get_providers()):
            constants_inf.log(<str>'CoreML active — creating CPU fallback session')
            self._cpu_session = onnx.InferenceSession(
                model_bytes, providers=["CPUExecutionProvider"])
    cdef tuple get_input_shape(self):
        # (height, width) taken from the NCHW input shape.
        shape = self.input_shape
        return <tuple>(shape[2], shape[3])
    cdef run(self, input_data):
        """Run inference; fall back to the CPU session when the primary
        (e.g. CoreML-backed) session raises and a fallback exists."""
        try:
            return self.session.run(None, {self.input_name: input_data})
        except Exception:
            if self._cpu_session is not None:
                return self._cpu_session.run(None, {self.input_name: input_data})
            raise
+20
View File
@@ -0,0 +1,20 @@
from engines.inference_engine cimport InferenceEngine
cdef class TensorRTEngine(InferenceEngine):
    cdef public object context   # TensorRT execution context
    cdef public object d_input   # device buffer for model input
    cdef public object d_output  # device buffer for model output
    cdef str input_name          # engine tensor 0 name
    cdef list input_shape        # [batch, C, H, W] fixed at init
    cdef object h_output         # page-locked host output buffer
    cdef str output_name         # engine tensor 1 name
    cdef list output_shape       # [batch, det_count, values_per_det]
    cdef object stream           # CUDA stream for async copies/execution
    cdef tuple get_input_shape(self)
    cdef run(self, input_data)
+169
View File
@@ -0,0 +1,169 @@
from engines.inference_engine cimport InferenceEngine
import tensorrt as trt # pyright: ignore[reportMissingImports]
import pycuda.driver as cuda # pyright: ignore[reportMissingImports]
import pycuda.autoinit # pyright: ignore[reportMissingImports]
import pynvml
import numpy as np
cimport constants_inf
# Fraction of total GPU memory assumed usable for inference buffers.
GPU_MEMORY_FRACTION = 0.8
cdef class TensorRTEngine(InferenceEngine):
    """TensorRT inference backend for NVIDIA GPUs.

    Deserialises a prebuilt engine, resolves dynamic dims to concrete
    values, allocates device/host buffers once at maximum batch size,
    and runs async inference on a dedicated CUDA stream.
    """
    def __init__(self, model_bytes: bytes, max_batch_size: int = 8, **kwargs):
        InferenceEngine.__init__(self, model_bytes, max_batch_size, engine_name="tensorrt")
        try:
            logger = trt.Logger(trt.Logger.WARNING)
            runtime = trt.Runtime(logger)
            engine = runtime.deserialize_cuda_engine(model_bytes)
            if engine is None:
                raise RuntimeError("Failed to load TensorRT engine from bytes")
            self.context = engine.create_execution_context()
            # Tensor 0 is the input; dynamic (-1) spatial dims default to 1280.
            self.input_name = engine.get_tensor_name(0)
            engine_input_shape = engine.get_tensor_shape(self.input_name)
            C = engine_input_shape[1]
            H = 1280 if engine_input_shape[2] == -1 else engine_input_shape[2]
            W = 1280 if engine_input_shape[3] == -1 else engine_input_shape[3]
            if engine_input_shape[0] == -1:
                # Dynamic batch: size it from this GPU's memory.
                gpu_mem = TensorRTEngine.get_gpu_memory_bytes(0)
                self.max_batch_size = TensorRTEngine.calculate_max_batch_size(gpu_mem, H, W)
            else:
                self.max_batch_size = engine_input_shape[0]
            self.input_shape = [self.max_batch_size, C, H, W]
            self.context.set_input_shape(self.input_name, self.input_shape)
            # Allocate the device input buffer once, at maximum batch size.
            input_size = trt.volume(self.input_shape) * np.dtype(np.float32).itemsize
            self.d_input = cuda.mem_alloc(input_size)
            # Tensor 1 is the output; dynamic dims default to 300 detections
            # of 6 values each.
            self.output_name = engine.get_tensor_name(1)
            engine_output_shape = tuple(engine.get_tensor_shape(self.output_name))
            self.output_shape = [
                self.max_batch_size,
                300 if engine_output_shape[1] == -1 else engine_output_shape[1],
                6 if engine_output_shape[2] == -1 else engine_output_shape[2],
            ]
            # Page-locked host memory enables fast device->host copies.
            self.h_output = cuda.pagelocked_empty(tuple(self.output_shape), dtype=np.float32)
            self.d_output = cuda.mem_alloc(self.h_output.nbytes)
            self.stream = cuda.Stream()
        except Exception as e:
            # NOTE(review): consider `raise ... from e` to keep the
            # original traceback chained.
            raise RuntimeError(f"Failed to initialize TensorRT engine: {str(e)}")
    @staticmethod
    def calculate_max_batch_size(gpu_memory_bytes, int input_h, int input_w):
        """Estimate a safe batch size from GPU memory, clamped to [1, 32].

        frame_input_bytes is 3 channels x H x W x 4 bytes (float32). The
        x12 multiplier is a per-frame overhead estimate (activations etc.)
        — empirical factor, TODO confirm.
        """
        frame_input_bytes = 3 * input_h * input_w * 4
        estimated_per_frame = frame_input_bytes * 12
        available = gpu_memory_bytes * GPU_MEMORY_FRACTION
        calculated = max(1, int(available / estimated_per_frame))
        return min(calculated, 32)
    @staticmethod
    def get_gpu_memory_bytes(int device_id):
        """Total memory of GPU `device_id` via NVML; 2 GiB fallback on failure."""
        total_memory = None
        try:
            pynvml.nvmlInit()
            handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)
            mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            total_memory = mem_info.total
        except pynvml.NVMLError:
            total_memory = None
        finally:
            try:
                pynvml.nvmlShutdown()
            except pynvml.NVMLError:
                pass
        return 2 * 1024 * 1024 * 1024 if total_memory is None else total_memory
    @staticmethod
    def get_engine_filename():
        """Engine cache filename keyed by compute capability and SM count,
        or None when the GPU cannot be queried."""
        try:
            from engines import tensor_gpu_index
            device = cuda.Device(max(tensor_gpu_index, 0))
            sm_count = device.multiprocessor_count
            cc_major, cc_minor = device.compute_capability()
            return f"azaion.cc_{cc_major}.{cc_minor}_sm_{sm_count}.engine"
        except Exception:
            return None
    @staticmethod
    def get_source_filename():
        # The TensorRT engine is built from the shipped ONNX model file.
        import constants_inf
        return constants_inf.AI_ONNX_MODEL_FILE
    @staticmethod
    def convert_from_source(bytes onnx_model):
        """Build a serialised TensorRT engine from ONNX model bytes.

        Returns the engine plan bytes, or None when parsing or building
        fails. Enables FP16 when the platform supports it, and adds an
        optimisation profile sized from GPU memory for dynamic-batch models.
        """
        gpu_mem = TensorRTEngine.get_gpu_memory_bytes(0)
        workspace_bytes = int(gpu_mem * 0.9)
        explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        trt_logger = trt.Logger(trt.Logger.WARNING)
        with trt.Builder(trt_logger) as builder, \
                builder.create_network(explicit_batch_flag) as network, \
                trt.OnnxParser(network, trt_logger) as parser, \
                builder.create_builder_config() as config:
            config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes)
            if not parser.parse(onnx_model):
                return None
            input_tensor = network.get_input(0)
            shape = input_tensor.shape
            C = shape[1]
            # NOTE(review): max(shape[i], 1280) forces at least 1280 even
            # for smaller *fixed* dims — confirm the upscaling is intended.
            H = max(shape[2], 1280) if shape[2] != -1 else 1280
            W = max(shape[3], 1280) if shape[3] != -1 else 1280
            if shape[0] == -1:
                # Dynamic batch: profile with min=1, opt=max=computed batch.
                max_batch = TensorRTEngine.calculate_max_batch_size(gpu_mem, H, W)
                profile = builder.create_optimization_profile()
                profile.set_shape(
                    input_tensor.name,
                    (1, C, H, W),
                    (max_batch, C, H, W),
                    (max_batch, C, H, W),
                )
                config.add_optimization_profile(profile)
            if builder.platform_has_fast_fp16:
                constants_inf.log(<str>'Converting to supported fp16')
                config.set_flag(trt.BuilderFlag.FP16)
            else:
                constants_inf.log(<str>'Converting to supported fp32. (fp16 is not supported)')
            plan = builder.build_serialized_network(network, config)
            if plan is None:
                constants_inf.logerror(<str>'Conversion failed.')
                return None
            constants_inf.log('conversion done!')
            return bytes(plan)
    cdef tuple get_input_shape(self):
        # (H, W) of the fixed input shape chosen at init.
        return <tuple>(self.input_shape[2], self.input_shape[3])
    cdef run(self, input_data):
        """Copy input to the device, execute, and return [output] trimmed
        to the actual batch size of input_data."""
        try:
            actual_batch = input_data.shape[0]
            if actual_batch != self.input_shape[0]:
                # Smaller final chunk: shrink the context's input shape.
                actual_shape = [actual_batch, self.input_shape[1], self.input_shape[2], self.input_shape[3]]
                self.context.set_input_shape(self.input_name, actual_shape)
            cuda.memcpy_htod_async(self.d_input, input_data, self.stream)
            self.context.set_tensor_address(self.input_name, int(self.d_input))
            self.context.set_tensor_address(self.output_name, int(self.d_output))
            self.context.execute_async_v3(stream_handle=self.stream.handle)
            self.stream.synchronize()
            cuda.memcpy_dtoh(self.h_output, self.d_output)
            # Only the first actual_batch entries of the full-size host
            # buffer are valid for this call.
            output_shape = [actual_batch, self.output_shape[1], self.output_shape[2]]
            output = self.h_output[:actual_batch].reshape(output_shape)
            return [output]
        except Exception as e:
            raise RuntimeError(f"Failed to run TensorRT inference: {str(e)}")