fix converting model initialization

This commit is contained in:
Oleksandr Bezdieniezhnykh
2025-09-04 10:39:41 +03:00
parent b3665630ed
commit be77a81875
8 changed files with 97 additions and 41 deletions
+1 -1
View File
@@ -483,7 +483,7 @@ public class AnnotatorEventHandler(
mainWindow.AIDetectBtn.IsEnabled = e.Status == AIAvailabilityEnum.Enabled; mainWindow.AIDetectBtn.IsEnabled = e.Status == AIAvailabilityEnum.Enabled;
mainWindow.StatusHelp.Text = e.ToString(); mainWindow.StatusHelp.Text = e.ToString();
}); });
1 if (e.Status is AIAvailabilityEnum.Enabled or AIAvailabilityEnum.Error) if (e.Status is AIAvailabilityEnum.Enabled or AIAvailabilityEnum.Error)
await inferenceService.CheckAIAvailabilityTokenSource.CancelAsync(); await inferenceService.CheckAIAvailabilityTokenSource.CancelAsync();
} }
} }
@@ -49,7 +49,7 @@ public class InferenceClient : IInferenceClient
Arguments = $"-p {_inferenceClientConfig.ZeroMqPort} -lp {_loaderClientConfig.ZeroMqPort} -a {_inferenceClientConfig.ApiUrl}", Arguments = $"-p {_inferenceClientConfig.ZeroMqPort} -lp {_loaderClientConfig.ZeroMqPort} -a {_inferenceClientConfig.ApiUrl}",
CreateNoWindow = true CreateNoWindow = true
}; };
process.Start(); //process.Start();
} }
catch (Exception e) catch (Exception e)
{ {
@@ -4,11 +4,13 @@ cdef enum AIAvailabilityEnum:
CONVERTING = 20 CONVERTING = 20
UPLOADING = 30 UPLOADING = 30
ENABLED = 200 ENABLED = 200
WARNING = 300
ERROR = 500 ERROR = 500
cdef class AIAvailabilityStatus: cdef class AIAvailabilityStatus:
cdef AIAvailabilityEnum status cdef AIAvailabilityEnum status
cdef str error_message cdef str error_message
cdef object _lock
cdef bytes serialize(self) cdef bytes serialize(self)
cdef set_status(self, AIAvailabilityEnum status, str error_message=*) cdef set_status(self, AIAvailabilityEnum status, str error_message=*)
+29 -11
View File
@@ -1,5 +1,6 @@
cimport constants_inf cimport constants_inf
import msgpack import msgpack
from threading import Lock
AIStatus2Text = { AIStatus2Text = {
AIAvailabilityEnum.NONE: "None", AIAvailabilityEnum.NONE: "None",
@@ -15,23 +16,40 @@ cdef class AIAvailabilityStatus:
def __init__(self): def __init__(self):
self.status = AIAvailabilityEnum.NONE self.status = AIAvailabilityEnum.NONE
self.error_message = None self.error_message = None
self._lock = Lock()
def __str__(self): def __str__(self):
status_text = AIStatus2Text.get(self.status, "Unknown") self._lock.acquire()
error_text = self.error_message if self.error_message else "" try:
return f"{status_text} {error_text}" status_text = AIStatus2Text.get(self.status, "Unknown")
error_text = self.error_message if self.error_message else ""
return f"{status_text} {error_text}"
finally:
self._lock.release()
cdef bytes serialize(self): cdef bytes serialize(self):
return msgpack.packb({ self._lock.acquire()
"s": self.status, try:
"m": self.error_message return msgpack.packb({
}) "s": self.status,
"m": self.error_message
})
finally:
self._lock.release()
cdef set_status(self, AIAvailabilityEnum status, str error_message=None): cdef set_status(self, AIAvailabilityEnum status, str error_message=None):
self.status = status log_message = ""
self.error_message = error_message self._lock.acquire()
try:
self.status = status
self.error_message = error_message
status_text = AIStatus2Text.get(self.status, "Unknown")
error_text = self.error_message if self.error_message else ""
log_message = f"{status_text} {error_text}"
finally:
self._lock.release()
if error_message is not None: if error_message is not None:
constants_inf.logerror(<str>error_message) constants_inf.logerror(<str>error_message)
else: else:
constants_inf.log(<str>str(self)) constants_inf.log(<str>log_message)
+4 -1
View File
@@ -4,11 +4,12 @@ from annotation cimport Annotation, Detection
from ai_config cimport AIRecognitionConfig from ai_config cimport AIRecognitionConfig
from loader_client cimport LoaderClient from loader_client cimport LoaderClient
from inference_engine cimport InferenceEngine from inference_engine cimport InferenceEngine
from remote_command_handler_inf cimport RemoteCommandHandler
cdef class Inference: cdef class Inference:
cdef LoaderClient loader_client cdef LoaderClient loader_client
cdef InferenceEngine engine cdef InferenceEngine engine
cdef object on_annotation cdef RemoteCommandHandler remote_handler
cdef Annotation _previous_annotation cdef Annotation _previous_annotation
cdef dict[str, list(Detection)] _tile_detections cdef dict[str, list(Detection)] _tile_detections
cdef AIRecognitionConfig ai_config cdef AIRecognitionConfig ai_config
@@ -20,6 +21,7 @@ cdef class Inference:
cdef int model_height cdef int model_height
cdef bytes get_onnx_engine_bytes(self) cdef bytes get_onnx_engine_bytes(self)
cdef convert_and_upload_model(self, bytes onnx_engine_bytes, str engine_filename)
cdef init_ai(self) cdef init_ai(self)
cdef bint is_building_engine cdef bint is_building_engine
cdef bint is_video(self, str filepath) cdef bint is_video(self, str filepath)
@@ -28,6 +30,7 @@ cdef class Inference:
cdef _process_video(self, RemoteCommand cmd, AIRecognitionConfig ai_config, str video_name) cdef _process_video(self, RemoteCommand cmd, AIRecognitionConfig ai_config, str video_name)
cdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths) cdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths)
cdef _process_images_inner(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list frame_data) cdef _process_images_inner(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list frame_data)
cdef on_annotation(self, RemoteCommand cmd, Annotation annotation)
cdef split_to_tiles(self, frame, path, tile_size, overlap_percent) cdef split_to_tiles(self, frame, path, tile_size, overlap_percent)
cdef stop(self) cdef stop(self)
+55 -18
View File
@@ -11,6 +11,8 @@ from remote_command_inf cimport RemoteCommand
from annotation cimport Detection, Annotation from annotation cimport Detection, Annotation
from ai_config cimport AIRecognitionConfig from ai_config cimport AIRecognitionConfig
import pynvml import pynvml
from threading import Thread
from remote_command_inf cimport RemoteCommand, CommandType
cdef int tensor_gpu_index cdef int tensor_gpu_index
@@ -20,7 +22,7 @@ cdef int check_tensor_gpu_index():
deviceCount = pynvml.nvmlDeviceGetCount() deviceCount = pynvml.nvmlDeviceGetCount()
if deviceCount == 0: if deviceCount == 0:
constants_inf.logerror('No NVIDIA GPUs found.') constants_inf.logerror(<str>'No NVIDIA GPUs found.')
return -1 return -1
for i in range(deviceCount): for i in range(deviceCount):
@@ -28,10 +30,10 @@ cdef int check_tensor_gpu_index():
major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle) major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle)
if major > 6 or (major == 6 and minor >= 1): if major > 6 or (major == 6 and minor >= 1):
constants_inf.log('found NVIDIA GPU!') constants_inf.log(<str>'found NVIDIA GPU!')
return i return i
constants_inf.logerror('NVIDIA GPU doesnt support TensorRT!') constants_inf.logerror(<str>'NVIDIA GPU doesnt support TensorRT!')
return -1 return -1
except pynvml.NVMLError: except pynvml.NVMLError:
@@ -40,7 +42,7 @@ cdef int check_tensor_gpu_index():
try: try:
pynvml.nvmlShutdown() pynvml.nvmlShutdown()
except: except:
constants_inf.logerror('Failed to shutdown pynvml cause probably no NVIDIA GPU') constants_inf.logerror(<str>'Failed to shutdown pynvml cause probably no NVIDIA GPU')
pass pass
tensor_gpu_index = check_tensor_gpu_index() tensor_gpu_index = check_tensor_gpu_index()
@@ -51,9 +53,9 @@ else:
cdef class Inference: cdef class Inference:
def __init__(self, loader_client, on_annotation): def __init__(self, loader_client, remote_handler):
self.loader_client = loader_client self.loader_client = loader_client
self.on_annotation = on_annotation self.remote_handler = remote_handler
self.stop_signal = False self.stop_signal = False
self.model_input = None self.model_input = None
self.model_width = 0 self.model_width = 0
@@ -61,8 +63,10 @@ cdef class Inference:
self.engine = None self.engine = None
self.is_building_engine = False self.is_building_engine = False
self.ai_availability_status = AIAvailabilityStatus() self.ai_availability_status = AIAvailabilityStatus()
self._converted_model_bytes = None
self.init_ai() self.init_ai()
cdef bytes get_onnx_engine_bytes(self): cdef bytes get_onnx_engine_bytes(self):
models_dir = constants_inf.MODELS_FOLDER models_dir = constants_inf.MODELS_FOLDER
self.ai_availability_status.set_status(AIAvailabilityEnum.DOWNLOADING) self.ai_availability_status.set_status(AIAvailabilityEnum.DOWNLOADING)
@@ -71,15 +75,43 @@ cdef class Inference:
raise Exception(res.err) raise Exception(res.err)
return res.data return res.data
cdef convert_and_upload_model(self, bytes onnx_engine_bytes, str engine_filename):
try:
self.ai_availability_status.set_status(AIAvailabilityEnum.CONVERTING)
models_dir = constants_inf.MODELS_FOLDER
model_bytes = TensorRTEngine.convert_from_onnx(onnx_engine_bytes)
self.ai_availability_status.set_status(AIAvailabilityEnum.UPLOADING)
res = self.loader_client.upload_big_small_resource(model_bytes, engine_filename, models_dir)
if res.err is not None:
self.ai_availability_status.set_status(AIAvailabilityEnum.WARNING, <str>f"Failed to upload converted model: {res.err}")
self._converted_model_bytes = model_bytes
except Exception as e:
self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str> str(e))
self._converted_model_bytes = None
finally:
self.is_building_engine = False
cdef init_ai(self): cdef init_ai(self):
constants_inf.log(<str> 'init AI...') constants_inf.log(<str> 'init AI...')
try: try:
while self.is_building_engine:
time.sleep(1)
if self.engine is not None: if self.engine is not None:
return return
if self.is_building_engine:
return
if self._converted_model_bytes is not None:
try:
self.engine = TensorRTEngine(self._converted_model_bytes)
self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
self.model_height, self.model_width = self.engine.get_input_shape()
except Exception as e:
self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str> str(e))
finally:
self._converted_model_bytes = None # Consume the bytes
return
self.is_building_engine = True
models_dir = constants_inf.MODELS_FOLDER models_dir = constants_inf.MODELS_FOLDER
if tensor_gpu_index > -1: if tensor_gpu_index > -1:
try: try:
@@ -93,15 +125,12 @@ cdef class Inference:
except Exception as e: except Exception as e:
self.ai_availability_status.set_status(AIAvailabilityEnum.WARNING, <str>str(e)) self.ai_availability_status.set_status(AIAvailabilityEnum.WARNING, <str>str(e))
onnx_engine_bytes = self.get_onnx_engine_bytes() onnx_engine_bytes = self.get_onnx_engine_bytes()
self.ai_availability_status.set_status(AIAvailabilityEnum.CONVERTING) self.is_building_engine = True
model_bytes = TensorRTEngine.convert_from_onnx(onnx_engine_bytes)
self.engine = TensorRTEngine(model_bytes) thread = Thread(target=self.convert_and_upload_model, args=(onnx_engine_bytes, engine_filename))
self.ai_availability_status.set_status(AIAvailabilityEnum.UPLOADING) thread.daemon = True
res = self.loader_client.upload_big_small_resource(model_bytes, <str> engine_filename, models_dir) thread.start()
if res.err is not None: return
self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, res.err)
else:
self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
else: else:
self.engine = OnnxEngine(<bytes>self.get_onnx_engine_bytes()) self.engine = OnnxEngine(<bytes>self.get_onnx_engine_bytes())
self.is_building_engine = False self.is_building_engine = False
@@ -200,6 +229,11 @@ cdef class Inference:
self.stop_signal = False self.stop_signal = False
self.init_ai() self.init_ai()
if self.engine is None:
constants_inf.log(<str> "AI engine not available. Conversion may be in progress. Skipping inference.")
response = RemoteCommand(CommandType.AI_AVAILABILITY_RESULT, self.ai_availability_status.serialize())
self.remote_handler.send(cmd.client_id, response.serialize())
return
for m in ai_config.paths: for m in ai_config.paths:
if self.is_video(m): if self.is_video(m):
@@ -258,6 +292,9 @@ cdef class Inference:
batch_timestamps.clear() batch_timestamps.clear()
v_input.release() v_input.release()
cdef on_annotation(self, RemoteCommand cmd, Annotation annotation):
cdef RemoteCommand response = RemoteCommand(CommandType.INFERENCE_DATA, annotation.serialize())
self.remote_handler.send(cmd.client_id, response.serialize())
cdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths): cdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths):
cdef list frame_data cdef list frame_data
+2 -6
View File
@@ -24,7 +24,7 @@ cdef class CommandProcessor:
self.remote_handler.start() self.remote_handler.start()
self.running = True self.running = True
self.loader_client = LoaderClient(loader_zmq_host, loader_zmq_port) self.loader_client = LoaderClient(loader_zmq_host, loader_zmq_port)
self.inference = Inference(self.loader_client, self.on_annotation) self.inference = Inference(self.loader_client, self.remote_handler)
def start(self): def start(self):
while self.running: while self.running:
@@ -54,11 +54,7 @@ cdef class CommandProcessor:
else: else:
pass pass
except Exception as e: except Exception as e:
constants_inf.logerror(f"Error handling client: {e}") constants_inf.logerror(<str>f"Error handling client: {str(e)}")
cdef on_annotation(self, RemoteCommand cmd, Annotation annotation):
cdef RemoteCommand response = RemoteCommand(CommandType.INFERENCE_DATA, annotation.serialize())
self.remote_handler.send(cmd.client_id, response.serialize())
def stop(self): def stop(self):
self.inference.stop() self.inference.stop()
+3 -3
View File
@@ -100,14 +100,14 @@ cdef class TensorRTEngine(InferenceEngine):
return None return None
if builder.platform_has_fast_fp16: if builder.platform_has_fast_fp16:
constants_inf.log('Converting to supported fp16') constants_inf.log(<str>'Converting to supported fp16')
config.set_flag(trt.BuilderFlag.FP16) config.set_flag(trt.BuilderFlag.FP16)
else: else:
constants_inf.log('Converting to supported fp32. (fp16 is not supported)') constants_inf.log(<str>'Converting to supported fp32. (fp16 is not supported)')
plan = builder.build_serialized_network(network, config) plan = builder.build_serialized_network(network, config)
if plan is None: if plan is None:
constants_inf.logerror('Conversion failed.') constants_inf.logerror(<str>'Conversion failed.')
return None return None
constants_inf.log('conversion done!') constants_inf.log('conversion done!')
return bytes(plan) return bytes(plan)