diff --git a/Azaion.Annotator/AnnotatorEventHandler.cs b/Azaion.Annotator/AnnotatorEventHandler.cs
index 3973894..86944c4 100644
--- a/Azaion.Annotator/AnnotatorEventHandler.cs
+++ b/Azaion.Annotator/AnnotatorEventHandler.cs
@@ -483,7 +483,7 @@ public class AnnotatorEventHandler(
             mainWindow.AIDetectBtn.IsEnabled = e.Status == AIAvailabilityEnum.Enabled;
             mainWindow.StatusHelp.Text = e.ToString();
         });
-1        if (e.Status is AIAvailabilityEnum.Enabled or AIAvailabilityEnum.Error)
+        if (e.Status is AIAvailabilityEnum.Enabled or AIAvailabilityEnum.Error)
             await inferenceService.CheckAIAvailabilityTokenSource.CancelAsync();
     }
 }
\ No newline at end of file
diff --git a/Azaion.Common/Services/Inference/InferenceClient.cs b/Azaion.Common/Services/Inference/InferenceClient.cs
index 2e612b9..89da10a 100644
--- a/Azaion.Common/Services/Inference/InferenceClient.cs
+++ b/Azaion.Common/Services/Inference/InferenceClient.cs
@@ -49,7 +49,7 @@ public class InferenceClient : IInferenceClient
                 Arguments = $"-p {_inferenceClientConfig.ZeroMqPort} -lp {_loaderClientConfig.ZeroMqPort} -a {_inferenceClientConfig.ApiUrl}",
                 CreateNoWindow = true
             };
-            process.Start();
+            //process.Start();
         }
         catch (Exception e)
         {
diff --git a/Azaion.Inference/ai_availability_status.pxd b/Azaion.Inference/ai_availability_status.pxd
index 5501113..8c1df52 100644
--- a/Azaion.Inference/ai_availability_status.pxd
+++ b/Azaion.Inference/ai_availability_status.pxd
@@ -4,11 +4,13 @@ cdef enum AIAvailabilityEnum:
     CONVERTING = 20
     UPLOADING = 30
     ENABLED = 200
+    WARNING = 300
     ERROR = 500
 
 cdef class AIAvailabilityStatus:
     cdef AIAvailabilityEnum status
     cdef str error_message
+    cdef object _lock
 
     cdef bytes serialize(self)
    cdef set_status(self, AIAvailabilityEnum status, str error_message=*)
\ No newline at end of file
diff --git a/Azaion.Inference/ai_availability_status.pyx b/Azaion.Inference/ai_availability_status.pyx
index 9f2e4d0..a35d460 100644
--- a/Azaion.Inference/ai_availability_status.pyx
+++ b/Azaion.Inference/ai_availability_status.pyx
@@ -1,5 +1,6 @@
 cimport constants_inf
 import msgpack
+from threading import Lock
 
 AIStatus2Text = {
     AIAvailabilityEnum.NONE: "None",
@@ -15,23 +16,40 @@ cdef class AIAvailabilityStatus:
     def __init__(self):
         self.status = AIAvailabilityEnum.NONE
         self.error_message = None
+        self._lock = Lock()
 
     def __str__(self):
-        status_text = AIStatus2Text.get(self.status, "Unknown")
-        error_text = self.error_message if self.error_message else ""
-        return f"{status_text} {error_text}"
+        self._lock.acquire()
+        try:
+            status_text = AIStatus2Text.get(self.status, "Unknown")
+            error_text = self.error_message if self.error_message else ""
+            return f"{status_text} {error_text}"
+        finally:
+            self._lock.release()
 
     cdef bytes serialize(self):
-        return msgpack.packb({
-            "s": self.status,
-            "m": self.error_message
-        })
+        self._lock.acquire()
+        try:
+            return msgpack.packb({
+                "s": self.status,
+                "m": self.error_message
+            })
+        finally:
+            self._lock.release()
 
     cdef set_status(self, AIAvailabilityEnum status, str error_message=None):
-        self.status = status
-        self.error_message = error_message
+        log_message = ""
+        self._lock.acquire()
+        try:
+            self.status = status
+            self.error_message = error_message
+            status_text = AIStatus2Text.get(self.status, "Unknown")
+            error_text = self.error_message if self.error_message else ""
+            log_message = f"{status_text} {error_text}"
+        finally:
+            self._lock.release()
+
         if error_message is not None:
             constants_inf.logerror(error_message)
         else:
-            constants_inf.log(str(self))
-
+            constants_inf.log(log_message)
\ No newline at end of file
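A note on the locking pattern just introduced: __str__, serialize and set_status all take the same lock, so a status/error_message pair is always observed together, and set_status deliberately builds its log text inside the critical section instead of calling str(self), which would try to re-acquire the non-reentrant threading.Lock from the same thread and deadlock. A minimal plain-Python sketch of the same pattern (hypothetical names, print standing in for the logger):

from threading import Lock

class StatusHolder:
    def __init__(self):
        self._lock = Lock()      # threading.Lock is non-reentrant
        self.status = "NONE"
        self.error_message = None

    def set_status(self, status, error_message=None):
        with self._lock:         # 'with' is the plain spelling of the diff's acquire/try/finally
            self.status = status
            self.error_message = error_message
            text = f"{self.status} {self.error_message or ''}"
        print(text)              # log from the local copy, never via str(self) under the lock

    def __str__(self):
        with self._lock:
            return f"{self.status} {self.error_message or ''}"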
diff --git a/Azaion.Inference/inference.pxd b/Azaion.Inference/inference.pxd
index 6cea5a4..ebafc21 100644
--- a/Azaion.Inference/inference.pxd
+++ b/Azaion.Inference/inference.pxd
@@ -4,11 +4,12 @@ from annotation cimport Annotation, Detection
 from ai_config cimport AIRecognitionConfig
 from loader_client cimport LoaderClient
 from inference_engine cimport InferenceEngine
+from remote_command_handler_inf cimport RemoteCommandHandler
 
 cdef class Inference:
     cdef LoaderClient loader_client
     cdef InferenceEngine engine
-    cdef object on_annotation
+    cdef RemoteCommandHandler remote_handler
     cdef Annotation _previous_annotation
     cdef dict[str, list(Detection)] _tile_detections
     cdef AIRecognitionConfig ai_config
@@ -20,6 +21,7 @@ cdef class Inference:
     cdef int model_height
 
     cdef bytes get_onnx_engine_bytes(self)
+    cpdef convert_and_upload_model(self, bytes onnx_engine_bytes, str engine_filename)
     cdef init_ai(self)
     cdef bint is_building_engine
     cdef bint is_video(self, str filepath)
@@ -28,6 +30,7 @@ cdef class Inference:
     cdef _process_video(self, RemoteCommand cmd, AIRecognitionConfig ai_config, str video_name)
     cdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths)
     cdef _process_images_inner(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list frame_data)
+    cdef on_annotation(self, RemoteCommand cmd, Annotation annotation)
     cdef split_to_tiles(self, frame, path, tile_size, overlap_percent)
     cdef stop(self)
 
diff --git a/Azaion.Inference/inference.pyx b/Azaion.Inference/inference.pyx
index 34c8f22..2e04ee1 100644
--- a/Azaion.Inference/inference.pyx
+++ b/Azaion.Inference/inference.pyx
@@ -11,6 +11,8 @@ from remote_command_inf cimport RemoteCommand
 from annotation cimport Detection, Annotation
 from ai_config cimport AIRecognitionConfig
 import pynvml
+from threading import Thread
+from remote_command_inf cimport RemoteCommand, CommandType
 
 cdef int tensor_gpu_index
 
@@ -20,7 +22,7 @@ cdef int check_tensor_gpu_index():
         deviceCount = pynvml.nvmlDeviceGetCount()
 
         if deviceCount == 0:
-            constants_inf.logerror('No NVIDIA GPUs found.') 
+            constants_inf.logerror('No NVIDIA GPUs found.')
             return -1
 
         for i in range(deviceCount):
@@ -28,10 +30,10 @@ cdef int check_tensor_gpu_index():
             major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle)
 
             if major > 6 or (major == 6 and minor >= 1):
-                constants_inf.log('found NVIDIA GPU!') 
+                constants_inf.log('found NVIDIA GPU!')
                 return i
 
-        constants_inf.logerror('NVIDIA GPU doesnt support TensorRT!') 
+        constants_inf.logerror('NVIDIA GPU does not support TensorRT!')
         return -1
 
     except pynvml.NVMLError:
@@ -40,7 +42,7 @@ cdef int check_tensor_gpu_index():
     try:
        pynvml.nvmlShutdown()
     except:
-        constants_inf.logerror('Failed to shutdown pynvml cause probably no NVIDIA GPU') 
+        constants_inf.logerror('Failed to shut down pynvml; probably no NVIDIA GPU')
        pass
 
 tensor_gpu_index = check_tensor_gpu_index()
@@ -51,9 +53,9 @@ else:
 
 
 cdef class Inference:
-    def __init__(self, loader_client, on_annotation):
+    def __init__(self, loader_client, remote_handler):
         self.loader_client = loader_client
-        self.on_annotation = on_annotation
+        self.remote_handler = remote_handler
         self.stop_signal = False
         self.model_input = None
         self.model_width = 0
@@ -61,8 +63,10 @@ cdef class Inference:
         self.engine = None
         self.is_building_engine = False
         self.ai_availability_status = AIAvailabilityStatus()
+        self._converted_model_bytes = None
 
         self.init_ai()
+
     cdef bytes get_onnx_engine_bytes(self):
         models_dir = constants_inf.MODELS_FOLDER
         self.ai_availability_status.set_status(AIAvailabilityEnum.DOWNLOADING)
@@ -71,15 +75,43 @@ cdef class Inference:
             raise Exception(res.err)
         return res.data
 
+    cpdef convert_and_upload_model(self, bytes onnx_engine_bytes, str engine_filename):
+        try:
+            self.ai_availability_status.set_status(AIAvailabilityEnum.CONVERTING)
+            models_dir = constants_inf.MODELS_FOLDER
+            model_bytes = TensorRTEngine.convert_from_onnx(onnx_engine_bytes)
+
+            self.ai_availability_status.set_status(AIAvailabilityEnum.UPLOADING)
+            res = self.loader_client.upload_big_small_resource(model_bytes, engine_filename, models_dir)
+            if res.err is not None:
+                self.ai_availability_status.set_status(AIAvailabilityEnum.WARNING, f"Failed to upload converted model: {res.err}")
+
+            self._converted_model_bytes = model_bytes
+        except Exception as e:
+            self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, str(e))
+            self._converted_model_bytes = None
+        finally:
+            self.is_building_engine = False
+
     cdef init_ai(self):
         constants_inf.log( 'init AI...')
         try:
-            while self.is_building_engine:
-                time.sleep(1)
             if self.engine is not None:
                 return
+            if self.is_building_engine:
+                return
+
+            if self._converted_model_bytes is not None:
+                try:
+                    self.engine = TensorRTEngine(self._converted_model_bytes)
+                    self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
+                    self.model_height, self.model_width = self.engine.get_input_shape()
+                except Exception as e:
+                    self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, str(e))
+                finally:
+                    self._converted_model_bytes = None  # Consume the bytes
+                return
 
-            self.is_building_engine = True
             models_dir = constants_inf.MODELS_FOLDER
             if tensor_gpu_index > -1:
                 try:
@@ -93,15 +125,12 @@ cdef class Inference:
             except Exception as e:
                 self.ai_availability_status.set_status(AIAvailabilityEnum.WARNING, str(e))
                 onnx_engine_bytes = self.get_onnx_engine_bytes()
-                self.ai_availability_status.set_status(AIAvailabilityEnum.CONVERTING)
-                model_bytes = TensorRTEngine.convert_from_onnx(onnx_engine_bytes)
-                self.engine = TensorRTEngine(model_bytes)
-                self.ai_availability_status.set_status(AIAvailabilityEnum.UPLOADING)
-                res = self.loader_client.upload_big_small_resource(model_bytes, engine_filename, models_dir)
-                if res.err is not None:
-                    self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, res.err)
-                else:
-                    self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
+                self.is_building_engine = True
+
+                thread = Thread(target=self.convert_and_upload_model, args=(onnx_engine_bytes, engine_filename))
+                thread.daemon = True
+                thread.start()
+                return
         else:
             self.engine = OnnxEngine(self.get_onnx_engine_bytes())
         self.is_building_engine = False
@@ -200,6 +229,11 @@ cdef class Inference:
 
         self.stop_signal = False
         self.init_ai()
+        if self.engine is None:
+            constants_inf.log("AI engine not available. Conversion may be in progress. Skipping inference.")
+            response = RemoteCommand(CommandType.AI_AVAILABILITY_RESULT, self.ai_availability_status.serialize())
+            self.remote_handler.send(cmd.client_id, response.serialize())
+            return
 
         for m in ai_config.paths:
             if self.is_video(m):
@@ -258,6 +292,9 @@ cdef class Inference:
                 batch_timestamps.clear()
 
         v_input.release()
 
+    cdef on_annotation(self, RemoteCommand cmd, Annotation annotation):
+        cdef RemoteCommand response = RemoteCommand(CommandType.INFERENCE_DATA, annotation.serialize())
+        self.remote_handler.send(cmd.client_id, response.serialize())
 
     cdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths):
         cdef list frame_data
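The init_ai rework above replaces the old blocking wait (while is_building_engine: time.sleep(1)) with a non-blocking handoff: the first call kicks conversion onto a daemon thread and returns immediately, and a later init_ai call consumes _converted_model_bytes and constructs the engine on the calling thread. A compressed, runnable sketch of that handoff (hypothetical names, stubbed converter):

import time
from threading import Thread

def convert(onnx_bytes):                  # stand-in for TensorRTEngine.convert_from_onnx
    time.sleep(2)
    return b"engine:" + onnx_bytes

class EngineBuilder:
    def __init__(self):
        self.engine = None
        self.is_building = False
        self._converted = None

    def ensure_engine(self, onnx_bytes):
        if self.engine is not None or self.is_building:
            return                        # ready, or a build is already running
        if self._converted is not None:
            self.engine = self._converted # consume the worker's result here,
            self._converted = None        # on the caller's thread
            return
        self.is_building = True
        Thread(target=self._convert, args=(onnx_bytes,), daemon=True).start()

    def _convert(self, onnx_bytes):
        try:
            self._converted = convert(onnx_bytes)
        finally:
            self.is_building = False      # cleared last, like the diff's finally

Note that the engine object itself is still constructed in init_ai rather than on the worker thread, presumably so the TensorRT/CUDA resources are created on the thread that will actually run inference.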
diff --git a/Azaion.Inference/main_inference.pyx b/Azaion.Inference/main_inference.pyx
index fea5ddd..55f0371 100644
--- a/Azaion.Inference/main_inference.pyx
+++ b/Azaion.Inference/main_inference.pyx
@@ -24,7 +24,7 @@ cdef class CommandProcessor:
         self.remote_handler.start()
         self.running = True
         self.loader_client = LoaderClient(loader_zmq_host, loader_zmq_port)
-        self.inference = Inference(self.loader_client, self.on_annotation)
+        self.inference = Inference(self.loader_client, self.remote_handler)
 
     def start(self):
         while self.running:
@@ -54,11 +54,7 @@ cdef class CommandProcessor:
                 else:
                     pass
             except Exception as e:
-                constants_inf.logerror(f"Error handling client: {e}")
-
-    cdef on_annotation(self, RemoteCommand cmd, Annotation annotation):
-        cdef RemoteCommand response = RemoteCommand(CommandType.INFERENCE_DATA, annotation.serialize())
-        self.remote_handler.send(cmd.client_id, response.serialize())
+                constants_inf.logerror(f"Error handling client: {str(e)}")
 
     def stop(self):
         self.inference.stop()
diff --git a/Azaion.Inference/tensorrt_engine.pyx b/Azaion.Inference/tensorrt_engine.pyx
index a0a03e7..e9a7cd1 100644
--- a/Azaion.Inference/tensorrt_engine.pyx
+++ b/Azaion.Inference/tensorrt_engine.pyx
@@ -100,14 +100,14 @@ cdef class TensorRTEngine(InferenceEngine):
             return None
 
         if builder.platform_has_fast_fp16:
-            constants_inf.log('Converting to supported fp16') 
+            constants_inf.log('Converting to supported fp16')
             config.set_flag(trt.BuilderFlag.FP16)
         else:
-            constants_inf.log('Converting to supported fp32. (fp16 is not supported)') 
+            constants_inf.log('Converting to supported fp32. (fp16 is not supported)')
 
         plan = builder.build_serialized_network(network, config)
         if plan is None:
-            constants_inf.logerror('Conversion failed.') 
+            constants_inf.logerror('Conversion failed.')
             return None
         constants_inf.log('conversion done!')
         return bytes(plan)
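Finally, on the wiring: on_annotation moved out of CommandProcessor and into Inference, which now receives the RemoteCommandHandler directly and answers the originating client itself, both for inference results and for the new "engine not ready" availability reply. A toy sketch of that reply path (stub classes, hypothetical payload framing):

class StubHandler:                        # stands in for RemoteCommandHandler
    def send(self, client_id, payload):
        print(f"-> {client_id}: {payload!r}")

class StubInference:
    def __init__(self, handler):
        self.remote_handler = handler     # injected, as in the new __init__

    def on_annotation(self, client_id, annotation_bytes):
        # mirrors the relocated on_annotation: wrap the result and
        # reply straight to the client that issued the command
        self.remote_handler.send(client_id, b"INFERENCE_DATA:" + annotation_bytes)

StubInference(StubHandler()).on_annotation("client-1", b"payload")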