Add AIAvailabilityStatus and AIRecognitionConfig classes for AI model management

- Introduced `AIAvailabilityStatus` class to manage the availability status of AI models, including methods for setting status and logging messages.
- Added `AIRecognitionConfig` class to encapsulate configuration parameters for AI recognition, with a static method for creating instances from dictionaries.
- Implemented enums for AI availability states to enhance clarity and maintainability.
- Updated related Cython files to support the new classes and ensure proper type handling.

These changes aim to improve the structure and functionality of the AI model management system, facilitating better status tracking and configuration handling.
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-03-31 05:49:51 +03:00
parent fc57d677b4
commit 8ce40a9385
43 changed files with 1190 additions and 462 deletions
+426
View File
@@ -0,0 +1,426 @@
import mimetypes
from pathlib import Path
import cv2
cimport constants_inf
from ai_availability_status cimport AIAvailabilityEnum, AIAvailabilityStatus
from annotation cimport Detection, Annotation
from ai_config cimport AIRecognitionConfig
from engines.inference_engine cimport InferenceEngine
from loader_http_client cimport LoaderHttpClient
from threading import Thread
from engines import EngineClass
cdef class Inference:
    """Coordinates AI model loading, conversion and inference over images and video.

    Downloads model artifacts through a LoaderHttpClient, builds an
    InferenceEngine (falling back to converting a source model on a background
    thread when no pre-built engine exists), and runs detection over media
    files, reporting results through user-supplied callbacks.
    """
    cdef LoaderHttpClient loader_client        # fetches/uploads model files
    cdef InferenceEngine engine                # active engine; None until built
    cdef object _annotation_callback           # called as cb(annotation, percent)
    cdef object _status_callback               # called as cb(media_name, count)
    cdef Annotation _previous_annotation       # last reported video annotation (temporal dedup)
    cdef dict[str, list[Detection]] _tile_detections   # absolute-coord dets per media, for tile dedup
    cdef dict[str, int] detection_counts       # accepted annotations per media name
    cdef AIRecognitionConfig ai_config
    cdef bint stop_signal                      # set by stop() to abort the video read loop
    cdef public AIAvailabilityStatus ai_availability_status  # externally visible model status
    cdef str model_input
    cdef bytes _converted_model_bytes          # engine bytes produced by a background conversion
    cdef bint is_building_engine               # True while the conversion thread is running
def __init__(self, loader_client):
    """Initialize state and immediately attempt engine initialization.

    loader_client: LoaderHttpClient used for model download/upload.
    Note: init_ai() is invoked at construction time and may perform
    network I/O (model download) or spawn a conversion thread.
    """
    self.loader_client = loader_client
    self._annotation_callback = None
    self._status_callback = None
    self.stop_signal = <bint>False
    self.model_input = <str>None
    self.detection_counts = {}
    self.engine = <InferenceEngine>None
    self.is_building_engine = <bint>False
    self.ai_availability_status = AIAvailabilityStatus()
    self._converted_model_bytes = <bytes>None
    self.init_ai()
@property
def is_engine_ready(self):
    """Report whether an inference engine instance has been constructed."""
    return not (self.engine is None)
@property
def engine_name(self):
    """Name of the active engine, or None while no engine is loaded."""
    current_engine = self.engine
    if current_engine is None:
        return None
    return current_engine.engine_name
cdef bytes download_model(self, str filename):
    """Download a model file from the loader and return its raw bytes.

    Publishes the DOWNLOADING status first; raises Exception when the
    loader reports an error.
    """
    self.ai_availability_status.set_status(AIAvailabilityEnum.DOWNLOADING)
    result = self.loader_client.load_big_small_resource(filename, constants_inf.MODELS_FOLDER)
    if result.err is not None:
        raise Exception(result.err)
    return <bytes>result.data
cdef convert_and_upload_model(self, bytes source_bytes, str engine_filename):
    """Convert a source model to engine format and upload the result.

    Runs on a daemon thread started by init_ai. On success the converted
    bytes are stashed in _converted_model_bytes so the next init_ai call
    can build the engine from them; on failure the stash is cleared and
    an ERROR status is published. Always clears is_building_engine.
    """
    try:
        self.ai_availability_status.set_status(AIAvailabilityEnum.CONVERTING)
        models_dir = constants_inf.MODELS_FOLDER
        model_bytes = EngineClass.convert_from_source(source_bytes)
        self.ai_availability_status.set_status(AIAvailabilityEnum.UPLOADING)
        res = self.loader_client.upload_big_small_resource(model_bytes, engine_filename, models_dir)
        if res.err is not None:
            # NOTE(review): this WARNING is immediately overwritten by the
            # ENABLED status below — presumably intentional (the local bytes
            # are still usable even if upload failed), but confirm the
            # warning is surfaced/logged inside set_status.
            self.ai_availability_status.set_status(AIAvailabilityEnum.WARNING, <str>f"Failed to upload converted model: {res.err}")
        self._converted_model_bytes = model_bytes
        self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
    except Exception as e:
        self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str> str(e))
        self._converted_model_bytes = <bytes>None
    finally:
        self.is_building_engine = <bint>False
cdef init_ai(self):
    """Ensure an inference engine exists, downloading/building as needed.

    Resolution order:
      1. Engine already built or a build in progress -> no-op.
      2. Converted bytes left by a finished background conversion -> build
         the engine from them.
      3. A pre-built engine file is advertised -> download it and build.
      4. Download fails but a source model exists -> download the source and
         convert it on a daemon thread (engine available on a later call).
      5. No engine filename at all -> build directly from the raw ONNX model.
    Status transitions are published via ai_availability_status throughout.
    """
    constants_inf.log(<str> 'init AI...')
    try:
        if self.engine is not None:
            return
        if self.is_building_engine:
            return
        if self._converted_model_bytes is not None:
            # A background conversion finished; build from its output.
            try:
                self.engine = EngineClass(self._converted_model_bytes)
                self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
            except Exception as e:
                self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str> str(e))
            finally:
                # Consume the bytes either way so a bad build is not retried forever.
                self._converted_model_bytes = <bytes>None
            return
        models_dir = constants_inf.MODELS_FOLDER
        engine_filename = EngineClass.get_engine_filename()
        if engine_filename is not None:
            try:
                self.ai_availability_status.set_status(AIAvailabilityEnum.DOWNLOADING)
                res = self.loader_client.load_big_small_resource(engine_filename, models_dir)
                if res.err is not None:
                    raise Exception(res.err)
                self.engine = EngineClass(res.data)
                self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
            except Exception as e:
                # Pre-built engine unavailable; fall back to converting the source model.
                source_filename = EngineClass.get_source_filename()
                if source_filename is None:
                    self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str>f"Pre-built engine not found: {str(e)}")
                    return
                self.ai_availability_status.set_status(AIAvailabilityEnum.WARNING, <str>str(e))
                source_bytes = self.download_model(source_filename)
                self.is_building_engine = <bint>True
                thread = Thread(target=self.convert_and_upload_model, args=(source_bytes, engine_filename))
                thread.daemon = True
                thread.start()
                return
        else:
            self.engine = EngineClass(<bytes>self.download_model(constants_inf.AI_ONNX_MODEL_FILE))
            self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
        self.is_building_engine = <bint>False
    except Exception as e:
        self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str>str(e))
        self.is_building_engine = <bint>False
cdef bint is_video(self, str filepath):
    """Return True when the file's guessed MIME type is a video type."""
    guessed_type = mimetypes.guess_type(<str>filepath)[0]
    return <bint>(guessed_type is not None and guessed_type.startswith("video"))
cpdef run_detect(self, dict config_dict, object annotation_callback, object status_callback=None):
    """Run detection over every media path described by config_dict.

    config_dict: dict form of AIRecognitionConfig (parsed via from_dict).
    annotation_callback: called as cb(annotation, percent) per accepted detection.
    status_callback: optional; called as cb(media_name, count) per media.
    Raises Exception when the config dict cannot be parsed.
    Returns silently when no engine is available (e.g. conversion in progress).
    """
    cdef list[str] videos = []
    cdef list[str] images = []
    cdef AIRecognitionConfig ai_config = AIRecognitionConfig.from_dict(config_dict)
    if ai_config is None:
        raise Exception('ai recognition config is empty')
    self._annotation_callback = annotation_callback
    self._status_callback = status_callback
    self.stop_signal = <bint>False
    self.init_ai()
    if self.engine is None:
        constants_inf.log(<str> "AI engine not available. Conversion may be in progress. Skipping inference.")
        return
    self.detection_counts = {}
    for p in ai_config.paths:
        # Key must match Annotation.original_media_name (stem, spaces stripped).
        media_name = Path(<str>p).stem.replace(" ", "")
        self.detection_counts[media_name] = 0
        if self.is_video(p):
            videos.append(p)
        else:
            images.append(p)
    if len(images) > 0:
        constants_inf.log(<str>f'run inference on {" ".join(images)}...')
        self._process_images(ai_config, images)
    if len(videos) > 0:
        for v in videos:
            constants_inf.log(<str>f'run inference on {v}...')
            self._process_video(ai_config, v)
cdef _process_video(self, AIRecognitionConfig ai_config, str video_name):
    """Run batched inference over a video file.

    Samples every ai_config.frame_period_recognition-th frame, groups the
    sampled frames into batches capped by the engine's max batch size, and
    hands each full batch to _process_video_batch. A final partial batch is
    flushed after the read loop. Honors stop() between frames.
    """
    cdef int frame_count = 0
    cdef int batch_count = 0
    cdef list batch_frames = []
    cdef list[long] batch_timestamps = []
    cdef int model_h, model_w
    # Reset temporal-dedup state so the first annotation of this video
    # is always considered new.
    self._previous_annotation = <Annotation>None
    model_h, model_w = self.engine.get_input_shape()
    v_input = cv2.VideoCapture(<str>video_name)
    if not v_input.isOpened():
        constants_inf.logerror(<str>f'Failed to open video: {video_name}')
        return
    total_frames = int(v_input.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = v_input.get(cv2.CAP_PROP_FPS)
    width = int(v_input.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(v_input.get(cv2.CAP_PROP_FRAME_HEIGHT))
    constants_inf.log(<str>f'Video: {total_frames} frames, {fps:.1f} fps, {width}x{height}')
    # Batch size is capped by what the engine supports; never below 1.
    cdef int effective_batch = min(self.engine.max_batch_size, ai_config.model_batch_size)
    if effective_batch < 1:
        effective_batch = 1
    while v_input.isOpened() and not self.stop_signal:
        ret, frame = v_input.read()
        if not ret or frame is None:
            break
        frame_count += 1
        if frame_count % ai_config.frame_period_recognition == 0:
            batch_frames.append(frame)
            batch_timestamps.append(<long>v_input.get(cv2.CAP_PROP_POS_MSEC))
            if len(batch_frames) >= effective_batch:
                batch_count += 1
                # CAP_PROP_FRAME_COUNT can legitimately be 0 (streams, some
                # containers); guard the percentage like on_annotation does
                # to avoid ZeroDivisionError.
                percent = frame_count * 100 // total_frames if total_frames > 0 else 0
                constants_inf.log(<str>f'Video batch {batch_count}: frame {frame_count}/{total_frames} ({percent}%)')
                self._process_video_batch(ai_config, batch_frames, batch_timestamps, video_name, frame_count, total_frames, model_w)
                batch_frames = []
                batch_timestamps = []
    if batch_frames:
        batch_count += 1
        constants_inf.log(<str>f'Video batch {batch_count} (flush): {len(batch_frames)} remaining frames')
        self._process_video_batch(ai_config, batch_frames, batch_timestamps, video_name, frame_count, total_frames, model_w)
    v_input.release()
    constants_inf.log(<str>f'Video done: {frame_count} frames read, {batch_count} batches processed')
    self.send_detection_status()
cdef _process_video_batch(self, AIRecognitionConfig ai_config, list batch_frames,
                          list batch_timestamps, str video_name,
                          int frame_count, int total_frames, int model_w):
    """Run the engine on one batch of sampled frames and report valid results."""
    cdef Annotation annotation
    list_detections = self.engine.process_frames(batch_frames, ai_config)
    total_dets = sum(len(d) for d in list_detections)
    if total_dets > 0:
        constants_inf.log(<str>f'Video batch: {total_dets} detections from postprocess')
    for i in range(len(list_detections)):
        detections = list_detections[i]
        original_media_name = Path(<str>video_name).stem.replace(" ", "")
        # Frame annotations are named <media>_<formatted timestamp>.
        name = f'{original_media_name}_{constants_inf.format_time(batch_timestamps[i])}'
        annotation = Annotation(name, original_media_name, batch_timestamps[i], detections)
        if detections:
            valid = self.is_valid_video_annotation(annotation, ai_config, model_w)
            constants_inf.log(<str>f'Video frame {name}: {len(detections)} dets, valid={valid}')
            if valid:
                _, image = cv2.imencode('.jpg', batch_frames[i])
                annotation.image = image.tobytes()
                # Remember this frame for temporal dedup of following frames.
                self._previous_annotation = annotation
                self.on_annotation(annotation, frame_count, total_frames)
        else:
            # NOTE(review): result discarded — presumably called only for its
            # tile-dedup side effect (remove_tiled_duplicates); confirm.
            self.is_valid_video_annotation(annotation, ai_config, model_w)
cdef on_annotation(self, Annotation annotation, int frame_count=0, int total_frames=0):
    """Record one accepted annotation and forward it to the callback."""
    key = annotation.original_media_name
    self.detection_counts[key] = self.detection_counts.get(key, 0) + 1
    if self._annotation_callback is not None:
        if total_frames > 0:
            percent = int(frame_count * 100 / total_frames)
        else:
            percent = 0
        self._annotation_callback(annotation, percent)
cdef _process_images(self, AIRecognitionConfig ai_config, list[str] image_paths):
    """Run detection over still images, tiling oversized images first.

    Images within 1.5x of the model input size in both dimensions are
    processed whole; larger ones are split into tiles sized from the ground
    sampling distance. All frames/tiles go to the engine in one batch call.
    """
    cdef list all_frame_data = []   # tuples of (frame, media_name, annotation_name, gsd)
    cdef double ground_sampling_distance
    cdef int model_h, model_w
    model_h, model_w = self.engine.get_input_shape()
    # Reset tile-dedup state for this run.
    self._tile_detections = {}
    for path in image_paths:
        frame = cv2.imread(<str>path)
        if frame is None:
            constants_inf.logerror(<str>f'Failed to read image {path}')
            continue
        img_h, img_w, _ = frame.shape
        original_media_name = Path(<str> path).stem.replace(" ", "")
        # Meters per pixel from camera geometry:
        # sensor width * altitude / (focal length * image width).
        ground_sampling_distance = ai_config.sensor_width * ai_config.altitude / (ai_config.focal_length * img_w)
        constants_inf.log(<str>f'ground sampling distance: {ground_sampling_distance}')
        if img_h <= 1.5 * model_h and img_w <= 1.5 * model_w:
            all_frame_data.append((frame, original_media_name, f'{original_media_name}_000000', ground_sampling_distance))
        else:
            # Tile size in pixels covering METERS_IN_TILE meters of ground.
            tile_size = int(constants_inf.METERS_IN_TILE / ground_sampling_distance)
            constants_inf.log(<str> f'calc tile size: {tile_size}')
            res = self.split_to_tiles(frame, path, tile_size, ai_config.big_image_tile_overlap_percent)
            for tile_frame, omn, tile_name in res:
                all_frame_data.append((tile_frame, omn, tile_name, ground_sampling_distance))
    if not all_frame_data:
        return
    frames = [fd[0] for fd in all_frame_data]
    all_dets = self.engine.process_frames(frames, ai_config)
    for i in range(len(all_dets)):
        frame_entry = all_frame_data[i]
        f = frame_entry[0]
        original_media_name = frame_entry[1]
        name = frame_entry[2]
        gsd = frame_entry[3]
        annotation = Annotation(name, original_media_name, 0, all_dets[i])
        if self.is_valid_image_annotation(annotation, gsd, f.shape):
            constants_inf.log(<str> f'Detected {annotation}')
            _, image = cv2.imencode('.jpg', f)
            annotation.image = image.tobytes()
            self.on_annotation(annotation)
    self.send_detection_status()
cdef send_detection_status(self):
    """Report per-media detection counts via the status callback.

    Iterates items() directly instead of keys()-plus-lookup; counts are
    cleared after reporting so each run's totals are delivered once.
    """
    if self._status_callback is not None:
        cb = self._status_callback
        for media_name, count in self.detection_counts.items():
            cb(media_name, count)
        self.detection_counts.clear()
cdef split_to_tiles(self, frame, path, tile_size, overlap_percent):
    """Split a large image into overlapping square tiles.

    Returns a list of (tile, original_media_name, tile_name) tuples. Tile
    names embed the tile size and x/y offsets so detections can later be
    mapped back to absolute image coordinates (see remove_tiled_duplicates).
    """
    constants_inf.log(<str>f'splitting image {path} to tiles...')
    img_h, img_w, _ = frame.shape
    # Stride shrinks with the requested overlap percentage.
    stride_w = int(tile_size * (1 - overlap_percent / 100))
    stride_h = int(tile_size * (1 - overlap_percent / 100))
    results = []
    original_media_name = Path(<str> path).stem.replace(" ", "")
    for y in range(0, img_h, stride_h):
        for x in range(0, img_w, stride_w):
            x_end = min(x + tile_size, img_w)
            y_end = min(y + tile_size, img_h)
            if x_end - x < tile_size:
                # Partial tile at the right edge: skip it when the previous
                # tile already covered the remainder, otherwise snap the tile
                # flush to the image border.
                if img_w - (x - stride_w) <= tile_size:
                    continue
                x = img_w - tile_size
            if y_end - y < tile_size:
                # Same edge handling for the bottom border.
                if img_h - (y - stride_h) <= tile_size:
                    continue
                y = img_h - tile_size
            tile = frame[y:y_end, x:x_end]
            # Name encodes tile size and offsets; '!' terminates the numeric part.
            name = f'{original_media_name}{constants_inf.SPLIT_SUFFIX}{tile_size:04d}_{x:04d}_{y:04d}!_000000'
            results.append((tile, original_media_name, name))
    return results
cpdef stop(self):
    """Signal the running video-processing loop to stop at the next frame."""
    self.stop_signal = True
cdef remove_tiled_duplicates(self, Annotation annotation):
    """Drop detections already seen in overlapping tiles of the same media.

    Parses the tile size and x/y offsets out of the annotation name (format
    produced by split_to_tiles), converts each detection to absolute image
    coordinates, and keeps only detections whose absolute form has not been
    recorded for this media yet. Mutates annotation.detections in place.
    """
    right = annotation.name.rindex('!')
    left = annotation.name.index(constants_inf.SPLIT_SUFFIX) + len(constants_inf.SPLIT_SUFFIX)
    tile_size_str, x_str, y_str = annotation.name[left:right].split('_')
    tile_size = int(tile_size_str)
    x = int(x_str)
    y = int(y_str)
    cdef list[Detection] unique_detections = []
    existing_abs_detections = self._tile_detections.setdefault(annotation.original_media_name, [])
    for det in annotation.detections:
        # det.x/y/w/h are presumably tile-relative fractions (scaled by
        # tile_size here) — TODO confirm against Detection producer.
        x1 = det.x * tile_size
        y1 = det.y * tile_size
        det_abs = Detection(x + x1, y + y1, det.w * tile_size, det.h * tile_size, det.cls, det.confidence)
        # NOTE(review): membership relies on Detection equality semantics;
        # this list scan is O(n) per detection.
        if det_abs not in existing_abs_detections:
            unique_detections.append(det)
            existing_abs_detections.append(det_abs)
    annotation.detections = unique_detections
cdef bint is_valid_image_annotation(self, Annotation annotation, double ground_sampling_distance, frame_shape):
    """Filter an image annotation's detections by physical object size.

    Tile annotations are first de-duplicated against neighboring tiles.
    Each detection's width/height in meters (fraction * pixels * GSD) must
    not exceed the class's max_object_size_meters. Mutates
    annotation.detections; returns True when any detection survives.
    """
    if constants_inf.SPLIT_SUFFIX in annotation.name:
        self.remove_tiled_duplicates(annotation)
    img_h, img_w, _ = frame_shape
    if annotation.detections:
        constants_inf.log(<str> f'Initial ann: {annotation}')
    cdef list[Detection] valid_detections = []
    for det in annotation.detections:
        m_w = det.w * img_w * ground_sampling_distance
        m_h = det.h * img_h * ground_sampling_distance
        # Look up the class entry once per detection instead of three times.
        cls_entry = constants_inf.annotations_dict[det.cls]
        max_size = cls_entry.max_object_size_meters
        if m_w <= max_size and m_h <= max_size:
            valid_detections.append(det)
            constants_inf.log(<str> f'Kept ({m_w} {m_h}) <= {max_size}. class: {cls_entry.name}')
        else:
            constants_inf.log(<str> f'Removed ({m_w} {m_h}) > {max_size}. class: {cls_entry.name}')
    annotation.detections = valid_detections
    if not annotation.detections:
        return <bint>False
    return <bint>True
cdef bint is_valid_video_annotation(self, Annotation annotation, AIRecognitionConfig ai_config, int model_w):
    """Decide whether a video-frame annotation is novel enough to report.

    Accepts the annotation when: it is the first one; enough time has
    elapsed since the last reported frame; it contains more detections; any
    detection is farther than the tracking radius from every previous
    detection; or any detection's confidence grew by at least the configured
    increase over its nearest previous detection. Tile annotations are
    de-duplicated first; empty annotations are always rejected.
    """
    if constants_inf.SPLIT_SUFFIX in annotation.name:
        self.remove_tiled_duplicates(annotation)
    if not annotation.detections:
        return <bint>False
    if self._previous_annotation is None:
        return <bint>True
    # Enough time has passed since the last reported frame.
    if annotation.time >= self._previous_annotation.time + <long>(ai_config.frame_recognition_seconds * 1000):
        return <bint>True
    # More objects than before.
    if len(annotation.detections) > len(self._previous_annotation.detections):
        return <bint>True
    cdef:
        Detection current_det, prev_det
        double dx, dy, distance_sq, min_distance_sq
        Detection closest_det
    # The tracking radius is loop-invariant; compute its square once
    # instead of on every outer iteration.
    dist_px = ai_config.tracking_distance_confidence * model_w
    dist_px_sq = dist_px * dist_px
    for current_det in annotation.detections:
        min_distance_sq = <double>1e18
        closest_det = <Detection>None
        # Find the nearest previous detection (non-empty: the count check
        # above guarantees previous detections exist here).
        for prev_det in self._previous_annotation.detections:
            dx = current_det.x - prev_det.x
            dy = current_det.y - prev_det.y
            distance_sq = dx * dx + dy * dy
            if distance_sq < min_distance_sq:
                min_distance_sq = distance_sq
                closest_det = prev_det
        if min_distance_sq > dist_px_sq:
            return <bint>True
        if current_det.confidence >= closest_det.confidence + ai_config.tracking_probability_increase:
            return <bint>True
    return <bint>False