Files
detections/src/inference.pyx
T
Oleksandr Bezdieniezhnykh 8116b55813 [AZ-180] Refactor inference and engine factory for improved model handling
- Updated the autopilot state to reflect the current task status as in progress.
- Refactored the inference module to streamline model downloading and conversion processes, replacing the download_model method with a more efficient load_source method.
- Introduced asynchronous model building in the inference module to enhance performance during model conversion.
- Enhanced the engine factory to include a new method for building and caching models, improving error handling and logging during the upload process.
- Added calibration cache handling in the Jetson TensorRT engine for better resource management.

Made-with: Cursor
2026-04-03 06:41:11 +03:00

473 lines
21 KiB
Cython

import io
import threading
import av
import cv2
import numpy as np
cimport constants_inf
from ai_availability_status cimport AIAvailabilityEnum, AIAvailabilityStatus
from annotation cimport Detection, Annotation
from ai_config cimport AIRecognitionConfig
from engines.inference_engine cimport InferenceEngine
from loader_http_client cimport LoaderHttpClient
from threading import Thread
from engines import engine_factory
def ai_config_from_dict(dict data):
    """Build an AIRecognitionConfig from a plain dict via its factory method."""
    cfg = AIRecognitionConfig.from_dict(data)
    return cfg
def _write_video_bytes_to_path(str path, bytes data, object done_event):
    """Write *data* to *path*, then signal *done_event* — even if the write fails.

    Intended as a Thread target so the caller can overlap the disk write with
    other work and wait on the event for completion.
    """
    try:
        out = open(path, 'wb')
        try:
            out.write(data)
        finally:
            out.close()
    finally:
        # The event must always fire, otherwise the waiter deadlocks.
        done_event.set()
cdef class Inference:
    """Object detection over images and videos with a lazily initialized engine.

    The engine comes from ``engine_factory``: a cached engine is loaded when
    available; otherwise the source model is downloaded and, when the factory
    declares a build step, converted on a background thread.  Progress and
    failures are published through ``ai_availability_status``.  Detections are
    delivered via a per-call annotation callback; per-media detection counts
    are reported through an optional status callback.
    """

    # HTTP client used to download cached engines / source models.
    cdef LoaderHttpClient loader_client
    # Active inference engine; None until init_ai() succeeds.
    cdef InferenceEngine engine
    # Per-run callback: cb(annotation, percent_done).
    cdef object _annotation_callback
    # Per-run callback: cb(media_name, detection_count).
    cdef object _status_callback
    # Last accepted video annotation (input to the tracking heuristic).
    cdef Annotation _previous_annotation
    # Absolute-coordinate detections per media name, for tile de-duplication.
    cdef dict[str, list[Detection]] _tile_detections
    # Number of accepted annotations per media name.
    cdef dict[str, int] detection_counts
    cdef AIRecognitionConfig ai_config
    # Set by stop() to abort a running video decode loop.
    cdef bint stop_signal
    cdef public AIAvailabilityStatus ai_availability_status
    cdef str model_input
    # Engine bytes produced by the background build; consumed by init_ai().
    cdef bytes _converted_model_bytes
    # True while the background conversion thread is running.
    cdef bint is_building_engine

    def __init__(self, loader_client):
        """Store the loader client, reset all state, then attempt engine init."""
        self.loader_client = loader_client
        self._annotation_callback = None
        self._status_callback = None
        self.stop_signal = <bint>False
        self.model_input = <str>None
        self.detection_counts = {}
        self.engine = <InferenceEngine>None
        self.is_building_engine = <bint>False
        self.ai_availability_status = AIAvailabilityStatus()
        self._converted_model_bytes = <bytes>None
        self.init_ai()

    @property
    def is_engine_ready(self):
        """True once an inference engine has been created."""
        return self.engine is not None

    @property
    def engine_name(self):
        """Name of the active engine, or None when no engine is loaded."""
        if self.engine is not None:
            return self.engine.engine_name
        return None

    cdef _build_engine_async(self, bytes source_bytes, str models_dir):
        """Convert *source_bytes* into engine bytes on a background thread.

        The result is stashed in ``_converted_model_bytes``; the next
        ``init_ai()`` call turns it into an engine.  Status transitions:
        CONVERTING -> ENABLED on success, ERROR on failure.
        """
        try:
            self.ai_availability_status.set_status(AIAvailabilityEnum.CONVERTING)
            engine_bytes = engine_factory.build_and_cache(source_bytes, self.loader_client, models_dir)
            self._converted_model_bytes = engine_bytes
            self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
        except Exception as e:
            self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str>str(e))
            self._converted_model_bytes = <bytes>None
        finally:
            # Always clear the flag so init_ai() can retry after a failure.
            self.is_building_engine = <bint>False

    cdef init_ai(self):
        """Create the inference engine if possible; otherwise start/await a build.

        Idempotent: returns immediately when an engine already exists or a
        background build is in flight.  Otherwise it (1) consumes a finished
        background build, (2) loads a cached engine, or (3) downloads the
        source model and either converts it asynchronously (factory has a
        build step) or creates the engine directly.
        """
        constants_inf.log(<str> 'init AI...')
        try:
            if self.engine is not None:
                return
            if self.is_building_engine:
                return
            if self._converted_model_bytes is not None:
                # A background build finished: create the engine from its bytes.
                try:
                    self.engine = engine_factory.create(self._converted_model_bytes)
                    self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
                except Exception as e:
                    self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str> str(e))
                finally:
                    # Release the converted bytes whether creation succeeded or not.
                    self._converted_model_bytes = <bytes>None
                return
            models_dir = constants_inf.MODELS_FOLDER
            self.ai_availability_status.set_status(AIAvailabilityEnum.DOWNLOADING)
            engine = engine_factory.load_engine(self.loader_client, models_dir)
            if engine is not None:
                self.engine = engine
                self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
                return
            source_filename = engine_factory.get_source_filename()
            if source_filename is None:
                self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str>"No engine available and no source to build from")
                return
            source_bytes = engine_factory.load_source(self.loader_client, models_dir)
            if engine_factory.has_build_step:
                # Conversion can be slow; run it on a daemon thread and report
                # WARNING so callers know inference is temporarily unavailable.
                self.ai_availability_status.set_status(AIAvailabilityEnum.WARNING, <str>"Cached engine not found, converting from source")
                self.is_building_engine = <bint>True
                # NOTE(review): _build_engine_async is a cdef method; passing it as a
                # Thread target assumes it is referenceable as a Python callable —
                # confirm this works, a def/cpdef method would be safer.
                thread = Thread(target=self._build_engine_async, args=(source_bytes, models_dir))
                thread.daemon = True
                thread.start()
            else:
                self.engine = engine_factory.create(source_bytes)
                self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
        except Exception as e:
            self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str>str(e))
            self.is_building_engine = <bint>False

    cpdef run_detect_image(self, bytes image_bytes, AIRecognitionConfig ai_config, str media_name,
                           object annotation_callback, object status_callback=None):
        """Run detection on a single encoded image.

        Decodes *image_bytes*, splits large images into tiles, runs the engine
        over all tiles/frames and reports results through the callbacks.
        Silently returns when the engine is unavailable, the payload is empty,
        or decoding fails.
        """
        cdef list all_frame_data = []
        cdef str original_media_name
        self._annotation_callback = annotation_callback
        self._status_callback = status_callback
        self.stop_signal = <bint>False
        self.init_ai()
        if self.engine is None:
            constants_inf.log(<str> "AI engine not available. Conversion may be in progress. Skipping inference.")
            return
        if not image_bytes:
            return
        frame = cv2.imdecode(np.frombuffer(image_bytes, dtype=np.uint8), cv2.IMREAD_COLOR)
        if frame is None:
            constants_inf.logerror(<str>'Failed to decode image bytes')
            return
        # Media names are used as dict keys and in generated tile names; strip spaces.
        original_media_name = media_name.replace(" ", "")
        self.detection_counts = {}
        self.detection_counts[original_media_name] = 0
        self._tile_detections = {}
        self._append_image_frame_entries(ai_config, all_frame_data, frame, original_media_name)
        self._finalize_image_inference(ai_config, all_frame_data)

    cpdef run_detect_video(self, bytes video_bytes, AIRecognitionConfig ai_config, str media_name, str save_path,
                           object annotation_callback, object status_callback=None):
        """Run detection on an in-memory video while saving it to *save_path*.

        The raw bytes are written to disk on a parallel thread; decoding and
        inference run from an in-memory copy via PyAV.  The writer is always
        awaited before returning.
        """
        cdef str original_media_name
        self._annotation_callback = annotation_callback
        self._status_callback = status_callback
        self.stop_signal = <bint>False
        self.init_ai()
        if self.engine is None:
            constants_inf.log(<str> "AI engine not available. Conversion may be in progress. Skipping inference.")
            return
        if not video_bytes:
            return
        original_media_name = media_name.replace(" ", "")
        self.detection_counts = {}
        self.detection_counts[original_media_name] = 0
        # Persist the video concurrently with inference.
        writer_done = threading.Event()
        wt = threading.Thread(
            target=_write_video_bytes_to_path,
            args=(save_path, video_bytes, writer_done),
            daemon=True,
        )
        wt.start()
        try:
            bio = io.BytesIO(video_bytes)
            container = av.open(bio)
            try:
                self._process_video_pyav(ai_config, original_media_name, container)
            finally:
                container.close()
        finally:
            # Ensure the file write completed before returning to the caller.
            writer_done.wait()
            wt.join(timeout=3600)

    cpdef run_detect_video_stream(self, object readable, AIRecognitionConfig ai_config, str media_name,
                                  object annotation_callback, object status_callback=None):
        """Run detection on a streamed video given any PyAV-openable readable."""
        cdef str original_media_name
        self._annotation_callback = annotation_callback
        self._status_callback = status_callback
        self.stop_signal = <bint>False
        self.init_ai()
        if self.engine is None:
            constants_inf.log(<str> "AI engine not available. Conversion may be in progress. Skipping inference.")
            return
        original_media_name = media_name.replace(" ", "")
        self.detection_counts = {}
        self.detection_counts[original_media_name] = 0
        container = av.open(readable)
        try:
            self._process_video_pyav(ai_config, original_media_name, container)
        finally:
            container.close()

    cdef _process_video_pyav(self, AIRecognitionConfig ai_config, str original_media_name, object container):
        """Decode *container* with PyAV, batch sampled frames and run inference.

        Frames are sampled every ``frame_period_recognition`` frames, collected
        into batches capped by the engine's max batch size and the configured
        model batch size, then sent to ``_process_video_batch``.  A progress
        annotation (empty detections) is emitted after each batch.  Honors
        ``stop_signal``.
        """
        cdef int frame_count = 0
        cdef int batch_count = 0
        cdef list batch_frames = []
        cdef list[long] batch_timestamps = []
        cdef int model_h, model_w
        cdef int total_frames
        cdef int tf
        cdef double duration_sec
        cdef double fps
        self._previous_annotation = <Annotation>None
        model_h, model_w = self.engine.get_input_shape()
        streams = container.streams.video
        if not streams:
            constants_inf.logerror(<str>'No video stream in container')
            self.send_detection_status()
            return
        vstream = streams[0]
        # Estimate total frames for progress reporting: prefer the stream's
        # frame count, otherwise derive from duration * fps (fallback 25 fps).
        total_frames = 0
        if vstream.frames is not None and int(vstream.frames) > 0:
            total_frames = int(vstream.frames)
        else:
            duration_sec = 0.0
            if vstream.duration is not None and vstream.time_base is not None:
                duration_sec = float(vstream.duration * vstream.time_base)
            fps = 25.0
            if vstream.average_rate is not None:
                fps = float(vstream.average_rate)
            if duration_sec > 0:
                total_frames = int(duration_sec * fps)
            if total_frames < 1:
                total_frames = 1
        tf = total_frames
        constants_inf.log(<str>f'Video (PyAV): ~{tf} frames est, {vstream.width}x{vstream.height}')
        cdef int effective_batch = min(self.engine.max_batch_size, ai_config.model_batch_size)
        if effective_batch < 1:
            effective_batch = 1
        for av_frame in container.decode(vstream):
            if self.stop_signal:
                break
            frame_count += 1
            arr = av_frame.to_ndarray(format='bgr24')
            if frame_count % ai_config.frame_period_recognition == 0:
                # Timestamp in ms: prefer frame.time, fall back to pts * time_base.
                ts_ms = 0
                if av_frame.time is not None:
                    ts_ms = int(av_frame.time * 1000)
                elif av_frame.pts is not None and vstream.time_base is not None:
                    ts_ms = int(float(av_frame.pts) * float(vstream.time_base) * 1000)
                batch_frames.append(arr)
                batch_timestamps.append(<long>ts_ms)
                if len(batch_frames) >= effective_batch:
                    batch_count += 1
                    tf = total_frames if total_frames > 0 else max(frame_count, 1)
                    constants_inf.log(<str>f'Video batch {batch_count}: frame {frame_count}/{tf} ({frame_count*100//tf}%)')
                    last_ts = batch_timestamps[len(batch_timestamps) - 1] if batch_timestamps else 0
                    self._process_video_batch(ai_config, batch_frames, batch_timestamps, original_media_name, frame_count, tf, model_w)
                    if self._annotation_callback is not None:
                        # Empty-detections annotation used purely as a progress tick.
                        pann = Annotation(original_media_name, original_media_name, last_ts, [])
                        cb = self._annotation_callback
                        cb(pann, int(frame_count * 100 / tf))
                    batch_frames = []
                    batch_timestamps = []
        if batch_frames:
            # Flush the final partial batch.
            batch_count += 1
            tf = total_frames if total_frames > 0 else max(frame_count, 1)
            constants_inf.log(<str>f'Video batch {batch_count} (flush): {len(batch_frames)} remaining frames')
            last_ts = batch_timestamps[len(batch_timestamps) - 1] if batch_timestamps else 0
            self._process_video_batch(ai_config, batch_frames, batch_timestamps, original_media_name, frame_count, tf, model_w)
            if self._annotation_callback is not None:
                pann = Annotation(original_media_name, original_media_name, last_ts, [])
                cb = self._annotation_callback
                cb(pann, 100)
        constants_inf.log(<str>f'Video done: {frame_count} frames read, {batch_count} batches processed')
        self.send_detection_status()

    cdef _process_video_batch(self, AIRecognitionConfig ai_config, list batch_frames,
                              list batch_timestamps, str original_media_name,
                              int frame_count, int total_frames, int model_w):
        """Run the engine on one batch of frames and emit valid annotations.

        Each frame's detections are wrapped in an Annotation; frames that pass
        ``is_valid_video_annotation`` get a JPEG snapshot attached and are
        reported via ``on_annotation``.
        """
        cdef Annotation annotation
        list_detections = self.engine.process_frames(batch_frames, ai_config)
        total_dets = sum(len(d) for d in list_detections)
        if total_dets > 0:
            constants_inf.log(<str>f'Video batch: {total_dets} detections from postprocess')
        for i in range(len(list_detections)):
            detections = list_detections[i]
            name = f'{original_media_name}_{constants_inf.format_time(batch_timestamps[i])}'
            annotation = Annotation(name, original_media_name, batch_timestamps[i], detections)
            if detections:
                valid = self.is_valid_video_annotation(annotation, ai_config, model_w)
                constants_inf.log(<str>f'Video frame {name}: {len(detections)} dets, valid={valid}')
                if valid:
                    _, image = cv2.imencode('.jpg', batch_frames[i])
                    annotation.image = image.tobytes()
                    # Remember the last accepted annotation for the tracking heuristic.
                    self._previous_annotation = annotation
                    self.on_annotation(annotation, frame_count, total_frames)
            else:
                # Still run validation for its side effects (tile de-duplication state).
                self.is_valid_video_annotation(annotation, ai_config, model_w)

    cdef on_annotation(self, Annotation annotation, int frame_count=0, int total_frames=0):
        """Count the annotation for its media and forward it to the callback."""
        self.detection_counts[annotation.original_media_name] = self.detection_counts.get(annotation.original_media_name, 0) + 1
        if self._annotation_callback is not None:
            percent = int(frame_count * 100 / total_frames) if total_frames > 0 else 0
            cb = self._annotation_callback
            cb(annotation, percent)

    cdef _append_image_frame_entries(self, AIRecognitionConfig ai_config, list all_frame_data, frame, str original_media_name):
        """Append (frame, media, name, gsd) entries for *frame*, tiling if large.

        Ground sampling distance (meters/pixel) is derived from the camera
        config.  Images within 1.5x the model input size are processed whole;
        larger images are split into overlapping tiles sized to cover
        METERS_IN_TILE on the ground.
        """
        cdef double ground_sampling_distance
        cdef int model_h, model_w
        cdef int img_h, img_w
        model_h, model_w = self.engine.get_input_shape()
        img_h, img_w, _ = frame.shape
        ground_sampling_distance = ai_config.sensor_width * ai_config.altitude / (ai_config.focal_length * img_w)
        constants_inf.log(<str>f'ground sampling distance: {ground_sampling_distance}')
        if img_h <= 1.5 * model_h and img_w <= 1.5 * model_w:
            all_frame_data.append((frame, original_media_name, f'{original_media_name}_000000', ground_sampling_distance))
        else:
            tile_size = int(constants_inf.METERS_IN_TILE / ground_sampling_distance)
            constants_inf.log(<str> f'calc tile size: {tile_size}')
            res = self.split_to_tiles(frame, original_media_name, tile_size, ai_config.big_image_tile_overlap_percent)
            for tile_frame, omn, tile_name in res:
                all_frame_data.append((tile_frame, omn, tile_name, ground_sampling_distance))

    cdef _finalize_image_inference(self, AIRecognitionConfig ai_config, list all_frame_data):
        """Run the engine over all collected image frames/tiles and report results."""
        if not all_frame_data:
            return
        frames = [fd[0] for fd in all_frame_data]
        all_dets = self.engine.process_frames(frames, ai_config)
        for i in range(len(all_dets)):
            frame_entry = all_frame_data[i]
            f = frame_entry[0]
            original_media_name = frame_entry[1]
            name = frame_entry[2]
            gsd = frame_entry[3]
            annotation = Annotation(name, original_media_name, 0, all_dets[i])
            if self.is_valid_image_annotation(annotation, gsd, f.shape):
                constants_inf.log(<str> f'Detected {annotation}')
                _, image = cv2.imencode('.jpg', f)
                annotation.image = image.tobytes()
                self.on_annotation(annotation)
        self.send_detection_status()

    cdef send_detection_status(self):
        """Report per-media detection counts via the status callback, then reset."""
        if self._status_callback is not None:
            cb = self._status_callback
            for media_name in self.detection_counts.keys():
                cb(media_name, self.detection_counts[media_name])
        self.detection_counts.clear()

    cdef split_to_tiles(self, frame, str media_stem, tile_size, overlap_percent):
        """Split *frame* into overlapping square tiles of *tile_size* pixels.

        Edge tiles are shifted back so every tile is full-sized; a shifted tile
        is skipped when the previous stride already covered that region.
        Returns a list of (tile, original_media_name, tile_name) where the tile
        name encodes size and offsets for later de-duplication.
        """
        constants_inf.log(<str>f'splitting image {media_stem} to tiles...')
        img_h, img_w, _ = frame.shape
        stride_w = int(tile_size * (1 - overlap_percent / 100))
        stride_h = int(tile_size * (1 - overlap_percent / 100))
        results = []
        original_media_name = media_stem
        for y in range(0, img_h, stride_h):
            for x in range(0, img_w, stride_w):
                x_end = min(x + tile_size, img_w)
                y_end = min(y + tile_size, img_h)
                if x_end - x < tile_size:
                    if img_w - (x - stride_w) <= tile_size:
                        continue
                    x = img_w - tile_size
                if y_end - y < tile_size:
                    if img_h - (y - stride_h) <= tile_size:
                        continue
                    y = img_h - tile_size
                tile = frame[y:y_end, x:x_end]
                name = f'{original_media_name}{constants_inf.SPLIT_SUFFIX}{tile_size:04d}_{x:04d}_{y:04d}!_000000'
                results.append((tile, original_media_name, name))
        return results

    cpdef stop(self):
        """Request abort of the current video processing loop."""
        self.stop_signal = <bint>True

    cdef remove_tiled_duplicates(self, Annotation annotation):
        """Drop detections already seen in overlapping tiles of the same media.

        Parses tile size and x/y offsets from the annotation name (written by
        ``split_to_tiles``), converts each detection to absolute image
        coordinates, and keeps only detections whose absolute box has not been
        recorded for this media yet.  Mutates ``annotation.detections``.
        """
        right = annotation.name.rindex('!')
        left = annotation.name.index(constants_inf.SPLIT_SUFFIX) + len(constants_inf.SPLIT_SUFFIX)
        tile_size_str, x_str, y_str = annotation.name[left:right].split('_')
        tile_size = int(tile_size_str)
        x = int(x_str)
        y = int(y_str)
        cdef list[Detection] unique_detections = []
        existing_abs_detections = self._tile_detections.setdefault(annotation.original_media_name, [])
        for det in annotation.detections:
            # Detection coords are tile-relative fractions; scale to pixels.
            x1 = det.x * tile_size
            y1 = det.y * tile_size
            det_abs = Detection(x + x1, y + y1, det.w * tile_size, det.h * tile_size, det.cls, det.confidence)
            if det_abs not in existing_abs_detections:
                unique_detections.append(det)
                existing_abs_detections.append(det_abs)
        annotation.detections = unique_detections

    cdef bint is_valid_image_annotation(self, Annotation annotation, double ground_sampling_distance, frame_shape):
        """Filter detections by physical size and return whether any remain.

        Tile duplicates are removed first; then each detection's real-world
        width/height (via the ground sampling distance) is checked against the
        per-class maximum object size.  Mutates ``annotation.detections``.
        """
        if constants_inf.SPLIT_SUFFIX in annotation.name:
            self.remove_tiled_duplicates(annotation)
        img_h, img_w, _ = frame_shape
        if annotation.detections:
            constants_inf.log(<str> f'Initial ann: {annotation}')
        cdef list[Detection] valid_detections = []
        for det in annotation.detections:
            # Convert fractional box size to meters on the ground.
            m_w = det.w * img_w * ground_sampling_distance
            m_h = det.h * img_h * ground_sampling_distance
            max_size = constants_inf.annotations_dict[det.cls].max_object_size_meters
            if m_w <= max_size and m_h <= max_size:
                valid_detections.append(det)
                constants_inf.log(<str> f'Kept ({m_w} {m_h}) <= {max_size}. class: {constants_inf.annotations_dict[det.cls].name}')
            else:
                constants_inf.log(<str> f'Removed ({m_w} {m_h}) > {max_size}. class: {constants_inf.annotations_dict[det.cls].name}')
        annotation.detections = valid_detections
        if not annotation.detections:
            return <bint>False
        return <bint>True

    cdef bint is_valid_video_annotation(self, Annotation annotation, AIRecognitionConfig ai_config, int model_w):
        """Decide whether a video annotation is novel enough to report.

        Accepts when: there is no previous annotation; enough time passed
        since the last accepted one; the detection count grew; any detection
        moved farther than the tracking distance from its nearest previous
        match; or its confidence rose by the configured margin.  Otherwise
        rejects as a duplicate of the previous annotation.
        """
        if constants_inf.SPLIT_SUFFIX in annotation.name:
            self.remove_tiled_duplicates(annotation)
        if not annotation.detections:
            return <bint>False
        if self._previous_annotation is None:
            return <bint>True
        if annotation.time >= self._previous_annotation.time + <long>(ai_config.frame_recognition_seconds * 1000):
            return <bint>True
        if len(annotation.detections) > len(self._previous_annotation.detections):
            return <bint>True
        cdef:
            Detection current_det, prev_det
            double dx, dy, distance_sq, min_distance_sq
            Detection closest_det
        for current_det in annotation.detections:
            # Nearest-neighbour search against the previous annotation.
            min_distance_sq = <double>1e18
            closest_det = <Detection>None
            for prev_det in self._previous_annotation.detections:
                dx = current_det.x - prev_det.x
                dy = current_det.y - prev_det.y
                distance_sq = dx * dx + dy * dy
                if distance_sq < min_distance_sq:
                    min_distance_sq = distance_sq
                    closest_det = prev_det
            # Compare squared distances to avoid a sqrt.
            dist_px = ai_config.tracking_distance_confidence * model_w
            dist_px_sq = dist_px * dist_px
            if min_distance_sq > dist_px_sq:
                return <bint>True
            if current_det.confidence >= closest_det.confidence + ai_config.tracking_probability_increase:
                return <bint>True
        return <bint>False