# Source file: annotations/Azaion.Inference/inference.pyx
# Snapshot: 2025-02-06 10:48:03 +02:00 (189 lines, 7.1 KiB, Cython)

import mimetypes
import time
import cv2
import numpy as np
import onnxruntime as onnx
from remote_command cimport RemoteCommand
from annotation cimport Detection, Annotation
from ai_config cimport AIRecognitionConfig
cdef class Inference:
    """Run an ONNX detection model over an image or a video file.

    Every accepted result is wrapped in an ``Annotation`` and handed to the
    ``on_annotation`` callback supplied at construction time.
    """

    def __init__(self, model_bytes, on_annotation):
        """Create the ONNX Runtime session and read the model input geometry.

        Args:
            model_bytes: Model data handed to ``onnx.InferenceSession``.
            on_annotation: Callback invoked with the annotations produced by
                ``_process_video`` and ``_process_image``.
        """
        self.stop_signal = False
        self.session = onnx.InferenceSession(
            model_bytes, providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
        )
        self.on_annotation = on_annotation
        # Default configuration; replaced by the one carried in the command
        # every time run_inference() is called.
        self.ai_config = AIRecognitionConfig(4, 2, 0.25, 0.15, 15, 0.8, b'')

        model_inputs = self.session.get_inputs()
        self.model_input = model_inputs[0].name
        input_shape = model_inputs[0].shape
        # preprocess() feeds a (batch, channel, height, width) tensor, so
        # axis 2 is the height and axis 3 is the width.
        self.model_height = input_shape[2]
        self.model_width = input_shape[3]
        print(f'AI detection model input: {self.model_input} ({self.model_width}, {self.model_height})')

        model_meta = self.session.get_modelmeta()
        print("Metadata:", model_meta.custom_metadata_map)

    cdef preprocess(self, frame):
        """Convert a BGR frame into the float32 tensor fed to the session.

        The frame is converted to RGB, resized to the model input size,
        divided by 255, reordered to channel-first and given a leading batch
        axis of length one.
        """
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # cv2.resize takes the target size as (width, height).
        resized = cv2.resize(rgb, (self.model_width, self.model_height))
        scaled = np.array(resized) / 255.0
        channel_first = np.transpose(scaled, (2, 0, 1))
        return np.expand_dims(channel_first, axis=0).astype(np.float32)

    cdef postprocess(self, output, int img_width, int img_height):
        """Turn the raw session output into a list of ``Detection`` objects.

        ``output[0]`` is squeezed and transposed so that every row is one
        candidate: the first four values are read as box centre x, centre y,
        width and height, the remaining values as per-class scores.
        Candidates whose best class score reaches
        ``ai_config.probability_threshold`` are rescaled to the original image
        size and filtered with ``cv2.dnn.NMSBoxes``.

        Args:
            output: Result of ``session.run``.
            img_width: Width of the original frame in pixels.
            img_height: Height of the original frame in pixels.

        Returns:
            A list of ``Detection`` objects (possibly empty).
        """
        predictions = np.transpose(np.squeeze(output[0]))

        # Force floating-point division even when both operands are C ints.
        x_factor = <double>img_width / self.model_width
        y_factor = <double>img_height / self.model_height

        # Select the candidate rows in one vectorised pass instead of calling
        # np.amax once per row from Python.
        best_scores = np.amax(predictions[:, 4:], axis=1)
        keep = best_scores >= self.ai_config.probability_threshold

        boxes = []
        scores = []
        class_ids = []
        for row, score in zip(predictions[keep], best_scores[keep]):
            x, y, w, h = row[0], row[1], row[2], row[3]
            # Centre/size in model coordinates -> top-left/size in image pixels.
            left = int((x - w / 2) * x_factor)
            top = int((y - h / 2) * y_factor)
            width = int(w * x_factor)
            height = int(h * y_factor)
            class_ids.append(np.argmax(row[4:]))
            scores.append(score)
            boxes.append([left, top, width, height])

        indices = cv2.dnn.NMSBoxes(boxes, scores, self.ai_config.probability_threshold, 0.45)

        detections = []
        # Depending on the OpenCV build the indices come back as a flat
        # sequence, an N x 1 array or an empty tuple; flatten to handle all.
        for i in np.array(indices, dtype=int).flatten():
            x, y, w, h = boxes[i]
            detections.append(Detection(x, y, w, h, class_ids[i], scores[i]))
        return detections

    cdef bint is_video(self, str filepath):
        """Return True when the MIME type guessed from *filepath* is a video type."""
        mime_type, _ = mimetypes.guess_type(filepath)
        return mime_type is not None and mime_type.startswith("video")

    cdef run_inference(self, RemoteCommand cmd, int batch_size=8):
        """Process the file named in *cmd* as a video or as a single image.

        The recognition configuration is decoded from ``cmd.data`` and the
        stop flag is cleared before processing starts.

        Args:
            cmd: Command carrying the file name and the msgpack-encoded config.
            batch_size: Forwarded to ``_process_video``.
        """
        print('run inference..')
        self.ai_config = AIRecognitionConfig.from_msgpack(cmd.data)
        self.stop_signal = False
        if self.is_video(cmd.filename):
            self._process_video(cmd, batch_size)
        else:
            self._process_image(cmd)

    cdef _process_video(self, RemoteCommand cmd, int batch_size):
        """Run detection on every N-th frame of the video named in *cmd*.

        N is ``ai_config.frame_period_recognition``. Reading stops at the end
        of the stream or as soon as ``stop()`` has been called. *batch_size*
        is accepted but not used by this implementation.
        """
        frame_count = 0
        self._previous_annotation = None
        self.start_video_time = time.time()
        v_input = cv2.VideoCapture(<str>cmd.filename)
        try:
            while v_input.isOpened() and not self.stop_signal:
                ret, frame = v_input.read()
                if not ret or frame is None:
                    break
                frame_count += 1
                if frame_count % self.ai_config.frame_period_recognition != 0:
                    continue
                ms = int(v_input.get(cv2.CAP_PROP_POS_MSEC))
                annotation = self.detect_frame(frame, ms)
                if annotation is not None:
                    self._previous_annotation = annotation
                    # NOTE(review): the callback receives one argument here
                    # but (cmd, annotation) in _process_image - confirm which
                    # signature on_annotation actually expects.
                    self.on_annotation(annotation)
        finally:
            # Always give the capture handle back, even if detection raises.
            v_input.release()

    cdef detect_frame(self, frame, long time):
        """Run the model on one frame and build an annotation when warranted.

        Args:
            frame: Image array; its first two dimensions are read as
                height and width.
            time: Timestamp stored in the annotation (the video path passes
                the frame position in milliseconds, the image path passes 0).

        Returns:
            An ``Annotation`` carrying the JPEG-encoded frame when detections
            exist and ``is_valid_annotation`` accepts them, otherwise ``None``.
        """
        cdef Annotation annotation
        # The ``time`` parameter hides the ``time`` module inside this method,
        # so the wall clock is imported under a separate local name.
        from time import time as wall_clock

        img_height, img_width = frame.shape[:2]

        start_time = wall_clock()
        img_data = self.preprocess(frame)
        preprocess_time = wall_clock()
        outputs = self.session.run(None, {self.model_input: img_data})
        inference_time = wall_clock()
        detections = self.postprocess(outputs, img_width, img_height)
        postprocess_time = wall_clock()

        print(f'video time, ms: {time / 1000:.3f}. total time, s : {postprocess_time - self.start_video_time:.3f} '
              f'preprocess time: {preprocess_time - start_time:.3f}, inference time: {inference_time - preprocess_time:.3f},'
              f' postprocess time: {postprocess_time - inference_time:.3f}, total time: {postprocess_time - start_time:.3f}')

        if detections:
            annotation = Annotation(frame, time, detections)
            if self.is_valid_annotation(annotation):
                _, image = cv2.imencode('.jpg', frame)
                annotation.image = image.tobytes()
                return annotation
        return None

    cdef _process_image(self, RemoteCommand cmd):
        """Run detection on the image named in *cmd* and always report a result.

        When ``detect_frame`` yields nothing, an annotation with an empty
        detection list and the JPEG-encoded image is reported instead.
        """
        cdef Annotation annotation
        self._previous_annotation = None
        # detect_frame() logs elapsed time relative to start_video_time;
        # reset it so the figure refers to this request.
        self.start_video_time = time.time()
        # NOTE(review): cv2.imread returns None for an unreadable file, which
        # makes detect_frame fail on frame.shape - confirm the desired handling.
        frame = cv2.imread(<str>cmd.filename)
        annotation = self.detect_frame(frame, 0)
        if annotation is None:
            _, image = cv2.imencode('.jpg', frame)
            # Same timestamp (0) as the detect_frame call above.
            annotation = Annotation(frame, 0, [])
            annotation.image = image.tobytes()
        self.on_annotation(cmd, annotation)

    cdef stop(self):
        """Raise the stop flag; the video loop checks it before each frame read."""
        self.stop_signal = True

    cdef bint is_valid_annotation(self, Annotation annotation):
        """Decide whether *annotation* differs enough from the previous one.

        Returns True when any of the following holds:
          * there is no previous annotation;
          * at least ``ai_config.frame_recognition_seconds`` have passed since
            the previous annotation (annotation times are compared in
            milliseconds);
          * more objects were detected than in the previous annotation;
          * some detection is farther from every previous detection than
            ``ai_config.tracking_distance_confidence``;
          * some detection's confidence exceeds that of its nearest previous
            detection by ``ai_config.tracking_probability_increase``.

        Returns False when there are no detections or none of the above holds.
        """
        cdef:
            Detection current_det, prev_det
            double dx, dy, distance_sq, min_distance_sq
            Detection closest_det

        # No detections, invalid
        if not annotation.detections:
            return False

        # First valid annotation, always accept
        if self._previous_annotation is None:
            return True

        # Enough time has passed since last annotation
        if annotation.time >= self._previous_annotation.time + <long>(self.ai_config.frame_recognition_seconds * 1000):
            return True

        # More objects detected than before
        if len(annotation.detections) > len(self._previous_annotation.detections):
            return True

        # Check each detection against previous frame
        for current_det in annotation.detections:
            min_distance_sq = 1e18  # Initialize with large value
            closest_det = None

            # Find the closest detection in previous frame
            for prev_det in self._previous_annotation.detections:
                dx = current_det.x - prev_det.x
                dy = current_det.y - prev_det.y
                distance_sq = dx * dx + dy * dy
                if distance_sq < min_distance_sq:
                    min_distance_sq = distance_sq
                    closest_det = prev_det

            # Check if beyond tracking distance
            # NOTE(review): a squared distance is compared here - confirm that
            # tracking_distance_confidence is expressed in squared units.
            if min_distance_sq > self.ai_config.tracking_distance_confidence:
                return True

            # Check probability increase
            if current_det.confidence >= closest_det.confidence + self.ai_config.tracking_probability_increase:
                return True

        return False