mirror of
https://github.com/azaion/annotations.git
synced 2026-04-22 22:16:30 +00:00
fb11622c32
Step 1: can compile
91 lines
3.0 KiB
Cython
91 lines
3.0 KiB
Cython
# cython: language_level=3
|
|
from ultralytics import YOLO
|
|
import mimetypes
|
|
import cv2
|
|
from ultralytics.engine.results import Boxes
|
|
from processor_command import FileCommand
|
|
|
|
cdef class Inference:
    """Run YOLO tracking on an image or video file and report annotations.

    The model is created with ``YOLO(model_bytes)``. Results are delivered by
    calling ``on_annotations(cmd, annotations)`` where ``annotations`` is a
    list of ``Annotation`` objects.
    """

    def __init__(self, model_bytes, on_annotations):
        self.model = YOLO(model_bytes)
        self.on_annotations = on_annotations

    cdef bint is_video(self, str filepath):
        """Return True when the MIME type guessed from ``filepath`` is a video type."""
        mime_type, _ = mimetypes.guess_type(filepath)
        # guess_type() yields None for unknown extensions: treat as "not a video".
        return mime_type is not None and mime_type.startswith("video")

    cdef run_inference(self, cmd: FileCommand, int batch_size=8, int frame_skip=4):
        """Process ``cmd.filename`` as a video or as a single image.

        ``batch_size`` and ``frame_skip`` are only used on the video path.
        """
        if self.is_video(cmd.filename):
            return self._process_video(cmd, batch_size, frame_skip)
        return self._process_image(cmd)

    cdef _process_video(self, cmd: FileCommand, int batch_size, int frame_skip):
        """Track objects in every ``frame_skip``-th frame of a video.

        Sampled frames are sent to the model in batches of ``batch_size``.
        After each batch ``on_annotations`` is called once with the annotations
        of that batch that contain at least one detection (each annotation is
        reported exactly once). Frames left over when the video ends are
        processed as a final, smaller batch instead of being dropped.
        """
        # Non-positive values would divide by zero / never fill a batch.
        if frame_skip < 1:
            frame_skip = 1
        if batch_size < 1:
            batch_size = 1

        frame_count = 0
        pending = []  # (image, position in milliseconds) awaiting inference
        v_input = cv2.VideoCapture(<str>cmd.filename)
        try:
            while v_input.isOpened():
                ret, frame = v_input.read()
                ms = v_input.get(cv2.CAP_PROP_POS_MSEC)
                at_end = not ret or frame is None

                if not at_end:
                    frame_count += 1
                    if frame_count % frame_skip == 0:
                        pending.append((frame, ms))

                # Flush on a full batch, or on whatever is left at end of stream.
                if pending and (at_end or len(pending) >= batch_size):
                    images = [item[0] for item in pending]
                    results = self.model.track(images, persist=True)

                    annotations = []
                    for item, res in zip(pending, results):
                        annotation = self.process_detections(int(item[1]), item[0], res.boxes)
                        if len(annotation.detections) > 0:
                            annotations.append(annotation)
                    self.on_annotations(cmd, annotations)
                    pending.clear()

                if at_end:
                    break
        finally:
            # Always free the capture handle, even if the model or callback raises.
            v_input.release()

    cdef _process_image(self, cmd: FileCommand):
        """Run the model on a single image and report one annotation at time 0.

        Raises:
            ValueError: if ``cv2.imread`` cannot load ``cmd.filename``.
        """
        frame = cv2.imread(<str>cmd.filename)
        if frame is None:
            # imread() signals failure by returning None rather than raising.
            raise ValueError(f"Cannot read image: {cmd.filename}")
        res = self.model.track(frame)
        annotation = self.process_detections(0, frame, res[0].boxes)
        self.on_annotations(cmd, [annotation])

    cdef process_detections(self, float time, frame, boxes: Boxes):
        """Build an ``Annotation`` from one frame and its result boxes.

        Args:
            time: value stored on the annotation (callers pass the frame
                position in milliseconds, or 0 for still images).
            frame: the image to JPEG-encode into the annotation.
            boxes: iterable of result boxes; ``xywhn[0]`` and ``cls[0]`` of
                each box are copied into a ``Detection``.

        Raises:
            ValueError: if the frame cannot be encoded as JPEG.
        """
        detections = []
        for box in boxes:
            coords = box.xywhn[0].cpu().numpy()
            class_id = int(box.cls[0].cpu().numpy().item())
            detections.append(
                Detection(<double>coords[0], <double>coords[1],
                          <double>coords[2], <double>coords[3], class_id))

        # Encode the whole frame: callers already pass the image itself, so
        # indexing it with [0] would encode only its first pixel row.
        ok, encoded_image = cv2.imencode('.jpg', frame)
        if not ok:
            raise ValueError("Failed to encode frame as JPEG")
        return Annotation(encoded_image.tobytes(), time, detections)
|
|
|
|
|
|
cdef class Detection:
    """One detected box: four double coordinates (x, y, w, h) and an int class id.

    The fields are C-level attributes (not ``public``), so they are only
    reachable from Cython code.
    """
    cdef:
        double x, y, w, h
        int cls

    def __init__(self, double x, double y, double w, double h, int cls):
        self.x, self.y = x, y
        self.w, self.h = w, h
        self.cls = cls
|
|
|
|
cdef class Annotation:
    """Bundle an encoded image, a time value and the detections found in it."""

    def __init__(self, image_bytes: bytes, float time, detections: [Detection]):
        # Store the three constructor arguments unchanged.
        self.image, self.time, self.detections = image_bytes, time, detections
|