rewrite inference and file loading to cython

Step 1: can compile
2026-04-22 12:46:30 +00:00 · 2025-01-15 16:43:56 +02:00
parent 1bc1d81fde
commit fb11622c32
12 changed files with 619 additions and 1 deletions
@@ -0,0 +1,90 @@
+# cython: language_level=3
+from ultralytics import YOLO
+import mimetypes
+import cv2
+from ultralytics.engine.results import Boxes
+from processor_command import FileCommand
+
+cdef class Inference:
+    """Run YOLO tracking on an image or video file and report annotations.
+
+    Results are not returned to the caller; they are delivered through the
+    ``on_annotations(cmd, annotations)`` callback supplied at construction.
+    """
+
+    def __init__(self, model_bytes, on_annotations):
+        """Create the model wrapper.
+
+        Args:
+            model_bytes: Passed unchanged to ``YOLO(...)``.
+            on_annotations: Callable invoked as
+                ``on_annotations(cmd, annotations)`` with the command being
+                processed and a list of ``Annotation`` objects.
+        """
+        self.model = YOLO(model_bytes)
+        self.on_annotations = on_annotations
+
+    cdef bint is_video(self, str filepath):
+        """Return True when the guessed MIME type of *filepath* starts with "video"."""
+        mime_type, _ = mimetypes.guess_type(filepath)
+        # guess_type yields None for unknown extensions; test for it explicitly
+        # so a real boolean (not None) is what gets coerced to bint.
+        return mime_type is not None and mime_type.startswith("video")
+
+    cdef run_inference(self, cmd: FileCommand, int batch_size=8, int frame_skip=4):
+        """Dispatch *cmd* to video or image processing based on its filename.
+
+        Args:
+            cmd: Command whose ``filename`` attribute names the media file.
+            batch_size: Number of sampled frames sent to the model per call
+                (video only).
+            frame_skip: Only every ``frame_skip``-th frame is sampled
+                (video only).
+        """
+        if self.is_video(cmd.filename):
+            return self._process_video(cmd, batch_size, frame_skip)
+        else:
+            return self._process_image(cmd)
+
+    cdef _process_video(self, cmd: FileCommand, int batch_size, int frame_skip):
+        """Track objects through a video in batches of sampled frames.
+
+        Every ``frame_skip``-th frame is collected together with the capture
+        position in milliseconds. Each time ``batch_size`` frames have been
+        collected, or the stream ends with frames still pending, the batch is
+        run through the model and ``on_annotations`` is called once with the
+        annotations of that batch that contain at least one detection.
+        """
+        frame_count = 0
+        pending = []  # (image, position_ms) tuples awaiting inference
+        v_input = cv2.VideoCapture(<str>cmd.filename)
+
+        try:
+            while v_input.isOpened():
+                ret, image = v_input.read()
+                ms = v_input.get(cv2.CAP_PROP_POS_MSEC)
+                end_of_stream = not ret or image is None
+
+                if not end_of_stream:
+                    frame_count += 1
+                    if frame_count % frame_skip == 0:
+                        pending.append((image, ms))
+
+                # Flush on a full batch, and also at end of stream so the
+                # trailing partial batch is not silently dropped.
+                if pending and (end_of_stream or len(pending) >= batch_size):
+                    images = [item[0] for item in pending]
+                    results = self.model.track(images, persist=True)
+
+                    # Fresh list per batch: reusing one list across batches
+                    # would re-send every earlier annotation on each callback.
+                    annotations = []
+                    for (batch_image, batch_ms), res in zip(pending, results):
+                        annotation = self.process_detections(int(batch_ms), batch_image, res.boxes)
+                        if len(annotation.detections) > 0:
+                            annotations.append(annotation)
+                    self.on_annotations(cmd, annotations)
+                    pending.clear()
+
+                if end_of_stream:
+                    break
+        finally:
+            # Release the capture even if the model or the callback raises.
+            v_input.release()
+
+    cdef _process_image(self, cmd: FileCommand):
+        """Run the model on a single image and report its annotation.
+
+        Raises:
+            ValueError: If OpenCV cannot read the file as an image.
+        """
+        frame = cv2.imread(<str>cmd.filename)
+        # cv2.imread signals failure by returning None rather than raising.
+        if frame is None:
+            raise ValueError(f"Cannot read image file: {cmd.filename}")
+        res = self.model.track(frame)
+        annotation = self.process_detections(0, frame, res[0].boxes)
+        self.on_annotations(cmd, [annotation])
+
+    cdef process_detections(self, float time, frame, boxes: Boxes):
+        """Build an ``Annotation`` for one frame.
+
+        Args:
+            time: Timestamp stored on the annotation (callers in this class
+                pass the capture position in milliseconds, or 0 for images).
+            frame: The image the boxes were detected in; it is JPEG-encoded
+                and stored on the annotation.
+            boxes: Iterable of detection boxes; each contributes its
+                ``xywhn`` values and integer class id as a ``Detection``.
+
+        Raises:
+            ValueError: If the frame cannot be JPEG-encoded.
+        """
+        detections = []
+        for box in boxes:
+            b = box.xywhn[0].cpu().numpy()
+            cls = int(box.cls[0].cpu().numpy().item())
+            detections.append(Detection(<double>b[0], <double>b[1], <double>b[2], <double>b[3], cls))
+        # Encode the whole frame: callers already pass the image itself, so
+        # indexing frame[0] here would encode only its first pixel row.
+        ok, encoded_image = cv2.imencode('.jpg', frame)
+        if not ok:
+            raise ValueError("Failed to encode frame as JPEG")
+        image_bytes = encoded_image.tobytes()
+        return Annotation(image_bytes, time, detections)
+
+
+cdef class Detection:
+    """One detected box: four box coordinates plus an integer class id.
+
+    In this file instances are built from ``box.xywhn[0]`` and ``box.cls[0]``,
+    so x, y, w, h are presumably the normalized xywh box values — confirm
+    against the Ultralytics ``Boxes`` documentation.
+    """
+    cdef:
+        double x
+        double y
+        double w
+        double h
+        int cls
+
+    def __init__(self, double x, double y, double w, double h, int cls):
+        # Box geometry first, then the class id.
+        self.x, self.y = x, y
+        self.w, self.h = w, h
+        self.cls = cls
+
+cdef class Annotation:
+    """Encoded image bytes, a timestamp, and the detections for that image.
+
+    The attributes are assigned here but not declared in this block, so their
+    ``cdef`` declarations presumably live in a companion .pxd — TODO confirm.
+    """
+
+    def __init__(self, image_bytes: bytes, float time, detections: [Detection]):
+        # Plain value holder: store the three constructor arguments as-is.
+        self.detections = detections
+        self.time = time
+        self.image = image_bytes