from ultralytics import YOLO
import mimetypes

import cv2
from ultralytics.engine.results import Boxes

from remote_command cimport RemoteCommand
from annotation cimport Detection, Annotation


cdef class Inference:
    """Run YOLO tracking inference on images or videos.

    Results are delivered through the ``on_annotations(cmd, annotations)``
    callback supplied at construction time.
    """

    def __init__(self, model_bytes, on_annotations):
        self.model = YOLO(model_bytes)
        self.on_annotations = on_annotations

    cdef bint is_video(self, str filepath):
        # Classify by MIME type guessed from the file extension only.
        mime_type, _ = mimetypes.guess_type(filepath)
        return mime_type is not None and mime_type.startswith("video")

    cdef run_inference(self, RemoteCommand cmd, int batch_size=8, int frame_skip=4):
        """Dispatch ``cmd.filename`` to the video or image pipeline.

        batch_size: frames tracked per model call (video only).
        frame_skip: keep every ``frame_skip``-th decoded frame (video only).
        """
        if self.is_video(cmd.filename):
            return self._process_video(cmd, batch_size, frame_skip)
        return self._process_image(cmd)

    cdef _track_batch(self, RemoteCommand cmd, list batch_frame, list annotations):
        # Track one batch of (frame, timestamp_ms) pairs; append non-empty
        # annotations and report via the callback.
        frames = [item[0] for item in batch_frame]
        results = self.model.track(frames, persist=True)
        for (frame, ms), res in zip(batch_frame, results):
            annotation = self.process_detections(int(ms), frame, res.boxes)
            if len(annotation.detections) > 0:
                annotations.append(annotation)
        # NOTE(review): the *cumulative* list is re-sent on every batch, so the
        # callback sees earlier annotations repeatedly — confirm this is intended
        # (original behavior preserved here).
        self.on_annotations(cmd, annotations)

    cdef _process_video(self, RemoteCommand cmd, int batch_size, int frame_skip):
        """Decode the video, sample every ``frame_skip``-th frame, track in batches."""
        cdef int frame_count = 0
        batch_frame = []     # pending (frame, timestamp_ms) pairs
        annotations = []     # cumulative annotations across batches
        v_input = cv2.VideoCapture(cmd.filename)
        try:
            while v_input.isOpened():
                ret, frame = v_input.read()
                ms = v_input.get(cv2.CAP_PROP_POS_MSEC)
                if not ret or frame is None:
                    break
                frame_count += 1
                if frame_count % frame_skip == 0:
                    batch_frame.append((frame, ms))
                if len(batch_frame) == batch_size:
                    self._track_batch(cmd, batch_frame, annotations)
                    batch_frame.clear()
            # BUG FIX: flush the final partial batch, which was previously
            # silently dropped when the video ended mid-batch.
            if batch_frame:
                self._track_batch(cmd, batch_frame, annotations)
                batch_frame.clear()
        finally:
            # Release the capture even if tracking raises.
            v_input.release()

    cdef _process_image(self, RemoteCommand cmd):
        """Run tracking on a single still image and report its annotation."""
        frame = cv2.imread(cmd.filename)
        res = self.model.track(frame)
        annotation = self.process_detections(0, frame, res[0].boxes)
        self.on_annotations(cmd, [annotation])

    cdef process_detections(self, float time, frame, boxes: Boxes):
        """Convert YOLO ``boxes`` into Detections bundled with the JPEG frame.

        time: timestamp (milliseconds at the call sites) stored on the Annotation.
        frame: the BGR image ndarray the boxes were produced from.
        """
        detections = []
        for box in boxes:
            b = box.xywhn[0].cpu().numpy()  # normalized (x, y, w, h)
            cls = int(box.cls[0].cpu().numpy().item())
            detections.append(Detection(b[0], b[1], b[2], b[3], cls))
        # BUG FIX: encode the whole frame — ``frame[0]`` encoded only the first
        # pixel row (both callers already pass the raw ndarray).
        _, encoded_image = cv2.imencode('.jpg', frame)
        image_bytes = encoded_image.tobytes()
        return Annotation(image_bytes, time, detections)