from ultralytics import YOLO
import mimetypes

import cv2
from ultralytics.engine.results import Boxes

from remote_command cimport RemoteCommand
from annotation cimport Detection, Annotation


cdef class Inference:
    """Run YOLO tracking inference on images or videos.

    Results are delivered through the ``on_annotations(cmd, annotations)``
    callback supplied at construction time.
    """

    def __init__(self, model_bytes, on_annotations):
        self.model = YOLO(model_bytes)
        self.on_annotations = on_annotations

    cdef bint is_video(self, str filepath):
        # Classify by MIME type guessed from the file extension only.
        mime_type, _ = mimetypes.guess_type(filepath)
        return mime_type is not None and mime_type.startswith("video")

    cdef run_inference(self, RemoteCommand cmd, int batch_size=8, int frame_skip=4):
        """Dispatch ``cmd.filename`` to the video or image pipeline.

        batch_size: frames tracked per model call (video only).
        frame_skip: keep every ``frame_skip``-th decoded frame (video only).
        """
        if self.is_video(cmd.filename):
            return self._process_video(cmd, batch_size, frame_skip)
        return self._process_image(cmd)

    cdef _track_batch(self, RemoteCommand cmd, list batch_frame, list annotations):
        # Track one batch of (frame, timestamp_ms) pairs; append non-empty
        # annotations and report via the callback.
        frames = [item[0] for item in batch_frame]
        results = self.model.track(frames, persist=True)
        for (frame, ms), res in zip(batch_frame, results):
            annotation = self.process_detections(int(ms), frame, res.boxes)
            if len(annotation.detections) > 0:
                annotations.append(annotation)
        # NOTE(review): the *cumulative* list is re-sent on every batch, so the
        # callback sees earlier annotations repeatedly — confirm this is intended
        # (original behavior preserved here).
        self.on_annotations(cmd, annotations)

    cdef _process_video(self, RemoteCommand cmd, int batch_size, int frame_skip):
        """Decode the video, sample every ``frame_skip``-th frame, track in batches."""
        cdef int frame_count = 0
        batch_frame = []     # pending (frame, timestamp_ms) pairs
        annotations = []     # cumulative annotations across batches
        v_input = cv2.VideoCapture(cmd.filename)
        try:
            while v_input.isOpened():
                ret, frame = v_input.read()
                ms = v_input.get(cv2.CAP_PROP_POS_MSEC)
                if not ret or frame is None:
                    break
                frame_count += 1
                if frame_count % frame_skip == 0:
                    batch_frame.append((frame, ms))
                if len(batch_frame) == batch_size:
                    self._track_batch(cmd, batch_frame, annotations)
                    batch_frame.clear()
            # BUG FIX: flush the final partial batch, which was previously
            # silently dropped when the video ended mid-batch.
            if batch_frame:
                self._track_batch(cmd, batch_frame, annotations)
                batch_frame.clear()
        finally:
            # Release the capture even if tracking raises.
            v_input.release()

    cdef _process_image(self, RemoteCommand cmd):
        """Run tracking on a single still image and report its annotation."""
        frame = cv2.imread(cmd.filename)
        res = self.model.track(frame)
        annotation = self.process_detections(0, frame, res[0].boxes)
        self.on_annotations(cmd, [annotation])

    cdef process_detections(self, float time, frame, boxes: Boxes):
        """Convert YOLO ``boxes`` into Detections bundled with the JPEG frame.

        time: timestamp (milliseconds at the call sites) stored on the Annotation.
        frame: the BGR image ndarray the boxes were produced from.
        """
        detections = []
        for box in boxes:
            b = box.xywhn[0].cpu().numpy()  # normalized (x, y, w, h)
            cls = int(box.cls[0].cpu().numpy().item())
            detections.append(Detection(b[0], b[1], b[2], b[3], cls))
        # BUG FIX: encode the whole frame — ``frame[0]`` encoded only the first
        # pixel row (both callers already pass the raw ndarray).
        _, encoded_image = cv2.imencode('.jpg', frame)
        image_bytes = encoded_image.tobytes()
        return Annotation(image_bytes, time, detections)