add manual Tile Processor

zoom on video on pause (temp image)
2026-06-21 08:11:05 +00:00 · 2025-07-28 12:39:52 +03:00
parent fefd054ea0
commit fc6e5db795
34 changed files with 716 additions and 209 deletions
@@ -13,23 +13,14 @@ Results (file or annotations) is putted to the other queue, or the same socket,

 <h2>Installation</h2>

-Prepare correct onnx model from YOLO:
-```python
-from ultralytics import YOLO
-import netron
-
-model = YOLO("azaion.pt")
-model.export(format="onnx", imgsz=1280, nms=True, batch=4)
-netron.start('azaion.onnx')
-```    
-Read carefully about [export arguments](https://docs.ultralytics.com/modes/export/), you have to use nms=True, and batching with a proper batch size  
-
 <h3>Install libs</h3>
 https://www.python.org/downloads/

 Windows

 - [Install CUDA](https://developer.nvidia.com/cuda-12-1-0-download-archive)
+- [Install Visual Studio Build Tools 2019](https://visualstudio.microsoft.com/downloads/?q=build+tools)
+

 Linux
 ```
@@ -44,6 +35,17 @@ Linux
    nvcc --version
 ```
    
+Prepare a correct ONNX model from YOLO:
+```python
+from ultralytics import YOLO
+import netron
+
+model = YOLO("azaion.pt")
+model.export(format="onnx", imgsz=1280, nms=True, batch=4)
+netron.start('azaion.onnx')
+```    
+Read the [export arguments](https://docs.ultralytics.com/modes/export/) carefully: you have to export with `nms=True` and with batching enabled, using a proper batch size.  
+

 <h3>Install dependencies</h3>
 1. Install python with max version 3.11. Pytorch for now supports 3.11 max  
@@ -7,9 +7,11 @@ cdef class AIRecognitionConfig:
    cdef public double tracking_probability_increase
    cdef public double tracking_intersection_threshold

+    cdef public int big_image_tile_overlap_percent
+
    cdef public bytes file_data
    cdef public list[str] paths
    cdef public int model_batch_size

    @staticmethod
-    cdef from_msgpack(bytes data)
+    cdef from_msgpack(bytes data)
@@ -9,6 +9,7 @@ cdef class AIRecognitionConfig:
                 tracking_distance_confidence,
                 tracking_probability_increase,
                 tracking_intersection_threshold,
+                 big_image_tile_overlap_percent,

                 file_data,
                 paths,
@@ -21,6 +22,7 @@ cdef class AIRecognitionConfig:
        self.tracking_distance_confidence = tracking_distance_confidence
        self.tracking_probability_increase = tracking_probability_increase
        self.tracking_intersection_threshold = tracking_intersection_threshold
+        self.big_image_tile_overlap_percent = big_image_tile_overlap_percent

        self.file_data = file_data
        self.paths = paths
@@ -31,6 +33,7 @@ cdef class AIRecognitionConfig:
                f'probability_increase : {self.tracking_probability_increase}, '
                f'intersection_threshold : {self.tracking_intersection_threshold}, '
                f'frame_period_recognition : {self.frame_period_recognition}, '
+                f'big_image_tile_overlap_percent: {self.big_image_tile_overlap_percent}, '
                f'paths: {self.paths}, '
                f'model_batch_size: {self.model_batch_size}')

@@ -45,6 +48,7 @@ cdef class AIRecognitionConfig:
            unpacked.get("t_dc", 0.0),
            unpacked.get("t_pi", 0.0),
            unpacked.get("t_it", 0.0),
+            unpacked.get("ov_p", 20),

            unpacked.get("d", b''),
            unpacked.get("p", []),
@@ -3,7 +3,7 @@ cdef class Detection:
    cdef public str annotation_name
    cdef public int cls

-    cdef public overlaps(self, Detection det2)
+    cdef public overlaps(self, Detection det2, float confidence_threshold)

 cdef class Annotation:
    cdef public str name
@@ -14,13 +14,13 @@ cdef class Detection:
    def __str__(self):
        return f'{self.cls}: {self.x:.2f} {self.y:.2f} {self.w:.2f} {self.h:.2f}, prob: {(self.confidence*100):.1f}%'

-    cdef overlaps(self, Detection det2):
+    cdef overlaps(self, Detection det2, float confidence_threshold):
+        """Tell whether this detection and ``det2`` overlap strongly enough.
+
+        The boxes are treated as centre/size rectangles (x, y, w, h). The
+        intersection area is divided by the area of the smaller box and the
+        result is compared against ``confidence_threshold``.
+
+        NOTE(review): despite its name, ``confidence_threshold`` is used here as
+        an overlap-ratio threshold, not as a detection confidence.
+
+        Returns True when the ratio is strictly greater than the threshold,
+        False otherwise (including when either box has no area).
+        """
        cdef double overlap_x = 0.5 * (self.w + det2.w) - abs(self.x - det2.x)
        cdef double overlap_y = 0.5 * (self.h + det2.h) - abs(self.y - det2.y)
        cdef double overlap_area = max(0.0, overlap_x) * max(0.0, overlap_y)
        cdef double min_area = min(self.w * self.h, det2.w * det2.h)

-        return overlap_area / min_area > 0.6
+        # A degenerate (zero-area) box cannot overlap anything; return early
+        # instead of dividing by zero.
+        if min_area <= 0.0:
+            return False
+        return overlap_area / min_area > confidence_threshold

 cdef class Annotation:
    def __init__(self, str name, long ms, list[Detection] detections):
@@ -23,11 +23,13 @@ cdef class Inference:

    cdef run_inference(self, RemoteCommand cmd)
    cdef _process_video(self, RemoteCommand cmd, AIRecognitionConfig ai_config, str video_name)
-    cdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths)
+    cpdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths)
+    cpdef _process_images_inner(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list frame_data)
+    cpdef split_to_tiles(self, frame, path, img_w, img_h, overlap_percent)
    cdef stop(self)

    cdef preprocess(self, frames)
-    cdef remove_overlapping_detections(self, list[Detection] detections)
+    cdef remove_overlapping_detections(self, list[Detection] detections, float confidence_threshold=?)
    cdef postprocess(self, output, ai_config)
    cdef split_list_extend(self, lst, chunk_size)

@@ -150,13 +150,13 @@ cdef class Inference:
                    h = y2 - y1
                    if conf >= ai_config.probability_threshold:
                        detections.append(Detection(x, y, w, h, class_id, conf))
-                filtered_detections = self.remove_overlapping_detections(detections)
+                filtered_detections = self.remove_overlapping_detections(detections, ai_config.tracking_intersection_threshold)
                results.append(filtered_detections)
            return results
        except Exception as e:
            raise RuntimeError(f"Failed to postprocess: {str(e)}")

-    cdef remove_overlapping_detections(self, list[Detection] detections):
+    cdef remove_overlapping_detections(self, list[Detection] detections, float confidence_threshold=0.6):
        cdef Detection det1, det2
        filtered_output = []
        filtered_out_indexes = []
@@ -168,7 +168,7 @@ cdef class Inference:
            res = det1_index
            for det2_index in range(det1_index + 1, len(detections)):
                det2 = detections[det2_index]
-                if det1.overlaps(det2):
+                if det1.overlaps(det2, confidence_threshold):
                    if det1.confidence > det2.confidence or (
                            det1.confidence == det2.confidence and det1.cls < det2.cls):  # det1 has higher confidence or lower class_id
                        filtered_out_indexes.append(det2_index)
@@ -211,9 +211,8 @@ cdef class Inference:
                images.append(m)
        # images first, it's faster
        if len(images) > 0:
-            for chunk in self.split_list_extend(images, self.engine.get_batch_size()):
-                constants_inf.log(f'run inference on {" ".join(chunk)}...')
-                self._process_images(cmd, ai_config, chunk)
+            constants_inf.log(f'run inference on {" ".join(images)}...')
+            self._process_images(cmd, ai_config, images)
        if len(videos) > 0:
            for v in videos:
                constants_inf.log(f'run inference on {v}...')
@@ -250,8 +249,6 @@ cdef class Inference:
                        _, image = cv2.imencode('.jpg', batch_frames[i])
                        annotation.image = image.tobytes()
                        self._previous_annotation = annotation
-
-                        print(annotation)
                        self.on_annotation(cmd, annotation)

                batch_frames.clear()
@@ -259,15 +256,53 @@ cdef class Inference:
        v_input.release()


-    cdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths):
-        cdef list frames = []
-        cdef list timestamps = []
-        self._previous_annotation = None
-        for image in image_paths:
-            frame = cv2.imread(image)
-            frames.append(frame)
-            timestamps.append(0)
+    cpdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths):
+        """Load the given images, tile the large ones and run them through the model.
+
+        Each path is read with ``cv2.imread``; unreadable files are logged and
+        skipped. An image whose height and width are both within 1.5x of the
+        model input size is queued as-is, otherwise it is cut into tiles by
+        ``split_to_tiles`` using ``ai_config.big_image_tile_overlap_percent``.
+        The queued (frame, name) pairs are then handed to
+        ``_process_images_inner`` in the chunks produced by ``split_list_extend``.
+        """
+        cdef list frame_data = []
+        for path in image_paths:
+            frame = cv2.imread(<str>path)
+            if frame is None:
+                constants_inf.logerror(<str>f'Failed to read image {path}')
+                continue
+            img_h, img_w, _ = frame.shape
+            needs_tiling = img_h > 1.5 * self.model_height or img_w > 1.5 * self.model_width
+            if needs_tiling:
+                tiles, tile_names = self.split_to_tiles(frame, path, img_w, img_h, ai_config.big_image_tile_overlap_percent)
+                frame_data.extend(zip(tiles, tile_names))
+            else:
+                frame_data.append((frame, path))

+        batch_size = self.engine.get_batch_size()
+        for chunk in self.split_list_extend(frame_data, batch_size):
+            self._process_images_inner(cmd, ai_config, chunk)
+
+
+    cpdef split_to_tiles(self, frame, path, img_w, img_h, overlap_percent):
+        """Cut ``frame`` into overlapping tiles of the model input size.
+
+        Parameters:
+            frame: image array indexed as ``frame[y, x]``.
+            path: source image path (``str`` or path-like); only its file name
+                stem and suffix are used to build the tile names.
+            img_w, img_h: width and height of ``frame`` in pixels.
+            overlap_percent: overlap between neighbouring tiles, in percent of
+                the tile size.
+
+        Returns:
+            A ``(tiles, names)`` pair of equally long lists, which is the shape
+            the caller unpacks. ``names[i]`` is
+            ``<stem>.tile_<x_start>_<y_start><suffix>`` for ``tiles[i]``.
+        """
+        from pathlib import Path
+
+        tile_w = int(self.model_width)
+        tile_h = int(self.model_height)
+
+        # Strides must be whole pixels (slice indices cannot be floats) and at
+        # least 1 so that a 100% overlap cannot cause a division by zero.
+        stride_w = max(1, int(tile_w * (1 - overlap_percent / 100)))
+        stride_h = max(1, int(tile_h * (1 - overlap_percent / 100)))
+
+        # An axis shorter than the tile still needs exactly one tile.
+        n_tiles_x = max(1, int(np.ceil((img_w - tile_w) / stride_w)) + 1)
+        n_tiles_y = max(1, int(np.ceil((img_h - tile_h) / stride_h)) + 1)
+
+        image_path = Path(path)
+        tiles = []
+        names = []
+        for y_idx in range(n_tiles_y):
+            y_start = y_idx * stride_h
+            # Ensure the tile doesn't go out of bounds
+            y_end = min(y_start + tile_h, img_h)
+            # At the bottom edge, shift the tile back so it keeps the full
+            # model height (clamped to 0 when the image is shorter than a tile).
+            if y_end == img_h:
+                y_start = max(0, img_h - tile_h)
+
+            for x_idx in range(n_tiles_x):
+                x_start = x_idx * stride_w
+                x_end = min(x_start + tile_w, img_w)
+                # Same edge handling for the right border.
+                if x_end == img_w:
+                    x_start = max(0, img_w - tile_w)
+
+                tiles.append(frame[y_start:y_end, x_start:x_end])
+                names.append(f'{image_path.stem}.tile_{x_start}_{y_start}{image_path.suffix}')
+        return tiles, names
+
+    cpdef _process_images_inner(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list frame_data):
+        frames = [frame for frame, _ in frame_data]
        input_blob = self.preprocess(frames)

        outputs = self.engine.run(input_blob)
@@ -275,7 +310,7 @@ cdef class Inference:
        list_detections = self.postprocess(outputs, ai_config)
        for i in range(len(list_detections)):
            detections = list_detections[i]
-            annotation = Annotation(image_paths[i], timestamps[i], detections)
+            annotation = Annotation(frame_data[i][1], 0, detections)
            _, image = cv2.imencode('.jpg', frames[i])
            annotation.image = image.tobytes()
            self.on_annotation(cmd, annotation)
@@ -322,7 +357,9 @@ cdef class Inference:
                    closest_det = prev_det

            # Check if beyond tracking distance
-            if min_distance_sq > ai_config.tracking_distance_confidence:
+            dist_px = ai_config.tracking_distance_confidence * self.model_width
+            dist_px_sq = dist_px * dist_px
+            if min_distance_sq > dist_px_sq:
                return True

            # Check probability increase
@@ -7,11 +7,12 @@ cryptography==44.0.2
 psutil
 msgpack
 pyjwt
-zmq
+pyzmq
 requests
 pyyaml
 pycuda
-tensorrt
+tensorrt==10.11.0.33
 pynvml
 boto3
-loguru
+loguru
+pytest
@@ -2,19 +2,30 @@ from setuptools import setup, Extension
 from Cython.Build import cythonize
 import numpy as np

+# debug_args = {}
+# trace_line = False
+
+debug_args = {
+    'extra_compile_args': ['-O0', '-g'],
+    'extra_link_args': ['-g'],
+    'define_macros': [('CYTHON_TRACE_NOGIL', '1')]
+}
+trace_line = True
+
 extensions = [
-    Extension('constants_inf', ['constants_inf.pyx']),
-    Extension('file_data', ['file_data.pyx']),
-    Extension('remote_command_inf', ['remote_command_inf.pyx']),
-    Extension('remote_command_handler_inf', ['remote_command_handler_inf.pyx']),
-    Extension('annotation', ['annotation.pyx']),
-    Extension('loader_client', ['loader_client.pyx']),
-    Extension('ai_config', ['ai_config.pyx']),
-    Extension('tensorrt_engine', ['tensorrt_engine.pyx'], include_dirs=[np.get_include()]),
-    Extension('onnx_engine', ['onnx_engine.pyx'], include_dirs=[np.get_include()]),
-    Extension('inference_engine', ['inference_engine.pyx'], include_dirs=[np.get_include()]),
-    Extension('inference', ['inference.pyx'], include_dirs=[np.get_include()]),
-    Extension('main_inference', ['main_inference.pyx']),
+    Extension('constants_inf', ['constants_inf.pyx'], **debug_args),
+    Extension('file_data', ['file_data.pyx'], **debug_args),
+    Extension('remote_command_inf', ['remote_command_inf.pyx'], **debug_args),
+    Extension('remote_command_handler_inf', ['remote_command_handler_inf.pyx'], **debug_args),
+    Extension('annotation', ['annotation.pyx'], **debug_args),
+    Extension('loader_client', ['loader_client.pyx'], **debug_args),
+    Extension('ai_config', ['ai_config.pyx'], **debug_args),
+    Extension('tensorrt_engine', ['tensorrt_engine.pyx'], include_dirs=[np.get_include()], **debug_args),
+    Extension('onnx_engine', ['onnx_engine.pyx'], include_dirs=[np.get_include()], **debug_args),
+    Extension('inference_engine', ['inference_engine.pyx'], include_dirs=[np.get_include()], **debug_args),
+    Extension('inference', ['inference.pyx'], include_dirs=[np.get_include()], **debug_args),
+    Extension('main_inference', ['main_inference.pyx'], **debug_args),
+
 ]

 setup(
@@ -23,10 +34,11 @@ setup(
        extensions,
        compiler_directives={
            "language_level": 3,
-            "emit_code_comments" : False,
+            "emit_code_comments": False,
            "binding": True,
            'boundscheck': False,
-            'wraparound': False
+            'wraparound': False,
+            'linetrace': trace_line
        }
    ),
    install_requires=[
@@ -34,4 +46,4 @@ setup(
        'pywin32; platform_system=="Windows"'
    ],
    zip_safe=False
-)
+)
@@ -0,0 +1,37 @@
+from setuptools import setup, Extension
+from Cython.Build import cythonize
+import numpy as np
+
+# Cython extension modules to build, one per .pyx source file.
+# The engine and inference modules also get NumPy's C headers on their include path.
+extensions = [
+    Extension('constants_inf', ['constants_inf.pyx']),
+    Extension('file_data', ['file_data.pyx']),
+    Extension('remote_command_inf', ['remote_command_inf.pyx']),
+    Extension('remote_command_handler_inf', ['remote_command_handler_inf.pyx']),
+    Extension('annotation', ['annotation.pyx']),
+    Extension('loader_client', ['loader_client.pyx']),
+    Extension('ai_config', ['ai_config.pyx']),
+    Extension('tensorrt_engine', ['tensorrt_engine.pyx'], include_dirs=[np.get_include()]),
+    Extension('onnx_engine', ['onnx_engine.pyx'], include_dirs=[np.get_include()]),
+    Extension('inference_engine', ['inference_engine.pyx'], include_dirs=[np.get_include()]),
+    Extension('inference', ['inference.pyx'], include_dirs=[np.get_include()]),
+    Extension('main_inference', ['main_inference.pyx'])
+]
+
+# Build configuration without any extra compile/link arguments or line tracing.
+setup(
+    name="azaion.ai",
+    ext_modules=cythonize(
+        extensions,
+        # Directives applied to every extension listed above.
+        compiler_directives={
+            "language_level": 3,
+            "emit_code_comments" : False,
+            "binding": True,
+            'boundscheck': False,
+            'wraparound': False
+        }
+    ),
+    install_requires=[
+        'ultralytics>=8.0.0',
+        # pywin32 is only required when installing on Windows.
+        'pywin32; platform_system=="Windows"'
+    ],
+    zip_safe=False
+)
@@ -0,0 +1,8 @@
+import inference
+from ai_config import AIRecognitionConfig
+from remote_command_inf import RemoteCommand
+
+
+def test_process_images():
+    inf = inference.Inference(None, None)
+    inf._process_images(RemoteCommand(30), AIRecognitionConfig(4, 2, 15,  0.15, 15, 0.8, 20, b'test', [], 4), ['test_img01.JPG', 'test_img02.jpg'])