mirror of
https://github.com/azaion/annotations.git
synced 2026-04-22 10:46:30 +00:00
add manual Tile Processor
zoom on video on pause (temp image)
This commit is contained in:
+13
-11
@@ -13,23 +13,14 @@ Results (file or annotations) is putted to the other queue, or the same socket,
|
||||
|
||||
<h2>Installation</h2>
|
||||
|
||||
Prepare a correct ONNX model from YOLO:
|
||||
```python
|
||||
from ultralytics import YOLO
|
||||
import netron
|
||||
|
||||
model = YOLO("azaion.pt")
|
||||
model.export(format="onnx", imgsz=1280, nms=True, batch=4)
|
||||
netron.start('azaion.onnx')
|
||||
```
|
||||
Read the [export arguments](https://docs.ultralytics.com/modes/export/) documentation carefully: you must export with nms=True and with batching enabled, using an appropriate batch size.
|
||||
|
||||
<h3>Install libs</h3>
|
||||
https://www.python.org/downloads/
|
||||
|
||||
Windows
|
||||
|
||||
- [Install CUDA](https://developer.nvidia.com/cuda-12-1-0-download-archive)
|
||||
- [Install Visual Studio Build Tools 2019](https://visualstudio.microsoft.com/downloads/?q=build+tools)
|
||||
|
||||
|
||||
Linux
|
||||
```
|
||||
@@ -44,6 +35,17 @@ Linux
|
||||
nvcc --version
|
||||
```
|
||||
|
||||
Prepare a correct ONNX model from YOLO:
|
||||
```python
|
||||
from ultralytics import YOLO
|
||||
import netron
|
||||
|
||||
model = YOLO("azaion.pt")
|
||||
model.export(format="onnx", imgsz=1280, nms=True, batch=4)
|
||||
netron.start('azaion.onnx')
|
||||
```
|
||||
Read the [export arguments](https://docs.ultralytics.com/modes/export/) documentation carefully: you must export with nms=True and with batching enabled, using an appropriate batch size.
|
||||
|
||||
|
||||
<h3>Install dependencies</h3>
|
||||
1. Install Python 3.11 or older. PyTorch currently supports Python versions only up to 3.11.
|
||||
|
||||
@@ -7,9 +7,11 @@ cdef class AIRecognitionConfig:
|
||||
cdef public double tracking_probability_increase
|
||||
cdef public double tracking_intersection_threshold
|
||||
|
||||
cdef public int big_image_tile_overlap_percent
|
||||
|
||||
cdef public bytes file_data
|
||||
cdef public list[str] paths
|
||||
cdef public int model_batch_size
|
||||
|
||||
@staticmethod
|
||||
cdef from_msgpack(bytes data)
|
||||
cdef from_msgpack(bytes data)
|
||||
|
||||
@@ -9,6 +9,7 @@ cdef class AIRecognitionConfig:
|
||||
tracking_distance_confidence,
|
||||
tracking_probability_increase,
|
||||
tracking_intersection_threshold,
|
||||
big_image_tile_overlap_percent,
|
||||
|
||||
file_data,
|
||||
paths,
|
||||
@@ -21,6 +22,7 @@ cdef class AIRecognitionConfig:
|
||||
self.tracking_distance_confidence = tracking_distance_confidence
|
||||
self.tracking_probability_increase = tracking_probability_increase
|
||||
self.tracking_intersection_threshold = tracking_intersection_threshold
|
||||
self.big_image_tile_overlap_percent = big_image_tile_overlap_percent
|
||||
|
||||
self.file_data = file_data
|
||||
self.paths = paths
|
||||
@@ -31,6 +33,7 @@ cdef class AIRecognitionConfig:
|
||||
f'probability_increase : {self.tracking_probability_increase}, '
|
||||
f'intersection_threshold : {self.tracking_intersection_threshold}, '
|
||||
f'frame_period_recognition : {self.frame_period_recognition}, '
|
||||
f'big_image_tile_overlap_percent: {self.big_image_tile_overlap_percent}, '
|
||||
f'paths: {self.paths}, '
|
||||
f'model_batch_size: {self.model_batch_size}')
|
||||
|
||||
@@ -45,6 +48,7 @@ cdef class AIRecognitionConfig:
|
||||
unpacked.get("t_dc", 0.0),
|
||||
unpacked.get("t_pi", 0.0),
|
||||
unpacked.get("t_it", 0.0),
|
||||
unpacked.get("ov_p", 20),
|
||||
|
||||
unpacked.get("d", b''),
|
||||
unpacked.get("p", []),
|
||||
|
||||
@@ -3,7 +3,7 @@ cdef class Detection:
|
||||
cdef public str annotation_name
|
||||
cdef public int cls
|
||||
|
||||
cdef public overlaps(self, Detection det2)
|
||||
cdef public overlaps(self, Detection det2, float confidence_threshold)
|
||||
|
||||
cdef class Annotation:
|
||||
cdef public str name
|
||||
|
||||
@@ -14,13 +14,13 @@ cdef class Detection:
|
||||
def __str__(self):
|
||||
return f'{self.cls}: {self.x:.2f} {self.y:.2f} {self.w:.2f} {self.h:.2f}, prob: {(self.confidence*100):.1f}%'
|
||||
|
||||
cdef overlaps(self, Detection det2):
|
||||
cdef overlaps(self, Detection det2, float confidence_threshold):
|
||||
cdef double overlap_x = 0.5 * (self.w + det2.w) - abs(self.x - det2.x)
|
||||
cdef double overlap_y = 0.5 * (self.h + det2.h) - abs(self.y - det2.y)
|
||||
cdef double overlap_area = max(0.0, overlap_x) * max(0.0, overlap_y)
|
||||
cdef double min_area = min(self.w * self.h, det2.w * det2.h)
|
||||
|
||||
return overlap_area / min_area > 0.6
|
||||
return overlap_area / min_area > confidence_threshold
|
||||
|
||||
cdef class Annotation:
|
||||
def __init__(self, str name, long ms, list[Detection] detections):
|
||||
|
||||
@@ -23,11 +23,13 @@ cdef class Inference:
|
||||
|
||||
cdef run_inference(self, RemoteCommand cmd)
|
||||
cdef _process_video(self, RemoteCommand cmd, AIRecognitionConfig ai_config, str video_name)
|
||||
cdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths)
|
||||
cpdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths)
|
||||
cpdef _process_images_inner(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list frame_data)
|
||||
cpdef split_to_tiles(self, frame, path, img_w, img_h, overlap_percent)
|
||||
cdef stop(self)
|
||||
|
||||
cdef preprocess(self, frames)
|
||||
cdef remove_overlapping_detections(self, list[Detection] detections)
|
||||
cdef remove_overlapping_detections(self, list[Detection] detections, float confidence_threshold=?)
|
||||
cdef postprocess(self, output, ai_config)
|
||||
cdef split_list_extend(self, lst, chunk_size)
|
||||
|
||||
|
||||
@@ -150,13 +150,13 @@ cdef class Inference:
|
||||
h = y2 - y1
|
||||
if conf >= ai_config.probability_threshold:
|
||||
detections.append(Detection(x, y, w, h, class_id, conf))
|
||||
filtered_detections = self.remove_overlapping_detections(detections)
|
||||
filtered_detections = self.remove_overlapping_detections(detections, ai_config.tracking_intersection_threshold)
|
||||
results.append(filtered_detections)
|
||||
return results
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Failed to postprocess: {str(e)}")
|
||||
|
||||
cdef remove_overlapping_detections(self, list[Detection] detections):
|
||||
cdef remove_overlapping_detections(self, list[Detection] detections, float confidence_threshold=0.6):
|
||||
cdef Detection det1, det2
|
||||
filtered_output = []
|
||||
filtered_out_indexes = []
|
||||
@@ -168,7 +168,7 @@ cdef class Inference:
|
||||
res = det1_index
|
||||
for det2_index in range(det1_index + 1, len(detections)):
|
||||
det2 = detections[det2_index]
|
||||
if det1.overlaps(det2):
|
||||
if det1.overlaps(det2, confidence_threshold):
|
||||
if det1.confidence > det2.confidence or (
|
||||
det1.confidence == det2.confidence and det1.cls < det2.cls): # det1 has higher confidence or lower class_id
|
||||
filtered_out_indexes.append(det2_index)
|
||||
@@ -211,9 +211,8 @@ cdef class Inference:
|
||||
images.append(m)
|
||||
# images first, it's faster
|
||||
if len(images) > 0:
|
||||
for chunk in self.split_list_extend(images, self.engine.get_batch_size()):
|
||||
constants_inf.log(f'run inference on {" ".join(chunk)}...')
|
||||
self._process_images(cmd, ai_config, chunk)
|
||||
constants_inf.log(f'run inference on {" ".join(images)}...')
|
||||
self._process_images(cmd, ai_config, images)
|
||||
if len(videos) > 0:
|
||||
for v in videos:
|
||||
constants_inf.log(f'run inference on {v}...')
|
||||
@@ -250,8 +249,6 @@ cdef class Inference:
|
||||
_, image = cv2.imencode('.jpg', batch_frames[i])
|
||||
annotation.image = image.tobytes()
|
||||
self._previous_annotation = annotation
|
||||
|
||||
print(annotation)
|
||||
self.on_annotation(cmd, annotation)
|
||||
|
||||
batch_frames.clear()
|
||||
@@ -259,15 +256,53 @@ cdef class Inference:
|
||||
v_input.release()
|
||||
|
||||
|
||||
cdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths):
|
||||
cdef list frames = []
|
||||
cdef list timestamps = []
|
||||
self._previous_annotation = None
|
||||
for image in image_paths:
|
||||
frame = cv2.imread(image)
|
||||
frames.append(frame)
|
||||
timestamps.append(0)
|
||||
cpdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths):
    """Load still images, tile the oversized ones, and run inference on them in batches.

    Args:
        cmd: the remote command being served; forwarded unchanged to
            ``_process_images_inner``.
        ai_config: recognition settings; ``big_image_tile_overlap_percent``
            controls the overlap between neighbouring tiles.
        image_paths: paths of the image files to process.

    Images that cannot be read are logged and skipped. An image is processed
    whole when both of its sides are at most 1.5x the model input size;
    otherwise it is cut into tiles by ``split_to_tiles``.
    """
    cdef list frame_data = []
    for path in image_paths:
        frame = cv2.imread(<str>path)
        if frame is None:
            # cv2.imread reports a missing/unreadable file by returning None
            constants_inf.logerror(<str>f'Failed to read image {path}')
            continue

        img_h, img_w = frame.shape[:2]
        if img_h <= 1.5 * self.model_height and img_w <= 1.5 * self.model_width:
            frame_data.append((frame, path))
        else:
            # split_to_tiles returns a list of (tile, name) pairs, which is
            # exactly the shape frame_data holds. Unpacking that list into two
            # names and zipping them (as before) fails unless there are exactly
            # two tiles, and pairs the wrong items when there are.
            frame_data.extend(
                self.split_to_tiles(frame, path, img_w, img_h, ai_config.big_image_tile_overlap_percent))

    for chunk in self.split_list_extend(frame_data, self.engine.get_batch_size()):
        self._process_images_inner(cmd, ai_config, chunk)
|
||||
|
||||
|
||||
cpdef split_to_tiles(self, frame, path, img_w, img_h, overlap_percent):
    """Cut an image into overlapping tiles of the model input size.

    Args:
        frame: image array indexed as ``frame[y, x]``.
        path: source image path (``str`` or ``pathlib.Path``); only its file
            name is used, to build the tile names.
        img_w: image width in pixels.
        img_h: image height in pixels.
        overlap_percent: overlap between neighbouring tiles, as a percentage
            of the tile size.

    Returns:
        A list of ``(tile, name)`` pairs in row-major order. ``name`` is
        ``<stem>.tile_<x_start>_<y_start><suffix>``.

    Tiles on the right/bottom edge are shifted back inside the image so they
    keep the full tile size whenever the image is large enough.
    """
    # Local import: callers pass plain strings, which have no .stem/.suffix.
    from pathlib import Path

    tile_w = int(self.model_width)
    tile_h = int(self.model_height)

    # Strides must be integers (they become slice indices) and at least 1 so
    # an overlap of 100% or more cannot cause a division by zero.
    keep_ratio = 1 - overlap_percent / 100
    stride_w = max(1, int(tile_w * keep_ratio))
    stride_h = max(1, int(tile_h * keep_ratio))

    # Ceiling division; always at least one tile per axis, even when the image
    # is smaller than the tile along that axis.
    n_tiles_x = max(1, -(-(img_w - tile_w) // stride_w) + 1)
    n_tiles_y = max(1, -(-(img_h - tile_h) // stride_h) + 1)

    src = Path(path)
    results = []
    for y_idx in range(n_tiles_y):
        # Vertical axis uses the height stride/size (these were swapped with
        # the width values before).
        y_end = min(y_idx * stride_h + tile_h, img_h)
        y_start = max(0, y_end - tile_h)
        for x_idx in range(n_tiles_x):
            x_end = min(x_idx * stride_w + tile_w, img_w)
            x_start = max(0, x_end - tile_w)

            tile = frame[y_start:y_end, x_start:x_end]
            name = f'{src.stem}.tile_{x_start}_{y_start}{src.suffix}'
            results.append((tile, name))
    return results
|
||||
|
||||
cpdef _process_images_inner(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list frame_data):
|
||||
frames = [frame for frame, _ in frame_data]
|
||||
input_blob = self.preprocess(frames)
|
||||
|
||||
outputs = self.engine.run(input_blob)
|
||||
@@ -275,7 +310,7 @@ cdef class Inference:
|
||||
list_detections = self.postprocess(outputs, ai_config)
|
||||
for i in range(len(list_detections)):
|
||||
detections = list_detections[i]
|
||||
annotation = Annotation(image_paths[i], timestamps[i], detections)
|
||||
annotation = Annotation(frame_data[i][1], 0, detections)
|
||||
_, image = cv2.imencode('.jpg', frames[i])
|
||||
annotation.image = image.tobytes()
|
||||
self.on_annotation(cmd, annotation)
|
||||
@@ -322,7 +357,9 @@ cdef class Inference:
|
||||
closest_det = prev_det
|
||||
|
||||
# Check if beyond tracking distance
|
||||
if min_distance_sq > ai_config.tracking_distance_confidence:
|
||||
dist_px = ai_config.tracking_distance_confidence * self.model_width
|
||||
dist_px_sq = dist_px * dist_px
|
||||
if min_distance_sq > dist_px_sq:
|
||||
return True
|
||||
|
||||
# Check probability increase
|
||||
|
||||
@@ -7,11 +7,12 @@ cryptography==44.0.2
|
||||
psutil
|
||||
msgpack
|
||||
pyjwt
|
||||
zmq
|
||||
pyzmq
|
||||
requests
|
||||
pyyaml
|
||||
pycuda
|
||||
tensorrt
|
||||
tensorrt==10.11.0.33
|
||||
pynvml
|
||||
boto3
|
||||
loguru
|
||||
loguru
|
||||
pytest
|
||||
+27
-15
@@ -2,19 +2,30 @@ from setuptools import setup, Extension
|
||||
from Cython.Build import cythonize
|
||||
import numpy as np
|
||||
|
||||
# debug_args = {}
|
||||
# trace_line = False
|
||||
|
||||
debug_args = {
|
||||
'extra_compile_args': ['-O0', '-g'],
|
||||
'extra_link_args': ['-g'],
|
||||
'define_macros': [('CYTHON_TRACE_NOGIL', '1')]
|
||||
}
|
||||
trace_line = True
|
||||
|
||||
extensions = [
|
||||
Extension('constants_inf', ['constants_inf.pyx']),
|
||||
Extension('file_data', ['file_data.pyx']),
|
||||
Extension('remote_command_inf', ['remote_command_inf.pyx']),
|
||||
Extension('remote_command_handler_inf', ['remote_command_handler_inf.pyx']),
|
||||
Extension('annotation', ['annotation.pyx']),
|
||||
Extension('loader_client', ['loader_client.pyx']),
|
||||
Extension('ai_config', ['ai_config.pyx']),
|
||||
Extension('tensorrt_engine', ['tensorrt_engine.pyx'], include_dirs=[np.get_include()]),
|
||||
Extension('onnx_engine', ['onnx_engine.pyx'], include_dirs=[np.get_include()]),
|
||||
Extension('inference_engine', ['inference_engine.pyx'], include_dirs=[np.get_include()]),
|
||||
Extension('inference', ['inference.pyx'], include_dirs=[np.get_include()]),
|
||||
Extension('main_inference', ['main_inference.pyx']),
|
||||
Extension('constants_inf', ['constants_inf.pyx'], **debug_args),
|
||||
Extension('file_data', ['file_data.pyx'], **debug_args),
|
||||
Extension('remote_command_inf', ['remote_command_inf.pyx'], **debug_args),
|
||||
Extension('remote_command_handler_inf', ['remote_command_handler_inf.pyx'], **debug_args),
|
||||
Extension('annotation', ['annotation.pyx'], **debug_args),
|
||||
Extension('loader_client', ['loader_client.pyx'], **debug_args),
|
||||
Extension('ai_config', ['ai_config.pyx'], **debug_args),
|
||||
Extension('tensorrt_engine', ['tensorrt_engine.pyx'], include_dirs=[np.get_include()], **debug_args),
|
||||
Extension('onnx_engine', ['onnx_engine.pyx'], include_dirs=[np.get_include()], **debug_args),
|
||||
Extension('inference_engine', ['inference_engine.pyx'], include_dirs=[np.get_include()], **debug_args),
|
||||
Extension('inference', ['inference.pyx'], include_dirs=[np.get_include()], **debug_args),
|
||||
Extension('main_inference', ['main_inference.pyx'], **debug_args),
|
||||
|
||||
]
|
||||
|
||||
setup(
|
||||
@@ -23,10 +34,11 @@ setup(
|
||||
extensions,
|
||||
compiler_directives={
|
||||
"language_level": 3,
|
||||
"emit_code_comments" : False,
|
||||
"emit_code_comments": False,
|
||||
"binding": True,
|
||||
'boundscheck': False,
|
||||
'wraparound': False
|
||||
'wraparound': False,
|
||||
'linetrace': trace_line
|
||||
}
|
||||
),
|
||||
install_requires=[
|
||||
@@ -34,4 +46,4 @@ setup(
|
||||
'pywin32; platform_system=="Windows"'
|
||||
],
|
||||
zip_safe=False
|
||||
)
|
||||
)
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
from setuptools import setup, Extension
|
||||
from Cython.Build import cythonize
|
||||
import numpy as np
|
||||
|
||||
extensions = [
|
||||
Extension('constants_inf', ['constants_inf.pyx']),
|
||||
Extension('file_data', ['file_data.pyx']),
|
||||
Extension('remote_command_inf', ['remote_command_inf.pyx']),
|
||||
Extension('remote_command_handler_inf', ['remote_command_handler_inf.pyx']),
|
||||
Extension('annotation', ['annotation.pyx']),
|
||||
Extension('loader_client', ['loader_client.pyx']),
|
||||
Extension('ai_config', ['ai_config.pyx']),
|
||||
Extension('tensorrt_engine', ['tensorrt_engine.pyx'], include_dirs=[np.get_include()]),
|
||||
Extension('onnx_engine', ['onnx_engine.pyx'], include_dirs=[np.get_include()]),
|
||||
Extension('inference_engine', ['inference_engine.pyx'], include_dirs=[np.get_include()]),
|
||||
Extension('inference', ['inference.pyx'], include_dirs=[np.get_include()]),
|
||||
Extension('main_inference', ['main_inference.pyx'])
|
||||
]
|
||||
|
||||
setup(
|
||||
name="azaion.ai",
|
||||
ext_modules=cythonize(
|
||||
extensions,
|
||||
compiler_directives={
|
||||
"language_level": 3,
|
||||
"emit_code_comments" : False,
|
||||
"binding": True,
|
||||
'boundscheck': False,
|
||||
'wraparound': False
|
||||
}
|
||||
),
|
||||
install_requires=[
|
||||
'ultralytics>=8.0.0',
|
||||
'pywin32; platform_system=="Windows"'
|
||||
],
|
||||
zip_safe=False
|
||||
)
|
||||
@@ -0,0 +1,8 @@
|
||||
import inference
|
||||
from ai_config import AIRecognitionConfig
|
||||
from remote_command_inf import RemoteCommand
|
||||
|
||||
|
||||
def test_process_images():
|
||||
inf = inference.Inference(None, None)
|
||||
inf._process_images(RemoteCommand(30), AIRecognitionConfig(4, 2, 15, 0.15, 15, 0.8, 20, b'test', [], 4), ['test_img01.JPG', 'test_img02.jpg'])
|
||||
Reference in New Issue
Block a user