mirror of
https://github.com/azaion/annotations.git
synced 2026-04-22 12:56:30 +00:00
splitting python complete
This commit is contained in:
@@ -12,5 +12,4 @@ cdef class Annotation:
|
||||
cdef public list[Detection] detections
|
||||
cdef public bytes image
|
||||
|
||||
cdef format_time(self, ms)
|
||||
cdef bytes serialize(self)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import msgpack
|
||||
from pathlib import Path
|
||||
cimport constants_inf
|
||||
|
||||
cdef class Detection:
|
||||
def __init__(self, double x, double y, double w, double h, int cls, double confidence):
|
||||
@@ -14,6 +14,17 @@ cdef class Detection:
|
||||
def __str__(self):
|
||||
return f'{self.cls}: {self.x:.2f} {self.y:.2f} {self.w:.2f} {self.h:.2f}, prob: {(self.confidence*100):.1f}%'
|
||||
|
||||
def __eq__(self, other):
    """Approximate equality used to spot duplicate detections.

    Two detections compare equal when none of x, y, w or h differs by more
    than constants_inf.TILE_DUPLICATE_CONFIDENCE_THRESHOLD. The class id and
    the confidence are not part of the comparison. Anything that is not a
    Detection compares unequal.
    """
    if not isinstance(other, Detection):
        return False

    tolerance = constants_inf.TILE_DUPLICATE_CONFIDENCE_THRESHOLD
    largest_gap = max(
        abs(self.x - other.x),
        abs(self.y - other.y),
        abs(self.w - other.w),
        abs(self.h - other.h),
    )
    # Written as "not >" (rather than "<=") so the result is unchanged
    # even when a coordinate is NaN.
    return not largest_gap > tolerance
|
||||
|
||||
cdef overlaps(self, Detection det2, float confidence_threshold):
|
||||
cdef double overlap_x = 0.5 * (self.w + det2.w) - abs(self.x - det2.x)
|
||||
cdef double overlap_y = 0.5 * (self.h + det2.h) - abs(self.y - det2.y)
|
||||
@@ -23,9 +34,9 @@ cdef class Detection:
|
||||
return overlap_area / min_area > confidence_threshold
|
||||
|
||||
cdef class Annotation:
|
||||
def __init__(self, str name, long ms, list[Detection] detections):
|
||||
self.original_media_name = Path(<str>name).stem.replace(" ", "")
|
||||
self.name = f'{self.original_media_name}_{self.format_time(ms)}'
|
||||
def __init__(self, str name, str original_media_name, long ms, list[Detection] detections):
|
||||
self.name = name
|
||||
self.original_media_name = original_media_name
|
||||
self.time = ms
|
||||
self.detections = detections if detections is not None else []
|
||||
for d in self.detections:
|
||||
@@ -42,17 +53,6 @@ cdef class Annotation:
|
||||
)
|
||||
return f"{self.name}: {detections_str}"
|
||||
|
||||
cdef format_time(self, ms):
    """Render a millisecond offset as a compact ``HMMSSf`` string.

    H is the last digit of the full-hour count (hours modulo 10), MM and SS
    are zero-padded minutes and seconds, and f is the number of whole
    hundreds of milliseconds within the current second.
    """
    hours, within_hour = divmod(ms, 3600000)
    minutes, within_minute = divmod(within_hour, 60000)
    seconds, millis = divmod(within_minute, 1000)
    tenths = millis // 100
    return f"{hours % 10}{minutes:02}{seconds:02}{tenths}"
|
||||
|
||||
cdef bytes serialize(self):
|
||||
return msgpack.packb({
|
||||
"n": self.name,
|
||||
|
||||
@@ -13,5 +13,9 @@ cdef str MODELS_FOLDER
|
||||
|
||||
cdef int SMALL_SIZE_KB
|
||||
|
||||
cdef str SPLIT_SUFFIX
|
||||
cdef int TILE_DUPLICATE_CONFIDENCE_THRESHOLD
|
||||
|
||||
cdef log(str log_message)
|
||||
cdef logerror(str error)
|
||||
cdef logerror(str error)
|
||||
cdef format_time(int ms)
|
||||
@@ -12,6 +12,9 @@ cdef str MODELS_FOLDER = "models"
|
||||
|
||||
cdef int SMALL_SIZE_KB = 3
|
||||
|
||||
cdef str SPLIT_SUFFIX = "!split!"
|
||||
cdef int TILE_DUPLICATE_CONFIDENCE_THRESHOLD = 5
|
||||
|
||||
logger.remove()
|
||||
log_format = "[{time:HH:mm:ss} {level}] {message}"
|
||||
logger.add(
|
||||
@@ -40,4 +43,15 @@ cdef log(str log_message):
|
||||
logger.info(log_message)
|
||||
|
||||
cdef logerror(str error):
    """Write `error` to the module logger at ERROR level.

    The message is emitted exactly once; a second, identical
    logger.error() call would duplicate every error line in the sinks.
    """
    logger.error(error)
|
||||
|
||||
cdef format_time(int ms):
    """Format a millisecond offset as the six-character string ``HMMSSf``.

    H  - full hours, reduced modulo 10 so it always fits one digit
    MM - full minutes within the hour, zero-padded
    SS - full seconds within the minute, zero-padded
    f  - whole hundreds of milliseconds within the second
    """
    hours = (ms // 3600000) % 10
    within_hour = ms % 3600000
    minutes = within_hour // 60000
    within_minute = within_hour % 60000
    seconds = within_minute // 1000
    tenths = (within_minute % 1000) // 100
    return f"{hours}{minutes:02}{seconds:02}{tenths}"
|
||||
|
||||
@@ -9,23 +9,26 @@ cdef class Inference:
|
||||
cdef InferenceEngine engine
|
||||
cdef object on_annotation
|
||||
cdef Annotation _previous_annotation
|
||||
cdef dict[str, list(Detection)] _tile_detections
|
||||
cdef AIRecognitionConfig ai_config
|
||||
cdef bint stop_signal
|
||||
|
||||
cdef str model_input
|
||||
cdef int model_width
|
||||
cdef int model_height
|
||||
cdef int tile_width
|
||||
cdef int tile_height
|
||||
|
||||
cdef build_tensor_engine(self, object updater_callback)
|
||||
cdef init_ai(self)
|
||||
cpdef init_ai(self)
|
||||
cdef bint is_building_engine
|
||||
cdef bint is_video(self, str filepath)
|
||||
|
||||
cdef run_inference(self, RemoteCommand cmd)
|
||||
cdef _process_video(self, RemoteCommand cmd, AIRecognitionConfig ai_config, str video_name)
|
||||
cpdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths)
|
||||
cpdef _process_images_inner(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list frame_data)
|
||||
cpdef split_to_tiles(self, frame, path, img_w, img_h, overlap_percent)
|
||||
cdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths)
|
||||
cdef _process_images_inner(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list frame_data)
|
||||
cpdef split_to_tiles(self, frame, path, overlap_percent)
|
||||
cdef stop(self)
|
||||
|
||||
cdef preprocess(self, frames)
|
||||
@@ -33,4 +36,6 @@ cdef class Inference:
|
||||
cdef postprocess(self, output, ai_config)
|
||||
cdef split_list_extend(self, lst, chunk_size)
|
||||
|
||||
cdef bint is_valid_annotation(self, Annotation annotation, AIRecognitionConfig ai_config)
|
||||
cdef bint is_valid_video_annotation(self, Annotation annotation, AIRecognitionConfig ai_config)
|
||||
cdef bint is_valid_image_annotation(self, Annotation annotation)
|
||||
cdef remove_tiled_duplicates(self, Annotation annotation)
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import mimetypes
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
cimport constants_inf
|
||||
@@ -54,6 +56,8 @@ cdef class Inference:
|
||||
self.model_input = None
|
||||
self.model_width = 0
|
||||
self.model_height = 0
|
||||
self.tile_width = 0
|
||||
self.tile_height = 0
|
||||
self.engine = None
|
||||
self.is_building_engine = False
|
||||
|
||||
@@ -93,7 +97,7 @@ cdef class Inference:
|
||||
except Exception as e:
|
||||
updater_callback(f'Error. {str(e)}')
|
||||
|
||||
cdef init_ai(self):
|
||||
cpdef init_ai(self):
|
||||
if self.engine is not None:
|
||||
return
|
||||
|
||||
@@ -114,6 +118,8 @@ cdef class Inference:
|
||||
self.engine = OnnxEngine(res.data)
|
||||
|
||||
self.model_height, self.model_width = self.engine.get_input_shape()
|
||||
self.tile_width = self.model_width
|
||||
self.tile_height = self.model_height
|
||||
|
||||
cdef preprocess(self, frames):
|
||||
blobs = [cv2.dnn.blobFromImage(frame,
|
||||
@@ -211,11 +217,11 @@ cdef class Inference:
|
||||
images.append(m)
|
||||
# images first, it's faster
|
||||
if len(images) > 0:
|
||||
constants_inf.log(f'run inference on {" ".join(images)}...')
|
||||
constants_inf.log(<str>f'run inference on {" ".join(images)}...')
|
||||
self._process_images(cmd, ai_config, images)
|
||||
if len(videos) > 0:
|
||||
for v in videos:
|
||||
constants_inf.log(f'run inference on {v}...')
|
||||
constants_inf.log(<str>f'run inference on {v}...')
|
||||
self._process_video(cmd, ai_config, v)
|
||||
|
||||
|
||||
@@ -223,8 +229,10 @@ cdef class Inference:
|
||||
cdef int frame_count = 0
|
||||
cdef list batch_frames = []
|
||||
cdef list[int] batch_timestamps = []
|
||||
cdef Annotation annotation
|
||||
self._previous_annotation = None
|
||||
|
||||
|
||||
v_input = cv2.VideoCapture(<str>video_name)
|
||||
while v_input.isOpened() and not self.stop_signal:
|
||||
ret, frame = v_input.read()
|
||||
@@ -244,8 +252,12 @@ cdef class Inference:
|
||||
list_detections = self.postprocess(outputs, ai_config)
|
||||
for i in range(len(list_detections)):
|
||||
detections = list_detections[i]
|
||||
annotation = Annotation(video_name, batch_timestamps[i], detections)
|
||||
if self.is_valid_annotation(annotation, ai_config):
|
||||
|
||||
original_media_name = Path(<str>video_name).stem.replace(" ", "")
|
||||
name = f'{original_media_name}_{constants_inf.format_time(batch_timestamps[i])}'
|
||||
annotation = Annotation(name, original_media_name, batch_timestamps[i], detections)
|
||||
|
||||
if self.is_valid_video_annotation(annotation, ai_config):
|
||||
_, image = cv2.imencode('.jpg', batch_frames[i])
|
||||
annotation.image = image.tobytes()
|
||||
self._previous_annotation = annotation
|
||||
@@ -256,71 +268,104 @@ cdef class Inference:
|
||||
v_input.release()
|
||||
|
||||
|
||||
cpdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths):
|
||||
cdef list frame_data = []
|
||||
cdef _process_images(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list[str] image_paths):
|
||||
cdef list frame_data
|
||||
self._tile_detections = {}
|
||||
for path in image_paths:
|
||||
frame_data = []
|
||||
frame = cv2.imread(<str>path)
|
||||
img_h, img_w, _ = frame.shape
|
||||
if frame is None:
|
||||
constants_inf.logerror(<str>f'Failed to read image {path}')
|
||||
continue
|
||||
img_h, img_w, _ = frame.shape
|
||||
original_media_name = Path(<str> path).stem.replace(" ", "")
|
||||
if img_h <= 1.5 * self.model_height and img_w <= 1.5 * self.model_width:
|
||||
frame_data.append((frame, path))
|
||||
frame_data.append((frame, original_media_name, f'{original_media_name}_000000'))
|
||||
else:
|
||||
(split_frames, split_pats) = self.split_to_tiles(frame, path, img_w, img_h, ai_config.big_image_tile_overlap_percent)
|
||||
frame_data.extend(zip(split_frames, split_pats))
|
||||
res = self.split_to_tiles(frame, path, ai_config.big_image_tile_overlap_percent)
|
||||
frame_data.extend(res)
|
||||
if len(frame_data) > self.engine.get_batch_size():
|
||||
for chunk in self.split_list_extend(frame_data, self.engine.get_batch_size()):
|
||||
self._process_images_inner(cmd, ai_config, chunk)
|
||||
|
||||
for chunk in self.split_list_extend(frame_data, self.engine.get_batch_size()):
|
||||
self._process_images_inner(cmd, ai_config, chunk)
|
||||
|
||||
|
||||
cpdef split_to_tiles(self, frame, path, img_w, img_h, overlap_percent):
|
||||
stride_w = self.model_width * (1 - overlap_percent / 100)
|
||||
stride_h = self.model_height * (1 - overlap_percent / 100)
|
||||
n_tiles_x = int(np.ceil((img_w - self.model_width) / stride_w)) + 1
|
||||
n_tiles_y = int(np.ceil((img_h - self.model_height) / stride_h)) + 1
|
||||
cpdef split_to_tiles(self, frame, path, overlap_percent):
|
||||
constants_inf.log(<str>f'splitting image {path} to tiles...')
|
||||
img_h, img_w, _ = frame.shape
|
||||
stride_w = int(self.tile_width * (1 - overlap_percent / 100))
|
||||
stride_h = int(self.tile_height * (1 - overlap_percent / 100))
|
||||
|
||||
results = []
|
||||
for y_idx in range(n_tiles_y):
|
||||
for x_idx in range(n_tiles_x):
|
||||
y_start = y_idx * stride_w
|
||||
x_start = x_idx * stride_h
|
||||
original_media_name = Path(<str> path).stem.replace(" ", "")
|
||||
for y in range(0, img_h, stride_h):
|
||||
for x in range(0, img_w, stride_w):
|
||||
x_end = min(x + self.tile_width, img_w)
|
||||
y_end = min(y + self.tile_height, img_h)
|
||||
|
||||
# Ensure the tile doesn't go out of bounds
|
||||
y_end = min(y_start + self.model_width, img_h)
|
||||
x_end = min(x_start + self.model_height, img_w)
|
||||
# correct x,y for the close-to-border tiles
|
||||
if x_end - x < self.tile_width:
|
||||
if img_w - (x - stride_w) <= self.tile_width:
|
||||
continue # the previous tile already covered the last gap
|
||||
x = img_w - self.tile_width
|
||||
if y_end - y < self.tile_height:
|
||||
if img_h - (y - stride_h) <= self.tile_height:
|
||||
continue # the previous tile already covered the last gap
|
||||
y = img_h - self.tile_height
|
||||
|
||||
# We need to re-calculate start if we are at the edge to get a full 1280x1280 tile
|
||||
if y_end == img_h:
|
||||
y_start = img_h - self.model_height
|
||||
if x_end == img_w:
|
||||
x_start = img_w - self.model_width
|
||||
|
||||
tile = frame[y_start:y_end, x_start:x_end]
|
||||
name = path.stem + f'.tile_{x_start}_{y_start}' + path.suffix
|
||||
results.append((tile, name))
|
||||
tile = frame[y:y_end, x:x_end]
|
||||
name = f'{original_media_name}{constants_inf.SPLIT_SUFFIX}{x:04d}_{y:04d}!_000000'
|
||||
results.append((tile, original_media_name, name))
|
||||
return results
|
||||
|
||||
cpdef _process_images_inner(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list frame_data):
|
||||
frames = [frame for frame, _ in frame_data]
|
||||
cdef _process_images_inner(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list frame_data):
|
||||
cdef list frames, original_media_names, names
|
||||
cdef Annotation annotation
|
||||
frames, original_media_names, names = map(list, zip(*frame_data))
|
||||
input_blob = self.preprocess(frames)
|
||||
|
||||
outputs = self.engine.run(input_blob)
|
||||
|
||||
list_detections = self.postprocess(outputs, ai_config)
|
||||
for i in range(len(list_detections)):
|
||||
detections = list_detections[i]
|
||||
annotation = Annotation(frame_data[i][1], 0, detections)
|
||||
_, image = cv2.imencode('.jpg', frames[i])
|
||||
annotation.image = image.tobytes()
|
||||
self.on_annotation(cmd, annotation)
|
||||
annotation = Annotation(names[i], original_media_names[i], 0, list_detections[i])
|
||||
if self.is_valid_image_annotation(annotation):
|
||||
_, image = cv2.imencode('.jpg', frames[i])
|
||||
annotation.image = image.tobytes()
|
||||
self.on_annotation(cmd, annotation)
|
||||
|
||||
|
||||
# Request cancellation by raising the stop flag. The video loop visible in
# this file tests `not self.stop_signal` before reading each frame, so it
# exits once this has been called.
cdef stop(self):
|
||||
self.stop_signal = True
|
||||
|
||||
cdef bint is_valid_annotation(self, Annotation annotation, AIRecognitionConfig ai_config):
|
||||
# No detections, invalid
|
||||
cdef remove_tiled_duplicates(self, Annotation annotation):
    """Drop detections that an earlier tile of the same image already reported.

    The tile offset is parsed from the annotation name, which for tiles has
    the form ``<media><SPLIT_SUFFIX><x>_<y>!_000000``. Each detection is
    shifted by that offset and scaled by the tile size, then compared
    (through Detection.__eq__) with the detections remembered in
    self._tile_detections for annotation.original_media_name. Matches are
    removed from annotation.detections; new ones are remembered.

    annotation.detections is updated in place, so other references to the
    same list see the filtered content.
    """
    suffix = constants_inf.SPLIT_SUFFIX
    right = annotation.name.rindex('!')
    left = annotation.name.index(suffix) + len(suffix)
    x_str, y_str = annotation.name[left:right].split('_')
    tile_x = int(x_str)
    tile_y = int(y_str)

    # One bucket of already-seen detections per original image.
    seen = self._tile_detections.setdefault(annotation.original_media_name, [])

    # Collect survivors instead of calling list.remove() inside the loop:
    # removing while iterating skips the element that follows, and remove()
    # matches through the tolerance-based Detection.__eq__, so it could
    # delete a different detection than the one being examined.
    unique = []
    for det in annotation.detections:
        # NOTE(review): det.x/y/w/h look tile-relative (scaled by tile size
        # here) - confirm against postprocess().
        det_abs = Detection(tile_x + det.x * self.tile_width,
                            tile_y + det.y * self.tile_height,
                            det.w * self.tile_width,
                            det.h * self.tile_height,
                            det.cls,
                            det.confidence)
        if det_abs in seen:
            continue
        seen.append(det_abs)
        unique.append(det)

    annotation.detections[:] = unique
|
||||
|
||||
cdef bint is_valid_image_annotation(self, Annotation annotation):
    """Tell whether an image annotation still has detections worth reporting.

    Annotations whose name contains constants_inf.SPLIT_SUFFIX are first
    passed through remove_tiled_duplicates(). The annotation is valid only
    if at least one detection remains.
    """
    is_tile = constants_inf.SPLIT_SUFFIX in annotation.name
    if is_tile:
        self.remove_tiled_duplicates(annotation)
    return bool(annotation.detections)
|
||||
|
||||
cdef bint is_valid_video_annotation(self, Annotation annotation, AIRecognitionConfig ai_config):
|
||||
if constants_inf.SPLIT_SUFFIX in annotation.name:
|
||||
self.remove_tiled_duplicates(annotation)
|
||||
if not annotation.detections:
|
||||
return False
|
||||
|
||||
|
||||
@@ -2,15 +2,15 @@ from setuptools import setup, Extension
|
||||
from Cython.Build import cythonize
|
||||
import numpy as np
|
||||
|
||||
# debug_args = {}
|
||||
# trace_line = False
|
||||
debug_args = {}
|
||||
trace_line = False
|
||||
|
||||
debug_args = {
|
||||
'extra_compile_args': ['-O0', '-g'],
|
||||
'extra_link_args': ['-g'],
|
||||
'define_macros': [('CYTHON_TRACE_NOGIL', '1')]
|
||||
}
|
||||
trace_line = True
|
||||
# debug_args = {
|
||||
# 'extra_compile_args': ['-O0', '-g'],
|
||||
# 'extra_link_args': ['-g'],
|
||||
# 'define_macros': [('CYTHON_TRACE_NOGIL', '1')]
|
||||
# }
|
||||
# trace_line = True
|
||||
|
||||
extensions = [
|
||||
Extension('constants_inf', ['constants_inf.pyx'], **debug_args),
|
||||
|
||||
@@ -1,8 +1,30 @@
|
||||
import inference
|
||||
from ai_config import AIRecognitionConfig
|
||||
from remote_command_inf import RemoteCommand
|
||||
from unittest.mock import Mock
|
||||
import numpy as np
|
||||
|
||||
from loader_client import LoaderClient
|
||||
|
||||
|
||||
# Smoke test: builds an Inference with both constructor arguments set to None
# and runs _process_images on two image paths. It makes no assertions, so it
# only fails if the call raises.
def test_process_images():
|
||||
inf = inference.Inference(None, None)
|
||||
inf._process_images(RemoteCommand(30), AIRecognitionConfig(4, 2, 15, 0.15, 15, 0.8, 20, b'test', [], 4), ['test_img01.JPG', 'test_img02.jpg'])
|
||||
def test_split_to_tiles():
    """split_to_tiles cuts a large frame into uniformly sized, uniquely named tiles."""
    loader_client = LoaderClient("test", 0)
    ai_config = AIRecognitionConfig(
        frame_period_recognition=4,
        frame_recognition_seconds=2,
        probability_threshold=0.2,

        tracking_distance_confidence=0.15,
        tracking_probability_increase=0.15,
        tracking_intersection_threshold=0.6,
        big_image_tile_overlap_percent=20,

        file_data=None,
        paths=[],
        model_batch_size=4
    )
    inf = inference.Inference(loader_client, ai_config)
    test_frame = np.zeros((6336, 8448, 3), dtype=np.uint8)

    inf.init_ai()
    tiles = inf.split_to_tiles(test_frame, 'test_image.jpg', ai_config.big_image_tile_overlap_percent)

    # Each entry is (tile, original_media_name, name).
    assert tiles, 'a frame larger than the model input must yield tiles'

    # Border tiles are shifted back inside the image, so every tile keeps
    # the full tile size.
    assert len({tile.shape for tile, _, _ in tiles}) == 1

    assert all(media == 'test_image' for _, media, _ in tiles)

    names = [name for _, _, name in tiles]
    assert len(set(names)) == len(names), 'tile names must be unique'
    assert all(name.startswith('test_image!split!') and name.endswith('!_000000')
               for name in names)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user