add MediaFile

This commit is contained in:
Oleksandr Bezdieniezhnykh
2025-11-17 07:46:11 +02:00
parent fd95d2ba2c
commit 47de31307b
+51 -56
View File
@@ -258,13 +258,13 @@ cdef class Inference:
cdef _process_video(self, RemoteCommand cmd, AIRecognitionConfig ai_config, str video_name): cdef _process_video(self, RemoteCommand cmd, AIRecognitionConfig ai_config, str video_name):
cdef int frame_count = 0 cdef int frame_count = 0
cdef list batch_frames = [] cdef list frame_data = []
cdef list[int] batch_timestamps = []
cdef Annotation annotation
self._previous_annotation = None self._previous_annotation = None
self._tile_detections = {}
original_media_name = Path(<str>video_name).stem.replace(" ", "")
v_input = cv2.VideoCapture(<str>video_name) v_input = cv2.VideoCapture(<str>video_name)
while v_input.isOpened() and not self.stop_signal: while v_input.isOpened() and not self.stop_signal:
ret, frame = v_input.read() ret, frame = v_input.read()
if not ret or frame is None: if not ret or frame is None:
@@ -272,30 +272,17 @@ cdef class Inference:
frame_count += 1 frame_count += 1
if frame_count % ai_config.frame_period_recognition == 0: if frame_count % ai_config.frame_period_recognition == 0:
batch_frames.append(frame) timestamp = int(v_input.get(cv2.CAP_PROP_POS_MSEC))
batch_timestamps.append(int(v_input.get(cv2.CAP_PROP_POS_MSEC))) tiles, ground_sampling_distance = self.prepare_frame_or_tiles(frame, original_media_name, timestamp, ai_config)
frame_data.extend(tiles)
if len(batch_frames) == self.engine.get_batch_size(): if len(frame_data) >= self.engine.get_batch_size():
input_blob = self.preprocess(batch_frames) self.process_frame_data_batches(cmd, ai_config, frame_data, ground_sampling_distance, True)
frame_data.clear()
outputs = self.engine.run(input_blob) if frame_data:
self.process_frame_data_batches(cmd, ai_config, frame_data, ground_sampling_distance, True)
list_detections = self.postprocess(outputs, ai_config)
for i in range(len(list_detections)):
detections = list_detections[i]
original_media_name = Path(<str>video_name).stem.replace(" ", "")
name = f'{original_media_name}_{constants_inf.format_time(batch_timestamps[i])}'
annotation = Annotation(name, original_media_name, batch_timestamps[i], detections)
if self.is_valid_video_annotation(annotation, ai_config):
_, image = cv2.imencode('.jpg', batch_frames[i])
annotation.image = image.tobytes()
self._previous_annotation = annotation
self.on_annotation(cmd, annotation)
batch_frames.clear()
batch_timestamps.clear()
v_input.release() v_input.release()
self.send_detection_status(cmd.client_id) self.send_detection_status(cmd.client_id)
@@ -310,29 +297,20 @@ cdef class Inference:
for path in image_paths: for path in image_paths:
frame_data = [] frame_data = []
frame = cv2.imread(<str>path) frame = cv2.imread(<str>path)
img_h, img_w, _ = frame.shape
if frame is None: if frame is None:
constants_inf.logerror(<str>f'Failed to read image {path}') constants_inf.logerror(<str>f'Failed to read image {path}')
continue continue
original_media_name = Path(<str> path).stem.replace(" ", "") original_media_name = Path(<str> path).stem.replace(" ", "")
ground_sampling_distance = ai_config.sensor_width * ai_config.altitude / (ai_config.focal_length * img_w) tiles, ground_sampling_distance = self.prepare_frame_or_tiles(frame, original_media_name, 0, ai_config)
constants_inf.log(<str>f'ground sampling distance: {ground_sampling_distance}') constants_inf.log(<str>f'ground sampling distance: {ground_sampling_distance}')
frame_data.extend(tiles)
if img_h <= 1.5 * self.model_height and img_w <= 1.5 * self.model_width:
frame_data.append((frame, original_media_name, f'{original_media_name}_000000'))
else:
tile_size = int(constants_inf.METERS_IN_TILE / ground_sampling_distance)
constants_inf.log(<str> f'calc tile size: {tile_size}')
res = self.split_to_tiles(frame, path, tile_size, ai_config.big_image_tile_overlap_percent)
frame_data.extend(res)
if len(frame_data) > self.engine.get_batch_size(): if len(frame_data) > self.engine.get_batch_size():
for chunk in self.split_list_extend(frame_data, self.engine.get_batch_size()): self.process_frame_data_batches(cmd, ai_config, frame_data, ground_sampling_distance, False)
self._process_images_inner(cmd, ai_config, chunk, ground_sampling_distance)
self.send_detection_status(cmd.client_id) self.send_detection_status(cmd.client_id)
for chunk in self.split_list_extend(frame_data, self.engine.get_batch_size()): self.process_frame_data_batches(cmd, ai_config, frame_data, ground_sampling_distance, False)
self._process_images_inner(cmd, ai_config, chunk, ground_sampling_distance)
self.send_detection_status(cmd.client_id) self.send_detection_status(cmd.client_id)
cdef send_detection_status(self, client_id): cdef send_detection_status(self, client_id):
@@ -347,47 +325,67 @@ cdef class Inference:
pass pass
self.detection_counts.clear() self.detection_counts.clear()
cdef prepare_frame_or_tiles(self, frame, str original_media_name, int timestamp, AIRecognitionConfig ai_config):
    # Turn one decoded frame into a list of inference inputs.
    #
    # Returns a pair (items, ground_sampling_distance) where every item is a
    # (pixels, original_media_name, name, timestamp) tuple:
    #   * a frame no larger than 1.5x the model input in both dimensions is
    #     passed through whole, as a single-item list;
    #   * anything larger is cut into tiles by split_to_tiles().
    #
    # NOTE(review): the unpacking below requires a 3-dimensional frame
    # (rows, cols, channels); a 2-D grayscale array would raise ValueError.
    rows, cols, _ = frame.shape

    # Ground sampling distance is derived from the camera settings and the
    # frame width only. Presumably metres per pixel - confirm the units of
    # sensor_width / altitude / focal_length with the config owner.
    gsd = ai_config.sensor_width * ai_config.altitude / (ai_config.focal_length * cols)

    needs_tiling = rows > 1.5 * self.model_height or cols > 1.5 * self.model_width
    if needs_tiling:
        # Tile edge length in pixels: METERS_IN_TILE divided by the GSD.
        tile_size = int(constants_inf.METERS_IN_TILE / gsd)
        pieces = self.split_to_tiles(frame, original_media_name, timestamp, tile_size, ai_config.big_image_tile_overlap_percent)
        return pieces, gsd

    name = f'{original_media_name}_{constants_inf.format_time(timestamp)}'
    return [(frame, original_media_name, name, timestamp)], gsd
cdef process_frame_data_batches(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list frame_data, double ground_sampling_distance, bint is_video):
    # Feed frame_data to the engine one engine-sized batch at a time.
    # frame_data holds the (pixels, original_media_name, name, timestamp)
    # tuples built by prepare_frame_or_tiles(); is_video is forwarded to
    # _process_batch unchanged.
    # NOTE(review): split_list_extend is defined outside this view - it
    # presumably pads the final chunk up to the batch size; confirm.
    batch_size = self.engine.get_batch_size()
    for batch in self.split_list_extend(frame_data, batch_size):
        self._process_batch(cmd, ai_config, batch, ground_sampling_distance, is_video)
cdef split_to_tiles(self, frame, str original_media_name, int timestamp, tile_size, overlap_percent):
    # Cut `frame` into square tiles of `tile_size` pixels whose origins
    # advance by tile_size * (1 - overlap_percent / 100) pixels.
    #
    # Returns a list of (tile, original_media_name, name, timestamp) tuples
    # in row-major order. `name` encodes the tile size and the tile's
    # top-left x / y pixel offset inside the frame, followed by the
    # formatted timestamp.
    #
    # Border handling: a tile that would run past the right / bottom edge
    # is shifted back so that it ends exactly on the edge, unless the
    # previous tile in that direction already reached the edge, in which
    # case it is dropped as redundant.
    #
    # Raises ValueError when the stride is not positive (overlap_percent
    # >= 100 or tile_size too small): range() cannot step by zero, and a
    # negative step would silently produce no tiles at all.
    img_h, img_w, _ = frame.shape

    # The same stride is used horizontally and vertically.
    stride = int(tile_size * (1 - overlap_percent / 100))
    if stride < 1:
        raise ValueError(
            f'invalid tiling parameters: tile_size={tile_size}, '
            f'overlap_percent={overlap_percent} give stride {stride}')

    # The timestamp suffix is identical for every tile of this frame.
    time_suffix = constants_inf.format_time(timestamp)

    results = []
    for row_start in range(0, img_h, stride):
        y = row_start
        y_end = min(y + tile_size, img_h)
        if y_end - y < tile_size:
            # Only a row that HAS a predecessor (y > 0) can be redundant;
            # the first row must always be emitted.
            if y > 0 and img_h - (y - stride) <= tile_size:
                continue  # the previous row already covered the last gap
            # Shift the row up so it ends on the bottom edge; clamp at 0 so
            # a frame shorter than the tile does not yield a negative
            # (wrap-around) slice start.
            y = max(0, img_h - tile_size)

        for col_start in range(0, img_w, stride):
            x = col_start
            x_end = min(x + tile_size, img_w)
            if x_end - x < tile_size:
                if x > 0 and img_w - (x - stride) <= tile_size:
                    continue  # the previous tile already covered the last gap
                x = max(0, img_w - tile_size)

            # When the frame is smaller than tile_size in a dimension, the
            # tile is the full frame extent there and is smaller than the
            # tile_size recorded in its name.
            tile = frame[y:y_end, x:x_end]
            name = f'{original_media_name}{constants_inf.SPLIT_SUFFIX}{tile_size:04d}_{x:04d}_{y:04d}!_{time_suffix}'
            results.append((tile, original_media_name, name, timestamp))
    return results
cdef _process_images_inner(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list frame_data, double ground_sampling_distance): cdef _process_batch(self, RemoteCommand cmd, AIRecognitionConfig ai_config, list frame_data, double ground_sampling_distance, bint is_video):
cdef list frames, original_media_names, names cdef list frames, original_media_names, names, timestamps
cdef Annotation annotation cdef Annotation annotation
cdef int i cdef int i
frames, original_media_names, names = map(list, zip(*frame_data))
frames, original_media_names, names, timestamps = map(list, zip(*frame_data))
input_blob = self.preprocess(frames) input_blob = self.preprocess(frames)
outputs = self.engine.run(input_blob) outputs = self.engine.run(input_blob)
list_detections = self.postprocess(outputs, ai_config) list_detections = self.postprocess(outputs, ai_config)
for i in range(len(list_detections)): for i in range(len(list_detections)):
annotation = Annotation(names[i], original_media_names[i], 0, list_detections[i]) annotation = Annotation(names[i], original_media_names[i], timestamps[i], list_detections[i])
if self.is_valid_image_annotation(annotation, ground_sampling_distance, frames[i].shape):
if self.check_valid_sizes(annotation, ground_sampling_distance, frames[i].shape):
if is_video:
if not self.is_valid_video_annotation(annotation, ai_config):
continue
self._previous_annotation = annotation
constants_inf.log(<str> f'Detected {annotation}') constants_inf.log(<str> f'Detected {annotation}')
_, image = cv2.imencode('.jpg', frames[i]) _, image = cv2.imencode('.jpg', frames[i])
annotation.image = image.tobytes() annotation.image = image.tobytes()
@@ -423,7 +421,7 @@ cdef class Inference:
annotation.detections = unique_detections annotation.detections = unique_detections
cdef bint is_valid_image_annotation(self, Annotation annotation, double ground_sampling_distance, frame_shape): cdef bint check_valid_sizes(self, Annotation annotation, double ground_sampling_distance, frame_shape):
if constants_inf.SPLIT_SUFFIX in annotation.name: if constants_inf.SPLIT_SUFFIX in annotation.name:
self.remove_tiled_duplicates(annotation) self.remove_tiled_duplicates(annotation)
img_h, img_w, _ = frame_shape img_h, img_w, _ = frame_shape
@@ -442,7 +440,6 @@ cdef class Inference:
else: else:
constants_inf.log(<str> f'Removed ({m_w} {m_h}) > {max_size}. class: {constants_inf.annotations_dict[det.cls].name}') constants_inf.log(<str> f'Removed ({m_w} {m_h}) > {max_size}. class: {constants_inf.annotations_dict[det.cls].name}')
# Replace the old list with the new, filtered one
annotation.detections = valid_detections annotation.detections = valid_detections
if not annotation.detections: if not annotation.detections:
@@ -450,8 +447,6 @@ cdef class Inference:
return True return True
cdef bint is_valid_video_annotation(self, Annotation annotation, AIRecognitionConfig ai_config): cdef bint is_valid_video_annotation(self, Annotation annotation, AIRecognitionConfig ai_config):
if constants_inf.SPLIT_SUFFIX in annotation.name:
self.remove_tiled_duplicates(annotation)
if not annotation.detections: if not annotation.detections:
return False return False