mirror of
https://github.com/azaion/ai-training.git
synced 2026-04-22 09:06:35 +00:00
add export to FP16
add inference with possibility to have different
This commit is contained in:
+5
-14
@@ -31,22 +31,13 @@ def export_onnx(model_path):
|
|||||||
return Path(model_path).stem + '.onnx'
|
return Path(model_path).stem + '.onnx'
|
||||||
|
|
||||||
|
|
||||||
def export_tensorrt(model_path, dataset_yaml):
|
def export_tensorrt(model_path):
|
||||||
form_data_sample(path.join(path.dirname(dataset_yaml), 'minival', 'images'))
|
YOLO(model_path).export(
|
||||||
model = YOLO(model_path)
|
|
||||||
with open(dataset_yaml, 'r') as file:
|
|
||||||
yaml_data = yaml.safe_load(file) or {}
|
|
||||||
yaml_data['minival'] = 'minival/images'
|
|
||||||
with open(dataset_yaml, 'w') as file:
|
|
||||||
yaml.dump(yaml_data, file)
|
|
||||||
|
|
||||||
model.export(
|
|
||||||
format='engine',
|
format='engine',
|
||||||
batch=4,
|
batch=4,
|
||||||
half=True,
|
half=True,
|
||||||
nms=True,
|
simplify=True,
|
||||||
data=dataset_yaml,
|
nms=True
|
||||||
split='minival'
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def form_data_sample(destination_path, size=500, write_txt_log=False):
|
def form_data_sample(destination_path, size=500, write_txt_log=False):
|
||||||
@@ -76,6 +67,6 @@ def show_model(model: str = None):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
export_tensorrt('azaion-2025-03-10.pt', path.join(datasets_dir, 'azaion-2025-03-10', 'data.yaml'))
|
export_tensorrt('azaion-2025-03-10.pt')
|
||||||
# export_rknn('azaion-2025-03-10.pt')
|
# export_rknn('azaion-2025-03-10.pt')
|
||||||
# export_onnx('azaion-2025-03-10.pt')
|
# export_onnx('azaion-2025-03-10.pt')
|
||||||
@@ -0,0 +1,62 @@
|
|||||||
|
import json
|
||||||
|
from enum import Enum
|
||||||
|
from os.path import join, dirname
|
||||||
|
|
||||||
|
|
||||||
|
class Detection:
    """A detected box in centre format with its class id and confidence."""

    def __init__(self, x, y, w, h, cls, confidence):
        # (x, y) is the box centre, (w, h) its full width and height.
        self.x = x
        self.y = y
        self.w = w
        self.h = h
        self.cls = cls
        self.confidence = confidence

    def overlaps(self, det2, iou_threshold):
        """Return True when the IoU of this box and ``det2`` exceeds ``iou_threshold``.

        Two boxes whose combined area is zero never overlap (returns False
        instead of dividing by zero).
        """
        # Per-axis overlap of two centred extents. The raw formula
        # overshoots when one extent is nested inside the other, so it is
        # clamped to the smaller extent.
        overlap_x = min(0.5 * (self.w + det2.w) - abs(self.x - det2.x), self.w, det2.w)
        overlap_y = min(0.5 * (self.h + det2.h) - abs(self.y - det2.y), self.h, det2.h)
        intersection = max(0, overlap_x) * max(0, overlap_y)
        union = self.w * self.h + det2.w * det2.h - intersection

        if union <= 0:
            return False
        return intersection / union > iou_threshold
|
||||||
|
|
||||||
|
|
||||||
|
class Annotation:
    """A video frame bundled with its timestamp and the detections found in it."""

    def __init__(self, frame, time, detections: list[Detection]):
        self.frame, self.time = frame, time
        # A missing detection list is normalised to a fresh empty list.
        self.detections = [] if detections is None else detections
|
||||||
|
|
||||||
|
|
||||||
|
class WeatherMode(Enum):
    # Each value is an id offset: AnnotationClass.read_json adds it to the
    # 'Id' of every entry in classes.json to build one class per mode.
    # Member names presumably stand for normal / winter / night conditions
    # -- TODO confirm with the dataset owners.
    Norm = 0
    Wint = 20
    Night = 40
|
||||||
|
|
||||||
|
class AnnotationClass:
    """A detection class: numeric id, display name and display colour.

    ``color`` is a hex string such as ``'#RRGGBB'``; a longer form with
    leading digits before the RGB part (e.g. ``'#AARRGGBB'``) is accepted
    too, because only the trailing six hex digits are read.
    """

    def __init__(self, id, name, color):
        self.id = id
        self.name = name
        self.color = color
        red, green, blue = self._parse_rgb(color)
        # OpenCV drawing functions take colours in BGR order.
        self.opencv_color = (blue, green, red)

    @staticmethod
    def _parse_rgb(color):
        """Return ``(r, g, b)`` ints parsed from the last six hex digits of ``color``."""
        digits = color.lstrip('#')[-6:]
        if len(digits) != 6:
            raise ValueError(f'Expected at least 6 hex digits in colour {color!r}')
        return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))

    @staticmethod
    def read_json():
        """Load ``classes.json`` and return ``{class_id: AnnotationClass}``.

        The file is looked up one directory above this module. Every entry
        is emitted once per ``WeatherMode``, with the mode's value added to
        the entry's ``Id`` and the mode's name appended to ``Name`` for any
        mode whose value is not 0.
        """
        classes_path = join(dirname(dirname(__file__)), 'classes.json')
        with open(classes_path, 'r', encoding='utf-8') as f:
            base_classes = json.load(f)

        annotations_dict = {}
        for mode in WeatherMode:
            for cl in base_classes:
                class_id = mode.value + cl['Id']
                name = cl['Name'] if mode.value == 0 else f'{cl["Name"]}({mode.name})'
                annotations_dict[class_id] = AnnotationClass(class_id, name, cl['Color'])
        return annotations_dict

    @property
    def color_tuple(self):
        """The colour as an ``(r, g, b)`` tuple of ints."""
        return self._parse_rgb(self.color)
|
||||||
@@ -0,0 +1,140 @@
|
|||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from onnx_engine import InferenceEngine
|
||||||
|
from dto import AnnotationClass, Annotation, Detection
|
||||||
|
|
||||||
|
|
||||||
|
class Inference:
    """Run a detection engine over a video and display the annotated frames.

    Every 4th frame is sampled and grouped into batches of the engine's
    batch size. Each batch is preprocessed, inferred, post-processed and
    drawn in an OpenCV window.
    """

    def __init__(self, engine: InferenceEngine, confidence_threshold, iou_threshold):
        self.engine = engine
        self.confidence_threshold = confidence_threshold
        self.iou_threshold = iou_threshold
        self.batch_size = engine.get_batch_size()

        self.model_height, self.model_width = engine.get_input_shape()
        self.classes = AnnotationClass.read_json()

    def draw(self, annotation: Annotation):
        """Draw every detection of ``annotation`` onto its frame and show the frame.

        Detection coordinates are treated as fractions of the frame size.
        The frame is modified in place.
        """
        img = annotation.frame
        img_height, img_width = img.shape[:2]
        for d in annotation.detections:
            x1 = int(img_width * (d.x - d.w / 2))
            y1 = int(img_height * (d.y - d.h / 2))
            x2 = int(x1 + img_width * d.w)
            y2 = int(y1 + img_height * d.h)

            color = self.classes[d.cls].opencv_color
            cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
            label = f"{self.classes[d.cls].name}: {d.confidence:.2f}"
            (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

            # Put the label above the box unless it would leave the frame.
            label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10

            cv2.rectangle(
                img, (x1, label_y - label_height), (x1 + label_width, label_y + label_height), color, cv2.FILLED
            )
            cv2.putText(img, label, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
        cv2.imshow('Video', img)

    def preprocess(self, frames):
        """Convert ``frames`` into one stacked blob scaled to the model input size."""
        blobs = [cv2.dnn.blobFromImage(frame,
                                       scalefactor=1.0 / 255.0,
                                       size=(self.model_width, self.model_height),
                                       mean=(0, 0, 0),
                                       swapRB=True,
                                       crop=False)
                 for frame in frames]
        return np.vstack(blobs)

    def postprocess(self, batch_frames, batch_timestamps, output):
        """Turn raw engine output into one ``Annotation`` per input frame.

        ``output[0]`` is read as one row list per frame, each row being
        ``x1, y1, x2, y2, confidence, class`` in model-input pixels. Rows
        stop at the first zero confidence. Rows of ``output[0]`` beyond the
        number of frames (a short final batch) are ignored.
        """
        anns = []
        for frame, timestamp, frame_output in zip(batch_frames, batch_timestamps, output[0]):
            detections = []
            for det in frame_output:
                if det[4] == 0:  # zero confidence marks the end of valid rows
                    break
                if det[4] < self.confidence_threshold:
                    continue

                # Normalise corners to [0, 1] and clip to the frame.
                x1 = max(0, det[0] / self.model_width)
                y1 = max(0, det[1] / self.model_height)
                x2 = min(1, det[2] / self.model_width)
                y2 = min(1, det[3] / self.model_height)
                conf = round(det[4], 2)
                class_id = int(det[5])

                x = (x1 + x2) / 2
                y = (y1 + y2) / 2
                w = x2 - x1
                h = y2 - y1
                detections.append(Detection(x, y, w, h, class_id, conf))

            filtered_detections = self.remove_overlapping_detections(detections)
            anns.append(Annotation(frame, timestamp, filtered_detections))
        return anns

    def _show_batch(self, batch_frames, batch_timestamps):
        """Infer and display one batch; return False once the user presses 'q'."""
        input_blob = self.preprocess(batch_frames)
        outputs = self.engine.run(input_blob)
        for annotation in self.postprocess(batch_frames, batch_timestamps, outputs):
            self.draw(annotation)
            print(f'video: {annotation.time / 1000:.3f}s')
            if cv2.waitKey(1) & 0xFF == ord('q'):
                return False
        return True

    def process(self, video):
        """Run detection over ``video`` until it ends or the user presses 'q'.

        The capture is always released, including on errors.
        """
        frame_count = 0
        batch_frames = []
        batch_timestamps = []
        v_input = cv2.VideoCapture(video)
        try:
            while v_input.isOpened():
                ret, frame = v_input.read()
                if not ret or frame is None:
                    break

                frame_count += 1
                if frame_count % 4 != 0:
                    continue
                batch_frames.append(frame)
                batch_timestamps.append(int(v_input.get(cv2.CAP_PROP_POS_MSEC)))

                if len(batch_frames) == self.batch_size:
                    keep_going = self._show_batch(batch_frames, batch_timestamps)
                    batch_frames.clear()
                    batch_timestamps.clear()
                    if not keep_going:
                        return

            # Frames left over when the video length is not a multiple of the batch.
            if batch_frames:
                self._show_batch(batch_frames, batch_timestamps)
        finally:
            v_input.release()

    def remove_overlapping_detections(self, detections):
        """Return ``detections`` with overlapping boxes reduced to one winner each.

        For every not-yet-suppressed box (the anchor), all later boxes that
        overlap it compete with the current winner. Higher confidence
        wins; on equal confidence the lower class id wins. Boxes that
        were already suppressed never compete again.
        """
        filtered_output = []
        suppressed = set()

        for det1_index, det1 in enumerate(detections):
            if det1_index in suppressed:
                continue
            res = det1_index
            for det2_index in range(det1_index + 1, len(detections)):
                if det2_index in suppressed:
                    continue
                det2 = detections[det2_index]
                if not det1.overlaps(det2, self.iou_threshold):
                    continue
                # Compare against the current winner, not the anchor.
                # Otherwise a weaker late box can displace a stronger
                # earlier one.
                best = detections[res]
                if best.confidence > det2.confidence or (best.confidence == det2.confidence and best.cls < det2.cls):
                    suppressed.add(det2_index)
                else:
                    suppressed.add(res)
                    res = det2_index
            filtered_output.append(detections[res])
            suppressed.add(res)
        return filtered_output
|
||||||
@@ -0,0 +1,43 @@
|
|||||||
|
import abc
|
||||||
|
from typing import List, Tuple
|
||||||
|
import numpy as np
|
||||||
|
import onnxruntime as onnx
|
||||||
|
|
||||||
|
|
||||||
|
class InferenceEngine(abc.ABC):
    """Abstract interface shared by all inference backends."""

    @abc.abstractmethod
    def __init__(self, model_path: str, batch_size: int = 1, **kwargs):
        """Prepare the backend for the model stored at ``model_path``."""

    @abc.abstractmethod
    def get_input_shape(self) -> Tuple[int, int]:
        """Return the two spatial dimensions of the model input."""

    @abc.abstractmethod
    def get_batch_size(self) -> int:
        """Return the number of frames the engine processes per call."""

    @abc.abstractmethod
    def run(self, input_data: np.ndarray) -> List[np.ndarray]:
        """Run inference on ``input_data`` and return the output arrays."""
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class OnnxEngine(InferenceEngine):
    """Inference backend built on an ONNX Runtime session."""

    def __init__(self, model_path: str, batch_size: int = 1, **kwargs):
        """Open a session for ``model_path``, listing the CUDA provider before the CPU one."""
        self.model_path = model_path
        self.batch_size = batch_size

        providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
        self.session = onnx.InferenceSession(model_path, providers=providers)

        # Only the first model input is used by this engine.
        self.model_inputs = self.session.get_inputs()
        first_input = self.model_inputs[0]
        self.input_name = first_input.name
        self.input_shape = first_input.shape

    def get_input_shape(self) -> Tuple[int, int]:
        """Return entries 2 and 3 of the first input's declared shape."""
        dims = self.input_shape
        return dims[2], dims[3]

    def get_batch_size(self) -> int:
        """Return the batch size given at construction."""
        return self.batch_size

    def run(self, input_data: np.ndarray) -> List[np.ndarray]:
        """Feed ``input_data`` to the first model input and return all outputs."""
        feed = {self.input_name: input_data}
        return self.session.run(None, feed)
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
from onnx_engine import OnnxEngine
|
||||||
|
from tensorrt_engine import TensorRTEngine
|
||||||
|
from inference import Inference
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual benchmark entry point. Only the last Inference(...) call is
    # active; the commented-out calls are alternative engine configurations
    # kept next to the timings recorded for them.

    # Inference(OnnxEngine('azaion-2025-03-10.onnx', batch_size=4),
    #           confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4')
    # detection for the first 200sec of video:
    #   onnxInference: 81 sec, 6.3Gb VRAM
    #   tensorrt:      54 sec, 3.7Gb VRAM

    # Inference(TensorRTEngine('azaion-2025-03-10_int8.engine', batch_size=16),
    #           confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4')
    # INT8 for 200sec: 54 sec 3.7Gb

    # Inference(TensorRTEngine('azaion-2025-03-10_batch8.engine', batch_size=8),
    #           confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4')

    Inference(TensorRTEngine('azaion-2025-03-10-half_batch4.engine', batch_size=4),
              confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4')
|
||||||
@@ -0,0 +1,92 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Tuple
|
||||||
|
import json
|
||||||
|
import numpy as np
|
||||||
|
import tensorrt as trt
|
||||||
|
import pycuda.driver as cuda
|
||||||
|
import pycuda.autoinit # required for automatically initialize CUDA, do not remove.
|
||||||
|
|
||||||
|
from onnx_engine import InferenceEngine
|
||||||
|
|
||||||
|
|
||||||
|
class TensorRTEngine(InferenceEngine):
    """Inference backend that executes a serialized TensorRT engine file.

    The file is read as a 4-byte little-endian length, then that many bytes
    of JSON metadata, then the serialized engine.
    """

    def __init__(self, model_path: str, batch_size: int = 4, **kwargs):
        """Load the engine and allocate its device/host buffers.

        ``batch_size`` is used only when the engine's batch dimension is
        dynamic (-1). Otherwise the engine's fixed batch wins, and
        ``get_batch_size`` reports that value.

        Raises:
            RuntimeError: if the file cannot be read or the engine cannot be
                deserialized or initialised.
        """
        self.model_path = model_path
        self.batch_size = batch_size

        try:
            logger = trt.Logger(trt.Logger.WARNING)

            with open(model_path, 'rb') as f:
                metadata_len = int.from_bytes(f.read(4), byteorder='little', signed=True)
                metadata_bytes = f.read(metadata_len)
                try:
                    self.metadata = json.loads(metadata_bytes)
                    print(f"Model metadata: {json.dumps(self.metadata, indent=2)}")
                except ValueError:
                    # ValueError covers both JSONDecodeError and the
                    # UnicodeDecodeError raised for non-UTF-8 bytes.
                    print(f"Failed to parse metadata: {metadata_bytes}")
                    self.metadata = {}
                engine_data = f.read()

            runtime = trt.Runtime(logger)
            self.engine = runtime.deserialize_cuda_engine(engine_data)

            if self.engine is None:
                raise RuntimeError(f"Failed to load TensorRT engine from {model_path}")

            self.context = self.engine.create_execution_context()

            # input: tensor 0 is taken as the model input
            self.input_name = self.engine.get_tensor_name(0)
            engine_input_shape = self.engine.get_tensor_shape(self.input_name)
            self.input_shape = [
                batch_size if engine_input_shape[0] == -1 else engine_input_shape[0],
                engine_input_shape[1],  # Channels (usually fixed at 3 for RGB)
                1280 if engine_input_shape[2] == -1 else engine_input_shape[2],  # Height
                1280 if engine_input_shape[3] == -1 else engine_input_shape[3]  # Width
            ]
            # Report the batch the buffers were really sized for, so that
            # callers never hand over more frames than fit.
            self.batch_size = self.input_shape[0]
            self.context.set_input_shape(self.input_name, self.input_shape)
            self._input_nbytes = trt.volume(self.input_shape) * np.dtype(np.float32).itemsize
            self.d_input = cuda.mem_alloc(self._input_nbytes)

            # output: tensor 1 is taken as the model output
            self.output_name = self.engine.get_tensor_name(1)
            engine_output_shape = tuple(self.engine.get_tensor_shape(self.output_name))
            self.output_shape = [
                self.input_shape[0],
                300 if engine_output_shape[1] == -1 else engine_output_shape[1],  # max detections number
                6 if engine_output_shape[2] == -1 else engine_output_shape[2]  # x1 y1 x2 y2 conf cls
            ]
            self.h_output = cuda.pagelocked_empty(tuple(self.output_shape), dtype=np.float32)
            self.d_output = cuda.mem_alloc(self.h_output.nbytes)

            self.stream = cuda.Stream()

        except Exception as e:
            raise RuntimeError(f"Failed to initialize TensorRT engine: {str(e)}") from e

    def get_input_shape(self) -> Tuple[int, int]:
        """Return the (height, width) entries of the resolved input shape."""
        return self.input_shape[2], self.input_shape[3]

    def get_batch_size(self) -> int:
        """Return the batch size the buffers were allocated for."""
        return self.batch_size

    def run(self, input_data: np.ndarray) -> List[np.ndarray]:
        """Execute the engine on ``input_data`` and return ``[output]``.

        ``input_data`` is converted to contiguous float32. It may hold fewer
        frames than the engine batch; the output then holds only the rows
        for the supplied frames. The returned array is a copy, so it stays
        valid across later calls.

        Raises:
            RuntimeError: if the input does not fit the device buffer or the
                copy or execution fails.
        """
        try:
            input_data = np.ascontiguousarray(input_data, dtype=np.float32)
            if input_data.nbytes > self._input_nbytes:
                raise ValueError(
                    f"input of {input_data.nbytes} bytes exceeds the {self._input_nbytes}-byte engine input buffer"
                )

            cuda.memcpy_htod_async(self.d_input, input_data, self.stream)
            self.context.set_tensor_address(self.input_name, int(self.d_input))  # input buffer
            self.context.set_tensor_address(self.output_name, int(self.d_output))  # output buffer

            self.context.execute_async_v3(stream_handle=self.stream.handle)
            self.stream.synchronize()

            cuda.memcpy_dtoh(self.h_output, self.d_output)

            output = self.h_output.reshape(self.output_shape)
            # For a short batch the remaining rows were computed from stale
            # device memory, so drop them.
            frames = min(input_data.shape[0], self.output_shape[0])
            return [output[:frames].copy()]

        except Exception as e:
            raise RuntimeError(f"Failed to run TensorRT inference: {str(e)}") from e
|
||||||
+2
-1
@@ -16,4 +16,5 @@ boto3
|
|||||||
msgpack
|
msgpack
|
||||||
rstream
|
rstream
|
||||||
onnxruntime-gpu
|
onnxruntime-gpu
|
||||||
netron
|
netron
|
||||||
|
pycuda
|
||||||
@@ -1,227 +0,0 @@
|
|||||||
import json
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
from enum import Enum
|
|
||||||
from os.path import join, dirname
|
|
||||||
|
|
||||||
import cv2
|
|
||||||
import numpy as np
|
|
||||||
import onnxruntime as onnx
|
|
||||||
|
|
||||||
|
|
||||||
class Detection:
    """A detected box in centre format with its class id and confidence."""

    def __init__(self, x, y, w, h, cls, confidence):
        # (x, y) is the box centre, (w, h) its full width and height.
        self.x = x
        self.y = y
        self.w = w
        self.h = h
        self.cls = cls
        self.confidence = confidence

    def overlaps(self, det2, threshold=0.6):
        """Return True when the shared area exceeds ``threshold`` of the smaller box.

        Returns False when the smaller box has no area (instead of dividing
        by zero).
        """
        # Per-axis overlap of two centred extents. The raw formula
        # overshoots when one extent is nested inside the other, so it is
        # clamped to the smaller extent.
        overlap_x = min(0.5 * (self.w + det2.w) - abs(self.x - det2.x), self.w, det2.w)
        overlap_y = min(0.5 * (self.h + det2.h) - abs(self.y - det2.y), self.h, det2.h)
        overlap_area = max(0, overlap_x) * max(0, overlap_y)
        min_area = min(self.w * self.h, det2.w * det2.h)

        if min_area <= 0:
            return False
        return overlap_area / min_area > threshold
|
|
||||||
|
|
||||||
|
|
||||||
class Annotation:
    """A video frame with its encoded image bytes, timestamp and detections."""

    def __init__(self, frame, image_bytes, time, detections: list[Detection]):
        self.frame, self.image, self.time = frame, image_bytes, time
        # A missing detection list is normalised to a fresh empty list.
        self.detections = [] if detections is None else detections
|
|
||||||
|
|
||||||
|
|
||||||
class WeatherMode(Enum):
    # Each value is an id offset: AnnotationClass.read_json adds it to the
    # 'Id' of every entry in classes.json to build one class per mode.
    # Member names presumably stand for normal / winter / night conditions
    # -- TODO confirm with the dataset owners.
    Norm = 0
    Wint = 20
    Night = 40
|
|
||||||
|
|
||||||
|
|
||||||
class AnnotationClass:
    """A detection class: numeric id, display name and display colour.

    ``color`` is a hex string such as ``'#RRGGBB'``; a longer form with
    leading digits before the RGB part (e.g. ``'#AARRGGBB'``) is accepted
    too, because only the trailing six hex digits are read.
    """

    def __init__(self, id, name, color):
        self.id = id
        self.name = name
        self.color = color
        red, green, blue = self._parse_rgb(color)
        # OpenCV drawing functions take colours in BGR order.
        self.opencv_color = (blue, green, red)

    @staticmethod
    def _parse_rgb(color):
        """Return ``(r, g, b)`` ints parsed from the last six hex digits of ``color``."""
        digits = color.lstrip('#')[-6:]
        if len(digits) != 6:
            raise ValueError(f'Expected at least 6 hex digits in colour {color!r}')
        return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))

    @staticmethod
    def read_json():
        """Load ``classes.json`` and return ``{class_id: AnnotationClass}``.

        The file is looked up one directory above this module. Every entry
        is emitted once per ``WeatherMode``, with the mode's value added to
        the entry's ``Id`` and the mode's name appended to ``Name`` for any
        mode whose value is not 0.
        """
        classes_path = join(dirname(dirname(__file__)), 'classes.json')
        with open(classes_path, 'r', encoding='utf-8') as f:
            base_classes = json.load(f)

        annotations_dict = {}
        for mode in WeatherMode:
            for cl in base_classes:
                class_id = mode.value + cl['Id']
                name = cl['Name'] if mode.value == 0 else f'{cl["Name"]}({mode.name})'
                annotations_dict[class_id] = AnnotationClass(class_id, name, cl['Color'])
        return annotations_dict

    @property
    def color_tuple(self):
        """The colour as an ``(r, g, b)`` tuple of ints."""
        return self._parse_rgb(self.color)
|
|
||||||
|
|
||||||
class Inference:
    """Run an ONNX detection model over a video and display annotated frames.

    Every 4th frame is sampled and grouped into batches of ``batch_size``.
    Each batch is preprocessed, inferred, post-processed and drawn in an
    OpenCV window.
    """

    def __init__(self, onnx_model, batch_size, confidence_thres, iou_thres):
        self.onnx_model = onnx_model
        self.batch_size = batch_size
        self.confidence_thres = confidence_thres
        # NOTE(review): iou_thres is stored but not read in this class; the
        # overlap test uses Detection.overlaps with its own threshold.
        self.iou_thres = iou_thres
        # Filled in by process() from the model's input shape.
        self.model_width = None
        self.model_height = None

        self.classes = AnnotationClass.read_json()

    def draw(self, annotation: Annotation):
        """Draw every detection of ``annotation`` onto its frame and show the frame.

        Detection coordinates are treated as fractions of the frame size.
        The frame is modified in place.
        """
        img = annotation.frame
        img_height, img_width = img.shape[:2]
        for d in annotation.detections:
            x1 = int(img_width * (d.x - d.w / 2))
            y1 = int(img_height * (d.y - d.h / 2))
            x2 = int(x1 + img_width * d.w)
            y2 = int(y1 + img_height * d.h)

            color = self.classes[d.cls].opencv_color
            cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
            label = f"{self.classes[d.cls].name}: {d.confidence:.2f}"
            (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

            # Put the label above the box unless it would leave the frame.
            label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10

            cv2.rectangle(
                img, (x1, label_y - label_height), (x1 + label_width, label_y + label_height), color, cv2.FILLED
            )
            cv2.putText(img, label, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
        cv2.imshow('Video', img)

    def preprocess(self, frames):
        """Convert ``frames`` into one stacked blob scaled to the model input size."""
        blobs = [cv2.dnn.blobFromImage(frame,
                                       scalefactor=1.0 / 255.0,
                                       size=(self.model_width, self.model_height),
                                       mean=(0, 0, 0),
                                       swapRB=True,
                                       crop=False)
                 for frame in frames]
        return np.vstack(blobs)

    def postprocess(self, batch_frames, batch_timestamps, output):
        """Return an ``Annotation`` for every frame that has detections left.

        ``output[0]`` is read as one row list per frame, each row being
        ``x1, y1, x2, y2, confidence, class`` in model-input pixels. Rows
        stop at the first zero confidence, and rows below
        ``confidence_thres`` are skipped. Output rows beyond the number of
        frames are ignored.
        """
        anns = []
        for frame, timestamp, frame_output in zip(batch_frames, batch_timestamps, output[0]):
            detections = []
            for det in frame_output:
                if det[4] == 0:  # if confidence is 0 then valid points are over.
                    break
                if det[4] < self.confidence_thres:
                    continue
                # Normalise corners to [0, 1] and clip to the frame.
                x1 = max(0, det[0] / self.model_width)
                y1 = max(0, det[1] / self.model_height)
                x2 = min(1, det[2] / self.model_width)
                y2 = min(1, det[3] / self.model_height)
                conf = round(det[4], 2)
                class_id = int(det[5])

                x = (x1 + x2) / 2
                y = (y1 + y2) / 2
                w = x2 - x1
                h = y2 - y1
                detections.append(Detection(x, y, w, h, class_id, conf))

            filtered_detections = self.remove_overlapping_detections(detections)

            if len(filtered_detections) > 0:
                _, image = cv2.imencode('.jpg', frame)
                image_bytes = image.tobytes()
                annotation = Annotation(frame, image_bytes, timestamp, filtered_detections)
                anns.append(annotation)
        return anns

    def _show_batch(self, session, input_name, batch_frames, batch_timestamps):
        """Infer and display one batch; return False once the user presses 'q'."""
        input_blob = self.preprocess(batch_frames)
        outputs = session.run(None, {input_name: input_blob})
        for annotation in self.postprocess(batch_frames, batch_timestamps, outputs):
            self.draw(annotation)
            print(f'video: {annotation.time/1000:.3f}s')
            if cv2.waitKey(1) & 0xFF == ord('q'):
                return False
        return True

    def process(self, video):
        """Run detection over ``video`` until it ends or the user presses 'q'.

        The capture is always released, including on errors.
        """
        session = onnx.InferenceSession(self.onnx_model, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
        model_inputs = session.get_inputs()
        input_name = model_inputs[0].name
        input_shape = model_inputs[0].shape
        # preprocess() feeds the model NCHW blobs, so index 2 is the height
        # and index 3 the width.
        self.model_height = input_shape[2]
        self.model_width = input_shape[3]

        frame_count = 0
        batch_frames = []
        batch_timestamps = []
        v_input = cv2.VideoCapture(video)
        try:
            while v_input.isOpened():
                ret, frame = v_input.read()
                if not ret or frame is None:
                    break

                frame_count += 1
                if frame_count % 4 != 0:
                    continue
                batch_frames.append(frame)
                batch_timestamps.append(int(v_input.get(cv2.CAP_PROP_POS_MSEC)))

                if len(batch_frames) == self.batch_size:
                    keep_going = self._show_batch(session, input_name, batch_frames, batch_timestamps)
                    batch_frames.clear()
                    batch_timestamps.clear()
                    if not keep_going:
                        return

            # Frames left over when the video length is not a multiple of the batch.
            if batch_frames:
                self._show_batch(session, input_name, batch_frames, batch_timestamps)
        finally:
            v_input.release()

    def remove_overlapping_detections(self, detections):
        """Return ``detections`` with overlapping boxes reduced to one winner each.

        For every not-yet-suppressed box (the anchor), all later boxes that
        overlap it compete with the current winner. Higher confidence
        wins; on equal confidence the lower class id wins. Boxes that
        were already suppressed never compete again.
        """
        filtered_output = []
        suppressed = set()

        for det1_index, det1 in enumerate(detections):
            if det1_index in suppressed:
                continue
            res = det1_index
            for det2_index in range(det1_index + 1, len(detections)):
                if det2_index in suppressed:
                    continue
                det2 = detections[det2_index]
                if not det1.overlaps(det2):
                    continue
                # Compare against the current winner, not the anchor.
                # Otherwise a weaker late box can displace a stronger
                # earlier one.
                best = detections[res]
                if best.confidence > det2.confidence or (best.confidence == det2.confidence and best.cls < det2.cls):
                    suppressed.add(det2_index)
                else:
                    suppressed.add(res)
                    res = det2_index
            filtered_output.append(detections[res])
            suppressed.add(res)
        return filtered_output

    def overlap_tests(self):
        """Run two hand-made overlap scenarios and return both filtered lists."""
        detections = [
            Detection(10, 10, 200, 200, 0, 0.5),
            Detection(10, 10, 200, 200, 0, 0.6),
            Detection(10, 10, 200, 200, 0, 0.4),
            Detection(10, 10, 200, 200, 0, 0.8),
            Detection(10, 10, 200, 200, 0, 0.3),
        ]
        result = self.remove_overlapping_detections(detections)

        detections = [
            Detection(10, 10, 100, 100, 0, 0.5),
            Detection(50, 50, 120, 110, 0, 0.6)
        ]
        result2 = self.remove_overlapping_detections(detections)
        return result, result2
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Manual entry point: run the ONNX model over a local test video and
    # show the detections in an OpenCV window.
    model = 'azaion-2024-10-26.onnx'
    input_video = 'ForAI_test.mp4'
    inf = Inference(model, batch_size=2, confidence_thres=0.5, iou_thres=0.35)
    # inf.overlap_tests()
    inf.process(input_video)

    # Keep the last frame on screen until a key is pressed.
    cv2.waitKey(0)
|
|
||||||
@@ -184,6 +184,7 @@ def train_dataset(existing_date=None, from_scratch=False):
|
|||||||
model_name = latest_model if latest_model is not None and path.isfile(latest_model) and not from_scratch else 'yolo11m.yaml'
|
model_name = latest_model if latest_model is not None and path.isfile(latest_model) and not from_scratch else 'yolo11m.yaml'
|
||||||
print(f'Initial model: {model_name}')
|
print(f'Initial model: {model_name}')
|
||||||
model = YOLO(model_name)
|
model = YOLO(model_name)
|
||||||
|
model.info['author'] = 'LLC Azaion'
|
||||||
|
|
||||||
yaml = abspath(path.join(cur_dataset, 'data.yaml'))
|
yaml = abspath(path.join(cur_dataset, 'data.yaml'))
|
||||||
results = model.train(data=yaml,
|
results = model.train(data=yaml,
|
||||||
|
|||||||
Reference in New Issue
Block a user