# Reconstructed from a whitespace-mangled git patch. Diff metadata for this
# region (file headers, hunk markers, exports/export.py context lines) is not
# reproducible and has been dropped; only the live post-patch code is kept.

# --- exports/export.py: new version of export_tensorrt ---

def export_tensorrt(model_path):
    """Export a YOLO .pt checkpoint to a TensorRT engine.

    fp16 (`half=True`), fixed batch of 4, ONNX graph simplification and fused
    NMS. The previous dataset/minival calibration plumbing was removed, so no
    dataset yaml is needed any more.
    """
    YOLO(model_path).export(
        format='engine',
        batch=4,
        half=True,
        simplify=True,
        nms=True
    )


# --- inference/dto.py (new file) ---
import json
from enum import Enum
from os.path import join, dirname


class Detection:
    """A single detection, stored as normalized center-x/center-y/width/height."""

    def __init__(self, x, y, w, h, cls, confidence):
        self.x = x
        self.y = y
        self.w = w
        self.h = h
        self.cls = cls
        self.confidence = confidence

    def overlaps(self, det2, iou_threshold):
        """Return True when IoU(self, det2) exceeds iou_threshold.

        Boxes are axis-aligned in center/size form. Guarded against
        degenerate zero-area boxes: a zero union yields False instead of
        raising ZeroDivisionError.
        """
        overlap_x = 0.5 * (self.w + det2.w) - abs(self.x - det2.x)
        overlap_y = 0.5 * (self.h + det2.h) - abs(self.y - det2.y)
        intersection = max(0, overlap_x) * max(0, overlap_y)
        union = self.w * self.h + det2.w * det2.h - intersection
        return union > 0 and intersection / union > iou_threshold


class Annotation:
    """Detections for one sampled video frame plus its timestamp (ms)."""

    def __init__(self, frame, time, detections: list[Detection]):
        self.frame = frame
        self.time = time
        # Normalize None to an empty list so callers can always iterate.
        self.detections = detections if detections is not None else []


class WeatherMode(Enum):
    # Values are class-id offsets added per weather condition in read_json().
    Norm = 0
    Wint = 20
    Night = 40


class AnnotationClass:
    """One annotation class (id, display name, color) loaded from classes.json."""

    def __init__(self, id, name, color):
        self.id = id
        self.name = name
        self.color = color
        color_str = color.lstrip('#')
        # Reversed byte pairs to get an OpenCV-ordered (BGR) tuple.
        # NOTE(review): assumes the hex digits are RRGGBB after '#' -- confirm
        # against the actual format used in classes.json.
        self.opencv_color = (int(color_str[4:6], 16), int(color_str[2:4], 16), int(color_str[0:2], 16))

    @staticmethod
    def read_json():
        """Load classes.json and expand it across all WeatherModes.

        Returns a dict mapping offset class id -> AnnotationClass; non-Norm
        modes get the mode name appended to the display name.
        """
        classes_path = join(dirname(dirname(__file__)), 'classes.json')
        with open(classes_path, 'r', encoding='utf-8') as f:
            j = json.loads(f.read())
        annotations_dict = {}
        for mode in WeatherMode:
            for cl in j:
                cls_id = mode.value + cl['Id']  # renamed from `id` (shadowed builtin)
                name = cl['Name'] if mode.value == 0 else f'{cl["Name"]}({mode.name})'
                annotations_dict[cls_id] = AnnotationClass(cls_id, name, cl['Color'])
        return annotations_dict

    @property
    def color_tuple(self):
        # NOTE(review): slices self.color[3:], i.e. assumes a 9-char
        # '#AARRGGBB'-style string so 6 hex digits remain -- but opencv_color
        # above strips only '#'. The two cannot both be right for the same
        # input format; confirm which one classes.json actually uses.
        color = self.color[3:]
        lv = len(color)
        xx = range(0, lv, lv // 3)
        return tuple(int(color[i:i + lv // 3], 16) for i in xx)
# --- inference/inference.py: class Inference (reconstructed from mangled patch) ---
# Real file-level imports: cv2, numpy as np, InferenceEngine from onnx_engine,
# AnnotationClass / Annotation / Detection from dto.

class Inference:
    """Batched video inference and on-screen visualisation for a YOLO engine."""

    def __init__(self, engine: 'InferenceEngine', confidence_threshold, iou_threshold):
        # String annotation: avoids a hard def-time dependency on onnx_engine.
        self.engine = engine
        self.confidence_threshold = confidence_threshold
        self.iou_threshold = iou_threshold
        self.batch_size = engine.get_batch_size()
        self.model_height, self.model_width = engine.get_input_shape()
        self.classes = AnnotationClass.read_json()

    def draw(self, annotation: 'Annotation'):
        """Draw boxes and labels for one annotated frame, then show it."""
        img = annotation.frame
        img_height, img_width = img.shape[:2]
        for d in annotation.detections:
            # Detections are stored normalized (cx, cy, w, h); convert to pixels.
            x1 = int(img_width * (d.x - d.w / 2))
            y1 = int(img_height * (d.y - d.h / 2))
            x2 = int(x1 + img_width * d.w)
            y2 = int(y1 + img_height * d.h)

            color = self.classes[d.cls].opencv_color
            cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
            label = f"{self.classes[d.cls].name}: {d.confidence:.2f}"
            (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            # Put the label above the box unless that would leave the frame.
            label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10
            cv2.rectangle(
                img, (x1, label_y - label_height), (x1 + label_width, label_y + label_height), color, cv2.FILLED
            )
            cv2.putText(img, label, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
        # NOTE(review): original line structure is ambiguous in the mangled
        # patch; imshow is placed after the loop (one show per frame) -- confirm.
        cv2.imshow('Video', img)

    def preprocess(self, frames):
        """Stack frames into one NCHW float blob scaled to [0,1], BGR->RGB."""
        blobs = [cv2.dnn.blobFromImage(frame,
                                       scalefactor=1.0 / 255.0,
                                       size=(self.model_width, self.model_height),
                                       mean=(0, 0, 0),
                                       swapRB=True,
                                       crop=False)
                 for frame in frames]
        return np.vstack(blobs)

    def postprocess(self, batch_frames, batch_timestamps, output):
        """Convert raw engine output into Annotations with normalized boxes.

        output[0] is (batch, max_dets, 6): x1 y1 x2 y2 conf cls in model-input
        pixels, zero-padded after the last valid detection.
        """
        anns = []
        # BUGFIX: iterate the frames actually submitted, not output[0]'s batch
        # dimension -- a fixed-batch engine returns batch_size rows even for a
        # partial final batch, which made batch_frames[i] raise IndexError.
        for i, frame in enumerate(batch_frames):
            timestamp = batch_timestamps[i]
            detections = []
            for det in output[0][i]:
                if det[4] == 0:
                    break  # confidence 0 marks the start of the padding rows
                if det[4] < self.confidence_threshold:
                    continue
                # Normalize to [0, 1] relative to the model input size.
                x1 = max(0, det[0] / self.model_width)
                y1 = max(0, det[1] / self.model_height)
                x2 = min(1, det[2] / self.model_width)
                y2 = min(1, det[3] / self.model_height)
                conf = round(det[4], 2)
                class_id = int(det[5])
                detections.append(Detection((x1 + x2) / 2, (y1 + y2) / 2,
                                            x2 - x1, y2 - y1, class_id, conf))
            anns.append(Annotation(frame, timestamp,
                                   self.remove_overlapping_detections(detections)))
        return anns

    def _show_batch(self, batch_frames, batch_timestamps):
        """Run inference on one batch and display it; False when user quit."""
        outputs = self.engine.run(self.preprocess(batch_frames))
        for annotation in self.postprocess(batch_frames, batch_timestamps, outputs):
            self.draw(annotation)
            print(f'video: {annotation.time / 1000:.3f}s')
            if cv2.waitKey(1) & 0xFF == ord('q'):
                return False
        return True

    def process(self, video):
        """Stream `video`, run batched inference on every 4th frame.

        BUGFIX: 'q' now stops processing entirely (it previously only skipped
        the remainder of the current batch). The capture is always released.
        """
        frame_count = 0
        batch_frames = []
        batch_timestamps = []
        v_input = cv2.VideoCapture(video)
        try:
            while v_input.isOpened():
                ret, frame = v_input.read()
                if not ret or frame is None:
                    break
                frame_count += 1
                if frame_count % 4 == 0:  # sample every 4th frame
                    batch_frames.append(frame)
                    batch_timestamps.append(int(v_input.get(cv2.CAP_PROP_POS_MSEC)))
                if len(batch_frames) == self.batch_size:
                    keep_going = self._show_batch(batch_frames, batch_timestamps)
                    batch_frames.clear()
                    batch_timestamps.clear()
                    if not keep_going:
                        return
            if batch_frames:
                # NOTE(review): a fixed-batch engine (TensorRT) receives a
                # smaller-than-batch blob here -- confirm the backend accepts it.
                self._show_batch(batch_frames, batch_timestamps)
        finally:
            v_input.release()

    def remove_overlapping_detections(self, detections):
        """Greedy NMS keeping, per overlap cluster, the detection with the
        highest confidence (ties broken by lower class id).

        Membership checks now use a set instead of a list (was O(n^2) scans).
        """
        kept = []
        suppressed = set()
        for i in range(len(detections)):
            if i in suppressed:
                continue
            det1 = detections[i]
            best = i
            for j in range(i + 1, len(detections)):
                det2 = detections[j]
                if det1.overlaps(det2, self.iou_threshold):
                    if det1.confidence > det2.confidence or (
                            det1.confidence == det2.confidence and det1.cls < det2.cls):
                        suppressed.add(j)
                    else:
                        suppressed.add(best)
                        best = j
            kept.append(detections[best])
            suppressed.add(best)
        return kept
# --- inference/onnx_engine.py (reconstructed from mangled patch) ---
# Real file-level imports additionally include: import onnxruntime as onnx
# (third-party; referenced only at construction time below).

class InferenceEngine(abc.ABC):
    """Contract shared by the ONNX and TensorRT inference backends."""

    @abc.abstractmethod
    def __init__(self, model_path: str, batch_size: int = 1, **kwargs):
        pass

    @abc.abstractmethod
    def get_input_shape(self) -> Tuple[int, int]:
        """Return the (height, width) the model input expects."""
        pass

    @abc.abstractmethod
    def get_batch_size(self) -> int:
        """Return the batch size this engine instance was configured for."""
        pass

    @abc.abstractmethod
    def run(self, input_data: np.ndarray) -> List[np.ndarray]:
        """Run one batched forward pass and return the raw output tensors."""
        pass


class OnnxEngine(InferenceEngine):
    """onnxruntime-backed engine: CUDA provider with CPU fallback."""

    def __init__(self, model_path: str, batch_size: int = 1, **kwargs):
        self.model_path = model_path
        self.batch_size = batch_size
        self.session = onnx.InferenceSession(
            model_path,
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
        )
        self.model_inputs = self.session.get_inputs()
        self.input_name = self.model_inputs[0].name
        self.input_shape = self.model_inputs[0].shape

    def get_input_shape(self) -> Tuple[int, int]:
        # NCHW layout: indices 2 and 3 are height and width.
        return self.input_shape[2], self.input_shape[3]

    def get_batch_size(self) -> int:
        # Reports the caller-requested batch size, not the model's own
        # batch dimension.
        return self.batch_size

    def run(self, input_data: np.ndarray) -> List[np.ndarray]:
        return self.session.run(None, {self.input_name: input_data})


# --- inference/start_inference.py (reconstructed; benchmark notes kept) ---
if __name__ == "__main__":
    # Measured on the first 200 s of ForAI_test.mp4:
    #   OnnxEngine  (batch 4):  81 s, 6.3 GB VRAM
    #   TensorRT INT8 (batch 16): 54 s, 3.7 GB VRAM
    #   TensorRT fp16 (batch 4): current default below
    Inference(TensorRTEngine('azaion-2025-03-10-half_batch4.engine', batch_size=4),
              confidence_threshold=0.5, iou_threshold=0.3).process('ForAI_test.mp4')
# --- inference/tensorrt_engine.py (reconstructed from mangled patch) ---
# Real file-level imports: json, numpy as np, tensorrt as trt,
# pycuda.driver as cuda, pycuda.autoinit (side-effect import: initializes
# CUDA at load time -- do not remove), InferenceEngine from onnx_engine.
# This patch also adds "pycuda" to requirements.txt.
# Removed two leftover editing-instruction comments that had been committed
# into the code; exceptions are now chained with `from e`.

class TensorRTEngine(InferenceEngine):
    """TensorRT-backed engine loading an Ultralytics-exported .engine file."""

    def __init__(self, model_path: str, batch_size: int = 4, **kwargs):
        self.model_path = model_path
        self.batch_size = batch_size
        try:
            logger = trt.Logger(trt.Logger.WARNING)

            # Ultralytics prepends a little-endian int32 length plus a JSON
            # metadata blob before the serialized engine bytes.
            with open(model_path, 'rb') as f:
                metadata_len = int.from_bytes(f.read(4), byteorder='little', signed=True)
                metadata_bytes = f.read(metadata_len)
                try:
                    self.metadata = json.loads(metadata_bytes)
                    print(f"Model metadata: {json.dumps(self.metadata, indent=2)}")
                except json.JSONDecodeError:
                    print(f"Failed to parse metadata: {metadata_bytes}")
                    self.metadata = {}
                engine_data = f.read()

            runtime = trt.Runtime(logger)
            self.engine = runtime.deserialize_cuda_engine(engine_data)
            if self.engine is None:
                raise RuntimeError(f"Failed to load TensorRT engine from {model_path}")

            self.context = self.engine.create_execution_context()

            # Input: resolve dynamic (-1) dimensions to concrete values.
            self.input_name = self.engine.get_tensor_name(0)
            engine_input_shape = self.engine.get_tensor_shape(self.input_name)
            self.input_shape = [
                batch_size if engine_input_shape[0] == -1 else engine_input_shape[0],
                engine_input_shape[1],  # channels (3 for RGB)
                1280 if engine_input_shape[2] == -1 else engine_input_shape[2],  # height fallback
                1280 if engine_input_shape[3] == -1 else engine_input_shape[3],  # width fallback
            ]
            self.context.set_input_shape(self.input_name, self.input_shape)
            input_size = trt.volume(self.input_shape) * np.dtype(np.float32).itemsize
            self.d_input = cuda.mem_alloc(input_size)

            # Output: [batch, max detections, (x1, y1, x2, y2, conf, cls)].
            self.output_name = self.engine.get_tensor_name(1)
            engine_output_shape = tuple(self.engine.get_tensor_shape(self.output_name))
            self.output_shape = [
                batch_size if self.input_shape[0] == -1 else self.input_shape[0],
                300 if engine_output_shape[1] == -1 else engine_output_shape[1],  # max detections
                6 if engine_output_shape[2] == -1 else engine_output_shape[2],
            ]
            self.h_output = cuda.pagelocked_empty(tuple(self.output_shape), dtype=np.float32)
            self.d_output = cuda.mem_alloc(self.h_output.nbytes)

            self.stream = cuda.Stream()
        except Exception as e:
            # Chain the cause so the original traceback is not lost.
            raise RuntimeError(f"Failed to initialize TensorRT engine: {str(e)}") from e

    def get_input_shape(self) -> Tuple[int, int]:
        """(height, width) the engine was built (or resolved) for."""
        return self.input_shape[2], self.input_shape[3]

    def get_batch_size(self) -> int:
        return self.batch_size

    def run(self, input_data: np.ndarray) -> List[np.ndarray]:
        """Copy the blob to the device, execute, and return [output array].

        NOTE(review): if input_data holds fewer than batch_size frames (the
        final partial batch from Inference.process), d_input is only partially
        filled and the trailing output rows are stale -- confirm callers
        ignore rows beyond the submitted frames.
        """
        try:
            cuda.memcpy_htod_async(self.d_input, input_data, self.stream)
            self.context.set_tensor_address(self.input_name, int(self.d_input))   # input buffer
            self.context.set_tensor_address(self.output_name, int(self.d_output))  # output buffer

            self.context.execute_async_v3(stream_handle=self.stream.handle)
            self.stream.synchronize()

            # Synchronous copy back -- the stream is already synchronized.
            cuda.memcpy_dtoh(self.h_output, self.d_output)

            output = self.h_output.reshape(self.output_shape)
            return [output]
        except Exception as e:
            raise RuntimeError(f"Failed to run TensorRT inference: {str(e)}") from e
# NOTE(review): the remainder of this patch region does three things:
#
#   * DELETES tests/onnx_inference.py. Its Detection / Annotation /
#     AnnotationClass / Inference logic was moved into inference/dto.py and
#     inference/inference.py, with two behavior changes worth noting in the
#     review: the overlap test changed from intersection/min-area > 0.6 to a
#     true IoU > iou_threshold, and the confidence/iou thresholds became
#     constructor parameters. The deleted file's content is intentionally not
#     restored here.
#
#   * Adds one line to train.py's train_dataset():
#
#         model = YOLO(model_name)
#     +   model.info['author'] = 'LLC Azaion'
#
#     BUG: in ultralytics, `Model.info` is a *method*, not a dict;
#     `model.info['author'] = ...` raises
#     TypeError: 'method' object does not support item assignment
#     at runtime. Authorship metadata must be attached another way (e.g. via
#     the exported model's metadata blob) -- TODO confirm the intended
#     mechanism with the author.