diff --git a/README.md b/README.md index 1084f82..0b2eebe 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,5 @@ nvcc --version python -m pip install --upgrade pip pip install --upgrade huggingface_hub pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 - pip install -q git+https://github.com/THU-MIG/yolov10.git - pip install albumentations + pip install ultralytics albumentations ``` diff --git a/checkpoint.txt b/checkpoint.txt new file mode 100644 index 0000000..c4a1dfd --- /dev/null +++ b/checkpoint.txt @@ -0,0 +1 @@ +2024-06-27 20:51:35 \ No newline at end of file diff --git a/config.py b/config.py deleted file mode 100644 index 5aab429..0000000 --- a/config.py +++ /dev/null @@ -1,22 +0,0 @@ -import yaml - -config_file = 'config.yaml' - - -class Config: - - def __init__(self): - with open(config_file, 'r') as f: - c = yaml.safe_load(f) - self.checkpoint = c['checkpoint'] - self.images_dir = c['images_dir'] - self.labels_dir = c['labels_dir'] - f.close() - - def write(self): - with open(config_file, 'w') as f: - d = dict(checkpoint=self.checkpoint, - images_dir=self.images_dir, - labels_dir=self.labels_dir) - yaml.safe_dump(d, f) - f.close() diff --git a/config.yaml b/config.yaml deleted file mode 100644 index b12ff48..0000000 --- a/config.yaml +++ /dev/null @@ -1,3 +0,0 @@ -checkpoint: 2024-06-18 19:14:02.080664 -images_dir: E:\images -labels_dir: E:\labels diff --git a/constants.py b/constants.py index 7f419eb..36700a1 100644 --- a/constants.py +++ b/constants.py @@ -1,9 +1,23 @@ import os from dto.annotationClass import AnnotationClass -current_dataset_dir = os.path.join('datasets', 'zombobase-current') -current_images_dir = os.path.join(current_dataset_dir, 'images') -current_labels_dir = os.path.join(current_dataset_dir, 'labels') +prefix = 'azaion-' +images = 'images' +labels = 'labels' + +data_dir = '/azaion/data/raw' +data_images_dir = os.path.join(data_dir, images) +data_labels_dir = os.path.join(data_dir, labels) + +processed_dir = '/azaion/data/processed' +processed_images_dir = os.path.join(processed_dir, images) +processed_labels_dir = os.path.join(processed_dir, labels) + + +datasets_dir = '/azaion/datasets' +models_dir = '/azaion/models' + annotation_classes = AnnotationClass.read_json() -prefix = 'zombobase-' date_format = '%Y-%m-%d' +checkpoint_file = 'checkpoint.txt' +checkpoint_date_format = '%Y-%m-%d %H:%M:%S' diff --git a/dataset-visualiser.py b/dataset-visualiser.py index 8ec1a69..04b8229 100644 --- a/dataset-visualiser.py +++ b/dataset-visualiser.py @@ -6,11 +6,13 @@ from dto.imageLabel import ImageLabel from preprocessing import read_labels from matplotlib import pyplot as plt +from constants import datasets_dir, prefix + annotation_classes = AnnotationClass.read_json() -dataset_dir = os.path.join('datasets', 'zombobase-2024-06-18', 'train') -images_dir = os.path.join(dataset_dir, 'images') -labels_dir = os.path.join(dataset_dir, 'labels') +cur_dataset = os.path.join(datasets_dir, f'{prefix}2024-06-18', 'train') +images_dir = os.path.join(cur_dataset, 'images') +labels_dir = os.path.join(cur_dataset, 'labels') for f in os.listdir(images_dir)[35247:]: image_path = os.path.join(images_dir, f) diff --git a/preprocessing.py b/preprocessing.py index 8bc6cc0..860ef59 100644 --- a/preprocessing.py +++ b/preprocessing.py @@ -1,19 +1,15 @@ import os.path import time -from datetime import datetime +from datetime import datetime, timedelta from pathlib import Path import albumentations as A import cv2 import numpy as np -from dateutil.relativedelta import relativedelta -from config import Config -from constants import current_images_dir, current_labels_dir, annotation_classes, prefix, date_format, \ - current_dataset_dir +from constants import (data_images_dir, data_labels_dir, processed_images_dir, processed_labels_dir, + annotation_classes, checkpoint_file, checkpoint_date_format) from dto.imageLabel import ImageLabel -config = Config() - def image_processing(img_ann: ImageLabel) -> [ImageLabel]: transforms = [ @@ -45,12 +41,12 @@ def image_processing(img_ann: ImageLabel) -> [ImageLabel]: img = ImageLabel( image=res['image'], labels=res['bboxes'], - image_path=os.path.join(current_images_dir, f'{name}{path.suffix}'), - labels_path=os.path.join(current_labels_dir, f'{name}.txt') + image_path=os.path.join(processed_images_dir, f'{name}{path.suffix}'), + labels_path=os.path.join(processed_labels_dir, f'{name}.txt') ) results.append(img) except Exception as e: - print(f'Error during transformtation: {e}') + print(f'Error during transformation: {e}') return results @@ -80,6 +76,7 @@ def read_labels(labels_path) -> [[]]: str_coordinates = row.split(' ') class_num = str_coordinates.pop(0) coordinates = [float(n.replace(',', '.')) for n in str_coordinates] + # noinspection PyTypeChecker coordinates.append(class_num) arr.append(coordinates) return arr @@ -92,34 +89,24 @@ def process_image(img_ann): write_result(ImageLabel( image=img_ann.image, labels=img_ann.labels, - image_path=os.path.join(current_images_dir, Path(img_ann.image_path).name), - labels_path=os.path.join(current_labels_dir, Path(img_ann.labels_path).name) + image_path=os.path.join(processed_images_dir, Path(img_ann.image_path).name), + labels_path=os.path.join(processed_labels_dir, Path(img_ann.labels_path).name) )) # os.remove(img_ann.image_path) # os.remove(img_ann.labels_path) -def get_checkpoint(): - if config.checkpoint is not None: - return config.checkpoint - - dates = [] - for directory in os.listdir('models'): - try: - dates.append(datetime.strptime(directory[len(prefix):], date_format)) - except: - continue - if len(dates) == 0: - return datetime.now() - relativedelta(years=1) - else: - return max(dates) - - def main(): - last_date = checkpoint = get_checkpoint() + checkpoint = datetime.now() - timedelta(days=720) + try: + with open(checkpoint_file, 'r') as f: + checkpoint = datetime.strptime(f.read(), checkpoint_date_format) + except: + pass + last_date = checkpoint while True: images = [] - with os.scandir(config.images_dir) as imd: + with os.scandir(data_images_dir) as imd: for image_file in imd: if not image_file.is_file(): continue @@ -130,8 +117,8 @@ def main(): for image_file in images: try: - image_path = os.path.join(config.images_dir, image_file.name) - labels_path = os.path.join(config.labels_dir, f'{Path(image_path).stem}.txt') + image_path = os.path.join(data_images_dir, image_file.name) + labels_path = os.path.join(data_labels_dir, f'{Path(image_path).stem}.txt') image = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_UNCHANGED) process_image(ImageLabel( image_path=image_path, @@ -142,42 +129,14 @@ def main(): except Exception as e: print(f'Error appeared {e}') if last_date != checkpoint: - checkpoint = config.checkpoint = last_date - config.write() + checkpoint = last_date + try: + with open(checkpoint_file, 'w') as f: + f.write(datetime.strftime(checkpoint, checkpoint_date_format)) + except: + pass time.sleep(5) -def check_labels(): - for label in os.listdir(os.path.join(current_dataset_dir, 'labels')): - with open(os.path.join(current_dataset_dir, 'labels', label), 'r') as f: - lines = f.readlines() - for line in lines: - list_c = line.split(' ')[1:] - for l in list_c: - if float(l) > 1: - print('Error!') - - -def fix_class(folder): - for label in os.listdir(folder): - if label.startswith('0000'): - with open(os.path.join(folder, label), 'r+') as f: - lines = f.readlines() - truncated = False - for i in range(0, len(lines)): - if len(lines[i]) < 25: - print(lines[i]) - truncated = True - lines.pop(i) - if truncated: - f.truncate(0) - f.seek(0) - f.writelines(lines) - f.close() - - if __name__ == '__main__': - fix_class('datasets/zombobase-2024-06-18/test/labels') - fix_class('datasets/zombobase-2024-06-18/train/labels') - fix_class('datasets/zombobase-2024-06-18/valid/labels') - # main() + main() diff --git a/train.py b/train.py index dbe12e7..696c4cf 100644 --- a/train.py +++ b/train.py @@ -3,19 +3,20 @@ from os.path import abspath import shutil from datetime import datetime from pathlib import Path -from ultralytics import YOLOv10 -from constants import current_images_dir, current_labels_dir, annotation_classes, prefix, date_format +from ultralytics import YOLO +from constants import processed_images_dir, processed_labels_dir, annotation_classes, prefix, date_format, datasets_dir, models_dir -latest_model = f'models/{prefix}latest.pt' +latest_model = path.join(models_dir, f'{prefix}latest.pt') today_folder = f'{prefix}{datetime.now():{date_format}}' +today_dataset = path.join(datasets_dir, today_folder) train_set = 70 valid_set = 20 test_set = 10 def form_dataset(): - makedirs(path.join('datasets', today_folder), exist_ok=True) - images = listdir(current_images_dir) + makedirs(today_dataset, exist_ok=True) + images = listdir(processed_images_dir) train_size = int(len(images) * train_set / 100.0) valid_size = int(len(images) * valid_set / 100.0) @@ -28,15 +29,14 @@ def form_dataset(): def move_annotations(images, folder): - today_dataset = path.join('datasets', today_folder) destination_images = path.join(today_dataset, folder, 'images') makedirs(destination_images, exist_ok=True) destination_labels = path.join(today_dataset, folder, 'labels') makedirs(destination_labels, exist_ok=True) for image_name in images: - image_path = path.join(current_images_dir, image_name) + image_path = path.join(processed_images_dir, image_name) label_name = f'{Path(image_name).stem}.txt' - label_path = path.join(current_labels_dir, label_name) + label_path = path.join(processed_labels_dir, label_name) if not check_label(label_path): remove(image_path) else: @@ -77,38 +77,40 @@ def create_yaml(): lines.append(f'val: valid/images') lines.append('') - today_yaml = abspath(path.join('datasets', today_folder, 'data.yaml')) + today_yaml = abspath(path.join(today_dataset, 'data.yaml')) with open(today_yaml, 'w', encoding='utf-8') as f: f.writelines([f'{line}\n' for line in lines]) -def revert_to_current(date): +def revert_to_processed_data(date): def revert_dir(src_dir, dest_dir): for file in listdir(src_dir): s = path.join(src_dir, file) d = path.join(dest_dir, file) replace(s, d) - date_dataset = path.join('datasets', f'{prefix}{date}') - current_dataset = path.join('datasets', f'{prefix}current') + date_dataset = path.join(datasets_dir, f'{prefix}{date}') + makedirs(processed_images_dir, exist_ok=True) + makedirs(processed_labels_dir, exist_ok=True) for subset in ['test', 'train', 'valid']: - revert_dir(path.join(date_dataset, subset, 'images'), path.join(current_dataset, 'images')) - revert_dir(path.join(date_dataset, subset, 'labels'), path.join(current_dataset, 'labels')) + revert_dir(path.join(date_dataset, subset, 'images'), processed_images_dir) + revert_dir(path.join(date_dataset, subset, 'labels'), processed_labels_dir) shutil.rmtree(date_dataset) if __name__ == '__main__': # form_dataset() - # create_yaml() - m = latest_model or 'yolov10x.yaml' - print(f'Initial model: {m}') - model = YOLOv10(latest_model or 'yolov10x.yaml') + model_name = latest_model if path.isfile(latest_model) else 'yolov8m.yaml' + print(f'Initial model: {model_name}') + model = YOLO(model_name) + + # cur_folder = path.join(datasets_dir, f'{prefix}2024-06-18') - folder = f'{prefix}2024-06-18' - yaml = abspath(path.join('datasets', folder, 'data.yaml')) - results = model.train(data=yaml, epochs=100, batch=10, imgsz=640, save_period=1) + cur_folder = today_dataset + yaml = abspath(path.join(cur_folder, 'data.yaml')) + results = model.train(data=yaml, epochs=100, batch=55, imgsz=640, save_period=1) shutil.copy(f'{results.save_dir}/weights/best.pt', latest_model) - shutil.copytree(results.save_dir, f'models/{folder}') + shutil.copytree(results.save_dir, path.join(models_dir, cur_folder)) shutil.rmtree('runs') - shutil.rmtree('models/zombobase-latest') + shutil.rmtree(path.join(models_dir, f'{prefix}latest')) diff --git a/yolov10x.yaml b/yolov10x.yaml deleted file mode 100644 index 539af6b..0000000 --- a/yolov10x.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# Parameters -nc: 50 # number of classes -scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n' - # [depth, width, max_channels] - x: [1.00, 1.25, 512] - -# YOLOv8.0n backbone -backbone: - # [from, repeats, module, args] - - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 - - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 - - [-1, 3, C2f, [128, True]] - - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 - - [-1, 6, C2f, [256, True]] - - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16 - - [-1, 6, C2fCIB, [512, True]] - - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32 - - [-1, 3, C2fCIB, [1024, True]] - - [-1, 1, SPPF, [1024, 5]] # 9 - - [-1, 1, PSA, [1024]] # 10 - -# YOLOv8.0n head -head: - - [-1, 1, nn.Upsample, [None, 2, "nearest"]] - - [[-1, 6], 1, Concat, [1]] # cat backbone P4 - - [-1, 3, C2fCIB, [512, True]] # 13 - - - [-1, 1, nn.Upsample, [None, 2, "nearest"]] - - [[-1, 4], 1, Concat, [1]] # cat backbone P3 - - [-1, 3, C2f, [256]] # 16 (P3/8-small) - - - [-1, 1, Conv, [256, 3, 2]] - - [[-1, 13], 1, Concat, [1]] # cat head P4 - - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium) - - - [-1, 1, SCDown, [512, 3, 2]] - - [[-1, 10], 1, Concat, [1]] # cat head P5 - - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large) - - - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5) diff --git a/yolov8m.yaml b/yolov8m.yaml new file mode 100644 index 0000000..df8fdcf --- /dev/null +++ b/yolov8m.yaml @@ -0,0 +1,40 @@ +# Ultralytics YOLO 🚀, GPL-3.0 license + +# Parameters +nc: 50 # number of classes +depth_multiple: 0.67 # scales module repeats +width_multiple: 0.75 # scales convolution channels + +# YOLOv8.0m backbone +backbone: + # [from, repeats, module, args] + - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 + - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 + - [-1, 3, C2f, [128, True]] + - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 + - [-1, 6, C2f, [256, True]] + - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 + - [-1, 6, C2f, [512, True]] + - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32 + - [-1, 3, C2f, [768, True]] + - [-1, 1, SPPF, [768, 5]] # 9 + +# YOLOv8.0m head +head: + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 6], 1, Concat, [1]] # cat backbone P4 + - [-1, 3, C2f, [512]] # 13 + + - [-1, 1, nn.Upsample, [None, 2, 'nearest']] + - [[-1, 4], 1, Concat, [1]] # cat backbone P3 + - [-1, 3, C2f, [256]] # 17 (P3/8-small) + + - [-1, 1, Conv, [256, 3, 2]] + - [[-1, 12], 1, Concat, [1]] # cat head P4 + - [-1, 3, C2f, [512]] # 20 (P4/16-medium) + + - [-1, 1, Conv, [512, 3, 2]] + - [[-1, 9], 1, Concat, [1]] # cat head P5 + - [-1, 3, C2f, [768]] # 23 (P5/32-large) + + - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)