Copy images and labels when forming the dataset; add a folder for corrupted labels; small refactor

This commit is contained in:
zxsanny
2024-08-13 01:57:42 +03:00
parent bb1dbfe1e7
commit 6b0a0e678e
4 changed files with 120 additions and 80 deletions
+9 -4
View File
@@ -1,21 +1,26 @@
import os
from dto.annotationClass import AnnotationClass

# Root of the azaion data tree; every other path is derived from it.
azaion = '/azaion'
prefix = 'azaion-'
images = 'images'
labels = 'labels'

# Raw incoming data.
data_dir = os.path.join(azaion, 'data')
data_images_dir = os.path.join(data_dir, images)
data_labels_dir = os.path.join(data_dir, labels)

# Preprocessed data ready for dataset assembly.
processed_dir = os.path.join(azaion, 'data-processed')
processed_images_dir = os.path.join(processed_dir, images)
processed_labels_dir = os.path.join(processed_dir, labels)

# Quarantine for image/label pairs whose label fails validation.
corrupted_dir = os.path.join(azaion, 'data-corrupted')
corrupted_images_dir = os.path.join(corrupted_dir, images)
corrupted_labels_dir = os.path.join(corrupted_dir, labels)

datasets_dir = os.path.join(azaion, 'datasets')
models_dir = os.path.join(azaion, 'models')

annotation_classes = AnnotationClass.read_json()
date_format = '%Y-%m-%d'
+41 -19
View File
@@ -6,25 +6,47 @@ from dto.imageLabel import ImageLabel
from preprocessing import read_labels from preprocessing import read_labels
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
from constants import datasets_dir, prefix from constants import datasets_dir, prefix, processed_images_dir, processed_labels_dir
annotation_classes = AnnotationClass.read_json() annotation_classes = AnnotationClass.read_json()
cur_dataset = os.path.join(datasets_dir, f'{prefix}2024-06-18', 'train')
images_dir = os.path.join(cur_dataset, 'images')
labels_dir = os.path.join(cur_dataset, 'labels')
for f in os.listdir(images_dir)[35247:]:
image_path = os.path.join(images_dir, f)
labels_path = os.path.join(labels_dir, f'{Path(f).stem}.txt')
img = ImageLabel(
image_path=image_path,
image=cv2.imread(image_path),
labels_path=labels_path,
labels=read_labels(labels_path)
)
img.visualize(annotation_classes)
print(f'visualizing {image_path}')
plt.close()
key = input('Press any key to continue')
def visualise_dataset():
    """Step through images of the 2024-06-18 train split, drawing each with its labels."""
    cur_dataset = os.path.join(datasets_dir, f'{prefix}2024-06-18', 'train')
    images_dir = os.path.join(cur_dataset, 'images')
    labels_dir = os.path.join(cur_dataset, 'labels')
    # NOTE(review): the 35247 offset looks like a resume point from an earlier
    # session - confirm before reusing.
    for file_name in os.listdir(images_dir)[35247:]:
        image_path = os.path.join(images_dir, file_name)
        labels_path = os.path.join(labels_dir, f'{Path(file_name).stem}.txt')
        annotated = ImageLabel(
            image_path=image_path,
            image=cv2.imread(image_path),
            labels_path=labels_path,
            labels=read_labels(labels_path)
        )
        annotated.visualize(annotation_classes)
        print(f'visualizing {image_path}')
        plt.close()
        # Block until the operator acknowledges before moving on.
        input('Press any key to continue')
def visualise_processed_folder():
    """Display the first image of the processed folder together with its labels."""

    def show_image(file_name):
        # Load one image/label pair from the processed dirs and draw it.
        image_path = os.path.join(processed_images_dir, file_name)
        labels_path = os.path.join(processed_labels_dir, f'{Path(file_name).stem}.txt')
        annotated = ImageLabel(
            image_path=image_path,
            image=cv2.imread(image_path),
            labels_path=labels_path,
            labels=read_labels(labels_path)
        )
        annotated.visualize(annotation_classes)

    images = os.listdir(processed_images_dir)
    # Only the first listed image is shown; no navigation is implemented.
    show_image(images[0])


if __name__ == '__main__':
    visualise_processed_folder()
+5 -26
View File
@@ -50,13 +50,10 @@ def image_processing(img_ann: ImageLabel) -> [ImageLabel]:
return results return results
def write_result(img_ann: ImageLabel):
    """Write the processed image to img_ann.image_path, creating parent dirs as needed."""
    for target in (img_ann.image_path, img_ann.labels_path):
        os.makedirs(os.path.dirname(target), exist_ok=True)
    # imencode + tofile rather than cv2.imwrite - presumably for non-ASCII
    # path support; verify before changing.
    cv2.imencode('.jpg', img_ann.image)[1].tofile(img_ann.image_path)
    print(f'{img_ann.image_path} written')
@@ -92,28 +89,16 @@ def process_image(img_ann):
image_path=os.path.join(processed_images_dir, Path(img_ann.image_path).name), image_path=os.path.join(processed_images_dir, Path(img_ann.image_path).name),
labels_path=os.path.join(processed_labels_dir, Path(img_ann.labels_path).name) labels_path=os.path.join(processed_labels_dir, Path(img_ann.labels_path).name)
)) ))
# os.remove(img_ann.image_path)
# os.remove(img_ann.labels_path)
def main(): def main():
checkpoint = datetime.now() - timedelta(days=720)
try:
with open(checkpoint_file, 'r') as f:
checkpoint = datetime.strptime(f.read(), checkpoint_date_format)
except:
pass
last_date = checkpoint
while True: while True:
processed_images = set(f.name for f in os.scandir(processed_images_dir))
images = [] images = []
with os.scandir(data_images_dir) as imd: with os.scandir(data_images_dir) as imd:
for image_file in imd: for image_file in imd:
if not image_file.is_file(): if image_file.is_file() and image_file.name not in processed_images:
continue
mod_time = datetime.fromtimestamp(image_file.stat().st_mtime)
if mod_time > checkpoint:
images.append(image_file) images.append(image_file)
last_date = max(last_date, mod_time)
for image_file in images: for image_file in images:
try: try:
@@ -128,14 +113,8 @@ def main():
)) ))
except Exception as e: except Exception as e:
print(f'Error appeared {e}') print(f'Error appeared {e}')
if last_date != checkpoint: print('All processed, waiting for 2 minutes...')
checkpoint = last_date time.sleep(120)
try:
with open(checkpoint_file, 'w') as f:
f.write(datetime.strftime(checkpoint, checkpoint_date_format))
except:
pass
time.sleep(5)
if __name__ == '__main__': if __name__ == '__main__':
+65 -31
View File
@@ -1,12 +1,17 @@
# Standard library
import random
import shutil
from datetime import datetime
from os import path, replace, remove, listdir, makedirs, scandir
from os.path import abspath
from pathlib import Path

# Third-party
from ultralytics import YOLO

# Project constants
from constants import (processed_images_dir,
                       processed_labels_dir,
                       annotation_classes,
                       prefix, date_format,
                       datasets_dir, models_dir,
                       corrupted_images_dir, corrupted_labels_dir)

# Rolling copy of the newest trained weights.
latest_model = path.join(models_dir, f'{prefix}latest.pt')
# Dataset folder for today's run, e.g. 'azaion-2024-08-13'.
today_folder = f'{prefix}{datetime.now():{date_format}}'
today_dataset = path.join(datasets_dir, today_folder)

# Train/valid/test split, in percent.
train_set = 70
valid_set = 20
test_set = 10
def form_dataset(set_date: datetime):
    """Assemble today's train/valid/test dataset from the processed pool.

    set_date: only images modified strictly after this moment are included;
    pass None to take everything.
    """
    makedirs(today_dataset, exist_ok=True)
    images = []
    with scandir(processed_images_dir) as entries:
        for entry in entries:
            if not entry.is_file():
                continue
            modified = datetime.fromtimestamp(entry.stat().st_mtime)
            # None means "no cutoff": include every file.
            if set_date is None or modified > set_date:
                images.append(entry)
    print('shuffling images')
    random.shuffle(images)
    train_size = int(len(images) * train_set / 100.0)
    valid_size = int(len(images) * valid_set / 100.0)
    print(f'copy train dataset, size: {train_size} annotations')
    copy_annotations(images[:train_size], 'train')
    print(f'copy valid set, size: {valid_size} annotations')
    copy_annotations(images[train_size:train_size + valid_size], 'valid')
    print(f'copy test set, size: {len(images) - train_size - valid_size} annotations')
    copy_annotations(images[train_size + valid_size:], 'test')
    print('creating yaml...')
    create_yaml()
def copy_annotations(images, folder):
    """Copy image/label pairs into today's dataset under `folder` ('train'/'valid'/'test').

    images: os.DirEntry objects taken from the processed images directory.
    Pairs whose label fails check_label are copied to the corrupted
    directories instead, so they can be inspected and fixed.
    """
    destination_images = path.join(today_dataset, folder, 'images')
    makedirs(destination_images, exist_ok=True)
    destination_labels = path.join(today_dataset, folder, 'labels')
    makedirs(destination_labels, exist_ok=True)

    makedirs(corrupted_images_dir, exist_ok=True)
    makedirs(corrupted_labels_dir, exist_ok=True)

    for image in images:
        label_name = f'{Path(image.path).stem}.txt'
        label_path = path.join(processed_labels_dir, label_name)
        if check_label(label_path):
            shutil.copy(image.path, path.join(destination_images, image.name))
            shutil.copy(label_path, path.join(destination_labels, label_name))
        else:
            shutil.copy(image.path, path.join(corrupted_images_dir, image.name))
            # Bug fix: check_label() also returns False when the label file is
            # missing entirely; copying it unconditionally raised
            # FileNotFoundError and aborted the whole dataset build.
            if path.exists(label_path):
                shutil.copy(label_path, path.join(corrupted_labels_dir, label_name))
            print(f'Label {label_path} is corrupted! Copy with its image to the corrupted directory ({corrupted_labels_dir})')
def check_label(label_path):
    """Return True when `label_path` is a usable YOLO label file.

    Rejects (returns False) when the file is missing, or when any field after
    the class id is not a number inside the normalized [0, 1] range.
    """
    if not path.exists(label_path):
        return False
    with open(label_path, 'r') as f:
        lines = f.readlines()
    for line in lines:
        # Skip the leading class-id token; only coordinates/sizes are normalized.
        for val in line.split(' ')[1:]:
            try:
                value = float(val)
            except ValueError:
                # Malformed token: flag as corrupted instead of crashing the run.
                return False
            # Original only rejected > 1, so negative (equally invalid)
            # coordinates slipped through into the dataset.
            if not 0.0 <= value <= 1.0:
                return False
    return True
@@ -97,10 +116,25 @@ def revert_to_processed_data(date):
shutil.rmtree(date_dataset) shutil.rmtree(date_dataset)
def get_latest_model():
    """Return (date, weights_path) of the newest trained model, or (None, None).

    Model folders under models_dir are named '{prefix}YYYY-MM-DD'. The script
    also copies '{prefix}latest.pt' straight into models_dir, so entries that
    do not parse as a date must be skipped - strptime on them raised
    ValueError and crashed this function.
    """
    candidates = []
    for entry in listdir(models_dir):
        try:
            # date_format is the project-wide '%Y-%m-%d' constant.
            entry_date = datetime.strptime(entry.replace(prefix, ''), date_format)
        except ValueError:
            # Not a dated model folder (e.g. 'azaion-latest.pt') - ignore it.
            continue
        candidates.append((entry_date, path.join(models_dir, entry, 'weights', 'best.pt')))
    if not candidates:
        return None, None
    latest_date, latest_path = max(candidates, key=lambda c: c[0])
    return latest_date, latest_path
if __name__ == '__main__':
latest_date, latest_model = get_latest_model()
# form_dataset(latest_date)
model_name = latest_model if latest_model is not None and path.isfile(latest_model) else 'yolov8m.yaml'
print(f'Initial model: {model_name}') print(f'Initial model: {model_name}')
model = YOLO(model_name) model = YOLO(model_name)
@@ -108,7 +142,7 @@ if __name__ == '__main__':
cur_folder = today_dataset cur_folder = today_dataset
yaml = abspath(path.join(cur_folder, 'data.yaml')) yaml = abspath(path.join(cur_folder, 'data.yaml'))
results = model.train(data=yaml, epochs=100, batch=55, imgsz=640, save_period=1) results = model.train(data=yaml, epochs=100, batch=60, imgsz=640, save_period=1)
shutil.copy(f'{results.save_dir}/weights/best.pt', latest_model) shutil.copy(f'{results.save_dir}/weights/best.pt', latest_model)
shutil.copytree(results.save_dir, path.join(models_dir, cur_folder)) shutil.copytree(results.save_dir, path.join(models_dir, cur_folder))