mirror of
https://github.com/azaion/ai-training.git
synced 2026-04-22 22:56:34 +00:00
copy images and labels during forming dataset. add folder for corrupted labels, small refactor
This commit is contained in:
+9
-4
@@ -1,21 +1,26 @@
|
|||||||
import os
|
import os
|
||||||
from dto.annotationClass import AnnotationClass
|
from dto.annotationClass import AnnotationClass
|
||||||
|
|
||||||
|
azaion = '/azaion'
|
||||||
prefix = 'azaion-'
|
prefix = 'azaion-'
|
||||||
images = 'images'
|
images = 'images'
|
||||||
labels = 'labels'
|
labels = 'labels'
|
||||||
|
|
||||||
data_dir = '/azaion/data/raw'
|
data_dir = os.path.join(azaion, 'data')
|
||||||
data_images_dir = os.path.join(data_dir, images)
|
data_images_dir = os.path.join(data_dir, images)
|
||||||
data_labels_dir = os.path.join(data_dir, labels)
|
data_labels_dir = os.path.join(data_dir, labels)
|
||||||
|
|
||||||
processed_dir = '/azaion/data/processed'
|
processed_dir = os.path.join(azaion, 'data-processed')
|
||||||
processed_images_dir = os.path.join(processed_dir, images)
|
processed_images_dir = os.path.join(processed_dir, images)
|
||||||
processed_labels_dir = os.path.join(processed_dir, labels)
|
processed_labels_dir = os.path.join(processed_dir, labels)
|
||||||
|
|
||||||
|
corrupted_dir = os.path.join(azaion, 'data-corrupted')
|
||||||
|
corrupted_images_dir = os.path.join(corrupted_dir, images)
|
||||||
|
corrupted_labels_dir = os.path.join(corrupted_dir, labels)
|
||||||
|
|
||||||
datasets_dir = '/azaion/datasets'
|
|
||||||
models_dir = '/azaion/models'
|
datasets_dir = os.path.join(azaion, 'datasets')
|
||||||
|
models_dir = os.path.join(azaion, 'models')
|
||||||
|
|
||||||
annotation_classes = AnnotationClass.read_json()
|
annotation_classes = AnnotationClass.read_json()
|
||||||
date_format = '%Y-%m-%d'
|
date_format = '%Y-%m-%d'
|
||||||
|
|||||||
+41
-19
@@ -6,25 +6,47 @@ from dto.imageLabel import ImageLabel
|
|||||||
from preprocessing import read_labels
|
from preprocessing import read_labels
|
||||||
from matplotlib import pyplot as plt
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
from constants import datasets_dir, prefix
|
from constants import datasets_dir, prefix, processed_images_dir, processed_labels_dir
|
||||||
|
|
||||||
|
|
||||||
annotation_classes = AnnotationClass.read_json()
|
annotation_classes = AnnotationClass.read_json()
|
||||||
cur_dataset = os.path.join(datasets_dir, f'{prefix}2024-06-18', 'train')
|
|
||||||
images_dir = os.path.join(cur_dataset, 'images')
|
|
||||||
labels_dir = os.path.join(cur_dataset, 'labels')
|
|
||||||
|
|
||||||
for f in os.listdir(images_dir)[35247:]:
|
|
||||||
image_path = os.path.join(images_dir, f)
|
|
||||||
labels_path = os.path.join(labels_dir, f'{Path(f).stem}.txt')
|
|
||||||
img = ImageLabel(
|
|
||||||
image_path=image_path,
|
|
||||||
image=cv2.imread(image_path),
|
|
||||||
labels_path=labels_path,
|
|
||||||
labels=read_labels(labels_path)
|
|
||||||
)
|
|
||||||
img.visualize(annotation_classes)
|
|
||||||
print(f'visualizing {image_path}')
|
|
||||||
plt.close()
|
|
||||||
key = input('Press any key to continue')
|
|
||||||
|
|
||||||
|
def visualise_dataset():
|
||||||
|
cur_dataset = os.path.join(datasets_dir, f'{prefix}2024-06-18', 'train')
|
||||||
|
images_dir = os.path.join(cur_dataset, 'images')
|
||||||
|
labels_dir = os.path.join(cur_dataset, 'labels')
|
||||||
|
|
||||||
|
for f in os.listdir(images_dir)[35247:]:
|
||||||
|
image_path = os.path.join(images_dir, f)
|
||||||
|
labels_path = os.path.join(labels_dir, f'{Path(f).stem}.txt')
|
||||||
|
img = ImageLabel(
|
||||||
|
image_path=image_path,
|
||||||
|
image=cv2.imread(image_path),
|
||||||
|
labels_path=labels_path,
|
||||||
|
labels=read_labels(labels_path)
|
||||||
|
)
|
||||||
|
img.visualize(annotation_classes)
|
||||||
|
print(f'visualizing {image_path}')
|
||||||
|
plt.close()
|
||||||
|
key = input('Press any key to continue')
|
||||||
|
|
||||||
|
|
||||||
|
def visualise_processed_folder():
|
||||||
|
|
||||||
|
def show_image(img):
|
||||||
|
image_path = os.path.join(processed_images_dir, img)
|
||||||
|
labels_path = os.path.join(processed_labels_dir, f'{Path(img).stem}.txt')
|
||||||
|
img = ImageLabel(
|
||||||
|
image_path=image_path,
|
||||||
|
image=cv2.imread(image_path),
|
||||||
|
labels_path=labels_path,
|
||||||
|
labels=read_labels(labels_path)
|
||||||
|
)
|
||||||
|
img.visualize(annotation_classes)
|
||||||
|
images = os.listdir(processed_images_dir)
|
||||||
|
cur = 0
|
||||||
|
show_image(images[cur])
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
visualise_processed_folder()
|
||||||
|
|||||||
+5
-26
@@ -50,13 +50,10 @@ def image_processing(img_ann: ImageLabel) -> [ImageLabel]:
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
def write_result(img_ann: ImageLabel, show_image=False):
|
def write_result(img_ann: ImageLabel):
|
||||||
os.makedirs(os.path.dirname(img_ann.image_path), exist_ok=True)
|
os.makedirs(os.path.dirname(img_ann.image_path), exist_ok=True)
|
||||||
os.makedirs(os.path.dirname(img_ann.labels_path), exist_ok=True)
|
os.makedirs(os.path.dirname(img_ann.labels_path), exist_ok=True)
|
||||||
|
|
||||||
if show_image:
|
|
||||||
img_ann.visualize(annotation_classes)
|
|
||||||
|
|
||||||
cv2.imencode('.jpg', img_ann.image)[1].tofile(img_ann.image_path)
|
cv2.imencode('.jpg', img_ann.image)[1].tofile(img_ann.image_path)
|
||||||
print(f'{img_ann.image_path} written')
|
print(f'{img_ann.image_path} written')
|
||||||
|
|
||||||
@@ -92,28 +89,16 @@ def process_image(img_ann):
|
|||||||
image_path=os.path.join(processed_images_dir, Path(img_ann.image_path).name),
|
image_path=os.path.join(processed_images_dir, Path(img_ann.image_path).name),
|
||||||
labels_path=os.path.join(processed_labels_dir, Path(img_ann.labels_path).name)
|
labels_path=os.path.join(processed_labels_dir, Path(img_ann.labels_path).name)
|
||||||
))
|
))
|
||||||
# os.remove(img_ann.image_path)
|
|
||||||
# os.remove(img_ann.labels_path)
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
checkpoint = datetime.now() - timedelta(days=720)
|
|
||||||
try:
|
|
||||||
with open(checkpoint_file, 'r') as f:
|
|
||||||
checkpoint = datetime.strptime(f.read(), checkpoint_date_format)
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
last_date = checkpoint
|
|
||||||
while True:
|
while True:
|
||||||
|
processed_images = set(f.name for f in os.scandir(processed_images_dir))
|
||||||
images = []
|
images = []
|
||||||
with os.scandir(data_images_dir) as imd:
|
with os.scandir(data_images_dir) as imd:
|
||||||
for image_file in imd:
|
for image_file in imd:
|
||||||
if not image_file.is_file():
|
if image_file.is_file() and image_file.name not in processed_images:
|
||||||
continue
|
|
||||||
mod_time = datetime.fromtimestamp(image_file.stat().st_mtime)
|
|
||||||
if mod_time > checkpoint:
|
|
||||||
images.append(image_file)
|
images.append(image_file)
|
||||||
last_date = max(last_date, mod_time)
|
|
||||||
|
|
||||||
for image_file in images:
|
for image_file in images:
|
||||||
try:
|
try:
|
||||||
@@ -128,14 +113,8 @@ def main():
|
|||||||
))
|
))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f'Error appeared {e}')
|
print(f'Error appeared {e}')
|
||||||
if last_date != checkpoint:
|
print('All processed, waiting for 2 minutes...')
|
||||||
checkpoint = last_date
|
time.sleep(120)
|
||||||
try:
|
|
||||||
with open(checkpoint_file, 'w') as f:
|
|
||||||
f.write(datetime.strftime(checkpoint, checkpoint_date_format))
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
time.sleep(5)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|||||||
@@ -1,12 +1,17 @@
|
|||||||
from os import path, replace, remove, listdir, makedirs
|
import random
|
||||||
|
from os import path, replace, remove, listdir, makedirs, scandir
|
||||||
from os.path import abspath
|
from os.path import abspath
|
||||||
import shutil
|
import shutil
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from ultralytics import YOLO
|
from ultralytics import YOLO
|
||||||
from constants import processed_images_dir, processed_labels_dir, annotation_classes, prefix, date_format, datasets_dir, models_dir
|
from constants import (processed_images_dir,
|
||||||
|
processed_labels_dir,
|
||||||
|
annotation_classes,
|
||||||
|
prefix, date_format,
|
||||||
|
datasets_dir, models_dir,
|
||||||
|
corrupted_images_dir, corrupted_labels_dir)
|
||||||
|
|
||||||
latest_model = path.join(models_dir, f'{prefix}latest.pt')
|
|
||||||
today_folder = f'{prefix}{datetime.now():{date_format}}'
|
today_folder = f'{prefix}{datetime.now():{date_format}}'
|
||||||
today_dataset = path.join(datasets_dir, today_folder)
|
today_dataset = path.join(datasets_dir, today_folder)
|
||||||
train_set = 70
|
train_set = 70
|
||||||
@@ -14,38 +19,61 @@ valid_set = 20
|
|||||||
test_set = 10
|
test_set = 10
|
||||||
|
|
||||||
|
|
||||||
def form_dataset():
|
def form_dataset(set_date: datetime):
|
||||||
makedirs(today_dataset, exist_ok=True)
|
makedirs(today_dataset, exist_ok=True)
|
||||||
images = listdir(processed_images_dir)
|
images = []
|
||||||
|
with scandir(processed_images_dir) as imd:
|
||||||
|
for image_file in imd:
|
||||||
|
if not image_file.is_file():
|
||||||
|
continue
|
||||||
|
mod_time = datetime.fromtimestamp(image_file.stat().st_mtime)
|
||||||
|
if set_date is None:
|
||||||
|
images.append(image_file)
|
||||||
|
elif mod_time > set_date:
|
||||||
|
images.append(image_file)
|
||||||
|
|
||||||
|
print('shuffling images')
|
||||||
|
random.shuffle(images)
|
||||||
|
|
||||||
train_size = int(len(images) * train_set / 100.0)
|
train_size = int(len(images) * train_set / 100.0)
|
||||||
valid_size = int(len(images) * valid_set / 100.0)
|
valid_size = int(len(images) * valid_set / 100.0)
|
||||||
|
|
||||||
move_annotations(images[:train_size], 'train')
|
print(f'copy train dataset, size: {train_size} annotations')
|
||||||
move_annotations(images[train_size:train_size + valid_size], 'valid')
|
copy_annotations(images[:train_size], 'train')
|
||||||
move_annotations(images[train_size + valid_size:], 'test')
|
|
||||||
|
|
||||||
|
print(f'copy valid set, size: {valid_size} annotations')
|
||||||
|
copy_annotations(images[train_size:train_size + valid_size], 'valid')
|
||||||
|
|
||||||
|
print(f'copy test set, size: {len(images) - train_size - valid_size} annotations')
|
||||||
|
copy_annotations(images[train_size + valid_size:], 'test')
|
||||||
|
|
||||||
|
print('creating yaml...')
|
||||||
create_yaml()
|
create_yaml()
|
||||||
|
|
||||||
|
|
||||||
def move_annotations(images, folder):
|
def copy_annotations(images, folder):
|
||||||
destination_images = path.join(today_dataset, folder, 'images')
|
destination_images = path.join(today_dataset, folder, 'images')
|
||||||
makedirs(destination_images, exist_ok=True)
|
makedirs(destination_images, exist_ok=True)
|
||||||
|
|
||||||
destination_labels = path.join(today_dataset, folder, 'labels')
|
destination_labels = path.join(today_dataset, folder, 'labels')
|
||||||
makedirs(destination_labels, exist_ok=True)
|
makedirs(destination_labels, exist_ok=True)
|
||||||
for image_name in images:
|
|
||||||
image_path = path.join(processed_images_dir, image_name)
|
makedirs(corrupted_images_dir, exist_ok=True)
|
||||||
label_name = f'{Path(image_name).stem}.txt'
|
makedirs(corrupted_labels_dir, exist_ok=True)
|
||||||
|
|
||||||
|
for image in images:
|
||||||
|
label_name = f'{Path(image.path).stem}.txt'
|
||||||
label_path = path.join(processed_labels_dir, label_name)
|
label_path = path.join(processed_labels_dir, label_name)
|
||||||
if not check_label(label_path):
|
if check_label(label_path):
|
||||||
remove(image_path)
|
shutil.copy(image.path, path.join(destination_images, image.name))
|
||||||
|
shutil.copy(label_path, path.join(destination_labels, label_name))
|
||||||
else:
|
else:
|
||||||
replace(image_path, path.join(destination_images, image_name))
|
shutil.copy(image.path, path.join(corrupted_images_dir, image.name))
|
||||||
replace(label_path, path.join(destination_labels, label_name))
|
shutil.copy(label_path, path.join(corrupted_labels_dir, label_name))
|
||||||
|
print(f'Label {label_path} is corrupted! Copy with its image to the corrupted directory ({corrupted_labels_dir})')
|
||||||
|
|
||||||
|
|
||||||
def check_label(label_path):
|
def check_label(label_path):
|
||||||
lines_edited = False
|
|
||||||
if not path.exists(label_path):
|
if not path.exists(label_path):
|
||||||
return False
|
return False
|
||||||
with open(label_path, 'r') as f:
|
with open(label_path, 'r') as f:
|
||||||
@@ -53,16 +81,7 @@ def check_label(label_path):
|
|||||||
for line in lines:
|
for line in lines:
|
||||||
for val in line.split(' ')[1:]:
|
for val in line.split(' ')[1:]:
|
||||||
if float(val) > 1:
|
if float(val) > 1:
|
||||||
lines.remove(line)
|
return False
|
||||||
lines_edited = True
|
|
||||||
if len(lines) == 0:
|
|
||||||
return False
|
|
||||||
if not lines_edited:
|
|
||||||
return True
|
|
||||||
|
|
||||||
with open(label_path, 'w') as label_write:
|
|
||||||
label_write.writelines(lines)
|
|
||||||
label_write.close()
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
@@ -97,10 +116,25 @@ def revert_to_processed_data(date):
|
|||||||
shutil.rmtree(date_dataset)
|
shutil.rmtree(date_dataset)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
def get_latest_model():
|
||||||
# form_dataset()
|
def convert(d: str):
|
||||||
|
dir_date = datetime.strptime(d.replace(prefix, ''), '%Y-%m-%d')
|
||||||
|
dir_model_path = path.join(models_dir, d, 'weights', 'best.pt')
|
||||||
|
return {'date': dir_date, 'path': dir_model_path}
|
||||||
|
|
||||||
model_name = latest_model if path.isfile(latest_model) else 'yolov8m.yaml'
|
dates = [convert(d) for d in listdir(models_dir)]
|
||||||
|
sorted_dates = list(sorted(dates, key=lambda x: x['date']))
|
||||||
|
if len(sorted_dates) == 0:
|
||||||
|
return None, None
|
||||||
|
last_model = sorted_dates[-1]
|
||||||
|
return last_model['date'], last_model['path']
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
latest_date, latest_model = get_latest_model()
|
||||||
|
# form_dataset(latest_date)
|
||||||
|
|
||||||
|
model_name = latest_model if latest_model is not None and path.isfile(latest_model) else 'yolov8m.yaml'
|
||||||
print(f'Initial model: {model_name}')
|
print(f'Initial model: {model_name}')
|
||||||
model = YOLO(model_name)
|
model = YOLO(model_name)
|
||||||
|
|
||||||
@@ -108,7 +142,7 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
cur_folder = today_dataset
|
cur_folder = today_dataset
|
||||||
yaml = abspath(path.join(cur_folder, 'data.yaml'))
|
yaml = abspath(path.join(cur_folder, 'data.yaml'))
|
||||||
results = model.train(data=yaml, epochs=100, batch=55, imgsz=640, save_period=1)
|
results = model.train(data=yaml, epochs=100, batch=60, imgsz=640, save_period=1)
|
||||||
|
|
||||||
shutil.copy(f'{results.save_dir}/weights/best.pt', latest_model)
|
shutil.copy(f'{results.save_dir}/weights/best.pt', latest_model)
|
||||||
shutil.copytree(results.save_dir, path.join(models_dir, cur_folder))
|
shutil.copytree(results.save_dir, path.join(models_dir, cur_folder))
|
||||||
|
|||||||
Reference in New Issue
Block a user