From 6e5153ffb771968faa1a60fab28964f077694134 Mon Sep 17 00:00:00 2001 From: Alex Bezdieniezhnykh Date: Sat, 31 May 2025 14:50:58 +0300 Subject: [PATCH] prepare train.py to automated training set cryptography lib to the certain version add manual_run for manual operations, right now it is onnx conversion and upload --- annotation-queue/offset.yaml | 2 +- manual_run.py | 31 +++++++++++ preprocess-train.py | 8 --- requirements.txt | 2 +- train.py | 101 +++++++---------------------------- 5 files changed, 51 insertions(+), 93 deletions(-) create mode 100644 manual_run.py delete mode 100644 preprocess-train.py diff --git a/annotation-queue/offset.yaml b/annotation-queue/offset.yaml index 3815d7c..4d17e70 100644 --- a/annotation-queue/offset.yaml +++ b/annotation-queue/offset.yaml @@ -1 +1 @@ -offset_queue: 0 +offset_queue: 108 diff --git a/manual_run.py b/manual_run.py new file mode 100644 index 0000000..338a5c1 --- /dev/null +++ b/manual_run.py @@ -0,0 +1,31 @@ +import shutil +from datetime import datetime +from os import path + +from constants import models_dir, prefix, date_format, MODELS_FOLDER +from api_client import ApiClient +from augmentation import Augmentator +from exports import export_onnx +from security import Security + +# Augmentator().augment_annotations() +#train_dataset(from_scratch=True) + +# resume_training('/azaion/dev/ai-training/runs/detect/train12/weights/last.pt') + +result_dir = '/azaion/dev/ai-training/runs/detect/train12' +model_dir = path.join(models_dir, f'{prefix}2025-05-18') +shutil.copytree(result_dir, model_dir) + +model_path = path.join(models_dir, f'{prefix[:-1]}.pt') +shutil.copy(path.join(model_dir, 'weights', 'best.pt'), model_path) + +api_client = ApiClient() +onnx_path = export_onnx(model_path) +print(f'Conversion done: onnx path: {onnx_path}') + +with open(onnx_path, 'rb') as binary_file: + onnx_bytes = binary_file.read() + +key = Security.get_model_encryption_key() +api_client.upload_big_small_resource(onnx_bytes, onnx_path, MODELS_FOLDER, key) diff --git a/preprocess-train.py b/preprocess-train.py deleted file mode 100644 index 9216a9e..0000000 --- a/preprocess-train.py +++ /dev/null @@ -1,8 +0,0 @@ -from augmentation import Augmentator -from train import train_dataset, convert2rknn, resume_training - -# Augmentator().augment_annotations() -#train_dataset(from_scratch=True) - - -resume_training('/azaion/dev/ai-training/runs/detect/train12/weights/last.pt') diff --git a/requirements.txt b/requirements.txt index 7fad15e..0fe63ce 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,7 @@ albumentations opencv-python matplotlib PyYAML -cryptography +cryptography==44.0.2 numpy==1.26.4 requests pyyaml diff --git a/train.py b/train.py index 346580a..5a1c638 100644 --- a/train.py +++ b/train.py @@ -37,33 +37,24 @@ old_images_percentage = 75 DEFAULT_CLASS_NUM = 80 total_files_copied = 0 -def form_dataset(from_date: datetime): - makedirs(today_dataset, exist_ok=True) + +def form_dataset(): + shutil.rmtree(today_dataset, ignore_errors=True) + makedirs(today_dataset) images = [] - old_images = [] with scandir(processed_images_dir) as imd: for image_file in imd: if not image_file.is_file(): continue - mod_time = datetime.fromtimestamp(image_file.stat().st_mtime).replace(hour=0, minute=0, second=0, microsecond=0) - if from_date is None: - images.append(image_file) - elif mod_time > from_date: - images.append(image_file) - else: # gather old images as well in order to avoid overfitting on the only new data. - old_images.append(image_file) - - random.shuffle(old_images) - old_images_size = int(len(old_images) * old_images_percentage / 100.0) - - print(f'Got {len(images)} new images and {old_images_size} of old images (to prevent overfitting). Shuffling them...') - images.extend(old_images[:old_images_size]) + images.append(image_file) + print(f'Got {len(images)} images. Start shuffling...') random.shuffle(images) train_size = int(len(images) * train_set / 100.0) valid_size = int(len(images) * valid_set / 100.0) + print(f'Start copying...') copy_annotations(images[:train_size], 'train') copy_annotations(images[train_size:train_size + valid_size], 'valid') copy_annotations(images[train_size + valid_size:], 'test') @@ -140,37 +131,6 @@ def create_yaml(): f.writelines([f'{line}\n' for line in lines]) -def revert_to_processed_data(date): - def revert_dir(src_dir, dest_dir): - for file in listdir(src_dir): - s = path.join(src_dir, file) - d = path.join(dest_dir, file) - replace(s, d) - date_dataset = path.join(datasets_dir, f'{prefix}{date}') - makedirs(processed_images_dir, exist_ok=True) - makedirs(processed_labels_dir, exist_ok=True) - for subset in ['test', 'train', 'valid']: - revert_dir(path.join(date_dataset, subset, 'images'), processed_images_dir) - revert_dir(path.join(date_dataset, subset, 'labels'), processed_labels_dir) - shutil.rmtree(date_dataset) - - -def get_latest_model(): - def convert(d: str): - if not d.startswith(prefix): - return None - dir_date = datetime.strptime(d.replace(prefix, ''), '%Y-%m-%d') - dir_model_path = path.join(models_dir, d, 'weights', 'best.pt') - return {'date': dir_date, 'path': dir_model_path} - - dates = [convert(d) for d in next(os.walk(models_dir))[1]] - dates = list(filter(lambda x : x is not None, dates)) - sorted_dates = list(sorted(dates, key=lambda x: x['date'] )) - if len(sorted_dates) == 0: - return None, None - last_model = sorted_dates[-1] - return last_model['date'], last_model['path'] - def resume_training(last_pt_path): model = YOLO(last_pt_path) @@ -183,63 +143,38 @@ def resume_training(last_pt_path): workers=24) -def train_dataset(existing_date=None, from_scratch=False): - latest_date, latest_model = get_latest_model() if not from_scratch else None, None - - if existing_date is not None: - cur_folder = f'{prefix}{existing_date}' - cur_dataset = path.join(datasets_dir, f'{prefix}{existing_date}') - else: - # if from_scratch and Path(today_dataset).exists(): - # shutil.rmtree(today_dataset) - # form_dataset(latest_date) - # create_yaml() - cur_folder = today_folder - cur_dataset = today_dataset - - model_name = latest_model if latest_model is not None and path.isfile(latest_model) and not from_scratch else 'yolo11m.yaml' - print(f'Initial model: {model_name}') +def train_dataset(): + form_dataset() + create_yaml() + model_name = 'yolo11m.yaml' model = YOLO(model_name) - yaml = abspath(path.join(cur_dataset, 'data.yaml')) - results = model.train(data=yaml, + results = model.train(data=abspath(path.join(today_dataset, 'data.yaml')), epochs=120, batch=11, imgsz=1280, save_period=1, workers=24) - model_dir = path.join(models_dir, cur_folder) + model_dir = path.join(models_dir, today_folder) shutil.copytree(results.save_dir, model_dir) model_path = path.join(models_dir, f'{prefix[:-1]}.pt') shutil.copy(path.join(model_dir, 'weights', 'best.pt'), model_path) - shutil.rmtree('runs') return model_path -def convert2rknn(): - subprocess.call(['bash', 'convert.sh'], cwd="./orangepi5") - latest_date, latest_model = get_latest_model() - model = YOLO(latest_model) - model.export(format="onnx") - pass - - def validate(model_path): model = YOLO(model_path) - metrics = model.val() - pass + print(model.val()) if __name__ == '__main__': - model_path = train_dataset(from_scratch=True) - # validate(path.join('runs', 'detect', 'train7', 'weights', 'best.pt')) - # form_data_sample(500) - # convert2rknn() - api_client = ApiClient() - onnx_path = export_onnx('azaion.pt') + model_path = train_dataset() + validate(path.join('runs', 'detect', 'train7', 'weights', 'best.pt')) + onnx_path = export_onnx(model_path) + api_client = ApiClient() with open(onnx_path, 'rb') as binary_file: onnx_bytes = binary_file.read()