mirror of
https://github.com/azaion/ai-training.git
synced 2026-04-23 03:46:35 +00:00
Prepare train.py for automated training
Pin the cryptography lib to a specific version. Add manual_run for manual operations; right now these are ONNX conversion and upload.
This commit is contained in:
@@ -1 +1 @@
|
|||||||
offset_queue: 0
|
offset_queue: 108
|
||||||
|
|||||||
@@ -0,0 +1,31 @@
|
|||||||
|
import shutil
|
||||||
|
from datetime import datetime
|
||||||
|
from os import path
|
||||||
|
|
||||||
|
from constants import models_dir, prefix, date_format, MODELS_FOLDER
|
||||||
|
from api_client import ApiClient
|
||||||
|
from augmentation import Augmentator
|
||||||
|
from exports import export_onnx
|
||||||
|
from security import Security
|
||||||
|
|
||||||
|
# Augmentator().augment_annotations()
|
||||||
|
#train_dataset(from_scratch=True)
|
||||||
|
|
||||||
|
# resume_training('/azaion/dev/ai-training/runs/detect/train12/weights/last.pt')
|
||||||
|
|
||||||
|
result_dir = '/azaion/dev/ai-training/runs/detect/train12'
|
||||||
|
model_dir = path.join(models_dir, f'{prefix}2025-05-18')
|
||||||
|
shutil.copytree(result_dir, model_dir)
|
||||||
|
|
||||||
|
model_path = path.join(models_dir, f'{prefix[:-1]}.pt')
|
||||||
|
shutil.copy(path.join(model_dir, 'weights', 'best.pt'), model_path)
|
||||||
|
|
||||||
|
api_client = ApiClient()
|
||||||
|
onnx_path = export_onnx(model_path)
|
||||||
|
print(f'Conversion done: onnx path: {onnx_path}')
|
||||||
|
|
||||||
|
with open(onnx_path, 'rb') as binary_file:
|
||||||
|
onnx_bytes = binary_file.read()
|
||||||
|
|
||||||
|
key = Security.get_model_encryption_key()
|
||||||
|
api_client.upload_big_small_resource(onnx_bytes, onnx_path, MODELS_FOLDER, key)
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
from augmentation import Augmentator
|
|
||||||
from train import train_dataset, convert2rknn, resume_training
|
|
||||||
|
|
||||||
# Augmentator().augment_annotations()
|
|
||||||
#train_dataset(from_scratch=True)
|
|
||||||
|
|
||||||
|
|
||||||
resume_training('/azaion/dev/ai-training/runs/detect/train12/weights/last.pt')
|
|
||||||
+1
-1
@@ -8,7 +8,7 @@ albumentations
|
|||||||
opencv-python
|
opencv-python
|
||||||
matplotlib
|
matplotlib
|
||||||
PyYAML
|
PyYAML
|
||||||
cryptography
|
cryptography==44.0.2
|
||||||
numpy==1.26.4
|
numpy==1.26.4
|
||||||
requests
|
requests
|
||||||
pyyaml
|
pyyaml
|
||||||
|
|||||||
@@ -37,33 +37,24 @@ old_images_percentage = 75
|
|||||||
DEFAULT_CLASS_NUM = 80
|
DEFAULT_CLASS_NUM = 80
|
||||||
total_files_copied = 0
|
total_files_copied = 0
|
||||||
|
|
||||||
def form_dataset(from_date: datetime):
|
|
||||||
makedirs(today_dataset, exist_ok=True)
|
def form_dataset():
|
||||||
|
shutil.rmtree(today_dataset, ignore_errors=True)
|
||||||
|
makedirs(today_dataset)
|
||||||
images = []
|
images = []
|
||||||
old_images = []
|
|
||||||
with scandir(processed_images_dir) as imd:
|
with scandir(processed_images_dir) as imd:
|
||||||
for image_file in imd:
|
for image_file in imd:
|
||||||
if not image_file.is_file():
|
if not image_file.is_file():
|
||||||
continue
|
continue
|
||||||
mod_time = datetime.fromtimestamp(image_file.stat().st_mtime).replace(hour=0, minute=0, second=0, microsecond=0)
|
|
||||||
if from_date is None:
|
|
||||||
images.append(image_file)
|
images.append(image_file)
|
||||||
elif mod_time > from_date:
|
|
||||||
images.append(image_file)
|
|
||||||
else: # gather old images as well in order to avoid overfitting on the only new data.
|
|
||||||
old_images.append(image_file)
|
|
||||||
|
|
||||||
random.shuffle(old_images)
|
|
||||||
old_images_size = int(len(old_images) * old_images_percentage / 100.0)
|
|
||||||
|
|
||||||
print(f'Got {len(images)} new images and {old_images_size} of old images (to prevent overfitting). Shuffling them...')
|
|
||||||
images.extend(old_images[:old_images_size])
|
|
||||||
|
|
||||||
|
print(f'Got {len(images)} images. Start shuffling...')
|
||||||
random.shuffle(images)
|
random.shuffle(images)
|
||||||
|
|
||||||
train_size = int(len(images) * train_set / 100.0)
|
train_size = int(len(images) * train_set / 100.0)
|
||||||
valid_size = int(len(images) * valid_set / 100.0)
|
valid_size = int(len(images) * valid_set / 100.0)
|
||||||
|
|
||||||
|
print(f'Start copying...')
|
||||||
copy_annotations(images[:train_size], 'train')
|
copy_annotations(images[:train_size], 'train')
|
||||||
copy_annotations(images[train_size:train_size + valid_size], 'valid')
|
copy_annotations(images[train_size:train_size + valid_size], 'valid')
|
||||||
copy_annotations(images[train_size + valid_size:], 'test')
|
copy_annotations(images[train_size + valid_size:], 'test')
|
||||||
@@ -140,37 +131,6 @@ def create_yaml():
|
|||||||
f.writelines([f'{line}\n' for line in lines])
|
f.writelines([f'{line}\n' for line in lines])
|
||||||
|
|
||||||
|
|
||||||
def revert_to_processed_data(date):
|
|
||||||
def revert_dir(src_dir, dest_dir):
|
|
||||||
for file in listdir(src_dir):
|
|
||||||
s = path.join(src_dir, file)
|
|
||||||
d = path.join(dest_dir, file)
|
|
||||||
replace(s, d)
|
|
||||||
date_dataset = path.join(datasets_dir, f'{prefix}{date}')
|
|
||||||
makedirs(processed_images_dir, exist_ok=True)
|
|
||||||
makedirs(processed_labels_dir, exist_ok=True)
|
|
||||||
for subset in ['test', 'train', 'valid']:
|
|
||||||
revert_dir(path.join(date_dataset, subset, 'images'), processed_images_dir)
|
|
||||||
revert_dir(path.join(date_dataset, subset, 'labels'), processed_labels_dir)
|
|
||||||
shutil.rmtree(date_dataset)
|
|
||||||
|
|
||||||
|
|
||||||
def get_latest_model():
|
|
||||||
def convert(d: str):
|
|
||||||
if not d.startswith(prefix):
|
|
||||||
return None
|
|
||||||
dir_date = datetime.strptime(d.replace(prefix, ''), '%Y-%m-%d')
|
|
||||||
dir_model_path = path.join(models_dir, d, 'weights', 'best.pt')
|
|
||||||
return {'date': dir_date, 'path': dir_model_path}
|
|
||||||
|
|
||||||
dates = [convert(d) for d in next(os.walk(models_dir))[1]]
|
|
||||||
dates = list(filter(lambda x : x is not None, dates))
|
|
||||||
sorted_dates = list(sorted(dates, key=lambda x: x['date'] ))
|
|
||||||
if len(sorted_dates) == 0:
|
|
||||||
return None, None
|
|
||||||
last_model = sorted_dates[-1]
|
|
||||||
return last_model['date'], last_model['path']
|
|
||||||
|
|
||||||
|
|
||||||
def resume_training(last_pt_path):
|
def resume_training(last_pt_path):
|
||||||
model = YOLO(last_pt_path)
|
model = YOLO(last_pt_path)
|
||||||
@@ -183,63 +143,38 @@ def resume_training(last_pt_path):
|
|||||||
workers=24)
|
workers=24)
|
||||||
|
|
||||||
|
|
||||||
def train_dataset(existing_date=None, from_scratch=False):
|
def train_dataset():
|
||||||
latest_date, latest_model = get_latest_model() if not from_scratch else None, None
|
form_dataset()
|
||||||
|
create_yaml()
|
||||||
if existing_date is not None:
|
model_name = 'yolo11m.yaml'
|
||||||
cur_folder = f'{prefix}{existing_date}'
|
|
||||||
cur_dataset = path.join(datasets_dir, f'{prefix}{existing_date}')
|
|
||||||
else:
|
|
||||||
# if from_scratch and Path(today_dataset).exists():
|
|
||||||
# shutil.rmtree(today_dataset)
|
|
||||||
# form_dataset(latest_date)
|
|
||||||
# create_yaml()
|
|
||||||
cur_folder = today_folder
|
|
||||||
cur_dataset = today_dataset
|
|
||||||
|
|
||||||
model_name = latest_model if latest_model is not None and path.isfile(latest_model) and not from_scratch else 'yolo11m.yaml'
|
|
||||||
print(f'Initial model: {model_name}')
|
|
||||||
model = YOLO(model_name)
|
model = YOLO(model_name)
|
||||||
|
|
||||||
yaml = abspath(path.join(cur_dataset, 'data.yaml'))
|
results = model.train(data=abspath(path.join(today_dataset, 'data.yaml')),
|
||||||
results = model.train(data=yaml,
|
|
||||||
epochs=120,
|
epochs=120,
|
||||||
batch=11,
|
batch=11,
|
||||||
imgsz=1280,
|
imgsz=1280,
|
||||||
save_period=1,
|
save_period=1,
|
||||||
workers=24)
|
workers=24)
|
||||||
|
|
||||||
model_dir = path.join(models_dir, cur_folder)
|
model_dir = path.join(models_dir, today_folder)
|
||||||
shutil.copytree(results.save_dir, model_dir)
|
shutil.copytree(results.save_dir, model_dir)
|
||||||
|
|
||||||
model_path = path.join(models_dir, f'{prefix[:-1]}.pt')
|
model_path = path.join(models_dir, f'{prefix[:-1]}.pt')
|
||||||
shutil.copy(path.join(model_dir, 'weights', 'best.pt'), model_path)
|
shutil.copy(path.join(model_dir, 'weights', 'best.pt'), model_path)
|
||||||
shutil.rmtree('runs')
|
|
||||||
return model_path
|
return model_path
|
||||||
|
|
||||||
|
|
||||||
def convert2rknn():
|
|
||||||
subprocess.call(['bash', 'convert.sh'], cwd="./orangepi5")
|
|
||||||
latest_date, latest_model = get_latest_model()
|
|
||||||
model = YOLO(latest_model)
|
|
||||||
model.export(format="onnx")
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def validate(model_path):
|
def validate(model_path):
|
||||||
model = YOLO(model_path)
|
model = YOLO(model_path)
|
||||||
metrics = model.val()
|
print(model.val())
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
model_path = train_dataset(from_scratch=True)
|
model_path = train_dataset()
|
||||||
# validate(path.join('runs', 'detect', 'train7', 'weights', 'best.pt'))
|
validate(path.join('runs', 'detect', 'train7', 'weights', 'best.pt'))
|
||||||
# form_data_sample(500)
|
onnx_path = export_onnx(model_path)
|
||||||
# convert2rknn()
|
|
||||||
api_client = ApiClient()
|
|
||||||
onnx_path = export_onnx('azaion.pt')
|
|
||||||
|
|
||||||
|
api_client = ApiClient()
|
||||||
with open(onnx_path, 'rb') as binary_file:
|
with open(onnx_path, 'rb') as binary_file:
|
||||||
onnx_bytes = binary_file.read()
|
onnx_bytes = binary_file.read()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user