Mirror of https://github.com/azaion/ai-training.git, synced 2026-04-22 11:06:35 +00:00.
remove unnecessary middle epochs
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import concurrent.futures
|
||||
import glob
|
||||
import os
|
||||
import random
|
||||
import shutil
|
||||
@@ -146,18 +147,21 @@ def resume_training(last_pt_path):
|
||||
def train_dataset():
    """Assemble today's dataset, train a YOLO11m model on it, and publish
    the best checkpoint as the current production model.

    Side effects: creates the dataset + data.yaml on disk, writes training
    results under models_dir/today_folder, and overwrites
    constants.CURRENT_PT_MODEL with the best weights.
    """
    form_dataset()
    create_yaml()

    model = YOLO('yolo11m.yaml')

    results = model.train(
        data=abspath(path.join(today_dataset, 'data.yaml')),
        epochs=120,     # empirically set: good accuracy without excessive wall time
                        # (360k annotations on one RTX4090 ≈ 11.5 days :( )
        batch=11,       # fits current 24 GB GPU: batch 11 uses ~22 GB, batch 12 fails at 24.2 GB
        imgsz=1280,     # 1280p is the quality/speed tradeoff chosen here
        save_period=1,  # checkpoint every epoch so power outages are resumable
        workers=24,     # data-loading workers, bound to CPU count
    )

    model_dir = path.join(models_dir, today_folder)
    shutil.copytree(results.save_dir, model_dir)

    # The per-epoch checkpoints only exist for resuming; once training is
    # done, drop the middle epochs and keep best.pt.
    for checkpoint in glob.glob(path.join(model_dir, 'weights', 'epoch*')):
        os.remove(checkpoint)

    shutil.copy(path.join(model_dir, 'weights', 'best.pt'), constants.CURRENT_PT_MODEL)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user