mirror of
https://github.com/azaion/ai-training.git
synced 2026-04-23 00:26:35 +00:00
correct albumentation
try to make augmentation on GPU. saved llm prompt
This commit is contained in:
@@ -0,0 +1,172 @@
|
||||
I have code for augmenting photos for a dataset, using albumentations. The problem is that I have 38k photos (and more in the future), and albumentations works on the CPU. It is very slow, around 1800 photos per hour. I want to use a GPU approach for the augmentation task: DALI, since it is NVIDIA's own implementation.
|
||||
Note: it should create 7 augmented images plus the original one.
|
||||
|
||||
Here is the code I'm using now:
|
||||
|
||||
import os.path
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
import albumentations as A
|
||||
import cv2
|
||||
import numpy as np
|
||||
import concurrent.futures
|
||||
|
||||
from constants import (data_images_dir, data_labels_dir, processed_images_dir, processed_labels_dir,
|
||||
annotation_classes, checkpoint_file, checkpoint_date_format)
|
||||
from dto.imageLabel import ImageLabel
|
||||
|
||||
# Count of source images handled in the current pass; reset by
# preprocess_annotations() and incremented by process_image_file().
total_files_processed = 0

# Augmentation pipeline applied to every source image. Bounding boxes are
# passed in together with the image (YOLO format) so both are transformed
# consistently. Where given, `p` is the probability of applying that step.
transform = A.Compose(
    [
        # Flips, rotations and brightness
        A.HorizontalFlip(),
        A.RandomBrightnessContrast(
            contrast_limit=(-0.05, 0.05),
            brightness_limit=(-0.05, 0.05),
        ),
        A.Affine(scale=(0.8, 1.2), rotate=25, translate_percent=0.1, p=0.7),

        # Weather
        A.RandomFog(fog_coef_range=(0, 0.3), p=0.2),
        A.RandomShadow(p=0.2),

        # Image Quality/Noise
        A.MotionBlur(blur_limit=(3, 5), p=0.2),

        # Color Variations
        A.HueSaturationValue(
            hue_shift_limit=8,
            sat_shift_limit=8,
            val_shift_limit=8,
            p=0.3,
        ),
    ],
    bbox_params=A.BboxParams(format='yolo'),
)
|
||||
|
||||
def correct_bboxes(labels):
    """Shrink boxes that cross the image border and drop boxes that get too small.

    Each entry of ``labels`` is indexed as
    ``[x_center, y_center, width, height, class_id]``. The code treats 0 and 1
    as the image borders, i.e. coordinates are expected to be normalised.

    A box that extends past a border (with a small safety margin) is shrunk
    symmetrically around its unchanged centre until it fits. Boxes whose
    corrected width or height falls below the minimum size are discarded.

    Returns a new list; the input rows are not modified.
    """
    margin = 0.0005    # boxes must stay this far inside the 0..1 borders
    min_size = 0.01    # corrected boxes smaller than this are dropped

    corrected = []
    for bbox in labels:
        x, y, width, height = bbox[0], bbox[1], bbox[2], bbox[3]
        half_width = 0.5 * width
        half_height = 0.5 * height

        # How far the box sticks out past the worst-violated border (incl.
        # margin). Negative when it sticks out; clamped to 0 when it fits,
        # so a box that is already inside is left unchanged.
        w_diff = min((1 - margin) - (x + half_width), (x - half_width) - margin, 0)
        new_width = width + 2 * w_diff
        if new_width < min_size:
            continue

        h_diff = min((1 - margin) - (y + half_height), (y - half_height) - margin, 0)
        new_height = height + 2 * h_diff
        if new_height < min_size:
            continue

        corrected.append([x, y, new_width, new_height, bbox[4]])
    return corrected
|
||||
|
||||
|
||||
def image_processing(img_ann: ImageLabel) -> [ImageLabel]:
    """Build the output set for one source image: the original plus 7 augmentations.

    The first result is the untouched image with its labels exactly as read,
    re-targeted to the processed directories under the same file names. It is
    followed by up to seven augmented variants named ``<stem>_1`` to
    ``<stem>_7``, produced from the border-corrected labels. A variant whose
    transformation raises is reported and skipped, so fewer than eight items
    may be returned.
    """
    labels = correct_bboxes(img_ann.labels)
    if len(img_ann.labels) != 0 and len(labels) == 0:
        # Every box was discarded by the correction step.
        print('no labels but was!!!')

    source_path = Path(img_ann.image_path)
    results = [
        ImageLabel(
            image=img_ann.image,
            labels=img_ann.labels,
            image_path=os.path.join(processed_images_dir, source_path.name),
            labels_path=os.path.join(processed_labels_dir, Path(img_ann.labels_path).name),
        )
    ]

    for variant in range(1, 8):
        try:
            augmented = transform(image=img_ann.image, bboxes=labels)
            name = f'{source_path.stem}_{variant}'
            results.append(
                ImageLabel(
                    image=augmented['image'],
                    labels=augmented['bboxes'],
                    image_path=os.path.join(processed_images_dir, f'{name}{source_path.suffix}'),
                    labels_path=os.path.join(processed_labels_dir, f'{name}.txt'),
                )
            )
        except Exception as e:
            print(f'Error during transformation: {e}')
    return results
|
||||
|
||||
|
||||
def write_result(img_ann: ImageLabel):
    """Write one image and its label file to the paths stored on ``img_ann``.

    The image is always encoded as JPEG, whatever suffix ``image_path``
    carries, and the encoded buffer is written with ``tofile``.
    NOTE(review): presumably ``imencode`` + ``tofile`` is used instead of
    ``cv2.imwrite`` to cope with non-ASCII paths; confirm.

    Every label is written as one line ``<class> <x> <y> <w> <h>`` with the
    coordinates rounded to 5 decimal places.
    """
    cv2.imencode('.jpg', img_ann.image)[1].tofile(img_ann.image_path)
    print(f'{img_ann.image_path} written')

    # The context manager closes the file; no explicit close() is needed.
    with open(img_ann.labels_path, 'w') as f:
        f.writelines(
            f'{ann[4]} {round(ann[0], 5)} {round(ann[1], 5)} {round(ann[2], 5)} {round(ann[3], 5)}\n'
            for ann in img_ann.labels
        )

    # The module-level counter is only read here, so no `global` is required.
    print(f'{total_files_processed}. {img_ann.labels_path} written')
|
||||
|
||||
|
||||
def read_labels(labels_path) -> [[]]:
    """Parse a label file into rows of ``[coord, ..., class_num]``.

    Each non-blank line is expected to hold a class identifier followed by
    numeric coordinates. Fields may be separated by any run of whitespace, and
    a decimal comma is accepted in place of a decimal point. The class
    identifier is kept as a string and moved to the end of the row.

    Blank lines are skipped, so a trailing empty line does not produce a row
    without coordinates.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a coordinate field is not a number.
    """
    arr = []
    with open(labels_path, 'r') as f:
        for row in f:
            fields = row.split()
            if not fields:
                continue
            class_num, *str_coordinates = fields
            coordinates = [float(n.replace(',', '.')) for n in str_coordinates]
            # noinspection PyTypeChecker
            coordinates.append(class_num)
            arr.append(coordinates)
    return arr
|
||||
|
||||
|
||||
def preprocess_annotations():
    """Run one augmentation pass over every source image not yet processed.

    An image counts as processed when a file with the same name already exists
    in the processed-images directory. The remaining images are handed to a
    thread pool; leaving the executor's ``with`` block waits for all of them.
    """
    global total_files_processed  # Indicate that we're using the global counter
    total_files_processed = 0

    for out_dir in (processed_images_dir, processed_labels_dir):
        os.makedirs(out_dir, exist_ok=True)

    already_done = {entry.name for entry in os.scandir(processed_images_dir)}
    with os.scandir(data_images_dir) as entries:
        pending = [
            entry
            for entry in entries
            if entry.is_file() and entry.name not in already_done
        ]

    with concurrent.futures.ThreadPoolExecutor() as executor:
        executor.map(process_image_file, pending)
|
||||
|
||||
def process_image_file(image_file):  # this function will be executed in thread
    """Load one source image with its labels, augment it and write all results.

    ``image_file`` only needs a ``name`` attribute; the caller passes directory
    entries. The label file is looked up by the image's stem with a ``.txt``
    suffix. Errors are printed rather than raised, so one bad file does not
    stop the other workers.
    """
    # NOTE(review): the counter is incremented from several threads with no
    # lock visible here; it appears to be used only for log output.
    global total_files_processed
    try:
        image_path = os.path.join(data_images_dir, image_file.name)
        labels_path = os.path.join(data_labels_dir, f'{Path(image_path).stem}.txt')

        # Read the raw bytes first and decode from memory.
        raw_bytes = np.fromfile(image_path, dtype=np.uint8)
        image = cv2.imdecode(raw_bytes, cv2.IMREAD_UNCHANGED)

        img_ann = ImageLabel(
            image_path=image_path,
            image=image,
            labels_path=labels_path,
            labels=read_labels(labels_path),
        )
        try:
            for res_ann in image_processing(img_ann):
                write_result(res_ann)
        except Exception as e:
            print(e)
        # Counted even when augmentation or writing failed above.
        total_files_processed += 1
    except Exception as e:
        print(f'Error appeared in thread for {image_file.name}: {e}')
|
||||
|
||||
|
||||
def main():
    """Process new images forever, pausing two minutes between passes."""
    while True:
        preprocess_annotations()
        print('All processed, waiting for 2 minutes...')
        time.sleep(2 * 60)


if __name__ == '__main__':
    main()
|
||||
|
||||
|
||||
Please rewrite the whole code to use DALI, utilizing the GPU.
|
||||
I have 128 GB of RAM, an RTX 4090 and 32 CPU cores.
|
||||
Also note that for each image I'm making 7 augmented versions (each of these versions should be different, because of the random choice of whether to apply one augmentation or another, plus different random parameters within each augmentation mechanism).
|
||||
Make this number (7) configurable at the beginning of the file. Also utilize the GPU as much as possible and use batching.
|
||||
Reference in New Issue
Block a user