correct albumentation

try to make augmentation on GPU. saved llm prompt
2026-04-23 00:26:35 +00:00 · 2025-03-05 10:45:41 +02:00
parent 2fa864018f
commit b5e5f0b297
8 changed files with 442 additions and 138 deletions
@@ -0,0 +1,172 @@
+I have code for augmenting photos for a dataset; I'm using albumentations. The problem is that I have 38k photos (and more in the future), and albumentations works on the CPU. It's very slow, around 1800 per hour. I want to use a GPU approach for the augmentation task — DALI, since it's NVIDIA's own implementation.
+Note: it should create 7 augmented images plus the original one.
+
+Here is the code I'm using now:
+
+import os.path
+import time
+from datetime import datetime, timedelta
+from pathlib import Path
+import albumentations as A
+import cv2
+import numpy as np
+import concurrent.futures
+
+from constants import (data_images_dir, data_labels_dir, processed_images_dir, processed_labels_dir,
+                       annotation_classes, checkpoint_file, checkpoint_date_format)
+from dto.imageLabel import ImageLabel
+
+total_files_processed = 0
+transform = A.Compose([
+    # Flips, rotations and brightness
+    A.HorizontalFlip(),
+    A.RandomBrightnessContrast(brightness_limit=(-0.05, 0.05), contrast_limit=(-0.05, 0.05)),
+    A.Affine(p=0.7, scale=(0.8, 1.2), rotate=25, translate_percent=0.1),
+
+    # Weather
+    A.RandomFog(p=0.2, fog_coef_range=(0, 0.3)),
+    A.RandomShadow(p=0.2),
+
+    # Image Quality/Noise
+    A.MotionBlur(p=0.2, blur_limit=(3, 5)),
+
+    # Color Variations
+    A.HueSaturationValue(p=0.3, hue_shift_limit=8, sat_shift_limit=8, val_shift_limit=8)
+], bbox_params=A.BboxParams(format='yolo'))
+
+def correct_bboxes(labels):
+    margin = 0.0005
+    min_size = 0.01
+    res = []
+    for bboxes in labels:
+        x = bboxes[0]
+        y = bboxes[1]
+        half_width = 0.5*bboxes[2]
+        half_height = 0.5*bboxes[3]
+
+        # calc how much bboxes are outside borders ( +small margin ).
+        # value should be negative. If it's positive, then put 0, as no correction
+        w_diff = min( (1 - margin) - (x + half_width), (x - half_width) - margin, 0 )
+        w = bboxes[2] + 2*w_diff
+        if w < min_size:
+            continue
+        h_diff = min( (1 - margin) - (y + half_height), ((y - half_height) - margin), 0)
+        h = bboxes[3] + 2 * h_diff
+        if h < min_size:
+            continue
+        res.append([x, y, w, h, bboxes[4]])
+    return res
+    pass
+
+
+def image_processing(img_ann: ImageLabel) -> [ImageLabel]:
+    results = []
+    labels = correct_bboxes(img_ann.labels)
+    if len(labels) == 0 and len(img_ann.labels) != 0:
+        print('no labels but was!!!')
+    results.append(ImageLabel(
+            image=img_ann.image,
+            labels=img_ann.labels,
+            image_path=os.path.join(processed_images_dir, Path(img_ann.image_path).name),
+            labels_path=os.path.join(processed_labels_dir, Path(img_ann.labels_path).name)
+        )
+    )
+    for i in range(7):
+        try:
+            res = transform(image=img_ann.image, bboxes=labels)
+            path = Path(img_ann.image_path)
+            name = f'{path.stem}_{i + 1}'
+            img = ImageLabel(
+                image=res['image'],
+                labels=res['bboxes'],
+                image_path=os.path.join(processed_images_dir, f'{name}{path.suffix}'),
+                labels_path=os.path.join(processed_labels_dir, f'{name}.txt')
+            )
+            results.append(img)
+        except Exception as e:
+            print(f'Error during transformation: {e}')
+    return results
+
+
+def write_result(img_ann: ImageLabel):
+    cv2.imencode('.jpg', img_ann.image)[1].tofile(img_ann.image_path)
+    print(f'{img_ann.image_path} written')
+
+    with open(img_ann.labels_path, 'w') as f:
+        lines = [f'{ann[4]} {round(ann[0], 5)} {round(ann[1], 5)} {round(ann[2], 5)} {round(ann[3], 5)}\n' for ann in
+                 img_ann.labels]
+        f.writelines(lines)
+        f.close()
+    global total_files_processed
+    print(f'{total_files_processed}. {img_ann.labels_path} written')
+
+
+def read_labels(labels_path) -> [[]]:
+    with open(labels_path, 'r') as f:
+        rows = f.readlines()
+        arr = []
+        for row in rows:
+            str_coordinates = row.split(' ')
+            class_num = str_coordinates.pop(0)
+            coordinates = [float(n.replace(',', '.')) for n in str_coordinates]
+            # noinspection PyTypeChecker
+            coordinates.append(class_num)
+            arr.append(coordinates)
+        return arr
+
+
+def preprocess_annotations():
+    global total_files_processed  # Indicate that we're using the global counter
+    total_files_processed = 0
+
+    os.makedirs(processed_images_dir, exist_ok=True)
+    os.makedirs(processed_labels_dir, exist_ok=True)
+
+    processed_images = set(f.name for f in os.scandir(processed_images_dir))
+    images = []
+    with os.scandir(data_images_dir) as imd:
+        for image_file in imd:
+            if image_file.is_file() and image_file.name not in processed_images:
+                images.append(image_file)
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        executor.map(process_image_file, images)
+
+def process_image_file(image_file): # this function will be executed in thread
+    try:
+        image_path = os.path.join(data_images_dir, image_file.name)
+        labels_path = os.path.join(data_labels_dir, f'{Path(image_path).stem}.txt')
+        image = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_UNCHANGED)
+
+        img_ann = ImageLabel(
+            image_path=image_path,
+            image=image,
+            labels_path=labels_path,
+            labels=read_labels(labels_path)
+        )
+        try:
+            results = image_processing(img_ann)
+            for res_ann in results:
+                write_result(res_ann)
+        except Exception as e:
+            print(e)
+        global total_files_processed
+        total_files_processed += 1
+    except Exception as e:
+        print(f'Error appeared in thread for {image_file.name}: {e}')
+
+
+def main():
+    while True:
+        preprocess_annotations()
+        print('All processed, waiting for 2 minutes...')
+        time.sleep(120)
+
+
+# Start the polling loop only when this file is executed as a script.
+if __name__ == '__main__':
+    main()
+
+
+Please rewrite the whole code to use DALI, utilizing the GPU.
+I have 128 GB of RAM, an RTX 4090 and 32 CPU cores.
+Also note that for each image I'm making 7 augmented versions (each of these versions should be different, because of the random choice of whether to apply each augmentation, plus different random parameters within each augmentation).
+Make this number 7 configurable at the beginning of the file. Also utilize the GPU as much as possible, and use batching.