correct albumentation

try to make augmentation on GPU.
saved llm prompt
This commit is contained in:
zxsanny
2025-03-05 10:45:41 +02:00
parent 2fa864018f
commit b5e5f0b297
8 changed files with 442 additions and 138 deletions
-5
View File
@@ -1,5 +0,0 @@
1. Download latest release from here https://joshua-riek.github.io/ubuntu-rockchip-download/boards/orangepi-5.html
e.g. https://github.com/Joshua-Riek/ubuntu-rockchip/releases/download/v2.3.2/ubuntu-22.04-preinstalled-desktop-arm64-orangepi-5.img.xz
but check whether a more recent Ubuntu 22.04 release is available
2. Write the image to the microsd using https://bztsrc.gitlab.io/usbimager/ (sudo ./usbimager on linux) (or use BalenaEtcher)
-36
View File
@@ -1,36 +0,0 @@
# One-time setup of the two converters used for the .pt -> .onnx -> .rknn chain.
# Everything is installed under ./rknn-convert.
mkdir rknn-convert
cd rknn-convert

# Install converter PT to ONNX
# (airockchip fork of ultralytics, installed into its own virtualenv `env`)
git clone https://github.com/airockchip/ultralytics_yolov8
cd ultralytics_yolov8
sudo apt install python3.12-venv
python3 -m venv env
source env/bin/activate
pip install .
pip install onnx
# Back up the stock config, then rewrite its `model:` entry to azaion.pt while
# keeping the trailing " # ..." comment that follows the value on that line.
cp ultralytics/cfg/default.yaml ultralytics/cfg/default_backup.yaml
sed -i -E "s/(model: ).+( #.+)/\1azaion.pt\2/" ultralytics/cfg/default.yaml
cd ..
deactivate

# Install converter ONNX to RKNN
# (Miniconda is installed unattended into $HOME/miniconda, then a dedicated
# Python 3.11 environment `toolkit2` is created for rknn-toolkit2)
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
chmod +x miniconda.sh
bash miniconda.sh -b -p $HOME/miniconda
source ~/miniconda/bin/activate
conda create -n toolkit2 -y python=3.11
conda activate toolkit2
git clone https://github.com/rockchip-linux/rknn-toolkit2.git
cd rknn-toolkit2/rknn-toolkit2/packages
# The requirements file and the wheel below are the cp311 / 1.6.0 builds, which
# is why the environment above is pinned to Python 3.11.
pip install -r requirements_cp311-1.6.0.txt
pip install rknn_toolkit2-1.6.0+81f21f4d-cp311-cp311-linux_x86_64.whl
# NOTE(review): presumably rknn_toolkit2 1.6.0 is incompatible with NumPy 2.x - confirm.
pip install "numpy<2.0"
cd ../../../
git clone https://github.com/airockchip/rknn_model_zoo.git
# Point the yolov8 example's DATASET_PATH at the azaion sample list.
sed -i -E "s#(DATASET_PATH = ').+(')#\1/azaion/data-sample/azaion_subset.txt\2 #" rknn_model_zoo/examples/yolov8/python/convert.py
# Two deactivations: presumably one for `toolkit2` and one for the conda base
# environment activated by `source ~/miniconda/bin/activate` - confirm.
conda deactivate
conda deactivate
-19
View File
@@ -1,19 +0,0 @@
# Converts /azaion/models/azaion.pt to ONNX and then to RKNN.
# Expects the rknn-convert directory layout (venv `env`, conda env `toolkit2`,
# rknn_model_zoo checkout) to exist already.

# PT to ONNX
cd rknn-convert/ultralytics_yolov8/
cp --verbose /azaion/models/azaion.pt .
source env/bin/activate
pip install onnx
# Run the exporter module directly from the checkout; PYTHONPATH makes the
# local `ultralytics` package importable.
export PYTHONPATH=./
python ./ultralytics/engine/exporter.py
cp --verbose azaion.onnx ../
cd ..
deactivate
cp --verbose azaion.onnx /azaion/models/

# ONNX to RKNN
source ~/miniconda/bin/activate
conda activate toolkit2
cd rknn_model_zoo/examples/yolov8/python
# Arguments as passed: input ONNX, "rk3588", "i8", output path.
# NOTE(review): presumably target platform and int8 quantisation - confirm against convert.py.
python convert.py ../../../../azaion.onnx rk3588 i8 /azaion/models/azaion.rknn
# Two deactivations: presumably `toolkit2` first, then the conda base environment - confirm.
conda deactivate
conda deactivate
+171
View File
@@ -0,0 +1,171 @@
import os
import time
import numpy as np
import cv2
from pathlib import Path
import concurrent.futures
import nvidia.dali as dali
import nvidia.dali.fn as fn
import nvidia.dali.types as types
from constants import (
data_images_dir,
data_labels_dir,
processed_images_dir,
processed_labels_dir
)
# Configurable number of augmentations per image
NUM_AUGMENTATIONS = 7


class DataLoader:
    """Augment not-yet-processed images on the GPU with NVIDIA DALI.

    For every image in ``data_images_dir`` that has a label file and is not yet
    present in ``processed_images_dir``, the original and NUM_AUGMENTATIONS
    augmented copies are written together with matching YOLO label files.

    Geometric augmentations (horizontal flip, rotation) are replayed on the
    bounding boxes so the written labels stay aligned with the written pixels.
    The random resized crop of the first draft is intentionally left out: the
    operator needs a fixed output ``size`` and does not expose the chosen crop
    window, so labels could not follow it.
    """

    # Boxes whose normalized width or height falls below this after being
    # clipped to the image are discarded.
    MIN_BOX_SIZE = 0.01

    def __init__(self, batch_size=32):
        """Remember the batch size and make sure the output directories exist."""
        self.batch_size = batch_size
        os.makedirs(processed_images_dir, exist_ok=True)
        os.makedirs(processed_labels_dir, exist_ok=True)

    def _read_labels(self, labels_path):
        """Parse a YOLO label file into ``[x, y, w, h, class_num]`` rows.

        ``class_num`` is kept as the original string. Decimal commas are
        accepted. Blank lines are skipped, because an empty row would otherwise
        fail later when the label file is written.
        """
        arr = []
        with open(labels_path, 'r') as f:
            for row in f:
                # split() without arguments tolerates repeated spaces, tabs and
                # the trailing newline.
                str_coordinates = row.split()
                if not str_coordinates:
                    continue
                class_num = str_coordinates.pop(0)
                coordinates = [float(n.replace(',', '.')) for n in str_coordinates]
                coordinates.append(class_num)
                arr.append(coordinates)
        return arr

    def _get_image_label_pairs(self):
        """Return ``(image_path, labels_path)`` for every unprocessed image that has labels."""
        processed_images = set(f.name for f in os.scandir(processed_images_dir))
        pairs = []
        for image_file in os.scandir(data_images_dir):
            if image_file.is_file() and image_file.name not in processed_images:
                image_path = os.path.join(data_images_dir, image_file.name)
                labels_path = os.path.join(data_labels_dir, f'{Path(image_path).stem}.txt')
                if os.path.exists(labels_path):
                    pairs.append((image_path, labels_path))
        return pairs

    def create_dali_pipeline(self, file_paths):
        """Create the (un-built) DALI augmentation pipeline.

        Args:
            file_paths: a sequence of image paths, or - as in the first draft -
                the path of a DALI file-list text file whose entries are
                resolved against ``data_images_dir``.

        Returns:
            A DALI pipeline whose outputs are, in order: NUM_AUGMENTATIONS
            augmented image batches, the original image batch,
            NUM_AUGMENTATIONS flip flags and NUM_AUGMENTATIONS rotation angles
            (degrees). The flags and angles let the caller transform labels.
        """
        if isinstance(file_paths, (str, os.PathLike)):
            reader_args = {'file_root': data_images_dir, 'file_list': os.fspath(file_paths)}
        else:
            # An explicit list avoids the "name label" file-list format and any
            # ambiguity about absolute paths combined with file_root.
            reader_args = {'files': [os.fspath(p) for p in file_paths]}

        @dali.pipeline_def(batch_size=self.batch_size, num_threads=32, device_id=0)
        def augmentation_pipeline():
            # Read and decode images; the 'mixed' decoder outputs RGB on the GPU.
            jpegs, _ = fn.readers.file(random_shuffle=False, **reader_args)
            images = fn.decoders.image(jpegs, device='mixed')

            augmented_images = []
            flips = []
            angles = []
            for _ in range(NUM_AUGMENTATIONS):
                flip = fn.random.coin_flip()
                angle = fn.random.uniform(range=(-25.0, 25.0))

                aug_image = fn.flip(images, horizontal=flip)
                # brightness/contrast are multipliers in DALI (1.0 = unchanged),
                # so a +-5 % jitter is a range around 1, not around 0.
                aug_image = fn.brightness_contrast(
                    aug_image,
                    brightness=fn.random.uniform(range=(0.95, 1.05)),
                    contrast=fn.random.uniform(range=(0.95, 1.05))
                )
                # keep_size=True keeps the canvas dimensions so normalized box
                # coordinates still refer to the same width/height.
                aug_image = fn.rotate(aug_image, angle=angle, fill_value=0, keep_size=True)
                # NOTE(review): on 8-bit images a stddev below 0.1 is less than
                # one intensity level - confirm the intended noise strength.
                aug_image = fn.noise.gaussian(aug_image, mean=0.0, stddev=fn.random.uniform(range=(0.0, 0.1)))
                # hue is an additive shift in degrees; saturation and value are
                # multipliers, hence the ranges around 1.
                aug_image = fn.hsv(
                    aug_image,
                    hue=fn.random.uniform(range=(-8.0, 8.0)),
                    saturation=fn.random.uniform(range=(0.95, 1.05)),
                    value=fn.random.uniform(range=(0.95, 1.05))
                )
                augmented_images.append(aug_image)
                flips.append(flip)
                angles.append(angle)

            # Also include original image, then the parameters needed for labels.
            return tuple(augmented_images + [images] + flips + angles)

        return augmentation_pipeline()

    @staticmethod
    def _transform_labels(labels, flipped, angle_deg, width, height):
        """Apply the image's flip and rotation to YOLO boxes.

        Args:
            labels: rows of ``[x, y, w, h, class_num]`` in normalized coordinates.
            flipped: whether the image was mirrored horizontally (applied first).
            angle_deg: rotation about the image centre, in degrees, applied
                after the flip. Positive is treated as counter-clockwise as seen
                on screen. NOTE(review): verify the direction against DALI's
                ``fn.rotate`` on a sample image.
            width, height: image size in pixels (needed because rotation is not
                aspect-ratio independent in normalized coordinates).

        Returns:
            New rows in the same format. Rotated boxes are replaced by their
            axis-aligned hull clipped to the image; boxes that end up smaller
            than ``MIN_BOX_SIZE`` are dropped.
        """
        theta = np.deg2rad(angle_deg)
        cos_t = float(np.cos(theta))
        sin_t = float(np.sin(theta))
        centre_x = width / 2.0
        centre_y = height / 2.0

        transformed = []
        for ann in labels:
            x, y, w, h, class_num = ann[0], ann[1], ann[2], ann[3], ann[4]
            if flipped:
                x = 1.0 - x
            if angle_deg:
                xs = []
                ys = []
                for sign_x in (-1, 1):
                    for sign_y in (-1, 1):
                        dx = (x + sign_x * w / 2.0) * width - centre_x
                        dy = (y + sign_y * h / 2.0) * height - centre_y
                        # Image y axis points down, hence the sign pattern.
                        xs.append(dx * cos_t + dy * sin_t + centre_x)
                        ys.append(-dx * sin_t + dy * cos_t + centre_y)
                left = min(max(min(xs), 0.0), width)
                right = min(max(max(xs), 0.0), width)
                top = min(max(min(ys), 0.0), height)
                bottom = min(max(max(ys), 0.0), height)
                w = (right - left) / width
                h = (bottom - top) / height
                if w < DataLoader.MIN_BOX_SIZE or h < DataLoader.MIN_BOX_SIZE:
                    continue
                x = (left + right) / 2.0 / width
                y = (top + bottom) / 2.0 / height
            transformed.append([float(x), float(y), float(w), float(h), class_num])
        return transformed

    def process_batch(self):
        """Augment every pending image and write images plus labels to disk."""
        image_label_pairs = self._get_image_label_pairs()
        if not image_label_pairs:
            # Nothing to do; also avoids building a reader over an empty file set.
            return

        pipeline = self.create_dali_pipeline([img_path for img_path, _ in image_label_pairs])
        pipeline.build()

        flip_base = NUM_AUGMENTATIONS + 1
        angle_base = flip_base + NUM_AUGMENTATIONS

        for batch_idx in range(0, len(image_label_pairs), self.batch_size):
            batch_pairs = image_label_pairs[batch_idx:batch_idx + self.batch_size]
            # run() returns one TensorList per pipeline output; GPU lists must
            # be copied to host memory before NumPy/OpenCV can touch them.
            outputs = [out.as_cpu() if hasattr(out, 'as_cpu') else out for out in pipeline.run()]

            # Only the first len(batch_pairs) samples are consumed, so a
            # partially filled last batch is handled by the slice above.
            for img_idx, (orig_img_path, orig_labels_path) in enumerate(batch_pairs):
                orig_labels = self._read_labels(orig_labels_path)

                self._write_image_and_labels(
                    np.array(outputs[NUM_AUGMENTATIONS][img_idx]),
                    orig_img_path,
                    orig_labels,
                    is_original=True
                )

                for aug_idx in range(NUM_AUGMENTATIONS):
                    aug_image = np.array(outputs[aug_idx][img_idx])
                    flipped = bool(np.array(outputs[flip_base + aug_idx][img_idx]).item())
                    angle = float(np.array(outputs[angle_base + aug_idx][img_idx]).item())
                    height, width = aug_image.shape[:2]
                    self._write_image_and_labels(
                        aug_image,
                        orig_img_path,
                        self._transform_labels(orig_labels, flipped, angle, width, height),
                        aug_idx=aug_idx
                    )

    def _write_image_and_labels(self, image, orig_img_path, labels, is_original=False, aug_idx=None):
        """Write one image (JPEG-encoded) and its YOLO label file.

        Args:
            image: RGB pixel data, as an ndarray or anything ``np.asarray`` accepts.
            orig_img_path: path of the source image; determines the output names.
            labels: rows of ``[x, y, w, h, class_num]``.
            is_original: keep the source file name instead of adding a suffix.
            aug_idx: zero-based augmentation index; the suffix is ``aug_idx + 1``.
        """
        path = Path(orig_img_path)
        if is_original:
            img_name = path.name
            label_name = f'{path.stem}.txt'
        else:
            img_name = f'{path.stem}_{aug_idx + 1}{path.suffix}'
            label_name = f'{path.stem}_{aug_idx + 1}.txt'

        # DALI decodes to RGB while OpenCV encodes from BGR; without the swap
        # red and blue would be exchanged in every written file.
        pixels = np.asarray(image)
        if pixels.ndim == 3 and pixels.shape[2] == 3:
            pixels = cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR)

        img_path = os.path.join(processed_images_dir, img_name)
        encoded_ok, encoded = cv2.imencode('.jpg', pixels)
        if not encoded_ok:
            raise RuntimeError(f'Could not encode {img_name} as JPEG')
        # tofile() rather than cv2.imwrite, as in the original.
        encoded.tofile(img_path)

        label_path = os.path.join(processed_labels_dir, label_name)
        with open(label_path, 'w') as f:
            lines = [f'{ann[4]} {round(ann[0], 5)} {round(ann[1], 5)} {round(ann[2], 5)} {round(ann[3], 5)}\n' for ann in labels]
            f.writelines(lines)
def main():
    """Run augmentation passes forever, sleeping two minutes between passes."""
    while True:
        # A fresh loader per pass, exactly as before (its constructor only
        # makes sure the output directories exist).
        DataLoader().process_batch()
        print('All processed, waiting for 2 minutes...')
        time.sleep(120)


if __name__ == '__main__':
    main()
+46 -38
View File
@@ -5,11 +5,29 @@ from pathlib import Path
import albumentations as A
import cv2
import numpy as np
import concurrent.futures
from constants import (data_images_dir, data_labels_dir, processed_images_dir, processed_labels_dir,
annotation_classes, checkpoint_file, checkpoint_date_format)
from dto.imageLabel import ImageLabel
total_files_processed = 0
transform = A.Compose([
# Flips, rotations and brightness
A.HorizontalFlip(),
A.RandomBrightnessContrast(brightness_limit=(-0.05, 0.05), contrast_limit=(-0.05, 0.05)),
A.Affine(p=0.7, scale=(0.8, 1.2), rotate=25, translate_percent=0.1),
# Weather
A.RandomFog(p=0.2, fog_coef_range=(0, 0.3)),
A.RandomShadow(p=0.2),
# Image Quality/Noise
A.MotionBlur(p=0.2, blur_limit=(3, 5)),
# Color Variations
A.HueSaturationValue(p=0.3, hue_shift_limit=8, sat_shift_limit=8, val_shift_limit=8)
], bbox_params=A.BboxParams(format='yolo'))
def correct_bboxes(labels):
margin = 0.0005
@@ -37,31 +55,18 @@ def correct_bboxes(labels):
def image_processing(img_ann: ImageLabel) -> [ImageLabel]:
transforms = [
A.Compose([A.HorizontalFlip(always_apply=True)],
bbox_params=A.BboxParams(format='yolo', )),
A.Compose([A.RandomBrightnessContrast(always_apply=True)],
bbox_params=A.BboxParams(format='yolo')),
A.Compose([A.SafeRotate(limit=90, always_apply=True)],
bbox_params=A.BboxParams(format='yolo')),
A.Compose([A.SafeRotate(limit=90, always_apply=True),
A.RandomBrightnessContrast(always_apply=True)],
bbox_params=A.BboxParams(format='yolo')),
A.Compose([A.ShiftScaleRotate(scale_limit=0.2, always_apply=True),
A.VerticalFlip(always_apply=True), ],
bbox_params=A.BboxParams(format='yolo')),
A.Compose([A.ShiftScaleRotate(scale_limit=0.2, always_apply=True)],
bbox_params=A.BboxParams(format='yolo')),
A.Compose([A.SafeRotate(limit=90, always_apply=True),
A.RandomBrightnessContrast(always_apply=True)],
bbox_params=A.BboxParams(format='yolo'))
]
results = []
labels = correct_bboxes(img_ann.labels)
if len(labels) == 0 and len(img_ann.labels) != 0:
print('no labels but was!!!')
for i, transform in enumerate(transforms):
results.append(ImageLabel(
image=img_ann.image,
labels=img_ann.labels,
image_path=os.path.join(processed_images_dir, Path(img_ann.image_path).name),
labels_path=os.path.join(processed_labels_dir, Path(img_ann.labels_path).name)
)
)
for i in range(7):
try:
res = transform(image=img_ann.image, bboxes=labels)
path = Path(img_ann.image_path)
@@ -87,7 +92,8 @@ def write_result(img_ann: ImageLabel):
img_ann.labels]
f.writelines(lines)
f.close()
print(f'{img_ann.labels_path} written')
global total_files_processed
print(f'{total_files_processed}. {img_ann.labels_path} written')
def read_labels(labels_path) -> [[]]:
@@ -104,19 +110,10 @@ def read_labels(labels_path) -> [[]]:
return arr
def process_image(img_ann):
results = image_processing(img_ann)
for res_ann in results:
write_result(res_ann)
write_result(ImageLabel(
image=img_ann.image,
labels=img_ann.labels,
image_path=os.path.join(processed_images_dir, Path(img_ann.image_path).name),
labels_path=os.path.join(processed_labels_dir, Path(img_ann.labels_path).name)
))
def preprocess_annotations():
global total_files_processed # Indicate that we're using the global counter
total_files_processed = 0
os.makedirs(processed_images_dir, exist_ok=True)
os.makedirs(processed_labels_dir, exist_ok=True)
@@ -126,20 +123,31 @@ def preprocess_annotations():
for image_file in imd:
if image_file.is_file() and image_file.name not in processed_images:
images.append(image_file)
with concurrent.futures.ThreadPoolExecutor() as executor:
executor.map(process_image_file, images)
for image_file in images:
def process_image_file(image_file): # this function will be executed in thread
try:
image_path = os.path.join(data_images_dir, image_file.name)
labels_path = os.path.join(data_labels_dir, f'{Path(image_path).stem}.txt')
image = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_UNCHANGED)
process_image(ImageLabel(
img_ann = ImageLabel(
image_path=image_path,
image=image,
labels_path=labels_path,
labels=read_labels(labels_path)
))
)
try:
results = image_processing(img_ann)
for res_ann in results:
write_result(res_ann)
except Exception as e:
print(f'Error appeared {e}')
print(e)
global total_files_processed
total_files_processed += 1
except Exception as e:
print(f'Error appeared in thread for {image_file.name}: {e}')
def main():
+172
View File
@@ -0,0 +1,172 @@
I have code that augments photos for a dataset using albumentations. The problem is that I have 38k photos (and more in the future), and albumentations runs on the CPU. It is very slow, around 1800 photos per hour. I want to use a GPU approach for the augmentation task: DALI, since it is Nvidia's own implementation.
Note that it should create 7 augmented images plus the original one.
Here is the code I'm using now:
import os.path
import time
from datetime import datetime, timedelta
from pathlib import Path
import albumentations as A
import cv2
import numpy as np
import concurrent.futures
from constants import (data_images_dir, data_labels_dir, processed_images_dir, processed_labels_dir,
annotation_classes, checkpoint_file, checkpoint_date_format)
from dto.imageLabel import ImageLabel
# Progress counter used in log output; reset by preprocess_annotations() and
# incremented by process_image_file() after each source image.
total_files_processed = 0

# Single augmentation recipe applied to every image (called 7 times per image
# by image_processing, so each call draws new random parameters).
# Transforms without an explicit `p` use the library's default probability.
transform = A.Compose([
    # Flips, rotations and brightness
    A.HorizontalFlip(),
    A.RandomBrightnessContrast(brightness_limit=(-0.05, 0.05), contrast_limit=(-0.05, 0.05)),
    A.Affine(p=0.7, scale=(0.8, 1.2), rotate=25, translate_percent=0.1),
    # Weather
    A.RandomFog(p=0.2, fog_coef_range=(0, 0.3)),
    A.RandomShadow(p=0.2),
    # Image Quality/Noise
    A.MotionBlur(p=0.2, blur_limit=(3, 5)),
    # Color Variations
    A.HueSaturationValue(p=0.3, hue_shift_limit=8, sat_shift_limit=8, val_shift_limit=8)
    # Boxes are passed in YOLO format (normalized x_center, y_center, w, h).
    # NOTE(review): no label_fields are declared, so the class id presumably
    # travels as the 5th element of each box - confirm.
], bbox_params=A.BboxParams(format='yolo'))
def correct_bboxes(labels):
    """Shrink boxes that cross the image border and drop boxes that get too small.

    Each label is ``[x_center, y_center, width, height, class]`` in normalized
    coordinates. A box reaching past the border (plus a small margin) is
    narrowed symmetrically around its unchanged centre by twice the larger
    overshoot. Boxes narrower or shorter than ``min_size`` afterwards are
    omitted from the result.
    """
    margin = 0.0005
    min_size = 0.01
    upper = 1 - margin

    corrected = []
    for box in labels:
        cx, cy = box[0], box[1]
        half_w = 0.5 * box[2]
        half_h = 0.5 * box[3]

        # Overshoot is negative when the box sticks out; 0 means no correction.
        overshoot_x = min(upper - (cx + half_w), (cx - half_w) - margin, 0)
        new_w = box[2] + 2 * overshoot_x
        if new_w < min_size:
            continue

        overshoot_y = min(upper - (cy + half_h), ((cy - half_h) - margin), 0)
        new_h = box[3] + 2 * overshoot_y
        if new_h < min_size:
            continue

        corrected.append([cx, cy, new_w, new_h, box[4]])
    return corrected
def image_processing(img_ann: ImageLabel) -> [ImageLabel]:
    """Build the list of outputs for one image: the original plus up to 7 augmentations.

    The first entry carries the untouched image with its labels as read. Each
    augmentation is produced by the module-level ``transform`` from the
    border-corrected boxes; a failing augmentation is logged and skipped.
    """
    labels = correct_bboxes(img_ann.labels)
    if len(labels) == 0 and len(img_ann.labels) != 0:
        print('no labels but was!!!')

    original = ImageLabel(
        image=img_ann.image,
        labels=img_ann.labels,
        image_path=os.path.join(processed_images_dir, Path(img_ann.image_path).name),
        labels_path=os.path.join(processed_labels_dir, Path(img_ann.labels_path).name)
    )
    results = [original]

    for number in range(1, 8):
        try:
            augmented = transform(image=img_ann.image, bboxes=labels)
            source = Path(img_ann.image_path)
            name = f'{source.stem}_{number}'
            results.append(ImageLabel(
                image=augmented['image'],
                labels=augmented['bboxes'],
                image_path=os.path.join(processed_images_dir, f'{name}{source.suffix}'),
                labels_path=os.path.join(processed_labels_dir, f'{name}.txt')
            ))
        except Exception as e:
            print(f'Error during transformation: {e}')
    return results
def write_result(img_ann: ImageLabel):
    """Write one image as JPEG and its boxes as a YOLO label file, logging both."""
    global total_files_processed

    # imencode + tofile instead of cv2.imwrite, as the pipeline has always done.
    jpeg_bytes = cv2.imencode('.jpg', img_ann.image)[1]
    jpeg_bytes.tofile(img_ann.image_path)
    print(f'{img_ann.image_path} written')

    # The context manager closes the file; no explicit close() is needed.
    with open(img_ann.labels_path, 'w') as f:
        f.writelines(
            f'{ann[4]} {round(ann[0], 5)} {round(ann[1], 5)} {round(ann[2], 5)} {round(ann[3], 5)}\n'
            for ann in list(img_ann.labels)
        )
    print(f'{total_files_processed}. {img_ann.labels_path} written')
def read_labels(labels_path) -> [[]]:
    """Parse a YOLO label file into ``[x, y, w, h, class_num]`` rows.

    ``class_num`` is kept as the string found in the file. Decimal commas are
    accepted. Blank lines are skipped and runs of whitespace are treated as a
    single separator; previously a blank line produced a row holding only the
    newline and a doubled space raised ``ValueError``.
    """
    arr = []
    with open(labels_path, 'r') as f:
        for row in f:
            str_coordinates = row.split()
            if not str_coordinates:
                continue
            class_num = str_coordinates.pop(0)
            coordinates = [float(n.replace(',', '.')) for n in str_coordinates]
            # noinspection PyTypeChecker
            coordinates.append(class_num)
            arr.append(coordinates)
    return arr
def preprocess_annotations():
    """Process every source image that has no counterpart in the processed folder.

    Resets the global progress counter, ensures the output directories exist,
    collects the pending directory entries and hands them to a thread pool.
    """
    global total_files_processed  # Indicate that we're using the global counter
    total_files_processed = 0

    for directory in (processed_images_dir, processed_labels_dir):
        os.makedirs(directory, exist_ok=True)

    already_done = {entry.name for entry in os.scandir(processed_images_dir)}
    with os.scandir(data_images_dir) as imd:
        pending = [
            image_file for image_file in imd
            if image_file.is_file() and image_file.name not in already_done
        ]

    # Leaving the with-block waits for all submitted work to finish.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        executor.map(process_image_file, pending)
def process_image_file(image_file):  # this function will be executed in thread
    """Load one image with its labels, augment it and write all results.

    Errors while augmenting or writing are printed and the image still counts
    as processed; errors while loading are reported with the file name.
    """
    global total_files_processed
    try:
        image_path = os.path.join(data_images_dir, image_file.name)
        labels_path = os.path.join(data_labels_dir, f'{Path(image_path).stem}.txt')
        # fromfile + imdecode instead of cv2.imread, as the pipeline has always done.
        raw_bytes = np.fromfile(image_path, dtype=np.uint8)
        image = cv2.imdecode(raw_bytes, cv2.IMREAD_UNCHANGED)

        img_ann = ImageLabel(
            image_path=image_path,
            image=image,
            labels_path=labels_path,
            labels=read_labels(labels_path)
        )
        try:
            for res_ann in image_processing(img_ann):
                write_result(res_ann)
        except Exception as e:
            print(e)
        total_files_processed += 1
    except Exception as e:
        print(f'Error appeared in thread for {image_file.name}: {e}')
def main():
    """Run preprocessing passes forever, sleeping two minutes between passes."""
    pause_seconds = 120
    while True:
        preprocess_annotations()
        print('All processed, waiting for 2 minutes...')
        time.sleep(pause_seconds)


if __name__ == '__main__':
    main()
Please rewrite the whole code to use DALI, utilizing the GPU.
I have 128 GB of RAM, an RTX 4090 and 32 CPU cores.
Also note that for each image I'm making 7 augmented versions (each of these versions should be different, because of the random choice of which augmentations are applied plus the different random parameters within each augmentation).
Make this number 7 configurable at the beginning of the file. Also utilize the GPU as much as possible and use batching.
+9 -7
View File
@@ -3,12 +3,14 @@ torch
torchvision
torchaudio
ultralytics
albumentations~=2.0.4
albumentations
opencv-python~=4.11.0.86
matplotlib~=3.10.0
PyYAML~=6.0.2
cryptography~=44.0.1
numpy~=2.1.1
requests~=2.32.3
opencv-python
matplotlib
PyYAML
cryptography
numpy
requests
pyyaml
boto3
nvidia-dali-cuda120
+35 -24
View File
@@ -1,4 +1,4 @@
import io
import concurrent.futures
import os
import random
import shutil
@@ -31,7 +31,7 @@ test_set = 10
old_images_percentage = 75
DEFAULT_CLASS_NUM = 80
total_files_copied = 0
def form_dataset(from_date: datetime):
makedirs(today_dataset, exist_ok=True)
@@ -67,6 +67,25 @@ def form_dataset(from_date: datetime):
def copy_annotations(images, folder):
global total_files_copied
total_files_copied = 0
def copy_image(image):
global total_files_copied
total_files_copied += 1
label_name = f'{Path(image.path).stem}.txt'
label_path = path.join(processed_labels_dir, label_name)
if check_label(label_path):
shutil.copy(image.path, path.join(destination_images, image.name))
shutil.copy(label_path, path.join(destination_labels, label_name))
else:
shutil.copy(image.path, path.join(corrupted_images_dir, image.name))
shutil.copy(label_path, path.join(corrupted_labels_dir, label_name))
print(f'Label {label_path} is corrupted! Copy with its image to the corrupted directory ({corrupted_labels_dir})')
if total_files_copied % 1000 == 0:
print(f'{total_files_copied} copied...')
destination_images = path.join(today_dataset, folder, 'images')
makedirs(destination_images, exist_ok=True)
@@ -78,19 +97,10 @@ def copy_annotations(images, folder):
copied = 0
print(f'Copying annotations to {destination_images} and {destination_labels} folders:')
for image in images:
label_name = f'{Path(image.path).stem}.txt'
label_path = path.join(processed_labels_dir, label_name)
if check_label(label_path):
shutil.copy(image.path, path.join(destination_images, image.name))
shutil.copy(label_path, path.join(destination_labels, label_name))
else:
shutil.copy(image.path, path.join(corrupted_images_dir, image.name))
shutil.copy(label_path, path.join(corrupted_labels_dir, label_name))
print(f'Label {label_path} is corrupted! Copy with its image to the corrupted directory ({corrupted_labels_dir})')
copied = copied + 1
if copied % 1000 == 0:
print(f'{copied} copied...')
with concurrent.futures.ThreadPoolExecutor() as executor:
executor.map(copy_image, images)
print(f'Copied all {copied} annotations to {destination_images} and {destination_labels} folders')
@@ -143,11 +153,14 @@ def revert_to_processed_data(date):
def get_latest_model():
def convert(d: str):
if not d.startswith(prefix):
return None
dir_date = datetime.strptime(d.replace(prefix, ''), '%Y-%m-%d')
dir_model_path = path.join(models_dir, d, 'weights', 'best.pt')
return {'date': dir_date, 'path': dir_model_path}
dates = [convert(d) for d in next(os.walk(models_dir))[1]]
dates = list(filter(lambda x : x is not None, dates))
sorted_dates = list(sorted(dates, key=lambda x: x['date'] ))
if len(sorted_dates) == 0:
return None, None
@@ -223,9 +236,8 @@ def validate(model_path):
def upload_model(model_path: str):
# model = YOLO(model_path)
# model.export(format="onnx", imgsz=1280, nms=True, batch=4)
model = YOLO(model_path)
model.export(format="onnx", imgsz=1280, nms=True, batch=4)
onnx_model = path.dirname(model_path) + Path(model_path).stem + '.onnx'
with open(onnx_model, 'rb') as f_in:
@@ -250,9 +262,8 @@ def upload_model(model_path: str):
api.upload_file('azaion.onnx.small', onnx_part_small)
if __name__ == '__main__':
# model_path = train_dataset('2024-10-26', from_scratch=True)
# validate(path.join('runs', 'detect', 'train7', 'weights', 'best.pt'))
# form_data_sample(500)
# convert2rknn()
model_path = 'azaion.pt'
upload_model(model_path)
model_path = train_dataset(from_scratch=True)
validate(path.join('runs', 'detect', 'train7', 'weights', 'best.pt'))
form_data_sample(500)
convert2rknn()
upload_model('azaion.pt')