# ai-training/convert-annotations.py

import os
import shutil
import xml.etree.cElementTree as et
from pathlib import Path
import cv2

# Output directories that convert() creates and fills.
images_dir = 'images'
labels_dir = 'labels'

# XML element names used when walking a Pascal VOC annotation.
tag_size = 'size'
tag_object = 'object'
tag_name = 'name'
tag_bndbox = 'bndbox'

# Object name -> numeric class id (1 = truck, 2 = passenger car).
name_class_map = {
    'Truck': 1,
    'Car': 2,
    'Taxi': 2,
}
# Objects carrying one of these names are left out of the output entirely.
forbidden_classes = ['Motorcycle']
# Class id given to any object name that is neither mapped nor forbidden.
default_class = 1

# File extensions (without the dot) that convert() treats as images.
image_extensions = ['jpg', 'png', 'jpeg']


def convert(folder, read_annotations, ann_format):
    """Convert every annotated image in *folder* into an image/label pair.

    For each image file found directly in *folder*, the annotation file with
    the same stem and the extension *ann_format* is read, converted with
    *read_annotations*, and written to ``labels_dir`` as ``<stem>.txt``; the
    image itself is copied to ``images_dir``. Images that cannot be read or
    whose annotations cannot be converted are reported and skipped.

    Args:
        folder: Directory containing the images and their annotation files.
        read_annotations: Callable ``(width, height, text)`` returning the
            label lines to write for one image.
        ann_format: Extension (without the dot) of the annotation files.
    """
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    for f in os.listdir(folder):
        # Compare the real extension rather than the last three characters,
        # otherwise 'jpeg' can never match ('x.jpeg'[-3:] is 'peg').
        if Path(f).suffix[1:].lower() not in image_extensions:
            continue

        im = cv2.imread(os.path.join(folder, f))
        if im is None:
            # cv2.imread signals an unreadable file by returning None.
            print(f'Image {f} could not be read. Skipping')
            continue
        height, width = im.shape[:2]

        label = f'{Path(f).stem}.{ann_format}'
        try:
            with open(os.path.join(folder, label), 'r') as label_file:
                lines = read_annotations(width, height, label_file.read())
        except ValueError as val_err:
            print(f'Image {f} annotations could not be converted. Error: {val_err}')
            continue
        except Exception as e:
            print(f'Error conversion for {f}. Error: {e}')
            # Without this, `lines` would be undefined or left over from the
            # previous image and be written out as this image's labels.
            continue

        shutil.copy(os.path.join(folder, f), os.path.join(images_dir, f))
        with open(os.path.join(labels_dir, f'{Path(label).stem}.txt'), 'w') as new_label_file:
            new_label_file.writelines(lines)
        print(f'Image {f} has been processed successfully')


def minmax2yolo(width, height, xmin, xmax, ymin, ymax):
    """Turn a min/max pixel box into a normalised centre/size box.

    Args:
        width: Image width the x values are divided by.
        height: Image height the y values are divided by.
        xmin: Left edge of the box.
        xmax: Right edge of the box.
        ymin: Top edge of the box.
        ymax: Bottom edge of the box.

    Returns:
        A tuple ``(c_x, c_y, c_w, c_h)`` — box centre and box size as
        fractions of the image size, each rounded to 5 decimal places.
    """
    box_w = (xmax - xmin) / width
    box_h = (ymax - ymin) / height
    # The centre is the normalised min edge plus half the normalised extent.
    centre_x = xmin / width + box_w / 2
    centre_y = ymin / height + box_h / 2
    return tuple(round(value, 5) for value in (centre_x, centre_y, box_w, box_h))


def read_pascal_voc(width, height, s):
    """Convert a Pascal VOC XML annotation into YOLO label lines.

    Every ``object`` element is mapped to a class id through
    ``name_class_map`` (falling back to ``default_class``); objects whose
    name is listed in ``forbidden_classes`` are dropped. Objects without a
    usable bounding box, or whose box has a zero component, are dropped
    silently; boxes whose normalised values fall outside ``[0, 1]`` are
    reported and dropped.

    Args:
        width: Image width in pixels.
        height: Image height in pixels.
        s: The XML document as a string.

    Returns:
        A list of newline-terminated ``'<class> <c_x> <c_y> <c_w> <c_h>'``
        strings, one per accepted object.

    Raises:
        ValueError: If a bounding-box coordinate is not an integer literal.
        KeyError: If a ``bndbox`` element lacks one of xmin/xmax/ymin/ymax.
    """
    root = et.fromstring(s)
    lines = []
    for node_object in root.findall(tag_object):
        class_num = default_class
        box = (0, 0, 0, 0)
        for node_object_ch in node_object:
            if node_object_ch.tag == tag_name:
                key = node_object_ch.text
                if key in name_class_map:
                    class_num = name_class_map[key]
                elif key in forbidden_classes:
                    class_num = -1  # marks the object for skipping below
                else:
                    class_num = default_class
            elif node_object_ch.tag == tag_bndbox:
                bbox_dict = {bbox_ch.tag: bbox_ch.text for bbox_ch in node_object_ch}
                box = minmax2yolo(width, height,
                                  int(bbox_dict['xmin']),
                                  int(bbox_dict['xmax']),
                                  int(bbox_dict['ymin']),
                                  int(bbox_dict['ymax']))
        if class_num == -1:
            continue
        # Check both ends of the range: negative or inverted coordinates give
        # negative normalised values, which are just as invalid as values > 1.
        if not all(0 <= value <= 1 for value in box):
            print('Values are out of bounds')
        elif all(value != 0 for value in box):
            c_x, c_y, c_w, c_h = box
            lines.append(f'{class_num} {c_x} {c_y} {c_w} {c_h}\n')
    return lines


def read_bbox_oriented(width, height, s):
    """Convert whitespace-separated 14-field annotation lines into YOLO lines.

    Each non-blank line must hold exactly 14 fields. Fields 6-9 are read as
    the x coordinates and fields 10-13 as the y coordinates of a box's
    corners (0-based indices); the axis-aligned box enclosing them is
    converted with ``minmax2yolo``. Every accepted box is written with
    class id 2. Boxes whose normalised values fall outside ``[0, 1]`` are
    reported and dropped.

    Args:
        width: Image width in pixels.
        height: Image height in pixels.
        s: Full text of the annotation file.

    Returns:
        A list of newline-terminated ``'2 <c_x> <c_y> <c_w> <c_h>'`` strings.

    Raises:
        ValueError: If a non-blank line does not have 14 fields, or a
            coordinate field is not an integer literal.
    """
    yolo_lines = []
    # splitlines() + split() tolerate CRLF line endings and repeated spaces,
    # which would otherwise fail the field-count check for the whole file.
    for line in s.splitlines():
        vals = line.split()
        if not vals:
            continue
        if len(vals) != 14:
            raise ValueError('wrong format')
        # Convert each coordinate once instead of once per min() and max().
        xs = [int(v) for v in vals[6:10]]
        ys = [int(v) for v in vals[10:14]]
        box = minmax2yolo(width, height, min(xs), max(xs), min(ys), max(ys))
        # Negative coordinates yield negative normalised values; reject them
        # together with values above 1.
        if not all(0 <= value <= 1 for value in box):
            print('Values are out of bounds')
        else:
            c_x, c_y, c_w, c_h = box
            yolo_lines.append(f'2 {c_x} {c_y} {c_w} {c_h}\n')
    return yolo_lines


def rename_images(folder):
    """Rename every entry in *folder* by replacing its last 7 characters with '.png'.

    Args:
        folder: Directory whose entries are renamed in place.
    """
    for entry in os.listdir(folder):
        source = os.path.join(folder, entry)
        # Drop the trailing 7 characters of the name, then add the new extension.
        target = os.path.join(folder, entry[:-7] + '.png')
        shutil.move(source, target)


if __name__ == '__main__':
    # Each job is (source folder, annotation reader, annotation file extension);
    # the jobs run in the order listed.
    for dataset_dir, reader, ann_ext in (
        ('datasets/others/UAVHeightImages', read_bbox_oriented, 'txt'),
        ('datasets/others/UAVimages', read_pascal_voc, 'xml'),
    ):
        convert(dataset_dir, reader, ann_ext)