Add core functionality for API client, CDN management, and data augmentation

- Introduced `ApiClient` for handling API interactions, including file uploads and downloads.
- Implemented `CDNManager` for managing CDN operations with AWS S3.
- Added `Augmentator` class for image augmentation, including bounding box corrections and transformations.
- Created utility functions for annotation conversion and dataset visualization.
- Established a new rules file for sound notifications during human input requests.

These additions enhance the system's capabilities for data handling and user interaction, laying the groundwork for future features.

Simplify the autopilot state file to a minimal current-step pointer; add an execution safety rule to cursor-meta; remove Completed Steps, Key Decisions, Retry Log, and Blockers from the state template and from all references to it.
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-03-28 00:12:54 +02:00
parent 142c6c4de8
commit c20018745b
31 changed files with 0 additions and 0 deletions
View File
+63
View File
@@ -0,0 +1,63 @@
import json
from enum import Enum
from os.path import join, dirname
class Detection:
    """A detected object described by a centre-based box, a class id and a score.

    ``x``/``y`` are the box centre and ``w``/``h`` its full width and height.
    The class does not care about units; ``overlaps`` only requires that both
    boxes use the same coordinate system.
    """

    def __init__(self, x, y, w, h, cls, confidence):
        self.x = x
        self.y = y
        self.w = w
        self.h = h
        self.cls = cls
        self.confidence = confidence

    def overlaps(self, det2, iou_threshold):
        """Return True when the IoU of this box and ``det2`` exceeds ``iou_threshold``.

        Args:
            det2: The other detection (anything exposing ``x``, ``y``, ``w``, ``h``).
            iou_threshold: Intersection-over-union value that must be exceeded.

        Returns:
            True if ``intersection / union > iou_threshold``; False otherwise,
            including when both boxes are degenerate (zero union area).
        """
        # Intersect the actual edges. The shortcut
        # ``0.5 * (w1 + w2) - |x1 - x2|`` over-counts when one box lies inside
        # the other, because the overlap can never exceed the smaller extent.
        overlap_x = (min(self.x + self.w / 2, det2.x + det2.w / 2)
                     - max(self.x - self.w / 2, det2.x - det2.w / 2))
        overlap_y = (min(self.y + self.h / 2, det2.y + det2.h / 2)
                     - max(self.y - self.h / 2, det2.y - det2.h / 2))
        intersection = max(0, overlap_x) * max(0, overlap_y)
        union = self.w * self.h + det2.w * det2.h - intersection
        if union <= 0:
            # Two zero-area boxes: IoU is undefined, treat as "no overlap"
            # instead of dividing by zero.
            return False
        return intersection / union > iou_threshold
class Annotation:
    """A video frame bundled with its timestamp and the detections found in it."""

    def __init__(self, frame, time, detections: list[Detection]):
        # A missing detection list is normalised to an empty list so callers
        # can always iterate over ``detections``.
        self.detections = [] if detections is None else detections
        self.time = time
        self.frame = frame
class WeatherMode(Enum):
    """Weather/lighting variant of an annotation class.

    Each member's value is the offset added to a base class id when the
    per-mode class table is built by ``AnnotationClass.read_json``.
    """

    Norm = 0     # base classes, ids unchanged
    Wint = 20    # base id + 20
    Night = 40   # base id + 40
class AnnotationClass:
    """A detectable class: numeric id, display name and display colour.

    ``color`` is kept exactly as given. ``opencv_color`` (B, G, R) and
    ``color_tuple`` (R, G, B) are both derived from it by the same parser,
    so they always agree with each other.
    """

    def __init__(self, id, name, color):
        self.id = id
        self.name = name
        self.color = color
        red, green, blue = self._parse_rgb(color)
        # OpenCV drawing functions take colours in BGR order.
        self.opencv_color = (blue, green, red)

    @staticmethod
    def _parse_rgb(color):
        """Parse ``#RRGGBB`` or ``#AARRGGBB`` into an ``(R, G, B)`` int tuple.

        The previous code read ``opencv_color`` as ``#RRGGBB`` but
        ``color_tuple`` as ``#AARRGGBB``, so one of the two was always wrong.
        An 8-digit value is treated as carrying a leading alpha byte, which
        is dropped; otherwise the first six hex digits are used.
        """
        hex_digits = color.lstrip('#')
        if len(hex_digits) == 8:
            hex_digits = hex_digits[2:]
        return tuple(int(hex_digits[i:i + 2], 16) for i in (0, 2, 4))

    @staticmethod
    def read_json():
        """Load ``classes.json`` and expand it for every ``WeatherMode``.

        The file is looked up one directory above the directory containing
        this module. Each entry must provide ``Id``, ``Name`` and ``Color``.

        Returns:
            dict mapping ``mode.value + Id`` to an ``AnnotationClass``. For
            modes other than the zero-valued one, the name gets a
            ``(<mode name>)`` suffix.
        """
        classes_path = join(dirname(dirname(__file__)), 'classes.json')
        with open(classes_path, 'r', encoding='utf-8') as f:
            class_defs = json.load(f)
        annotations_dict = {}
        for mode in WeatherMode:
            for cl in class_defs:
                class_id = mode.value + cl['Id']
                name = cl['Name'] if mode.value == 0 else f'{cl["Name"]}({mode.name})'
                annotations_dict[class_id] = AnnotationClass(class_id, name, cl['Color'])
        return annotations_dict

    @property
    def color_tuple(self):
        """The colour as an ``(R, G, B)`` tuple of ints, parsed from ``color``."""
        return self._parse_rgb(self.color)
+139
View File
@@ -0,0 +1,139 @@
import cv2
import numpy as np
from inference.dto import Annotation, Detection, AnnotationClass
from inference.onnx_engine import InferenceEngine
class Inference:
    """Runs an inference engine over a video and displays the detections.

    Pipeline per batch: ``preprocess`` (frames -> NCHW blob), ``engine.run``,
    ``postprocess`` (raw rows -> ``Annotation`` objects), ``draw``.
    """

    def __init__(self, engine: "InferenceEngine", confidence_threshold, iou_threshold):
        """Store the thresholds and read model geometry and classes.

        Args:
            engine: Engine providing ``get_batch_size``, ``get_input_shape``
                (height, width) and ``run``.
            confidence_threshold: Detections scoring below this are dropped.
            iou_threshold: IoU above which two detections count as duplicates.
        """
        self.engine = engine
        self.confidence_threshold = confidence_threshold
        self.iou_threshold = iou_threshold
        self.batch_size = engine.get_batch_size()
        self.model_height, self.model_width = engine.get_input_shape()
        self.classes = AnnotationClass.read_json()

    def draw(self, annotation: "Annotation"):
        """Draw boxes and labels onto ``annotation.frame`` (in place) and show it.

        Detection coordinates are multiplied by the frame width/height, i.e.
        they are expected to be fractions of the frame size.
        """
        img = annotation.frame
        img_height, img_width = img.shape[:2]
        for d in annotation.detections:
            # Centre/size -> top-left and bottom-right pixel corners.
            x1 = int(img_width * (d.x - d.w / 2))
            y1 = int(img_height * (d.y - d.h / 2))
            x2 = int(x1 + img_width * d.w)
            y2 = int(y1 + img_height * d.h)

            annotation_class = self.classes[d.cls]
            color = annotation_class.opencv_color
            cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)

            label = f"{annotation_class.name}: {d.confidence:.2f}"
            (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            # Put the label above the box unless it would leave the frame.
            label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10
            cv2.rectangle(
                img, (x1, label_y - label_height), (x1 + label_width, label_y + label_height), color, cv2.FILLED
            )
            cv2.putText(img, label, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
        cv2.imshow('Video', img)

    def preprocess(self, frames):
        """Convert frames to one stacked blob sized for the model input.

        Each frame is scaled by 1/255, resized to the model width/height
        (no crop) and has its R and B channels swapped.
        """
        blobs = [cv2.dnn.blobFromImage(frame,
                                       scalefactor=1.0 / 255.0,
                                       size=(self.model_width, self.model_height),
                                       mean=(0, 0, 0),
                                       swapRB=True,
                                       crop=False)
                 for frame in frames]
        return np.vstack(blobs)

    def postprocess(self, batch_frames, batch_timestamps, output):
        """Turn raw engine output into one ``Annotation`` per frame.

        Args:
            batch_frames: Frames that were fed to the engine.
            batch_timestamps: Timestamp for each frame, same order.
            output: Engine result; ``output[0][i]`` holds the rows for frame
                ``i``, each row read as ``x1, y1, x2, y2, conf, cls`` with
                corner coordinates in model-input pixels.

        Returns:
            list of ``Annotation``, one per (frame, output row-set) pair.
        """
        anns = []
        # zip() instead of indexing by output row: an engine with a fixed
        # batch dimension returns rows for every slot, even when the final
        # batch holds fewer frames, which used to raise IndexError.
        for frame, timestamp, rows in zip(batch_frames, batch_timestamps, output[0]):
            detections = []
            for det in rows:
                if det[4] == 0:
                    # A zero score is treated as the end of the valid rows.
                    break
                if det[4] < self.confidence_threshold:
                    continue
                # Normalise to [0, 1] relative to the model input and clamp.
                x1 = max(0, det[0] / self.model_width)
                y1 = max(0, det[1] / self.model_height)
                x2 = min(1, det[2] / self.model_width)
                y2 = min(1, det[3] / self.model_height)
                conf = round(det[4], 2)
                class_id = int(det[5])
                # Corner form -> centre/size form used by Detection.
                x = (x1 + x2) / 2
                y = (y1 + y2) / 2
                w = x2 - x1
                h = y2 - y1
                detections.append(Detection(x, y, w, h, class_id, conf))
            filtered_detections = self.remove_overlapping_detections(detections)
            anns.append(Annotation(frame, timestamp, filtered_detections))
        return anns

    def process(self, video):
        """Run detection on every 4th frame of ``video`` and display results.

        Frames are grouped into batches of ``self.batch_size``; a trailing
        partial batch is processed as well. Pressing ``q`` in the display
        window stops processing. The capture is always released on exit.
        """
        frame_count = 0
        batch_frames = []
        batch_timestamps = []
        v_input = cv2.VideoCapture(video)
        try:
            while v_input.isOpened():
                ret, frame = v_input.read()
                if not ret or frame is None:
                    break
                frame_count += 1
                if frame_count % 4 != 0:
                    continue
                batch_frames.append(frame)
                batch_timestamps.append(int(v_input.get(cv2.CAP_PROP_POS_MSEC)))
                if len(batch_frames) == self.batch_size:
                    keep_going = self._run_batch(batch_frames, batch_timestamps)
                    batch_frames.clear()
                    batch_timestamps.clear()
                    if not keep_going:
                        # Previously 'q' only left the inner display loop and
                        # the video kept playing.
                        return
            if batch_frames:
                self._run_batch(batch_frames, batch_timestamps)
        finally:
            v_input.release()

    def _run_batch(self, batch_frames, batch_timestamps):
        """Infer, draw and print one batch; return False if the user pressed 'q'."""
        input_blob = self.preprocess(batch_frames)
        outputs = self.engine.run(input_blob)
        for annotation in self.postprocess(batch_frames, batch_timestamps, outputs):
            self.draw(annotation)
            print(f'video: {annotation.time / 1000:.3f}s')
            if cv2.waitKey(1) & 0xFF == ord('q'):
                return False
        return True

    def remove_overlapping_detections(self, detections):
        """Greedy non-maximum suppression over ``detections``.

        Candidates are visited from highest to lowest confidence (ties: lower
        ``cls`` first, then input order). A candidate is kept only if it does
        not overlap (per ``Detection.overlaps`` with ``self.iou_threshold``)
        any detection already kept.

        The previous pairwise scan compared against the first member of a
        cluster rather than the current best, and could re-add a detection
        that an earlier, stronger one had already suppressed.

        Returns:
            The surviving detections, in their original input order.
        """
        by_priority = sorted(
            range(len(detections)),
            key=lambda i: (-detections[i].confidence, detections[i].cls),
        )
        kept_indexes = []
        for index in by_priority:
            candidate = detections[index]
            if not any(detections[kept].overlaps(candidate, self.iou_threshold) for kept in kept_indexes):
                kept_indexes.append(index)
        return [detections[index] for index in sorted(kept_indexes)]
+47
View File
@@ -0,0 +1,47 @@
import abc
from typing import List, Tuple
import numpy as np
import onnxruntime as onnx
class InferenceEngine(abc.ABC):
    """Abstract interface every inference backend must implement.

    All four methods are abstract, so the class cannot be instantiated
    directly; a concrete backend has to override each of them.
    """

    @abc.abstractmethod
    def __init__(self, model_path: str, batch_size: int = 1, **kwargs):
        """Load the model; ``batch_size`` is the requested batch size."""

    @abc.abstractmethod
    def get_input_shape(self) -> Tuple[int, int]:
        """Return two ints describing the model input; callers unpack them as (height, width)."""

    @abc.abstractmethod
    def get_batch_size(self) -> int:
        """Return the batch size the engine works with."""

    @abc.abstractmethod
    def run(self, input_data: np.ndarray) -> List[np.ndarray]:
        """Run the model on ``input_data`` and return its output arrays."""
class OnnxEngine(InferenceEngine):
    """``InferenceEngine`` backed by an ONNX Runtime ``InferenceSession``."""

    def __init__(self, model_bytes, batch_size: int = 1, **kwargs):
        """Create the session and read input geometry and class names.

        Args:
            model_bytes: Model passed straight to ``onnx.InferenceSession``.
            batch_size: Batch size to use when the model's batch dimension
                is not a fixed positive integer.
            **kwargs: Accepted for interface compatibility; unused.
        """
        import ast  # local import: only needed to parse the metadata literal

        self.batch_size = batch_size
        self.session = onnx.InferenceSession(model_bytes, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
        self.model_inputs = self.session.get_inputs()
        self.input_name = self.model_inputs[0].name
        self.input_shape = self.model_inputs[0].shape

        # A dynamic batch dimension is not guaranteed to be reported as -1
        # (it may be a symbolic name or None). Only a concrete positive
        # integer overrides the requested batch size.
        model_batch = self.input_shape[0]
        if isinstance(model_batch, int) and model_batch > 0:
            self.batch_size = model_batch

        model_meta = self.session.get_modelmeta()
        print("Metadata:", model_meta.custom_metadata_map)
        # SECURITY: this value comes from the model file. It was previously
        # passed to eval(); literal_eval only accepts Python literals, so a
        # crafted model cannot execute code here.
        names_literal = model_meta.custom_metadata_map.get("names")
        self.class_names = ast.literal_eval(names_literal) if names_literal else {}

    def get_input_shape(self) -> Tuple[int, int]:
        """Return dimensions 2 and 3 of the model input shape.

        NOTE(review): these may be non-integer if the model declares dynamic
        spatial dimensions; confirm against the deployed models.
        """
        shape = self.input_shape
        return shape[2], shape[3]

    def get_batch_size(self) -> int:
        """Return the effective batch size chosen in ``__init__``."""
        return self.batch_size

    def run(self, input_data: np.ndarray) -> List[np.ndarray]:
        """Feed ``input_data`` to the model's first input and return all outputs."""
        return self.session.run(None, {self.input_name: input_data})
+148
View File
@@ -0,0 +1,148 @@
import re
import struct
import subprocess
from pathlib import Path
from typing import List, Tuple
import json
import numpy as np
import tensorrt as trt
import pycuda.driver as cuda
from inference.onnx_engine import InferenceEngine
# Required to initialize CUDA automatically on import; do not remove.
import pycuda.autoinit
import pynvml
class TensorRTEngine(InferenceEngine):
    """``InferenceEngine`` backed by a serialized TensorRT engine.

    Device buffers for the single input and single output tensor are
    allocated once in ``__init__`` and reused by every ``run`` call.
    """

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

    def __init__(self, model_bytes: bytes, **kwargs):
        """Deserialize the engine and allocate input/output buffers.

        Args:
            model_bytes: Serialized TensorRT engine.
            **kwargs: ``batch_size`` (optional, default 4) is used only when
                the engine's batch dimension is dynamic (-1).

        Raises:
            RuntimeError: If any step of the initialization fails; the
                original exception is attached as ``__cause__``.
        """
        try:
            runtime = trt.Runtime(self.TRT_LOGGER)
            self.engine = runtime.deserialize_cuda_engine(model_bytes)
            if self.engine is None:
                raise RuntimeError("Failed to load TensorRT engine!")

            self.context = self.engine.create_execution_context()

            # input
            self.input_name = self.engine.get_tensor_name(0)
            engine_input_shape = self.engine.get_tensor_shape(self.input_name)
            if engine_input_shape[0] != -1:
                self.batch_size = engine_input_shape[0]
            else:
                # Dynamic batch dimension: batch_size used to stay unset here,
                # which raised AttributeError a few lines below.
                self.batch_size = kwargs.get("batch_size") or 4

            self.input_shape = [
                self.batch_size,
                engine_input_shape[1],  # Channels (usually fixed at 3 for RGB)
                1280 if engine_input_shape[2] == -1 else engine_input_shape[2],  # Height
                1280 if engine_input_shape[3] == -1 else engine_input_shape[3],  # Width
            ]
            self.context.set_input_shape(self.input_name, self.input_shape)
            # Remembered so run() can refuse inputs larger than the buffer.
            self._input_nbytes = trt.volume(self.input_shape) * np.dtype(np.float32).itemsize
            self.d_input = cuda.mem_alloc(self._input_nbytes)

            # output
            self.output_name = self.engine.get_tensor_name(1)
            engine_output_shape = tuple(self.engine.get_tensor_shape(self.output_name))
            self.output_shape = [
                self.batch_size,
                300 if engine_output_shape[1] == -1 else engine_output_shape[1],  # max detections number
                6 if engine_output_shape[2] == -1 else engine_output_shape[2],  # x1 y1 x2 y2 conf cls
            ]
            self.h_output = cuda.pagelocked_empty(tuple(self.output_shape), dtype=np.float32)
            self.d_output = cuda.mem_alloc(self.h_output.nbytes)

            self.stream = cuda.Stream()
        except Exception as e:
            raise RuntimeError(f"Failed to initialize TensorRT engine: {str(e)}") from e

    def get_input_shape(self) -> Tuple[int, int]:
        """Return the (height, width) entries of the resolved input shape."""
        return self.input_shape[2], self.input_shape[3]

    def get_batch_size(self) -> int:
        """Return the batch size resolved in ``__init__``."""
        return self.batch_size

    @staticmethod
    def get_gpu_memory_bytes(device_id=0) -> int:
        """Return the total memory of GPU ``device_id`` in bytes.

        Falls back to 2 GiB when NVML cannot be queried.
        """
        total_memory = None
        try:
            pynvml.nvmlInit()
            handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)
            mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            total_memory = mem_info.total
        except pynvml.NVMLError:
            total_memory = None
        finally:
            # Shutdown may itself fail (e.g. when init failed); that is not
            # an error worth surfacing.
            try:
                pynvml.nvmlShutdown()
            except pynvml.NVMLError:
                pass
        return 2 * 1024 * 1024 * 1024 if total_memory is None else total_memory  # default 2 Gb

    @staticmethod
    def get_engine_filename(device_id=0) -> str | None:
        """Build an engine file name from the device's capability and SM count.

        Returns:
            ``azaion.cc_<major>.<minor>_sm_<count>.engine``, or None when the
            device cannot be queried.
        """
        try:
            device = cuda.Device(device_id)
            sm_count = device.multiprocessor_count
            cc_major, cc_minor = device.compute_capability()
            return f"azaion.cc_{cc_major}.{cc_minor}_sm_{sm_count}.engine"
        except Exception:
            return None

    @staticmethod
    def convert_from_onnx(onnx_model: bytes) -> bytes | None:
        """Build a serialized TensorRT engine from an ONNX model.

        The workspace limit is 90% of the memory reported by
        ``get_gpu_memory_bytes``. FP16 is enabled when the platform
        reports fast FP16 support.

        Returns:
            The serialized engine, or None when parsing or building fails.
        """
        workspace_bytes = int(TensorRTEngine.get_gpu_memory_bytes() * 0.9)

        explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        with trt.Builder(TensorRTEngine.TRT_LOGGER) as builder, \
                builder.create_network(explicit_batch_flag) as network, \
                trt.OnnxParser(network, TensorRTEngine.TRT_LOGGER) as parser, \
                builder.create_builder_config() as config:

            config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes)

            if not parser.parse(onnx_model):
                # Surface the parser diagnostics instead of failing silently.
                for error_index in range(parser.num_errors):
                    print(parser.get_error(error_index))
                return None

            if builder.platform_has_fast_fp16:
                print('Converting to supported fp16')
                config.set_flag(trt.BuilderFlag.FP16)
            else:
                print('Converting to supported fp32. (fp16 is not supported)')

            plan = builder.build_serialized_network(network, config)

            if plan is None:
                print('Conversion failed.')
                return None

            return bytes(plan)

    def run(self, input_data: np.ndarray) -> List[np.ndarray]:
        """Run one inference pass.

        Args:
            input_data: Input batch; converted to contiguous float32 if it
                is not already. Must not exceed the allocated input buffer.

        Returns:
            A one-element list holding the output reshaped to
            ``self.output_shape``. The array is a view of the reusable host
            buffer, so it is overwritten by the next call.

        Raises:
            RuntimeError: If the copy or execution fails; the original
                exception is attached as ``__cause__``.
        """
        try:
            # The device copy reads raw bytes, so layout and dtype must match
            # what the buffer was sized for (no copy is made if they already do).
            host_input = np.ascontiguousarray(input_data, dtype=np.float32)
            if host_input.nbytes > self._input_nbytes:
                raise ValueError(
                    f"input of {host_input.nbytes} bytes exceeds the "
                    f"{self._input_nbytes}-byte device buffer"
                )
            cuda.memcpy_htod_async(self.d_input, host_input, self.stream)
            self.context.set_tensor_address(self.input_name, int(self.d_input))  # input buffer
            self.context.set_tensor_address(self.output_name, int(self.d_output))  # output buffer

            self.context.execute_async_v3(stream_handle=self.stream.handle)
            self.stream.synchronize()

            # The stream is already synchronized, so a blocking copy is used
            # here (memcpy_dtoh takes no stream argument).
            cuda.memcpy_dtoh(self.h_output, self.d_output)
            output = self.h_output.reshape(self.output_shape)
            return [output]

        except Exception as e:
            raise RuntimeError(f"Failed to run TensorRT inference: {str(e)}") from e