move python inference to Azaion.Inference folder

Alex Bezdieniezhnykh
2025-02-06 10:48:03 +02:00
parent 739759628a
commit ba3e3b4a55
39 changed files with 313 additions and 173 deletions
+1
@@ -11,3 +11,4 @@ venv
*.c
*.pyd
cython_debug*
+dist
-130
@@ -1,130 +0,0 @@
from ultralytics import YOLO
import mimetypes
import cv2
from ultralytics.engine.results import Boxes
from remote_command cimport RemoteCommand
from annotation cimport Detection, Annotation
from secure_model cimport SecureModelLoader
from ai_config cimport AIRecognitionConfig

cdef class Inference:
    def __init__(self, model_bytes, on_annotation):
        loader = SecureModelLoader()
        model_path = loader.load_model(model_bytes)
        self.stop_signal = False
        self.model = YOLO(<str>model_path)
        self.on_annotation = on_annotation

    cdef bint is_video(self, str filepath):
        mime_type, _ = mimetypes.guess_type(<str>filepath)
        return mime_type and mime_type.startswith("video")

    cdef run_inference(self, RemoteCommand cmd, int batch_size=8):
        print('run inference..')
        self.stop_signal = False
        if self.is_video(cmd.filename):
            self._process_video(cmd, batch_size)
        else:
            self._process_image(cmd)

    cdef _process_video(self, RemoteCommand cmd, int batch_size):
        frame_count = 0
        batch_frame = []
        self._previous_annotation = None
        v_input = cv2.VideoCapture(<str>cmd.filename)
        self.ai_config = AIRecognitionConfig.from_msgpack(cmd.data)
        while v_input.isOpened() and not self.stop_signal:
            ret, frame = v_input.read()
            ms = v_input.get(cv2.CAP_PROP_POS_MSEC)
            if not ret or frame is None:
                break
            frame_count += 1
            if frame_count % self.ai_config.frame_period_recognition == 0:
                batch_frame.append((frame, ms))
            if len(batch_frame) == batch_size:
                frames = list(map(lambda x: x[0], batch_frame))
                results = self.model.track(frames, persist=True)
                for frame, res in zip(batch_frame, results):
                    annotation = self.frame_to_annotation(int(frame[1]), frame[0], res.boxes)
                    is_valid = self.is_valid_annotation(<Annotation>annotation)
                    print(f'Is valid annotation: {is_valid}')
                    if is_valid:
                        self._previous_annotation = annotation
                        self.on_annotation(cmd, annotation)
                batch_frame.clear()
        v_input.release()

    cdef _process_image(self, RemoteCommand cmd):
        frame = cv2.imread(<str>cmd.filename)
        res = self.model.track(frame)
        annotation = self.frame_to_annotation(0, frame, res[0].boxes)
        self.on_annotation(cmd, annotation)

    cdef stop(self):
        self.stop_signal = True

    cdef frame_to_annotation(self, long time, frame, boxes: Boxes):
        detections = []
        for box in boxes:
            b = box.xywhn[0].cpu().numpy()
            cls = int(box.cls[0].cpu().numpy().item())
            confidence = box.conf[0].cpu().numpy().item()
            det = Detection(<double> b[0], <double> b[1], <double> b[2], <double> b[3], cls, confidence)
            detections.append(det)
        _, encoded_image = cv2.imencode('.jpg', frame)
        image_bytes = encoded_image.tobytes()
        return Annotation(image_bytes, time, detections)

    cdef bint is_valid_annotation(self, Annotation annotation):
        # No detections, invalid
        if not annotation.detections:
            return False
        # First valid annotation, always accept
        if self._previous_annotation is None:
            return True
        # Enough time has passed since last annotation
        if annotation.time >= self._previous_annotation.time + <long>(self.ai_config.frame_recognition_seconds * 1000):
            return True
        # More objects detected than before
        if len(annotation.detections) > len(self._previous_annotation.detections):
            return True
        cdef:
            Detection current_det, prev_det
            double dx, dy, distance_sq, min_distance_sq
            Detection closest_det
        # Check each detection against previous frame
        for current_det in annotation.detections:
            min_distance_sq = 1e18  # Initialize with large value
            closest_det = None
            # Find closest detection in previous frame
            for prev_det in self._previous_annotation.detections:
                dx = current_det.x - prev_det.x
                dy = current_det.y - prev_det.y
                distance_sq = dx * dx + dy * dy
                if distance_sq < min_distance_sq:
                    min_distance_sq = distance_sq
                    closest_det = prev_det
            # Check if beyond tracking distance
            if min_distance_sq > self.ai_config.tracking_distance_confidence:
                return True
            # Check probability increase
            if current_det.confidence >= closest_det.confidence + self.ai_config.tracking_probability_increase:
                return True
        # No validation criteria met
        return False
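For reference, the gating logic above is a nearest-neighbour filter over consecutive annotations. A minimal plain-Python sketch of the same decision rule (the `Det`/`Ann` dataclasses are hypothetical stand-ins for the Cython `Detection`/`Annotation` types; the default thresholds mirror the `AIRecognitionConfig` values used elsewhere in this commit):
```
from dataclasses import dataclass

@dataclass
class Det:
    x: float
    y: float
    confidence: float

@dataclass
class Ann:
    time: int          # milliseconds
    detections: list

def is_valid(ann, prev, min_interval_ms=2000, dist_thresh=0.15, prob_step=15.0):
    if not ann.detections:
        return False                  # no detections, invalid
    if prev is None:
        return True                   # first annotation always passes
    if ann.time >= prev.time + min_interval_ms:
        return True                   # enough time has elapsed
    if len(ann.detections) > len(prev.detections):
        return True                   # more objects than before
    for cur in ann.detections:
        # nearest previous detection by squared centre distance
        closest = min(prev.detections,
                      key=lambda p: (cur.x - p.x) ** 2 + (cur.y - p.y) ** 2)
        d2 = (cur.x - closest.x) ** 2 + (cur.y - closest.y) ** 2
        if d2 > dist_thresh:
            return True               # moved beyond tracking distance
        if cur.confidence >= closest.confidence + prob_step:
            return True               # confidence jumped enough
    return False
```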
-1
@@ -1 +0,0 @@
eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJuYW1laWQiOiJkOTBhMzZjYS1lMjM3LTRmYmQtOWM3Yy0xMjcwNDBhYzg1NTYiLCJ1bmlxdWVfbmFtZSI6ImFkbWluQGF6YWlvbi5jb20iLCJyb2xlIjoiQXBpQWRtaW4iLCJuYmYiOjE3MzgzNjUwMjksImV4cCI6MTczODM3OTQyOSwiaWF0IjoxNzM4MzY1MDI5LCJpc3MiOiJBemFpb25BcGkiLCJhdWQiOiJBbm5vdGF0b3JzL09yYW5nZVBpL0FkbWlucyJ9.5teWb-gnhRngV337u_0OyUQ-o2-plN7shrvvKUsckPw
-1
@@ -27,7 +27,6 @@
<PackageReference Include="SkiaSharp" Version="2.88.9" />
<PackageReference Include="VideoLAN.LibVLC.Windows" Version="3.0.21" />
<PackageReference Include="WindowsAPICodePack" Version="7.0.4" />
<PackageReference Include="YoloV8.Gpu" Version="5.0.4" />
</ItemGroup>
<ItemGroup>
@@ -5,11 +5,13 @@ namespace Azaion.Common.DTO.Config;
[MessagePackObject]
public class AIRecognitionConfig
{
[Key("FrameRecognitionSeconds")] public double FrameRecognitionSeconds { get; set; }
[Key(nameof(FramePeriodRecognition))] public int FramePeriodRecognition { get; set; }
[Key(nameof(FrameRecognitionSeconds))] public double FrameRecognitionSeconds { get; set; }
[Key(nameof(ProbabilityThreshold))] public double ProbabilityThreshold { get; set; }
[Key("TrackingDistanceConfidence")] public double TrackingDistanceConfidence { get; set; }
[Key("TrackingProbabilityIncrease")] public double TrackingProbabilityIncrease { get; set; }
[Key("TrackingIntersectionThreshold")] public double TrackingIntersectionThreshold { get; set; }
[Key("FramePeriodRecognition")] public int FramePeriodRecognition { get; set; }
[Key("Data")] public byte[] Data { get; set; }
[Key(nameof(TrackingDistanceConfidence))] public double TrackingDistanceConfidence { get; set; }
[Key(nameof(TrackingProbabilityIncrease))] public double TrackingProbabilityIncrease { get; set; }
[Key(nameof(TrackingIntersectionThreshold))] public double TrackingIntersectionThreshold { get; set; }
[Key(nameof(Data))] public byte[] Data { get; set; }
}
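Switching the `[Key(...)]` attributes from string literals to `nameof(...)` ties the MessagePack map keys to the property names at compile time. For illustration, a sketch of the matching payload as seen from the Python side (values taken from the appsettings.json change below):
```
import msgpack

# Keys must match the C# property names exactly; nameof(...) now guarantees
# the C# side cannot drift from them silently.
payload = msgpack.packb({
    "FramePeriodRecognition": 4,
    "FrameRecognitionSeconds": 2.0,
    "ProbabilityThreshold": 0.25,
    "TrackingDistanceConfidence": 0.15,
    "TrackingProbabilityIncrease": 15.0,
    "TrackingIntersectionThreshold": 0.8,
    "Data": b"",
})
config = msgpack.unpackb(payload, strict_map_key=False)
assert config["FramePeriodRecognition"] == 4
```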
@@ -23,4 +23,6 @@ public class SecurityConstants
public const int ZMQ_PORT = 5127;
#endregion SocketClient
+    public static string AzaionInferencePath = "azaion-inference.exe";
}
@@ -46,21 +46,19 @@ public class PythonResourceLoader : IResourceLoader, IAuthProvider
private void StartPython(ApiConfig apiConfig, ApiCredentials credentials)
{
    //var inferenceExe = LoadPythonFile().GetAwaiter().GetResult();
-    string outputProcess = "";
-    string errorProcess = "";
-    var path = "azaion-inference.exe";
-    var arguments = $"-e {credentials.Email} -p {credentials.Password} -f {apiConfig.ResourcesFolder}";
    using var process = new Process();
-    process.StartInfo.FileName = path;
-    process.StartInfo.Arguments = arguments;
-    process.StartInfo.UseShellExecute = false;
-    process.StartInfo.RedirectStandardOutput = true;
-    process.StartInfo.RedirectStandardError = true;
-    //process.StartInfo.CreateNoWindow = true;
-    process.OutputDataReceived += (sender, e) => { if (e.Data != null) Console.WriteLine(e.Data); };
-    process.ErrorDataReceived += (sender, e) => { if (e.Data != null) Console.WriteLine(e.Data); };
+    process.StartInfo = new ProcessStartInfo
+    {
+        FileName = SecurityConstants.AzaionInferencePath,
+        Arguments = $"-e {credentials.Email} -p {credentials.Password} -f {apiConfig.ResourcesFolder}",
+        UseShellExecute = false,
+        RedirectStandardOutput = true,
+        RedirectStandardError = true,
+        //CreateNoWindow = true
+    };
+    process.OutputDataReceived += (_, e) => { if (e.Data != null) Console.WriteLine(e.Data); };
+    process.ErrorDataReceived += (_, e) => { if (e.Data != null) Console.WriteLine(e.Data); };
    process.Start();
}
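The launcher now reads the executable name from `SecurityConstants.AzaionInferencePath` and forwards `-e`/`-p`/`-f`. The commit does not show the receiving side of these flags; a hypothetical sketch of how the Python entry point could consume them:
```
# Hypothetical argument parsing for azaion-inference; only the -e/-p/-f
# flag names are taken from the launcher's Arguments string above.
import argparse

parser = argparse.ArgumentParser(prog="azaion-inference")
parser.add_argument("-e", "--email", required=True, help="API account email")
parser.add_argument("-p", "--password", required=True, help="API account password")
parser.add_argument("-f", "--folder", required=True, help="resources folder path")
args = parser.parse_args()
```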
@@ -45,11 +45,6 @@ This is crucial for the build, because the build needs the Python.h header and other files
```
python -m pip install --upgrade pip
pip install --upgrade huggingface_hub
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
pip install ultralytics
pip uninstall -y opencv-python
pip install opencv-python cython msgpack cryptography rstream pika zmq pyjwt pyinstaller tensorboard
```
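Since the rewritten inference asks for `CUDAExecutionProvider` first, a quick post-install check that the GPU build of onnxruntime is actually usable can save debugging time:
```
# CUDAExecutionProvider only shows up when onnxruntime-gpu and a matching
# CUDA runtime are installed; otherwise inference silently falls back to CPU.
import onnxruntime as onnx

print(onnx.get_available_providers())
# e.g. ['CUDAExecutionProvider', 'CPUExecutionProvider'] on a working setup
```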
In case of fbgemm.dll error (Windows specific):
@@ -1,9 +1,12 @@
cdef class AIRecognitionConfig:
    cdef public double frame_recognition_seconds
+    cdef public int frame_period_recognition
    cdef public double probability_threshold
    cdef public double tracking_distance_confidence
    cdef public double tracking_probability_increase
    cdef public double tracking_intersection_threshold
-    cdef public int frame_period_recognition
    cdef public bytes file_data

    @staticmethod
@@ -2,18 +2,24 @@ from msgpack import unpackb
cdef class AIRecognitionConfig:
    def __init__(self,
+                 frame_period_recognition,
                 frame_recognition_seconds,
                 probability_threshold,
                 tracking_distance_confidence,
                 tracking_probability_increase,
                 tracking_intersection_threshold,
-                 frame_period_recognition,
                 file_data
                 ):
+        self.frame_period_recognition = frame_period_recognition
        self.frame_recognition_seconds = frame_recognition_seconds
        self.probability_threshold = probability_threshold
        self.tracking_distance_confidence = tracking_distance_confidence
        self.tracking_probability_increase = tracking_probability_increase
        self.tracking_intersection_threshold = tracking_intersection_threshold
-        self.frame_period_recognition = frame_period_recognition
        self.file_data = file_data

    def __str__(self):
@@ -24,9 +30,13 @@ cdef class AIRecognitionConfig:
    cdef from_msgpack(bytes data):
        unpacked = unpackb(data, strict_map_key=False)
        return AIRecognitionConfig(
+            unpacked.get("FramePeriodRecognition", 0),
            unpacked.get("FrameRecognitionSeconds", 0.0),
            unpacked.get("ProbabilityThreshold", 0.0),
            unpacked.get("TrackingDistanceConfidence", 0.0),
            unpacked.get("TrackingProbabilityIncrease", 0.0),
            unpacked.get("TrackingIntersectionThreshold", 0.0),
-            unpacked.get("FramePeriodRecognition", 0),
            unpacked.get("Data", b''))
@@ -13,10 +13,10 @@ cdef class Detection:
        return f'{self.cls}: {self.x:.2f} {self.y:.2f} {self.w:.2f} {self.h:.2f}, prob: {(self.confidence*100):.1f}%'

cdef class Annotation:
-    def __init__(self, bytes image_bytes, long time, list[Detection] detections):
-        self.image = image_bytes
+    def __init__(self, long time, list[Detection] detections):
        self.time = time
        self.detections = detections if detections is not None else []
+        self.image = b''

    cdef bytes serialize(self):
        return msgpack.packb({
@@ -1,12 +1,10 @@
pyinstaller --onefile ^
    --collect-all jwt ^
    --collect-all requests ^
    --collect-all psutil ^
    --collect-all cryptography ^
    --collect-all msgpack ^
    --collect-all expecttest ^
-   --collect-all torch ^
-   --collect-all ultralytics ^
    --collect-all zmq ^
    --hidden-import user ^
    --hidden-import security ^
@@ -19,4 +17,6 @@
    --hidden-import ai_config ^
    --hidden-import inference ^
    --hidden-import remote_command_handler ^
+   --hidden-import cv2 ^
+   --hidden-import onnxruntime ^
    start.py
@@ -7,6 +7,6 @@ cdef str ANNOTATIONS_QUEUE = "azaion-annotations"
cdef str API_URL = "https://api.azaion.com" # Base URL for the external API
cdef str TOKEN_FILE = "token"
cdef str QUEUE_CONFIG_FILENAME = "secured-config.json"
-cdef str AI_MODEL_FILE = "azaion.pt"
+cdef str AI_MODEL_FILE = "azaion.onnx"
cdef bytes DONE_SIGNAL = b"DONE"
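The model artifact changes from a PyTorch checkpoint to ONNX. Assuming azaion.pt is an ultralytics-trained checkpoint, the usual one-time conversion uses the ultralytics export API (the filenames are taken from the constant above; the export step itself is not part of this commit):
```
# One-time conversion sketch; assumes azaion.pt is an ultralytics checkpoint.
from ultralytics import YOLO

model = YOLO("azaion.pt")
model.export(format="onnx")  # writes azaion.onnx next to the .pt file
```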
@@ -3,17 +3,25 @@ from annotation cimport Annotation
from ai_config cimport AIRecognitionConfig

cdef class Inference:
-    cdef object model
+    cdef object session
    cdef object on_annotation
    cdef Annotation _previous_annotation
    cdef AIRecognitionConfig ai_config
    cdef bint stop_signal
+    cdef str model_input
+    cdef int model_width
+    cdef int model_height
+    cdef double start_video_time  # required by detect_frame's timing log
    cdef bint is_video(self, str filepath)
    cdef run_inference(self, RemoteCommand cmd, int batch_size=?)
    cdef _process_video(self, RemoteCommand cmd, int batch_size)
    cdef _process_image(self, RemoteCommand cmd)
    cdef stop(self)
-    cdef frame_to_annotation(self, long time, frame, boxes: object)
+    cdef preprocess(self, frame)
+    cdef postprocess(self, output, int img_width, int img_height)
+    cdef detect_frame(self, frame, long time_ms)
    cdef bint is_valid_annotation(self, Annotation annotation)
+188
@@ -0,0 +1,188 @@
import mimetypes
import time
import cv2
import numpy as np
import onnxruntime as onnx
from remote_command cimport RemoteCommand
from annotation cimport Detection, Annotation
from ai_config cimport AIRecognitionConfig

cdef class Inference:
    def __init__(self, model_bytes, on_annotation):
        self.stop_signal = False
        self.session = onnx.InferenceSession(
            model_bytes, providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
        )
        self.on_annotation = on_annotation
        self.ai_config = AIRecognitionConfig(4, 2, 0.25, 0.15, 15, 0.8, b'')
        model_inputs = self.session.get_inputs()
        self.model_input = model_inputs[0].name
        input_shape = model_inputs[0].shape
        self.model_height = input_shape[2]  # NCHW layout: dim 2 is height
        self.model_width = input_shape[3]   # NCHW layout: dim 3 is width
        print(f'AI detection model input: {self.model_input} ({self.model_width}, {self.model_height})')
        model_meta = self.session.get_modelmeta()
        print("Metadata:", model_meta.custom_metadata_map)

    cdef preprocess(self, frame):
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (self.model_width, self.model_height))
        image_data = np.array(img) / 255.0
        image_data = np.transpose(image_data, (2, 0, 1))  # Channel first
        image_data = np.expand_dims(image_data, axis=0).astype(np.float32)
        return image_data

    cdef postprocess(self, output, int img_width, int img_height):
        outputs = np.transpose(np.squeeze(output[0]))
        rows = outputs.shape[0]
        boxes = []
        scores = []
        class_ids = []
        x_factor = img_width / self.model_width
        y_factor = img_height / self.model_height
        for i in range(rows):
            classes_scores = outputs[i][4:]
            max_score = np.amax(classes_scores)
            if max_score >= self.ai_config.probability_threshold:
                class_id = np.argmax(classes_scores)
                x, y, w, h = outputs[i][0], outputs[i][1], outputs[i][2], outputs[i][3]
                left = int((x - w / 2) * x_factor)
                top = int((y - h / 2) * y_factor)
                width = int(w * x_factor)
                height = int(h * y_factor)
                class_ids.append(class_id)
                scores.append(max_score)
                boxes.append([left, top, width, height])
        indices = cv2.dnn.NMSBoxes(boxes, scores, self.ai_config.probability_threshold, 0.45)
        detections = []
        for i in indices:
            x, y, w, h = boxes[i]
            detections.append(Detection(x, y, w, h, class_ids[i], scores[i]))
        return detections

    cdef bint is_video(self, str filepath):
        mime_type, _ = mimetypes.guess_type(<str>filepath)
        return mime_type and mime_type.startswith("video")

    cdef run_inference(self, RemoteCommand cmd, int batch_size=8):
        print('run inference..')
        self.ai_config = AIRecognitionConfig.from_msgpack(cmd.data)
        self.stop_signal = False
        if self.is_video(cmd.filename):
            self._process_video(cmd, batch_size)
        else:
            self._process_image(cmd)

    cdef _process_video(self, RemoteCommand cmd, int batch_size):
        frame_count = 0
        batch_frame = []
        self._previous_annotation = None
        self.start_video_time = time.time()
        v_input = cv2.VideoCapture(<str>cmd.filename)
        while v_input.isOpened() and not self.stop_signal:
            ret, frame = v_input.read()
            if not ret or frame is None:
                break
            frame_count += 1
            if frame_count % self.ai_config.frame_period_recognition == 0:
                ms = int(v_input.get(cv2.CAP_PROP_POS_MSEC))
                annotation = self.detect_frame(frame, ms)
                if annotation is not None:
                    self._previous_annotation = annotation
                    self.on_annotation(cmd, annotation)
        v_input.release()

    cdef detect_frame(self, frame, long time_ms):
        cdef Annotation annotation
        img_height, img_width = frame.shape[:2]
        start_time = time.time()
        img_data = self.preprocess(frame)
        preprocess_time = time.time()
        outputs = self.session.run(None, {self.model_input: img_data})
        inference_time = time.time()
        detections = self.postprocess(outputs, img_width, img_height)
        postprocess_time = time.time()
        print(f'video time, s: {time_ms / 1000:.3f}. total time, s: {postprocess_time - self.start_video_time:.3f} '
              f'preprocess time: {preprocess_time - start_time:.3f}, inference time: {inference_time - preprocess_time:.3f},'
              f' postprocess time: {postprocess_time - inference_time:.3f}, total time: {postprocess_time - start_time:.3f}')
        if len(detections) > 0:
            annotation = Annotation(time_ms, detections)
            if self.is_valid_annotation(annotation):
                _, image = cv2.imencode('.jpg', frame)
                annotation.image = image.tobytes()
                return annotation
        return None

    cdef _process_image(self, RemoteCommand cmd):
        self._previous_annotation = None
        self.start_video_time = time.time()
        frame = cv2.imread(<str>cmd.filename)
        annotation = self.detect_frame(frame, 0)
        if annotation is None:
            _, image = cv2.imencode('.jpg', frame)
            annotation = Annotation(0, [])
            annotation.image = image.tobytes()
        self.on_annotation(cmd, annotation)

    cdef stop(self):
        self.stop_signal = True

    cdef bint is_valid_annotation(self, Annotation annotation):
        # No detections, invalid
        if not annotation.detections:
            return False
        # First valid annotation, always accept
        if self._previous_annotation is None:
            return True
        # Enough time has passed since last annotation
        if annotation.time >= self._previous_annotation.time + <long>(self.ai_config.frame_recognition_seconds * 1000):
            return True
        # More objects detected than before
        if len(annotation.detections) > len(self._previous_annotation.detections):
            return True
        cdef:
            Detection current_det, prev_det
            double dx, dy, distance_sq, min_distance_sq
            Detection closest_det
        # Check each detection against previous frame
        for current_det in annotation.detections:
            min_distance_sq = 1e18  # Initialize with large value
            closest_det = None
            # Find the closest detection in previous frame
            for prev_det in self._previous_annotation.detections:
                dx = current_det.x - prev_det.x
                dy = current_det.y - prev_det.y
                distance_sq = dx * dx + dy * dy
                if distance_sq < min_distance_sq:
                    min_distance_sq = distance_sq
                    closest_det = prev_det
            # Check if beyond tracking distance
            if min_distance_sq > self.ai_config.tracking_distance_confidence:
                return True
            # Check probability increase
            if current_det.confidence >= closest_det.confidence + self.ai_config.tracking_probability_increase:
                return True
        return False
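To make the coordinate handling in `postprocess` concrete, a worked example of the centre-format box rescaling from model space back to frame space (all numbers are illustrative):
```
# Model space is model_width x model_height; the frame is img_width x img_height.
model_w = model_h = 640                  # assumed square ONNX input
img_w, img_h = 1920, 1080
x_factor = img_w / model_w               # 3.0
y_factor = img_h / model_h               # 1.6875

x, y, w, h = 320.0, 320.0, 64.0, 64.0    # centre-format box in model space
left = int((x - w / 2) * x_factor)       # (320 - 32) * 3.0    = 864
top = int((y - h / 2) * y_factor)        # (320 - 32) * 1.6875 = 486
print(left, top, int(w * x_factor), int(h * y_factor))   # 864 486 192 108
```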
+5
@@ -0,0 +1,5 @@
setuptools
Cython
opencv-python
numpy
onnxruntime-gpu
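Assuming the file is checked in as requirements.txt, a typical Windows setup for the new runtime (note onnxruntime-gpu replaces the old torch/ultralytics stack):
```
python -m venv venv
venv\Scripts\activate
pip install -r requirements.txt
```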
@@ -1,5 +1,6 @@
from setuptools import setup, Extension
from Cython.Build import cythonize
+import numpy as np

extensions = [
    Extension('constants', ['constants.pyx']),
@@ -13,7 +14,6 @@ extensions = [
    Extension('secure_model', ['secure_model.pyx']),
    Extension('ai_config', ['ai_config.pyx']),
    Extension('inference', ['inference.pyx']),
-    Extension('main', ['main.pyx']),
]
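The added `import numpy as np` only pays off if the NumPy headers reach the C compiler. The commit doesn't show the `setup()` call, so the wiring below is a conventional sketch, not the repository's actual code:
```
# Conventional numpy include wiring for Cython builds (sketch).
from setuptools import setup, Extension
from Cython.Build import cythonize
import numpy as np

extensions = [Extension('inference', ['inference.pyx'])]  # abridged list

setup(
    ext_modules=cythonize(extensions, language_level=3),
    include_dirs=[np.get_include()],  # exposes numpy/arrayobject.h
)
```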
+57
@@ -0,0 +1,57 @@
# -*- mode: python ; coding: utf-8 -*-
from PyInstaller.utils.hooks import collect_all
datas = []
binaries = []
hiddenimports = ['user', 'security', 'secure_model', 'api_client', 'hardware_service', 'constants', 'annotation', 'remote_command', 'ai_config', 'inference', 'remote_command_handler', 'cv2', 'onnxruntime']
tmp_ret = collect_all('jwt')
datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
tmp_ret = collect_all('requests')
datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
tmp_ret = collect_all('psutil')
datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
tmp_ret = collect_all('cryptography')
datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
tmp_ret = collect_all('msgpack')
datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
tmp_ret = collect_all('expecttest')
datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
tmp_ret = collect_all('zmq')
datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
a = Analysis(
    ['start.py'],
    pathex=[],
    binaries=binaries,
    datas=datas,
    hiddenimports=hiddenimports,
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=[],
    noarchive=False,
    optimize=0,
)
pyz = PYZ(a.pure)
exe = EXE(
    pyz,
    a.scripts,
    a.binaries,
    a.datas,
    [],
    name='start',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    upx_exclude=[],
    runtime_tmpdir=None,
    console=True,
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
)
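With the spec file checked in, the one-file build is reproducible from a single command (assuming the file is saved as start.spec, matching `name='start'` above):
```
pyinstaller start.spec
```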
+1
@@ -0,0 +1 @@
eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJuYW1laWQiOiJkOTBhMzZjYS1lMjM3LTRmYmQtOWM3Yy0xMjcwNDBhYzg1NTYiLCJ1bmlxdWVfbmFtZSI6ImFkbWluQGF6YWlvbi5jb20iLCJyb2xlIjoiQXBpQWRtaW4iLCJuYmYiOjE3Mzg4Mjk0NTMsImV4cCI6MTczODg0Mzg1MywiaWF0IjoxNzM4ODI5NDUzLCJpc3MiOiJBemFpb25BcGkiLCJhdWQiOiJBbm5vdGF0b3JzL09yYW5nZVBpL0FkbWlucyJ9.t6ImX8KkH5IQ4zNNY5IbXESSI6uia4iuzyMhodvM7AA
+4 -2
@@ -36,11 +36,13 @@
"RightPanelWidth": 230.0
},
"AIRecognitionConfig": {
"FramePeriodRecognition": 4,
"FrameRecognitionSeconds": 2.0,
"ProbabilityThreshold": 0.25,
"TrackingDistanceConfidence": 0.15,
"TrackingProbabilityIncrease": 15.0,
"TrackingIntersectionThreshold": 0.8,
"FramePeriodRecognition": 4
"TrackingIntersectionThreshold": 0.8
},
"ThumbnailConfig": { "Size": "240,135", "Border": 10 }
}