mirror of
https://github.com/azaion/annotations.git
synced 2026-04-22 15:56:30 +00:00
fix bug with annotation result gradient stops
add tensorrt engine
This commit is contained in:
@@ -72,5 +72,5 @@ In case of fbgemm.dll error (Windows specific):
|
||||
|
||||
<h3>Build exe</h3>
|
||||
```
|
||||
.\build.exe
|
||||
.\build.cmd
|
||||
```
|
||||
|
||||
@@ -14,4 +14,4 @@ cdef class ApiClient:
|
||||
|
||||
cdef load_bytes(self, str filename, str folder=*)
|
||||
cdef upload_file(self, str filename, str folder=*)
|
||||
cdef load_ai_model(self)
|
||||
cdef load_ai_model(self, bint is_tensor=*)
|
||||
|
||||
@@ -73,6 +73,7 @@ cdef class ApiClient:
|
||||
|
||||
cdef load_bytes(self, str filename, str folder=None):
|
||||
folder = folder or self.credentials.folder
|
||||
|
||||
hardware_service = HardwareService()
|
||||
cdef HardwareInfo hardware = hardware_service.get_hardware_info()
|
||||
|
||||
@@ -110,11 +111,20 @@ cdef class ApiClient:
|
||||
constants.log(<str>f'Downloaded file: {filename}, {len(data)} bytes')
|
||||
return data
|
||||
|
||||
cdef load_ai_model(self):
|
||||
with open(<str>constants.AI_MODEL_FILE_BIG, 'rb') as binary_file:
|
||||
encrypted_bytes_big = binary_file.read()
|
||||
encrypted_bytes_small = self.load_bytes(constants.AI_MODEL_FILE_SMALL)
|
||||
cdef load_ai_model(self, bint is_tensor=False):
|
||||
if is_tensor:
|
||||
big_file = <str> constants.AI_TENSOR_MODEL_FILE_BIG
|
||||
small_file = <str> constants.AI_TENSOR_MODEL_FILE_SMALL
|
||||
else:
|
||||
big_file = <str>constants.AI_ONNX_MODEL_FILE_BIG
|
||||
small_file = <str> constants.AI_ONNX_MODEL_FILE_SMALL
|
||||
|
||||
with open(big_file, 'rb') as binary_file:
|
||||
encrypted_bytes_big = binary_file.read()
|
||||
print('read encrypted big file')
|
||||
print(f'small file: {small_file}')
|
||||
encrypted_bytes_small = self.load_bytes(small_file)
|
||||
print('read encrypted small file')
|
||||
encrypted_model_bytes = encrypted_bytes_small + encrypted_bytes_big
|
||||
key = Security.get_model_encryption_key()
|
||||
|
||||
|
||||
@@ -0,0 +1,73 @@
|
||||
# -*- mode: python ; coding: utf-8 -*-
# PyInstaller spec for the "azaion-inference" one-folder build.
# Analysis, PYZ, EXE and COLLECT are injected into the namespace by
# PyInstaller when it executes this file.
from PyInstaller.utils.hooks import collect_all

# Project modules that are listed explicitly as hidden imports.
hiddenimports = [
    'constants', 'annotation', 'credentials', 'file_data', 'user', 'security',
    'secure_model', 'api_client', 'hardware_service', 'remote_command',
    'ai_config', 'inference_engine', 'inference', 'remote_command_handler',
]
datas = []
binaries = []

# Packages whose data files, binaries and submodules are bundled wholesale.
# NOTE(review): 'pyyaml' is a distribution name (the import name is 'yaml')
# and 're' is a standard-library module - confirm both entries are intended.
COLLECTED_PACKAGES = (
    'pyyaml',
    'jwt',
    'requests',
    'psutil',
    'msgpack',
    'zmq',
    'cryptography',
    'cv2',
    'onnxruntime',
    'tensorrt',
    'pycuda',
    're',
)

for package_name in COLLECTED_PACKAGES:
    pkg_datas, pkg_binaries, pkg_hiddenimports = collect_all(package_name)
    datas.extend(pkg_datas)
    binaries.extend(pkg_binaries)
    hiddenimports.extend(pkg_hiddenimports)


a = Analysis(
    ['start.py'],
    pathex=[],
    binaries=binaries,
    datas=datas,
    hiddenimports=hiddenimports,
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=[],
    noarchive=False,
    optimize=0,
)
pyz = PYZ(a.pure)

# exclude_binaries=True keeps binaries out of the executable itself; they are
# gathered next to it by COLLECT below.
exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name='azaion-inference',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    console=True,
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
)
coll = COLLECT(
    exe,
    a.binaries,
    a.datas,
    strip=False,
    upx=True,
    upx_exclude=[],
    name='azaion-inference',
)
|
||||
@@ -1,4 +1,4 @@
|
||||
pyinstaller --onefile ^
|
||||
pyinstaller --name=azaion-inference ^
|
||||
--collect-all pyyaml ^
|
||||
--collect-all jwt ^
|
||||
--collect-all requests ^
|
||||
@@ -8,6 +8,9 @@ pyinstaller --onefile ^
|
||||
--collect-all cryptography ^
|
||||
--collect-all cv2 ^
|
||||
--collect-all onnxruntime ^
|
||||
--collect-all tensorrt ^
|
||||
--collect-all pycuda ^
|
||||
--collect-all re ^
|
||||
--hidden-import constants ^
|
||||
--hidden-import annotation ^
|
||||
--hidden-import credentials ^
|
||||
@@ -19,6 +22,7 @@ pyinstaller --onefile ^
|
||||
--hidden-import hardware_service ^
|
||||
--hidden-import remote_command ^
|
||||
--hidden-import ai_config ^
|
||||
--hidden-import inference_engine ^
|
||||
--hidden-import inference ^
|
||||
--hidden-import remote_command_handler ^
|
||||
start.py
|
||||
@@ -6,8 +6,13 @@ cdef str ANNOTATIONS_QUEUE # Name of the annotations queue in rabbit
|
||||
|
||||
cdef str API_URL # Base URL for the external API
|
||||
cdef str QUEUE_CONFIG_FILENAME # queue config filename to load from api
|
||||
cdef str AI_MODEL_FILE_BIG # AI Model file (BIG part)
|
||||
cdef str AI_MODEL_FILE_SMALL # AI Model file (small part)
|
||||
|
||||
cdef str AI_ONNX_MODEL_FILE_BIG
|
||||
cdef str AI_ONNX_MODEL_FILE_SMALL
|
||||
|
||||
cdef str AI_TENSOR_MODEL_FILE_BIG
|
||||
cdef str AI_TENSOR_MODEL_FILE_SMALL
|
||||
|
||||
|
||||
cdef bytes DONE_SIGNAL
|
||||
|
||||
|
||||
@@ -8,8 +8,12 @@ cdef str ANNOTATIONS_QUEUE = "azaion-annotations"
|
||||
|
||||
cdef str API_URL = "https://api.azaion.com" # Base URL for the external API
|
||||
cdef str QUEUE_CONFIG_FILENAME = "secured-config.json"
|
||||
cdef str AI_MODEL_FILE_BIG = "azaion.onnx.big"
|
||||
cdef str AI_MODEL_FILE_SMALL = "azaion.onnx.small"
|
||||
|
||||
cdef str AI_ONNX_MODEL_FILE_BIG = "azaion.onnx.big"
|
||||
cdef str AI_ONNX_MODEL_FILE_SMALL = "azaion.onnx.small"
|
||||
|
||||
cdef str AI_TENSOR_MODEL_FILE_BIG = "azaion.engine.big"
|
||||
cdef str AI_TENSOR_MODEL_FILE_SMALL = "azaion.engine.small"
|
||||
|
||||
cdef log(str log_message, bytes client_id=None):
|
||||
local_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
|
||||
|
||||
@@ -5,4 +5,7 @@ cdef class HardwareInfo:
|
||||
cdef class HardwareService:
|
||||
cdef bint is_windows
|
||||
cdef get_mac_address(self, interface=*)
|
||||
|
||||
@staticmethod
|
||||
cdef has_nvidia_gpu()
|
||||
cdef HardwareInfo get_hardware_info(self)
|
||||
@@ -1,3 +1,4 @@
|
||||
import re
|
||||
import subprocess
|
||||
import psutil
|
||||
|
||||
@@ -42,6 +43,18 @@ cdef class HardwareService:
|
||||
return addr.address.replace('-', '')
|
||||
return None
|
||||
|
||||
@staticmethod
cdef has_nvidia_gpu():
    """Report whether ``nvidia-smi`` advertises a CUDA version above 11.0.

    Runs ``nvidia-smi`` and looks for the ``CUDA Version: X.Y`` text in its
    output.

    Returns:
        True when the reported CUDA version is greater than 11.0. False when
        it is 11.0 or lower, when the version text is absent, or when
        ``nvidia-smi`` cannot be started or exits with an error.
    """
    try:
        raw_output = subprocess.check_output(['nvidia-smi'])
    except (OSError, subprocess.SubprocessError) as e:
        # A missing or failing nvidia-smi is treated as "no usable GPU".
        print(e)
        return False

    # errors='replace' so an odd byte in the tool output cannot raise here.
    output = raw_output.decode(errors='replace')
    match = re.search(r'CUDA Version:\s*(\d+)(?:\.(\d+))?', output)
    if match is None:
        return False

    # Compare as integers: float() would choke on a value such as "12.4.1".
    version = (int(match.group(1)), int(match.group(2) or 0))
    return version > (11, 0)
|
||||
|
||||
cdef HardwareInfo get_hardware_info(self):
|
||||
if self.is_windows:
|
||||
os_command = (
|
||||
|
||||
@@ -2,10 +2,11 @@ from remote_command cimport RemoteCommand
|
||||
from annotation cimport Annotation, Detection
|
||||
from ai_config cimport AIRecognitionConfig
|
||||
from api_client cimport ApiClient
|
||||
from inference_engine cimport InferenceEngine
|
||||
|
||||
cdef class Inference:
|
||||
cdef ApiClient api_client
|
||||
cdef object session
|
||||
cdef InferenceEngine engine
|
||||
cdef object on_annotation
|
||||
cdef Annotation _previous_annotation
|
||||
cdef AIRecognitionConfig ai_config
|
||||
|
||||
@@ -1,40 +1,40 @@
|
||||
import json
|
||||
import mimetypes
|
||||
import subprocess
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import onnxruntime as onnx
|
||||
|
||||
cimport constants
|
||||
from remote_command cimport RemoteCommand
|
||||
from annotation cimport Detection, Annotation
|
||||
from ai_config cimport AIRecognitionConfig
|
||||
from inference_engine cimport OnnxEngine, TensorRTEngine
|
||||
from hardware_service cimport HardwareService
|
||||
|
||||
cdef class Inference:
|
||||
def __init__(self, api_client, on_annotation):
|
||||
self.api_client = api_client
|
||||
self.on_annotation = on_annotation
|
||||
self.stop_signal = False
|
||||
self.session = None
|
||||
self.model_input = None
|
||||
self.model_width = 0
|
||||
self.model_height = 0
|
||||
self.engine = None
|
||||
self.class_names = None
|
||||
|
||||
def init_ai(self):
|
||||
model_bytes = self.api_client.load_ai_model()
|
||||
self.session = onnx.InferenceSession(
|
||||
model_bytes, providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
|
||||
)
|
||||
model_inputs = self.session.get_inputs()
|
||||
self.model_input = model_inputs[0].name
|
||||
input_shape = model_inputs[0].shape
|
||||
self.model_width = input_shape[2]
|
||||
self.model_height = input_shape[3]
|
||||
print(f'AI detection model input: {self.model_input} ({self.model_width}, {self.model_height})')
|
||||
model_meta = self.session.get_modelmeta()
|
||||
print("Metadata:", model_meta.custom_metadata_map)
|
||||
self.class_names = eval(model_meta.custom_metadata_map["names"])
|
||||
if self.engine is not None:
|
||||
return
|
||||
|
||||
is_nvidia = HardwareService.has_nvidia_gpu()
|
||||
if is_nvidia:
|
||||
model_bytes = self.api_client.load_ai_model(is_tensor=True)
|
||||
self.engine = TensorRTEngine(model_bytes, batch_size=4)
|
||||
else:
|
||||
model_bytes = self.api_client.load_ai_model()
|
||||
self.engine = OnnxEngine(model_bytes, batch_size=4)
|
||||
|
||||
self.model_height, self.model_width = self.engine.get_input_shape()
|
||||
self.class_names = self.engine.get_class_names()
|
||||
|
||||
cdef preprocess(self, frames):
|
||||
blobs = [cv2.dnn.blobFromImage(frame,
|
||||
@@ -47,33 +47,37 @@ cdef class Inference:
|
||||
return np.vstack(blobs)
|
||||
|
||||
cdef postprocess(self, output, ai_config):
|
||||
print('enter postprocess')
|
||||
cdef list[Detection] detections = []
|
||||
cdef int ann_index
|
||||
cdef float x1, y1, x2, y2, conf, cx, cy, w, h
|
||||
cdef int class_id
|
||||
cdef list[list[Detection]] results = []
|
||||
print('start try: code')
|
||||
try:
|
||||
for ann_index in range(len(output[0])):
|
||||
detections.clear()
|
||||
for det in output[0][ann_index]:
|
||||
if det[4] == 0: # if confidence is 0 then valid points are over.
|
||||
break
|
||||
x1 = det[0] / self.model_width
|
||||
y1 = det[1] / self.model_height
|
||||
x2 = det[2] / self.model_width
|
||||
y2 = det[3] / self.model_height
|
||||
conf = round(det[4], 2)
|
||||
class_id = int(det[5])
|
||||
|
||||
for ann_index in range(len(output[0])):
|
||||
detections.clear()
|
||||
for det in output[0][ann_index]:
|
||||
if det[4] == 0: # if confidence is 0 then valid points are over.
|
||||
break
|
||||
x1 = det[0] / self.model_width
|
||||
y1 = det[1] / self.model_height
|
||||
x2 = det[2] / self.model_width
|
||||
y2 = det[3] / self.model_height
|
||||
conf = round(det[4], 2)
|
||||
class_id = int(det[5])
|
||||
|
||||
x = (x1 + x2) / 2
|
||||
y = (y1 + y2) / 2
|
||||
w = x2 - x1
|
||||
h = y2 - y1
|
||||
if conf >= ai_config.probability_threshold:
|
||||
detections.append(Detection(x, y, w, h, class_id, conf))
|
||||
filtered_detections = self.remove_overlapping_detections(detections)
|
||||
results.append(filtered_detections)
|
||||
return results
|
||||
x = (x1 + x2) / 2
|
||||
y = (y1 + y2) / 2
|
||||
w = x2 - x1
|
||||
h = y2 - y1
|
||||
if conf >= ai_config.probability_threshold:
|
||||
detections.append(Detection(x, y, w, h, class_id, conf))
|
||||
filtered_detections = self.remove_overlapping_detections(detections)
|
||||
results.append(filtered_detections)
|
||||
return results
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Failed to postprocess: {str(e)}")
|
||||
|
||||
cdef remove_overlapping_detections(self, list[Detection] detections):
|
||||
cdef Detection det1, det2
|
||||
@@ -121,8 +125,7 @@ cdef class Inference:
|
||||
raise Exception('ai recognition config is empty')
|
||||
|
||||
self.stop_signal = False
|
||||
if self.session is None:
|
||||
self.init_ai()
|
||||
self.init_ai()
|
||||
|
||||
print(ai_config.paths)
|
||||
for m in ai_config.paths:
|
||||
@@ -160,7 +163,9 @@ cdef class Inference:
|
||||
|
||||
if len(batch_frames) == ai_config.model_batch_size:
|
||||
input_blob = self.preprocess(batch_frames)
|
||||
outputs = self.session.run(None, {self.model_input: input_blob})
|
||||
|
||||
outputs = self.engine.run(input_blob)
|
||||
|
||||
list_detections = self.postprocess(outputs, ai_config)
|
||||
for i in range(len(list_detections)):
|
||||
detections = list_detections[i]
|
||||
@@ -189,7 +194,9 @@ cdef class Inference:
|
||||
timestamps.append(0)
|
||||
|
||||
input_blob = self.preprocess(frames)
|
||||
outputs = self.session.run(None, {self.model_input: input_blob})
|
||||
|
||||
outputs = self.engine.run(input_blob)
|
||||
|
||||
list_detections = self.postprocess(outputs, ai_config)
|
||||
for i in range(len(list_detections)):
|
||||
detections = list_detections[i]
|
||||
@@ -199,6 +206,7 @@ cdef class Inference:
|
||||
print(annotation.to_str(self.class_names))
|
||||
self.on_annotation(cmd, annotation)
|
||||
|
||||
|
||||
cdef stop(self):
|
||||
self.stop_signal = True
|
||||
|
||||
|
||||
@@ -0,0 +1,29 @@
|
||||
from typing import List, Tuple
|
||||
import numpy as np
|
||||
|
||||
|
||||
cdef class InferenceEngine:
|
||||
cdef public int batch_size
|
||||
cdef tuple get_input_shape(self)
|
||||
cdef int get_batch_size(self)
|
||||
cdef get_class_names(self)
|
||||
cpdef run(self, input_data)
|
||||
|
||||
cdef class OnnxEngine(InferenceEngine):
|
||||
cdef object session
|
||||
cdef list model_inputs
|
||||
cdef str input_name
|
||||
cdef object input_shape
|
||||
cdef object class_names
|
||||
|
||||
cdef class TensorRTEngine(InferenceEngine):
|
||||
cdef object stream
|
||||
cdef object context
|
||||
cdef str input_name
|
||||
cdef str output_name
|
||||
cdef object d_input
|
||||
cdef object d_output
|
||||
cdef object input_shape
|
||||
cdef object output_shape
|
||||
cdef object h_output
|
||||
cdef object class_names
|
||||
@@ -0,0 +1,140 @@
|
||||
import json
|
||||
import struct
|
||||
from typing import List, Tuple
|
||||
import numpy as np
|
||||
import onnxruntime as onnx
|
||||
import tensorrt as trt
|
||||
import pycuda.driver as cuda
|
||||
import pycuda.autoinit # required for automatically initialize CUDA, do not remove.
|
||||
|
||||
|
||||
cdef class InferenceEngine:
    """Common base for the inference backends defined in this module.

    The base class only stores the requested batch size. ``get_input_shape``,
    ``get_class_names`` and ``run`` raise ``NotImplementedError`` here and are
    expected to be overridden by each backend.
    """

    def __init__(self, model_bytes: bytes, batch_size: int = 1, **kwargs):
        """Remember ``batch_size``.

        ``model_bytes`` and ``kwargs`` are accepted so that every backend
        shares one constructor signature; the base class does not use them.
        """
        self.batch_size = batch_size

    cdef int get_batch_size(self):
        """Return the batch size stored on this engine."""
        return self.batch_size

    cdef tuple get_input_shape(self):
        """Return the model input dimensions; backends must override."""
        raise NotImplementedError("Subclass must implement get_input_shape")

    cdef get_class_names(self):
        """Return the model's class names; backends must override."""
        raise NotImplementedError("Subclass must implement get_class_names")

    cpdef run(self, input_data):
        """Run inference on ``input_data``; backends must override."""
        raise NotImplementedError("Subclass must implement run")
|
||||
|
||||
|
||||
cdef class OnnxEngine(InferenceEngine):
    """Inference backend that executes an ONNX model through onnxruntime."""

    def __init__(self, model_bytes: bytes, batch_size: int = 1, **kwargs):
        """Create an onnxruntime session from serialized model bytes.

        Args:
            model_bytes: Serialized ONNX model.
            batch_size: Batch size to use when the model's first input
                dimension is not a fixed integer.
            **kwargs: Accepted for signature compatibility; unused.

        Raises:
            KeyError: If the model metadata has no ``"names"`` entry.
            ValueError, SyntaxError: If the ``"names"`` entry is not a plain
                Python literal.
        """
        # Function-scope import keeps this block self-contained.
        from ast import literal_eval

        super().__init__(model_bytes, batch_size)
        self.session = onnx.InferenceSession(model_bytes, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
        self.model_inputs = self.session.get_inputs()
        self.input_name = self.model_inputs[0].name
        self.input_shape = self.model_inputs[0].shape

        # onnxruntime reports a dynamic dimension as a string or None rather
        # than -1, so only a positive integer is taken as a fixed batch size.
        fixed_batch = self.input_shape[0]
        if isinstance(fixed_batch, int) and fixed_batch > 0:
            self.batch_size = fixed_batch

        print(f'AI detection model input: {self.model_inputs} {self.input_shape}')
        model_meta = self.session.get_modelmeta()
        print("Metadata:", model_meta.custom_metadata_map)
        # literal_eval parses the stored literal without executing code,
        # unlike eval().
        self.class_names = literal_eval(model_meta.custom_metadata_map["names"])

    cdef tuple get_input_shape(self):
        """Return elements 2 and 3 of the model's first input shape."""
        shape = self.input_shape
        return shape[2], shape[3]

    cdef int get_batch_size(self):
        """Return the batch size in effect for this session."""
        return self.batch_size

    cdef get_class_names(self):
        """Return the class names parsed from the model metadata."""
        return self.class_names

    cpdef run(self, input_data):
        """Run the session on ``input_data`` and return all model outputs."""
        return self.session.run(None, {self.input_name: input_data})
|
||||
|
||||
|
||||
cdef class TensorRTEngine(InferenceEngine):
    """Inference backend that executes a serialized TensorRT engine.

    The model blob parsed by ``__init__`` is laid out as a 4-byte
    little-endian metadata length, that many bytes of JSON metadata, and then
    the serialized engine.
    """

    def __init__(self, model_bytes: bytes, batch_size: int = 4, **kwargs):
        """Deserialize the engine and allocate its input/output buffers.

        Args:
            model_bytes: Length-prefixed JSON metadata followed by the
                serialized TensorRT engine.
            batch_size: Batch size used when the engine's first input
                dimension is dynamic (-1).
            **kwargs: Accepted for signature compatibility; unused.

        Raises:
            RuntimeError: If the metadata cannot be parsed, the engine cannot
                be deserialized, or any buffer setup step fails.
        """
        super().__init__(model_bytes, batch_size)
        print('Enter init TensorRT')
        try:
            logger = trt.Logger(trt.Logger.WARNING)

            metadata_len = struct.unpack("<I", model_bytes[:4])[0]
            try:
                metadata = json.loads(model_bytes[4:4 + metadata_len])
            except ValueError as e:
                # Fail loudly: returning here would leave an engine with no
                # context or buffers that breaks later with an unrelated error.
                raise RuntimeError("Failed to parse metadata") from e
            print(f"Model metadata: {json.dumps(metadata, indent=2)}")
            # JSON object keys are strings; class ids are stored as ints.
            self.class_names = {int(k): v for k, v in metadata['names'].items()}
            engine_data = model_bytes[4 + metadata_len:]

            runtime = trt.Runtime(logger)
            engine = runtime.deserialize_cuda_engine(engine_data)
            if engine is None:
                raise RuntimeError("Failed to load TensorRT engine from bytes")

            self.context = engine.create_execution_context()

            # input
            # NOTE(review): assumes tensor 0 is the input and tensor 1 the
            # output - confirm against the exported engine.
            self.input_name = engine.get_tensor_name(0)
            engine_input_shape = engine.get_tensor_shape(self.input_name)
            if engine_input_shape[0] != -1:
                self.batch_size = engine_input_shape[0]

            self.input_shape = [
                self.batch_size,
                engine_input_shape[1],  # Channels (usually fixed at 3 for RGB)
                1280 if engine_input_shape[2] == -1 else engine_input_shape[2],  # Height
                1280 if engine_input_shape[3] == -1 else engine_input_shape[3],  # Width
            ]
            self.context.set_input_shape(self.input_name, self.input_shape)
            input_size = trt.volume(self.input_shape) * np.dtype(np.float32).itemsize
            self.d_input = cuda.mem_alloc(input_size)

            # output
            self.output_name = engine.get_tensor_name(1)
            engine_output_shape = tuple(engine.get_tensor_shape(self.output_name))
            self.output_shape = [
                self.input_shape[0],  # same batch dimension as the input
                300 if engine_output_shape[1] == -1 else engine_output_shape[1],  # max detections number
                6 if engine_output_shape[2] == -1 else engine_output_shape[2],  # x1 y1 x2 y2 conf cls
            ]
            self.h_output = cuda.pagelocked_empty(tuple(self.output_shape), dtype=np.float32)
            self.d_output = cuda.mem_alloc(self.h_output.nbytes)

            self.stream = cuda.Stream()

        except Exception as e:
            raise RuntimeError(f"Failed to initialize TensorRT engine: {str(e)}") from e

    cdef tuple get_input_shape(self):
        """Return elements 2 and 3 (height, width) of the input shape."""
        return self.input_shape[2], self.input_shape[3]

    cdef int get_batch_size(self):
        """Return the batch size in effect for this engine."""
        return self.batch_size

    cdef get_class_names(self):
        """Return the ``{class_id: name}`` mapping read from the metadata."""
        return self.class_names

    cpdef run(self, input_data):
        """Execute the engine on ``input_data``.

        Args:
            input_data: Array of input values; it is converted to contiguous
                float32 before upload.

        Returns:
            A one-element list holding the output array. The array is a view
            of the engine's reusable host buffer, so its contents are
            overwritten by the next call.

        Raises:
            RuntimeError: If the input does not fit the device buffer or any
                CUDA/TensorRT call fails.
        """
        try:
            host_input = np.ascontiguousarray(input_data, dtype=np.float32)
            # The device buffer was sized from input_shape in __init__; a
            # larger upload would write past the allocation.
            capacity = trt.volume(self.input_shape) * np.dtype(np.float32).itemsize
            if host_input.nbytes > capacity:
                raise ValueError(
                    f"input of {host_input.nbytes} bytes exceeds the device buffer of {capacity} bytes"
                )

            cuda.memcpy_htod_async(self.d_input, host_input, self.stream)
            self.context.set_tensor_address(self.input_name, int(self.d_input))  # input buffer
            self.context.set_tensor_address(self.output_name, int(self.d_output))  # output buffer

            self.context.execute_async_v3(stream_handle=self.stream.handle)
            self.stream.synchronize()

            # The stream is already synchronized, so a blocking copy is used.
            cuda.memcpy_dtoh(self.h_output, self.d_output)
            output = self.h_output.reshape(self.output_shape)
            return [output]

        except Exception as e:
            raise RuntimeError(f"Failed to run TensorRT inference: {str(e)}") from e
|
||||
@@ -9,4 +9,6 @@ msgpack
|
||||
pyjwt
|
||||
zmq
|
||||
requests
|
||||
pyyaml
|
||||
pyyaml
|
||||
pycuda
|
||||
tensorrt
|
||||
@@ -14,6 +14,7 @@ extensions = [
|
||||
Extension('user', ['user.pyx']),
|
||||
Extension('api_client', ['api_client.pyx']),
|
||||
Extension('ai_config', ['ai_config.pyx']),
|
||||
Extension('inference_engine', ['inference_engine.pyx'], include_dirs=[np.get_include()]),
|
||||
Extension('inference', ['inference.pyx'], include_dirs=[np.get_include()]),
|
||||
Extension('main', ['main.pyx']),
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user