Write inference and loader logs to file

This commit is contained in:
Alex Bezdieniezhnykh
2025-06-14 16:08:32 +03:00
parent 8aa2f563a4
commit 6f297c4ebf
30 changed files with 218 additions and 140 deletions
+2
View File
@@ -26,6 +26,8 @@ tmp_ret = collect_all('pynvml')
datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
tmp_ret = collect_all('jwt')
datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
tmp_ret = collect_all('loguru')
datas += tmp_ret[0]; binaries += tmp_ret[1]; hiddenimports += tmp_ret[2]
a = Analysis(
+1
View File
@@ -33,6 +33,7 @@ venv\Scripts\pyinstaller --name=azaion-inference ^
--collect-all pycuda ^
--collect-all pynvml ^
--collect-all jwt ^
--collect-all loguru ^
--hidden-import constants ^
--hidden-import file_data ^
--hidden-import remote_command ^
+2 -2
View File
@@ -13,5 +13,5 @@ cdef str MODELS_FOLDER
cdef int SMALL_SIZE_KB
cdef log(str log_message, bytes client_id=*)
cdef log(str log_message)
cdef logerror(str error)
+32 -5
View File
@@ -1,4 +1,6 @@
import time
import sys
from loguru import logger
cdef str CONFIG_FILE = "config.yaml" # Port for the zmq
@@ -10,7 +12,32 @@ cdef str MODELS_FOLDER = "models"
cdef int SMALL_SIZE_KB = 3
cdef log(str log_message, bytes client_id=None):
local_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
client_str = '' if client_id is None else f' {client_id}'
print(f'[{local_time}{client_str}]: {log_message}')
logger.remove()
log_format = "[{time:HH:mm:ss} {level}] {message}"
logger.add(
sink="Logs/log_inference_{time:YYYYMMDD}.txt",
level="INFO",
format=log_format,
enqueue=True,
rotation="1 day",
retention="30 days",
)
logger.add(
sys.stdout,
level="DEBUG",
format=log_format,
filter=lambda record: record["level"].name in ("INFO", "DEBUG", "SUCCESS"),
colorize=True
)
logger.add(
sys.stderr,
level="WARNING",
format=log_format,
colorize=True
)
cdef log(str log_message):
logger.info(log_message)
cdef logerror(str error):
logger.error(error)
+10 -10
View File
@@ -16,7 +16,7 @@ cdef int check_tensor_gpu_index():
deviceCount = pynvml.nvmlDeviceGetCount()
if deviceCount == 0:
print('No NVIDIA GPUs found.')
constants.logerror('No NVIDIA GPUs found.')
return -1
for i in range(deviceCount):
@@ -24,10 +24,10 @@ cdef int check_tensor_gpu_index():
major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle)
if major > 6 or (major == 6 and minor >= 1):
print('found NVIDIA GPU!')
constants.log('found NVIDIA GPU!')
return i
print('NVIDIA GPU doesnt support TensorRT!')
constants.logerror('NVIDIA GPU doesnt support TensorRT!')
return -1
except pynvml.NVMLError:
@@ -36,7 +36,7 @@ cdef int check_tensor_gpu_index():
try:
pynvml.nvmlShutdown()
except:
print('Failed to shutdown pynvml cause probably no NVidia GPU')
constants.logerror('Failed to shutdown pynvml cause probably no NVIDIA GPU')
pass
tensor_gpu_index = check_tensor_gpu_index()
@@ -70,15 +70,15 @@ cdef class Inference:
res = self.loader_client.load_big_small_resource(engine_filename, models_dir)
if res.err is None:
print('tensor rt engine is here, no need to build')
constants.log('tensor rt engine is here, no need to build')
self.is_building_engine = False
updater_callback('enabled')
return
print(res.err)
constants.logerror(res.err)
# time.sleep(8) # prevent simultaneously loading dll and models
updater_callback('converting')
print('try to load onnx')
constants.log('try to load onnx')
res = self.loader_client.load_big_small_resource(constants.AI_ONNX_MODEL_FILE, models_dir)
if res.err is not None:
updater_callback(f'Error. {res.err}')
@@ -87,7 +87,7 @@ cdef class Inference:
res = self.loader_client.upload_big_small_resource(model_bytes, <str> engine_filename, models_dir)
if res.err is not None:
updater_callback(f'Error. {res.err}')
print(f'uploaded {engine_filename} to CDN and API')
constants.log(f'uploaded {engine_filename} to CDN and API')
self.is_building_engine = False
updater_callback('enabled')
except Exception as e:
@@ -212,11 +212,11 @@ cdef class Inference:
# images first, it's faster
if len(images) > 0:
for chunk in self.split_list_extend(images, self.engine.get_batch_size()):
print(f'run inference on {" ".join(chunk)}...')
constants.log(f'run inference on {" ".join(chunk)}...')
self._process_images(cmd, ai_config, chunk)
if len(videos) > 0:
for v in videos:
print(f'run inference on {v}...')
constants.log(f'run inference on {v}...')
self._process_video(cmd, ai_config, v)
+2 -2
View File
@@ -37,7 +37,7 @@ cdef class CommandProcessor:
continue
except Exception as e:
traceback.print_exc()
print('EXIT!')
constants.log('EXIT!')
cdef on_command(self, RemoteCommand command):
try:
@@ -54,7 +54,7 @@ cdef class CommandProcessor:
else:
pass
except Exception as e:
print(f"Error handling client: {e}")
constants.logerror(f"Error handling client: {e}")
cdef on_annotation(self, RemoteCommand cmd, Annotation annotation):
cdef RemoteCommand response = RemoteCommand(CommandType.INFERENCE_DATA, annotation.serialize())
+3 -2
View File
@@ -1,5 +1,6 @@
from inference_engine cimport InferenceEngine
import onnxruntime as onnx
cimport constants
cdef class OnnxEngine(InferenceEngine):
def __init__(self, model_bytes: bytes, batch_size: int = 1, **kwargs):
@@ -10,9 +11,9 @@ cdef class OnnxEngine(InferenceEngine):
self.input_name = self.model_inputs[0].name
self.input_shape = self.model_inputs[0].shape
self.batch_size = self.input_shape[0] if self.input_shape[0] != -1 else batch_size
print(f'AI detection model input: {self.model_inputs} {self.input_shape}')
constants.log(f'AI detection model input: {self.model_inputs} {self.input_shape}')
model_meta = self.session.get_modelmeta()
print("Metadata:", model_meta.custom_metadata_map)
constants.log(f"Metadata: {model_meta.custom_metadata_map}")
cpdef tuple get_input_shape(self):
shape = self.input_shape
+3 -4
View File
@@ -3,7 +3,6 @@ import zmq
from threading import Thread, Event
from remote_command cimport RemoteCommand
cimport constants
import yaml
cdef class RemoteCommandHandler:
def __init__(self, int zmq_port, object on_command):
@@ -28,7 +27,7 @@ cdef class RemoteCommandHandler:
for _ in range(4): # 4 worker threads
worker = Thread(target=self._worker_loop, daemon=True)
self._workers.append(worker)
print(f'Listening to commands on port {zmq_port}...')
constants.log(f'Listening to commands on port {zmq_port}...')
cdef start(self):
self._proxy_thread.start()
@@ -40,7 +39,7 @@ cdef class RemoteCommandHandler:
zmq.proxy_steerable(self._router, self._dealer, control=self._control)
except zmq.error.ZMQError as e:
if self._shutdown_event.is_set():
print("Shutdown, exit proxy loop.")
constants.log("Shutdown, exit proxy loop.")
else:
raise
@@ -59,7 +58,7 @@ cdef class RemoteCommandHandler:
client_id, message = worker_socket.recv_multipart()
cmd = RemoteCommand.from_msgpack(<bytes> message)
cmd.client_id = client_id
constants.log(<str>f'{cmd}', client_id)
constants.log(cmd)
self._on_command(cmd)
except Exception as e:
if not self._shutdown_event.is_set():
+1
View File
@@ -14,3 +14,4 @@ pycuda
tensorrt
pynvml
boto3
loguru
+2 -2
View File
@@ -103,11 +103,11 @@ cdef class TensorRTEngine(InferenceEngine):
constants.log('Converting to supported fp16')
config.set_flag(trt.BuilderFlag.FP16)
else:
print('Converting to supported fp32. (fp16 is not supported)')
constants.log('Converting to supported fp32. (fp16 is not supported)')
plan = builder.build_serialized_network(network, config)
if plan is None:
print('Conversion failed.')
constants.logerror('Conversion failed.')
return None
constants.log('conversion done!')
return bytes(plan)
@@ -215,7 +215,7 @@ class Api:
response = requests.post(url, data=payload, headers=headers, stream=True)
if response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR:
print('500!')
print('500!') #test
key = self.get_encryption_key(hardware.hash)