mirror of
https://github.com/azaion/detections.git
synced 2026-04-22 06:46:32 +00:00
[AZ-180] Add Jetson Orin Nano support with INT8 TensorRT engine
- Dockerfile.jetson: JetPack 6.x L4T base image (aarch64), TensorRT and PyCUDA from apt
- requirements-jetson.txt: derived from requirements.txt, no pip tensorrt/pycuda
- docker-compose.jetson.yml: runtime: nvidia for NVIDIA Container Runtime
- tensorrt_engine.pyx: convert_from_source accepts optional calib_cache_path; INT8 used when cache present, FP16 fallback; get_engine_filename encodes precision suffix to avoid engine cache confusion
- inference.pyx: init_ai tries INT8 engine then FP16 on lookup; downloads calibration cache before conversion thread; passes cache path through to convert_from_source
- constants_inf: add INT8_CALIB_CACHE_FILE constant
- Unit tests for AC-3 (INT8 flag set when cache provided) and AC-4 (FP16 when no cache)

Made-with: Cursor
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
cdef str CONFIG_FILE
|
||||
|
||||
cdef str AI_ONNX_MODEL_FILE
|
||||
cdef str INT8_CALIB_CACHE_FILE
|
||||
|
||||
cdef str CDN_CONFIG
|
||||
cdef str MODELS_FOLDER
|
||||
|
||||
@@ -6,6 +6,7 @@ from loguru import logger
|
||||
|
||||
cdef str CONFIG_FILE = "config.yaml"
|
||||
cdef str AI_ONNX_MODEL_FILE = "azaion.onnx"
|
||||
# Resource name of the INT8 calibration cache; Inference._try_download_calib_cache
# requests it from the loader together with the models folder.
cdef str INT8_CALIB_CACHE_FILE = "azaion.int8_calib.cache"
|
||||
|
||||
cdef str CDN_CONFIG = "cdn.yaml"
|
||||
cdef str MODELS_FOLDER = "models"
|
||||
|
||||
@@ -4,11 +4,31 @@ import pycuda.driver as cuda # pyright: ignore[reportMissingImports]
|
||||
import pycuda.autoinit # pyright: ignore[reportMissingImports]
|
||||
import pynvml
|
||||
import numpy as np
|
||||
import os
|
||||
cimport constants_inf
|
||||
|
||||
GPU_MEMORY_FRACTION = 0.8
|
||||
|
||||
|
||||
class _CacheCalibrator(trt.IInt8EntropyCalibrator2):
    """INT8 calibrator backed solely by an existing calibration cache file.

    It offers no calibration batches of its own: ``get_batch`` always returns
    ``None`` and ``write_calibration_cache`` discards its argument, so the
    only data it can hand back is the content of the file at ``path``.
    """

    def __init__(self, path):
        # Initialise the TensorRT base class before storing our own state.
        super().__init__()
        self._path = path

    def get_batch_size(self):
        """Return the fixed batch size of 1."""
        return 1

    def get_batch(self, names):
        """Return ``None``; this calibrator has no input batches to provide."""
        return None

    def read_calibration_cache(self):
        """Return the raw bytes of the cache file given at construction.

        Raises whatever ``open`` raises if the file cannot be read.
        """
        with open(self._path, 'rb') as cache_file:
            cache_bytes = cache_file.read()
        return cache_bytes

    def write_calibration_cache(self, cache):
        """Ignore ``cache``; nothing is persisted."""
        return None
|
||||
|
||||
|
||||
cdef class TensorRTEngine(InferenceEngine):
|
||||
def __init__(self, model_bytes: bytes, max_batch_size: int = 8, **kwargs):
|
||||
InferenceEngine.__init__(self, model_bytes, max_batch_size, engine_name="tensorrt")
|
||||
@@ -80,13 +100,16 @@ cdef class TensorRTEngine(InferenceEngine):
|
||||
return 2 * 1024 * 1024 * 1024 if total_memory is None else total_memory
|
||||
|
||||
@staticmethod
def get_engine_filename(str precision="fp16"):
    """Build the engine file name for the selected GPU and precision.

    The name encodes the device's compute capability and multiprocessor
    count. INT8 engines additionally carry an ``.int8`` suffix so they get a
    different file name than the default engine built for the same device.

    Args:
        precision: ``"int8"`` selects the INT8 file name; any other value
            (default ``"fp16"``) yields the unsuffixed name.

    Returns:
        The engine file name, or ``None`` if anything in the lookup raises.
    """
    try:
        from engines import tensor_gpu_index
        # A negative index is clamped to device 0.
        device = cuda.Device(max(tensor_gpu_index, 0))
        sm_count = device.multiprocessor_count
        cc_major, cc_minor = device.compute_capability()
        base = f"azaion.cc_{cc_major}.{cc_minor}_sm_{sm_count}"
        if precision == "int8":
            return f"{base}.int8.engine"
        return f"{base}.engine"
    except Exception:
        # Deliberate best-effort: any failure (import or device query) is
        # reported to the caller as "no engine file name available".
        return None
|
||||
|
||||
@@ -96,7 +119,7 @@ cdef class TensorRTEngine(InferenceEngine):
|
||||
return constants_inf.AI_ONNX_MODEL_FILE
|
||||
|
||||
@staticmethod
|
||||
def convert_from_source(bytes onnx_model):
|
||||
def convert_from_source(bytes onnx_model, str calib_cache_path=None):
|
||||
gpu_mem = TensorRTEngine.get_gpu_memory_bytes(0)
|
||||
workspace_bytes = int(gpu_mem * 0.9)
|
||||
|
||||
@@ -130,7 +153,13 @@ cdef class TensorRTEngine(InferenceEngine):
|
||||
)
|
||||
config.add_optimization_profile(profile)
|
||||
|
||||
if builder.platform_has_fast_fp16:
|
||||
use_int8 = calib_cache_path is not None and os.path.isfile(calib_cache_path)
|
||||
if use_int8:
|
||||
constants_inf.log(<str>'Converting to INT8 with calibration cache')
|
||||
calibrator = _CacheCalibrator(calib_cache_path)
|
||||
config.set_flag(trt.BuilderFlag.INT8)
|
||||
config.int8_calibrator = calibrator
|
||||
elif builder.platform_has_fast_fp16:
|
||||
constants_inf.log(<str>'Converting to supported fp16')
|
||||
config.set_flag(trt.BuilderFlag.FP16)
|
||||
else:
|
||||
|
||||
+50
-21
@@ -1,4 +1,6 @@
|
||||
import io
|
||||
import os
|
||||
import tempfile
|
||||
import threading
|
||||
|
||||
import av
|
||||
@@ -74,11 +76,11 @@ cdef class Inference:
|
||||
raise Exception(res.err)
|
||||
return <bytes>res.data
|
||||
|
||||
cdef convert_and_upload_model(self, bytes source_bytes, str engine_filename):
|
||||
cdef convert_and_upload_model(self, bytes source_bytes, str engine_filename, str calib_cache_path):
|
||||
try:
|
||||
self.ai_availability_status.set_status(AIAvailabilityEnum.CONVERTING)
|
||||
models_dir = constants_inf.MODELS_FOLDER
|
||||
model_bytes = EngineClass.convert_from_source(source_bytes)
|
||||
model_bytes = EngineClass.convert_from_source(source_bytes, calib_cache_path)
|
||||
|
||||
self.ai_availability_status.set_status(AIAvailabilityEnum.UPLOADING)
|
||||
res = self.loader_client.upload_big_small_resource(model_bytes, engine_filename, models_dir)
|
||||
@@ -92,6 +94,11 @@ cdef class Inference:
|
||||
self._converted_model_bytes = <bytes>None
|
||||
finally:
|
||||
self.is_building_engine = <bint>False
|
||||
if calib_cache_path is not None:
|
||||
try:
|
||||
os.unlink(calib_cache_path)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
cdef init_ai(self):
|
||||
constants_inf.log(<str> 'init AI...')
|
||||
@@ -112,28 +119,35 @@ cdef class Inference:
|
||||
return
|
||||
|
||||
models_dir = constants_inf.MODELS_FOLDER
|
||||
engine_filename = EngineClass.get_engine_filename()
|
||||
if engine_filename is not None:
|
||||
try:
|
||||
self.ai_availability_status.set_status(AIAvailabilityEnum.DOWNLOADING)
|
||||
res = self.loader_client.load_big_small_resource(engine_filename, models_dir)
|
||||
if res.err is not None:
|
||||
raise Exception(res.err)
|
||||
self.engine = EngineClass(res.data)
|
||||
self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
|
||||
except Exception as e:
|
||||
source_filename = EngineClass.get_source_filename()
|
||||
if source_filename is None:
|
||||
self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str>f"Pre-built engine not found: {str(e)}")
|
||||
engine_filename_fp16 = EngineClass.get_engine_filename()
|
||||
if engine_filename_fp16 is not None:
|
||||
engine_filename_int8 = EngineClass.get_engine_filename(<str>"int8")
|
||||
for candidate in [engine_filename_int8, engine_filename_fp16]:
|
||||
try:
|
||||
self.ai_availability_status.set_status(AIAvailabilityEnum.DOWNLOADING)
|
||||
res = self.loader_client.load_big_small_resource(candidate, models_dir)
|
||||
if res.err is not None:
|
||||
raise Exception(res.err)
|
||||
self.engine = EngineClass(res.data)
|
||||
self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
|
||||
return
|
||||
self.ai_availability_status.set_status(AIAvailabilityEnum.WARNING, <str>str(e))
|
||||
source_bytes = self.download_model(source_filename)
|
||||
self.is_building_engine = <bint>True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
thread = Thread(target=self.convert_and_upload_model, args=(source_bytes, engine_filename))
|
||||
thread.daemon = True
|
||||
thread.start()
|
||||
source_filename = EngineClass.get_source_filename()
|
||||
if source_filename is None:
|
||||
self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str>"Pre-built engine not found and no source available")
|
||||
return
|
||||
self.ai_availability_status.set_status(AIAvailabilityEnum.WARNING, <str>"Cached engine not found, converting from source")
|
||||
source_bytes = self.download_model(source_filename)
|
||||
calib_cache_path = self._try_download_calib_cache(models_dir)
|
||||
target_engine_filename = EngineClass.get_engine_filename(<str>"int8") if calib_cache_path is not None else engine_filename_fp16
|
||||
self.is_building_engine = <bint>True
|
||||
|
||||
thread = Thread(target=self.convert_and_upload_model, args=(source_bytes, target_engine_filename, calib_cache_path))
|
||||
thread.daemon = True
|
||||
thread.start()
|
||||
return
|
||||
else:
|
||||
self.engine = EngineClass(<bytes>self.download_model(constants_inf.AI_ONNX_MODEL_FILE))
|
||||
self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
|
||||
@@ -142,6 +156,21 @@ cdef class Inference:
|
||||
self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str>str(e))
|
||||
self.is_building_engine = <bint>False
|
||||
|
||||
cdef str _try_download_calib_cache(self, str models_dir):
    # Fetch the INT8 calibration cache through the loader client and store it
    # in a temporary file.
    #
    # Returns the path of the temporary file on success, or None when the
    # loader reports an error or anything raises. The caller owns the
    # returned file and is responsible for deleting it.
    tmp_path = None
    try:
        res = self.loader_client.load_big_small_resource(constants_inf.INT8_CALIB_CACHE_FILE, models_dir)
        if res.err is not None:
            constants_inf.log(<str>f"INT8 calibration cache not available: {res.err}")
            return <str>None

        fd, tmp_path = tempfile.mkstemp(suffix='.cache')
        with os.fdopen(fd, 'wb') as f:
            f.write(res.data)
        constants_inf.log(<str>'INT8 calibration cache downloaded')
        return <str>tmp_path
    except Exception as e:
        constants_inf.log(<str>f"INT8 calibration cache download failed: {str(e)}")
        # The caller only sees None on failure, so a temp file created before
        # the error would otherwise be orphaned on disk; remove it here.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
        return <str>None
|
||||
|
||||
cpdef run_detect_image(self, bytes image_bytes, AIRecognitionConfig ai_config, str media_name,
|
||||
object annotation_callback, object status_callback=None):
|
||||
cdef list all_frame_data = []
|
||||
|
||||
Reference in New Issue
Block a user