"""Entry point: pick the TensorRT engine matching the local GPU, download it, run inference."""

import pycuda.driver as cuda
import yaml

import constants
from api_client import ApiClient, ApiCredentials
from cdn_manager import CDNManager, CDNCredentials
from inference.inference import Inference
from inference.tensorrt_engine import TensorRTEngine
from security import Security
from utils import Dotdict


def get_engine_filename(device_id=0):
    """Return the engine filename keyed to the GPU's compute capability and SM count.

    Args:
        device_id: CUDA device ordinal to query (default 0).

    Returns:
        A filename of the form ``azaion.cc_{major}.{minor}_sm_{count}.engine``,
        or ``None`` if the device cannot be queried (no GPU, driver error, ...).
    """
    try:
        # NOTE(review): pycuda normally requires cuda.init() (or importing
        # pycuda.autoinit) before Device() works -- confirm initialization
        # happens upstream, otherwise this will always fall into the except.
        device = cuda.Device(device_id)
        sm_count = device.multiprocessor_count
        cc_major, cc_minor = device.compute_capability()
        return f"azaion.cc_{cc_major}.{cc_minor}_sm_{sm_count}.engine"
    except Exception:
        # Deliberate best-effort: signal "no usable device" with None instead
        # of crashing; the caller decides how to handle it.
        return None


if __name__ == "__main__":
    # Benchmark notes (detection over the first 200 s of ForAI_test.mp4):
    #   ONNX Runtime (batch 4):       81 s, 6.3 GB VRAM
    #   TensorRT (batch 8/16, INT8):  54 s, 3.7 GB VRAM

    api_client = ApiClient()
    key = Security.get_model_encryption_key()

    # BUG FIX: the filename helper lives in this module; the previous code
    # called TensorRTEngine.get_engine_filename(), leaving the local helper
    # dead and depending on a duplicate (or missing) method on the engine class.
    engine_filename = get_engine_filename()
    if engine_filename is None:
        # Fail loudly instead of passing None down into the resource loader.
        raise RuntimeError(
            "Could not determine engine filename: no usable CUDA device found."
        )

    model_bytes = api_client.load_big_small_resource(engine_filename, 'models', key)
    Inference(TensorRTEngine(model_bytes),
              confidence_threshold=0.5,
              iou_threshold=0.3).process('tests/ForAI_test.mp4')