# ai-training/src/start_inference.py

import pycuda.driver as cuda
import yaml
import constants
from api_client import ApiClient, ApiCredentials
from cdn_manager import CDNManager, CDNCredentials
from inference.inference import Inference
from inference.tensorrt_engine import TensorRTEngine
from security import Security
from utils import Dotdict


def get_engine_filename(device_id=0):
    """Build the engine file name that matches a CUDA device.

    The name encodes the device's compute capability and its
    multiprocessor count, in the form
    ``azaion.cc_<major>.<minor>_sm_<count>.engine``.

    Args:
        device_id: Ordinal passed to ``cuda.Device``. Defaults to 0.

    Returns:
        The engine file name as a string, or ``None`` if querying the
        device raised any exception.
    """
    try:
        gpu = cuda.Device(device_id)
        cc_major, cc_minor = gpu.compute_capability()
        sm_count = gpu.multiprocessor_count
        return f"azaion.cc_{cc_major}.{cc_minor}_sm_{sm_count}.engine"
    except Exception:
        # Best-effort lookup: any failure is reported as "no name available".
        return None

if __name__ == "__main__":
    # Benchmark notes from earlier experiments (all with
    # confidence_threshold=0.5, iou_threshold=0.3 on 'ForAI_test.mp4',
    # detection for the first 200 sec of video):
    #   OnnxEngine('azaion-2025-03-10.onnx', batch_size=4):
    #       81 sec, 6.3Gb VRAM
    #   tensorrt:
    #       54 sec, 3.7Gb VRAM
    #   TensorRTEngine('azaion-2025-03-10_int8.engine', batch_size=16), INT8:
    #       54 sec, 3.7Gb
    #   TensorRTEngine('azaion-2025-03-10_batch8.engine', batch_size=8):
    #       (no timing recorded)

    # Fetch the encrypted engine that matches this machine.
    api_client = ApiClient()
    key = Security.get_model_encryption_key()
    engine_filename = TensorRTEngine.get_engine_filename()
    model_bytes = api_client.load_big_small_resource(engine_filename, 'models', key)

    # Build the engine from the downloaded bytes and run detection on the
    # test video.
    engine = TensorRTEngine(model_bytes)
    inference = Inference(engine, confidence_threshold=0.5, iou_threshold=0.3)
    inference.process('tests/ForAI_test.mp4')


    # cdn_manager.download(cdn_c.bucket, constants.AI_TENSOR_MODEL_FILE_BIG)
    # tensor_model_bytes = api_client.load_resource(constants.AI_TENSOR_MODEL_FILE_BIG, constants.AI_TENSOR_MODEL_FILE_SMALL)

    # Inference(OnnxEngine(onxx_model_bytes, batch_size=4),
    #           confidence_threshold=0.5, iou_threshold=0.3).process('tests/ForAI_test.mp4')