cimport constants_inf
from loader_http_client cimport LoaderHttpClient, LoadResult


class EngineFactory:
    """Base factory for loading, creating and optionally building engines.

    Subclasses override ``create`` plus the filename hooks
    (``_get_ai_engine_filename`` / ``get_source_filename``) and, when they
    can convert a source model, ``build_from_source``.
    """

    # True only on the factories in this file that override
    # ``build_from_source``; how callers use the flag is not visible here.
    has_build_step = False

    def create(self, model_bytes: bytes):
        """Instantiate the engine from ``model_bytes`` (subclass hook).

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise NotImplementedError

    def load_engine(self, LoaderHttpClient loader_client, str models_dir):
        """Best-effort load of a prebuilt engine.

        Fetches the file named by ``_get_ai_engine_filename()`` through
        ``loader_client.load_big_small_resource`` and passes its data to
        ``create``.

        Returns:
            The created engine, or ``None`` when there is no engine filename,
            the load result carries an error, or an exception was raised
            while loading/creating (the exception is logged, not propagated).
        """
        cdef str filename
        cdef LoadResult res
        filename = self._get_ai_engine_filename()
        if filename is None:
            return None
        try:
            res = loader_client.load_big_small_resource(filename, models_dir)
            if res.err is None:
                return self.create(res.data)
        except Exception as exc:
            # Still best-effort, but leave a trace so a broken cached engine
            # or a failing loader is diagnosable instead of silently ignored.
            constants_inf.log(f"Failed to load engine '{filename}': {exc}")
        return None

    def _get_ai_engine_filename(self):
        """Return the prebuilt engine filename, or ``None`` if there is none."""
        return None

    def get_source_filename(self):
        """Return the source model filename, or ``None`` if there is none."""
        return None

    def load_source(self, LoaderHttpClient loader_client, str models_dir):
        """Load the source model named by ``get_source_filename()``.

        Returns:
            ``res.data`` of the load result, or ``None`` when the factory has
            no source filename.

        Raises:
            Exception: carrying ``res.err`` when the load result reports an
                error.
        """
        cdef LoadResult res
        filename = self.get_source_filename()
        if filename is None:
            return None
        res = loader_client.load_big_small_resource(filename, models_dir)
        if res.err is not None:
            raise Exception(res.err)
        return res.data

    def build_from_source(self, onnx_bytes, loader_client, models_dir):
        """Convert source bytes into ``(engine_bytes, engine_filename)``.

        Raises:
            NotImplementedError: in the base class; overriding factories
                return the two-element tuple that ``build_and_cache`` unpacks.
        """
        raise NotImplementedError(f"{type(self).__name__} does not support building from source")

    def build_and_cache(self, bytes source_bytes, LoaderHttpClient loader_client, str models_dir):
        """Build an engine from ``source_bytes`` and upload the result.

        The upload goes through ``loader_client.upload_big_small_resource``;
        an upload error is logged only, and the freshly built bytes are still
        returned.

        Returns:
            The engine bytes produced by ``build_from_source``.

        Raises:
            RuntimeError: when ``build_from_source`` yields no engine bytes
                or no engine filename.
        """
        cdef LoadResult res
        engine_bytes, engine_filename = self.build_from_source(source_bytes, loader_client, models_dir)
        if engine_bytes is None:
            raise RuntimeError("TensorRT conversion failed: no engine bytes produced")
        if engine_filename is None:
            raise RuntimeError("TensorRT conversion failed: engine filename could not be resolved")
        res = loader_client.upload_big_small_resource(engine_bytes, engine_filename, models_dir)
        if res.err is not None:
            # Upload failure is non-fatal: the caller still gets the engine.
            constants_inf.log(f"Failed to upload converted model: {res.err}")
        return engine_bytes


class OnnxEngineFactory(EngineFactory):
    """Factory producing ``OnnxEngine`` instances."""

    def create(self, model_bytes: bytes):
        """Create an ``OnnxEngine`` from ``model_bytes``."""
        # Imported lazily, as in the rest of this file's factories.
        from engines.onnx_engine import OnnxEngine
        return OnnxEngine(model_bytes)

    def get_source_filename(self):
        """Return ``constants_inf.AI_ONNX_MODEL_FILE``."""
        return constants_inf.AI_ONNX_MODEL_FILE


class CoreMLEngineFactory(EngineFactory):
    """Factory producing ``CoreMLEngine`` instances."""

    def create(self, model_bytes: bytes):
        """Create a ``CoreMLEngine`` from ``model_bytes``."""
        from engines.coreml_engine import CoreMLEngine
        return CoreMLEngine(model_bytes)

    def _get_ai_engine_filename(self):
        """Return the fixed CoreML engine archive name."""
        return "azaion_coreml.zip"


class TensorRTEngineFactory(EngineFactory):
    """Factory producing ``TensorRTEngine`` instances, with a build step."""

    has_build_step = True

    def create(self, model_bytes: bytes):
        """Create a ``TensorRTEngine`` from ``model_bytes``."""
        from engines.tensorrt_engine import TensorRTEngine
        return TensorRTEngine(model_bytes)

    def _get_ai_engine_filename(self):
        """Return ``TensorRTEngine.get_engine_filename()``."""
        from engines.tensorrt_engine import TensorRTEngine
        return TensorRTEngine.get_engine_filename()

    def get_source_filename(self):
        """Return ``constants_inf.AI_ONNX_MODEL_FILE``."""
        return constants_inf.AI_ONNX_MODEL_FILE

    def build_from_source(self, onnx_bytes, loader_client, models_dir):
        """Convert ``onnx_bytes`` via ``TensorRTEngine.convert_from_source``.

        ``loader_client`` and ``models_dir`` are accepted for interface
        compatibility and are not used by this implementation.

        Returns:
            ``(engine_bytes, engine_filename)``.
        """
        from engines.tensorrt_engine import TensorRTEngine
        engine_bytes = TensorRTEngine.convert_from_source(onnx_bytes, None)
        return engine_bytes, TensorRTEngine.get_engine_filename()


class JetsonTensorRTEngineFactory(TensorRTEngineFactory):
    """Factory producing ``JetsonTensorRTEngine`` instances."""

    def create(self, model_bytes: bytes):
        """Create a ``JetsonTensorRTEngine`` from ``model_bytes``."""
        from engines.jetson_tensorrt_engine import JetsonTensorRTEngine
        return JetsonTensorRTEngine(model_bytes)

    def load_engine(self, LoaderHttpClient loader_client, str models_dir):
        """Best-effort load of a prebuilt engine, trying "int8" then "fp16".

        For each precision the filename comes from
        ``TensorRTEngine.get_engine_filename(precision)``; the first file
        that loads without error is passed to ``create``.

        Returns:
            The created engine, or ``None`` when no precision yields one.
            Exceptions raised while loading/creating are logged and the next
            precision is tried.
        """
        cdef str filename
        cdef LoadResult res
        from engines.tensorrt_engine import TensorRTEngine
        for precision in ("int8", "fp16"):
            filename = TensorRTEngine.get_engine_filename(precision)
            if filename is None:
                continue
            try:
                res = loader_client.load_big_small_resource(filename, models_dir)
                if res.err is None:
                    return self.create(res.data)
            except Exception as exc:
                # Keep falling through to the next precision, but record why
                # this candidate was rejected instead of hiding the failure.
                constants_inf.log(f"Failed to load engine '{filename}': {exc}")
        return None

    def _get_ai_engine_filename(self):
        """Return ``TensorRTEngine.get_engine_filename("int8")``."""
        from engines.tensorrt_engine import TensorRTEngine
        return TensorRTEngine.get_engine_filename("int8")

    def build_from_source(self, onnx_bytes, LoaderHttpClient loader_client, str models_dir):
        """Convert via ``JetsonTensorRTEngine.convert_from_source_with_precision``.

        Returns:
            ``(engine_bytes, engine_filename)`` where the filename is
            ``TensorRTEngine.get_engine_filename(precision)`` for the
            precision reported by the conversion call.
        """
        from engines.jetson_tensorrt_engine import JetsonTensorRTEngine
        from engines.tensorrt_engine import TensorRTEngine
        engine_bytes, precision = JetsonTensorRTEngine.convert_from_source_with_precision(onnx_bytes, loader_client, models_dir)
        return engine_bytes, TensorRTEngine.get_engine_filename(precision)