diff --git a/_docs/_autopilot_state.md b/_docs/_autopilot_state.md index a9a3228..dbdb740 100644 --- a/_docs/_autopilot_state.md +++ b/_docs/_autopilot_state.md @@ -4,8 +4,8 @@ flow: existing-code step: 8 name: New Task -status: not_started -sub_step: 0 +status: in_progress +sub_step: 1 — Gather Feature Description retry_count: 0 ## Cycle Notes diff --git a/src/engines/engine_factory.pyx b/src/engines/engine_factory.pyx index 253a3bd..2db0d65 100644 --- a/src/engines/engine_factory.pyx +++ b/src/engines/engine_factory.pyx @@ -1,5 +1,3 @@ -import os -import tempfile from loader_http_client cimport LoaderHttpClient, LoadResult @@ -29,9 +27,28 @@ class EngineFactory: def get_source_filename(self): return None + def load_source(self, LoaderHttpClient loader_client, str models_dir): + cdef LoadResult res + filename = self.get_source_filename() + if filename is None: + return None + res = loader_client.load_big_small_resource(filename, models_dir) + if res.err is not None: + raise Exception(res.err) + return res.data + def build_from_source(self, onnx_bytes, loader_client, models_dir): raise NotImplementedError(f"{type(self).__name__} does not support building from source") + def build_and_cache(self, bytes source_bytes, LoaderHttpClient loader_client, str models_dir): + cdef LoadResult res + import constants_inf + engine_bytes, engine_filename = self.build_from_source(source_bytes, loader_client, models_dir) + res = loader_client.upload_big_small_resource(engine_bytes, engine_filename, models_dir) + if res.err is not None: + constants_inf.log(f"Failed to upload converted model: {res.err}") + return engine_bytes + class OnnxEngineFactory(EngineFactory): def create(self, model_bytes: bytes): @@ -83,34 +100,7 @@ class JetsonTensorRTEngineFactory(TensorRTEngineFactory): return TensorRTEngine.get_engine_filename("int8") def build_from_source(self, onnx_bytes, LoaderHttpClient loader_client, str models_dir): - cdef str calib_cache_path + from engines.jetson_tensorrt_engine import JetsonTensorRTEngine from engines.tensorrt_engine import TensorRTEngine - calib_cache_path = self._download_calib_cache(loader_client, models_dir) - try: - engine_bytes = TensorRTEngine.convert_from_source(onnx_bytes, calib_cache_path) - return engine_bytes, TensorRTEngine.get_engine_filename("int8") - finally: - if calib_cache_path is not None: - try: - os.unlink(calib_cache_path) - except Exception: - pass - - def _download_calib_cache(self, LoaderHttpClient loader_client, str models_dir): - cdef LoadResult res - import constants_inf - try: - res = loader_client.load_big_small_resource( - constants_inf.INT8_CALIB_CACHE_FILE, models_dir - ) - if res.err is not None: - constants_inf.log(f"INT8 calibration cache not available: {res.err}") - return None - fd, path = tempfile.mkstemp(suffix=".cache") - with os.fdopen(fd, "wb") as f: - f.write(res.data) - constants_inf.log("INT8 calibration cache downloaded") - return path - except Exception as e: - constants_inf.log(f"INT8 calibration cache download failed: {str(e)}") - return None + engine_bytes = JetsonTensorRTEngine.convert_from_source(onnx_bytes, loader_client, models_dir) + return engine_bytes, TensorRTEngine.get_engine_filename("int8") diff --git a/src/engines/jetson_tensorrt_engine.pyx b/src/engines/jetson_tensorrt_engine.pyx index 3f77e1e..cc9fc56 100644 --- a/src/engines/jetson_tensorrt_engine.pyx +++ b/src/engines/jetson_tensorrt_engine.pyx @@ -1,5 +1,39 @@ +import os +import tempfile from engines.tensorrt_engine cimport TensorRTEngine +from loader_http_client cimport LoaderHttpClient, LoadResult cdef class JetsonTensorRTEngine(TensorRTEngine): - pass + @staticmethod + def convert_from_source(bytes onnx_model, LoaderHttpClient loader_client, str models_dir): + cdef str calib_cache_path + calib_cache_path = JetsonTensorRTEngine._download_calib_cache(loader_client, models_dir) + try: + return TensorRTEngine.convert_from_source(onnx_model, calib_cache_path) + finally: + if calib_cache_path is not None: + try: + os.unlink(calib_cache_path) + except Exception: + pass + + @staticmethod + def _download_calib_cache(LoaderHttpClient loader_client, str models_dir): + cdef LoadResult res + import constants_inf + try: + res = loader_client.load_big_small_resource( + constants_inf.INT8_CALIB_CACHE_FILE, models_dir + ) + if res.err is not None: + constants_inf.log(f"INT8 calibration cache not available: {res.err}") + return None + fd, path = tempfile.mkstemp(suffix=".cache") + with os.fdopen(fd, "wb") as f: + f.write(res.data) + constants_inf.log("INT8 calibration cache downloaded") + return path + except Exception as e: + constants_inf.log(f"INT8 calibration cache download failed: {str(e)}") + return None diff --git a/src/inference.pyx b/src/inference.pyx index 1af4ed6..ef3bbff 100644 --- a/src/inference.pyx +++ b/src/inference.pyx @@ -66,28 +66,14 @@ cdef class Inference: return None - cdef bytes download_model(self, str filename): - models_dir = constants_inf.MODELS_FOLDER - self.ai_availability_status.set_status(AIAvailabilityEnum.DOWNLOADING) - res = self.loader_client.load_big_small_resource(filename, models_dir) - if res.err is not None: - raise Exception(res.err) - return res.data - - cdef convert_and_upload_model(self, bytes source_bytes, str models_dir): + cdef _build_engine_async(self, bytes source_bytes, str models_dir): try: self.ai_availability_status.set_status(AIAvailabilityEnum.CONVERTING) - engine_bytes, engine_filename = engine_factory.build_from_source(source_bytes, self.loader_client, models_dir) - - self.ai_availability_status.set_status(AIAvailabilityEnum.UPLOADING) - res = self.loader_client.upload_big_small_resource(engine_bytes, engine_filename, models_dir) - if res.err is not None: - self.ai_availability_status.set_status(AIAvailabilityEnum.WARNING, f"Failed to upload converted model: {res.err}") - + engine_bytes = engine_factory.build_and_cache(source_bytes, self.loader_client, models_dir) self._converted_model_bytes = engine_bytes self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED) except Exception as e: - self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, str(e)) + self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, str(e)) self._converted_model_bytes = None finally: self.is_building_engine = False @@ -123,12 +109,12 @@ cdef class Inference: self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, "No engine available and no source to build from") return - source_bytes = self.download_model(source_filename) + source_bytes = engine_factory.load_source(self.loader_client, models_dir) if engine_factory.has_build_step: self.ai_availability_status.set_status(AIAvailabilityEnum.WARNING, "Cached engine not found, converting from source") self.is_building_engine = True - thread = Thread(target=self.convert_and_upload_model, args=(source_bytes, models_dir)) + thread = Thread(target=self._build_engine_async, args=(source_bytes, models_dir)) thread.daemon = True thread.start() else: