[AZ-180] Refactor inference and engine factory for improved model handling

- Updated the autopilot state to mark the current task as in progress.
- Refactored the inference module to streamline model downloading and conversion, replacing the inference-side download_model method with a load_source method on the engine factory.
- Introduced asynchronous model building in the inference module to enhance performance during model conversion.
- Enhanced the engine factory to include a new method for building and caching models, improving error handling and logging during the upload process.
- Added calibration cache handling in the Jetson TensorRT engine for better resource management.

Made-with: Cursor
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-04-03 06:41:11 +03:00
parent 834f846dc8
commit 8116b55813
4 changed files with 64 additions and 54 deletions
+5 -19
View File
@@ -66,28 +66,14 @@ cdef class Inference:
return None
cdef bytes download_model(self, str filename):
models_dir = constants_inf.MODELS_FOLDER
self.ai_availability_status.set_status(AIAvailabilityEnum.DOWNLOADING)
res = self.loader_client.load_big_small_resource(filename, models_dir)
if res.err is not None:
raise Exception(res.err)
return <bytes>res.data
cdef convert_and_upload_model(self, bytes source_bytes, str models_dir):
cdef _build_engine_async(self, bytes source_bytes, str models_dir):
try:
self.ai_availability_status.set_status(AIAvailabilityEnum.CONVERTING)
engine_bytes, engine_filename = engine_factory.build_from_source(source_bytes, self.loader_client, models_dir)
self.ai_availability_status.set_status(AIAvailabilityEnum.UPLOADING)
res = self.loader_client.upload_big_small_resource(engine_bytes, engine_filename, models_dir)
if res.err is not None:
self.ai_availability_status.set_status(AIAvailabilityEnum.WARNING, <str>f"Failed to upload converted model: {res.err}")
engine_bytes = engine_factory.build_and_cache(source_bytes, self.loader_client, models_dir)
self._converted_model_bytes = engine_bytes
self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
except Exception as e:
self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str> str(e))
self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str>str(e))
self._converted_model_bytes = <bytes>None
finally:
self.is_building_engine = <bint>False
@@ -123,12 +109,12 @@ cdef class Inference:
self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str>"No engine available and no source to build from")
return
source_bytes = self.download_model(source_filename)
source_bytes = engine_factory.load_source(self.loader_client, models_dir)
if engine_factory.has_build_step:
self.ai_availability_status.set_status(AIAvailabilityEnum.WARNING, <str>"Cached engine not found, converting from source")
self.is_building_engine = <bint>True
thread = Thread(target=self.convert_and_upload_model, args=(source_bytes, models_dir))
thread = Thread(target=self._build_engine_async, args=(source_bytes, models_dir))
thread.daemon = True
thread.start()
else: