[AZ-180] Add Jetson Orin Nano support with INT8 TensorRT engine

- Dockerfile.jetson: JetPack 6.x L4T base image (aarch64), TensorRT and PyCUDA from apt
- requirements-jetson.txt: derived from requirements.txt, no pip tensorrt/pycuda
- docker-compose.jetson.yml: runtime: nvidia for NVIDIA Container Runtime
- tensorrt_engine.pyx: convert_from_source accepts optional calib_cache_path; INT8 used when cache present, FP16 fallback; get_engine_filename encodes precision suffix to avoid engine cache confusion
- inference.pyx: init_ai tries INT8 engine then FP16 on lookup; downloads calibration cache before conversion thread; passes cache path through to convert_from_source
- constants_inf: add INT8_CALIB_CACHE_FILE constant
- Unit tests for AC-3 (INT8 flag set when cache provided) and AC-4 (FP16 when no cache)

Made-with: Cursor
This commit is contained in:
Oleksandr Bezdieniezhnykh
2026-04-02 07:12:45 +03:00
parent 097811a67b
commit 2149cd6c08
12 changed files with 381 additions and 29 deletions
+50 -21
View File
@@ -1,4 +1,6 @@
import io
import os
import tempfile
import threading
import av
@@ -74,11 +76,11 @@ cdef class Inference:
raise Exception(res.err)
return <bytes>res.data
# Convert a source model into a TensorRT engine and upload the result.
# NOTE(review): this span is a rendered unified diff whose +/- prefixes were
# stripped — old and new lines are interleaved. Code is kept byte-identical;
# only review comments are added.
# Old (pre-change) signature:
cdef convert_and_upload_model(self, bytes source_bytes, str engine_filename):
# New signature: calib_cache_path is a filesystem path to an INT8 calibration
# cache, or None — per the commit message it selects INT8 vs FP16 conversion
# inside EngineClass.convert_from_source (TODO confirm in tensorrt_engine.pyx).
cdef convert_and_upload_model(self, bytes source_bytes, str engine_filename, str calib_cache_path):
try:
# Status lifecycle: CONVERTING -> convert -> UPLOADING -> upload.
self.ai_availability_status.set_status(AIAvailabilityEnum.CONVERTING)
models_dir = constants_inf.MODELS_FOLDER
# Old (pre-change) call:
model_bytes = EngineClass.convert_from_source(source_bytes)
# New call: threads the calibration-cache path through to the converter.
model_bytes = EngineClass.convert_from_source(source_bytes, calib_cache_path)
self.ai_availability_status.set_status(AIAvailabilityEnum.UPLOADING)
res = self.loader_client.upload_big_small_resource(model_bytes, engine_filename, models_dir)
# Hunk boundary — the lines between these two hunks are not visible here.
@@ -92,6 +94,11 @@ cdef class Inference:
self._converted_model_bytes = <bytes>None
finally:
# Always clear the "build in progress" flag, success or failure.
self.is_building_engine = <bint>False
# Best-effort removal of the downloaded calibration-cache temp file;
# a failed unlink is deliberately ignored (cache file is disposable).
if calib_cache_path is not None:
try:
os.unlink(calib_cache_path)
except Exception:
pass
# Initialise the AI engine: try to download a pre-built TensorRT engine
# (INT8 first, then FP16), otherwise fall back to converting from source.
# NOTE(review): rendered unified diff with +/- prefixes stripped — old and
# new lines are interleaved below. Code kept byte-identical; comments only.
cdef init_ai(self):
constants_inf.log(<str> 'init AI...')
# Hunk boundary — surrounding context lines are not visible here.
@@ -112,28 +119,35 @@ cdef class Inference:
return
models_dir = constants_inf.MODELS_FOLDER
# --- Old (pre-change) engine-lookup path starts here ---
engine_filename = EngineClass.get_engine_filename()
if engine_filename is not None:
try:
self.ai_availability_status.set_status(AIAvailabilityEnum.DOWNLOADING)
res = self.loader_client.load_big_small_resource(engine_filename, models_dir)
if res.err is not None:
raise Exception(res.err)
self.engine = EngineClass(res.data)
self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
except Exception as e:
source_filename = EngineClass.get_source_filename()
if source_filename is None:
self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str>f"Pre-built engine not found: {str(e)}")
# --- New (post-change) lookup path: try INT8 engine first, then FP16 ---
engine_filename_fp16 = EngineClass.get_engine_filename()
if engine_filename_fp16 is not None:
# get_engine_filename now takes an optional precision suffix ("int8").
engine_filename_int8 = EngineClass.get_engine_filename(<str>"int8")
for candidate in [engine_filename_int8, engine_filename_fp16]:
try:
self.ai_availability_status.set_status(AIAvailabilityEnum.DOWNLOADING)
res = self.loader_client.load_big_small_resource(candidate, models_dir)
if res.err is not None:
raise Exception(res.err)
# First candidate that downloads cleanly becomes the engine.
self.engine = EngineClass(res.data)
self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
return
self.ai_availability_status.set_status(AIAvailabilityEnum.WARNING, <str>str(e))
source_bytes = self.download_model(source_filename)
self.is_building_engine = <bint>True
except Exception:
# Lookup miss for this candidate — fall through to the next one.
pass
# Old (pre-change) conversion-thread launch:
thread = Thread(target=self.convert_and_upload_model, args=(source_bytes, engine_filename))
thread.daemon = True
thread.start()
# New fallback: no cached engine found — convert from the source model.
source_filename = EngineClass.get_source_filename()
if source_filename is None:
self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str>"Pre-built engine not found and no source available")
return
self.ai_availability_status.set_status(AIAvailabilityEnum.WARNING, <str>"Cached engine not found, converting from source")
source_bytes = self.download_model(source_filename)
# Calibration cache decides the target precision: present -> INT8 engine
# filename, absent -> FP16 filename (AC-3 / AC-4 in the commit message).
calib_cache_path = self._try_download_calib_cache(models_dir)
target_engine_filename = EngineClass.get_engine_filename(<str>"int8") if calib_cache_path is not None else engine_filename_fp16
self.is_building_engine = <bint>True
# Conversion runs on a daemon thread so init does not block the process.
thread = Thread(target=self.convert_and_upload_model, args=(source_bytes, target_engine_filename, calib_cache_path))
thread.daemon = True
thread.start()
return
else:
# No engine filename at all: load the raw ONNX model directly.
self.engine = EngineClass(<bytes>self.download_model(constants_inf.AI_ONNX_MODEL_FILE))
self.ai_availability_status.set_status(AIAvailabilityEnum.ENABLED)
# Hunk boundary — intervening lines are not visible here.
@@ -142,6 +156,21 @@ cdef class Inference:
self.ai_availability_status.set_status(AIAvailabilityEnum.ERROR, <str>str(e))
self.is_building_engine = <bint>False
cdef str _try_download_calib_cache(self, str models_dir):
    # Best-effort fetch of the INT8 calibration cache. On success the cache
    # bytes are written to a fresh temp file and its path is returned; on any
    # failure (loader error or exception) the miss is logged and None is
    # returned — this method never raises, so INT8 stays strictly optional.
    try:
        res = self.loader_client.load_big_small_resource(constants_inf.INT8_CALIB_CACHE_FILE, models_dir)
        if res.err is None:
            # mkstemp gives us an exclusive file; fdopen adopts the descriptor
            # so the with-block closes it even if the write fails.
            handle, cache_path = tempfile.mkstemp(suffix='.cache')
            with os.fdopen(handle, 'wb') as out:
                out.write(res.data)
            constants_inf.log(<str>'INT8 calibration cache downloaded')
            return <str>cache_path
        constants_inf.log(<str>f"INT8 calibration cache not available: {res.err}")
        return <str>None
    except Exception as e:
        constants_inf.log(<str>f"INT8 calibration cache download failed: {str(e)}")
        return <str>None
cpdef run_detect_image(self, bytes image_bytes, AIRecognitionConfig ai_config, str media_name,
object annotation_callback, object status_callback=None):
cdef list all_frame_data = []