From 3984507221d99b93138e4d6a9570e9b97cdf7316 Mon Sep 17 00:00:00 2001
From: Oleksandr Bezdieniezhnykh <oleksandr.bezdieniezhnykh@pwc.com>
Date: Thu, 2 Apr 2026 07:32:16 +0300
Subject: [PATCH] [AZ-180] Fix INT8 conversion: set FP16 flag alongside INT8
 for TensorRT 10.x

In TensorRT 10.x, INT8 conversion requires FP16 to be set as a fallback for
network layers (e.g. normalization ops in detection models) that have no INT8
kernel implementation. Without FP16, build_serialized_network can return None
on Jetson for YOLO-type models. The FP16 flag is added only when
builder.platform_has_fast_fp16 is true, so platforms without fast FP16 keep
the previous INT8-only configuration. INT8 remains the primary precision;
FP16 is only the layer-level fallback within the same engine.

Made-with: Cursor
---
 src/engines/tensorrt_engine.pyx | 2 ++
 tests/test_az180_jetson_int8.py | 1 +
 2 files changed, 3 insertions(+)
diff --git a/src/engines/tensorrt_engine.pyx b/src/engines/tensorrt_engine.pyx
index 56883a2..b1dba55 100644
--- a/src/engines/tensorrt_engine.pyx
+++ b/src/engines/tensorrt_engine.pyx
@@ -158,6 +158,8 @@ cdef class TensorRTEngine(InferenceEngine):
                 constants_inf.log(<str>'Converting to INT8 with calibration cache')
                 calibrator = _CacheCalibrator(calib_cache_path)
                 config.set_flag(trt.BuilderFlag.INT8)
+                if builder.platform_has_fast_fp16:
+                    config.set_flag(trt.BuilderFlag.FP16)
                 config.int8_calibrator = calibrator
             elif builder.platform_has_fast_fp16:
                 constants_inf.log(<str>'Converting to supported fp16')
diff --git a/tests/test_az180_jetson_int8.py b/tests/test_az180_jetson_int8.py
index d9186d4..a67faad 100644
--- a/tests/test_az180_jetson_int8.py
+++ b/tests/test_az180_jetson_int8.py
@@ -73,6 +73,7 @@ def test_convert_from_source_uses_int8_when_cache_provided():
 
         # Assert
         mock_config.set_flag.assert_any_call("INT8")
+        mock_config.set_flag.assert_any_call("FP16")
         assert mock_config.int8_calibrator is not None
     finally:
         os.unlink(cache_path)