mirror of
https://github.com/azaion/detections.git
synced 2026-04-22 09:06:31 +00:00
[AZ-180] Fix INT8 conversion: set FP16 flag alongside INT8 for TensorRT 10.x
In TensorRT 10.x, INT8 conversion requires FP16 to be enabled as a fallback for network layers (e.g. normalization ops in detection models) that have no INT8 kernel implementation. Without the FP16 flag, build_serialized_network can return None on Jetson for YOLO-type models. The INT8 flag remains the primary precision; FP16 serves only as a layer-level fallback within the same engine.

Made-with: Cursor
This commit is contained in:
@@ -158,6 +158,8 @@ cdef class TensorRTEngine(InferenceEngine):
|
|||||||
constants_inf.log(<str>'Converting to INT8 with calibration cache')
|
constants_inf.log(<str>'Converting to INT8 with calibration cache')
|
||||||
calibrator = _CacheCalibrator(calib_cache_path)
|
calibrator = _CacheCalibrator(calib_cache_path)
|
||||||
config.set_flag(trt.BuilderFlag.INT8)
|
config.set_flag(trt.BuilderFlag.INT8)
|
||||||
|
if builder.platform_has_fast_fp16:
|
||||||
|
config.set_flag(trt.BuilderFlag.FP16)
|
||||||
config.int8_calibrator = calibrator
|
config.int8_calibrator = calibrator
|
||||||
elif builder.platform_has_fast_fp16:
|
elif builder.platform_has_fast_fp16:
|
||||||
constants_inf.log(<str>'Converting to supported fp16')
|
constants_inf.log(<str>'Converting to supported fp16')
|
||||||
|
|||||||
@@ -73,6 +73,7 @@ def test_convert_from_source_uses_int8_when_cache_provided():
|
|||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
mock_config.set_flag.assert_any_call("INT8")
|
mock_config.set_flag.assert_any_call("INT8")
|
||||||
|
mock_config.set_flag.assert_any_call("FP16")
|
||||||
assert mock_config.int8_calibrator is not None
|
assert mock_config.int8_calibrator is not None
|
||||||
finally:
|
finally:
|
||||||
os.unlink(cache_path)
|
os.unlink(cache_path)
|
||||||
|
|||||||
Reference in New Issue
Block a user