mirror of
https://github.com/azaion/detections.git
synced 2026-04-22 11:56:31 +00:00
2c35e59a77
- Added Jetson-specific deployment instructions to `deploy_scripts.md`, detailing prerequisites and service management. - Updated `deploy_status_report.md` to reflect the completion of the AZ-180 cycle and the readiness of Jetson support. - Removed outdated task documentation for Jetson Orin Nano support from the todo list. Made-with: Cursor
142 lines
4.6 KiB
Python
142 lines
4.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Generate an INT8 calibration cache for TensorRT on Jetson.
|
|
|
|
Run this INSIDE the Jetson Docker container:
|
|
|
|
docker compose -f docker-compose.demo-jetson.yml run --rm \
|
|
-v /path/to/images:/calibration \
|
|
detections \
|
|
python3 scripts/generate_int8_cache.py \
|
|
--images-dir /calibration \
|
|
--onnx /models/azaion.onnx \
|
|
--output /models/azaion.int8_calib.cache
|
|
|
|
The cache file must be in the loader's models volume so the detections
|
|
service can download it on startup via the Loader API.
|
|
"""
|
|
import argparse
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
import cv2
|
|
import numpy as np
|
|
|
|
|
|
def parse_args():
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument("--images-dir", required=True, help="Directory with calibration images (JPG/PNG)")
|
|
parser.add_argument("--onnx", required=True, help="Path to azaion.onnx")
|
|
parser.add_argument("--output", default="azaion.int8_calib.cache")
|
|
parser.add_argument("--input-size", type=int, default=1280, help="Model input H=W (default 1280)")
|
|
parser.add_argument("--num-samples", type=int, default=500)
|
|
return parser.parse_args()
|
|
|
|
|
|
def collect_images(images_dir: str, num_samples: int) -> list[Path]:
    """Gather up to *num_samples* calibration image paths.

    Searches *images_dir* recursively. Results are sorted per extension
    and concatenated in a fixed order (all ``.jpg`` first, then
    ``.jpeg``, then ``.png``), then truncated to *num_samples*.
    """
    base = Path(images_dir)
    found: list[Path] = []
    for ext in ("jpg", "jpeg", "png"):
        found.extend(sorted(base.glob(f"**/*.{ext}")))
    return found[:num_samples]
|
|
|
|
|
|
def preprocess(path: Path, h: int, w: int) -> np.ndarray | None:
    """Load *path* and convert it to a normalized float32 NCHW tensor.

    Pipeline: decode (BGR) → RGB → resize to (w, h) → scale to [0, 1]
    → transpose to channels-first with a leading batch axis of 1.
    Returns ``None`` when OpenCV cannot decode the file.
    """
    bgr = cv2.imread(str(path))
    if bgr is None:
        return None
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, (w, h))
    scaled = resized.astype(np.float32) / 255.0
    # Shape (1, 3, h, w), contiguous for the device-memory copy.
    return np.ascontiguousarray(scaled.transpose(2, 0, 1)[np.newaxis])
|
|
|
|
|
|
def main():
    """Build a TensorRT engine with INT8 calibration and emit the cache.

    The serialized engine itself is discarded; the desired side effect
    is the calibration cache file written by the calibrator's
    ``write_calibration_cache`` hook to ``args.output``.
    """
    args = parse_args()

    # TensorRT / PyCUDA only exist inside the Jetson container; import
    # lazily so the script can at least print a clear error elsewhere.
    try:
        import pycuda.autoinit  # noqa: F401
        import pycuda.driver as cuda
        import tensorrt as trt
    except ImportError as e:
        print(f"ERROR: {e}\nRun this script inside the Jetson Docker container.", file=sys.stderr)
        sys.exit(1)

    images = collect_images(args.images_dir, args.num_samples)
    if not images:
        print(f"No images found in {args.images_dir}", file=sys.stderr)
        sys.exit(1)
    print(f"Using {len(images)} calibration images")

    H = W = args.input_size

    class _ImageCalibrator(trt.IInt8EntropyCalibrator2):
        # Feeds the collected images to TensorRT one at a time (batch
        # size 1), reusing a single device buffer for every batch.
        # Captures `images`, `H`, `W`, `cuda` and `args` from the
        # enclosing scope.
        def __init__(self):
            super().__init__()
            self._idx = 0
            # One NCHW float32 frame: 3 channels * H * W * 4 bytes.
            self._buf = cuda.mem_alloc(3 * H * W * 4)

        def get_batch_size(self):
            return 1

        def get_batch(self, names):
            # Skip images OpenCV cannot decode; return None once the
            # list is exhausted so TensorRT stops requesting batches.
            while self._idx < len(images):
                arr = preprocess(images[self._idx], H, W)
                self._idx += 1
                if arr is None:
                    continue
                cuda.memcpy_htod(self._buf, arr)
                return [int(self._buf)]
            return None

        def read_calibration_cache(self):
            # Always calibrate from scratch — never reuse an old cache.
            return None

        def write_calibration_cache(self, cache):
            # TensorRT calls this with the finished cache bytes.
            with open(args.output, "wb") as f:
                f.write(cache)
            print(f"Cache written → {args.output}")

    onnx_data = Path(args.onnx).read_bytes()
    logger = trt.Logger(trt.Logger.INFO)
    explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

    with (
        trt.Builder(logger) as builder,
        builder.create_network(explicit_batch) as network,
        trt.OnnxParser(network, logger) as parser,
        builder.create_builder_config() as config,
    ):
        # 4 GiB workspace; request INT8, with FP16 fallback for layers
        # that cannot run in INT8 when the GPU supports fast FP16.
        config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 4 * 1024 ** 3)
        config.set_flag(trt.BuilderFlag.INT8)
        if builder.platform_has_fast_fp16:
            config.set_flag(trt.BuilderFlag.FP16)

        calibrator = _ImageCalibrator()
        config.int8_calibrator = calibrator

        if not parser.parse(onnx_data):
            for i in range(parser.num_errors):
                print(parser.get_error(i), file=sys.stderr)
            sys.exit(1)

        # A dynamic batch dimension (-1) requires an optimization
        # profile; pin it to batch 1 at the calibration resolution.
        inp = network.get_input(0)
        shape = inp.shape
        C = shape[1]
        if shape[0] == -1:
            profile = builder.create_optimization_profile()
            profile.set_shape(inp.name, (1, C, H, W), (1, C, H, W), (1, C, H, W))
            config.add_optimization_profile(profile)

        print("Building TensorRT engine with INT8 calibration (several minutes on Jetson)…")
        # Calibration runs inside this build; the cache is written via
        # the calibrator callback. The plan bytes are not saved.
        plan = builder.build_serialized_network(network, config)
        if plan is None:
            print("Engine build failed", file=sys.stderr)
            sys.exit(1)

    print("Done. Upload the cache to the Loader before (re)starting the detections service.")
|
|
|
|
|
|
# Script entry point: only run when executed directly, not on import.
if __name__ == "__main__":
    main()
|