#!/usr/bin/env python3
"""
Sample a random subset of images from a YOLO dataset for INT8 calibration.

Run locally (on your dev machine) before deploying to Jetson:

    python3 scripts/jetson/sample_calibration_images.py \
        --dataset /path/to/dataset-2025-05-22 \
        --output /tmp/calibration \
        --num-samples 500

The output directory can then be passed directly to deploy_demo_jetson.sh via
--calibration-images, or to generate_int8_cache.py via --images-dir.
"""

import argparse
import random
import shutil
import sys
from pathlib import Path
from typing import Optional, Sequence

# Recognised image suffixes, in the order their groups appear in the list
# returned by collect_images(). The order matters: it determines which images
# a given --seed selects.
IMAGE_SUFFIXES = (".jpg", ".jpeg", ".png")


def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Parse command-line options.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        Namespace with ``dataset``, ``output``, ``num_samples`` and ``seed``.

    Raises:
        SystemExit: On invalid arguments (argparse exits with status 2),
            including a negative ``--num-samples``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", required=True, help="Root of the YOLO dataset (must contain images/)")
    parser.add_argument("--output", required=True, help="Destination directory for sampled images")
    parser.add_argument("--num-samples", type=int, default=500)
    parser.add_argument("--seed", type=int, default=42)
    args = parser.parse_args(argv)
    # random.sample() raises ValueError on a negative count; report it as a
    # usage error instead of letting a traceback escape.
    if args.num_samples < 0:
        parser.error("--num-samples must be non-negative")
    return args


def collect_images(dataset_root: Path) -> list[Path]:
    """Return every image file found under ``dataset_root / "images"``.

    The directory is searched recursively. Suffixes are matched
    case-insensitively, and only regular files are returned, so a directory
    that happens to be called ``foo.jpg`` is ignored. Results are grouped by
    suffix in IMAGE_SUFFIXES order, each group sorted by path, which keeps
    the list (and therefore seeded sampling) deterministic.

    Args:
        dataset_root: Root of the dataset; must contain an ``images``
            directory.

    Returns:
        The list of image paths (possibly empty).

    Raises:
        SystemExit: With status 1 if the ``images`` directory does not exist.
    """
    images_dir = dataset_root / "images"
    if not images_dir.is_dir():
        print(f"ERROR: {images_dir} not found", file=sys.stderr)
        sys.exit(1)

    grouped: dict[str, list[Path]] = {suffix: [] for suffix in IMAGE_SUFFIXES}
    for path in images_dir.rglob("*"):
        lowered = path.name.lower()
        for suffix in IMAGE_SUFFIXES:
            if lowered.endswith(suffix):
                # Only stat entries that look like images.
                if path.is_file():
                    grouped[suffix].append(path)
                break

    images: list[Path] = []
    for suffix in IMAGE_SUFFIXES:
        images += sorted(grouped[suffix])
    return images


def _unique_destination_name(src: Path, images_dir: Path, taken: set[str]) -> str:
    """Pick an output file name for ``src`` that is not already in ``taken``.

    The plain file name is preferred. If it is already used, the path relative
    to ``images_dir`` is flattened with ``__`` separators, and a numeric
    counter is appended as a last resort. Names are compared case-folded so
    the result is also safe on case-insensitive filesystems. The chosen name
    is recorded in ``taken``.
    """
    candidate = src.name
    if candidate.casefold() in taken:
        candidate = "__".join(src.relative_to(images_dir).parts)
    stem, suffix = Path(candidate).stem, Path(candidate).suffix
    counter = 1
    while candidate.casefold() in taken:
        candidate = f"{stem}_{counter}{suffix}"
        counter += 1
    taken.add(candidate.casefold())
    return candidate


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Copy a seeded random sample of dataset images into the output directory.

    At most ``--num-samples`` images are copied; if the dataset holds fewer,
    all of them are copied. Images that share a file name but live in
    different subdirectories are written under distinct names, so the output
    always contains as many files as were sampled.

    Args:
        argv: Argument list to parse. ``None`` (the default) reads
            ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 1 if the ``images`` directory is missing or
            contains no images, or status 2 on invalid arguments.
    """
    args = parse_args(argv)
    dataset_root = Path(args.dataset)
    output_dir = Path(args.output)

    images = collect_images(dataset_root)
    if not images:
        print(f"ERROR: no images found in {dataset_root / 'images'}", file=sys.stderr)
        sys.exit(1)

    rng = random.Random(args.seed)
    sample = rng.sample(images, min(args.num_samples, len(images)))

    output_dir.mkdir(parents=True, exist_ok=True)
    images_dir = dataset_root / "images"
    taken: set[str] = set()
    for src in sample:
        shutil.copy2(src, output_dir / _unique_destination_name(src, images_dir, taken))

    print(f"Sampled {len(sample)} images → {output_dir}")


if __name__ == "__main__":
    main()