"""Programmatically generate the crafted JPEG fixture for CVE-2025-53644. Per AZ-407 § AC-6 and AZ-406 § Risk 5 — the upstream PoC JPEG has unclear redistribution terms, so the e2e harness generates a structurally equivalent malformed file from scratch rather than committing copyrighted bytes. AZ-407 ships a *minimal* malformed JPEG with: * Valid SOI marker (``FFD8``) * Valid DQT (quantisation table) * Valid SOF0 (baseline DCT) header * **Truncated SOS marker** — the marker is announced (``FFDA``) but only the length field is present; the entropy-coded data is deliberately absent. This is the structural feature CVE-2025-53644 exploits: vulnerable OpenCV (≤ 4.11) reads past the buffer; hardened OpenCV (≥ 4.12) rejects gracefully with an `imread` failure. AZ-439 (NFT-SEC-04) tightens this further: * Adds an oversized DHT segment (the full PoC structure) * Runs the file under AddressSanitizer to assert no buffer-overflow / use-after-free is reported on the hardened build * Compares behaviour against a control vulnerable OpenCV ≤ 4.11 The AZ-407 fixture is sufficient to verify AC-6: feeding it to OpenCV 4.12+ does NOT crash; it returns a clean decode failure. The function is deterministic: same input → identical output bytes. """ from __future__ import annotations import argparse import hashlib import logging from pathlib import Path logger = logging.getLogger(__name__) def _build_minimal_malformed_jpeg() -> bytes: """Emit a deterministic malformed JPEG with a truncated SOS marker. Byte-level structure (annotated): FFD8 # SOI FFE0 0010 4A464946 00 0102 0000 0001 0001 0000 # APP0 / JFIF stub FFDB 0043 00 <64 bytes> # DQT (table 0, baseline) FFC0 0011 08 0001 0001 03 01 22 00 02 11 01 03 11 01 # SOF0 (1x1 baseline 3-component) FFC4 001F 00 <31 bytes> # DHT (DC table 0; bytes follow JPEG std) FFDA 000C 03 01 00 02 11 03 11 00 3F 00 # SOS — header announced, NO entropy data # CVE: truncated stream """ soi = b"\xff\xd8" app0 = bytes.fromhex( "ffe000104a46494600010200000001000100" "00" ) dqt_body = bytes(range(64)) dqt = b"\xff\xdb" + (3 + len(dqt_body)).to_bytes(2, "big") + b"\x00" + dqt_body sof0 = bytes.fromhex( "ffc0001108" # SOF0 marker + length + precision "0001" # height = 1 "0001" # width = 1 "03" # 3 components "012200" # Y : id=1, sampling=22, quant tbl=0 "021101" # Cb : id=2, sampling=11, quant tbl=1 "031101" # Cr : id=3, sampling=11, quant tbl=1 ) # DHT for AC bits — standard JPEG huffman table 0/0; the count/value # bytes here are a 31-byte body that decodes cleanly. We hand-craft # the structure rather than depending on PIL. dht_body = ( b"\x00" # tc=0, th=0 + bytes([0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]) # length counts + bytes([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]) # symbols ) dht = b"\xff\xc4" + (2 + len(dht_body)).to_bytes(2, "big") + dht_body # SOS: announce the marker + parameters, then STOP. No entropy-coded # scan data. No EOI. This is the CVE-relevant truncation. sos = bytes.fromhex( "ffda000c" # SOS marker + length "03" # 3 components in scan "0100" # Y : DC=0 / AC=0 "0211" # Cb : DC=1 / AC=1 "0311" # Cr : DC=1 / AC=1 "00" # Ss "3f" # Se "00" # Ah/Al ) return soi + app0 + dqt + sof0 + dht + sos def generate(out_path: Path) -> Path: """Write the AZ-407 malformed JPEG to ``out_path``. Returns the path on success. Idempotent: writing twice produces the same bytes. """ blob = _build_minimal_malformed_jpeg() out_path.parent.mkdir(parents=True, exist_ok=True) out_path.write_bytes(blob) logger.info( "Wrote %d-byte CVE-2025-53644 fixture (sha256=%s) to %s", len(blob), hashlib.sha256(blob).hexdigest(), out_path, ) return out_path def main(argv: list[str] | None = None) -> int: parser = argparse.ArgumentParser(description="Generate CVE-2025-53644 fixture JPEG.") parser.add_argument( "out", type=Path, nargs="?", default=Path("cve-2025-53644.jpg"), help="Output JPEG path (default: ./cve-2025-53644.jpg)", ) args = parser.parse_args(argv) logging.basicConfig(level=logging.INFO) generate(args.out) return 0 if __name__ == "__main__": raise SystemExit(main())