""" 04b_quantize_int8_raw.py — Quantize the RAW float ONNX (3 outputs, no decoder) to INT8 QOperator. This is the version that quantizes correctly because the sigmoid/decoder live OUTSIDE the network. Reuses the COCO val2017 calibration set already on disk from 04_quantize_int8.py. """ import os, sys, hashlib, random import numpy as np, cv2 from PIL import Image from onnxruntime.quantization import ( quantize_static, CalibrationDataReader, QuantType, QuantFormat, ) SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) ONNX_FLOAT = os.path.join(SCRIPT_DIR, "out_onnx", "yolov4-leaky-416_float_raw.onnx") ONNX_INT8 = os.path.join(SCRIPT_DIR, "out_onnx", "yolov4-leaky-416_int8_qop_raw.onnx") CALIB_DIR = os.path.join(SCRIPT_DIR, "_calib_val2017") INPUT_NAME = "images"; INPUT_SIZE = 416 NUM_CALIB = 1000; SEED = 42 def letterbox_nchw(rgb, size=INPUT_SIZE): h, w = rgb.shape[:2]; s = min(size/h, size/w) nh, nw = int(round(h*s)), int(round(w*s)) resized = cv2.resize(rgb, (nw, nh)) pad = np.full((size, size, 3), 114, np.uint8); pad[:nh,:nw] = resized chw = pad.astype(np.float32).transpose(2,0,1) / 255.0 return np.expand_dims(chw, 0) class Reader(CalibrationDataReader): def __init__(self, paths, input_name=INPUT_NAME): self.paths, self.input_name, self.idx = paths, input_name, 0 def get_next(self): if self.idx >= len(self.paths): return None if self.idx % 100 == 0: print(f" calibrating {self.idx}/{len(self.paths)} ...", flush=True) try: pil = Image.open(self.paths[self.idx]).convert("RGB") blob = letterbox_nchw(np.array(pil)) self.idx += 1 return {self.input_name: blob} except Exception as e: print(f" [skip] {self.paths[self.idx]}: {e}") self.idx += 1 return self.get_next() def sha256_of(p): h = hashlib.sha256() with open(p, "rb") as f: for c in iter(lambda: f.read(1<<20), b""): h.update(c) return h.hexdigest() def main(): print("=" * 70) print(f" Quantize RAW float ONNX -> INT8 (calib: {NUM_CALIB} val2017 imgs)") print("=" * 70) print(f" float input : {ONNX_FLOAT}") print(f" float SHA-256: {sha256_of(ONNX_FLOAT)}") print(f" output : {ONNX_INT8}") files = sorted(f for f in os.listdir(CALIB_DIR) if f.endswith(".jpg")) rng = random.Random(SEED) sample = rng.sample(files, min(NUM_CALIB, len(files))) paths = [os.path.join(CALIB_DIR, f) for f in sample] print(f"\n[1/2] {len(paths)} val2017 calibration paths") print("[2/2] quantize_static (this will take 10-20 min) ...") quantize_static( model_input=ONNX_FLOAT, model_output=ONNX_INT8, calibration_data_reader=Reader(paths), quant_format=QuantFormat.QOperator, weight_type=QuantType.QInt8, activation_type=QuantType.QInt8, per_channel=False, reduce_range=False, ) sz = os.path.getsize(ONNX_INT8); sha = sha256_of(ONNX_INT8) print(f"\nDone: size={sz:,} bytes ({sz/1e6:.2f} MB)") print(f" sha256={sha}") if __name__ == "__main__": main()