#!/usr/bin/env python3 """ Export Hugging Face Whisper checkpoints to OpenVINO IR + INT8 asymmetric weights (group size 128) using Optimum Intel optimum-cli. Output layout targets openvino_genai.WhisperPipeline. Aligns with the OpenVINO Hub INT8 Whisper recipe family: --weight-format int8 --group-size 128 without --sym (asymmetric INT8). Prerequisite: venv with scripts/requirements-whisper-export.txt installed. Example: cd inference/python python scripts/export_whisper_int8_ov.py --model openai/whisper-small --output models/whisper-small-int8-new --cache-dir ./cache_dir Default --task is automatic-speech-recognition-with-past (beam_idx / stateful decoder for GenAI). HF checkpoints in BF16 (e.g. safetensors) load in PyTorch as usual; no extra flags needed for export. """ from __future__ import annotations import argparse import os import shutil import subprocess import sys from pathlib import Path def _optimum_cli() -> str: bindir = Path(sys.executable).resolve().parent exe = bindir / ("optimum-cli.exe" if os.name == "nt" else "optimum-cli") if exe.is_file(): return str(exe) return "optimum-cli" def main() -> int: ap = argparse.ArgumentParser(description="Export Whisper to OpenVINO INT8 IR (GenAI layout).") ap.add_argument("--model", default="openai/whisper-small", help="HF model id or local path") ap.add_argument("--output", type=Path, required=True, help="Output directory") ap.add_argument( "--task", default="automatic-speech-recognition-with-past", help="Optimum task; -with-past is required for WhisperPipeline (beam_idx).", ) ap.add_argument("--cache-dir", type=Path, default=None, help="HF download cache") ap.add_argument("--weight-format", choices=("int8", "fp16", "fp32"), default="int8") ap.add_argument("--group-size", type=int, default=128) ap.add_argument("--sym", action="store_true", help="Symmetric INT8 (asymmetric is default)") ap.add_argument("--disable-stateful", action="store_true") ap.add_argument("--disable-convert-tokenizer", action="store_true") ap.add_argument("--overwrite", action="store_true") ns = ap.parse_args() out = ns.output.resolve() if out.exists(): if ns.overwrite: shutil.rmtree(out) else: print(f"Refusing to clobber {out} (use --overwrite)", file=sys.stderr) return 1 cmd = [ _optimum_cli(), "export", "openvino", "-m", ns.model, "--task", ns.task, "--weight-format", ns.weight_format, "--group-size", str(ns.group_size), str(out), ] if ns.sym: cmd.append("--sym") if ns.cache_dir is not None: cmd.extend(["--cache_dir", str(ns.cache_dir.resolve())]) if ns.disable_stateful: cmd.append("--disable-stateful") if ns.disable_convert_tokenizer: cmd.append("--disable-convert-tokenizer") print("Running:", " ".join(cmd), flush=True) ret = subprocess.call(cmd) if ret != 0: return ret print(f"Done: {out}", flush=True) for name in ( "openvino_encoder_model.xml", "openvino_decoder_model.xml", "openvino_tokenizer.xml", "openvino_detokenizer.xml", ): p = out / name if not p.is_file(): print(f"Warning: missing {p}", file=sys.stderr) return 0 if __name__ == "__main__": raise SystemExit(main())