#!/usr/bin/env python3
"""
Export Hugging Face Whisper checkpoints to OpenVINO IR + INT8 asymmetric weights (group size 128)
using Optimum Intel optimum-cli. Output layout targets openvino_genai.WhisperPipeline.

Aligns with the OpenVINO Hub INT8 Whisper recipe family:
  --weight-format int8 --group-size 128 without --sym (asymmetric INT8).

Prerequisite: venv with scripts/requirements-whisper-export.txt installed.

Example:
  cd inference/python
  python scripts/export_whisper_int8_ov.py --model openai/whisper-small --output models/whisper-small-int8-new --cache-dir ./cache_dir

Default --task is automatic-speech-recognition-with-past (beam_idx / stateful decoder for GenAI).

HF checkpoints in BF16 (e.g. safetensors) load in PyTorch as usual; no extra flags needed for export.
"""

from __future__ import annotations

import argparse
import os
import shutil
import subprocess
import sys
from pathlib import Path


def _optimum_cli() -> str:
    """Return the ``optimum-cli`` command to run for the current interpreter.

    The launcher is looked up next to ``sys.executable`` so the export uses the
    same environment as this script even when that environment is not
    activated. The unresolved interpreter directory is tried before the
    symlink-resolved one: in a POSIX venv ``bin/python`` is normally a symlink
    to the base interpreter, so resolving first would skip the venv's own
    ``bin`` directory where ``optimum-cli`` is installed.

    Returns:
        The path of the launcher found beside the interpreter, or the bare
        name ``"optimum-cli"`` so the process spawn resolves it through PATH.
    """
    launcher = "optimum-cli.exe" if os.name == "nt" else "optimum-cli"
    # sys.executable may be empty when Python cannot determine its own path.
    if sys.executable:
        interpreter = Path(sys.executable)
        for bindir in (interpreter.absolute().parent, interpreter.resolve().parent):
            candidate = bindir / launcher
            if candidate.is_file():
                return str(candidate)
    return "optimum-cli"


def main(argv: list[str] | None = None) -> int:
    """Parse CLI options, run ``optimum-cli export openvino`` and check the output.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behaviour.

    Returns:
        Exit status: 0 on success, 1 when the output path already exists and
        ``--overwrite`` was not given, 127 when the ``optimum-cli`` launcher
        cannot be started, otherwise the non-zero status of ``optimum-cli``.

    Raises:
        SystemExit: Raised by ``argparse`` on invalid or missing arguments.
    """
    ap = argparse.ArgumentParser(description="Export Whisper to OpenVINO INT8 IR (GenAI layout).")
    ap.add_argument("--model", default="openai/whisper-small", help="HF model id or local path")
    ap.add_argument("--output", type=Path, required=True, help="Output directory")
    ap.add_argument(
        "--task",
        default="automatic-speech-recognition-with-past",
        help="Optimum task; -with-past is required for WhisperPipeline (beam_idx).",
    )
    ap.add_argument("--cache-dir", type=Path, default=None, help="HF download cache")
    ap.add_argument("--weight-format", choices=("int8", "fp16", "fp32"), default="int8")
    ap.add_argument("--group-size", type=int, default=128)
    ap.add_argument("--sym", action="store_true", help="Symmetric INT8 (asymmetric is default)")
    ap.add_argument("--disable-stateful", action="store_true")
    ap.add_argument("--disable-convert-tokenizer", action="store_true")
    ap.add_argument("--overwrite", action="store_true")
    ns = ap.parse_args(argv)

    out = ns.output.resolve()
    if out.exists():
        if not ns.overwrite:
            print(f"Refusing to clobber {out} (use --overwrite)", file=sys.stderr)
            return 1
        # shutil.rmtree only accepts directories; a stale regular file at the
        # output path has to be removed with unlink instead.
        if out.is_dir():
            shutil.rmtree(out)
        else:
            out.unlink()

    cmd = [
        _optimum_cli(),
        "export",
        "openvino",
        "-m",
        ns.model,
        "--task",
        ns.task,
        "--weight-format",
        ns.weight_format,
        "--group-size",
        str(ns.group_size),
        str(out),
    ]
    if ns.sym:
        cmd.append("--sym")
    if ns.cache_dir is not None:
        cmd.extend(["--cache_dir", str(ns.cache_dir.resolve())])
    if ns.disable_stateful:
        cmd.append("--disable-stateful")
    if ns.disable_convert_tokenizer:
        cmd.append("--disable-convert-tokenizer")

    print("Running:", " ".join(cmd), flush=True)
    try:
        ret = subprocess.call(cmd)
    except FileNotFoundError:
        # The launcher was neither beside the interpreter nor on PATH; report
        # it as a normal failure instead of an unhandled traceback.
        print(
            f"Could not start {cmd[0]!r}: executable not found. "
            "Is scripts/requirements-whisper-export.txt installed in this environment?",
            file=sys.stderr,
        )
        return 127
    if ret != 0:
        return ret
    print(f"Done: {out}", flush=True)

    # Sanity-check the files this script expects in the export directory;
    # a missing file only produces a warning, not a failure.
    for name in (
        "openvino_encoder_model.xml",
        "openvino_decoder_model.xml",
        "openvino_tokenizer.xml",
        "openvino_detokenizer.xml",
    ):
        p = out / name
        if not p.is_file():
            print(f"Warning: missing {p}", file=sys.stderr)
    return 0


# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())