{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# HachimiMT Benchmark Profile\n",
    "\n",
    "Notebook này dùng để đo tốc độ dịch file và in dòng `BENCH_PROFILE` để soi bottleneck CPU/GPU.\n",
    "\n",
    "- Chạy được trên Google Colab và Kaggle.\n",
    "- Tự tải bản code mới nhất từ HF Space `hachimimt-local.zip`.\n",
    "- Colab: cell chọn file sẽ mở upload nếu bạn chưa set `INPUT_PATH`.\n",
    "- Kaggle: thêm file `.txt` bằng **Add Input** hoặc đặt file trong `/kaggle/working`, rồi chạy cell chọn file.\n",
    "\n",
    "Dòng cần xem nằm gần cuối output cell benchmark:\n",
    "\n",
    "```text\n",
    "BENCH_RUNTIME ... ct2_cuda_devices=...\n",
    "BENCH_PACKAGES ... ctranslate2=... sentencepiece=... torch=...\n",
    "BENCH_ENV ... HACHIMIMT_GPU_INDICES=... HACHIMIMT_CT2_WINDOW_MULTIPLIER=...\n",
    "BENCH_PROFILE ... chunk_s=... ct2_infer_s=... decode_s=... tokenize_wait_s=...\n",
    "BENCH_DONE ...\n",
    "```\n",
    "\n",
    "Preset hiện tại dùng để đo cấu hình đã chốt trên Kaggle T4 x2: chạy cả 2 GPU, batch `96`, `batch_type=tokens`, window `8` (effective 32x), beam `2`. Nếu muốn kiểm chứng lại, bật cell sweep để chạy window `[4, 8, 16]`. Để so Kaggle với Colab công bằng, bật `USE_SINGLE_GPU_FOR_FAIR_TEST=True`; notebook sẽ tự dùng window `16` cho lần fair test nếu bạn không override `WINDOW_MULTIPLIER`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. Tải code mới + cài dependencies\n",
    "import os\n",
    "import shutil\n",
    "import subprocess\n",
    "import sys\n",
    "import urllib.request\n",
    "import zipfile\n",
    "from pathlib import Path\n",
    "\n",
    "IN_KAGGLE = Path(\"/kaggle/working\").exists()\n",
    "IN_COLAB = Path(\"/content\").exists() and not IN_KAGGLE\n",
    "WORKDIR = Path(\"/kaggle/working\" if IN_KAGGLE else \"/content\" if IN_COLAB else \".\").resolve()\n",
    "os.chdir(WORKDIR)\n",
    "\n",
    "ZIP_URL = \"https://huggingface.co/spaces/ngocdang83/HachimiMT-demo/resolve/main/hachimimt-local.zip\"\n",
    "zip_path = WORKDIR / \"hachimimt-local.zip\"\n",
    "\n",
    "print(\"Runtime:\", \"Kaggle\" if IN_KAGGLE else \"Colab\" if IN_COLAB else \"Local\")\n",
    "print(\"Working dir:\", WORKDIR)\n",
    "print(\"Downloading:\", ZIP_URL)\n",
    "urllib.request.urlretrieve(ZIP_URL, zip_path)\n",
    "\n",
    "shutil.rmtree(WORKDIR / \"hachimimt\", ignore_errors=True)\n",
    "with zipfile.ZipFile(zip_path) as zf:\n",
    "    zf.extractall(WORKDIR)\n",
    "\n",
    "print(\"Extracted:\", sorted(p.name for p in (WORKDIR / \"hachimimt\").iterdir()))\n",
    "print(\"Installing requirements...\")\n",
    "subprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", \"-q\", \"-r\", str(WORKDIR / \"hachimimt\" / \"requirements.txt\")])\n",
    "print(\"Setup done\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2. Chọn file input\n",
    "# - Colab: để trống INPUT_PATH thì notebook sẽ mở hộp upload.\n",
    "# - Kaggle: dùng Add Input để gắn dataset .txt, hoặc điền path cụ thể dưới đây.\n",
    "from pathlib import Path\n",
    "import sys\n",
    "\n",
    "INPUT_PATH = \"\"  # ví dụ Kaggle: \"/kaggle/input/my-dataset/book.txt\"; Colab: \"/content/book.txt\"\n",
    "\n",
    "def _is_colab_runtime() -> bool:\n",
    "    try:\n",
    "        import google.colab  # type: ignore\n",
    "        return True\n",
    "    except Exception:\n",
    "        return False\n",
    "\n",
    "\n",
    "def resolve_input_path() -> Path:\n",
    "    raw = INPUT_PATH.strip()\n",
    "    if raw:\n",
    "        path = Path(raw).expanduser().resolve()\n",
    "        if not path.exists():\n",
    "            raise FileNotFoundError(f\"INPUT_PATH không tồn tại: {path}\")\n",
    "        return path\n",
    "\n",
    "    if _is_colab_runtime():\n",
    "        from google.colab import files  # type: ignore\n",
    "        print(\"Chọn/upload file .txt từ máy của bạn...\")\n",
    "        uploaded = files.upload()\n",
    "        if not uploaded:\n",
    "            raise FileNotFoundError(\"Bạn chưa upload file nào.\")\n",
    "        name = next(iter(uploaded))\n",
    "        return Path(name).resolve()\n",
    "\n",
    "    candidates = []\n",
    "    for root in [\"/kaggle/input\", \"/kaggle/working\", \"/content\"]:\n",
    "        base = Path(root)\n",
    "        if base.exists():\n",
    "            candidates.extend(p for p in base.rglob(\"*.txt\") if p.is_file())\n",
    "\n",
    "    candidates = sorted(set(candidates), key=lambda p: p.stat().st_size, reverse=True)\n",
    "    if not candidates:\n",
    "        raise FileNotFoundError(\n",
    "            \"Không tìm thấy file .txt. Trên Kaggle: bấm Add Input để gắn dataset chứa .txt, \"\n",
    "            \"hoặc upload/tạo file trong /kaggle/working rồi chạy lại cell này.\"\n",
    "        )\n",
    "\n",
    "    print(\"Tìm thấy file .txt, chọn file lớn nhất:\")\n",
    "    for idx, path in enumerate(candidates[:10], start=1):\n",
    "        print(f\"{idx}. {path} ({path.stat().st_size:,} bytes)\")\n",
    "    return candidates[0]\n",
    "\n",
    "input_path = resolve_input_path()\n",
    "print(\"INPUT_FILE=\", input_path)\n",
    "print(\"size_bytes=\", input_path.stat().st_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3. Cấu hình benchmark\n",
    "# Đổi các biến ở đây rồi chạy lại cell benchmark bên dưới.\n",
    "import os\n",
    "\n",
    "MODEL = \"HachimiMT-60\"      # HachimiMT-60, HachimiMT-30, MoxhiMT-60, MoxhiMT-30, MoxhiMT-30-QT, HirashibaMT-Medium, HirashibaMT-Tiny\n",
    "BEAM = 2                    # 1 nhanh hơn, 2 thường cân bằng hơn\n",
    "CHUNK_MODE = \"sentence\"     # sentence hoặc paragraph\n",
    "NORMALIZE = \"auto\"          # auto, t2s, none\n",
    "PROGRESS_SECONDS = 30\n",
    "\n",
    "# Preset hiện tại ưu tiên benchmark đơn với cấu hình đã chốt.\n",
    "# Đổi thành True nếu bạn muốn chạy một cấu hình đơn trước sweep.\n",
    "RUN_SINGLE_BENCHMARK = True\n",
    "\n",
    "# Bật để so Kaggle x1 T4 công bằng với Colab x1 T4.\n",
    "# Tắt để Kaggle tự dùng toàn bộ GPU được cấp, ví dụ T4 x2.\n",
    "USE_SINGLE_GPU_FOR_FAIR_TEST = False\n",
    "FAIR_GPU_INDICES = \"0\"\n",
    "\n",
    "# Các giá trị này sẽ truyền vào subprocess benchmark trước khi app import CT2.\n",
    "# Để \"\" nếu muốn dùng auto/default của app.\n",
    "BATCH_SIZE = \"96\"\n",
    "WINDOW_MULTIPLIER = \"8\"      # Kaggle T4 x2: 8 nhanh nhất trong sweep; Colab fair x1 dùng 16\n",
    "FAIR_WINDOW_MULTIPLIER = \"16\"\n",
    "CT2_BATCH_TYPE = \"tokens\"\n",
    "INTER_THREADS = \"1\"\n",
    "TOKENIZE_WORKERS = \"\"      # Colab 2 vCPU có thể thử \"2\"; để trống = auto\n",
    "TOKENIZE_JOB_SIZE = \"\"\n",
    "CT2_THREADS = \"\"\n",
    "\n",
    "TRACKED_ENV_KEYS = [\n",
    "    \"CUDA_VISIBLE_DEVICES\",\n",
    "    \"HACHIMIMT_GPU_INDICES\",\n",
    "    \"HACHIMIMT_AUTO_ALL_GPUS\",\n",
    "    \"HACHIMIMT_BATCH_SIZE\",\n",
    "    \"HACHIMIMT_THREADS\",\n",
    "    \"HACHIMIMT_TOKENIZE_WORKERS\",\n",
    "    \"HACHIMIMT_TOKENIZE_JOB_SIZE\",\n",
    "    \"HACHIMIMT_CT2_BATCH_TYPE\",\n",
    "    \"HACHIMIMT_CT2_WINDOW_MULTIPLIER\",\n",
    "    \"HACHIMIMT_INTER_THREADS\",\n",
    "]\n",
    "\n",
    "\n",
    "def _set_or_unset(env, key, value):\n",
    "    value = str(value).strip()\n",
    "    if value:\n",
    "        env[key] = value\n",
    "    else:\n",
    "        env.pop(key, None)\n",
    "\n",
    "\n",
    "def build_benchmark_env(\n",
    "    *,\n",
    "    use_single_gpu=USE_SINGLE_GPU_FOR_FAIR_TEST,\n",
    "    gpu_indices=FAIR_GPU_INDICES,\n",
    "    batch_size=BATCH_SIZE,\n",
    "    window_multiplier=WINDOW_MULTIPLIER,\n",
    "    fair_window_multiplier=FAIR_WINDOW_MULTIPLIER,\n",
    "    ct2_batch_type=CT2_BATCH_TYPE,\n",
    "    inter_threads=INTER_THREADS,\n",
    "    tokenize_workers=TOKENIZE_WORKERS,\n",
    "    tokenize_job_size=TOKENIZE_JOB_SIZE,\n",
    "    ct2_threads=CT2_THREADS,\n",
    "):\n",
    "    env = os.environ.copy()\n",
    "    if use_single_gpu:\n",
    "        env[\"HACHIMIMT_GPU_INDICES\"] = str(gpu_indices).strip() or \"0\"\n",
    "        env[\"HACHIMIMT_AUTO_ALL_GPUS\"] = \"0\"\n",
    "        if not str(window_multiplier).strip():\n",
    "            window_multiplier = fair_window_multiplier\n",
    "    else:\n",
    "        env.pop(\"HACHIMIMT_GPU_INDICES\", None)\n",
    "        env.pop(\"HACHIMIMT_AUTO_ALL_GPUS\", None)\n",
    "\n",
    "    _set_or_unset(env, \"HACHIMIMT_BATCH_SIZE\", batch_size)\n",
    "    _set_or_unset(env, \"HACHIMIMT_CT2_WINDOW_MULTIPLIER\", window_multiplier)\n",
    "    _set_or_unset(env, \"HACHIMIMT_CT2_BATCH_TYPE\", ct2_batch_type)\n",
    "    _set_or_unset(env, \"HACHIMIMT_INTER_THREADS\", inter_threads)\n",
    "    _set_or_unset(env, \"HACHIMIMT_TOKENIZE_WORKERS\", tokenize_workers)\n",
    "    _set_or_unset(env, \"HACHIMIMT_TOKENIZE_JOB_SIZE\", tokenize_job_size)\n",
    "    _set_or_unset(env, \"HACHIMIMT_THREADS\", ct2_threads)\n",
    "    return env\n",
    "\n",
    "\n",
    "preview_env = build_benchmark_env()\n",
    "print(\"Benchmark config:\")\n",
    "print(\"MODEL=\", MODEL, \"BEAM=\", BEAM, \"CHUNK_MODE=\", CHUNK_MODE, \"NORMALIZE=\", NORMALIZE)\n",
    "for key in TRACKED_ENV_KEYS:\n",
    "    if key in preview_env:\n",
    "        print(f\"{key}={preview_env[key]}\")\n",
    "print(\"single_gpu_fair_test=\", USE_SINGLE_GPU_FOR_FAIR_TEST)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4. Chạy benchmark và in BENCH_PROFILE\n",
    "import os\n",
    "import subprocess\n",
    "import sys\n",
    "from pathlib import Path\n",
    "\n",
    "bench_script = Path(\"hachimimt/src/benchmark_file.py\")\n",
    "if not bench_script.exists():\n",
    "    raise FileNotFoundError(f\"Không thấy benchmark script: {bench_script}\")\n",
    "if not Path(input_path).exists():\n",
    "    raise FileNotFoundError(f\"Input file không tồn tại: {input_path}\")\n",
    "\n",
    "\n",
    "def parse_kv_line(line):\n",
    "    data = {}\n",
    "    for part in line.split()[1:]:\n",
    "        if \"=\" in part:\n",
    "            key, value = part.split(\"=\", 1)\n",
    "            data[key] = value\n",
    "    return data\n",
    "\n",
    "\n",
    "def run_benchmark_once(\n",
    "    *,\n",
    "    label=\"single\",\n",
    "    model=MODEL,\n",
    "    beam=BEAM,\n",
    "    chunk_mode=CHUNK_MODE,\n",
    "    normalize=NORMALIZE,\n",
    "    progress_seconds=PROGRESS_SECONDS,\n",
    "    use_single_gpu=USE_SINGLE_GPU_FOR_FAIR_TEST,\n",
    "    gpu_indices=FAIR_GPU_INDICES,\n",
    "    batch_size=BATCH_SIZE,\n",
    "    window_multiplier=WINDOW_MULTIPLIER,\n",
    "):\n",
    "    env = build_benchmark_env(\n",
    "        use_single_gpu=use_single_gpu,\n",
    "        gpu_indices=gpu_indices,\n",
    "        batch_size=batch_size,\n",
    "        window_multiplier=window_multiplier,\n",
    "    )\n",
    "    cmd = [\n",
    "        sys.executable,\n",
    "        str(bench_script),\n",
    "        str(input_path),\n",
    "        \"--model\", model,\n",
    "        \"--backend\", \"ct2\",\n",
    "        \"--beam\", str(beam),\n",
    "        \"--chunk-mode\", chunk_mode,\n",
    "        \"--normalize\", normalize,\n",
    "        \"--progress-seconds\", str(progress_seconds),\n",
    "    ]\n",
    "\n",
    "    print(f\"RUN_LABEL={label}\")\n",
    "    print(\"RUN:\", \" \".join(cmd))\n",
    "    print(\"ENV:\", {key: env.get(key) for key in TRACKED_ENV_KEYS if env.get(key) is not None})\n",
    "    print(\"\\n--- benchmark output ---\")\n",
    "    lines = []\n",
    "    process = subprocess.Popen(\n",
    "        cmd,\n",
    "        stdout=subprocess.PIPE,\n",
    "        stderr=subprocess.STDOUT,\n",
    "        text=True,\n",
    "        bufsize=1,\n",
    "        env=env,\n",
    "    )\n",
    "    assert process.stdout is not None\n",
    "    for line in process.stdout:\n",
    "        print(line, end=\"\")\n",
    "        lines.append(line.rstrip(\"\\n\"))\n",
    "    returncode = process.wait()\n",
    "    if returncode != 0:\n",
    "        raise subprocess.CalledProcessError(returncode, cmd)\n",
    "\n",
    "    profile_lines = [line for line in lines if line.startswith(\"BENCH_PROFILE\")]\n",
    "    done_lines = [line for line in lines if line.startswith(\"BENCH_DONE\")]\n",
    "    runtime_lines = [line for line in lines if line.startswith(\"BENCH_RUNTIME\")]\n",
    "    package_lines = [line for line in lines if line.startswith(\"BENCH_PACKAGES\")]\n",
    "    env_lines = [line for line in lines if line.startswith(\"BENCH_ENV\")]\n",
    "    summary = {\n",
    "        \"label\": label,\n",
    "        \"profile_line\": profile_lines[-1] if profile_lines else \"\",\n",
    "        \"done_line\": done_lines[-1] if done_lines else \"\",\n",
    "        \"runtime_line\": runtime_lines[-1] if runtime_lines else \"\",\n",
    "        \"package_line\": package_lines[-1] if package_lines else \"\",\n",
    "        \"env_line\": env_lines[-1] if env_lines else \"\",\n",
    "        \"profile\": parse_kv_line(profile_lines[-1]) if profile_lines else {},\n",
    "        \"done\": parse_kv_line(done_lines[-1]) if done_lines else {},\n",
    "    }\n",
    "    print(\"\\n--- parsed summary ---\")\n",
    "    print(summary[\"runtime_line\"] or \"Không thấy BENCH_RUNTIME. Hãy chắc notebook đã tải zip mới từ Space.\")\n",
    "    print(summary[\"package_line\"] or \"Không thấy BENCH_PACKAGES.\")\n",
    "    print(summary[\"env_line\"] or \"Không thấy BENCH_ENV.\")\n",
    "    print(summary[\"profile_line\"] or \"Không thấy BENCH_PROFILE.\")\n",
    "    print(summary[\"done_line\"] or \"Không thấy BENCH_DONE.\")\n",
    "    return summary\n",
    "\n",
    "\n",
    "if RUN_SINGLE_BENCHMARK:\n",
    "    last_summary = run_benchmark_once()\n",
    "else:\n",
    "    print(\"RUN_SINGLE_BENCHMARK=False. Bỏ qua benchmark đơn, chạy tiếp cell sweep.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 5. Sweep batch/window/beam bằng script CLI\n",
    "# Preset tuỳ chọn: kiểm chứng window trên Kaggle T4 x2 sau khi batch=96 đã chốt.\n",
    "RUN_SWEEP = False\n",
    "SWEEP_SINGLE_GPU = False     # False: Kaggle dùng auto/all GPU; True: fair x1 T4\n",
    "SWEEP_BEAMS = [1, 2]\n",
    "SWEEP_BATCHES = [96]\n",
    "SWEEP_WINDOWS = [4, 8, 16]   # Kaggle x2: window 8/16 đều đạt effective 32x; 8 đang nhỉnh nhất\n",
    "SWEEP_PROGRESS_SECONDS = 999999\n",
    "SWEEP_MAX_RUNS = 0           # 0 = chạy hết combo; đặt 1/2 để smoke nhanh\n",
    "\n",
    "def _csv(values):\n",
    "    return \",\".join(str(value) for value in values)\n",
    "\n",
    "\n",
    "if RUN_SWEEP:\n",
    "    sweep_script = Path(\"hachimimt/src/benchmark_sweep.py\")\n",
    "    if not sweep_script.exists():\n",
    "        raise FileNotFoundError(f\"Không thấy sweep script: {sweep_script}\")\n",
    "\n",
    "    cmd = [\n",
    "        sys.executable,\n",
    "        str(sweep_script),\n",
    "        str(input_path),\n",
    "        \"--model\", MODEL,\n",
    "        \"--backend\", \"ct2\",\n",
    "        \"--chunk-mode\", CHUNK_MODE,\n",
    "        \"--normalize\", NORMALIZE,\n",
    "        \"--beams\", _csv(SWEEP_BEAMS),\n",
    "        \"--batches\", _csv(SWEEP_BATCHES),\n",
    "        \"--windows\", _csv(SWEEP_WINDOWS),\n",
    "        \"--ct2-batch-type\", CT2_BATCH_TYPE,\n",
    "        \"--inter-threads\", INTER_THREADS,\n",
    "        \"--progress-seconds\", str(SWEEP_PROGRESS_SECONDS),\n",
    "    ]\n",
    "    if SWEEP_SINGLE_GPU:\n",
    "        cmd.extend([\"--single-gpu\", \"--gpu-indices\", FAIR_GPU_INDICES])\n",
    "    else:\n",
    "        cmd.append(\"--auto-all-gpus\")\n",
    "    if TOKENIZE_WORKERS:\n",
    "        cmd.extend([\"--tokenize-workers\", TOKENIZE_WORKERS])\n",
    "    if TOKENIZE_JOB_SIZE:\n",
    "        cmd.extend([\"--tokenize-job-size\", TOKENIZE_JOB_SIZE])\n",
    "    if CT2_THREADS:\n",
    "        cmd.extend([\"--ct2-threads\", CT2_THREADS])\n",
    "    if SWEEP_MAX_RUNS:\n",
    "        cmd.extend([\"--max-runs\", str(SWEEP_MAX_RUNS)])\n",
    "\n",
    "    print(\"RUN_SWEEP_CMD:\", \" \".join(cmd))\n",
    "    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1)\n",
    "    assert process.stdout is not None\n",
    "    for line in process.stdout:\n",
    "        print(line, end=\"\")\n",
    "    returncode = process.wait()\n",
    "    if returncode != 0:\n",
    "        raise subprocess.CalledProcessError(returncode, cmd)\n",
    "else:\n",
    "    print(\"RUN_SWEEP=False. Đổi thành True để chạy mini-sweep batch/window/beam.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Đọc kết quả\n",
    "\n",
    "- `ct2_infer_s`: thời gian inference thật trong CTranslate2. Nếu dòng này chiếm phần lớn, bottleneck chính là model/GPU.\n",
    "- `chunk_s`: thời gian chia chunk và đếm token.\n",
    "- `decode_s`: thời gian decode output.\n",
    "- `tokenize_wait_s`: thời gian GPU phải chờ tokenization. Nếu cao trên Colab/Kaggle, CPU đang nghẽn.\n",
    "- `BENCH_RUNTIME`: Python/platform và số GPU CT2 nhìn thấy.\n",
    "- `BENCH_PACKAGES`: version package quan trọng; khác version CT2/SentencePiece có thể làm lệch tốc độ.\n",
    "- `BENCH_ENV`: cấu hình hiệu năng thật được truyền vào subprocess benchmark.\n",
    "- `BENCH_DONE chars_s`: tốc độ chữ Hán/giây tính trên thời gian dịch.\n",
    "- Fair test Kaggle vs Colab: đặt `USE_SINGLE_GPU_FOR_FAIR_TEST=True`, `FAIR_GPU_INDICES=\"0\"`; nếu `WINDOW_MULTIPLIER=\"\"`, notebook tự dùng `FAIR_WINDOW_MULTIPLIER=\"16\"`."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}