{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Model Testing Notebook\n",
    "\n",
    "Evaluate and compare different model configurations:\n",
    "- Whisper model sizes (tiny vs base vs small; medium is not run by default)\n",
    "- LLM backends (OpenAI GPT-3.5 vs HuggingFace BART; GPT-4 is not run by default)\n",
    "- Embedding models for RAG"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Notebook setup: import path, environment variables and the shared sample text.\n",
    "import sys\n",
    "import time\n",
    "\n",
    "# Put the parent directory first on the import path, presumably so that the\n",
    "# `backend.services.rag_pipeline` import in the RAG cell resolves.\n",
    "sys.path.insert(0, '..')\n",
    "\n",
    "# Load variables from the parent directory's .env file (presumably including\n",
    "# the OPENAI_API_KEY read by the GPT-3.5 cell - confirm).\n",
    "from dotenv import load_dotenv\n",
    "load_dotenv('../.env')\n",
    "\n",
    "# Passage fed to the BART and GPT-3.5 summarization cells.\n",
    "SAMPLE_TEXT = \"\"\"\n",
    "Machine learning is a branch of artificial intelligence that enables systems\n",
    "to learn from data. Deep learning uses neural networks with multiple layers.\n",
    "Natural language processing allows computers to understand human language.\n",
    "We need to review the model architecture next week and assign tasks to the team.\n",
    "\"\"\"\n",
    "\n",
    "print('Setup complete ✅')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Compare Whisper Models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Benchmark several Whisper checkpoints on one audio file and tabulate\n",
    "# load time, transcription time and transcript length.\n",
    "import time\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "import whisper\n",
    "\n",
    "AUDIO_FILE     = '../data/audio/sample.wav'  # Provide a test WAV file\n",
    "WHISPER_MODELS = ['tiny', 'base', 'small']\n",
    "\n",
    "# Fail fast with a clear message instead of erroring after the first model has loaded.\n",
    "if not Path(AUDIO_FILE).is_file():\n",
    "    raise FileNotFoundError(f'Test audio not found: {AUDIO_FILE}')\n",
    "\n",
    "whisper_results = []\n",
    "for model_name in WHISPER_MODELS:\n",
    "    print(f'Testing Whisper {model_name}...')\n",
    "\n",
    "    # Loading may include a one-off download into download_root, so it is timed\n",
    "    # separately and does not distort the transcription-speed comparison.\n",
    "    t0     = time.perf_counter()\n",
    "    model  = whisper.load_model(model_name, download_root='../models/whisper')\n",
    "    load_s = time.perf_counter() - t0\n",
    "\n",
    "    t0           = time.perf_counter()\n",
    "    out          = model.transcribe(AUDIO_FILE)\n",
    "    transcribe_s = time.perf_counter() - t0\n",
    "\n",
    "    n_words = len(out['text'].split())\n",
    "    # 'time_s' is transcription time only; 'load_s' is reported alongside it.\n",
    "    whisper_results.append({\n",
    "        'model': model_name,\n",
    "        'load_s': round(load_s, 2),\n",
    "        'time_s': round(transcribe_s, 2),\n",
    "        'words': n_words,\n",
    "    })\n",
    "    print(f'  → load {load_s:.1f}s | transcribe {transcribe_s:.1f}s | {n_words} words')\n",
    "\n",
    "    # Drop the reference so the next checkpoint is not loaded while this one is still held.\n",
    "    del model\n",
    "\n",
    "pd.DataFrame(whisper_results)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Compare Summarization Quality"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# HuggingFace BART: summarize SAMPLE_TEXT (defined in the setup cell).\n",
    "from transformers import pipeline\n",
    "\n",
    "# device=-1 is the transformers pipeline convention for running on the CPU.\n",
    "hf_pipe = pipeline(\n",
    "    'summarization',\n",
    "    model='facebook/bart-large-cnn',\n",
    "    device=-1,\n",
    ")\n",
    "\n",
    "# Sampling is disabled and the summary length is bounded between 30 and 100.\n",
    "summary_kwargs = {'max_length': 100, 'min_length': 30, 'do_sample': False}\n",
    "hf_out = hf_pipe(SAMPLE_TEXT, **summary_kwargs)\n",
    "\n",
    "# The pipeline output is indexed as a list of dicts; take the first summary.\n",
    "bart_summary = hf_out[0]['summary_text']\n",
    "print('BART Summary:')\n",
    "print(bart_summary)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# OpenAI GPT-3.5: summarize SAMPLE_TEXT (defined in the setup cell).\n",
    "import os\n",
    "from openai import OpenAI\n",
    "\n",
    "# The API key is read from the environment rather than hardcoded.\n",
    "client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))\n",
    "resp   = client.chat.completions.create(\n",
    "    model='gpt-3.5-turbo',\n",
    "    messages=[{\n",
    "        'role': 'user',\n",
    "        'content': f'Summarize in 3 bullet points:\\n\\n{SAMPLE_TEXT}'\n",
    "    }],\n",
    "    max_tokens=200,\n",
    "    # Minimize sampling randomness so repeated runs are comparable, in line\n",
    "    # with the BART cell, which decodes with do_sample=False.\n",
    "    temperature=0,\n",
    ")\n",
    "print('GPT-3.5 Summary:')\n",
    "print(resp.choices[0].message.content)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Embedding Model Comparison"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compare sentence-embedding models: output dimension, load time and encode time.\n",
    "import time\n",
    "\n",
    "import numpy as np\n",
    "from sentence_transformers import SentenceTransformer\n",
    "\n",
    "# Named distinctly from the Whisper model list so the two cells cannot shadow each other.\n",
    "EMBEDDING_MODELS = [\n",
    "    'all-MiniLM-L6-v2',\n",
    "    'all-mpnet-base-v2',\n",
    "    'paraphrase-MiniLM-L3-v2',\n",
    "]\n",
    "\n",
    "# Also consumed by the RAG retrieval cell below, so the name must stay `sentences`.\n",
    "sentences = [\n",
    "    'Machine learning is a subset of AI',\n",
    "    'Deep learning uses neural networks',\n",
    "    'Natural language processing handles text',\n",
    "    'Action items should be reviewed weekly',\n",
    "]\n",
    "\n",
    "for m_name in EMBEDDING_MODELS:\n",
    "    # Model construction (possibly including a download) is timed apart from\n",
    "    # encoding so the encode figure reflects inference only.\n",
    "    t0       = time.perf_counter()\n",
    "    embedder = SentenceTransformer(m_name)\n",
    "    load_s   = time.perf_counter() - t0\n",
    "\n",
    "    t0       = time.perf_counter()\n",
    "    embs     = embedder.encode(sentences)\n",
    "    encode_s = time.perf_counter() - t0\n",
    "\n",
    "    print(f'{m_name}: dim={embs.shape[1]}, load={load_s:.2f}s, encode={encode_s:.2f}s')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. RAG Retrieval Quality Test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Index a handful of synthetic chunks and print the top matches for a few queries.\n",
    "from backend.services.rag_pipeline import RAGPipeline\n",
    "\n",
    "# One chunk per entry of `sentences` (defined in the embedding-comparison cell),\n",
    "# each spanning one minute. Minutes are zero-padded so the timestamps stay\n",
    "# well-formed when the minute index reaches two digits.\n",
    "TEST_CHUNKS = [\n",
    "    {'chunk_id': i, 'text': s, 'start_ts': f'00:{i:02d}:00', 'end_ts': f'00:{i+1:02d}:00',\n",
    "     'start': i*60.0, 'end': (i+1)*60.0}\n",
    "    for i, s in enumerate(sentences)\n",
    "]\n",
    "\n",
    "rag = RAGPipeline()\n",
    "rag.index_chunks(TEST_CHUNKS)\n",
    "\n",
    "TEST_QUERIES = [\n",
    "    'artificial intelligence and machine learning',\n",
    "    'neural network architecture',\n",
    "    'weekly tasks and action items',\n",
    "]\n",
    "\n",
    "for q in TEST_QUERIES:\n",
    "    # `hits` rather than `results`, so the Whisper benchmark list is not overwritten.\n",
    "    hits = rag.query(q, top_k=2)\n",
    "    print(f'\\nQuery: {q}')\n",
    "    for hit in hits:\n",
    "        # Each hit is read as a mapping with 'score' and 'text' entries.\n",
    "        print(f'  [{hit[\"score\"]:.3f}] {hit[\"text\"]}')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}