"""
backend/services/summarizer.py
================================
LLM-based summarization of transcript chunks.
Supports OpenAI GPT and HuggingFace (BART) backends.
"""

import os
from typing import Dict, List, Optional

from backend.utils.config import settings
from backend.utils.logger import get_logger

# Module-level logger, named after this module via ``__name__``; used by every
# method of ``Summarizer`` below.
logger = get_logger(__name__)

# ── Prompt Templates ──────────────────────────────────────────────────────────

# Per-chunk summarization prompt. It is filled in with ``str.format`` using
# two placeholders: ``{text}`` (the transcript excerpt) and ``{language}``
# (the language the model must answer in). Because the template goes through
# ``str.format``, any literal braces added to it must be doubled.
CHUNK_SUMMARY_PROMPT = """You are an expert educator and note-taker analyzing a video transcript.
Extract ALL important information from this transcript excerpt. Be thorough and detailed.

CRITICAL INSTRUCTION: You MUST write everything in the following language: {language}

Transcript:
\"\"\"
{text}
\"\"\"

Provide a detailed summary covering:
- All key concepts and definitions explained
- Important facts, numbers, formulas, or data mentioned
- Examples given by the speaker
- Any processes or steps described

Format as clear bullet points. Do NOT skip any important detail:"""

# Final-notes prompt. It is filled in with ``str.format`` using two
# placeholders: ``{summaries}`` (the concatenated per-chunk summaries) and
# ``{language}`` (the language the model must answer in). It asks the model
# for a fixed four-section Markdown layout: Overview, Key Concepts, Detailed
# Notes, Important Points to Remember. Because the template goes through
# ``str.format``, any literal braces added to it must be doubled.
FINAL_SUMMARY_PROMPT = """You are a world-class educator creating comprehensive study notes from a video.
Based on the chunk summaries below, produce beautifully structured, in-depth notes.

CRITICAL INSTRUCTION: You MUST write EVERYTHING in the following language: {language}

Use this exact Markdown structure:
## 📌 Overview
(2-3 sentence overview of what this video covers)

## 🎯 Key Concepts
(Explain each main concept in simple, clear language with examples)

## 📖 Detailed Notes
(Use ### subheadings for each topic. Include all facts, formulas, definitions, processes)

## 💡 Important Points to Remember
(Bullet list of the most critical takeaways)

Chunk Summaries:
\"\"\"
{summaries}
\"\"\"

Write the complete, detailed notes now:"""


class Summarizer:
    """
    Summarize transcript chunks and derive notes, translations and answers.

    Two backends are supported:

    * OpenAI chat completions, used when ``settings.LLM_PROVIDER == "openai"``
      and an API key can be resolved. The key is looked up in this order:
      constructor argument, ``OPENAI_API_KEY`` environment variable,
      ``settings.OPENAI_API_KEY``.
    * A local HuggingFace seq2seq model (``settings.HF_SUMMARIZATION_MODEL``),
      used for per-chunk summaries otherwise. Final notes, translation and
      Q&A have rule-based fallbacks when OpenAI is not usable.

    The OpenAI client and the HuggingFace model are created lazily on first
    use and then reused by later calls.
    """

    # Maximum number of transcript characters sent to OpenAI per chunk.
    _OPENAI_MAX_CHUNK_CHARS = 3000
    # Number of leading context segments considered by the no-LLM Q&A fallback.
    _FALLBACK_MAX_SEGMENTS = 3

    def __init__(self, api_key: Optional[str] = None):
        """
        Args:
            api_key: Optional OpenAI API key. When falsy, the key is taken
                from the environment or from ``settings`` at call time.
        """
        self.api_key = api_key
        self._openai_client = None
        # Constructor kwargs the cached client was built with, so the cache
        # can be invalidated if the key or base URL changes.
        self._openai_client_kwargs = None
        self._hf_pipeline = None

    # ── Public API ────────────────────────────────────────────

    def summarize_chunks(self, chunks: List[Dict], language: str = "English") -> List[Dict]:
        """
        Summarize each transcript chunk individually.

        Args:
            chunks: List of chunk dicts from TextChunker. Each dict must
                contain a ``"text"`` key.
            language: Language to output the summary in. Only the OpenAI
                backend receives it; the HuggingFace path ignores it.

        Returns:
            A new list of new dicts: each is a shallow copy of the input
            chunk with a ``"summary"`` key added. The input is not mutated.
        """
        logger.info(f"Summarizing {len(chunks)} chunks using {settings.LLM_PROVIDER} in {language}...")
        results = []
        for i, chunk in enumerate(chunks):
            logger.debug(f"  Summarizing chunk {i + 1}/{len(chunks)}")
            summary = self._summarize_text(chunk["text"], language)
            results.append({**chunk, "summary": summary})
        logger.info("Chunk summarization complete ✅")
        return results

    def generate_final_notes(self, summarized_chunks: List[Dict], language: str = "English") -> str:
        """
        Combine all chunk summaries into a single structured document.

        Args:
            summarized_chunks: Output of summarize_chunks(). Chunks with a
                missing or empty ``"summary"`` are skipped.
            language: Language to output the notes in (OpenAI path only).

        Returns:
            Markdown-formatted final notes string.
        """
        logger.info("Generating final structured notes...")

        if self._use_openai():
            # Missing timestamps render as "?" here, matching the rule-based
            # builder, instead of raising KeyError.
            combined = "\n\n".join(
                f"[{c.get('start_ts', '?')} → {c.get('end_ts', '?')}]\n{c['summary']}"
                for c in summarized_chunks
                if c.get("summary")
            )
            prompt = FINAL_SUMMARY_PROMPT.format(summaries=combined, language=language)
            final = self._openai_chat(prompt)
        else:
            # Rule-based fallback: assemble Markdown directly from chunk summaries
            final = self._build_notes_from_chunks(summarized_chunks)

        logger.info("Final notes generated ✅")
        return final

    def translate_notes(self, notes: str, language: str) -> str:
        """
        Translate the final notes into the specified target language.

        Args:
            notes: Markdown notes to translate.
            language: Target language name.

        Returns:
            The model's translation, or ``notes`` unchanged when the OpenAI
            backend is not usable.
        """
        logger.info(f"Translating notes to {language}...")

        if not self._use_openai():
            logger.warning("No OpenAI key available for translation. Returning original notes.")
            return notes

        prompt = (
            f"You are an expert translator. Translate the following Markdown notes into {language}. "
            f"Maintain all Markdown formatting (headings, bullet points, bold, etc.) exactly as they appear in the original.\n\n"
            f"Notes:\n\"\"\"\n{notes}\n\"\"\"\n\nTranslated Notes:"
        )
        translated = self._openai_chat(prompt)
        # Logged only here so the success message is not emitted when the
        # original notes are returned untranslated.
        logger.info(f"Notes translated to {language} ✅")
        return translated

    def answer_question(self, query: str, context: str) -> str:
        """
        Answer a user's question using video context plus general knowledge.

        Args:
            query: The user's question.
            context: Relevant transcript text; segments are expected to be
                separated by blank lines.

        Returns:
            The model's answer on the OpenAI path. Otherwise a Markdown
            quote of the leading context segments, or a "no relevant
            content" message when there is nothing to quote.
        """
        logger.info(f"Answering query: '{query}'")

        if self._use_openai():
            prompt = (
                f"You are a friendly, knowledgeable human expert having a conversation with the user about a video they just watched.\n\n"
                f"The user has a question. Below is the relevant portion of the video transcript.\n"
                f"Your job is to:\n"
                f"1. Answer the question naturally, clearly, and conversationally, as if you are explaining it to a friend.\n"
                f"2. Use the transcript context to inform your answer. If the transcript is missing details, seamlessly bring in your own general knowledge to give a great, helpful explanation.\n"
                f"3. Avoid acting like a robot (don't say 'As an AI' or 'Based on the context'). Just give the answer directly and warmly.\n"
                f"4. You can use some light formatting (like bolding or a few bullet points if it helps readability), but keep the tone natural and human-like.\n\n"
                f"Video Transcript Context:\n\"\"\"\n{context}\n\"\"\"\n\n"
                f"User's Question: {query}\n\n"
                f"Your natural answer:"
            )
            answer = self._openai_chat(prompt)
        else:
            # Fallback: return the most relevant context segments as the answer
            answer = self._fallback_answer(context)

        logger.info("Answer generated ✅")
        return answer

    # ── Internal helpers / dispatcher ─────────────────────────

    def _resolve_api_key(self) -> Optional[str]:
        """Return the first available OpenAI key: instance, environment, settings."""
        return self.api_key or os.environ.get("OPENAI_API_KEY") or settings.OPENAI_API_KEY

    def _use_openai(self) -> bool:
        """Return True when OpenAI is the configured provider and a key is available."""
        return settings.LLM_PROVIDER == "openai" and bool(self._resolve_api_key())

    @staticmethod
    def _fallback_answer(context: str) -> str:
        """
        Build the no-LLM answer by quoting the leading context segments.

        Only the first ``_FALLBACK_MAX_SEGMENTS`` blank-line-separated
        segments are considered; blank ones among them are dropped. When
        nothing remains, a "no relevant content" message is returned instead
        of a header with no body.
        """
        candidates = context.split("\n\n")[:Summarizer._FALLBACK_MAX_SEGMENTS]
        quotes = [f"> {seg.strip()}" for seg in candidates if seg.strip()]
        if not quotes:
            return "No relevant content found for your query."
        return "\n\n".join(["**Relevant content from the video:**\n", *quotes])

    def _summarize_text(self, text: str, language: str) -> str:
        """Dispatch to the configured LLM backend."""
        if self._use_openai():
            return self._summarize_openai(text, language)
        # The HuggingFace path has no language control, so ``language`` is unused.
        return self._summarize_huggingface(text)

    def _build_notes_from_chunks(self, summarized_chunks: List[Dict]) -> str:
        """
        Rule-based final notes builder used when no LLM is available.

        Emits a fixed Overview section, one timestamped subsection per chunk
        that has a non-empty summary, and a fixed closing section. Missing
        timestamps are rendered as ``?``.
        """
        lines = [
            "## 📌 Overview",
            "",
            "These notes were generated from the video transcript using local AI (no OpenAI key provided).",
            "",
            "## 📖 Detailed Notes",
            "",
        ]
        for chunk in summarized_chunks:
            if not chunk.get("summary"):
                continue
            ts = f"{chunk.get('start_ts', '?')} → {chunk.get('end_ts', '?')}"
            lines.append(f"### ⏱ {ts}")
            lines.append("")
            # Each non-blank line of the summary becomes a line of the notes
            stripped = (part.strip() for part in chunk["summary"].split("\n"))
            lines.extend(part for part in stripped if part)
            lines.append("")

        lines += [
            "## 💡 Important Points to Remember",
            "",
            "- Review the timestamped sections above for key concepts.",
            "- Provide an OpenAI API key to get richer, structured AI-generated notes.",
        ]
        return "\n".join(lines)

    # ── OpenAI Backend ────────────────────────────────────────

    def _get_openai_client(self):
        """
        Return a cached OpenAI client, creating it on first use.

        The client is rebuilt only when the resolved API key or base URL
        differs from what the cached client was built with.
        """
        # Imported lazily so the module can be used without ``openai`` installed.
        from openai import OpenAI

        kwargs = {"api_key": self._resolve_api_key()}
        if settings.OPENAI_BASE_URL:
            kwargs["base_url"] = settings.OPENAI_BASE_URL

        # getattr keeps subclasses that override __init__ working.
        cached_kwargs = getattr(self, "_openai_client_kwargs", None)
        if self._openai_client is None or cached_kwargs != kwargs:
            self._openai_client = OpenAI(**kwargs)
            self._openai_client_kwargs = kwargs
        return self._openai_client

    def _summarize_openai(self, text: str, language: str) -> str:
        """Summarize one chunk via OpenAI; the text is truncated to the per-chunk character cap."""
        prompt = CHUNK_SUMMARY_PROMPT.format(
            text=text[: self._OPENAI_MAX_CHUNK_CHARS], language=language
        )
        return self._openai_chat(prompt)

    def _openai_chat(self, prompt: str) -> str:
        """
        Send ``prompt`` as a single user message and return the reply text.

        Never raises: any exception is logged and a bracketed placeholder
        containing the first 100 characters of the error is returned instead.
        An empty or missing reply yields ``""``.
        """
        try:
            client = self._get_openai_client()
            response = client.chat.completions.create(
                model=settings.OPENAI_MODEL,
                messages=[
                    {
                        "role": "system",
                        "content": "You are a professional academic note-taker and summarizer.",
                    },
                    {"role": "user", "content": prompt},
                ],
                temperature=0.3,
                max_tokens=2048,
            )
            content = response.choices[0].message.content
            return content.strip() if content else ""
        except Exception as e:
            logger.error(f"OpenAI API error: {e}")
            return f"[Summarization failed: {str(e)[:100]}]"

    # ── HuggingFace Backend ───────────────────────────────────

    def _get_hf_pipeline(self):
        """
        Return the cached ``(model, tokenizer)`` tuple, loading it on first use.

        Despite the attribute name, this is a tuple and not a
        ``transformers`` pipeline object.
        """
        if self._hf_pipeline is None:
            # Imported lazily so ``transformers`` is only needed on this path.
            from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
            logger.info(f"Loading HuggingFace model: {settings.HF_SUMMARIZATION_MODEL}")
            tokenizer = AutoTokenizer.from_pretrained(settings.HF_SUMMARIZATION_MODEL)
            model = AutoModelForSeq2SeqLM.from_pretrained(settings.HF_SUMMARIZATION_MODEL)
            self._hf_pipeline = (model, tokenizer)
            logger.info("HuggingFace summarization model loaded ✅")
        return self._hf_pipeline

    def _summarize_huggingface(self, text: str) -> str:
        """
        Summarize ``text`` with the local seq2seq model.

        Text shorter than 50 characters after stripping is returned as-is.
        On any error, the first five sentences longer than 20 characters are
        returned instead, or ``"[Content not available]"`` if there are none.
        """
        try:
            # The model needs at least a minimal amount of text to summarize
            clean_text = text.strip()
            if len(clean_text) < 50:
                return clean_text  # Too short to summarize; return as-is

            model, tokenizer = self._get_hf_pipeline()
            inputs = tokenizer(
                clean_text,
                max_length=1024,
                truncation=True,
                return_tensors="pt",
            )
            # Clamp min_length to avoid generation errors on short inputs
            input_len = inputs["input_ids"].shape[1]
            min_len = min(30, max(10, input_len // 4))
            summary_ids = model.generate(
                inputs["input_ids"],
                num_beams=4,
                min_length=min_len,
                max_length=256,
                early_stopping=True,
            )
            return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        except Exception as e:
            logger.error(f"HuggingFace summarization error: {e}")
            # Return a cleaned-up version of the input text as fallback
            sentences = [s.strip() for s in text.replace("\n", " ").split(".") if len(s.strip()) > 20]
            return ". ".join(sentences[:5]) + "." if sentences else "[Content not available]"