import os
import re

# ─── Cache Configuration ──────────────────────────────────────────────────────
# These variables must be assigned BEFORE gradio / transformers /
# huggingface_hub are imported: the Hugging Face libraries resolve their cache
# directories once, at import time, so setting them after the imports (as this
# file previously did) leaves the default cache location in effect.
# NOTE(review): TRANSFORMERS_CACHE is reported as deprecated in favour of
# HF_HOME by recent transformers releases; it is kept here for older
# versions — confirm against the pinned version before removing it.
os.environ["TRANSFORMERS_CACHE"] = "/tmp/model_cache"
os.environ["HF_HOME"] = "/tmp/model_cache"

# Third-party imports intentionally follow the environment setup above.
import gradio as gr  # noqa: E402
import torch  # noqa: E402
from transformers import pipeline  # noqa: E402
from huggingface_hub import InferenceClient  # noqa: E402

# ─── HF Token ─────────────────────────────────────────────────────────────────
# Access token for the hosted inference client created below; None when the
# HF_TOKEN environment variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")

# ─── Model Loading ────────────────────────────────────────────────────────────
# Speech-to-text pipeline. The options are collected in one mapping so the
# chunking settings (30 / 5, in the units implied by the `_s` suffix) and the
# timestamp flag can be read at a glance.
print("Loading Whisper model...")
_whisper_options = {
    "task": "automatic-speech-recognition",
    "model": "openai/whisper-small",
    "chunk_length_s": 30,
    "stride_length_s": 5,
    "return_timestamps": True,
}
whisper = pipeline(**_whisper_options)
print("Whisper loaded successfully.")

# Sentiment classifier applied to the transcript text.
print("Loading sentiment model...")
_sentiment_model_id = "cardiffnlp/twitter-roberta-base-sentiment-latest"
sentiment_analyzer = pipeline("sentiment-analysis", model=_sentiment_model_id)
print("Sentiment model loaded successfully.")

# Client for the hosted LLM; authenticated with HF_TOKEN (which may be None).
print("Initializing LLM Inference client...")
_llm_model_id = "mistralai/Mistral-7B-Instruct-v0.2"
llm_client = InferenceClient(token=HF_TOKEN, model=_llm_model_id)
print("LLM client ready.")


# ─── Prompt Template ──────────────────────────────────────────────────────────
def build_prompt(transcript: str, max_chars: int = 3000) -> str:
    """Build the analysis prompt for one meeting transcript.

    The prompt instructs the model to answer with exactly four markdown
    sections (``## Summary``, ``## Action Items``, ``## Key Decisions``,
    ``## Sentiment``) and then appends the transcript text.

    Args:
        transcript: Full transcript text.
        max_chars: Maximum number of leading transcript characters embedded
            in the prompt; anything beyond it is dropped. Defaults to 3000,
            the limit this function has always applied.

    Returns:
        The complete prompt string.

    Raises:
        ValueError: If ``max_chars`` is negative (a negative slice bound
            would silently cut characters off the end instead of capping
            the length).
    """
    if max_chars < 0:
        raise ValueError("max_chars must be non-negative")

    # Only the head of the transcript is sent, which bounds the prompt size.
    excerpt = transcript[:max_chars]

    return f"""You are an expert meeting analyst for Product Managers.
Analyze the following meeting transcript and return ONLY a structured response
with these four sections:

## Summary
A 3-5 sentence TLDR of the meeting.

## Action Items
A bullet list. Each item format: "- [Owner if mentioned] Task description (Deadline if mentioned)"

## Key Decisions
A bullet list of decisions made or agreed upon during the meeting.

## Sentiment
Overall tone: Positive / Neutral / Negative. One sentence explanation.

Transcript:
{excerpt}"""


# ─── Section Parser ───────────────────────────────────────────────────────────
# Result key -> heading text the LLM is asked to emit for that section.
_SECTION_TITLES = {
    "summary": "Summary",
    "action_items": "Action Items",
    "key_decisions": "Key Decisions",
    "sentiment": "Sentiment",
}

# One pattern per section, compiled once at import time. Each matches the
# "## <Title>" heading, tolerates an optional colon on the heading line
# ("## Summary:"), and lazily captures everything up to the next "##" or the
# end of the text.
_SECTION_PATTERNS = {
    key: re.compile(
        rf"##\s*{title}[ \t]*:?\s*(.*?)(?=##|\Z)",
        re.DOTALL | re.IGNORECASE,
    )
    for key, title in _SECTION_TITLES.items()
}


def parse_sections(text: str) -> dict:
    """Split an LLM response into its four expected markdown sections.

    Heading matching is case-insensitive. A colon directly after the heading
    is treated as part of the heading rather than of the section body.

    Args:
        text: Raw LLM response. ``None`` or an empty string is accepted and
            yields empty sections.

    Returns:
        A dict with the keys ``summary``, ``action_items``,
        ``key_decisions`` and ``sentiment``. Each value is the stripped
        section body, or ``""`` when the heading was not found.
    """
    # Normalise missing input so the regex search never receives None.
    body = text or ""

    sections = {}
    for key, pattern in _SECTION_PATTERNS.items():
        found = pattern.search(body)
        sections[key] = found.group(1).strip() if found else ""
    return sections


# ─── Core Processing Pipeline ─────────────────────────────────────────────────
def _error_outputs(message: str) -> tuple:
    """Build the 5-slot output tuple that reports a failure to the UI."""
    # Slot 0 feeds the transcript box, which sits inside an accordion that is
    # created collapsed, so a message placed only there is invisible to the
    # user. Mirroring it into slot 1 (the summary box) makes it visible.
    return (message, message, "", "", "")


def process_meeting(audio_file):
    """Transcribe a meeting recording and analyse it.

    Steps: transcribe the audio with the Whisper pipeline, send the
    transcript to the LLM, split the reply into sections, then run the
    sentiment classifier on the start of the transcript.

    Args:
        audio_file: Path of the audio file to process, or ``None`` when the
            user supplied nothing.

    Returns:
        A 5-tuple of strings in the order the UI outputs are wired:
        ``(transcript, summary, action_items, key_decisions, sentiment)``.
        On any failure the first two slots both carry the error message and
        the remaining slots are empty strings. No exception propagates to
        the caller.
    """
    if audio_file is None:
        return _error_outputs(
            "No audio file provided. Please upload a .mp3, .wav, or .m4a file."
        )

    try:
        # Step 1: Transcribe with Whisper
        print(f"Transcribing audio file: {audio_file}")
        result = whisper(audio_file)
        transcript = result["text"].strip()
        print(f"Transcription complete. Length: {len(transcript)} characters.")

        # Very short output is treated as a failed transcription.
        if len(transcript) < 20:
            return _error_outputs(
                "Transcription too short. Audio may be silent or corrupt."
            )

        # Step 2: LLM Analysis via chat_completion
        print("Sending transcript to LLM for analysis...")
        prompt = build_prompt(transcript)

        response = llm_client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            max_tokens=600,
            temperature=0.3,
        )
        llm_response = response.choices[0].message.content
        print("LLM response received.")

        # Step 3: Parse structured sections
        sections = parse_sections(llm_response)

        summary = sections.get("summary", "").strip()
        action_items = sections.get("action_items", "").strip()
        key_decisions = sections.get("key_decisions", "").strip()
        llm_sentiment = sections.get("sentiment", "").strip()

        # If the model ignored the requested headings, show its raw reply as
        # the summary rather than nothing.
        if not summary:
            summary = llm_response.strip()
        if not action_items:
            action_items = "No action items detected in this meeting."
        if not key_decisions:
            key_decisions = "No key decisions detected in this meeting."

        # Step 4: Sentiment via RoBERTa
        # Only the first 512 characters are classified.
        # NOTE(review): this cap is in characters, not tokens; text that
        # tokenizes to more than one token per character could still exceed
        # the classifier's input limit — confirm whether tokenizer-level
        # truncation is needed.
        short_text = transcript[:512]
        sent_result = sentiment_analyzer(short_text)[0]
        label = sent_result["label"].capitalize()
        score = round(sent_result["score"] * 100, 1)
        sentiment_display = f"{label} (confidence: {score}%)"

        # Append the LLM's own sentiment paragraph when it produced one.
        if llm_sentiment:
            sentiment_display = f"{sentiment_display}\n\n{llm_sentiment}"

        print("Processing complete.")
        return transcript, summary, action_items, key_decisions, sentiment_display

    except Exception as e:
        # UI boundary: any failure is turned into a readable message instead
        # of propagating to the caller.
        error_detail = str(e)
        print(f"ERROR in process_meeting: {error_detail}")
        return _error_outputs(
            f"Processing failed.\n\nError details:\n{error_detail}\n\n"
            f"Common causes:\n"
            f"1. HF_TOKEN secret not set in Space Settings\n"
            f"2. Inference API rate limit — wait 1 minute and retry\n"
            f"3. Audio format not supported — try converting to .wav"
        )


# ─── AMI Dataset Sample Loader ────────────────────────────────────────────────
def load_ami_sample():
    """Fetch one sample from the AMI corpus and save it as a temporary WAV.

    The dataset is opened in streaming mode and only its first test-split
    record is read. Heavy dependencies are imported lazily so they are only
    required when this function is actually called.

    Returns:
        ``(path, status)``: the path of the written ``.wav`` file plus a
        success message, or ``(None, error_message)`` on any failure. No
        exception propagates to the caller.
    """
    try:
        from datasets import load_dataset
        import numpy as np
        import tempfile
        import soundfile as sf

        print("Loading AMI dataset sample (streaming)...")
        # NOTE(review): trust_remote_code=True allows code shipped with the
        # dataset repository to run locally — confirm this is acceptable.
        ds = load_dataset(
            "edinburghcstr/ami",
            "ihm",
            split="test",
            streaming=True,
            trust_remote_code=True
        )

        # An empty stream would otherwise surface as a StopIteration with a
        # blank message; raise something descriptive instead.
        sample = next(iter(ds), None)
        if sample is None:
            raise ValueError("AMI dataset stream returned no samples")

        audio_array = np.array(sample["audio"]["array"])
        sample_rate = sample["audio"]["sampling_rate"]

        # Reserve a unique path and close the handle straight away: the
        # original left the descriptor open for the life of the process.
        # delete=False keeps the file on disk for the caller to use.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            wav_path = tmp.name
        sf.write(wav_path, audio_array, sample_rate)

        print(f"AMI sample loaded: {wav_path}")
        return wav_path, "✅ AMI sample loaded. Click Analyze Meeting to process it."

    except Exception as e:
        # UI boundary: report the failure in the status box rather than raise.
        error_msg = f"❌ Failed to load AMI sample: {str(e)}"
        print(error_msg)
        return None, error_msg


# ─── Gradio UI ────────────────────────────────────────────────────────────────
def _tab_textbox(tab_title, label, lines, placeholder):
    """Add a tab holding a single Textbox to the current layout; return the Textbox."""
    with gr.Tab(tab_title):
        box = gr.Textbox(label=label, lines=lines, placeholder=placeholder)
    return box


_soft_theme = gr.themes.Soft()

with gr.Blocks(title="PM Meeting Summarizer", theme=_soft_theme) as demo:

    # Page header.
    gr.Markdown("""
    # 🎙️ PM Meeting Summarizer
    **Upload any meeting recording → get instant summary, action items, decisions & sentiment.**
    Built for Product Managers | Powered by Whisper + Mistral-7B
    """)

    with gr.Row():
        # Left column: audio input and the trigger button.
        with gr.Column(scale=1):
            audio_input = gr.Audio(
                label="Upload Meeting Audio (.mp3, .wav, .m4a)",
                sources=["upload", "microphone"],
                type="filepath",
                format="mp3",
            )
            submit_btn = gr.Button("Analyze Meeting", variant="primary")

        # Right column: one tab per analysis result.
        with gr.Column(scale=2):
            summary_out = _tab_textbox(
                "Summary",
                "Meeting Summary",
                5,
                "Summary will appear here after analysis...",
            )
            actions_out = _tab_textbox(
                "Action Items",
                "Action Items",
                6,
                "Action items will appear here...",
            )
            decisions_out = _tab_textbox(
                "Key Decisions",
                "Key Decisions",
                5,
                "Key decisions will appear here...",
            )
            sentiment_out = _tab_textbox(
                "Sentiment",
                "Overall Sentiment",
                3,
                "Sentiment analysis will appear here...",
            )

    # Raw transcript, collapsed until the user expands it.
    with gr.Accordion("Full Transcript", open=False):
        transcript_out = gr.Textbox(
            placeholder="Full transcript will appear here after analysis...",
            lines=10,
            label="Raw Transcript",
        )

    # Sample loader row: button plus a read-only status line.
    with gr.Row():
        load_sample_btn = gr.Button("Load AMI Dataset Sample", variant="secondary")
        sample_status = gr.Textbox(
            placeholder="Click to load a real meeting sample from the AMI corpus...",
            scale=2,
            interactive=False,
            label="Status",
        )

    # ── Event Handlers ─────────────────────────────────────────────────────
    # Order must match the 5-tuple returned by process_meeting.
    _analysis_outputs = [
        transcript_out,
        summary_out,
        actions_out,
        decisions_out,
        sentiment_out,
    ]
    submit_btn.click(fn=process_meeting, inputs=[audio_input], outputs=_analysis_outputs)

    # load_ami_sample returns (audio path, status message).
    _sample_outputs = [audio_input, sample_status]
    load_sample_btn.click(fn=load_ami_sample, inputs=[], outputs=_sample_outputs)

# ─── Launch ───────────────────────────────────────────────────────────────────
demo.launch()