"""PM Meeting Summarizer: a Gradio app for analysing meeting recordings.

Pipeline: Whisper transcribes the uploaded audio, a hosted Mistral-7B model
produces a summary / action items / key decisions / sentiment note, and a
local RoBERTa classifier adds a sentiment label with a confidence score.
"""
import os
import re
from typing import Optional, Tuple

# ─── Cache Configuration ──────────────────────────────────────────────────────
# These must be set BEFORE gradio / transformers / huggingface_hub are
# imported: those libraries resolve their cache directories from the
# environment at import time, so assigning afterwards has no effect.
os.environ["TRANSFORMERS_CACHE"] = "/tmp/model_cache"
os.environ["HF_HOME"] = "/tmp/model_cache"

import gradio as gr  # noqa: E402
import torch  # noqa: E402,F401
from transformers import pipeline  # noqa: E402
from huggingface_hub import InferenceClient  # noqa: E402

# ─── Tunables ─────────────────────────────────────────────────────────────────
# Maximum number of transcript characters embedded in the LLM prompt.
MAX_TRANSCRIPT_CHARS = 3000
# Transcripts shorter than this are treated as silent / corrupt audio.
MIN_TRANSCRIPT_CHARS = 20
# Number of leading transcript characters sent to the sentiment classifier.
SENTIMENT_INPUT_CHARS = 512

# ─── HF Token ─────────────────────────────────────────────────────────────────
HF_TOKEN = os.environ.get("HF_TOKEN")

# ─── Model Loading ────────────────────────────────────────────────────────────
print("Loading Whisper model...")
whisper = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-small",
    chunk_length_s=30,
    stride_length_s=5,
    return_timestamps=True,
)
print("Whisper loaded successfully.")

print("Loading sentiment model...")
sentiment_analyzer = pipeline(
    "sentiment-analysis",
    model="cardiffnlp/twitter-roberta-base-sentiment-latest",
)
print("Sentiment model loaded successfully.")

print("Initializing LLM Inference client...")
llm_client = InferenceClient(
    model="mistralai/Mistral-7B-Instruct-v0.2",
    token=HF_TOKEN,
)
print("LLM client ready.")


# ─── Prompt Template ──────────────────────────────────────────────────────────
def build_prompt(transcript: str, max_chars: int = MAX_TRANSCRIPT_CHARS) -> str:
    """Return the analysis prompt for *transcript*.

    Args:
        transcript: Full meeting transcript.
        max_chars: Only the first ``max_chars`` characters of the transcript
            are embedded in the prompt; anything beyond is dropped.

    Returns:
        The prompt text asking for the four ``##`` sections that
        ``parse_sections`` extracts.
    """
    return f"""You are an expert meeting analyst for Product Managers.
Analyze the following meeting transcript and return ONLY a structured response with these four sections:

## Summary
A 3-5 sentence TLDR of the meeting.

## Action Items
A bullet list. Each item format: "- [Owner if mentioned] Task description (Deadline if mentioned)"

## Key Decisions
A bullet list of decisions made or agreed upon during the meeting.

## Sentiment
Overall tone: Positive / Neutral / Negative. One sentence explanation.

Transcript:
{transcript[:max_chars]}"""


# ─── Section Parser ───────────────────────────────────────────────────────────
# Each pattern captures everything after its heading up to the next "##"
# or the end of the text. Compiled once at import instead of on every call.
_SECTION_FLAGS = re.DOTALL | re.IGNORECASE
_SECTION_PATTERNS = {
    "summary": re.compile(r"##\s*Summary\s*(.*?)(?=##|\Z)", _SECTION_FLAGS),
    "action_items": re.compile(r"##\s*Action Items\s*(.*?)(?=##|\Z)", _SECTION_FLAGS),
    "key_decisions": re.compile(r"##\s*Key Decisions\s*(.*?)(?=##|\Z)", _SECTION_FLAGS),
    "sentiment": re.compile(r"##\s*Sentiment\s*(.*?)(?=##|\Z)", _SECTION_FLAGS),
}


def parse_sections(text: str) -> dict:
    """Split an LLM response into its four ``##`` sections.

    Args:
        text: Raw LLM response.

    Returns:
        A dict with keys ``summary``, ``action_items``, ``key_decisions`` and
        ``sentiment``. Each value is the stripped section body, or ``""``
        when the heading is not found.
    """
    sections = {}
    for key, pattern in _SECTION_PATTERNS.items():
        match = pattern.search(text)
        sections[key] = match.group(1).strip() if match else ""
    return sections


# ─── Core Processing Pipeline ─────────────────────────────────────────────────
def _failure(message: str) -> Tuple[str, str, str, str, str]:
    """Build the five-slot output tuple for a failed run.

    The message goes into the transcript slot and also into the summary
    slot: the transcript textbox lives inside an accordion created with
    ``open=False``, so a message placed only there is hidden by default.
    """
    return message, message, "", "", ""


def process_meeting(audio_file: Optional[str]) -> Tuple[str, str, str, str, str]:
    """Transcribe a recording and derive summary, actions, decisions, sentiment.

    Args:
        audio_file: Path of the audio file supplied by the Gradio audio
            component, or ``None`` when nothing was provided.

    Returns:
        ``(transcript, summary, action_items, key_decisions, sentiment)``.
        On failure the first two slots carry a human-readable message and
        the remaining slots are empty strings. Exceptions are not propagated.
    """
    if audio_file is None:
        return _failure(
            "No audio file provided. Please upload a .mp3, .wav, or .m4a file."
        )

    try:
        # Step 1: Transcribe with Whisper
        print(f"Transcribing audio file: {audio_file}")
        result = whisper(audio_file)
        transcript = result["text"].strip()
        print(f"Transcription complete. Length: {len(transcript)} characters.")

        if len(transcript) < MIN_TRANSCRIPT_CHARS:
            return _failure(
                "Transcription too short. Audio may be silent or corrupt."
            )

        # Step 2: LLM Analysis via chat_completion
        print("Sending transcript to LLM for analysis...")
        prompt = build_prompt(transcript)
        response = llm_client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            max_tokens=600,
            temperature=0.3,
        )
        # Guard against a missing message body so parsing never sees None.
        llm_response = response.choices[0].message.content or ""
        print("LLM response received.")

        # Step 3: Parse structured sections (values are already stripped)
        sections = parse_sections(llm_response)
        # If the model ignored the requested headings, show its raw answer.
        summary = sections["summary"] or llm_response.strip()
        action_items = (
            sections["action_items"]
            or "No action items detected in this meeting."
        )
        key_decisions = (
            sections["key_decisions"]
            or "No key decisions detected in this meeting."
        )
        llm_sentiment = sections["sentiment"]

        # Step 4: Sentiment via RoBERTa
        # The slice is in characters, which can still exceed the model's
        # token limit for some scripts, so truncate at the tokenizer too.
        short_text = transcript[:SENTIMENT_INPUT_CHARS]
        sent_result = sentiment_analyzer(
            short_text, truncation=True, max_length=512
        )[0]
        label = sent_result["label"].capitalize()
        score = round(sent_result["score"] * 100, 1)

        sentiment_display = f"{label} (confidence: {score}%)"
        if llm_sentiment:
            sentiment_display = f"{sentiment_display}\n\n{llm_sentiment}"

        print("Processing complete.")
        return transcript, summary, action_items, key_decisions, sentiment_display

    except Exception as e:
        # UI boundary: report the failure to the user instead of raising.
        error_detail = str(e)
        print(f"ERROR in process_meeting: {error_detail}")
        return _failure(
            f"Processing failed.\n\nError details:\n{error_detail}\n\n"
            f"Common causes:\n"
            f"1. HF_TOKEN secret not set in Space Settings\n"
            f"2. Inference API rate limit — wait 1 minute and retry\n"
            f"3. Audio format not supported — try converting to .wav"
        )


# ─── AMI Dataset Sample Loader ────────────────────────────────────────────────
def load_ami_sample():
    """Fetch the first AMI test sample and write it to a temporary WAV file.

    Returns:
        ``(wav_path, status_message)`` on success, or ``(None, error_message)``
        when anything fails. The temporary file is not deleted here; it is
        handed to the audio component by path.
    """
    try:
        # Imported lazily so the app can start without these packages.
        from datasets import load_dataset
        import numpy as np
        import tempfile
        import soundfile as sf

        print("Loading AMI dataset sample (streaming)...")
        # NOTE(review): trust_remote_code=True runs code shipped with the
        # dataset repository; confirm this is acceptable for the deployment.
        ds = load_dataset(
            "edinburghcstr/ami",
            "ihm",
            split="test",
            streaming=True,
            trust_remote_code=True,
        )
        sample = next(iter(ds))
        audio_array = np.array(sample["audio"]["array"])
        sample_rate = sample["audio"]["sampling_rate"]

        # Reserve a path, then close our handle before soundfile writes to
        # it, so no file descriptor is left open.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            wav_path = tmp.name
        sf.write(wav_path, audio_array, sample_rate)

        print(f"AMI sample loaded: {wav_path}")
        return wav_path, "✅ AMI sample loaded. Click Analyze Meeting to process it."

    except Exception as e:
        error_msg = f"❌ Failed to load AMI sample: {str(e)}"
        print(error_msg)
        return None, error_msg


# ─── Gradio UI ────────────────────────────────────────────────────────────────
# NOTE(review): the layout nesting below was reconstructed from a source whose
# line breaks were lost; confirm the placement of the transcript accordion.
with gr.Blocks(title="PM Meeting Summarizer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🎙️ PM Meeting Summarizer
    **Upload any meeting recording → get instant summary, action items, decisions & sentiment.**
    Built for Product Managers | Powered by Whisper + Mistral-7B
    """)

    with gr.Row():
        with gr.Column(scale=1):
            audio_input = gr.Audio(
                sources=["upload", "microphone"],
                type="filepath",
                label="Upload Meeting Audio (.mp3, .wav, .m4a)",
                format="mp3",
            )
            submit_btn = gr.Button("Analyze Meeting", variant="primary")

        with gr.Column(scale=2):
            with gr.Tab("Summary"):
                summary_out = gr.Textbox(
                    label="Meeting Summary",
                    lines=5,
                    placeholder="Summary will appear here after analysis...",
                )
            with gr.Tab("Action Items"):
                actions_out = gr.Textbox(
                    label="Action Items",
                    lines=6,
                    placeholder="Action items will appear here...",
                )
            with gr.Tab("Key Decisions"):
                decisions_out = gr.Textbox(
                    label="Key Decisions",
                    lines=5,
                    placeholder="Key decisions will appear here...",
                )
            with gr.Tab("Sentiment"):
                sentiment_out = gr.Textbox(
                    label="Overall Sentiment",
                    lines=3,
                    placeholder="Sentiment analysis will appear here...",
                )

    with gr.Accordion("Full Transcript", open=False):
        transcript_out = gr.Textbox(
            label="Raw Transcript",
            lines=10,
            placeholder="Full transcript will appear here after analysis...",
        )

    with gr.Row():
        load_sample_btn = gr.Button("Load AMI Dataset Sample", variant="secondary")
        sample_status = gr.Textbox(
            label="Status",
            interactive=False,
            scale=2,
            placeholder="Click to load a real meeting sample from the AMI corpus...",
        )

    # ── Event Handlers ─────────────────────────────────────────────────────
    # Output order must match the tuple returned by process_meeting.
    submit_btn.click(
        fn=process_meeting,
        inputs=[audio_input],
        outputs=[transcript_out, summary_out, actions_out, decisions_out, sentiment_out],
    )

    load_sample_btn.click(
        fn=load_ami_sample,
        inputs=[],
        outputs=[audio_input, sample_status],
    )

# ─── Launch ───────────────────────────────────────────────────────────────────
demo.launch()