Spaces:

John-7S
/

pm-meeting-summarizer

Sleeping

App Files Files Community

pm-meeting-summarizer / app.py

John-7S

Update app.py

b1e2f60 verified about 2 months ago

Raw

History Blame Contribute Delete

10.2 kB

	import os
	import re

	# ─── Cache Configuration ──────────────────────────────────────────────────────
	# These variables must be exported BEFORE gradio / transformers /
	# huggingface_hub are imported: the Hugging Face libraries resolve their cache
	# directories once, at import time, so assigning them afterwards does not
	# change where models are downloaded.
	os.environ["TRANSFORMERS_CACHE"] = "/tmp/model_cache"
	os.environ["HF_HOME"] = "/tmp/model_cache"

	import gradio as gr  # noqa: E402  (import deliberately placed after env setup)
	import torch  # noqa: E402
	from transformers import pipeline  # noqa: E402
	from huggingface_hub import InferenceClient  # noqa: E402

	# ─── HF Token ─────────────────────────────────────────────────────────────────
	# Read from the environment; None when the variable is not set.
	HF_TOKEN = os.environ.get("HF_TOKEN")

	# ─── Model Loading ────────────────────────────────────────────────────────────
	# The three backends below are created once, when the module is executed.
	WHISPER_MODEL_ID = "openai/whisper-small"
	SENTIMENT_MODEL_ID = "cardiffnlp/twitter-roberta-base-sentiment-latest"
	LLM_MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"

	# Speech-to-text: audio is processed in 30 s chunks with a 5 s stride, and
	# timestamps are requested alongside the text.
	print("Loading Whisper model...")
	whisper = pipeline(
	    task="automatic-speech-recognition",
	    model=WHISPER_MODEL_ID,
	    chunk_length_s=30,
	    stride_length_s=5,
	    return_timestamps=True,
	)
	print("Whisper loaded successfully.")

	# Sentiment classifier applied to the transcript text.
	print("Loading sentiment model...")
	sentiment_analyzer = pipeline("sentiment-analysis", model=SENTIMENT_MODEL_ID)
	print("Sentiment model loaded successfully.")

	# Remote inference client for the instruction-tuned LLM; authenticated with
	# HF_TOKEN (which may be None when the secret is not configured).
	print("Initializing LLM Inference client...")
	llm_client = InferenceClient(model=LLM_MODEL_ID, token=HF_TOKEN)
	print("LLM client ready.")


	# ─── Prompt Template ──────────────────────────────────────────────────────────
	def build_prompt(transcript: str, max_chars: int = 3000) -> str:
	    """Build the LLM instruction prompt for a meeting transcript.

	    The prompt asks for exactly four markdown sections (Summary, Action Items,
	    Key Decisions, Sentiment) and ends with the transcript text.

	    Args:
	        transcript: Full meeting transcript.
	        max_chars: Maximum number of leading transcript characters embedded in
	            the prompt. Defaults to 3000; anything beyond it is dropped.

	    Returns:
	        The complete prompt string.

	    Raises:
	        ValueError: If ``max_chars`` is negative (a negative slice bound would
	            silently cut from the end of the transcript instead).
	    """
	    if max_chars < 0:
	        raise ValueError("max_chars must be non-negative")

	    excerpt = transcript[:max_chars]
	    return f"""You are an expert meeting analyst for Product Managers.
	Analyze the following meeting transcript and return ONLY a structured response
	with these four sections:

	## Summary
	A 3-5 sentence TLDR of the meeting.

	## Action Items
	A bullet list. Each item format: "- [Owner if mentioned] Task description (Deadline if mentioned)"

	## Key Decisions
	A bullet list of decisions made or agreed upon during the meeting.

	## Sentiment
	Overall tone: Positive / Neutral / Negative. One sentence explanation.

	Transcript:
	{excerpt}"""


	# ─── Section Parser ───────────────────────────────────────────────────────────
	def parse_sections(text: str) -> dict:
	    """Split the LLM's markdown answer into its four named sections.

	    Each section starts at a ``## <Heading>`` line (matched case-insensitively)
	    and runs until the next ``##`` marker or the end of the text.

	    Args:
	        text: Raw LLM response. An empty or missing value yields empty sections.

	    Returns:
	        A dict with the keys ``summary``, ``action_items``, ``key_decisions``
	        and ``sentiment``. Each value is the stripped section body, or ``""``
	        when that heading is not present.
	    """
	    # The lookahead uses a real alternation: stop at the next "##" heading
	    # OR at the end of the string.
	    patterns = {
	        "summary": r"##\s*Summary\s*(.*?)(?=##|\Z)",
	        "action_items": r"##\s*Action Items\s*(.*?)(?=##|\Z)",
	        "key_decisions": r"##\s*Key Decisions\s*(.*?)(?=##|\Z)",
	        "sentiment": r"##\s*Sentiment\s*(.*?)(?=##|\Z)",
	    }
	    if not text:
	        return {key: "" for key in patterns}

	    # DOTALL lets a section body span several lines.
	    flags = re.DOTALL | re.IGNORECASE
	    result = {}
	    for key, pattern in patterns.items():
	        match = re.search(pattern, text, flags)
	        result[key] = match.group(1).strip() if match else ""
	    return result


	# ─── Core Processing Pipeline ─────────────────────────────────────────────────
	def process_meeting(audio_file):
	    """Run the full pipeline on one recording: transcribe, analyze, score sentiment.

	    Args:
	        audio_file: Path of the audio file to process, or None when nothing
	            was provided.

	    Returns:
	        A 5-tuple of strings
	        ``(transcript, summary, action_items, key_decisions, sentiment)``.
	        When the input is missing, the transcript is too short, or any step
	        raises, the first element carries the message for the user and the
	        other four are empty strings.
	    """
	    if audio_file is None:
	        return (
	            "No audio file provided. Please upload a .mp3, .wav, or .m4a file.",
	            "", "", "", ""
	        )

	    try:
	        # Step 1: Transcribe with Whisper
	        print(f"Transcribing audio file: {audio_file}")
	        result = whisper(audio_file)
	        transcript = result["text"].strip()
	        print(f"Transcription complete. Length: {len(transcript)} characters.")

	        if len(transcript) < 20:
	            return (
	                "Transcription too short. Audio may be silent or corrupt.",
	                "", "", "", ""
	            )

	        # Step 2: LLM Analysis via chat_completion
	        print("Sending transcript to LLM for analysis...")
	        prompt = build_prompt(transcript)

	        response = llm_client.chat_completion(
	            messages=[{"role": "user", "content": prompt}],
	            max_tokens=600,
	            temperature=0.3,
	        )
	        # The message content may be None; normalise to "" so a missing reply
	        # degrades gracefully instead of raising and discarding the transcript.
	        llm_response = response.choices[0].message.content or ""
	        print("LLM response received.")

	        # Step 3: Parse structured sections
	        sections = parse_sections(llm_response)

	        summary = sections.get("summary", "").strip()
	        action_items = sections.get("action_items", "").strip()
	        key_decisions = sections.get("key_decisions", "").strip()
	        llm_sentiment = sections.get("sentiment", "").strip()

	        if not summary:
	            # No "## Summary" heading found: show the raw reply, or say so
	            # explicitly when the model returned nothing at all.
	            summary = (
	                llm_response.strip()
	                or "The language model returned an empty response."
	            )
	        if not action_items:
	            action_items = "No action items detected in this meeting."
	        if not key_decisions:
	            key_decisions = "No key decisions detected in this meeting."

	        # Step 4: Sentiment via RoBERTa
	        # Only the first 512 characters of the transcript are classified.
	        short_text = transcript[:512]
	        sent_result = sentiment_analyzer(short_text)[0]
	        label = sent_result["label"].capitalize()
	        score = round(sent_result["score"] * 100, 1)
	        sentiment_display = f"{label} (confidence: {score}%)"

	        if llm_sentiment:
	            # Append the LLM's own tone assessment below the classifier score.
	            sentiment_display = f"{label} (confidence: {score}%)\n\n{llm_sentiment}"

	        print("Processing complete.")
	        return transcript, summary, action_items, key_decisions, sentiment_display

	    except Exception as e:
	        # UI boundary: report any failure in the transcript box rather than
	        # letting the exception propagate out of the handler.
	        error_detail = str(e)
	        print(f"ERROR in process_meeting: {error_detail}")
	        return (
	            f"Processing failed.\n\nError details:\n{error_detail}\n\n"
	            f"Common causes:\n"
	            f"1. HF_TOKEN secret not set in Space Settings\n"
	            f"2. Inference API rate limit — wait 1 minute and retry\n"
	            f"3. Audio format not supported — try converting to .wav",
	            "", "", "", ""
	        )


	# ─── AMI Dataset Sample Loader ────────────────────────────────────────────────
	def load_ami_sample():
	    """Fetch one AMI test-split sample and save it as a temporary .wav file.

	    Returns:
	        A ``(path, status)`` tuple. On success ``path`` is the name of the
	        written .wav file and ``status`` is a confirmation message; on any
	        failure ``path`` is None and ``status`` describes the error.
	    """
	    try:
	        # Imported lazily so these packages are only needed when a sample is
	        # actually requested.
	        from datasets import load_dataset
	        import numpy as np
	        import tempfile
	        import soundfile as sf

	        print("Loading AMI dataset sample (streaming)...")
	        # NOTE(review): trust_remote_code=True allows the dataset repository's
	        # own loading script to execute locally — confirm this is acceptable.
	        ds = load_dataset(
	            "edinburghcstr/ami",
	            "ihm",
	            split="test",
	            streaming=True,
	            trust_remote_code=True
	        )

	        # Streaming mode: take only the first record.
	        sample = next(iter(ds))
	        audio_array = np.array(sample["audio"]["array"])
	        sample_rate = sample["audio"]["sampling_rate"]

	        # Reserve a unique file name, then close the handle before writing so
	        # the descriptor is not leaked and the path is not held open while
	        # soundfile writes to it. delete=False keeps the file on disk.
	        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	            wav_path = tmp.name
	        sf.write(wav_path, audio_array, sample_rate)

	        print(f"AMI sample loaded: {wav_path}")
	        return wav_path, "✅ AMI sample loaded. Click Analyze Meeting to process it."

	    except Exception as e:
	        # Any failure is reported through the status message instead of raising.
	        error_msg = f"❌ Failed to load AMI sample: {str(e)}"
	        print(error_msg)
	        return None, error_msg


	# ─── Gradio UI ────────────────────────────────────────────────────────────────
	with gr.Blocks(title="PM Meeting Summarizer", theme=gr.themes.Soft()) as demo:

	    # Header banner. A plain "|" is used: "\|" inside a non-raw string literal
	    # is an invalid escape sequence.
	    gr.Markdown("""
	# 🎙️ PM Meeting Summarizer
	Upload any meeting recording → get instant summary, action items, decisions & sentiment.
	Built for Product Managers | Powered by Whisper + Mistral-7B
	""")

	    with gr.Row():
	        # Left column: audio input and the trigger button.
	        with gr.Column(scale=1):
	            audio_input = gr.Audio(
	                sources=["upload", "microphone"],
	                type="filepath",
	                label="Upload Meeting Audio (.mp3, .wav, .m4a)",
	                format="mp3"
	            )
	            submit_btn = gr.Button("Analyze Meeting", variant="primary")

	        # Right column: one tab per analysis output.
	        with gr.Column(scale=2):
	            with gr.Tab("Summary"):
	                summary_out = gr.Textbox(
	                    label="Meeting Summary",
	                    lines=5,
	                    placeholder="Summary will appear here after analysis..."
	                )
	            with gr.Tab("Action Items"):
	                actions_out = gr.Textbox(
	                    label="Action Items",
	                    lines=6,
	                    placeholder="Action items will appear here..."
	                )
	            with gr.Tab("Key Decisions"):
	                decisions_out = gr.Textbox(
	                    label="Key Decisions",
	                    lines=5,
	                    placeholder="Key decisions will appear here..."
	                )
	            with gr.Tab("Sentiment"):
	                sentiment_out = gr.Textbox(
	                    label="Overall Sentiment",
	                    lines=3,
	                    placeholder="Sentiment analysis will appear here..."
	                )

	    # Collapsed by default; holds the raw transcript.
	    with gr.Accordion("Full Transcript", open=False):
	        transcript_out = gr.Textbox(
	            label="Raw Transcript",
	            lines=10,
	            placeholder="Full transcript will appear here after analysis..."
	        )

	    with gr.Row():
	        load_sample_btn = gr.Button("Load AMI Dataset Sample", variant="secondary")
	        sample_status = gr.Textbox(
	            label="Status",
	            interactive=False,
	            scale=2,
	            placeholder="Click to load a real meeting sample from the AMI corpus..."
	        )

	    # ── Event Handlers ─────────────────────────────────────────────────────
	    # The output order must match the 5-tuple returned by process_meeting.
	    submit_btn.click(
	        fn=process_meeting,
	        inputs=[audio_input],
	        outputs=[transcript_out, summary_out, actions_out, decisions_out, sentiment_out]
	    )

	    # load_ami_sample returns (file path or None, status message).
	    load_sample_btn.click(
	        fn=load_ami_sample,
	        inputs=[],
	        outputs=[audio_input, sample_status]
	    )

	# ─── Launch ───────────────────────────────────────────────────────────────────
	demo.launch()