# Scraped Hugging Face file-view header (page chrome, not part of the program):
#   John-7S's picture
#   Update app.py
#   b1e2f60 verified
#   Raw | History | Blame | Contribute | Delete
#   10.2 kB
import os
import re

# ─── Cache Configuration ──────────────────────────────────────────────────────
# The cache locations must be exported BEFORE transformers / huggingface_hub
# (and gradio, which depends on huggingface_hub) are imported: those libraries
# resolve their cache directories from the environment at import time, so
# assigning the variables afterwards does not redirect the cache.
os.environ["TRANSFORMERS_CACHE"] = "/tmp/model_cache"
os.environ["HF_HOME"] = "/tmp/model_cache"

import gradio as gr  # noqa: E402  (deliberately imported after the env setup)
import torch  # noqa: E402
from transformers import pipeline  # noqa: E402
from huggingface_hub import InferenceClient  # noqa: E402

# ─── HF Token ─────────────────────────────────────────────────────────────────
# Access token for the hosted inference client; None when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
# ─── Model Loading ────────────────────────────────────────────────────────────
# Speech-to-text pipeline, created once at import time and reused per request.
# Audio is handled in 30-second chunks with a 5-second stride, and timestamps
# are requested from the pipeline.
print("Loading Whisper model...")
whisper = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
    return_timestamps=True,
    chunk_length_s=30,
    stride_length_s=5,
)
print("Whisper loaded successfully.")

# Text classifier used for the transcript-level sentiment label.
print("Loading sentiment model...")
sentiment_analyzer = pipeline(
    task="sentiment-analysis",
    model="cardiffnlp/twitter-roberta-base-sentiment-latest",
)
print("Sentiment model loaded successfully.")

# Client for the remotely hosted instruction model; authenticated with HF_TOKEN.
print("Initializing LLM Inference client...")
llm_client = InferenceClient(
    token=HF_TOKEN,
    model="mistralai/Mistral-7B-Instruct-v0.2",
)
print("LLM client ready.")
# ─── Prompt Template ──────────────────────────────────────────────────────────
def build_prompt(transcript: str, max_chars: int = 3000) -> str:
    """Build the meeting-analysis instruction prompt sent to the LLM.

    The prompt asks for exactly four markdown sections (``## Summary``,
    ``## Action Items``, ``## Key Decisions``, ``## Sentiment``) and ends with
    the transcript text.

    Args:
        transcript: Full meeting transcript.
        max_chars: Maximum number of leading transcript characters embedded in
            the prompt. Anything beyond this is dropped. Defaults to 3000.

    Returns:
        The complete prompt string.

    Raises:
        ValueError: If ``max_chars`` is negative (a negative slice bound would
            silently trim from the end instead of limiting the length).
    """
    if max_chars < 0:
        raise ValueError("max_chars must be non-negative")

    excerpt = transcript[:max_chars]
    return f"""You are an expert meeting analyst for Product Managers.
Analyze the following meeting transcript and return ONLY a structured response
with these four sections:
## Summary
A 3-5 sentence TLDR of the meeting.
## Action Items
A bullet list. Each item format: "- [Owner if mentioned] Task description (Deadline if mentioned)"
## Key Decisions
A bullet list of decisions made or agreed upon during the meeting.
## Sentiment
Overall tone: Positive / Neutral / Negative. One sentence explanation.
Transcript:
{excerpt}"""
# ─── Section Parser ───────────────────────────────────────────────────────────
def parse_sections(text: str) -> dict:
    """Split an LLM reply into its four ``##``-headed sections.

    Headings are matched case-insensitively. A section's body runs from just
    after its heading up to the next ``##`` occurrence (anywhere in the text)
    or the end of the text, and is stripped of surrounding whitespace.

    Args:
        text: Raw LLM reply. ``None`` or an empty string is accepted.

    Returns:
        A dict with the keys ``summary``, ``action_items``, ``key_decisions``
        and ``sentiment``. A section that is not found maps to ``""``.
    """
    patterns = {
        "summary": r"##\s*Summary\s*(.*?)(?=##|\Z)",
        "action_items": r"##\s*Action Items\s*(.*?)(?=##|\Z)",
        "key_decisions": r"##\s*Key Decisions\s*(.*?)(?=##|\Z)",
        "sentiment": r"##\s*Sentiment\s*(.*?)(?=##|\Z)",
    }

    # re.search raises TypeError on None; treat a missing reply as "no sections".
    if not text:
        return {key: "" for key in patterns}

    sections = {}
    for key, pattern in patterns.items():
        found = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
        sections[key] = found.group(1).strip() if found else ""
    return sections
# ─── Core Processing Pipeline ─────────────────────────────────────────────────
def _error_result(message: str) -> tuple:
    """Return the handler's 5-slot result carrying only *message* in slot one."""
    return (message, "", "", "", "")


def process_meeting(audio_file):
    """Transcribe a meeting recording and derive summary, actions, decisions and sentiment.

    Steps: Whisper transcription, one chat-completion request to the LLM,
    section parsing of the reply, then a classifier-based sentiment label on
    the first 512 characters of the transcript.

    Args:
        audio_file: Path of the audio file to analyse, or ``None`` when nothing
            was provided.

    Returns:
        A 5-tuple of strings
        ``(transcript, summary, action_items, key_decisions, sentiment_display)``.
        On any failure the first slot holds a human-readable message and the
        remaining four are empty strings; exceptions are not propagated.
    """
    # NOTE(review): the first slot is wired to the transcript box, which lives
    # in an accordion that starts closed, so these messages may be easy to miss.
    if audio_file is None:
        return _error_result(
            "No audio file provided. Please upload a .mp3, .wav, or .m4a file."
        )

    try:
        # Step 1: Transcribe with Whisper
        print(f"Transcribing audio file: {audio_file}")
        result = whisper(audio_file)
        transcript = result["text"].strip()
        print(f"Transcription complete. Length: {len(transcript)} characters.")

        if len(transcript) < 20:
            return _error_result(
                "Transcription too short. Audio may be silent or corrupt."
            )

        # Step 2: LLM Analysis via chat_completion
        print("Sending transcript to LLM for analysis...")
        prompt = build_prompt(transcript)
        response = llm_client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            max_tokens=600,
            temperature=0.3,
        )
        # The message content may be None; normalise so .strip() below is safe.
        llm_response = response.choices[0].message.content or ""
        print("LLM response received.")

        # Step 3: Parse structured sections
        sections = parse_sections(llm_response)
        summary = sections.get("summary", "").strip()
        action_items = sections.get("action_items", "").strip()
        key_decisions = sections.get("key_decisions", "").strip()
        llm_sentiment = sections.get("sentiment", "").strip()

        # If the reply did not follow the requested layout, show it verbatim
        # as the summary rather than showing nothing.
        if not summary:
            summary = llm_response.strip() or "No summary was returned by the model."
        if not action_items:
            action_items = "No action items detected in this meeting."
        if not key_decisions:
            key_decisions = "No key decisions detected in this meeting."

        # Step 4: Sentiment via RoBERTa (only the first 512 characters are scored)
        short_text = transcript[:512]
        sent_result = sentiment_analyzer(short_text)[0]
        label = sent_result["label"].capitalize()
        score = round(sent_result["score"] * 100, 1)
        sentiment_display = f"{label} (confidence: {score}%)"
        if llm_sentiment:
            # Append the LLM's own tone assessment below the classifier label.
            sentiment_display = f"{sentiment_display}\n\n{llm_sentiment}"

        print("Processing complete.")
        return transcript, summary, action_items, key_decisions, sentiment_display

    except Exception as e:
        # UI boundary: report the failure in the output instead of raising.
        error_detail = str(e)
        print(f"ERROR in process_meeting: {error_detail}")
        return _error_result(
            f"Processing failed.\n\nError details:\n{error_detail}\n\n"
            f"Common causes:\n"
            f"1. HF_TOKEN secret not set in Space Settings\n"
            f"2. Inference API rate limit — wait 1 minute and retry\n"
            f"3. Audio format not supported — try converting to .wav"
        )
# ─── AMI Dataset Sample Loader ────────────────────────────────────────────────
def load_ami_sample():
    """Fetch one AMI test-split recording and save it as a temporary WAV file.

    Returns:
        ``(path, status)``: the path of the written ``.wav`` file plus a
        success message, or ``(None, error_message)`` when anything fails.
        Exceptions are not propagated.

    Note:
        The temporary file is created with ``delete=False`` and is not removed
        by this function.
    """
    try:
        # Imported inside the try block so a missing package is reported via
        # the status message rather than raising to the caller.
        from datasets import load_dataset
        import numpy as np
        import tempfile
        import soundfile as sf

        print("Loading AMI dataset sample (streaming)...")
        # NOTE(security): trust_remote_code=True permits code shipped with the
        # dataset repository to run locally. Confirm the source is trusted.
        ds = load_dataset(
            "edinburghcstr/ami",
            "ihm",
            split="test",
            streaming=True,
            trust_remote_code=True,
        )
        sample = next(iter(ds))
        audio = sample["audio"]
        audio_array = np.array(audio["array"])
        sample_rate = audio["sampling_rate"]

        # Reserve a unique path and close the handle before writing: the
        # original left the handle open (a leak, and a write failure on
        # platforms that forbid reopening an open temporary file).
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            wav_path = tmp.name
        sf.write(wav_path, audio_array, sample_rate)

        print(f"AMI sample loaded: {wav_path}")
        return wav_path, "✅ AMI sample loaded. Click Analyze Meeting to process it."
    except Exception as e:
        error_msg = f"❌ Failed to load AMI sample: {str(e)}"
        print(error_msg)
        return None, error_msg
# ─── Gradio UI ────────────────────────────────────────────────────────────────
# Layout and event wiring for the app. The nesting below is reconstructed from
# the order of the `with` statements — NOTE(review): confirm that the transcript
# accordion and the sample-loader row belong at the top level of the page.
with gr.Blocks(title="PM Meeting Summarizer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# 🎙️ PM Meeting Summarizer
**Upload any meeting recording → get instant summary, action items, decisions & sentiment.**
Built for Product Managers | Powered by Whisper + Mistral-7B
""")

    with gr.Row():
        # Left column: audio input and the trigger button.
        with gr.Column(scale=1):
            audio_input = gr.Audio(
                sources=["upload", "microphone"],
                type="filepath",
                label="Upload Meeting Audio (.mp3, .wav, .m4a)",
                format="mp3",
            )
            submit_btn = gr.Button("Analyze Meeting", variant="primary")

        # Right column: one tab per analysis result.
        with gr.Column(scale=2):
            with gr.Tab("Summary"):
                summary_out = gr.Textbox(
                    label="Meeting Summary",
                    lines=5,
                    placeholder="Summary will appear here after analysis...",
                )
            with gr.Tab("Action Items"):
                actions_out = gr.Textbox(
                    label="Action Items",
                    lines=6,
                    placeholder="Action items will appear here...",
                )
            with gr.Tab("Key Decisions"):
                decisions_out = gr.Textbox(
                    label="Key Decisions",
                    lines=5,
                    placeholder="Key decisions will appear here...",
                )
            with gr.Tab("Sentiment"):
                sentiment_out = gr.Textbox(
                    label="Overall Sentiment",
                    lines=3,
                    placeholder="Sentiment analysis will appear here...",
                )

    # Collapsed by default; also receives process_meeting's first output slot.
    with gr.Accordion("Full Transcript", open=False):
        transcript_out = gr.Textbox(
            label="Raw Transcript",
            lines=10,
            placeholder="Full transcript will appear here after analysis...",
        )

    with gr.Row():
        load_sample_btn = gr.Button("Load AMI Dataset Sample", variant="secondary")
        sample_status = gr.Textbox(
            label="Status",
            interactive=False,
            scale=2,
            placeholder="Click to load a real meeting sample from the AMI corpus...",
        )

    # ── Event Handlers ─────────────────────────────────────────────────────
    # Output order must match the tuple order returned by process_meeting.
    submit_btn.click(
        fn=process_meeting,
        inputs=[audio_input],
        outputs=[transcript_out, summary_out, actions_out, decisions_out, sentiment_out],
    )
    # load_ami_sample returns (audio path, status text).
    load_sample_btn.click(
        fn=load_ami_sample,
        inputs=[],
        outputs=[audio_input, sample_status],
    )
# ─── Launch ───────────────────────────────────────────────────────────────────
# Start the web server only when this file is executed as a script, so that
# importing the module (e.g. from tests or tooling) does not launch the app.
if __name__ == "__main__":
    demo.launch()