barakb21's picture
Create app.py
97be09c verified
Raw
History Blame
2.24 kB
import gradio as gr
import pandas as pd
import numpy as np
import re
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
# === Load dataset from Hugging Face ===
raw_data = load_dataset("MongoDB/embedded_movies")
df = pd.DataFrame(raw_data["train"])

# Keep only the columns the recommender uses. Rows missing a title or a plot
# are dropped because they can be neither embedded nor displayed.
df = df[["title", "fullplot"]].dropna().reset_index(drop=True)

# === Load model and compute embeddings ===
model = SentenceTransformer("all-MiniLM-L6-v2")

# Encoding a list of texts yields a single 2-D array (one row per plot), and
# pandas raises ValueError when a 2-D array is assigned to one column. Store
# the rows as a list of per-movie vectors instead; np.stack() in
# compute_similar_movies() turns them back into a matrix.
plot_embeddings = model.encode(df["fullplot"].tolist(), show_progress_bar=True)
df["fullplot_embedding"] = list(plot_embeddings)
# === Helper Functions ===
def encode_input_text(text):
    """Embed one piece of text with the module-level sentence model.

    The text is wrapped in a one-element batch for ``model.encode`` and the
    single resulting vector is returned.
    """
    batch = [text]
    vectors = model.encode(batch)
    return vectors[0]
def extract_keywords(text):
    """Return the distinct lowercase words of five or more characters.

    The text is lowercased first; a word is a run of regex word characters
    (letters, digits, underscore) delimited by word boundaries.
    """
    lowered = text.lower()
    return {match.group(0) for match in re.finditer(r'\b\w{5,}\b', lowered)}
def generate_explanation(user_keywords, movie_text):
    """Describe which of the user's keywords also occur in a movie's text.

    Returns the shared keywords in alphabetical order joined by ", ", or the
    fixed message "No strong keyword match" when there is no overlap.
    """
    shared = user_keywords & extract_keywords(movie_text)
    if not shared:
        return "No strong keyword match"
    ordered = sorted(shared)
    return ", ".join(ordered)
def compute_similar_movies(user_embedding, top_k=5):
    """Return the ``top_k`` movies whose plot embeddings best match the query.

    Args:
        user_embedding: Embedding vector of the user's text; it is wrapped in
            a list to form the single-row query matrix.
        top_k: Number of rows to return.

    Returns:
        A new DataFrame with the columns of the module-level ``df`` plus a
        ``similarity`` column (cosine similarity to ``user_embedding``),
        sorted from most to least similar and cut to ``top_k`` rows.
    """
    movie_embeddings = np.stack(df["fullplot_embedding"].values)
    similarities = cosine_similarity([user_embedding], movie_embeddings)[0]
    # Attach the scores to a copy rather than writing a column into the shared
    # module-level frame: requests may be served concurrently, and a shared
    # "similarity" column could be overwritten by another request between the
    # assignment and the sort.
    scored = df.assign(similarity=similarities)
    return scored.sort_values("similarity", ascending=False).head(top_k)
# === Gradio UI ===
# Build the UI: one text input, one multi-line output and a trigger button.
with gr.Blocks() as demo:
    gr.Markdown("## 🎬 Movie Recommender Engine with Explanation")
    input_box = gr.Textbox(label="Enter your plot idea", placeholder="e.g., dreams within dreams")
    output_box = gr.Textbox(label="Top Recommendations", lines=8)
    recommend_button = gr.Button("Recommend")

    def recommend(text):
        """Return a formatted list of the movies most similar to ``text``.

        Each entry shows the title, the similarity score to two decimals and
        the keywords shared between the query and the movie's plot. Blank
        input yields a short prompt instead of recommendations.
        """
        # An empty query has no meaningful embedding; ask for input rather
        # than ranking every movie against it.
        if not text or not text.strip():
            return "Please enter a plot idea to get recommendations."

        user_embedding = encode_input_text(text)
        user_keywords = extract_keywords(text)
        top_df = compute_similar_movies(user_embedding)

        results = []
        for _, row in top_df.iterrows():
            explanation = generate_explanation(user_keywords, row["fullplot"])
            results.append(
                f"🎬 {row['title']} (score={row['similarity']:.2f})\n🔍 {explanation}"
            )
        return "\n\n".join(results)

    recommend_button.click(fn=recommend, inputs=[input_box], outputs=[output_box])

# Start the server only when run as a script, so importing this module does
# not launch (and block on) the web app.
if __name__ == "__main__":
    demo.launch()