barakb21's picture
Update app.py
0252006 verified
Raw
History Blame Contribute Delete
4.81 kB
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr
import numpy as np
import pandas as pd
import re
# === Load dataset from Hugging Face ===
# Runs at import time: fetches the dataset and turns its "train" split into a
# pandas DataFrame that the rest of the module reads through the global `df`.
raw_data = load_dataset("MongoDB/embedded_movies") # Replace with your actual dataset if needed
df = pd.DataFrame(raw_data["train"])
# Keep only relevant columns (include genres/summary/tagline if they exist)
# Columns listed here but absent from the dataset are skipped by the
# comprehension below rather than raising. Rows without a "fullplot" value are
# dropped and the index is renumbered from 0.
# NOTE(review): dropna(subset=["fullplot"]) still requires the "fullplot"
# column itself to exist — confirm for any replacement dataset.
columns_to_keep = ["title", "fullplot", "genres", "summary", "tagline"]
df = df[[col for col in columns_to_keep if col in df.columns]].dropna(subset=["fullplot"]).reset_index(drop=True)
# === Load model and compute embeddings ===
# Runs at import time: every plot in `df` is encoded once, up front, and the
# result is stored in the "fullplot_embedding" column so that queries only
# need to embed the user's text.
model = SentenceTransformer("all-MiniLM-L6-v2")
# list(...) splits the encode() result into one item per row so it can be
# assigned as a column (presumably encode() returns a 2-D array — TODO confirm).
df["fullplot_embedding"] = list(model.encode(df["fullplot"].tolist(), show_progress_bar=True))
# === Backend Functions ===
def encode_input_text(text):
    """Embed one piece of text with the module-level sentence model.

    The text is wrapped in a one-element batch for ``model.encode`` and the
    single resulting embedding is returned.
    """
    batch = [text]
    embeddings = model.encode(batch)
    return embeddings[0]
def extract_keywords(text):
    """Return the distinct lower-cased words of five or more characters in *text*.

    A "word" is a run of ``\\w`` characters delimited by word boundaries, so
    punctuation is ignored and duplicates collapse into a single set entry.
    """
    lowered = text.lower()
    return {match.group() for match in re.finditer(r'\b\w{5,}\b', lowered)}
def generate_explanation(user_keywords, movie_text):
    """Describe which of the user's keywords also occur in a movie's text.

    Args:
        user_keywords: Set of keywords extracted from the user's description.
        movie_text: Text of the movie to compare against.

    Returns:
        The shared keywords, sorted and comma-separated, or the fixed
        message "No strong keyword match" when there is no overlap.
    """
    shared = user_keywords.intersection(extract_keywords(movie_text))
    if not shared:
        return "No strong keyword match"
    return ", ".join(sorted(shared))
def compute_similar_movies(user_embedding, df, top_k=5):
    """Return the ``top_k`` rows of *df* most similar to *user_embedding*.

    Rows without a stored "fullplot_embedding" are ignored. The returned frame
    is a copy carrying an extra "similarity" column (cosine similarity to the
    query) and is ordered from most to least similar.
    """
    has_embedding = df['fullplot_embedding'].notnull()
    candidates = df[has_embedding].copy()

    # Stack the per-row vectors into a single matrix for one batched comparison.
    embedding_matrix = np.stack(candidates['fullplot_embedding'].values)
    scores = cosine_similarity([user_embedding], embedding_matrix)[0]
    candidates['similarity'] = scores

    # argsort is ascending, so reverse it before taking the best top_k.
    ranked_positions = scores.argsort()[::-1]
    best_positions = ranked_positions[:top_k]
    return candidates.iloc[best_positions]
# In-memory log of (movie_title, feedback) pairs; it lives only as long as the
# process does.
feedback_list = []


def submit_feedback(movie_title, feedback):
    """Record a user's rating for one of the recommended movies.

    Args:
        movie_title: Title picked in the feedback dropdown. ``None``, an empty
            string, or the ``"None"`` placeholder that the recommend/clear
            callbacks put into the dropdown all count as "nothing selected".
        feedback: The rating chosen in the radio group; ``None`` or empty
            means no rating was picked.

    Returns:
        A status message for the UI: a confirmation when the feedback was
        stored, otherwise a warning explaining what is missing. Nothing is
        appended to ``feedback_list`` in the warning cases.
    """
    # Reject incomplete submissions instead of logging meaningless entries.
    if not movie_title or movie_title == "None":
        return "⚠️ Please select a movie to rate."
    if not feedback:
        return "⚠️ Please choose a rating before submitting."

    feedback_list.append((movie_title, feedback))
    print(f"Feedback received: {movie_title} => {feedback}")
    # The check mark was previously stored as mis-decoded bytes ("βœ…").
    return f"✅ Feedback received for: {movie_title}"
def format_movie_output(df, user_input):
    """Render recommended movies as Markdown for the results panel.

    Args:
        df: DataFrame of recommendations. Each row must provide "title",
            "fullplot" and "similarity"; "genres", "summary" and "tagline"
            are used when present.
        user_input: The user's description; its keywords are compared with
            each movie's text to produce the "Matched on" line.

    Returns:
        A ``(markdown, titles)`` tuple: the per-movie blocks joined by
        horizontal rules, and the list of titles in the same order.
    """
    user_keywords = extract_keywords(user_input)
    blocks = []
    for _, row in df.iterrows():
        # "genres" is an optional column, so read it with .get() instead of
        # indexing (which raised KeyError when the column was filtered out).
        raw_genres = row.get('genres', '')
        if isinstance(raw_genres, (list, tuple)):
            genre_text = ', '.join(str(genre) for genre in raw_genres)
        elif raw_genres is None or (isinstance(raw_genres, float) and raw_genres != raw_genres):
            # Missing value (None / NaN): show nothing rather than "None"/"nan".
            genre_text = ''
        else:
            genre_text = str(raw_genres)

        title = f"⭐ **{row['title']}**"
        genres = f"🎭 *Genres:* {genre_text}"
        similarity = f"📈 *Similarity:* {row['similarity']:.3f}"

        # Keyword matching looks at every descriptive field the row has.
        movie_text = ' '.join(
            str(row.get(field, ''))
            for field in ['fullplot', 'summary', 'tagline']
            if field in row
        )
        explanation = generate_explanation(user_keywords, movie_text)

        # Collapsible plot section so long plots do not swamp the list.
        plot = f"<details><summary>📝 Plot</summary>{row['fullplot']}</details>"
        blocks.append(f"{title}\n{genres}\n{similarity}\n🔍 *Matched on:* {explanation}\n{plot}")
    return "\n\n---\n\n".join(blocks), df['title'].tolist()
def recommend_movies(user_input, top_k, use_example):
    """Gradio callback: recommend movies for a free-text plot description.

    Args:
        user_input: Description typed by the user (may be empty or ``None``).
        top_k: Number of recommendations requested; coerced to ``int`` because
            it is used as a slice bound and sliders can deliver floats.
        use_example: When truthy, a built-in example description replaces
            whatever the user typed.

    Returns:
        A ``(markdown, dropdown_update)`` pair: the rendered recommendations
        (or a warning when no description was given) and an update that fills
        the feedback dropdown with the recommended titles.
    """
    if use_example:
        user_input = "A group of unlikely heroes band together to save the galaxy from a powerful villain."

    # Treat None the same as an empty / whitespace-only description.
    if not user_input or not user_input.strip():
        # value=None clears the selection; the literal string "None" is not
        # one of the (empty) choices and would be an invalid dropdown value.
        return "⚠️ Please enter a description.", gr.update(choices=[], value=None)

    user_emb = encode_input_text(user_input)
    top_df = compute_similar_movies(user_emb, df, top_k=int(top_k))
    result_text, titles = format_movie_output(top_df, user_input)
    # Preselect the best match so feedback can be given with one click.
    return result_text, gr.update(choices=titles, value=titles[0] if titles else None)
def clear_all():
    """Gradio callback: reset the form to its initial state.

    Returns:
        Values for, in order: the description textbox (empty), the top-k
        slider (5), the example checkbox (unchecked), the results panel
        (empty) and the feedback dropdown (no choices, nothing selected).
    """
    # value=None clears the dropdown; the string "None" is not among the
    # (empty) choices and would leave the component holding an invalid value.
    return "", 5, False, "", gr.update(choices=[], value=None)
# === Gradio UI ===
# Two-column layout: inputs and feedback controls on the left, rendered
# recommendations on the right. Emoji labels below were previously stored as
# mis-decoded bytes and have been restored.
with gr.Blocks() as iface:
    gr.Markdown("## 🎬 Movie Recommender | Built By: Barak, Shani, Ido and Bar")
    with gr.Row():
        with gr.Column(scale=1):
            # --- Query controls ---
            user_input = gr.Textbox(label="Describe a Movie Plot", lines=4, placeholder="e.g. A team of misfits saves the world.")
            use_example = gr.Checkbox(label="Use Example Description")
            top_k = gr.Slider(1, 10, value=5, step=1, label="Top K Recommendations")
            recommend_btn = gr.Button("🔍 Recommend")
            clear_btn = gr.Button("🧹 Clear")

            # --- Feedback controls; the dropdown is filled by recommend_movies ---
            gr.Markdown("### 🙋 Feedback")
            feedback_title = gr.Dropdown(choices=[], label="Select Movie to Rate")
            feedback_choice = gr.Radio(["👍", "👎"], label="Your Feedback")
            feedback_btn = gr.Button("Submit Feedback")
            feedback_output = gr.Textbox(label="Feedback Result", interactive=False)
        with gr.Column(scale=2):
            output = gr.Markdown(label="Recommendations")

    # Event wiring must happen inside the Blocks context.
    recommend_btn.click(
        fn=recommend_movies,
        inputs=[user_input, top_k, use_example],
        outputs=[output, feedback_title]
    )
    # Output order here must match the tuple returned by clear_all.
    clear_btn.click(
        fn=clear_all,
        inputs=[],
        outputs=[user_input, top_k, use_example, output, feedback_title]
    )
    feedback_btn.click(
        fn=submit_feedback,
        inputs=[feedback_title, feedback_choice],
        outputs=feedback_output
    )

iface.launch()