Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| import re | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from sentence_transformers import SentenceTransformer | |
| from datasets import load_dataset | |
# === Load dataset from Hugging Face ===
raw_data = load_dataset("MongoDB/embedded_movies")
df = pd.DataFrame(raw_data["train"])

# Keep only the columns the recommender reads; rows missing either value are
# dropped so every remaining plot can be embedded.
df = df[["title", "fullplot"]].dropna().reset_index(drop=True)

# === Load model and compute embeddings ===
model = SentenceTransformer("all-MiniLM-L6-v2")

# Encoding a list of texts yields a single 2-D array (one row per plot).
# pandas cannot place a 2-D array into one column, so split it into a list of
# per-row vectors; compute_similar_movies() re-stacks them with np.stack().
df["fullplot_embedding"] = list(
    model.encode(df["fullplot"].tolist(), show_progress_bar=True)
)
| # === Helper Functions === | |
def encode_input_text(text):
    """Embed one string with the module-level ``model`` and return its vector.

    The text is wrapped in a one-element list for ``model.encode`` and the
    first (only) entry of the result is returned.
    """
    batch = model.encode([text])
    return batch[0]
def extract_keywords(text):
    """Return the distinct lowercase words of five or more characters in ``text``.

    A "word" is a run of ``\\w`` characters (letters, digits, underscore)
    delimited by word boundaries; shorter runs are ignored.
    """
    lowered = text.lower()
    return {match.group(0) for match in re.finditer(r'\b\w{5,}\b', lowered)}
def generate_explanation(user_keywords, movie_text):
    """Describe which of the user's keywords also occur in ``movie_text``.

    Args:
        user_keywords: Set of keywords extracted from the user's query.
        movie_text: Plot text of the candidate movie.

    Returns:
        The shared keywords, alphabetically sorted and joined with ", ", or
        the fallback message when there is no overlap.
    """
    overlap = user_keywords & extract_keywords(movie_text)
    if not overlap:
        return "No strong keyword match"
    return ", ".join(sorted(overlap))
def compute_similar_movies(user_embedding, top_k=5):
    """Return the ``top_k`` rows of ``df`` most similar to ``user_embedding``.

    Args:
        user_embedding: Embedding vector of the query text (it is wrapped in
            a list to form the single-row input for ``cosine_similarity``).
        top_k: Number of best-matching rows to keep.

    Returns:
        A new DataFrame with the best-matching rows of ``df`` plus a
        ``similarity`` column, ordered from most to least similar.
    """
    movie_embeddings = np.stack(df["fullplot_embedding"].values)
    similarities = cosine_similarity([user_embedding], movie_embeddings)[0]
    # Score a copy instead of writing a "similarity" column into the shared
    # module-level frame: overlapping requests would otherwise overwrite each
    # other's scores between the assignment and the sort.
    scored = df.assign(similarity=similarities)
    return scored.sort_values("similarity", ascending=False).head(top_k)
| # === Gradio UI === | |
with gr.Blocks() as demo:
    # NOTE(review): the "π¬" / "π" prefixes in the strings below look like
    # mis-decoded emoji — confirm the file's encoding before changing them.
    gr.Markdown("## π¬ Movie Recommender Engine with Explanation")
    input_box = gr.Textbox(label="Enter your plot idea", placeholder="e.g., dreams within dreams")
    output_box = gr.Textbox(label="Top Recommendations", lines=8)
    recommend_button = gr.Button("Recommend")

    def recommend(text):
        """Build the recommendation text shown in the output box.

        Args:
            text: Plot idea typed by the user.

        Returns:
            One entry per recommended movie (title, similarity score and the
            keywords it shares with the query), separated by blank lines, or
            a short prompt when no query text was entered.
        """
        # A blank query has no keywords and no meaningful embedding; ask for
        # input rather than returning arbitrary matches.
        if not text or not text.strip():
            return "Please enter a plot idea to get recommendations."

        user_embedding = encode_input_text(text)
        user_keywords = extract_keywords(text)
        top_df = compute_similar_movies(user_embedding)

        results = [
            f"π¬ {title} (score={score:.2f})\nπ "
            f"{generate_explanation(user_keywords, plot)}"
            for title, plot, score in zip(
                top_df["title"], top_df["fullplot"], top_df["similarity"]
            )
        ]
        return "\n\n".join(results)

    recommend_button.click(fn=recommend, inputs=[input_box], outputs=[output_box])

demo.launch()