"""Gradio demo that recommends movies whose plots are semantically close to a user's idea.

At start-up the script downloads the ``MongoDB/embedded_movies`` dataset, embeds
every ``fullplot`` with the ``all-MiniLM-L6-v2`` sentence-transformer, and then
serves a small Gradio UI. Each query is embedded with the same model, ranked by
cosine similarity against the plot embeddings, and returned together with the
long words (five or more characters) that the query shares with each plot.
"""
import re

import gradio as gr
import numpy as np
import pandas as pd
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# === Load dataset from Hugging Face ===
raw_data = load_dataset("MongoDB/embedded_movies")
df = pd.DataFrame(raw_data["train"])

# Keep only relevant columns
df = df[["title", "fullplot"]].dropna().reset_index(drop=True)

# === Load model and compute embeddings ===
model = SentenceTransformer("all-MiniLM-L6-v2")

# Encoding a list of texts yields one vector per text, i.e. a 2-D matrix.
# Keep that matrix around so it does not have to be re-stacked on every query.
movie_embedding_matrix = np.asarray(
    model.encode(df["fullplot"].tolist(), show_progress_bar=True)
)

# pandas refuses to assign a 2-D array to a single column, so store one row
# vector per cell instead (the rows are views into the matrix above).
df["fullplot_embedding"] = list(movie_embedding_matrix)

# Words of five or more "word" characters are treated as keywords.
_KEYWORD_PATTERN = re.compile(r"\b\w{5,}\b")

# Emoji are written as escapes so the file survives being saved or served with
# the wrong encoding (the original literals had been garbled into mojibake).
_CLAPPER = "\U0001F3AC"
_MAGNIFIER = "\U0001F50D"


# === Helper Functions ===
def encode_input_text(text):
    """Return the embedding vector of a single piece of text."""
    return model.encode([text])[0]


def extract_keywords(text):
    """Return the set of lower-cased words in *text* that have at least five characters."""
    return set(_KEYWORD_PATTERN.findall(text.lower()))


def generate_explanation(user_keywords, movie_text):
    """Describe which of the user's keywords also occur in a movie's plot.

    Args:
        user_keywords: Set of keywords extracted from the user's query.
        movie_text: Plot text of the candidate movie.

    Returns:
        The shared keywords, sorted and comma-separated, or a fallback
        message when there is no overlap.
    """
    matched = user_keywords & extract_keywords(movie_text)
    return ", ".join(sorted(matched)) if matched else "No strong keyword match"


def compute_similar_movies(user_embedding, top_k=5):
    """Return the *top_k* movies most similar to *user_embedding*.

    Args:
        user_embedding: Embedding vector of the user's query.
        top_k: Maximum number of rows to return.

    Returns:
        A new DataFrame holding the best-matching rows of ``df`` in descending
        order of similarity, with an added ``similarity`` column. The shared
        ``df`` is left untouched, so overlapping requests cannot overwrite
        each other's scores.
    """
    similarities = cosine_similarity([user_embedding], movie_embedding_matrix)[0]
    # Stable sort on the negated scores gives descending order with
    # deterministic tie-breaking.
    best = np.argsort(-similarities, kind="stable")[:top_k]
    top_df = df.iloc[best].copy()
    top_df["similarity"] = similarities[best]
    return top_df


def recommend(text):
    """Build the recommendation text shown in the UI for the query *text*.

    Returns a prompt asking for input when *text* is empty or only whitespace;
    otherwise one entry per recommended movie (title, similarity score and the
    keyword explanation), separated by blank lines.
    """
    if not text or not text.strip():
        return "Please enter a plot idea to get recommendations."

    user_embedding = encode_input_text(text)
    user_keywords = extract_keywords(text)
    top_df = compute_similar_movies(user_embedding)

    results = [
        f"{_CLAPPER} {row['title']} (score={row['similarity']:.2f})\n"
        f"{_MAGNIFIER} {generate_explanation(user_keywords, row['fullplot'])}"
        for _, row in top_df.iterrows()
    ]
    return "\n\n".join(results)


# === Gradio UI ===
with gr.Blocks() as demo:
    gr.Markdown(f"## {_CLAPPER} Movie Recommender Engine with Explanation")
    input_box = gr.Textbox(
        label="Enter your plot idea",
        placeholder="e.g., dreams within dreams",
    )
    output_box = gr.Textbox(label="Top Recommendations", lines=8)
    recommend_button = gr.Button("Recommend")

    recommend_button.click(fn=recommend, inputs=[input_box], outputs=[output_box])

# Only start the server when executed as a script, so the module can be
# imported (e.g. for reuse of the helpers) without blocking on launch().
if __name__ == "__main__":
    demo.launch()