"""Gradio app that recommends movies whose plots resemble a free-text description.

The script loads a movie dataset from the Hugging Face Hub, embeds every
``fullplot`` with a SentenceTransformer model, and serves a small UI that
ranks movies by cosine similarity to the user's description. Users can also
give thumbs-up / thumbs-down feedback, which is kept in memory only.
"""

import re

import gradio as gr
import numpy as np
import pandas as pd
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# === Load dataset from Hugging Face ===
raw_data = load_dataset("MongoDB/embedded_movies")  # Replace with your actual dataset if needed
df = pd.DataFrame(raw_data["train"])

# Keep only relevant columns (include genres/summary/tagline if they exist)
columns_to_keep = ["title", "fullplot", "genres", "summary", "tagline"]
df = (
    df[[col for col in columns_to_keep if col in df.columns]]
    .dropna(subset=["fullplot"])
    .reset_index(drop=True)
)

# === Load model and compute embeddings ===
model = SentenceTransformer("all-MiniLM-L6-v2")
df["fullplot_embedding"] = list(
    model.encode(df["fullplot"].tolist(), show_progress_bar=True)
)

# Description used when the "Use Example Description" checkbox is ticked.
EXAMPLE_DESCRIPTION = (
    "A group of unlikely heroes band together to save the galaxy "
    "from a powerful villain."
)

# Words of five or more word-characters count as keywords.
_KEYWORD_RE = re.compile(r"\b\w{5,}\b")


# === Backend Functions ===
def encode_input_text(text):
    """Return the embedding vector of a single piece of text."""
    return model.encode([text])[0]


def extract_keywords(text):
    """Return the set of lower-cased words in *text* with at least 5 characters."""
    return set(_KEYWORD_RE.findall(text.lower()))


def generate_explanation(user_keywords, movie_text):
    """Describe which of the user's keywords also occur in *movie_text*.

    Returns the shared keywords, sorted and comma-separated, or a fixed
    fallback message when there is no overlap.
    """
    matched = user_keywords & extract_keywords(movie_text)
    return ", ".join(sorted(matched)) if matched else "No strong keyword match"


def compute_similar_movies(user_embedding, df, top_k=5):
    """Return the *top_k* rows of *df* most similar to *user_embedding*.

    Rows without an embedding are ignored. The returned frame is a copy that
    carries an extra ``similarity`` column and is ordered from most to least
    similar.
    """
    # The UI slider may deliver a float; slicing needs an int.
    top_k = int(top_k)
    valid_df = df[df["fullplot_embedding"].notnull()].copy()
    if valid_df.empty:
        # np.stack raises on an empty sequence; return an empty result instead.
        valid_df["similarity"] = pd.Series(dtype=float)
        return valid_df

    movie_embeddings = np.stack(valid_df["fullplot_embedding"].values)
    similarities = cosine_similarity([user_embedding], movie_embeddings)[0]
    valid_df["similarity"] = similarities
    top_indices = similarities.argsort()[::-1][:top_k]
    return valid_df.iloc[top_indices]


# In-memory log of (movie_title, feedback) tuples; lost when the process exits.
feedback_list = []


def submit_feedback(movie_title, feedback):
    """Record a feedback entry and return a status message for the UI.

    Nothing is recorded when either the movie or the rating is missing.
    """
    if not movie_title or not feedback:
        return "⚠️ Please select a movie and a rating first."
    feedback_list.append((movie_title, feedback))
    print(f"Feedback received: {movie_title} => {feedback}")
    return f"✅ Feedback received for: {movie_title}"


def _is_missing(value):
    """Return True for ``None`` and float NaN (pandas' missing-value markers)."""
    return value is None or (isinstance(value, float) and np.isnan(value))


def _format_genres(value):
    """Render a genres cell (sequence, scalar, or missing) as display text."""
    if isinstance(value, (list, tuple, np.ndarray)):
        return ", ".join(str(genre) for genre in value)
    return "" if _is_missing(value) else str(value)


def format_movie_output(df, user_input):
    """Build the Markdown result text for the recommended movies.

    Args:
        df: Frame of recommended movies; must contain ``title``, ``fullplot``
            and ``similarity``. ``genres``, ``summary`` and ``tagline`` are
            used when present.
        user_input: The description the user searched for, used to explain
            matches through shared keywords.

    Returns:
        A ``(markdown_text, titles)`` tuple where *titles* lists the movie
        titles in the same order as they appear in the text.
    """
    user_keywords = extract_keywords(user_input)
    blocks = []
    for _, row in df.iterrows():
        title = f"⭐ **{row['title']}**"
        # `genres` is optional (it is dropped at load time if the dataset
        # lacks it), so use .get() rather than indexing.
        genres = f"🎭 *Genres:* {_format_genres(row.get('genres'))}"
        similarity = f"📈 *Similarity:* {row['similarity']:.3f}"
        movie_text = " ".join(
            str(row.get(field, ""))
            for field in ("fullplot", "summary", "tagline")
            if field in row
        )
        explanation = generate_explanation(user_keywords, movie_text)
        plot = f"\n📝 *Plot:* {row['fullplot']}\n"
        blocks.append(
            f"{title}\n{genres}\n{similarity}\n"
            f"🔍 *Matched on:* {explanation}\n{plot}"
        )
    return "\n\n---\n\n".join(blocks), df["title"].tolist()


def recommend_movies(user_input, top_k, use_example):
    """Gradio callback: return the recommendation text and a dropdown update.

    When *use_example* is set, the built-in example description replaces
    whatever the user typed. A blank description yields a warning and an
    emptied feedback dropdown.
    """
    if use_example:
        user_input = EXAMPLE_DESCRIPTION
    if not (user_input or "").strip():
        return "⚠️ Please enter a description.", gr.update(choices=[], value=None)

    user_emb = encode_input_text(user_input)
    top_df = compute_similar_movies(user_emb, df, top_k=int(top_k))
    result_text, titles = format_movie_output(top_df, user_input)
    # Pre-select the best match so feedback can be given with one click.
    return result_text, gr.update(choices=titles, value=titles[0] if titles else None)


def clear_all():
    """Gradio callback: reset every input and output to its initial state."""
    return "", 5, False, "", gr.update(choices=[], value=None)


# === Gradio UI ===
with gr.Blocks() as iface:
    gr.Markdown("## 🎬 Movie Recommender | Built By: Barak, Shani, Ido and Bar")

    with gr.Row():
        with gr.Column(scale=1):
            user_input = gr.Textbox(
                label="Describe a Movie Plot",
                lines=4,
                placeholder="e.g. A team of misfits saves the world.",
            )
            use_example = gr.Checkbox(label="Use Example Description")
            top_k = gr.Slider(1, 10, value=5, step=1, label="Top K Recommendations")
            recommend_btn = gr.Button("🔍 Recommend")
            clear_btn = gr.Button("🧹 Clear")

            gr.Markdown("### 🙋 Feedback")
            feedback_title = gr.Dropdown(choices=[], label="Select Movie to Rate")
            feedback_choice = gr.Radio(["👍", "👎"], label="Your Feedback")
            feedback_btn = gr.Button("Submit Feedback")
            feedback_output = gr.Textbox(label="Feedback Result", interactive=False)

        with gr.Column(scale=2):
            output = gr.Markdown(label="Recommendations")

    recommend_btn.click(
        fn=recommend_movies,
        inputs=[user_input, top_k, use_example],
        outputs=[output, feedback_title],
    )
    clear_btn.click(
        fn=clear_all,
        inputs=[],
        outputs=[user_input, top_k, use_example, output, feedback_title],
    )
    feedback_btn.click(
        fn=submit_feedback,
        inputs=[feedback_title, feedback_choice],
        outputs=feedback_output,
    )

iface.launch()