Spaces:
Sleeping
Sleeping
| from datasets import load_dataset | |
| from sentence_transformers import SentenceTransformer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import gradio as gr | |
| import numpy as np | |
| import pandas as pd | |
| import re | |
# === Load dataset from Hugging Face ===
# Swap the dataset id below if a different movie corpus should be used.
raw_data = load_dataset("MongoDB/embedded_movies")
df = pd.DataFrame(raw_data["train"])

# Restrict the frame to whichever of the wanted columns are actually present,
# then discard rows without a full plot, since the plot is the text that gets
# embedded below.
columns_to_keep = ["title", "fullplot", "genres", "summary", "tagline"]
available_columns = [col for col in columns_to_keep if col in df.columns]
df = df[available_columns]
df = df.dropna(subset=["fullplot"])
df = df.reset_index(drop=True)

# === Load model and compute embeddings ===
model = SentenceTransformer("all-MiniLM-L6-v2")
plot_texts = df["fullplot"].tolist()
plot_embeddings = model.encode(plot_texts, show_progress_bar=True)
# list(...) breaks the encoder output into one entry per row so that each
# cell of the new column holds a single embedding.
df["fullplot_embedding"] = list(plot_embeddings)
| # === Backend Functions === | |
def encode_input_text(text):
    """Embed a single piece of text with the module-level ``model``.

    The text is wrapped in a one-element list for ``model.encode`` and the
    first (only) entry of the result is returned.
    """
    batch_embeddings = model.encode([text])
    return batch_embeddings[0]
def extract_keywords(text):
    """Return the set of lowercased words in *text* with five or more characters.

    A "word" is a run of ``\\w`` characters (letters, digits, underscore)
    delimited by word boundaries.
    """
    lowered = text.lower()
    return {match.group(0) for match in re.finditer(r'\b\w{5,}\b', lowered)}
def generate_explanation(user_keywords, movie_text):
    """Describe which of the user's keywords also occur in a movie's text.

    Args:
        user_keywords: Set of keywords taken from the user's description.
        movie_text: Text of the movie to compare against; its keywords are
            obtained with ``extract_keywords``.

    Returns:
        The shared keywords, alphabetically sorted and comma-separated, or
        the fixed string "No strong keyword match" when nothing overlaps.
    """
    shared = user_keywords & extract_keywords(movie_text)
    if not shared:
        return "No strong keyword match"
    return ", ".join(sorted(shared))
def compute_similar_movies(user_embedding, df, top_k=5):
    """Return the ``top_k`` rows of *df* most similar to ``user_embedding``.

    Args:
        user_embedding: Embedding vector of the user's description.
        df: Frame with a ``fullplot_embedding`` column; rows where it is null
            are ignored.
        top_k: Maximum number of rows to return.

    Returns:
        A copy-derived frame (the input *df* is not modified) with an added
        ``similarity`` column, ordered from most to least similar.
    """
    has_embedding = df['fullplot_embedding'].notnull()
    candidates = df[has_embedding].copy()

    # Stack the per-row vectors into one 2-D matrix for a single batched
    # cosine-similarity call against the query vector.
    embedding_matrix = np.stack(candidates['fullplot_embedding'].values)
    scores = cosine_similarity([user_embedding], embedding_matrix)[0]
    candidates['similarity'] = scores

    # argsort is ascending, so reverse it to rank the best matches first.
    ranked_positions = np.argsort(scores)[::-1]
    return candidates.iloc[ranked_positions[:top_k]]
# In-memory log of (movie_title, feedback) pairs; it is never persisted, so
# its contents last only as long as this process.
feedback_list = []


def submit_feedback(movie_title, feedback):
    """Record a rating for a movie and return a status message for the UI.

    Args:
        movie_title: Title chosen in the feedback dropdown. An empty value or
            the placeholder string "None" (which this file uses when the
            dropdown has no choices) is treated as "nothing selected".
        feedback: The selected rating; empty/None means no rating was chosen.

    Returns:
        A confirmation string when the feedback was stored, otherwise a
        prompt asking the user to complete the selection. Nothing is appended
        to ``feedback_list`` in the latter case.
    """
    # Guard against logging meaningless entries such as ("None", None), which
    # happen when the button is pressed before any recommendation was made or
    # before a rating was picked.
    if not movie_title or movie_title == "None" or not feedback:
        return "Please select a movie and a rating before submitting feedback."

    feedback_list.append((movie_title, feedback))
    print(f"Feedback received: {movie_title} => {feedback}")
    return f"β Feedback received for: {movie_title}"
def format_movie_output(df, user_input):
    """Render recommended movies as Markdown and collect their titles.

    Args:
        df: Rows to display, in display order. Each row must provide
            ``title``, ``fullplot`` and ``similarity``; ``genres``,
            ``summary`` and ``tagline`` are used only when present.
        user_input: The user's description; its keywords are compared with
            each movie's text to build the "Matched on" line.

    Returns:
        A ``(markdown, titles)`` tuple: the per-movie sections joined by
        horizontal rules, and the list of titles in the same order.
    """
    user_keywords = extract_keywords(user_input)
    blocks = []
    for _, row in df.iterrows():
        title = f"β **{row['title']}**"

        # 'genres' is an optional column (the loader keeps it only if the
        # dataset has it), so read it with .get instead of indexing, which
        # would raise KeyError when the column is absent.
        raw_genres = row.get('genres', '')
        if isinstance(raw_genres, (list, tuple)):
            genres_text = ', '.join(str(genre) for genre in raw_genres)
        elif raw_genres is None or (isinstance(raw_genres, float) and pd.isna(raw_genres)):
            # Missing value: show nothing rather than a literal "None"/"nan".
            genres_text = ''
        else:
            genres_text = raw_genres
        genres = f"π *Genres:* {genres_text}"

        similarity = f"π *Similarity:* {row['similarity']:.3f}"

        # Keyword matching looks at every descriptive text field the row has.
        movie_text = ' '.join(
            str(row.get(field, ''))
            for field in ['fullplot', 'summary', 'tagline']
            if field in row
        )
        explanation = generate_explanation(user_keywords, movie_text)

        plot = f"<details><summary>π Plot</summary>{row['fullplot']}</details>"
        blocks.append(f"{title}\n{genres}\n{similarity}\nπ *Matched on:* {explanation}\n{plot}")
    return "\n\n---\n\n".join(blocks), df['title'].tolist()
def recommend_movies(user_input, top_k, use_example):
    """Produce recommendations for a plot description (Gradio callback).

    Args:
        user_input: Free-text plot description typed by the user.
        top_k: Number of recommendations to return.
        use_example: When truthy, a built-in example description replaces
            ``user_input``.

    Returns:
        A pair ``(markdown, dropdown_update)``: the rendered recommendations
        (or a warning when the description is blank) and a ``gr.update`` that
        repopulates the feedback dropdown with the recommended titles.
    """
    description = (
        "A group of unlikely heroes band together to save the galaxy from a powerful villain."
        if use_example
        else user_input
    )

    # Nothing to search for: warn and reset the dropdown to its placeholder.
    if not description.strip():
        empty_dropdown = gr.update(choices=[], value="None")
        return "β οΈ Please enter a description.", empty_dropdown

    query_embedding = encode_input_text(description)
    matches = compute_similar_movies(query_embedding, df, top_k=top_k)
    result_text, titles = format_movie_output(matches, description)

    selected_title = titles[0] if titles else "None"
    return result_text, gr.update(choices=titles, value=selected_title)
def clear_all():
    """Return reset values for the form (Gradio callback).

    The tuple is, in order: empty description text, top-k of 5, example
    checkbox off, empty results text, and a dropdown update that removes all
    choices and sets the placeholder value "None".
    """
    dropdown_reset = gr.update(choices=[], value="None")
    return "", 5, False, "", dropdown_reset
# === Gradio UI ===
# NOTE(review): the nesting below was reconstructed from a flattened source
# (inputs and feedback in the narrow column, results in the wide one) -
# confirm against the intended layout.
# NOTE(review): several label strings look mis-encoded (likely emoji); they
# are kept exactly as found.
with gr.Blocks() as iface:
    gr.Markdown("## π¬ Movie Recommender | Built By: Barak, Shani, Ido and Bar")

    with gr.Row():
        # Narrow column: query controls followed by the feedback form.
        with gr.Column(scale=1):
            user_input = gr.Textbox(
                label="Describe a Movie Plot",
                lines=4,
                placeholder="e.g. A team of misfits saves the world.",
            )
            use_example = gr.Checkbox(label="Use Example Description")
            top_k = gr.Slider(1, 10, value=5, step=1, label="Top K Recommendations")
            recommend_btn = gr.Button("π Recommend")
            clear_btn = gr.Button("π§Ή Clear")

            gr.Markdown("### π Feedback")
            feedback_title = gr.Dropdown(choices=[], label="Select Movie to Rate")
            feedback_choice = gr.Radio(["π", "π"], label="Your Feedback")
            feedback_btn = gr.Button("Submit Feedback")
            feedback_output = gr.Textbox(label="Feedback Result", interactive=False)

        # Wide column: rendered recommendations.
        with gr.Column(scale=2):
            output = gr.Markdown(label="Recommendations")

    # Event wiring. The output lists must line up with the order of the
    # values each callback returns.
    query_controls = [user_input, top_k, use_example]
    recommendation_targets = [output, feedback_title]
    reset_targets = [user_input, top_k, use_example, output, feedback_title]
    feedback_controls = [feedback_title, feedback_choice]

    recommend_btn.click(
        fn=recommend_movies,
        inputs=query_controls,
        outputs=recommendation_targets,
    )
    clear_btn.click(
        fn=clear_all,
        inputs=[],
        outputs=reset_targets,
    )
    feedback_btn.click(
        fn=submit_feedback,
        inputs=feedback_controls,
        outputs=feedback_output,
    )

iface.launch()