"""Gradio movie recommender driven by sentence-embedding similarity.

Two UIs are defined:

* ``iface`` - the full recommender (top-k slider, example toggle, feedback).
* ``demo``  - a minimal "plot in, recommendations out" text UI.

Both share one lazily loaded embedding model and one movie table so that
nothing heavy is reloaded per request.
"""

import functools
import re

import gradio as gr
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# === Configuration ===

# NOTE(review): the original `recommend_movies` read undefined globals
# `model` and `df`; these are the resources the second UI loaded, so both
# UIs now share them. Confirm this is the intended data/model.
MOVIES_PATH = "movies_with_embeddings.pkl"
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

EXAMPLE_DESCRIPTION = (
    "A group of unlikely heroes band together to save the galaxy "
    "from a powerful villain."
)

# Keywords are "words" of five or more word-characters.
_KEYWORD_RE = re.compile(r"\b\w{5,}\b")

# In-memory log of (movie_title, feedback) tuples; lost on restart.
feedback_list = []


# === Resource Loading ===

@functools.lru_cache(maxsize=1)
def load_model():
    """Return the shared SentenceTransformer, creating it on first use."""
    # Imported lazily so the module can be imported without the dependency
    # until an embedding is actually needed.
    from sentence_transformers import SentenceTransformer

    return SentenceTransformer(MODEL_NAME)


@functools.lru_cache(maxsize=1)
def load_movies():
    """Return the shared movie DataFrame, reading it from disk on first use.

    SECURITY: ``pd.read_pickle`` executes arbitrary code embedded in the
    file. Only point ``MOVIES_PATH`` at a trusted, locally produced pickle.
    """
    return pd.read_pickle(MOVIES_PATH)


# === Helper Functions ===

def encode_input_text(text, model):
    """Encode a single string with ``model`` and return its embedding."""
    return model.encode([text])[0]


def extract_keywords(text) -> set:
    """Return the set of lower-cased words of 5+ characters in ``text``.

    Non-string input (e.g. ``None`` or NaN from a missing DataFrame cell)
    yields an empty set instead of raising.
    """
    if not isinstance(text, str):
        return set()
    return set(_KEYWORD_RE.findall(text.lower()))


def generate_explanation(user_keywords, movie_text) -> str:
    """Describe which of the user's keywords also occur in ``movie_text``.

    Returns the shared keywords as a sorted, comma-separated string, or
    ``"No strong keyword match"`` when there is no overlap.
    """
    matched = user_keywords & extract_keywords(movie_text)
    return ", ".join(sorted(matched)) if matched else "No strong keyword match"


def compute_similar_movies(user_embedding, df, top_k=5):
    """Return the ``top_k`` rows of ``df`` most similar to ``user_embedding``.

    Rows without a ``fullplot_embedding`` are ignored. The result is a copy
    carrying an extra ``similarity`` column (cosine similarity), ordered
    from most to least similar. ``df`` itself is not modified.
    """
    # Gradio sliders can deliver floats; a float is not a valid slice bound.
    top_k = max(int(top_k), 0)

    valid_df = df[df["fullplot_embedding"].notnull()].copy()
    if valid_df.empty:
        # np.stack raises on an empty sequence; return an empty result
        # that still has the column callers expect.
        valid_df["similarity"] = np.array([], dtype=float)
        return valid_df

    movie_embeddings = np.stack(valid_df["fullplot_embedding"].values)
    similarities = cosine_similarity([user_embedding], movie_embeddings)[0]
    valid_df["similarity"] = similarities
    top_indices = similarities.argsort()[::-1][:top_k]
    return valid_df.iloc[top_indices]


def submit_feedback(movie_title, feedback):
    """Record a thumbs up/down for ``movie_title`` and return a status message."""
    if not movie_title or not feedback:
        # Nothing selected yet (e.g. before any recommendation was made).
        return "⚠️ Please select a movie and a rating first."
    feedback_list.append((movie_title, feedback))
    print(f"Feedback received: {movie_title} => {feedback}")
    return f"✅ Feedback received for: {movie_title}"


def format_movie_output(df, user_input):
    """Render recommendation rows as Markdown.

    Returns a ``(markdown, titles)`` tuple: the Markdown text with one
    section per movie separated by horizontal rules, and the list of movie
    titles in the same order.
    """
    user_keywords = extract_keywords(user_input)
    blocks = []
    for _, row in df.iterrows():
        genres_value = row["genres"]
        if isinstance(genres_value, list):
            genres_value = ", ".join(genres_value)

        title = f"⭐ **{row['title']}**"
        genres = f"🎭 *Genres:* {genres_value}"
        similarity = f"📈 *Similarity:* {row['similarity']:.3f}"

        movie_text = " ".join(
            str(row.get(field, "")) for field in ("fullplot", "summary", "tagline")
        )
        explanation = generate_explanation(user_keywords, movie_text)

        # NOTE(review): the original literal was split across physical lines
        # (markup apparently stripped); rebuilt as a collapsible section.
        # Confirm the intended layout.
        plot = (
            "<details><summary>📝 Plot</summary>\n\n"
            f"{row['fullplot']}\n\n"
            "</details>"
        )
        blocks.append(
            f"{title}\n{genres}\n{similarity}\n🔍 *Matched on:* {explanation}\n{plot}"
        )
    return "\n\n---\n\n".join(blocks), df["title"].tolist()


def recommend_movies(user_input, top_k, use_example):
    """Gradio callback for the full UI.

    Returns the Markdown for the recommendations panel and an update for
    the feedback dropdown listing the recommended titles.
    """
    if use_example:
        user_input = EXAMPLE_DESCRIPTION
    if not user_input or not user_input.strip():
        return "⚠️ Please enter a description.", gr.update(choices=[], value=None)

    user_emb = encode_input_text(user_input, load_model())
    top_df = compute_similar_movies(user_emb, load_movies(), top_k=int(top_k))
    result_text, titles = format_movie_output(top_df, user_input)
    return result_text, gr.update(choices=titles, value=titles[0] if titles else None)


def clear_all():
    """Reset the full UI: text, top-k, example toggle, output and dropdown."""
    return "", 5, False, "", gr.update(choices=[], value=None)


def full_pipeline(user_text):
    """Gradio callback for the minimal UI: plot text in, plain-text results out."""
    if not user_text or not user_text.strip():
        return "⚠️ Please enter a description."

    user_emb = encode_input_text(user_text, load_model())
    user_keywords = extract_keywords(user_text)
    top_df = compute_similar_movies(user_emb, load_movies())

    results = []
    for _, row in top_df.iterrows():
        explanation = generate_explanation(user_keywords, row["fullplot"])
        results.append(
            f"🎬 {row['title']} (🎯 score={row['similarity']:.2f})\n🔍 {explanation}"
        )
    return "\n\n".join(results)


# === UI ===

def build_recommender_ui():
    """Build the full recommender UI (inputs, recommendations, feedback)."""
    with gr.Blocks() as ui:
        gr.Markdown("## 🎬 Movie Recommender")

        with gr.Row():
            with gr.Column(scale=1):
                user_input = gr.Textbox(
                    label="Describe a Movie Plot",
                    lines=4,
                    placeholder="e.g. A team of misfits saves the world.",
                )
                use_example = gr.Checkbox(label="Use Example Description")
                top_k = gr.Slider(1, 10, value=5, step=1, label="Top K Recommendations")
                recommend_btn = gr.Button("🔍 Recommend")
                clear_btn = gr.Button("🧹 Clear")

                gr.Markdown("### 🙋 Feedback")
                feedback_title = gr.Dropdown(choices=[], label="Select Movie to Rate")
                feedback_choice = gr.Radio(["👍", "👎"], label="Your Feedback")
                feedback_btn = gr.Button("Submit Feedback")
                feedback_output = gr.Textbox(label="Feedback Result", interactive=False)

            with gr.Column(scale=2):
                output = gr.Markdown(label="Recommendations")

        recommend_btn.click(
            fn=recommend_movies,
            inputs=[user_input, top_k, use_example],
            outputs=[output, feedback_title],
        )
        clear_btn.click(
            fn=clear_all,
            inputs=[],
            outputs=[user_input, top_k, use_example, output, feedback_title],
        )
        feedback_btn.click(
            fn=submit_feedback,
            inputs=[feedback_title, feedback_choice],
            outputs=feedback_output,
        )
    return ui


def build_simple_ui():
    """Build the minimal UI: one textbox in, one textbox of results out."""
    with gr.Blocks() as ui:
        gr.Markdown("## 🎬 Movie Recommendation with Explanation")
        input_text = gr.Textbox(
            label="Enter a movie plot description",
            placeholder="Type a story or plot...",
        )
        output = gr.Textbox(label="Top recommended movies with explanation", lines=10)
        btn = gr.Button("Recommend")
        btn.click(fn=full_pipeline, inputs=[input_text], outputs=[output])
    return ui


iface = build_recommender_ui()
demo = build_simple_ui()


def main():
    """Launch both UIs, each on its own port."""
    # launch() blocks the main thread by default, so the first server must
    # be started non-blocking or the second UI would never come up.
    iface.launch(prevent_thread_lock=True)
    demo.launch()


if __name__ == "__main__":
    main()