File size: 2,241 Bytes
97be09c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import gradio as gr
import pandas as pd
import numpy as np
import re
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from datasets import load_dataset

# === Load dataset from Hugging Face ===
# Fetch the "MongoDB/embedded_movies" dataset and turn its "train" split
# into a DataFrame.
raw_data = load_dataset("MongoDB/embedded_movies")
df = pd.DataFrame(raw_data["train"])

# Keep only relevant columns; rows missing a title or a plot are dropped and
# the index is renumbered from 0.
df = df[["title", "fullplot"]].dropna().reset_index(drop=True)

# === Load model and compute embeddings ===
model = SentenceTransformer("all-MiniLM-L6-v2")
plot_embeddings = model.encode(df["fullplot"].tolist(), show_progress_bar=True)
# encode() is documented to return a single 2-D array for a list of inputs.
# pandas refuses to store a multi-column 2-D array in one column
# ("Expected a 1D array ..."), so split it into one vector per row; this is
# also the layout np.stack() needs when the vectors are re-assembled later.
df["fullplot_embedding"] = list(plot_embeddings)

# === Helper Functions ===
def encode_input_text(text):
    """Embed a single piece of text with the module-level sentence model.

    The text is wrapped in a one-element batch, encoded, and the only
    resulting embedding is returned.
    """
    batch = [text]
    embeddings = model.encode(batch)
    return embeddings[0]

def extract_keywords(text):
    """Return the distinct lowercase words of five or more characters in *text*.

    "Word" follows the regex ``\\w`` class, so digits and underscores count
    as word characters.
    """
    lowered = text.lower()
    return {match.group(0) for match in re.finditer(r'\b\w{5,}\b', lowered)}

def generate_explanation(user_keywords, movie_text):
    """Describe which of the user's keywords also occur in a movie's text.

    Args:
        user_keywords: Set of keywords taken from the user's query.
        movie_text: Text of the movie to compare against; its keywords are
            extracted with ``extract_keywords``.

    Returns:
        The shared keywords in alphabetical order joined by ", ", or a
        fixed fallback message when there is no overlap.
    """
    shared = user_keywords & extract_keywords(movie_text)
    if not shared:
        return "No strong keyword match"
    return ", ".join(sorted(shared))

def compute_similar_movies(user_embedding, top_k=5):
    """Return the ``top_k`` movies whose plot embedding is closest to the query.

    Args:
        user_embedding: Embedding vector of the user's query.
        top_k: Number of rows to return (default 5).

    Returns:
        A new DataFrame holding the columns of the module-level ``df`` plus
        a ``similarity`` column (cosine similarity to ``user_embedding``),
        sorted from most to least similar and truncated to ``top_k`` rows.
    """
    # Re-assemble the per-row vectors into one 2-D matrix for scikit-learn.
    movie_embeddings = np.stack(df["fullplot_embedding"].values)
    similarities = cosine_similarity([user_embedding], movie_embeddings)[0]
    # Attach the scores to a per-call copy instead of writing into the shared
    # global frame: the UI can serve several requests at once, and a shared
    # "similarity" column could be overwritten by another request between
    # the assignment and the sort.
    scored = df.assign(similarity=similarities)
    return scored.sort_values("similarity", ascending=False).head(top_k)

# === Gradio UI ===
with gr.Blocks() as demo:
    gr.Markdown("## 🎬 Movie Recommender Engine with Explanation")
    input_box = gr.Textbox(label="Enter your plot idea", placeholder="e.g., dreams within dreams")
    output_box = gr.Textbox(label="Top Recommendations", lines=8)
    recommend_button = gr.Button("Recommend")

    def recommend(text):
        """Build the recommendation text shown in the output box.

        Args:
            text: The plot idea typed by the user.

        Returns:
            One entry per recommended movie (title, similarity score and the
            keywords shared with the query), separated by blank lines; or a
            short prompt when the input is empty or only whitespace.
        """
        # Embedding an empty query would still rank every movie and present
        # arbitrary titles as "recommendations", so ask for input instead.
        if not text or not text.strip():
            return "Please enter a plot idea to get recommendations."

        user_embedding = encode_input_text(text)
        user_keywords = extract_keywords(text)
        top_df = compute_similar_movies(user_embedding)

        results = []
        # Walk the three needed columns in lockstep rather than materialising
        # a Series per row with iterrows().
        for title, plot, score in zip(
            top_df["title"], top_df["fullplot"], top_df["similarity"]
        ):
            explanation = generate_explanation(user_keywords, plot)
            results.append(f"🎬 {title} (score={score:.2f})\n🔍 {explanation}")
        return "\n\n".join(results)

    # Run recommend() on the textbox content when the button is clicked and
    # show its return value in the output box.
    recommend_button.click(fn=recommend, inputs=[input_box], outputs=[output_box])

demo.launch()