File size: 4,029 Bytes
76fca23
 
0226686
fcd831f
76fca23
 
 
 
 
 
 
0226686
d83b431
76fca23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fcd831f
76fca23
 
 
 
 
 
 
 
5ba0958
76fca23
 
 
 
 
fcd831f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76fca23
0226686
5ba0958
 
fcd831f
 
 
d83b431
fcd831f
0226686
 
fcd831f
5ba0958
 
d83b431
 
0226686
5ba0958
 
fcd831f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ba0958
 
0bb0b9b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
"""
Visual Search API - HuggingFace Space
Returns embedding vector for external Pinecone queries
Supports both image and text inputs (Jina CLIP v2 multimodal)
"""

import os
import gradio as gr
import torch
import numpy as np
from PIL import Image
import json

# Module-level cache for the model. It stays None until load_model() is first
# called; load_model() then stores the loaded instance here and reuses it.
model = None


def load_model():
    """Return the shared Jina CLIP v2 model, loading it on the first call.

    The loaded instance is stored in the module-level ``model`` variable, so
    every later call returns that same object without loading again.
    """
    global model

    # Fast path: a previous call already populated the cache.
    if model is not None:
        return model

    print("Loading Jina CLIP v2...")
    # Imported here rather than at module top, so transformers is only
    # pulled in when the model is actually needed.
    from transformers import AutoModel

    model = AutoModel.from_pretrained("jinaai/jina-clip-v2", trust_remote_code=True)
    model.eval()
    print("Model loaded!")
    return model


def get_image_embedding(image: Image.Image) -> list:
    """Generate a unit-length embedding of at most 512 dimensions for an image.

    The model output is flattened, cut to its first 512 components and only
    then L2-normalised. Normalising *before* truncating (the previous order)
    returned a vector whose norm was below 1 whenever the model produced more
    than 512 components.

    Args:
        image: The PIL image to embed.

    Returns:
        The embedding as a plain list of floats.

    Raises:
        ValueError: If the embedding is empty, all zeros, or has a non-finite
            norm, in which case it cannot be normalised.
    """
    max_dim = 512
    m = load_model()

    with torch.no_grad():
        emb = m.encode_image(image)
        # Accept either a torch tensor (has .cpu()) or an array-like result.
        if hasattr(emb, 'cpu'):
            emb = emb.cpu().numpy()

    # Truncate first so the normalisation below applies to the final vector.
    emb = np.asarray(emb).flatten()[:max_dim]

    norm = np.linalg.norm(emb)
    if not np.isfinite(norm) or norm == 0:
        # Dividing would yield NaN/inf values, which do not serialise to
        # valid JSON; fail loudly so the caller can report an error instead.
        raise ValueError("Model returned a zero or non-finite image embedding")

    return (emb / norm).tolist()


def get_text_embedding(text: str) -> list:
    """Generate a unit-length embedding of at most 512 dimensions for a text query.

    The model output is flattened, cut to its first 512 components and only
    then L2-normalised. Normalising *before* truncating (the previous order)
    returned a vector whose norm was below 1 whenever the model produced more
    than 512 components.

    Args:
        text: The query string to embed. It is passed to the model as a
            single-element list.

    Returns:
        The embedding as a plain list of floats.

    Raises:
        ValueError: If the embedding is empty, all zeros, or has a non-finite
            norm, in which case it cannot be normalised.
    """
    max_dim = 512
    m = load_model()

    with torch.no_grad():
        emb = m.encode_text([text])
        # Accept either a torch tensor (has .cpu()) or an array-like result.
        if hasattr(emb, 'cpu'):
            emb = emb.cpu().numpy()

    # Flatten the single-item batch, then truncate before normalising so the
    # normalisation applies to the final vector.
    emb = np.asarray(emb).flatten()[:max_dim]

    norm = np.linalg.norm(emb)
    if not np.isfinite(norm) or norm == 0:
        # Dividing would yield NaN/inf values, which do not serialise to
        # valid JSON; fail loudly so the caller can report an error instead.
        raise ValueError("Model returned a zero or non-finite text embedding")

    return (emb / norm).tolist()


def image_search(image):
    """Embed an uploaded image and return the result as a JSON string.

    On success the JSON object has an ``embedding`` list and a ``dimensions``
    count. If no image is given, or anything fails while embedding, the JSON
    object instead carries a single ``error`` message.
    """
    if image is None:
        return json.dumps({"error": "No image provided"})

    try:
        print("Generating image embedding...")
        vector = get_image_embedding(image)
        dims = len(vector)
        print(f"Image embedding generated: {dims} dimensions")

        payload = {"embedding": vector, "dimensions": dims}
        return json.dumps(payload, indent=2)
    except Exception as exc:
        # UI boundary: log the full traceback to the server output and hand
        # the caller a JSON error rather than letting the exception propagate.
        import traceback

        traceback.print_exc()
        return json.dumps({"error": str(exc)})


def text_search(text):
    """Embed a text query and return the result as a JSON string.

    The query is stripped of surrounding whitespace and limited to its first
    200 characters before embedding. On success the JSON object has an
    ``embedding`` list and a ``dimensions`` count. If the query is empty or
    blank, or anything fails while embedding, the JSON object instead carries
    a single ``error`` message.
    """
    if not text or not text.strip():
        return json.dumps({"error": "No text provided"})

    try:
        query = text.strip()[:200]
        print(f"Generating text embedding for: {query}")
        vector = get_text_embedding(query)
        dims = len(vector)
        print(f"Text embedding generated: {dims} dimensions")

        payload = {"embedding": vector, "dimensions": dims}
        return json.dumps(payload, indent=2)
    except Exception as exc:
        # UI boundary: log the full traceback to the server output and hand
        # the caller a JSON error rather than letting the exception propagate.
        import traceback

        traceback.print_exc()
        return json.dumps({"error": str(exc)})


# Gradio Blocks with explicit api_name for stable endpoints
# Image: /call/predict (backward compatible with existing image-search.py)
# Text:  /call/text_search (new endpoint for text-search.py)
with gr.Blocks(title="Visual Search - Embedding Generator") as demo:
    gr.Markdown("# Visual Search - Embedding Generator")
    gr.Markdown("Upload an image or enter text to get a 512-dimensional CLIP embedding.")

    # Image tab: the upload is delivered to image_search as a PIL image
    # (type="pil") and the returned JSON string is shown in the textbox.
    with gr.Tab("Image Search"):
        image_input = gr.Image(type="pil", label="Upload Image")
        image_output = gr.Textbox(label="Embedding Vector (JSON)", lines=15)
        image_btn = gr.Button("Generate Embedding")
        # api_name is set explicitly so the endpoint name stays "predict"
        # for existing callers (see the header comment above).
        image_btn.click(
            image_search,
            inputs=image_input,
            outputs=image_output,
            api_name="predict"
        )

    # Text tab: the query string is passed to text_search and the returned
    # JSON string is shown in the textbox.
    with gr.Tab("Text Search"):
        text_input = gr.Textbox(
            label="Search Query",
            placeholder="e.g. boys underwear",
            lines=1
        )
        text_output = gr.Textbox(label="Embedding Vector (JSON)", lines=15)
        text_btn = gr.Button("Generate Embedding")
        # Explicit api_name gives the text endpoint its own fixed name.
        text_btn.click(
            text_search,
            inputs=text_input,
            outputs=text_output,
            api_name="text_search"
        )

# Start the app only when this file is run as a script; request queuing is
# enabled on the Blocks app before it is launched.
if __name__ == "__main__":
    demo.queue().launch()