Spaces:
Running on Zero
Running on Zero
File size: 4,029 Bytes
da0fb08 ec0ee7c 4f9bac9 da0fb08 ec0ee7c 869bae6 da0fb08 4f9bac9 da0fb08 2d37781 da0fb08 4f9bac9 da0fb08 ec0ee7c 2d37781 4f9bac9 869bae6 4f9bac9 ec0ee7c 4f9bac9 2d37781 869bae6 ec0ee7c 2d37781 4f9bac9 2d37781 d9e5501 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 | """
Visual Search API - HuggingFace Space
Returns embedding vector for external Pinecone queries
Supports both image and text inputs (Jina CLIP v2 multimodal)
"""
import os
import gradio as gr
import torch
import numpy as np
from PIL import Image
import json
# Process-wide model handle; stays None until load_model() is first called.
model = None


def load_model():
    """Return the shared Jina CLIP v2 model, loading it on first use.

    The model is fetched with ``AutoModel.from_pretrained`` (with
    ``trust_remote_code=True``), switched to eval mode and stored in the
    module-level ``model`` variable; later calls return that same object.
    """
    global model

    # Fast path: the model has already been loaded.
    if model is not None:
        return model

    print("Loading Jina CLIP v2...")
    # Imported here so transformers is only pulled in when a load is needed.
    from transformers import AutoModel

    model = AutoModel.from_pretrained("jinaai/jina-clip-v2", trust_remote_code=True)
    model.eval()
    print("Model loaded!")
    return model
def get_image_embedding(image: Image.Image, dim: int = 512) -> list:
    """Generate a unit-length embedding (512-dim by default) for an image.

    The image is encoded with the model returned by ``load_model()``. The
    result is flattened to 1-D, cut to its first ``dim`` components and only
    then L2-normalised. Normalising before truncating would generally leave
    the truncated vector with a norm below 1.

    Args:
        image: PIL image to embed.
        dim: Number of leading components to keep. Model outputs that are
            already shorter than ``dim`` keep their own length.

    Returns:
        The embedding as a list of Python floats with L2 norm 1.

    Raises:
        ValueError: If the truncated embedding has a zero or non-finite
            norm, which would otherwise produce NaN values in the output.
    """
    m = load_model()
    with torch.no_grad():
        emb = m.encode_image(image)

    # A torch tensor is moved to host memory and converted; anything else is
    # handed to NumPy as-is.
    if hasattr(emb, 'cpu'):
        emb = emb.cpu().numpy()

    # Flatten, then truncate BEFORE normalising so the result is unit length.
    emb = np.asarray(emb).reshape(-1)[:dim]

    norm = np.linalg.norm(emb)
    if not np.isfinite(norm) or norm == 0:
        raise ValueError("Image embedding has zero or non-finite norm; cannot normalize")

    return (emb / norm).tolist()
def get_text_embedding(text: str, dim: int = 512) -> list:
    """Generate a unit-length embedding (512-dim by default) for a text query.

    The text is encoded (as a one-element batch) with the model returned by
    ``load_model()``. The result is flattened to 1-D, cut to its first ``dim``
    components and only then L2-normalised. Normalising before truncating
    would generally leave the truncated vector with a norm below 1.

    Args:
        text: Query string to embed.
        dim: Number of leading components to keep. Model outputs that are
            already shorter than ``dim`` keep their own length.

    Returns:
        The embedding as a list of Python floats with L2 norm 1.

    Raises:
        ValueError: If the truncated embedding has a zero or non-finite
            norm, which would otherwise produce NaN values in the output.
    """
    m = load_model()
    with torch.no_grad():
        emb = m.encode_text([text])

    # A torch tensor is moved to host memory and converted; anything else is
    # handed to NumPy as-is.
    if hasattr(emb, 'cpu'):
        emb = emb.cpu().numpy()

    # Flatten the (1, N) batch, then truncate BEFORE normalising so the
    # result is unit length.
    emb = np.asarray(emb).reshape(-1)[:dim]

    norm = np.linalg.norm(emb)
    if not np.isfinite(norm) or norm == 0:
        raise ValueError("Text embedding has zero or non-finite norm; cannot normalize")

    return (emb / norm).tolist()
def image_search(image):
    """Gradio handler: embed an image and report the result as a JSON string.

    Returns ``{"embedding": [...], "dimensions": n}`` (indented JSON) on
    success, or ``{"error": "..."}`` when no image was supplied or when
    anything raised while producing the embedding.
    """
    if image is None:
        return json.dumps({"error": "No image provided"})

    try:
        print("Generating image embedding...")
        vector = get_image_embedding(image)
        size = len(vector)
        print(f"Image embedding generated: {size} dimensions")

        payload = {"embedding": vector, "dimensions": size}
        return json.dumps(payload, indent=2)
    except Exception as exc:
        # Log the full traceback, but hand the caller a JSON error payload
        # instead of letting the exception propagate.
        import traceback

        traceback.print_exc()
        return json.dumps({"error": str(exc)})
def text_search(text):
    """Gradio handler: embed a text query and report the result as a JSON string.

    The query is stripped of surrounding whitespace and limited to its first
    200 characters before being embedded. Returns
    ``{"embedding": [...], "dimensions": n}`` (indented JSON) on success, or
    ``{"error": "..."}`` when the query is empty/blank or when anything
    raised while producing the embedding.
    """
    query = text.strip() if text else ""
    if not query:
        return json.dumps({"error": "No text provided"})

    try:
        text = query[:200]
        print(f"Generating text embedding for: {text}")
        vector = get_text_embedding(text)
        size = len(vector)
        print(f"Text embedding generated: {size} dimensions")

        payload = {"embedding": vector, "dimensions": size}
        return json.dumps(payload, indent=2)
    except Exception as exc:
        # Log the full traceback, but hand the caller a JSON error payload
        # instead of letting the exception propagate.
        import traceback

        traceback.print_exc()
        return json.dumps({"error": str(exc)})
# Gradio Blocks UI. Every click handler is registered with an explicit
# api_name so the endpoints stay stable:
#   Image: /call/predict      (backward compatible with existing image-search.py)
#   Text:  /call/text_search  (new endpoint for text-search.py)
with gr.Blocks(title="Visual Search - Embedding Generator") as demo:
    gr.Markdown("# Visual Search - Embedding Generator")
    gr.Markdown("Upload an image or enter text to get a 512-dimensional CLIP embedding.")

    # Tab 1: image in, JSON embedding out.
    with gr.Tab("Image Search"):
        image_input = gr.Image(type="pil", label="Upload Image")
        image_output = gr.Textbox(label="Embedding Vector (JSON)", lines=15)
        image_btn = gr.Button("Generate Embedding")
        image_btn.click(
            fn=image_search,
            inputs=image_input,
            outputs=image_output,
            api_name="predict",
        )

    # Tab 2: text query in, JSON embedding out.
    with gr.Tab("Text Search"):
        text_input = gr.Textbox(label="Search Query", placeholder="e.g. boys underwear", lines=1)
        text_output = gr.Textbox(label="Embedding Vector (JSON)", lines=15)
        text_btn = gr.Button("Generate Embedding")
        text_btn.click(
            fn=text_search,
            inputs=text_input,
            outputs=text_output,
            api_name="text_search",
        )


if __name__ == "__main__":
    # Start the app (with request queueing enabled) only when run as a script.
    demo.queue().launch()
|