Spaces:
Running on Zero
Running on Zero
| """ | |
| Visual Search API - HuggingFace Space | |
| Returns embedding vector for external Pinecone queries | |
| Supports both image and text inputs (Jina CLIP v2 multimodal) | |
| """ | |
| import os | |
| import gradio as gr | |
| import torch | |
| import numpy as np | |
| from PIL import Image | |
| import json | |
# Shared model instance; remains None until load_model() is first called.
model = None


def load_model():
    """Return the shared Jina CLIP v2 model, loading it on the first call.

    On the first call the model is fetched with ``AutoModel.from_pretrained``
    (with ``trust_remote_code=True``), switched to eval mode, and stored in
    the module-level ``model``. Later calls return that stored instance.
    """
    global model
    if model is not None:
        return model

    print("Loading Jina CLIP v2...")
    # Deferred import: transformers is only imported when a load happens.
    from transformers import AutoModel

    model = AutoModel.from_pretrained("jinaai/jina-clip-v2", trust_remote_code=True)
    model.eval()
    print("Model loaded!")
    return model
def get_image_embedding(image: Image.Image, dim: int = 512) -> list:
    """Return a unit-length embedding for *image* as a list of floats.

    The image is encoded with the model returned by ``load_model()``. The raw
    vector is flattened, cut to its first ``dim`` components, and only then
    L2-normalized, so the result has norm 1 even when truncation happened
    (normalizing before truncating would leave it shorter than unit length).

    Args:
        image: PIL image to embed.
        dim: Maximum number of leading components to keep. A vector that is
            already this short keeps its own length.

    Returns:
        The normalized embedding as a plain list of Python floats.

    Raises:
        ValueError: If ``dim`` is not positive, or if the vector's norm is
            zero or non-finite (dividing by it would produce NaN values).
    """
    if dim <= 0:
        raise ValueError(f"dim must be positive, got {dim}")

    m = load_model()
    with torch.no_grad():
        emb = m.encode_image(image)

    # The encoder output is moved to CPU/NumPy when it exposes .cpu()
    # (i.e. behaves like a torch tensor); otherwise it is used as returned.
    if hasattr(emb, 'cpu'):
        emb = emb.cpu().numpy()

    # Truncate first, normalize second: keeps the returned vector unit-length.
    emb = np.asarray(emb).flatten()[:dim]
    norm = np.linalg.norm(emb)
    if not np.isfinite(norm) or norm == 0:
        raise ValueError("Embedding has zero or non-finite norm; cannot normalize")
    return (emb / norm).tolist()
def get_text_embedding(text: str, dim: int = 512) -> list:
    """Return a unit-length embedding for *text* as a list of floats.

    The text is passed to the model returned by ``load_model()`` as a
    single-element list. The raw output is flattened, cut to its first
    ``dim`` components, and only then L2-normalized, so the result has norm 1
    even when truncation happened (normalizing before truncating would leave
    it shorter than unit length).

    Args:
        text: Query string to embed.
        dim: Maximum number of leading components to keep. A vector that is
            already this short keeps its own length.

    Returns:
        The normalized embedding as a plain list of Python floats.

    Raises:
        ValueError: If ``dim`` is not positive, or if the vector's norm is
            zero or non-finite (dividing by it would produce NaN values).
    """
    if dim <= 0:
        raise ValueError(f"dim must be positive, got {dim}")

    m = load_model()
    with torch.no_grad():
        emb = m.encode_text([text])

    # The encoder output is moved to CPU/NumPy when it exposes .cpu()
    # (i.e. behaves like a torch tensor); otherwise it is used as returned.
    if hasattr(emb, 'cpu'):
        emb = emb.cpu().numpy()

    # Truncate first, normalize second: keeps the returned vector unit-length.
    emb = np.asarray(emb).flatten()[:dim]
    norm = np.linalg.norm(emb)
    if not np.isfinite(norm) or norm == 0:
        raise ValueError("Embedding has zero or non-finite norm; cannot normalize")
    return (emb / norm).tolist()
def image_search(image):
    """Embed *image* and return the outcome as a JSON string.

    On success the JSON object holds ``embedding`` (the vector) and
    ``dimensions`` (its length), pretty-printed with a 2-space indent.
    When *image* is ``None`` or embedding raises, the JSON object holds a
    single ``error`` message instead.
    """
    if image is None:
        return json.dumps({"error": "No image provided"})

    try:
        print("Generating image embedding...")
        vector = get_image_embedding(image)
        size = len(vector)
        print(f"Image embedding generated: {size} dimensions")
        payload = {"embedding": vector, "dimensions": size}
        return json.dumps(payload, indent=2)
    except Exception as exc:
        # Full traceback goes to the server log; the caller gets the message.
        import traceback
        traceback.print_exc()
        return json.dumps({"error": str(exc)})
def text_search(text):
    """Embed a text query and return the outcome as a JSON string.

    The query is stripped of surrounding whitespace and limited to its first
    200 characters before embedding. On success the JSON object holds
    ``embedding`` (the vector) and ``dimensions`` (its length), pretty-printed
    with a 2-space indent. When the query is empty/blank or embedding raises,
    the JSON object holds a single ``error`` message instead.
    """
    query = text.strip() if text else ""
    if not query:
        return json.dumps({"error": "No text provided"})

    query = query[:200]
    try:
        print(f"Generating text embedding for: {query}")
        vector = get_text_embedding(query)
        size = len(vector)
        print(f"Text embedding generated: {size} dimensions")
        payload = {"embedding": vector, "dimensions": size}
        return json.dumps(payload, indent=2)
    except Exception as exc:
        # Full traceback goes to the server log; the caller gets the message.
        import traceback
        traceback.print_exc()
        return json.dumps({"error": str(exc)})
# Gradio Blocks app. Each click handler is given an explicit api_name so the
# API endpoints stay stable:
#   Image: /call/predict      (backward compatible with existing image-search.py)
#   Text:  /call/text_search  (new endpoint for text-search.py)
with gr.Blocks(title="Visual Search - Embedding Generator") as demo:
    gr.Markdown("# Visual Search - Embedding Generator")
    gr.Markdown("Upload an image or enter text to get a 512-dimensional CLIP embedding.")

    # Tab 1: image in, JSON embedding out.
    with gr.Tab("Image Search"):
        image_input = gr.Image(label="Upload Image", type="pil")
        image_output = gr.Textbox(lines=15, label="Embedding Vector (JSON)")
        image_btn = gr.Button("Generate Embedding")
        image_btn.click(
            fn=image_search,
            inputs=image_input,
            outputs=image_output,
            api_name="predict",
        )

    # Tab 2: text query in, JSON embedding out.
    with gr.Tab("Text Search"):
        text_input = gr.Textbox(
            lines=1,
            label="Search Query",
            placeholder="e.g. boys underwear",
        )
        text_output = gr.Textbox(lines=15, label="Embedding Vector (JSON)")
        text_btn = gr.Button("Generate Embedding")
        text_btn.click(
            fn=text_search,
            inputs=text_input,
            outputs=text_output,
            api_name="text_search",
        )


if __name__ == "__main__":
    # Enable the request queue, then start the server on whatever it returns.
    queued_app = demo.queue()
    queued_app.launch()