# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "transformers",
#     "huggingface-hub",
#     "numpy",
#     "requests",
#     "torch",
# ]
# ///

# colbert-rerank.py
"""Score documents against a query with late-interaction MaxSim.

Text is tokenized locally with the model's tokenizer, the token ids are
sent to an HTTP embedding endpoint, and every (query, document) pair is
scored by summing, over query tokens, the best dot product with any
document token.
"""

import json

import numpy as np
import requests
import torch
import torch.nn.functional as F
from huggingface_hub import hf_hub_download
from transformers import AutoTokenizer

# Endpoint that receives token ids and returns embeddings.
# NOTE(review): the code indexes the result per token, so the server is
# assumed to return one vector per input token (no pooling) -- confirm.
EMBEDDING_URL = "http://localhost:8080/embedding"
# Seconds to wait for the embedding server before giving up.
REQUEST_TIMEOUT = 60

model_id = "LiquidAI/LFM2.5-ColBERT-350M"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Read prefixes, length limits and skiplist words shipped with the model.
# The context manager closes the file handle deterministically.
with open(
    hf_hub_download(model_id, "config_sentence_transformers.json"),
    encoding="utf-8",
) as config_file:
    config = json.load(config_file)

# Token ids of every skiplist word; document tokens in this set are masked out.
skiplist = {
    token_id
    for word in config["skiplist_words"]
    for token_id in tokenizer.encode(word, add_special_tokens=False)
}


def maxsim(q, d):
    """Return the MaxSim score between query and document embeddings.

    Args:
        q: 2-D tensor of query token embeddings, one row per token.
        d: 2-D tensor of document token embeddings, one row per token.

    Returns:
        A Python float: for each query row, the largest dot product with
        any document row, summed over all query rows.
    """
    return (q @ d.T).max(dim=1).values.sum().item()


def preprocess(text, is_query):
    """Tokenize *text* as a query or a document.

    The configured prefix is prepended before encoding and the result is
    truncated to the configured maximum length. Queries are then padded
    with ``tokenizer.pad_token_id`` up to ``query_length``; documents
    instead get a mask that drops skiplist tokens.

    Args:
        text: Raw query or document text.
        is_query: True to apply query settings, False for document settings.

    Returns:
        A ``(token_ids, mask)`` tuple. ``mask`` is ``None`` for queries and
        a list of booleans (True = keep) aligned with ``token_ids`` for
        documents.
    """
    if is_query:
        prefix, max_len = config["query_prefix"], config["query_length"]
    else:
        prefix, max_len = config["document_prefix"], config["document_length"]

    # Truncate in both modes: previously only documents were truncated, so
    # an over-long query was sent at full length (the negative pad count
    # silently produced no padding).
    toks = tokenizer.encode(prefix + text)[:max_len]

    if is_query:
        toks += [tokenizer.pad_token_id] * (max_len - len(toks))
        return toks, None
    return toks, [t not in skiplist for t in toks]


def embed(content, mask=None, *, url=EMBEDDING_URL, timeout=REQUEST_TIMEOUT):
    """Fetch embeddings for *content* and L2-normalize them.

    Args:
        content: Token ids (as produced by ``preprocess``) sent as the
            ``content`` field of the JSON request body.
        mask: Optional index/boolean mask applied to the first axis of the
            returned embeddings; ``None`` keeps every row.
        url: Embedding endpoint to POST to.
        timeout: Seconds to wait for the server.

    Returns:
        A tensor with a leading axis of size 1 added in front of the
        normalized embeddings.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within *timeout*.
    """
    response = requests.post(url, json={"content": content}, timeout=timeout)
    # Fail with the HTTP error instead of an opaque KeyError/TypeError when
    # the error payload is indexed below.
    response.raise_for_status()
    emb = np.array(response.json()[0]["embedding"])
    if mask is not None:
        emb = emb[mask]
    emb = torch.from_numpy(emb)
    emb = F.normalize(emb, p=2, dim=-1)  # L2 normalize each token embedding
    return emb.unsqueeze(0)


docs = [
    "hi",
    "it is a bear",
    "The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.",
]
query = "What is panda?"

q = embed(*preprocess(query, True))
d = [embed(*preprocess(doc, False)) for doc in docs]
# squeeze(0) removes only the leading axis added by embed(); a bare
# squeeze() would also collapse the token axis when a single token remains.
s = [
    (query, doc, maxsim(q.squeeze(0), di.squeeze(0)))
    for doc, di in zip(docs, d)
]
for q_text, d_text, score in s:
    print(f"Score: {score:.2f} | Q: {q_text} | D: {d_text}")