"""
Jina Reranker API Adapter

Calls Jina AI's cloud reranker API instead of running the model locally.
Same jina-reranker-v3 model, but runs on Jina's GPU servers.

Benefits over self-hosted:
- ~300ms latency (vs ~6s/doc on CPU)
- No model download or GPU needed
- Same API key as Jina Reader (unified token balance)
- Production-ready immediately

API: POST https://api.jina.ai/v1/rerank
Docs: https://jina.ai/reranker
Free: 1M tokens on signup (same key as Reader)
"""

import logging
import time
import httpx
from typing import List, Dict, Any, Optional, Tuple

from src.core.ports.reranker_port import RerankerPort

logger = logging.getLogger(__name__)


class JinaRerankerAPIAdapter(RerankerPort):
    """
    Reranker using Jina AI's cloud API.

    Sends all documents in ONE API call — Jina handles batching server-side.
    Falls back to score-based ordering if API fails.

    Token usage: query_tokens + sum(doc_tokens)
    Typical: ~1,400 tokens per call with 7 docs × 200 chars each
    """

    API_URL = "https://api.jina.ai/v1/rerank"

    # Each document's text is cut to this many characters before it is sent.
    MAX_CHARS = 2048
    # How many input documents are echoed to the log before the request.
    _LOG_INPUT_LIMIT = 5
    # Length of the title/content preview used in log lines.
    _LOG_PREVIEW_CHARS = 60

    def __init__(
        self,
        api_key: str,
        model: str = "jina-reranker-v3",
        timeout: float = 5.0,
    ):
        """
        Store the configuration; the HTTP client is created on first use.

        Args:
            api_key: Jina API key. A falsy value or the placeholder
                "your-jina-api-key-here" disables the adapter
                (``self.api_key`` becomes ``None``).
            model: Model name sent in the request payload.
            timeout: Timeout in seconds handed to ``httpx.Client``.
        """
        self.api_key: Optional[str] = api_key
        self.model = model
        self.timeout = timeout
        self._client: Optional[httpx.Client] = None

        if not api_key or api_key == "your-jina-api-key-here":
            logger.warning("Jina Reranker API: no API key — adapter disabled")
            self.api_key = None
        else:
            logger.info(f"Jina Reranker API ready (model={model}, timeout={timeout}s)")

    def _get_client(self) -> httpx.Client:
        """Return the shared ``httpx.Client``, creating it on first call."""
        if self._client is None:
            self._client = httpx.Client(
                timeout=self.timeout,
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json",
                    "Accept": "application/json",
                },
            )
        return self._client

    def close(self) -> None:
        """
        Close the HTTP client if one was created.

        Safe to call more than once; a later ``rerank`` call simply creates
        a fresh client.
        """
        client, self._client = self._client, None
        if client is not None:
            client.close()

    @staticmethod
    def _fallback_order(docs: List[Dict[str, Any]], top_n: int) -> List[Dict[str, Any]]:
        """Return the first ``top_n`` docs ordered by their "score", highest first."""
        # `or 0` also covers an explicit score of None, which cannot be compared.
        return sorted(docs, key=lambda d: d.get("score") or 0, reverse=True)[:top_n]

    @staticmethod
    def _doc_source(doc: Dict[str, Any]) -> Any:
        """Return the doc's "source", else metadata["source"], else "?"."""
        source = doc.get("source")
        if source:
            return source
        metadata = doc.get("metadata")
        if isinstance(metadata, dict):
            return metadata.get("source", "?")
        return "?"

    @classmethod
    def _doc_label(cls, doc: Dict[str, Any]) -> str:
        """Return a short log preview: the title, or else the content."""
        label = doc.get("title") or doc.get("content") or ""
        return str(label)[: cls._LOG_PREVIEW_CHARS]

    def _select_texts(
        self, docs: List[Dict[str, Any]]
    ) -> Tuple[List[Dict[str, Any]], List[str]]:
        """
        Pick the docs that have non-blank string content.

        Returns:
            ``(valid_docs, doc_texts)`` — parallel lists; each text is the
            stripped content truncated to ``MAX_CHARS``. Docs whose content
            is missing, not a string, or blank are skipped.
        """
        valid_docs: List[Dict[str, Any]] = []
        doc_texts: List[str] = []
        for doc in docs:
            raw = doc.get("content")
            text = raw.strip() if isinstance(raw, str) else ""
            if text:
                valid_docs.append(doc)
                doc_texts.append(text[: self.MAX_CHARS])
        return valid_docs, doc_texts

    def _fetch_scores(
        self, query: str, doc_texts: List[str]
    ) -> Optional[Tuple[Dict[int, float], Any, float]]:
        """
        Call the rerank endpoint and parse the whole response.

        Returns:
            ``(scores, total_tokens, elapsed_ms)`` on HTTP 200, where
            ``scores`` maps a position in ``doc_texts`` to its relevance
            score; ``None`` on any HTTP error, timeout, or malformed
            response (the reason is logged here).
        """
        started = time.perf_counter()
        try:
            response = self._get_client().post(
                self.API_URL,
                json={
                    "model": self.model,
                    "query": query,
                    "documents": doc_texts,
                    "top_n": len(doc_texts),
                    "return_documents": False,
                },
            )
            elapsed_ms = (time.perf_counter() - started) * 1000

            if response.status_code == 200:
                data = response.json()
                # Parse everything before the caller touches any document, so
                # a malformed entry cannot leave the docs half-updated.
                scores: Dict[int, float] = {}
                for item in data.get("results") or []:
                    idx = item["index"]
                    # Reject negative indexes too: they would silently
                    # address documents from the end of the list.
                    if 0 <= idx < len(doc_texts):
                        scores[idx] = float(item["relevance_score"])
                usage = data.get("usage") or {}
                return scores, usage.get("total_tokens", "?"), elapsed_ms

            if response.status_code == 401:
                logger.error("[JinaReranker] Invalid API key")
            elif response.status_code == 429:
                logger.warning("[JinaReranker] Rate limit exceeded")
            elif response.status_code == 402:
                logger.warning("[JinaReranker] Insufficient tokens — top up at jina.ai")
            else:
                logger.warning(
                    f"[JinaReranker] HTTP {response.status_code} — {response.text[:200]}"
                )
        except httpx.TimeoutException:
            logger.warning(f"[JinaReranker] Timeout ({self.timeout}s)")
        except Exception as e:
            # Deliberately broad: reranking is best-effort and the caller
            # falls back to score ordering on any failure.
            logger.error(f"[JinaReranker] Error: {e}")
        return None

    def rerank(
        self,
        query: str,
        docs: List[Dict[str, Any]],
        top_n: int = 5,
    ) -> List[Dict[str, Any]]:
        """
        Rerank documents using Jina API.

        Sends all docs with non-blank content in one request and orders them
        by the returned relevance score. Falls back to vector score ordering
        if the API is disabled or the call fails.

        Args:
            query: Search query the documents are ranked against.
            docs: Document dicts; "content" is the text sent to the API,
                "score" is used for the fallback ordering.
            top_n: Maximum number of documents to return.

        Returns:
            Up to ``top_n`` of the input dicts (same objects, not copies).
            On success each scored dict gains a "rerank_score" float and docs
            without usable content are dropped. Returns ``[]`` when ``docs``
            is empty or no doc has usable content.
        """
        if not docs:
            return []

        if not self.api_key:
            logger.warning("[JinaReranker] API disabled — falling back to score ordering")
            return self._fallback_order(docs, top_n)

        valid_docs, doc_texts = self._select_texts(docs)
        if not doc_texts:
            return []

        logger.info(
            f"[JinaReranker] Reranking {len(valid_docs)} docs for query: '{query[:60]}'"
        )
        if logger.isEnabledFor(logging.INFO):
            for i, doc in enumerate(valid_docs[: self._LOG_INPUT_LIMIT], 1):
                chars = len(doc["content"])
                logger.info(
                    f"[JinaReranker] Input #{i}: {chars:,} chars — "
                    f"src={self._doc_source(doc)} — {self._doc_label(doc)}"
                )

        outcome = self._fetch_scores(query, doc_texts)
        if outcome is None:
            logger.warning("[JinaReranker] API failed — falling back to vector score ordering")
            return self._fallback_order(docs, top_n)

        scores, total_tokens, elapsed_ms = outcome

        # Everything below runs outside the request's error handling, so a
        # logging problem can no longer throw away a successful rerank.
        for idx, doc in enumerate(valid_docs):
            if idx in scores:
                doc["rerank_score"] = scores[idx]
            else:
                # Drop a score left over from an earlier query so it cannot
                # be mistaken for a score for this one.
                doc.pop("rerank_score", None)

        # Docs the API did not score rank as 0.0; the sort is stable, so
        # ties keep their input order.
        order = sorted(
            range(len(valid_docs)),
            key=lambda idx: scores.get(idx, 0.0),
            reverse=True,
        )

        logger.info(
            f"[JinaReranker] Done in {elapsed_ms:.0f}ms — "
            f"{len(valid_docs)} docs ranked, tokens={total_tokens}"
        )
        top_order = order[:top_n]
        if logger.isEnabledFor(logging.INFO):
            for rank, idx in enumerate(top_order, 1):
                doc = valid_docs[idx]
                logger.info(
                    f"[JinaReranker] Rank #{rank}: score={scores.get(idx, 0.0):.4f} "
                    f"src={self._doc_source(doc)} — {self._doc_label(doc)}"
                )

        return [valid_docs[idx] for idx in top_order]

    def is_available(self) -> bool:
        """Return True when a usable API key was configured."""
        return self.api_key is not None