Text Classification
sentence-transformers
Safetensors
multilingual
cross-encoder
reranker
affiliation-matching
scholarly-metadata
custom_code
Instructions to use cometadata/jina-reranker-v2-multilingual-affiliations-comet-training-only with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- sentence-transformers
How to use cometadata/jina-reranker-v2-multilingual-affiliations-comet-training-only with sentence-transformers:
```python
from sentence_transformers import CrossEncoder

model = CrossEncoder("cometadata/jina-reranker-v2-multilingual-affiliations-comet-training-only", trust_remote_code=True)

query = "Which planet is known as the Red Planet?"
passages = [
    "Venus is often called Earth's twin because of its similar size and proximity.",
    "Mars, known for its reddish appearance, is often referred to as the Red Planet.",
    "Jupiter, the largest planet in our solar system, has a prominent red spot.",
    "Saturn, famous for its rings, is sometimes mistaken for the Red Planet."
]

scores = model.predict([(query, passage) for passage in passages])
print(scores)
```
- Notebooks
- Google Colab
- Kaggle
File size: 2,728 Bytes
68e6b68 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 | from transformers import PretrainedConfig
import torch
class XLMRobertaFlashConfig(PretrainedConfig):
    """Configuration object holding the hyper-parameters given to the constructor.

    Every constructor argument except the three special-token ids is stored
    unchanged as an instance attribute of the same name. The special-token
    ids and any extra keyword arguments are forwarded to
    ``PretrainedConfig.__init__``. ``torch_dtype`` is the only value that is
    transformed before being stored (see below).
    """

    def __init__(
        self,
        vocab_size=30522,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        pad_token_id=1,
        bos_token_id=0,
        eos_token_id=2,
        position_embedding_type="absolute",
        use_cache=True,
        classifier_dropout=None,
        lora_adaptations=None,
        lora_rank=4,
        lora_dropout_p=0.0,
        lora_alpha=1,
        lora_main_params_trainable=False,
        load_trained_adapters=False,
        use_flash_attn=True,
        torch_dtype=None,
        emb_pooler=None,
        matryoshka_dimensions=None,
        truncate_dim=None,
        **kwargs,
    ):
        """Store the given settings on the instance.

        Args:
            vocab_size, hidden_size, num_hidden_layers, num_attention_heads,
            intermediate_size, hidden_act, hidden_dropout_prob,
            attention_probs_dropout_prob, max_position_embeddings,
            type_vocab_size, initializer_range, layer_norm_eps,
            position_embedding_type, use_cache, classifier_dropout:
                Stored as-is under the same attribute name.
            pad_token_id, bos_token_id, eos_token_id:
                Forwarded to ``PretrainedConfig.__init__``.
            lora_adaptations, lora_rank, lora_dropout_p, lora_alpha,
            lora_main_params_trainable, load_trained_adapters:
                Stored as-is under the same attribute name.
            use_flash_attn, emb_pooler, matryoshka_dimensions, truncate_dim:
                Stored as-is under the same attribute name.
            torch_dtype: Either ``None``, a ``torch.dtype``, or a string. A
                string that names a ``torch.dtype`` attribute of the ``torch``
                module (for example ``"float16"``) is replaced by that dtype
                object; any other value is stored unchanged.
            **kwargs: Forwarded to ``PretrainedConfig.__init__``.
        """
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            **kwargs,
        )

        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.hidden_act = hidden_act
        self.intermediate_size = intermediate_size
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.max_position_embeddings = max_position_embeddings
        self.type_vocab_size = type_vocab_size
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps
        self.position_embedding_type = position_embedding_type
        self.use_cache = use_cache
        self.classifier_dropout = classifier_dropout
        self.load_trained_adapters = load_trained_adapters
        self.lora_adaptations = lora_adaptations
        self.lora_rank = lora_rank
        self.lora_dropout_p = lora_dropout_p
        self.lora_alpha = lora_alpha
        self.lora_main_params_trainable = lora_main_params_trainable
        self.use_flash_attn = use_flash_attn
        self.emb_pooler = emb_pooler
        self.matryoshka_dimensions = matryoshka_dimensions
        self.truncate_dim = truncate_dim

        # Only a string can be looked up by name on the torch module;
        # hasattr/getattr raise TypeError for a non-string attribute name,
        # so a torch.dtype passed directly must bypass the lookup.
        resolved_dtype = (
            getattr(torch, torch_dtype, None)
            if isinstance(torch_dtype, str)
            else None
        )
        if isinstance(resolved_dtype, torch.dtype):
            self.torch_dtype = resolved_dtype
        else:
            # None, an already-built dtype, or a string that does not name a
            # torch dtype: keep exactly what the caller supplied.
            self.torch_dtype = torch_dtype
|