Sentence Similarity
sentence-transformers
Safetensors
feature-extraction
dense
Generated from Trainer
dataset_size:297400
loss:CosineSimilarityLoss
Instructions to use gguichard/matching-rh-peft3 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- sentence-transformers
How to use gguichard/matching-rh-peft3 with sentence-transformers:
from sentence_transformers import SentenceTransformer model = SentenceTransformer("gguichard/matching-rh-peft3") sentences = [ "{\"type\": \"opportunity\", \"customer_code\": \"\", \"opportunity_title\": \"#MakeReal#Data - Expertise GCP à la demande\", \"opportunity_place\": \"\", \"opportunity_expertise_area\": \"-1\", \"opportunity_tools\": \"\", \"opportunity_activity_area\": \"\", \"opportunity_type\": \"1\", \"opportunity_description\": \"\", \"opportunity_criteria\": \"\", \"opportunity_extract\": 1}", "{\"type\": \"candidate\", \"customer_code\": \"\", \"title\": \"CONTROLEUSE DE GESTION SENIOR/RAF\", \"skills\": \"\", \"education\": \"\", \"experience\": \"-1\", \"tools\": \"\", \"languages\": \"\", \"mobility\": \"\", \"expertise_area\": \"\", \"activity_area\": \"\", \"list_diplomes\": \"2006 - Master 1 Maîtrise de Sciences Economiques et de Gestion - Marne La Vallée 77 - not provided\", \"typeOf\": \"-1\", \"source\": \"-1\", \"informationComments\": \"\", \"extract\": 1, \"experiences\": \"[{'skills': '', 'startMonth': '', 'endDate': '', 'startYear': '', 'description': \\\"service Cotation en charge de L'analyse de la rentabilité, de la solvabilité et de l'autonomie financière des entreprises L'établissement du diagnostic financier\\\", 'company': '', 'location': '', 'id': '23447', 'title': 'Assistante - BANQUE DE FRANCE - 01/01/1994 - 01/01/1994', 'endMonth': '', 'endYear': '', 'startDate': ''}, {'skills': '', 'startMonth': '', 'endDate': '', 'startYear': '', 'description': '', 'company': '', 'location': '', 'id': '23448', 'title': 'SUDAC Air Service Groupe - AIR LIQUIDE - 01/01/2007 - 01/01/2008', 'endMonth': '', 'endYear': '', 'startDate': ''}, {'skills': '', 'startMonth': '', 'endDate': '', 'startYear': '', 'description': \\\"Responsable du contrôle de gestion et comptabilité auxiliaire charge de L'analyse et le suivi de la rentabilité de trois sociétés et de leurs portefeuilles clients La collecte la consolidation et validation de tableau de bords 
pour la production La consolidation de données financières pour le suivi budgétaire (mensuel/annuel L'analyse des écarts entre le réalisé et le Budgété 6 Rue du Centre 91 Essonne Tel : 06-29-46-98-74 Mail : ketty58_9@hotmail.com Permis B + Véhicule Téléchargé par TEOLIA (111069) le 06/01/2022 14:10:20 Le suivi du process facturation (comptabilité clients et fournisseurs) L'établissement des rapprochements avec l'expert-comptable et de la clôture La trésorerie et du management de trois (3) personnes\\\", 'company': '', 'location': '', 'id': '23449', 'title': 'Kéthia MICHEL - 01/01/1994 - 01/01/1994', 'endMonth': '', 'endYear': '', 'startDate': ''}, {'skills': '', 'startMonth': '', 'endDate': '', 'startYear': '', 'description': 'logistique de 13 M€ CA/an et effectifs 70 p) - Ivry sur Seine', 'company': '', 'location': '', 'id': '23450', 'title': 'AXELIS+ Société - 01/01/2009 - 01/01/2016', 'endMonth': '', 'endYear': '', 'startDate': ''}, {'skills': '', 'startMonth': '', 'endDate': '', 'startYear': '', 'description': 'informatique de 44 M€/an effectifs 650 p) -St Denis', 'company': '', 'location': '', 'id': '23451', 'title': 'LINKBYNET Société - 01/01/2016 - 01/01/2017', 'endMonth': '', 'endYear': '', 'startDate': ''}, {'skills': '', 'startMonth': '', 'endDate': '', 'startYear': '', 'description': \\\"conseil) -Paris (08) 1 an Contrôleuse de gestion IT détachée chez BPCE-IT et en charge de : La reprise de leur modèle de facturation L'amélioration de leur modèle de facturation L'analyse entre les coûts réels et les coûts Budgétés L'analyse des écarts entre le réalisé et le Budgété\\\", 'company': '', 'location': '', 'id': '23452', 'title': 'RHAPSODIES Société - 01/01/2017 - 01/01/2018', 'endMonth': '', 'endYear': '', 'startDate': ''}, {'skills': '', 'startMonth': '', 'endDate': '', 'startYear': '', 'description': \\\"1 an Consultante en contrôle de gestion en charge de : La construction du PL Mise en place de tableau de bord du suivi de la productivité Mise en place 
d'indicateur pour le service de facturation Construction d'un budget sur 3 ans L'analyse entre le budgété et le réalisé Support à l'amélioration des process de facturation et comptabilité fournisseurs Support à l'amélioration des enregistrements analytiques et comptables\\\", 'company': '', 'location': '', 'id': '23453', 'title': 'CONSULTANTE - EN CONTROLE DE GESTION - 01/01/2018 - 01/01/2019', 'endMonth': '', 'endYear': '', 'startDate': ''}, {'skills': '', 'startMonth': '', 'endDate': '', 'startYear': '', 'description': \\\"en contrôle de gestion détachée chez GRT GAZ en charge de : L'évolution des coûts et du suivi budgétaire L'évolution des OPEX/CAPEX La mise en place de tableau de bord L'accompagnement des chefs de projet et portefolio dans leur suivi de projet et portefeuille La construction du budget annuel La construction du reporting trimestriel et annuel\\\", 'company': '', 'location': '', 'id': '23454', 'title': 'Consultante - FAO CONSULTING (société de conseil) - Levallois Perret - 01/03/2020', 'endMonth': '', 'endYear': '', 'startDate': ''}]\"}", "{\"type\": \"candidate\", \"customer_code\": \"\", \"title\": \"\", \"skills\": \"CAO, Construction, GESTION, IBM CATIA, IBM CATIA Version 5, Marketing Management, Microsoft, Microsoft Excel, Microsoft PowerPoint, Microsoft Word, Pricing, RAID\", \"education\": \"\", \"experience\": \"0\", \"tools\": \"\", \"languages\": \"\", \"mobility\": \"\", \"expertise_area\": \"\", \"activity_area\": \"commercial\", \"list_diplomes\": \"DUT - ET COMPETENCES - not provided - 1999, DUT - Génie électrique - Université J. Fourier à Grenoble, BAC S - Génie Mécanique et Productique - Université J. Fourier à Grenoble - 1996, DUT - Option technologies industrielles - Lycée Vaucanson à Grenoble - 1999, DUT - Génie électrique - Université J. 
Fourier à Grenoble\", \"typeOf\": \"-1\", \"source\": \"7\", \"informationComments\": \"\", \"extract\": 1, \"experiences\": \"[{'description': 'Trucks Commercial Vehicle\\\\r\\\\n', 'title': 'Manager Marketing véhicules Construction - Renault'}, {'description': \\\"- Saint-Priest (69) Responsable de l'animation Marketing de la gamme Construction * Réalisation des plateformes Marketing intégrant le contenu de l'offre, l'argumentation commerciale et l'analyse concurrence * Création d'ateliers de présentation des véhicules adaptés aux différents marchés internationaux * Organisation d'événements promotionnels et présentations clients et journalistes * Analyse trimestrielle des ventes par modèles et définition d'actions marketing et pricing * Conception des cahiers des charges formations commerciales Manager Marketing gamme lourde - Renault Trucks International\\\\r\\\\n\\\", 'title': 'Groupe AB Volvo - De - 01/01/2012'}, {'description': 'Trucks Commercial Vehicle\\\\r\\\\n', 'title': 'Manager Marketing véhicules Construction - Renault - 01/02/2000'}, {'description': \\\"- Saint-Priest (69) Responsable de l'animation Marketing de la gamme Construction * Réalisation des plateformes Marketing intégrant le contenu de l'offre, l'argumentation commerciale et l'analyse concurrence * Création d'ateliers de présentation des véhicules adaptés aux différents marchés internationaux * Organisation d'événements promotionnels et présentations clients et journalistes * Analyse trimestrielle des ventes par modèles et définition d'actions marketing et pricing * Conception des cahiers des charges formations commerciales Manager Marketing gamme lourde - Renault Trucks International\\\\r\\\\n\\\", 'title': 'Groupe AB Volvo - De - 01/01/2012'}]\"}", "{\"type\": \"candidate\", \"customer_code\": \"\", \"title\": \"\", \"skills\": \"\", \"education\": \"\", \"experience\": \"-1\", \"tools\": \"\", \"languages\": \"\", \"mobility\": \"\", \"expertise_area\": \"\", \"activity_area\": \"\", 
\"list_diplomes\": \"\", \"typeOf\": \"0\", \"source\": \"\", \"informationComments\": \"\", \"extract\": 1, \"experiences\": \"[]\"}" ] embeddings = model.encode(sentences) similarities = model.similarity(embeddings, embeddings) print(similarities.shape) # [4, 4] - Notebooks
- Google Colab
- Kaggle
| # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨 | |
| # This file was automatically generated from src/transformers/models/eurobert/modular_eurobert.py. | |
| # Do NOT edit this file manually as any edits will be overwritten by the generation of | |
| # the file from the modular. If any change should be done, please apply the change to the | |
| # modular_eurobert.py file directly. One of our CI enforces this. | |
| # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨 | |
| # coding=utf-8 | |
| # Copyright 2025 Nicolas Boizard, Duarte M. Alves, Hippolyte Gisserot-Boukhlef and the EuroBert team. All rights reserved. | |
| # | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| from transformers.utils import logging | |
| from transformers.models.llama import LlamaConfig | |
| logger = logging.get_logger(__name__) | |
class EuroBertConfig(LlamaConfig):
    r"""
    This is the configuration class to store the configuration of a [`EuroBertModel`]. It is used to instantiate an EuroBert
    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the EuroBERT-210m.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        vocab_size (`int`, *optional*, defaults to 128256):
            Vocabulary size of the EuroBert model. Defines the number of different tokens that can be represented by the
            `input_ids` passed when calling [`EuroBertModel`]
        hidden_size (`int`, *optional*, defaults to 768):
            Dimensionality of the encoder layers and the pooler layer.
        intermediate_size (`int`, *optional*, defaults to 3072):
            Dimensionality of the "intermediate" (often named feed-forward) layer in the Transformer encoder.
        num_hidden_layers (`int`, *optional*, defaults to 12):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 12):
            Number of attention heads for each attention layer in the Transformer encoder.
        num_key_value_heads (`int`, *optional*):
            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
            `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
            by meanpooling all the original heads within that group. For more details checkout [this
            paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
            `num_attention_heads`.
        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
            The non-linear activation function (function or string) in the encoder and pooler.
        max_position_embeddings (`int`, *optional*, defaults to 8192):
            The maximum sequence length that this model might ever be used with. EuroBert supports up to 8192 tokens,
            EuroBert-pretrained up to 2048.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        rms_norm_eps (`float`, *optional*, defaults to 1e-05):
            The epsilon used by the rms normalization layers.
        bos_token_id (`int`, *optional*, defaults to 128000):
            Beginning of stream token id.
        eos_token_id (`int`, *optional*, defaults to 128001):
            End of stream token id.
        pad_token_id (`int`, *optional*, defaults to 128001):
            Padding token id.
        mask_token_id (`int`, *optional*, defaults to 128002):
            Mask token id.
        pretraining_tp (`int`, *optional*, defaults to 1):
            Experimental feature. Tensor parallelism rank used during pretraining. Please refer to [this
            document](https://huggingface.co/docs/transformers/main/perf_train_gpu_many#tensor-parallelism) to
            understand more about it. This value is necessary to ensure exact reproducibility of the pretraining
            results. Please refer to [this issue](https://github.com/pytorch/pytorch/issues/76232).
        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
            Whether to tie weight embeddings
        rope_theta (`float`, *optional*, defaults to 250000.0):
            The base period of the RoPE embeddings. EuroBert used base period of 250000.0,
            EuroBert-pretrained 10000.0.
        rope_scaling (`Dict`, *optional*):
            Dictionary containing the scaling configuration for the RoPE embeddings. NOTE: if you apply new rope type
            and you expect the model to work on longer `max_position_embeddings`, we recommend you to update this value
            accordingly.
            Expected contents:
                `rope_type` (`str`):
                    The sub-variant of RoPE to use. Can be one of ['default', 'linear', 'dynamic', 'yarn', 'longrope',
                    'eurobert3'], with 'default' being the original RoPE implementation.
                `factor` (`float`, *optional*):
                    Used with all rope types except 'default'. The scaling factor to apply to the RoPE embeddings. In
                    most scaling types, a `factor` of x will enable the model to handle sequences of length x *
                    original maximum pre-trained length.
                `original_max_position_embeddings` (`int`, *optional*):
                    Used with 'dynamic', 'longrope' and 'eurobert3'. The original max position embeddings used during
                    pretraining.
                `attention_factor` (`float`, *optional*):
                    Used with 'yarn' and 'longrope'. The scaling factor to be applied on the attention
                    computation. If unspecified, it defaults to value recommended by the implementation, using the
                    `factor` field to infer the suggested value.
                `beta_fast` (`float`, *optional*):
                    Only used with 'yarn'. Parameter to set the boundary for extrapolation (only) in the linear
                    ramp function. If unspecified, it defaults to 32.
                `beta_slow` (`float`, *optional*):
                    Only used with 'yarn'. Parameter to set the boundary for interpolation (only) in the linear
                    ramp function. If unspecified, it defaults to 1.
                `short_factor` (`List[float]`, *optional*):
                    Only used with 'longrope'. The scaling factor to be applied to short contexts (<
                    `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
                    size divided by the number of attention heads divided by 2
                `long_factor` (`List[float]`, *optional*):
                    Only used with 'longrope'. The scaling factor to be applied to long contexts (>
                    `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
                    size divided by the number of attention heads divided by 2
                `low_freq_factor` (`float`, *optional*):
                    Only used with 'eurobert3'. Scaling factor applied to low frequency components of the RoPE
                `high_freq_factor` (`float`, *optional*):
                    Only used with 'eurobert3'. Scaling factor applied to high frequency components of the RoPE
        attention_bias (`bool`, *optional*, defaults to `False`):
            Whether to use a bias in the query, key, value and output projection layers during self-attention.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        mlp_bias (`bool`, *optional*, defaults to `False`):
            Whether to use a bias in up_proj, down_proj and gate_proj layers in the MLP layers.
        head_dim (`int`, *optional*):
            The attention head dimension. If None, it will default to hidden_size // num_attention_heads
        classifier_pooling (`str`, *optional*, defaults to `"late"`):
            The pooling strategy to use for the classifier. Can be one of ['bos', 'mean', 'late']. The value is
            stored on the configuration as the attribute `clf_pooling`. For that reason a `clf_pooling` keyword
            argument is also accepted and, when given (not `None`), takes precedence over `classifier_pooling`.

    ```python
    >>> from transformers import EuroBertModel, EuroBertConfig

    >>> # Initializing a EuroBert eurobert-base style configuration
    >>> configuration = EuroBertConfig()

    >>> # Initializing a model from the eurobert-base style configuration
    >>> model = EuroBertModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```"""

    model_type = "eurobert"

    def __init__(
        self,
        vocab_size=128256,
        hidden_size=768,
        intermediate_size=3072,
        num_hidden_layers=12,
        num_attention_heads=12,
        num_key_value_heads=None,
        hidden_act="silu",
        max_position_embeddings=8192,
        initializer_range=0.02,
        rms_norm_eps=1e-05,
        bos_token_id=128000,
        eos_token_id=128001,
        pad_token_id=128001,
        mask_token_id=128002,
        pretraining_tp=1,
        tie_word_embeddings=False,
        rope_theta=250000.0,
        rope_scaling=None,
        attention_bias=False,
        attention_dropout=0.0,
        mlp_bias=False,
        head_dim=None,
        classifier_pooling="late",
        **kwargs,
    ):
        # use_cache is specific to decoder models and should be set to False for encoder models
        use_cache = kwargs.pop("use_cache", None)
        if use_cache:
            logger.warning_once(
                "The `use_cache` argument to EuroBertConfig is set to `False`, as caching is never used for encoder models."
            )

        # The pooling strategy is stored below under the attribute name `clf_pooling`, while the
        # constructor parameter is `classifier_pooling`. A config that carries the key `clf_pooling`
        # (presumably what a saved config.json contains -- verify against PretrainedConfig
        # serialization) would otherwise be forwarded through **kwargs and then overwritten by the
        # `classifier_pooling` default at the end of this method. Accept it explicitly instead.
        clf_pooling = kwargs.pop("clf_pooling", None)
        if clf_pooling is not None:
            classifier_pooling = clf_pooling

        # Without an explicit key/value head count, use one per attention head.
        if num_key_value_heads is None:
            num_key_value_heads = num_attention_heads

        super().__init__(
            vocab_size=vocab_size,
            hidden_size=hidden_size,
            intermediate_size=intermediate_size,
            num_hidden_layers=num_hidden_layers,
            num_attention_heads=num_attention_heads,
            num_key_value_heads=num_key_value_heads,
            hidden_act=hidden_act,
            max_position_embeddings=max_position_embeddings,
            initializer_range=initializer_range,
            rms_norm_eps=rms_norm_eps,
            use_cache=False,  # always disabled, regardless of what the caller passed
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            pad_token_id=pad_token_id,
            pretraining_tp=pretraining_tp,
            tie_word_embeddings=tie_word_embeddings,
            rope_theta=rope_theta,
            rope_scaling=rope_scaling,
            attention_bias=attention_bias,
            attention_dropout=attention_dropout,
            mlp_bias=mlp_bias,
            head_dim=head_dim,
            **kwargs,
        )
        # These two values are not forwarded to the parent constructor; set them here.
        self.mask_token_id = mask_token_id
        self.clf_pooling = classifier_pooling
| __all__ = ["EuroBertConfig"] | |