Sentence Similarity
sentence-transformers
Safetensors
English
Portuguese
splade
sparse
embeddings
bert
portuguese
ptbr
cnmoro commited on
Commit
9acd1d0
·
verified ·
1 Parent(s): fe9bb8f

Upload 13 files

Browse files
config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "pytorch": "2.6.0+cu124",
4
+ "sentence_transformers": "5.5.0",
5
+ "transformers": "5.8.1"
6
+ },
7
+ "default_prompt_name": null,
8
+ "model_type": "SparseEncoder",
9
+ "prompts": {
10
+ "document": "",
11
+ "query": ""
12
+ },
13
+ "similarity_fn_name": "dot"
14
+ }
document_0_Transformer/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_cross_attention": false,
3
+ "architectures": [
4
+ "BertForMaskedLM"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": null,
8
+ "classifier_dropout": null,
9
+ "dtype": "float32",
10
+ "eos_token_id": null,
11
+ "gradient_checkpointing": false,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 768,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 3072,
17
+ "is_decoder": false,
18
+ "layer_norm_eps": 1e-12,
19
+ "max_position_embeddings": 512,
20
+ "model_type": "bert",
21
+ "num_attention_heads": 12,
22
+ "num_hidden_layers": 12,
23
+ "pad_token_id": 0,
24
+ "position_embedding_type": "absolute",
25
+ "tie_word_embeddings": true,
26
+ "transformers_version": "5.8.1",
27
+ "type_vocab_size": 2,
28
+ "use_cache": false,
29
+ "vocab_size": 30522
30
+ }
document_0_Transformer/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2182f35f3839cd35fd55e31998238e2f1f478ed7fbf984038e70e823aa7b790f
3
+ size 438080896
document_0_Transformer/sentence_bert_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "transformer_task": "fill-mask",
3
+ "modality_config": {
4
+ "text": {
5
+ "method": "forward",
6
+ "method_output_name": "logits"
7
+ }
8
+ },
9
+ "module_output_name": "token_embeddings"
10
+ }
document_0_Transformer/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
document_0_Transformer/tokenizer_config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "cls_token": "[CLS]",
4
+ "do_basic_tokenize": true,
5
+ "do_lower_case": true,
6
+ "is_local": false,
7
+ "local_files_only": false,
8
+ "mask_token": "[MASK]",
9
+ "model_max_length": 256,
10
+ "never_split": null,
11
+ "pad_token": "[PAD]",
12
+ "sep_token": "[SEP]",
13
+ "strip_accents": null,
14
+ "tokenize_chinese_chars": true,
15
+ "tokenizer_class": "BertTokenizer",
16
+ "unk_token": "[UNK]"
17
+ }
document_1_SpladePooling/config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "pooling_strategy": "max",
3
+ "activation_function": "relu",
4
+ "embedding_dimension": 768
5
+ }
modules.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.base.modules.router.Router"
7
+ }
8
+ ]
query_0_SparseStaticEmbedding/config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "frozen": false
3
+ }
query_0_SparseStaticEmbedding/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acbd95b22b6cbce01192ee0e241029446e10e965af94959beb7691bd1074c21c
3
+ size 122168
query_0_SparseStaticEmbedding/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
query_0_SparseStaticEmbedding/tokenizer_config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "cls_token": "[CLS]",
4
+ "do_basic_tokenize": true,
5
+ "do_lower_case": true,
6
+ "is_local": false,
7
+ "local_files_only": false,
8
+ "mask_token": "[MASK]",
9
+ "model_max_length": 256,
10
+ "never_split": null,
11
+ "pad_token": "[PAD]",
12
+ "sep_token": "[SEP]",
13
+ "strip_accents": null,
14
+ "tokenize_chinese_chars": true,
15
+ "tokenizer_class": "BertTokenizer",
16
+ "unk_token": "[UNK]"
17
+ }
router_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "types": {
3
+ "query_0_SparseStaticEmbedding": "sentence_transformers.sparse_encoder.modules.sparse_static_embedding.SparseStaticEmbedding",
4
+ "document_0_Transformer": "sentence_transformers.base.modules.transformer.Transformer",
5
+ "document_1_SpladePooling": "sentence_transformers.sparse_encoder.modules.splade_pooling.SpladePooling"
6
+ },
7
+ "structure": {
8
+ "query": [
9
+ "query_0_SparseStaticEmbedding"
10
+ ],
11
+ "document": [
12
+ "document_0_Transformer",
13
+ "document_1_SpladePooling"
14
+ ]
15
+ },
16
+ "parameters": {
17
+ "default_route": "query",
18
+ "allow_empty_key": true,
19
+ "route_mappings": {}
20
+ }
21
+ }