File size: 463 Bytes
2dc6e61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
{
  "tokenizer": {
    "vocab_size": 8000,
    "min_frequency": 2,
    "special_tokens": [
      "<s>",
      "<pad>",
      "</s>",
      "<unk>",
      "<mask>"
    ],
    "dataset": "facebook/xnli/tr",
    "corpus_lines": 800404
  },
  "metrics": {
    "tokens_per_char": 0.2597173899305004,
    "fertility": 1.945246486756174,
    "avg_seq_len": 20.274266666666666,
    "vocab_coverage": 1.0,
    "total_sentences": 15000,
    "unique_word_types": 29531
  }
}