File size: 463 Bytes
2dc6e61 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 | {
"tokenizer": {
"vocab_size": 8000,
"min_frequency": 2,
"special_tokens": [
"<s>",
"<pad>",
"</s>",
"<unk>",
"<mask>"
],
"dataset": "facebook/xnli/tr",
"corpus_lines": 800404
},
"metrics": {
"tokens_per_char": 0.2597173899305004,
"fertility": 1.945246486756174,
"avg_seq_len": 20.274266666666666,
"vocab_coverage": 1.0,
"total_sentences": 15000,
"unique_word_types": 29531
}
} |