xnli-wordpiece-fr / metrics_report.json
HeyDunaX's picture
Upload metrics_report.json with huggingface_hub
cd16e56 verified
Raw
History Blame
454 Bytes
{
"tokenizer": {
"vocab_size": 8000,
"min_frequency": 2,
"special_tokens": [
"<s>",
"<pad>",
"</s>",
"<unk>",
"<mask>"
],
"dataset": "facebook/xnli/fr",
"corpus_lines": 800404
},
"metrics": {
"tokens_per_char": 0.25600420245196237,
"fertility": 1.5406676418575203,
"avg_seq_len": 23.5224,
"vocab_coverage": 1.0,
"total_sentences": 15000,
"unique_word_types": 21933
}
}