{
"tokenizer": {
"vocab_size": 8000,
"min_frequency": 2,
"special_tokens": [
"",
"",
"",
"",
""
],
"dataset": "facebook/xnli/vi",
"corpus_lines": 800404
},
"metrics": {
"tokens_per_char": 0.2967440807085845,
"fertility": 1.314215987511743,
"avg_seq_len": 24.807666666666666,
"vocab_coverage": 1.0,
"total_sentences": 15000,
"unique_word_types": 9556
}
}