{ "tokenizer": { "vocab_size": 8000, "min_frequency": 2, "special_tokens": [ "", "", "", "", "" ], "dataset": "facebook/xnli/vi", "corpus_lines": 800404 }, "metrics": { "tokens_per_char": 0.2967440807085845, "fertility": 1.314215987511743, "avg_seq_len": 24.807666666666666, "vocab_coverage": 1.0, "total_sentences": 15000, "unique_word_types": 9556 } }