{ "tokenizer": { "vocab_size": 8000, "min_frequency": 2, "special_tokens": [ "", "", "", "", "" ], "dataset": "facebook/xnli/vi", "corpus_lines": 800404 }, "metrics": { "tokens_per_char": 0.25805486952479, "fertility": 1.1428697562388308, "avg_seq_len": 21.573266666666665, "vocab_coverage": 1.0, "total_sentences": 15000, "unique_word_types": 9556 } }