{ "tokenizer": { "vocab_size": 8000, "min_frequency": 2, "special_tokens": [ "", "", "", "", "" ], "dataset": "facebook/xnli/fr", "corpus_lines": 800404 }, "metrics": { "tokens_per_char": 0.25600420245196237, "fertility": 1.5406676418575203, "avg_seq_len": 23.5224, "vocab_coverage": 1.0, "total_sentences": 15000, "unique_word_types": 21933 } }