{ "tokenizer": { "vocab_size": 8000, "min_frequency": 2, "special_tokens": [ "", "", "", "", "" ], "dataset": "facebook/xnli/tr", "corpus_lines": 800404 }, "metrics": { "tokens_per_char": 0.2597173899305004, "fertility": 1.945246486756174, "avg_seq_len": 20.274266666666666, "vocab_coverage": 1.0, "total_sentences": 15000, "unique_word_types": 29531 } }