| { | |
| "tokenizer": { | |
| "vocab_size": 8000, | |
| "min_frequency": 2, | |
| "special_tokens": [ | |
| "<s>", | |
| "<pad>", | |
| "</s>", | |
| "<unk>", | |
| "<mask>" | |
| ], | |
| "dataset": "facebook/xnli/tr", | |
| "corpus_lines": 800404 | |
| }, | |
| "metrics": { | |
| "tokens_per_char": 0.2597173899305004, | |
| "fertility": 1.945246486756174, | |
| "avg_seq_len": 20.274266666666666, | |
| "vocab_coverage": 1.0, | |
| "total_sentences": 15000, | |
| "unique_word_types": 29531 | |
| } | |
| } |