| { | |
| "tokenizer": { | |
| "vocab_size": 8000, | |
| "min_frequency": 2, | |
| "special_tokens": [ | |
| "<s>", | |
| "<pad>", | |
| "</s>", | |
| "<unk>", | |
| "<mask>" | |
| ], | |
| "dataset": "facebook/xnli/fr", | |
| "corpus_lines": 800404 | |
| }, | |
| "metrics": { | |
| "tokens_per_char": 0.25600420245196237, | |
| "fertility": 1.5406676418575203, | |
| "avg_seq_len": 23.5224, | |
| "vocab_coverage": 1.0, | |
| "total_sentences": 15000, | |
| "unique_word_types": 21933 | |
| } | |
| } |