{
  "tokenizer_dir": "NeTSlab/gpt2-10M-parfind-eng",
  "datapoint_length": 512,
  "training_type": "strict_small",
  "n_epochs": 10,
  "batch_size": 16,
  "learning_rate": 0.00005,
  "weight_decay": 0,
  "num_training_steps": 200000,
  "num_warmup_steps": 2000,
  "sft_learning_rate": 0.00005,
  "gradient_clip_norm": 1,
  "seed": -1,
  "base_folder": "03-models",
  "experiment_name": "gpt2_SylliTokFast_10M",
  "use_wandb": false,
  "wandb_experiment_name": "gpt2_SylliTokFast_10M",
  "wandb_project_name": "BabyLM-2025",
  "tokenizer_class": "SyllabicTokenizerWrapper",
  "model_type": "gpt2",
  "vocab_size": 20535
}