Update config.json
Browse files
- config.json +1 -6
config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"tokenizer_dir": "NeTSlab/gpt2-10M-
|
| 3 |
"datapoint_length" : 512,
|
| 4 |
"training_type" : "strict_small",
|
| 5 |
"n_epochs" : 10,
|
|
@@ -11,11 +11,6 @@
|
|
| 11 |
"sft_learning_rate" : 0.00005,
|
| 12 |
"gradient_clip_norm" : 1,
|
| 13 |
"seed" : -1,
|
| 14 |
-
"base_folder" : "03-models",
|
| 15 |
-
"experiment_name" : "gpt2_SylliTokFast_10M",
|
| 16 |
-
"use_wandb" : false,
|
| 17 |
-
"wandb_experiment_name" : "gpt2_SylliTokFast_10M",
|
| 18 |
-
"wandb_project_name" : "BabyLM-2025",
|
| 19 |
"tokenizer_class": "SyllabicTokenizerWrapper",
|
| 20 |
"model_type": "gpt2",
|
| 21 |
"vocab_size": 20535
|
|
|
|
| 1 |
{
|
| 2 |
+
"tokenizer_dir": "NeTSlab/gpt2-10M-syllitok-eng",
|
| 3 |
"datapoint_length" : 512,
|
| 4 |
"training_type" : "strict_small",
|
| 5 |
"n_epochs" : 10,
|
|
|
|
| 11 |
"sft_learning_rate" : 0.00005,
|
| 12 |
"gradient_clip_norm" : 1,
|
| 13 |
"seed" : -1,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
"tokenizer_class": "SyllabicTokenizerWrapper",
|
| 15 |
"model_type": "gpt2",
|
| 16 |
"vocab_size": 20535
|