Upload config.json with huggingface_hub
Browse files- config.json +65 -0
config.json
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architecture": "nanoGPT (custom, trained from scratch)",
|
| 3 |
+
"model_type": "3-stage pipeline: pretrained -> SFT -> RLVR",
|
| 4 |
+
"primary_checkpoint": "nanogpt_slm_rlvr_final.pth",
|
| 5 |
+
"model_config": {
|
| 6 |
+
"block_size": 512,
|
| 7 |
+
"vocab_size": 50257,
|
| 8 |
+
"n_layer": 12,
|
| 9 |
+
"n_head": 12,
|
| 10 |
+
"n_embd": 768,
|
| 11 |
+
"dropout": 0.0,
|
| 12 |
+
"bias": true
|
| 13 |
+
},
|
| 14 |
+
"total_parameters_millions": 124.0,
|
| 15 |
+
"tokenizer": "tiktoken gpt2 (50,257 BPE tokens)",
|
| 16 |
+
"framework": "PyTorch",
|
| 17 |
+
"checkpoints": {
|
| 18 |
+
"nanogpt_slm_tinystories_best.pth": "Stage 1 -- pretrained base",
|
| 19 |
+
"nanogpt_slm_sft_best.pth": "Stage 2 -- SFT (positive-sentiment subset)",
|
| 20 |
+
"nanogpt_slm_rlvr_final.pth": "Stage 3 -- RLVR (primary)"
|
| 21 |
+
},
|
| 22 |
+
"dataset": {
|
| 23 |
+
"name": "TinyStories (roneneldan/TinyStories)",
|
| 24 |
+
"description": "2.1M synthetic short stories for 3-5 year old children"
|
| 25 |
+
},
|
| 26 |
+
"training": {
|
| 27 |
+
"stage1_pretraining": {
|
| 28 |
+
"iterations": 70000,
|
| 29 |
+
"learning_rate": "6e-4 -> 1e-5 cosine",
|
| 30 |
+
"batch": "32 x 512, grad-accum 4",
|
| 31 |
+
"precision": "bfloat16"
|
| 32 |
+
},
|
| 33 |
+
"stage2_sft": {
|
| 34 |
+
"data": "positive-sentiment subset (VADER compound > 0.05): 1.91M stories",
|
| 35 |
+
"iterations": 12952,
|
| 36 |
+
"learning_rate": "5e-5 -> 5e-6 cosine",
|
| 37 |
+
"best_val_loss": 1.2037
|
| 38 |
+
},
|
| 39 |
+
"stage3_rlvr": {
|
| 40 |
+
"algorithm": "vanilla policy gradient",
|
| 41 |
+
"reward": "VADER compound sentiment (verifiable)",
|
| 42 |
+
"kl_penalty": "beta=0.1 vs frozen SFT reference",
|
| 43 |
+
"iterations": 200,
|
| 44 |
+
"generation_batch": 16,
|
| 45 |
+
"trajectory_len": 200,
|
| 46 |
+
"learning_rate": "5e-6",
|
| 47 |
+
"mean_reward": "+0.6485 -> +0.8652"
|
| 48 |
+
}
|
| 49 |
+
},
|
| 50 |
+
"sentiment_comparison": {
|
| 51 |
+
"pretrained": {
|
| 52 |
+
"mean": 0.8428,
|
| 53 |
+
"std": 0.3907
|
| 54 |
+
},
|
| 55 |
+
"sft": {
|
| 56 |
+
"mean": 0.8703,
|
| 57 |
+
"std": 0.2853
|
| 58 |
+
},
|
| 59 |
+
"rlvr": {
|
| 60 |
+
"mean": 0.9001,
|
| 61 |
+
"std": 0.3371
|
| 62 |
+
},
|
| 63 |
+
"metric": "VADER compound sentiment of generated stories"
|
| 64 |
+
}
|
| 65 |
+
}
|