nishantup's picture
Upload config.json with huggingface_hub
b666b9f verified
Raw
History Blame
1.95 kB
{
"architecture": "nanoGPT (custom, trained from scratch)",
"model_type": "3-stage pipeline: pretrained -> SFT -> RLVR",
"primary_checkpoint": "nanogpt_slm_rlvr_final.pth",
"model_config": {
"block_size": 512,
"vocab_size": 50257,
"n_layer": 12,
"n_head": 12,
"n_embd": 768,
"dropout": 0.0,
"bias": true
},
"total_parameters_millions": 124.0,
"tokenizer": "tiktoken gpt2 (50,257 BPE tokens)",
"framework": "PyTorch",
"checkpoints": {
"nanogpt_slm_tinystories_best.pth": "Stage 1 -- pretrained base",
"nanogpt_slm_sft_best.pth": "Stage 2 -- SFT (positive-sentiment subset)",
"nanogpt_slm_rlvr_final.pth": "Stage 3 -- RLVR (primary)"
},
"dataset": {
"name": "TinyStories (roneneldan/TinyStories)",
"description": "2.1M synthetic short stories for children aged 3-5"
},
"training": {
"stage1_pretraining": {
"iterations": 70000,
"learning_rate": "6e-4 -> 1e-5 cosine",
"batch": "32 x 512, grad-accum 4",
"precision": "bfloat16"
},
"stage2_sft": {
"data": "positive-sentiment subset (VADER compound > 0.05): 1.91M stories",
"iterations": 12952,
"learning_rate": "5e-5 -> 5e-6 cosine",
"best_val_loss": 1.2037
},
"stage3_rlvr": {
"algorithm": "vanilla policy gradient",
"reward": "VADER compound sentiment (verifiable)",
"kl_penalty": "beta=0.1 vs frozen SFT reference",
"iterations": 200,
"generation_batch": 16,
"trajectory_len": 200,
"learning_rate": "5e-6",
"mean_reward": "+0.6485 -> +0.8652"
}
},
"sentiment_comparison": {
"pretrained": {
"mean": 0.8428,
"std": 0.3907
},
"sft": {
"mean": 0.8703,
"std": 0.2853
},
"rlvr": {
"mean": 0.9001,
"std": 0.3371
},
"metric": "VADER compound sentiment of generated stories"
}
}