config.json · nishantup/nanogpt-rlvr-slm-tinystories-124m at b666b9fb246ab89b007c7cb024a69cbec0024999

nanogpt-rlvr-slm-tinystories-124m / config.json

Upload config.json with huggingface_hub

b666b9f verified 27 days ago

1.95 kB

	{
	"architecture": "nanoGPT (custom, trained from scratch)",
	"model_type": "3-stage pipeline: pretrained -> SFT -> RLVR",
	"primary_checkpoint": "nanogpt_slm_rlvr_final.pth",
	"model_config": {
	"block_size": 512,
	"vocab_size": 50257,
	"n_layer": 12,
	"n_head": 12,
	"n_embd": 768,
	"dropout": 0.0,
	"bias": true
	},
	"total_parameters_millions": 124.0,
	"tokenizer": "tiktoken gpt2 (50,257 BPE tokens)",
	"framework": "PyTorch",
	"checkpoints": {
	"nanogpt_slm_tinystories_best.pth": "Stage 1 -- pretrained base",
	"nanogpt_slm_sft_best.pth": "Stage 2 -- SFT (positive-sentiment subset)",
	"nanogpt_slm_rlvr_final.pth": "Stage 3 -- RLVR (primary)"
	},
	"dataset": {
	"name": "TinyStories (roneneldan/TinyStories)",
	"description": "2.1M synthetic short stories for 3-5 year old children"
	},
	"training": {
	"stage1_pretraining": {
	"iterations": 70000,
	"learning_rate": "6e-4 -> 1e-5 cosine",
	"batch": "32 x 512, grad-accum 4",
	"precision": "bfloat16"
	},
	"stage2_sft": {
	"data": "positive-sentiment subset (VADER compound > 0.05): 1.91M stories",
	"iterations": 12952,
	"learning_rate": "5e-5 -> 5e-6 cosine",
	"best_val_loss": 1.2037
	},
	"stage3_rlvr": {
	"algorithm": "vanilla policy gradient",
	"reward": "VADER compound sentiment (verifiable)",
	"kl_penalty": "beta=0.1 vs frozen SFT reference",
	"iterations": 200,
	"generation_batch": 16,
	"trajectory_len": 200,
	"learning_rate": "5e-6",
	"mean_reward": "+0.6485 -> +0.8652"
	}
	},
	"sentiment_comparison": {
	"pretrained": {
	"mean": 0.8428,
	"std": 0.3907
	},
	"sft": {
	"mean": 0.8703,
	"std": 0.2853
	},
	"rlvr": {
	"mean": 0.9001,
	"std": 0.3371
	},
	"metric": "VADER compound sentiment of generated stories"
	}
	}