{
  "architecture": "nanoGPT (custom, trained from scratch)",
  "model_type": "3-stage pipeline: pretrained -> SFT -> RLVR",
  "primary_checkpoint": "nanogpt_slm_rlvr_final.pth",
  "model_config": {
    "block_size": 512,
    "vocab_size": 50257,
    "n_layer": 12,
    "n_head": 12,
    "n_embd": 768,
    "dropout": 0.0,
    "bias": true
  },
  "total_parameters_millions": 124.0,
  "tokenizer": "tiktoken gpt2 (50,257 BPE tokens)",
  "framework": "PyTorch",
  "checkpoints": {
    "nanogpt_slm_tinystories_best.pth": "Stage 1 -- pretrained base",
    "nanogpt_slm_sft_best.pth": "Stage 2 -- SFT (positive-sentiment subset)",
    "nanogpt_slm_rlvr_final.pth": "Stage 3 -- RLVR (primary)"
  },
  "dataset": {
    "name": "TinyStories (roneneldan/TinyStories)",
    "description": "2.1M synthetic short stories for 3-5 year old children"
  },
  "training": {
    "stage1_pretraining": {
      "iterations": 70000,
      "learning_rate": "6e-4 -> 1e-5 cosine",
      "batch": "32 x 512, grad-accum 4",
      "precision": "bfloat16"
    },
    "stage2_sft": {
      "data": "positive-sentiment subset (VADER compound > 0.05): 1.91M stories",
      "iterations": 12952,
      "learning_rate": "5e-5 -> 5e-6 cosine",
      "best_val_loss": 1.2037
    },
    "stage3_rlvr": {
      "algorithm": "vanilla policy gradient",
      "reward": "VADER compound sentiment (verifiable)",
      "kl_penalty": "beta=0.1 vs frozen SFT reference",
      "iterations": 200,
      "generation_batch": 16,
      "trajectory_len": 200,
      "learning_rate": "5e-6",
      "mean_reward": "+0.6485 -> +0.8652"
    }
  },
  "sentiment_comparison": {
    "pretrained": {
      "mean": 0.8428,
      "std": 0.3907
    },
    "sft": {
      "mean": 0.8703,
      "std": 0.2853
    },
    "rlvr": {
      "mean": 0.9001,
      "std": 0.3371
    },
    "metric": "VADER compound sentiment of generated stories"
  }
}