Upload config.json with huggingface_hub

Browse files

Files changed (1) hide show

config.json +65 -0

config.json ADDED Viewed

	@@ -0,0 +1,65 @@

+{
+  "architecture": "nanoGPT (custom, trained from scratch)",
+  "model_type": "3-stage pipeline: pretrained -> SFT -> RLVR",
+  "primary_checkpoint": "nanogpt_slm_rlvr_final.pth",
+  "model_config": {
+    "block_size": 512,
+    "vocab_size": 50257,
+    "n_layer": 12,
+    "n_head": 12,
+    "n_embd": 768,
+    "dropout": 0.0,
+    "bias": true
+  },
+  "total_parameters_millions": 124.0,
+  "tokenizer": "tiktoken gpt2 (50,257 BPE tokens)",
+  "framework": "PyTorch",
+  "checkpoints": {
+    "nanogpt_slm_tinystories_best.pth": "Stage 1 -- pretrained base",
+    "nanogpt_slm_sft_best.pth": "Stage 2 -- SFT (positive-sentiment subset)",
+    "nanogpt_slm_rlvr_final.pth": "Stage 3 -- RLVR (primary)"
+  },
+  "dataset": {
+    "name": "TinyStories (roneneldan/TinyStories)",
+    "description": "2.1M synthetic short stories for 3- to 5-year-old children"
+  },
+  "training": {
+    "stage1_pretraining": {
+      "iterations": 70000,
+      "learning_rate": "6e-4 -> 1e-5 cosine",
+      "batch": "32 x 512, grad-accum 4",
+      "precision": "bfloat16"
+    },
+    "stage2_sft": {
+      "data": "positive-sentiment subset (VADER compound > 0.05): 1.91M stories",
+      "iterations": 12952,
+      "learning_rate": "5e-5 -> 5e-6 cosine",
+      "best_val_loss": 1.2037
+    },
+    "stage3_rlvr": {
+      "algorithm": "vanilla policy gradient",
+      "reward": "VADER compound sentiment (verifiable)",
+      "kl_penalty": "beta=0.1 vs frozen SFT reference",
+      "iterations": 200,
+      "generation_batch": 16,
+      "trajectory_len": 200,
+      "learning_rate": "5e-6",
+      "mean_reward": "+0.6485 -> +0.8652"
+    }
+  },
+  "sentiment_comparison": {
+    "pretrained": {
+      "mean": 0.8428,
+      "std": 0.3907
+    },
+    "sft": {
+      "mean": 0.8703,
+      "std": 0.2853
+    },
+    "rlvr": {
+      "mean": 0.9001,
+      "std": 0.3371
+    },
+    "metric": "VADER compound sentiment of generated stories"
+  }
+}