nishantup committed on
Commit
b666b9f
·
verified ·
1 Parent(s): 6e91f44

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +65 -0
config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architecture": "nanoGPT (custom, trained from scratch)",
3
+ "model_type": "3-stage pipeline: pretrained -> SFT -> RLVR",
4
+ "primary_checkpoint": "nanogpt_slm_rlvr_final.pth",
5
+ "model_config": {
6
+ "block_size": 512,
7
+ "vocab_size": 50257,
8
+ "n_layer": 12,
9
+ "n_head": 12,
10
+ "n_embd": 768,
11
+ "dropout": 0.0,
12
+ "bias": true
13
+ },
14
+ "total_parameters_millions": 124.0,
15
+ "tokenizer": "tiktoken gpt2 (50,257 BPE tokens)",
16
+ "framework": "PyTorch",
17
+ "checkpoints": {
18
+ "nanogpt_slm_tinystories_best.pth": "Stage 1 -- pretrained base",
19
+ "nanogpt_slm_sft_best.pth": "Stage 2 -- SFT (positive-sentiment subset)",
20
+ "nanogpt_slm_rlvr_final.pth": "Stage 3 -- RLVR (primary)"
21
+ },
22
+ "dataset": {
23
+ "name": "TinyStories (roneneldan/TinyStories)",
24
+ "description": "2.1M synthetic short stories for 3- to 5-year-old children"
25
+ },
26
+ "training": {
27
+ "stage1_pretraining": {
28
+ "iterations": 70000,
29
+ "learning_rate": "6e-4 -> 1e-5 cosine",
30
+ "batch": "32 x 512, grad-accum 4",
31
+ "precision": "bfloat16"
32
+ },
33
+ "stage2_sft": {
34
+ "data": "positive-sentiment subset (VADER compound > 0.05): 1.91M stories",
35
+ "iterations": 12952,
36
+ "learning_rate": "5e-5 -> 5e-6 cosine",
37
+ "best_val_loss": 1.2037
38
+ },
39
+ "stage3_rlvr": {
40
+ "algorithm": "vanilla policy gradient",
41
+ "reward": "VADER compound sentiment (verifiable)",
42
+ "kl_penalty": "beta=0.1 vs frozen SFT reference",
43
+ "iterations": 200,
44
+ "generation_batch": 16,
45
+ "trajectory_len": 200,
46
+ "learning_rate": "5e-6",
47
+ "mean_reward": "+0.6485 -> +0.8652"
48
+ }
49
+ },
50
+ "sentiment_comparison": {
51
+ "pretrained": {
52
+ "mean": 0.8428,
53
+ "std": 0.3907
54
+ },
55
+ "sft": {
56
+ "mean": 0.8703,
57
+ "std": 0.2853
58
+ },
59
+ "rlvr": {
60
+ "mean": 0.9001,
61
+ "std": 0.3371
62
+ },
63
+ "metric": "VADER compound sentiment of generated stories"
64
+ }
65
+ }