{
  "architecture": "nanoGPT (custom, trained from scratch)",
  "model_type": "3-stage pipeline: pretrained -> SFT -> RLVR",
  "primary_checkpoint": "nanogpt_slm_rlvr_final.pth",
  "model_config": {
    "block_size": 512,
    "vocab_size": 50257,
    "n_layer": 12,
    "n_head": 12,
    "n_embd": 768,
    "dropout": 0.0,
    "bias": true
  },
  "total_parameters_millions": 124.0,
  "tokenizer": "tiktoken gpt2 (50,257 BPE tokens)",
  "framework": "PyTorch",
  "checkpoints": {
    "nanogpt_slm_tinystories_best.pth": "Stage 1 -- pretrained base",
    "nanogpt_slm_sft_best.pth": "Stage 2 -- SFT (positive-sentiment subset)",
    "nanogpt_slm_rlvr_final.pth": "Stage 3 -- RLVR (primary)"
  },
  "dataset": {
    "name": "TinyStories (roneneldan/TinyStories)",
    "description": "2.1M synthetic short stories for 3-5 year old children"
  },
  "training": {
    "stage1_pretraining": {
      "iterations": 70000,
      "learning_rate": "6e-4 -> 1e-5 cosine",
      "batch": "32 x 512, grad-accum 4",
      "precision": "bfloat16"
    },
    "stage2_sft": {
      "data": "positive-sentiment subset (VADER compound > 0.05): 1.91M stories",
      "iterations": 12952,
      "learning_rate": "5e-5 -> 5e-6 cosine",
      "best_val_loss": 1.2037
    },
    "stage3_rlvr": {
      "algorithm": "vanilla policy gradient",
      "reward": "VADER compound sentiment (verifiable)",
      "kl_penalty": "beta=0.1 vs frozen SFT reference",
      "iterations": 200,
      "generation_batch": 16,
      "trajectory_len": 200,
      "learning_rate": "5e-6",
      "mean_reward": "+0.6485 -> +0.8652"
    }
  },
  "sentiment_comparison": {
    "pretrained": {
      "mean": 0.8428,
      "std": 0.3907
    },
    "sft": {
      "mean": 0.8703,
      "std": 0.2853
    },
    "rlvr": {
      "mean": 0.9001,
      "std": 0.3371
    },
    "metric": "VADER compound sentiment of generated stories"
  }
}