{ "model_type": "gptmoe-custom", "vocab_size": 32000, "d_model": 768, "n_heads": 12, "n_layers": 16, "ffn_mult": 2.6666666666666665, "use_rmsnorm": true, "rms_eps": 1e-05, "rope_base": 10000.0, "rope_scaling": 1.0, "max_seq_len": 2048, "moe": { "even_only": true, "num_experts": 8, "top_k": 2, "capacity_factor": 1.25, "eval_capacity_factor": 1.0, "noisy_gate_policy": "Jitter", "use_residual": false }, "tie_embedding": true, "dtype": "bf16" }