micro-llama-1b-dpo / model.safetensors.index.json
bkhmsi's picture
Upload MiCRoLlama
8d2393c verified
Raw
History Blame Contribute Delete
51.9 kB
{
"metadata": {
"total_parameters": 4485154816,
"total_size": 8970309632
},
"weight_map": {
"embed_tokens.weight": "model-00001-of-00002.safetensors",
"final_norm.weight": "model-00002-of-00002.safetensors",
"layers.0.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.0.gate.0.weight": "model-00001-of-00002.safetensors",
"layers.0.gate.1.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.1.gate.0.weight": "model-00001-of-00002.safetensors",
"layers.1.gate.1.weight": "model-00001-of-00002.safetensors",
"layers.10.experts.0.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.0.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.0.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.1.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.2.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.3.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.experts.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.10.gate.0.weight": "model-00002-of-00002.safetensors",
"layers.10.gate.1.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.0.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.0.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.0.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.1.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.2.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.3.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.experts.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.11.gate.0.weight": "model-00002-of-00002.safetensors",
"layers.11.gate.1.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.0.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.0.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.0.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.1.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.2.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.3.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.experts.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.12.gate.0.weight": "model-00002-of-00002.safetensors",
"layers.12.gate.1.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.0.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.0.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.0.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.1.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.2.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.3.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.experts.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.13.gate.0.weight": "model-00002-of-00002.safetensors",
"layers.13.gate.1.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.0.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.0.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.0.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.1.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.2.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.3.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.experts.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.14.gate.0.weight": "model-00002-of-00002.safetensors",
"layers.14.gate.1.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.0.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.0.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.0.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.1.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.2.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.3.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.experts.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.15.gate.0.weight": "model-00002-of-00002.safetensors",
"layers.15.gate.1.weight": "model-00002-of-00002.safetensors",
"layers.2.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.gate.0.weight": "model-00001-of-00002.safetensors",
"layers.2.gate.1.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.3.gate.0.weight": "model-00001-of-00002.safetensors",
"layers.3.gate.1.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.gate.0.weight": "model-00001-of-00002.safetensors",
"layers.4.gate.1.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.5.gate.0.weight": "model-00001-of-00002.safetensors",
"layers.5.gate.1.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.gate.0.weight": "model-00001-of-00002.safetensors",
"layers.6.gate.1.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.7.gate.0.weight": "model-00001-of-00002.safetensors",
"layers.7.gate.1.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.gate.0.weight": "model-00001-of-00002.safetensors",
"layers.8.gate.1.weight": "model-00001-of-00002.safetensors",
"layers.9.experts.0.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
"layers.9.experts.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
"layers.9.experts.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.1.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.2.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.3.input_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.experts.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
"layers.9.gate.0.weight": "model-00001-of-00002.safetensors",
"layers.9.gate.1.weight": "model-00001-of-00002.safetensors",
"lm_head.weight": "model-00002-of-00002.safetensors"
}
}