{ "metadata": { "total_parameters": 4485154816, "total_size": 8970309632 }, "weight_map": { "embed_tokens.weight": "model-00001-of-00002.safetensors", "final_norm.weight": "model-00002-of-00002.safetensors", "layers.0.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.0.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.0.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.0.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.0.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.0.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.0.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.0.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.0.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.0.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.0.gate.0.weight": "model-00001-of-00002.safetensors", "layers.0.gate.1.weight": "model-00001-of-00002.safetensors", "layers.1.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.1.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.1.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.1.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.1.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.1.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.1.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.1.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.1.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.1.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.1.gate.0.weight": "model-00001-of-00002.safetensors", "layers.1.gate.1.weight": "model-00001-of-00002.safetensors", "layers.10.experts.0.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.10.experts.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.10.experts.0.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.0.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.1.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.10.experts.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.10.experts.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.2.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.10.experts.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.10.experts.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.3.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.10.experts.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.10.experts.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.10.experts.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.10.gate.0.weight": "model-00002-of-00002.safetensors", "layers.10.gate.1.weight": "model-00002-of-00002.safetensors", "layers.11.experts.0.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.11.experts.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.11.experts.0.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.0.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.1.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.11.experts.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.11.experts.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.2.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.11.experts.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.11.experts.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.3.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.11.experts.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.11.experts.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.11.experts.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.11.gate.0.weight": "model-00002-of-00002.safetensors", "layers.11.gate.1.weight": "model-00002-of-00002.safetensors", "layers.12.experts.0.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.12.experts.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.12.experts.0.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.0.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.1.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.12.experts.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.12.experts.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.2.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.12.experts.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.12.experts.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.3.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.12.experts.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.12.experts.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.12.experts.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.12.gate.0.weight": "model-00002-of-00002.safetensors", "layers.12.gate.1.weight": "model-00002-of-00002.safetensors", "layers.13.experts.0.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.13.experts.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.13.experts.0.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.0.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.1.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.13.experts.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.13.experts.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.2.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.13.experts.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.13.experts.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.3.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.13.experts.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.13.experts.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.13.experts.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.13.gate.0.weight": "model-00002-of-00002.safetensors", "layers.13.gate.1.weight": "model-00002-of-00002.safetensors", "layers.14.experts.0.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.14.experts.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.14.experts.0.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.0.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.1.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.14.experts.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.14.experts.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.2.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.14.experts.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.14.experts.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.3.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.14.experts.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.14.experts.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.14.experts.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.14.gate.0.weight": "model-00002-of-00002.safetensors", "layers.14.gate.1.weight": "model-00002-of-00002.safetensors", "layers.15.experts.0.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.15.experts.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.15.experts.0.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.0.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.1.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.15.experts.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.15.experts.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.2.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.15.experts.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.15.experts.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.3.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.15.experts.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.15.experts.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.15.experts.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.15.gate.0.weight": "model-00002-of-00002.safetensors", "layers.15.gate.1.weight": "model-00002-of-00002.safetensors", "layers.2.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.2.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.2.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.2.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.2.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.2.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.2.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.2.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.2.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.2.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.2.gate.0.weight": "model-00001-of-00002.safetensors", "layers.2.gate.1.weight": "model-00001-of-00002.safetensors", "layers.3.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.3.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.3.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.3.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.3.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.3.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.3.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.3.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.3.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.3.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.3.gate.0.weight": "model-00001-of-00002.safetensors", "layers.3.gate.1.weight": "model-00001-of-00002.safetensors", "layers.4.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.4.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.4.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.4.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.4.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.4.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.4.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.4.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.4.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.4.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.4.gate.0.weight": "model-00001-of-00002.safetensors", "layers.4.gate.1.weight": "model-00001-of-00002.safetensors", "layers.5.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.5.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.5.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.5.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.5.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.5.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.5.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.5.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.5.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.5.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.5.gate.0.weight": "model-00001-of-00002.safetensors", "layers.5.gate.1.weight": "model-00001-of-00002.safetensors", "layers.6.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.6.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.6.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.6.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.6.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.6.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.6.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.6.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.6.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.6.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.6.gate.0.weight": "model-00001-of-00002.safetensors", "layers.6.gate.1.weight": "model-00001-of-00002.safetensors", "layers.7.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.7.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.7.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.7.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.7.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.7.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.7.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.7.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.7.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.7.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.7.gate.0.weight": "model-00001-of-00002.safetensors", "layers.7.gate.1.weight": "model-00001-of-00002.safetensors", "layers.8.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.8.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.8.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.8.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.8.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.8.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.8.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors", "layers.8.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", "layers.8.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.8.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", "layers.8.gate.0.weight": "model-00001-of-00002.safetensors", "layers.8.gate.1.weight": "model-00001-of-00002.safetensors", "layers.9.experts.0.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.9.experts.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.9.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", "layers.9.experts.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", "layers.9.experts.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.1.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.9.experts.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.9.experts.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.2.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.9.experts.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.9.experts.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.3.input_layernorm.weight": "model-00002-of-00002.safetensors", "layers.9.experts.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", "layers.9.experts.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", "layers.9.experts.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", "layers.9.gate.0.weight": "model-00001-of-00002.safetensors", "layers.9.gate.1.weight": "model-00001-of-00002.safetensors", "lm_head.weight": "model-00002-of-00002.safetensors" } }