{ "metadata": { "total_size": 155547100252, "total_parameters": 284333146519 }, "weight_map": { "lm_head.biases": "model-00015-of-00015.safetensors", "lm_head.scales": "model-00015-of-00015.safetensors", "lm_head.weight": "model-00015-of-00015.safetensors", "model.embed_tokens.biases": "model-00001-of-00015.safetensors", "model.embed_tokens.scales": "model-00001-of-00015.safetensors", "model.embed_tokens.weight": "model-00001-of-00015.safetensors", "model.hc_head.base": "model-00015-of-00015.safetensors", "model.hc_head.fn": "model-00015-of-00015.safetensors", "model.hc_head.scale": "model-00015-of-00015.safetensors", "model.layers.0.attn.attn_sink": "model-00001-of-00015.safetensors", "model.layers.0.attn.kv_norm.weight": "model-00001-of-00015.safetensors", "model.layers.0.attn.q_norm.weight": "model-00001-of-00015.safetensors", "model.layers.0.attn.wkv.biases": "model-00001-of-00015.safetensors", "model.layers.0.attn.wkv.scales": "model-00001-of-00015.safetensors", "model.layers.0.attn.wkv.weight": "model-00001-of-00015.safetensors", "model.layers.0.attn.wo_a.biases": "model-00001-of-00015.safetensors", "model.layers.0.attn.wo_a.scales": "model-00001-of-00015.safetensors", "model.layers.0.attn.wo_a.weight": "model-00001-of-00015.safetensors", "model.layers.0.attn.wo_b.biases": "model-00001-of-00015.safetensors", "model.layers.0.attn.wo_b.scales": "model-00001-of-00015.safetensors", "model.layers.0.attn.wo_b.weight": "model-00001-of-00015.safetensors", "model.layers.0.attn.wq_a.biases": "model-00001-of-00015.safetensors", "model.layers.0.attn.wq_a.scales": "model-00001-of-00015.safetensors", "model.layers.0.attn.wq_a.weight": "model-00001-of-00015.safetensors", "model.layers.0.attn.wq_b.biases": "model-00001-of-00015.safetensors", "model.layers.0.attn.wq_b.scales": "model-00001-of-00015.safetensors", "model.layers.0.attn.wq_b.weight": "model-00001-of-00015.safetensors", "model.layers.0.attn_hc.base": "model-00001-of-00015.safetensors", "model.layers.0.attn_hc.fn": "model-00001-of-00015.safetensors", "model.layers.0.attn_hc.scale": "model-00001-of-00015.safetensors", "model.layers.0.attn_norm.weight": "model-00001-of-00015.safetensors", "model.layers.0.ffn.gate.tid2eid": "model-00001-of-00015.safetensors", "model.layers.0.ffn.gate.weight": "model-00001-of-00015.safetensors", "model.layers.0.ffn.shared_experts.down_proj.biases": "model-00001-of-00015.safetensors", "model.layers.0.ffn.shared_experts.down_proj.scales": "model-00001-of-00015.safetensors", "model.layers.0.ffn.shared_experts.down_proj.weight": "model-00001-of-00015.safetensors", "model.layers.0.ffn.shared_experts.gate_proj.biases": "model-00001-of-00015.safetensors", "model.layers.0.ffn.shared_experts.gate_proj.scales": "model-00001-of-00015.safetensors", "model.layers.0.ffn.shared_experts.gate_proj.weight": "model-00001-of-00015.safetensors", "model.layers.0.ffn.shared_experts.up_proj.biases": "model-00001-of-00015.safetensors", "model.layers.0.ffn.shared_experts.up_proj.scales": "model-00001-of-00015.safetensors", "model.layers.0.ffn.shared_experts.up_proj.weight": "model-00001-of-00015.safetensors", "model.layers.0.ffn.switch_mlp.down_proj.scales": "model-00001-of-00015.safetensors", "model.layers.0.ffn.switch_mlp.down_proj.weight": "model-00001-of-00015.safetensors", "model.layers.0.ffn.switch_mlp.gate_proj.scales": "model-00001-of-00015.safetensors", "model.layers.0.ffn.switch_mlp.gate_proj.weight": "model-00001-of-00015.safetensors", "model.layers.0.ffn.switch_mlp.up_proj.scales": "model-00001-of-00015.safetensors", "model.layers.0.ffn.switch_mlp.up_proj.weight": "model-00001-of-00015.safetensors", "model.layers.0.ffn_hc.base": "model-00001-of-00015.safetensors", "model.layers.0.ffn_hc.fn": "model-00001-of-00015.safetensors", "model.layers.0.ffn_hc.scale": "model-00001-of-00015.safetensors", "model.layers.0.ffn_norm.weight": "model-00001-of-00015.safetensors", "model.layers.1.attn.attn_sink": "model-00001-of-00015.safetensors", "model.layers.1.attn.kv_norm.weight": "model-00001-of-00015.safetensors", "model.layers.1.attn.q_norm.weight": "model-00001-of-00015.safetensors", "model.layers.1.attn.wkv.biases": "model-00001-of-00015.safetensors", "model.layers.1.attn.wkv.scales": "model-00001-of-00015.safetensors", "model.layers.1.attn.wkv.weight": "model-00001-of-00015.safetensors", "model.layers.1.attn.wo_a.biases": "model-00001-of-00015.safetensors", "model.layers.1.attn.wo_a.scales": "model-00001-of-00015.safetensors", "model.layers.1.attn.wo_a.weight": "model-00001-of-00015.safetensors", "model.layers.1.attn.wo_b.biases": "model-00001-of-00015.safetensors", "model.layers.1.attn.wo_b.scales": "model-00001-of-00015.safetensors", "model.layers.1.attn.wo_b.weight": "model-00001-of-00015.safetensors", "model.layers.1.attn.wq_a.biases": "model-00001-of-00015.safetensors", "model.layers.1.attn.wq_a.scales": "model-00001-of-00015.safetensors", "model.layers.1.attn.wq_a.weight": "model-00001-of-00015.safetensors", "model.layers.1.attn.wq_b.biases": "model-00001-of-00015.safetensors", "model.layers.1.attn.wq_b.scales": "model-00001-of-00015.safetensors", "model.layers.1.attn.wq_b.weight": "model-00001-of-00015.safetensors", "model.layers.1.attn_hc.base": "model-00001-of-00015.safetensors", "model.layers.1.attn_hc.fn": "model-00001-of-00015.safetensors", "model.layers.1.attn_hc.scale": "model-00001-of-00015.safetensors", "model.layers.1.attn_norm.weight": "model-00001-of-00015.safetensors", "model.layers.1.ffn.gate.tid2eid": "model-00001-of-00015.safetensors", "model.layers.1.ffn.gate.weight": "model-00001-of-00015.safetensors", "model.layers.1.ffn.shared_experts.down_proj.biases": "model-00001-of-00015.safetensors", "model.layers.1.ffn.shared_experts.down_proj.scales": "model-00001-of-00015.safetensors", "model.layers.1.ffn.shared_experts.down_proj.weight": "model-00001-of-00015.safetensors", "model.layers.1.ffn.shared_experts.gate_proj.biases": "model-00001-of-00015.safetensors", "model.layers.1.ffn.shared_experts.gate_proj.scales": "model-00001-of-00015.safetensors", "model.layers.1.ffn.shared_experts.gate_proj.weight": "model-00001-of-00015.safetensors", "model.layers.1.ffn.shared_experts.up_proj.biases": "model-00001-of-00015.safetensors", "model.layers.1.ffn.shared_experts.up_proj.scales": "model-00001-of-00015.safetensors", "model.layers.1.ffn.shared_experts.up_proj.weight": "model-00001-of-00015.safetensors", "model.layers.1.ffn.switch_mlp.down_proj.scales": "model-00001-of-00015.safetensors", "model.layers.1.ffn.switch_mlp.down_proj.weight": "model-00001-of-00015.safetensors", "model.layers.1.ffn.switch_mlp.gate_proj.scales": "model-00001-of-00015.safetensors", "model.layers.1.ffn.switch_mlp.gate_proj.weight": "model-00001-of-00015.safetensors", "model.layers.1.ffn.switch_mlp.up_proj.scales": "model-00001-of-00015.safetensors", "model.layers.1.ffn.switch_mlp.up_proj.weight": "model-00001-of-00015.safetensors", "model.layers.1.ffn_hc.base": "model-00001-of-00015.safetensors", "model.layers.1.ffn_hc.fn": "model-00001-of-00015.safetensors", "model.layers.1.ffn_hc.scale": "model-00001-of-00015.safetensors", "model.layers.1.ffn_norm.weight": "model-00001-of-00015.safetensors", "model.layers.10.attn.attn_sink": "model-00004-of-00015.safetensors", "model.layers.10.attn.compressor.ape": "model-00004-of-00015.safetensors", "model.layers.10.attn.compressor.norm.weight": "model-00004-of-00015.safetensors", "model.layers.10.attn.compressor.wgate.biases": "model-00004-of-00015.safetensors", "model.layers.10.attn.compressor.wgate.scales": "model-00004-of-00015.safetensors", "model.layers.10.attn.compressor.wgate.weight": "model-00004-of-00015.safetensors", "model.layers.10.attn.compressor.wkv.biases": "model-00004-of-00015.safetensors", "model.layers.10.attn.compressor.wkv.scales": "model-00004-of-00015.safetensors", "model.layers.10.attn.compressor.wkv.weight": "model-00004-of-00015.safetensors", "model.layers.10.attn.indexer.compressor.ape": "model-00004-of-00015.safetensors", "model.layers.10.attn.indexer.compressor.norm.weight": "model-00004-of-00015.safetensors", "model.layers.10.attn.indexer.compressor.wgate.biases": "model-00004-of-00015.safetensors", "model.layers.10.attn.indexer.compressor.wgate.scales": "model-00004-of-00015.safetensors", "model.layers.10.attn.indexer.compressor.wgate.weight": "model-00004-of-00015.safetensors", "model.layers.10.attn.indexer.compressor.wkv.biases": "model-00004-of-00015.safetensors", "model.layers.10.attn.indexer.compressor.wkv.scales": "model-00004-of-00015.safetensors", "model.layers.10.attn.indexer.compressor.wkv.weight": "model-00004-of-00015.safetensors", "model.layers.10.attn.indexer.weights_proj.biases": "model-00004-of-00015.safetensors", "model.layers.10.attn.indexer.weights_proj.scales": "model-00004-of-00015.safetensors", "model.layers.10.attn.indexer.weights_proj.weight": "model-00004-of-00015.safetensors", "model.layers.10.attn.indexer.wq_b.biases": "model-00004-of-00015.safetensors", "model.layers.10.attn.indexer.wq_b.scales": "model-00004-of-00015.safetensors", "model.layers.10.attn.indexer.wq_b.weight": "model-00004-of-00015.safetensors", "model.layers.10.attn.kv_norm.weight": "model-00004-of-00015.safetensors", "model.layers.10.attn.q_norm.weight": "model-00004-of-00015.safetensors", "model.layers.10.attn.wkv.biases": "model-00004-of-00015.safetensors", "model.layers.10.attn.wkv.scales": "model-00004-of-00015.safetensors", "model.layers.10.attn.wkv.weight": "model-00004-of-00015.safetensors", "model.layers.10.attn.wo_a.biases": "model-00004-of-00015.safetensors", "model.layers.10.attn.wo_a.scales": "model-00004-of-00015.safetensors", "model.layers.10.attn.wo_a.weight": "model-00004-of-00015.safetensors", "model.layers.10.attn.wo_b.biases": "model-00004-of-00015.safetensors", "model.layers.10.attn.wo_b.scales": "model-00004-of-00015.safetensors", "model.layers.10.attn.wo_b.weight": "model-00004-of-00015.safetensors", "model.layers.10.attn.wq_a.biases": "model-00004-of-00015.safetensors", "model.layers.10.attn.wq_a.scales": "model-00004-of-00015.safetensors", "model.layers.10.attn.wq_a.weight": "model-00004-of-00015.safetensors", "model.layers.10.attn.wq_b.biases": "model-00004-of-00015.safetensors", "model.layers.10.attn.wq_b.scales": "model-00004-of-00015.safetensors", "model.layers.10.attn.wq_b.weight": "model-00004-of-00015.safetensors", "model.layers.10.attn_hc.base": "model-00004-of-00015.safetensors", "model.layers.10.attn_hc.fn": "model-00004-of-00015.safetensors", "model.layers.10.attn_hc.scale": "model-00004-of-00015.safetensors", "model.layers.10.attn_norm.weight": "model-00004-of-00015.safetensors", "model.layers.10.ffn.gate.e_score_correction_bias": "model-00004-of-00015.safetensors", "model.layers.10.ffn.gate.weight": "model-00004-of-00015.safetensors", "model.layers.10.ffn.shared_experts.down_proj.biases": "model-00004-of-00015.safetensors", "model.layers.10.ffn.shared_experts.down_proj.scales": "model-00004-of-00015.safetensors", "model.layers.10.ffn.shared_experts.down_proj.weight": "model-00004-of-00015.safetensors", "model.layers.10.ffn.shared_experts.gate_proj.biases": "model-00004-of-00015.safetensors", "model.layers.10.ffn.shared_experts.gate_proj.scales": "model-00004-of-00015.safetensors", "model.layers.10.ffn.shared_experts.gate_proj.weight": "model-00004-of-00015.safetensors", "model.layers.10.ffn.shared_experts.up_proj.biases": "model-00004-of-00015.safetensors", "model.layers.10.ffn.shared_experts.up_proj.scales": "model-00004-of-00015.safetensors", "model.layers.10.ffn.shared_experts.up_proj.weight": "model-00004-of-00015.safetensors", "model.layers.10.ffn.switch_mlp.down_proj.scales": "model-00004-of-00015.safetensors", "model.layers.10.ffn.switch_mlp.down_proj.weight": "model-00004-of-00015.safetensors", "model.layers.10.ffn.switch_mlp.gate_proj.scales": "model-00004-of-00015.safetensors", "model.layers.10.ffn.switch_mlp.gate_proj.weight": "model-00004-of-00015.safetensors", "model.layers.10.ffn.switch_mlp.up_proj.scales": "model-00004-of-00015.safetensors", "model.layers.10.ffn.switch_mlp.up_proj.weight": "model-00004-of-00015.safetensors", "model.layers.10.ffn_hc.base": "model-00004-of-00015.safetensors", "model.layers.10.ffn_hc.fn": "model-00004-of-00015.safetensors", "model.layers.10.ffn_hc.scale": "model-00004-of-00015.safetensors", "model.layers.10.ffn_norm.weight": "model-00004-of-00015.safetensors", "model.layers.11.attn.attn_sink": "model-00004-of-00015.safetensors", "model.layers.11.attn.compressor.ape": "model-00004-of-00015.safetensors", "model.layers.11.attn.compressor.norm.weight": "model-00004-of-00015.safetensors", "model.layers.11.attn.compressor.wgate.biases": "model-00004-of-00015.safetensors", "model.layers.11.attn.compressor.wgate.scales": "model-00004-of-00015.safetensors", "model.layers.11.attn.compressor.wgate.weight": "model-00004-of-00015.safetensors", "model.layers.11.attn.compressor.wkv.biases": "model-00004-of-00015.safetensors", "model.layers.11.attn.compressor.wkv.scales": "model-00004-of-00015.safetensors", "model.layers.11.attn.compressor.wkv.weight": "model-00004-of-00015.safetensors", "model.layers.11.attn.kv_norm.weight": "model-00004-of-00015.safetensors", "model.layers.11.attn.q_norm.weight": "model-00004-of-00015.safetensors", "model.layers.11.attn.wkv.biases": "model-00004-of-00015.safetensors", "model.layers.11.attn.wkv.scales": "model-00004-of-00015.safetensors", "model.layers.11.attn.wkv.weight": "model-00004-of-00015.safetensors", "model.layers.11.attn.wo_a.biases": "model-00004-of-00015.safetensors", "model.layers.11.attn.wo_a.scales": "model-00004-of-00015.safetensors", "model.layers.11.attn.wo_a.weight": "model-00004-of-00015.safetensors", "model.layers.11.attn.wo_b.biases": "model-00004-of-00015.safetensors", "model.layers.11.attn.wo_b.scales": "model-00004-of-00015.safetensors", "model.layers.11.attn.wo_b.weight": "model-00004-of-00015.safetensors", "model.layers.11.attn.wq_a.biases": "model-00004-of-00015.safetensors", "model.layers.11.attn.wq_a.scales": "model-00004-of-00015.safetensors", "model.layers.11.attn.wq_a.weight": "model-00004-of-00015.safetensors", "model.layers.11.attn.wq_b.biases": "model-00004-of-00015.safetensors", "model.layers.11.attn.wq_b.scales": "model-00004-of-00015.safetensors", "model.layers.11.attn.wq_b.weight": "model-00004-of-00015.safetensors", "model.layers.11.attn_hc.base": "model-00005-of-00015.safetensors", "model.layers.11.attn_hc.fn": "model-00005-of-00015.safetensors", "model.layers.11.attn_hc.scale": "model-00005-of-00015.safetensors", "model.layers.11.attn_norm.weight": "model-00005-of-00015.safetensors", "model.layers.11.ffn.gate.e_score_correction_bias": "model-00004-of-00015.safetensors", "model.layers.11.ffn.gate.weight": "model-00004-of-00015.safetensors", "model.layers.11.ffn.shared_experts.down_proj.biases": "model-00005-of-00015.safetensors", "model.layers.11.ffn.shared_experts.down_proj.scales": "model-00005-of-00015.safetensors", "model.layers.11.ffn.shared_experts.down_proj.weight": "model-00005-of-00015.safetensors", "model.layers.11.ffn.shared_experts.gate_proj.biases": "model-00005-of-00015.safetensors", "model.layers.11.ffn.shared_experts.gate_proj.scales": "model-00005-of-00015.safetensors", "model.layers.11.ffn.shared_experts.gate_proj.weight": "model-00005-of-00015.safetensors", "model.layers.11.ffn.shared_experts.up_proj.biases": "model-00005-of-00015.safetensors", "model.layers.11.ffn.shared_experts.up_proj.scales": "model-00005-of-00015.safetensors", "model.layers.11.ffn.shared_experts.up_proj.weight": "model-00005-of-00015.safetensors", "model.layers.11.ffn.switch_mlp.down_proj.scales": "model-00005-of-00015.safetensors", "model.layers.11.ffn.switch_mlp.down_proj.weight": "model-00005-of-00015.safetensors", "model.layers.11.ffn.switch_mlp.gate_proj.scales": "model-00005-of-00015.safetensors", "model.layers.11.ffn.switch_mlp.gate_proj.weight": "model-00004-of-00015.safetensors", "model.layers.11.ffn.switch_mlp.up_proj.scales": "model-00005-of-00015.safetensors", "model.layers.11.ffn.switch_mlp.up_proj.weight": "model-00005-of-00015.safetensors", "model.layers.11.ffn_hc.base": "model-00005-of-00015.safetensors", "model.layers.11.ffn_hc.fn": "model-00005-of-00015.safetensors", "model.layers.11.ffn_hc.scale": "model-00005-of-00015.safetensors", "model.layers.11.ffn_norm.weight": "model-00005-of-00015.safetensors", "model.layers.12.attn.attn_sink": "model-00005-of-00015.safetensors", "model.layers.12.attn.compressor.ape": "model-00005-of-00015.safetensors", "model.layers.12.attn.compressor.norm.weight": "model-00005-of-00015.safetensors", "model.layers.12.attn.compressor.wgate.biases": "model-00005-of-00015.safetensors", "model.layers.12.attn.compressor.wgate.scales": "model-00005-of-00015.safetensors", "model.layers.12.attn.compressor.wgate.weight": "model-00005-of-00015.safetensors", "model.layers.12.attn.compressor.wkv.biases": "model-00005-of-00015.safetensors", "model.layers.12.attn.compressor.wkv.scales": "model-00005-of-00015.safetensors", "model.layers.12.attn.compressor.wkv.weight": "model-00005-of-00015.safetensors", "model.layers.12.attn.indexer.compressor.ape": "model-00005-of-00015.safetensors", "model.layers.12.attn.indexer.compressor.norm.weight": "model-00005-of-00015.safetensors", "model.layers.12.attn.indexer.compressor.wgate.biases": "model-00005-of-00015.safetensors", "model.layers.12.attn.indexer.compressor.wgate.scales": "model-00005-of-00015.safetensors", "model.layers.12.attn.indexer.compressor.wgate.weight": "model-00005-of-00015.safetensors", "model.layers.12.attn.indexer.compressor.wkv.biases": "model-00005-of-00015.safetensors", "model.layers.12.attn.indexer.compressor.wkv.scales": "model-00005-of-00015.safetensors", "model.layers.12.attn.indexer.compressor.wkv.weight": "model-00005-of-00015.safetensors", "model.layers.12.attn.indexer.weights_proj.biases": "model-00005-of-00015.safetensors", "model.layers.12.attn.indexer.weights_proj.scales": "model-00005-of-00015.safetensors", "model.layers.12.attn.indexer.weights_proj.weight": "model-00005-of-00015.safetensors", "model.layers.12.attn.indexer.wq_b.biases": "model-00005-of-00015.safetensors", "model.layers.12.attn.indexer.wq_b.scales": "model-00005-of-00015.safetensors", "model.layers.12.attn.indexer.wq_b.weight": "model-00005-of-00015.safetensors", "model.layers.12.attn.kv_norm.weight": "model-00005-of-00015.safetensors", "model.layers.12.attn.q_norm.weight": "model-00005-of-00015.safetensors", "model.layers.12.attn.wkv.biases": "model-00005-of-00015.safetensors", "model.layers.12.attn.wkv.scales": "model-00005-of-00015.safetensors", "model.layers.12.attn.wkv.weight": "model-00005-of-00015.safetensors", "model.layers.12.attn.wo_a.biases": "model-00005-of-00015.safetensors", "model.layers.12.attn.wo_a.scales": "model-00005-of-00015.safetensors", "model.layers.12.attn.wo_a.weight": "model-00005-of-00015.safetensors", "model.layers.12.attn.wo_b.biases": "model-00005-of-00015.safetensors", "model.layers.12.attn.wo_b.scales": "model-00005-of-00015.safetensors", "model.layers.12.attn.wo_b.weight": "model-00005-of-00015.safetensors", "model.layers.12.attn.wq_a.biases": "model-00005-of-00015.safetensors", "model.layers.12.attn.wq_a.scales": "model-00005-of-00015.safetensors", "model.layers.12.attn.wq_a.weight": "model-00005-of-00015.safetensors", "model.layers.12.attn.wq_b.biases": "model-00005-of-00015.safetensors", "model.layers.12.attn.wq_b.scales": "model-00005-of-00015.safetensors", "model.layers.12.attn.wq_b.weight": "model-00005-of-00015.safetensors", "model.layers.12.attn_hc.base": "model-00005-of-00015.safetensors", "model.layers.12.attn_hc.fn": "model-00005-of-00015.safetensors", "model.layers.12.attn_hc.scale": "model-00005-of-00015.safetensors", "model.layers.12.attn_norm.weight": "model-00005-of-00015.safetensors", "model.layers.12.ffn.gate.e_score_correction_bias": "model-00005-of-00015.safetensors", "model.layers.12.ffn.gate.weight": "model-00005-of-00015.safetensors", "model.layers.12.ffn.shared_experts.down_proj.biases": "model-00005-of-00015.safetensors", "model.layers.12.ffn.shared_experts.down_proj.scales": "model-00005-of-00015.safetensors", "model.layers.12.ffn.shared_experts.down_proj.weight": "model-00005-of-00015.safetensors", "model.layers.12.ffn.shared_experts.gate_proj.biases": "model-00005-of-00015.safetensors", "model.layers.12.ffn.shared_experts.gate_proj.scales": "model-00005-of-00015.safetensors", "model.layers.12.ffn.shared_experts.gate_proj.weight": "model-00005-of-00015.safetensors", "model.layers.12.ffn.shared_experts.up_proj.biases": "model-00005-of-00015.safetensors", "model.layers.12.ffn.shared_experts.up_proj.scales": "model-00005-of-00015.safetensors", "model.layers.12.ffn.shared_experts.up_proj.weight": "model-00005-of-00015.safetensors", "model.layers.12.ffn.switch_mlp.down_proj.scales": "model-00005-of-00015.safetensors", "model.layers.12.ffn.switch_mlp.down_proj.weight": "model-00005-of-00015.safetensors", "model.layers.12.ffn.switch_mlp.gate_proj.scales": "model-00005-of-00015.safetensors", "model.layers.12.ffn.switch_mlp.gate_proj.weight": "model-00005-of-00015.safetensors", "model.layers.12.ffn.switch_mlp.up_proj.scales": "model-00005-of-00015.safetensors", "model.layers.12.ffn.switch_mlp.up_proj.weight": "model-00005-of-00015.safetensors", "model.layers.12.ffn_hc.base": "model-00005-of-00015.safetensors", "model.layers.12.ffn_hc.fn": "model-00005-of-00015.safetensors", "model.layers.12.ffn_hc.scale": "model-00005-of-00015.safetensors", "model.layers.12.ffn_norm.weight": "model-00005-of-00015.safetensors", "model.layers.13.attn.attn_sink": "model-00005-of-00015.safetensors", "model.layers.13.attn.compressor.ape": "model-00005-of-00015.safetensors", "model.layers.13.attn.compressor.norm.weight": "model-00005-of-00015.safetensors", "model.layers.13.attn.compressor.wgate.biases": "model-00005-of-00015.safetensors", "model.layers.13.attn.compressor.wgate.scales": "model-00005-of-00015.safetensors", "model.layers.13.attn.compressor.wgate.weight": "model-00005-of-00015.safetensors", "model.layers.13.attn.compressor.wkv.biases": "model-00005-of-00015.safetensors", "model.layers.13.attn.compressor.wkv.scales": "model-00005-of-00015.safetensors", "model.layers.13.attn.compressor.wkv.weight": "model-00005-of-00015.safetensors", "model.layers.13.attn.kv_norm.weight": "model-00005-of-00015.safetensors", "model.layers.13.attn.q_norm.weight": "model-00005-of-00015.safetensors", "model.layers.13.attn.wkv.biases": "model-00005-of-00015.safetensors", "model.layers.13.attn.wkv.scales": "model-00005-of-00015.safetensors", "model.layers.13.attn.wkv.weight": "model-00005-of-00015.safetensors", "model.layers.13.attn.wo_a.biases": "model-00005-of-00015.safetensors", "model.layers.13.attn.wo_a.scales": "model-00005-of-00015.safetensors", "model.layers.13.attn.wo_a.weight": "model-00005-of-00015.safetensors", "model.layers.13.attn.wo_b.biases": "model-00005-of-00015.safetensors", "model.layers.13.attn.wo_b.scales": "model-00005-of-00015.safetensors", "model.layers.13.attn.wo_b.weight": "model-00005-of-00015.safetensors", "model.layers.13.attn.wq_a.biases": "model-00005-of-00015.safetensors", "model.layers.13.attn.wq_a.scales": "model-00005-of-00015.safetensors", "model.layers.13.attn.wq_a.weight": "model-00005-of-00015.safetensors", "model.layers.13.attn.wq_b.biases": "model-00005-of-00015.safetensors", "model.layers.13.attn.wq_b.scales": "model-00005-of-00015.safetensors", "model.layers.13.attn.wq_b.weight": "model-00005-of-00015.safetensors", "model.layers.13.attn_hc.base": "model-00005-of-00015.safetensors", "model.layers.13.attn_hc.fn": "model-00005-of-00015.safetensors", "model.layers.13.attn_hc.scale": "model-00005-of-00015.safetensors", "model.layers.13.attn_norm.weight": "model-00005-of-00015.safetensors", "model.layers.13.ffn.gate.e_score_correction_bias": "model-00005-of-00015.safetensors", "model.layers.13.ffn.gate.weight": "model-00005-of-00015.safetensors", "model.layers.13.ffn.shared_experts.down_proj.biases": "model-00005-of-00015.safetensors", "model.layers.13.ffn.shared_experts.down_proj.scales": "model-00005-of-00015.safetensors", "model.layers.13.ffn.shared_experts.down_proj.weight": "model-00005-of-00015.safetensors", "model.layers.13.ffn.shared_experts.gate_proj.biases": "model-00005-of-00015.safetensors", "model.layers.13.ffn.shared_experts.gate_proj.scales": "model-00005-of-00015.safetensors", "model.layers.13.ffn.shared_experts.gate_proj.weight": "model-00005-of-00015.safetensors", "model.layers.13.ffn.shared_experts.up_proj.biases": "model-00005-of-00015.safetensors", "model.layers.13.ffn.shared_experts.up_proj.scales": "model-00005-of-00015.safetensors", "model.layers.13.ffn.shared_experts.up_proj.weight": "model-00005-of-00015.safetensors", "model.layers.13.ffn.switch_mlp.down_proj.scales": "model-00005-of-00015.safetensors", "model.layers.13.ffn.switch_mlp.down_proj.weight": "model-00005-of-00015.safetensors", "model.layers.13.ffn.switch_mlp.gate_proj.scales": "model-00005-of-00015.safetensors", "model.layers.13.ffn.switch_mlp.gate_proj.weight": "model-00005-of-00015.safetensors", "model.layers.13.ffn.switch_mlp.up_proj.scales": "model-00005-of-00015.safetensors", "model.layers.13.ffn.switch_mlp.up_proj.weight": "model-00005-of-00015.safetensors", "model.layers.13.ffn_hc.base": "model-00005-of-00015.safetensors", "model.layers.13.ffn_hc.fn": "model-00005-of-00015.safetensors", "model.layers.13.ffn_hc.scale": "model-00005-of-00015.safetensors", "model.layers.13.ffn_norm.weight": "model-00005-of-00015.safetensors", "model.layers.14.attn.attn_sink": "model-00005-of-00015.safetensors", "model.layers.14.attn.compressor.ape": "model-00005-of-00015.safetensors", "model.layers.14.attn.compressor.norm.weight": "model-00005-of-00015.safetensors", "model.layers.14.attn.compressor.wgate.biases": "model-00005-of-00015.safetensors", "model.layers.14.attn.compressor.wgate.scales": "model-00005-of-00015.safetensors", "model.layers.14.attn.compressor.wgate.weight": "model-00005-of-00015.safetensors", "model.layers.14.attn.compressor.wkv.biases": "model-00005-of-00015.safetensors", "model.layers.14.attn.compressor.wkv.scales": "model-00005-of-00015.safetensors", "model.layers.14.attn.compressor.wkv.weight": "model-00005-of-00015.safetensors", "model.layers.14.attn.indexer.compressor.ape": "model-00005-of-00015.safetensors", "model.layers.14.attn.indexer.compressor.norm.weight": "model-00005-of-00015.safetensors", "model.layers.14.attn.indexer.compressor.wgate.biases": "model-00005-of-00015.safetensors", "model.layers.14.attn.indexer.compressor.wgate.scales": "model-00005-of-00015.safetensors", "model.layers.14.attn.indexer.compressor.wgate.weight": "model-00005-of-00015.safetensors", "model.layers.14.attn.indexer.compressor.wkv.biases": "model-00005-of-00015.safetensors", "model.layers.14.attn.indexer.compressor.wkv.scales": "model-00005-of-00015.safetensors", "model.layers.14.attn.indexer.compressor.wkv.weight": "model-00005-of-00015.safetensors", "model.layers.14.attn.indexer.weights_proj.biases": "model-00005-of-00015.safetensors", "model.layers.14.attn.indexer.weights_proj.scales": "model-00005-of-00015.safetensors", "model.layers.14.attn.indexer.weights_proj.weight": "model-00005-of-00015.safetensors", "model.layers.14.attn.indexer.wq_b.biases": "model-00005-of-00015.safetensors", "model.layers.14.attn.indexer.wq_b.scales": "model-00005-of-00015.safetensors", "model.layers.14.attn.indexer.wq_b.weight": "model-00005-of-00015.safetensors", "model.layers.14.attn.kv_norm.weight": "model-00005-of-00015.safetensors", "model.layers.14.attn.q_norm.weight": "model-00005-of-00015.safetensors", "model.layers.14.attn.wkv.biases": "model-00005-of-00015.safetensors", "model.layers.14.attn.wkv.scales": "model-00005-of-00015.safetensors", "model.layers.14.attn.wkv.weight": "model-00005-of-00015.safetensors", "model.layers.14.attn.wo_a.biases": "model-00005-of-00015.safetensors", "model.layers.14.attn.wo_a.scales": "model-00005-of-00015.safetensors", "model.layers.14.attn.wo_a.weight": "model-00005-of-00015.safetensors", "model.layers.14.attn.wo_b.biases": "model-00005-of-00015.safetensors", "model.layers.14.attn.wo_b.scales": "model-00005-of-00015.safetensors", "model.layers.14.attn.wo_b.weight": "model-00005-of-00015.safetensors", "model.layers.14.attn.wq_a.biases": "model-00005-of-00015.safetensors", "model.layers.14.attn.wq_a.scales": "model-00005-of-00015.safetensors", "model.layers.14.attn.wq_a.weight": "model-00005-of-00015.safetensors", "model.layers.14.attn.wq_b.biases": "model-00005-of-00015.safetensors", "model.layers.14.attn.wq_b.scales": "model-00005-of-00015.safetensors", "model.layers.14.attn.wq_b.weight": "model-00005-of-00015.safetensors", "model.layers.14.attn_hc.base": "model-00006-of-00015.safetensors", "model.layers.14.attn_hc.fn": "model-00006-of-00015.safetensors", "model.layers.14.attn_hc.scale": "model-00006-of-00015.safetensors", "model.layers.14.attn_norm.weight": "model-00006-of-00015.safetensors", "model.layers.14.ffn.gate.e_score_correction_bias": "model-00005-of-00015.safetensors", "model.layers.14.ffn.gate.weight": "model-00005-of-00015.safetensors", "model.layers.14.ffn.shared_experts.down_proj.biases": "model-00006-of-00015.safetensors", "model.layers.14.ffn.shared_experts.down_proj.scales": "model-00006-of-00015.safetensors", "model.layers.14.ffn.shared_experts.down_proj.weight": "model-00006-of-00015.safetensors", "model.layers.14.ffn.shared_experts.gate_proj.biases": "model-00006-of-00015.safetensors", "model.layers.14.ffn.shared_experts.gate_proj.scales": "model-00006-of-00015.safetensors", "model.layers.14.ffn.shared_experts.gate_proj.weight": "model-00006-of-00015.safetensors", "model.layers.14.ffn.shared_experts.up_proj.biases": "model-00006-of-00015.safetensors", "model.layers.14.ffn.shared_experts.up_proj.scales": "model-00006-of-00015.safetensors", "model.layers.14.ffn.shared_experts.up_proj.weight": "model-00006-of-00015.safetensors", "model.layers.14.ffn.switch_mlp.down_proj.scales": "model-00006-of-00015.safetensors", "model.layers.14.ffn.switch_mlp.down_proj.weight": "model-00006-of-00015.safetensors", "model.layers.14.ffn.switch_mlp.gate_proj.scales": "model-00006-of-00015.safetensors", "model.layers.14.ffn.switch_mlp.gate_proj.weight": "model-00006-of-00015.safetensors", "model.layers.14.ffn.switch_mlp.up_proj.scales": "model-00006-of-00015.safetensors", "model.layers.14.ffn.switch_mlp.up_proj.weight": "model-00006-of-00015.safetensors", "model.layers.14.ffn_hc.base": "model-00006-of-00015.safetensors", "model.layers.14.ffn_hc.fn": "model-00006-of-00015.safetensors", "model.layers.14.ffn_hc.scale": "model-00006-of-00015.safetensors", "model.layers.14.ffn_norm.weight": "model-00006-of-00015.safetensors", "model.layers.15.attn.attn_sink": "model-00006-of-00015.safetensors", "model.layers.15.attn.compressor.ape": "model-00006-of-00015.safetensors", "model.layers.15.attn.compressor.norm.weight": "model-00006-of-00015.safetensors", "model.layers.15.attn.compressor.wgate.biases": "model-00006-of-00015.safetensors", "model.layers.15.attn.compressor.wgate.scales": "model-00006-of-00015.safetensors", "model.layers.15.attn.compressor.wgate.weight": "model-00006-of-00015.safetensors", "model.layers.15.attn.compressor.wkv.biases": "model-00006-of-00015.safetensors", "model.layers.15.attn.compressor.wkv.scales": "model-00006-of-00015.safetensors", "model.layers.15.attn.compressor.wkv.weight": "model-00006-of-00015.safetensors", "model.layers.15.attn.kv_norm.weight": "model-00006-of-00015.safetensors", "model.layers.15.attn.q_norm.weight": "model-00006-of-00015.safetensors", "model.layers.15.attn.wkv.biases": "model-00006-of-00015.safetensors", "model.layers.15.attn.wkv.scales": "model-00006-of-00015.safetensors", "model.layers.15.attn.wkv.weight": "model-00006-of-00015.safetensors", "model.layers.15.attn.wo_a.biases": "model-00006-of-00015.safetensors", "model.layers.15.attn.wo_a.scales": "model-00006-of-00015.safetensors", "model.layers.15.attn.wo_a.weight": "model-00006-of-00015.safetensors", "model.layers.15.attn.wo_b.biases": "model-00006-of-00015.safetensors", "model.layers.15.attn.wo_b.scales": "model-00006-of-00015.safetensors", "model.layers.15.attn.wo_b.weight": "model-00006-of-00015.safetensors", "model.layers.15.attn.wq_a.biases": "model-00006-of-00015.safetensors", "model.layers.15.attn.wq_a.scales": "model-00006-of-00015.safetensors", "model.layers.15.attn.wq_a.weight": "model-00006-of-00015.safetensors", "model.layers.15.attn.wq_b.biases": "model-00006-of-00015.safetensors", "model.layers.15.attn.wq_b.scales": "model-00006-of-00015.safetensors", "model.layers.15.attn.wq_b.weight": "model-00006-of-00015.safetensors", "model.layers.15.attn_hc.base": "model-00006-of-00015.safetensors", "model.layers.15.attn_hc.fn": "model-00006-of-00015.safetensors", "model.layers.15.attn_hc.scale": "model-00006-of-00015.safetensors", "model.layers.15.attn_norm.weight": "model-00006-of-00015.safetensors", "model.layers.15.ffn.gate.e_score_correction_bias": "model-00006-of-00015.safetensors", "model.layers.15.ffn.gate.weight": "model-00006-of-00015.safetensors", "model.layers.15.ffn.shared_experts.down_proj.biases": "model-00006-of-00015.safetensors", "model.layers.15.ffn.shared_experts.down_proj.scales": "model-00006-of-00015.safetensors", "model.layers.15.ffn.shared_experts.down_proj.weight": "model-00006-of-00015.safetensors", "model.layers.15.ffn.shared_experts.gate_proj.biases": "model-00006-of-00015.safetensors", "model.layers.15.ffn.shared_experts.gate_proj.scales": "model-00006-of-00015.safetensors", "model.layers.15.ffn.shared_experts.gate_proj.weight": "model-00006-of-00015.safetensors", "model.layers.15.ffn.shared_experts.up_proj.biases": "model-00006-of-00015.safetensors", "model.layers.15.ffn.shared_experts.up_proj.scales": "model-00006-of-00015.safetensors", "model.layers.15.ffn.shared_experts.up_proj.weight": "model-00006-of-00015.safetensors", "model.layers.15.ffn.switch_mlp.down_proj.scales": "model-00006-of-00015.safetensors", "model.layers.15.ffn.switch_mlp.down_proj.weight": "model-00006-of-00015.safetensors", "model.layers.15.ffn.switch_mlp.gate_proj.scales": "model-00006-of-00015.safetensors", "model.layers.15.ffn.switch_mlp.gate_proj.weight": "model-00006-of-00015.safetensors", "model.layers.15.ffn.switch_mlp.up_proj.scales": "model-00006-of-00015.safetensors", "model.layers.15.ffn.switch_mlp.up_proj.weight": "model-00006-of-00015.safetensors", "model.layers.15.ffn_hc.base": "model-00006-of-00015.safetensors", "model.layers.15.ffn_hc.fn": "model-00006-of-00015.safetensors", "model.layers.15.ffn_hc.scale": "model-00006-of-00015.safetensors", "model.layers.15.ffn_norm.weight": "model-00006-of-00015.safetensors", "model.layers.16.attn.attn_sink": "model-00006-of-00015.safetensors", "model.layers.16.attn.compressor.ape": "model-00006-of-00015.safetensors", "model.layers.16.attn.compressor.norm.weight": "model-00006-of-00015.safetensors", "model.layers.16.attn.compressor.wgate.biases": "model-00006-of-00015.safetensors", "model.layers.16.attn.compressor.wgate.scales": "model-00006-of-00015.safetensors", "model.layers.16.attn.compressor.wgate.weight": "model-00006-of-00015.safetensors", "model.layers.16.attn.compressor.wkv.biases": "model-00006-of-00015.safetensors", "model.layers.16.attn.compressor.wkv.scales": "model-00006-of-00015.safetensors", "model.layers.16.attn.compressor.wkv.weight": "model-00006-of-00015.safetensors", "model.layers.16.attn.indexer.compressor.ape": "model-00006-of-00015.safetensors", "model.layers.16.attn.indexer.compressor.norm.weight": "model-00006-of-00015.safetensors", "model.layers.16.attn.indexer.compressor.wgate.biases": "model-00006-of-00015.safetensors", "model.layers.16.attn.indexer.compressor.wgate.scales": "model-00006-of-00015.safetensors", "model.layers.16.attn.indexer.compressor.wgate.weight": "model-00006-of-00015.safetensors", "model.layers.16.attn.indexer.compressor.wkv.biases": "model-00006-of-00015.safetensors", "model.layers.16.attn.indexer.compressor.wkv.scales": "model-00006-of-00015.safetensors", "model.layers.16.attn.indexer.compressor.wkv.weight": "model-00006-of-00015.safetensors", "model.layers.16.attn.indexer.weights_proj.biases": "model-00006-of-00015.safetensors", "model.layers.16.attn.indexer.weights_proj.scales": "model-00006-of-00015.safetensors", "model.layers.16.attn.indexer.weights_proj.weight": "model-00006-of-00015.safetensors", "model.layers.16.attn.indexer.wq_b.biases": "model-00006-of-00015.safetensors", "model.layers.16.attn.indexer.wq_b.scales": "model-00006-of-00015.safetensors", "model.layers.16.attn.indexer.wq_b.weight": "model-00006-of-00015.safetensors", "model.layers.16.attn.kv_norm.weight": "model-00006-of-00015.safetensors", "model.layers.16.attn.q_norm.weight": "model-00006-of-00015.safetensors", "model.layers.16.attn.wkv.biases": "model-00006-of-00015.safetensors", "model.layers.16.attn.wkv.scales": "model-00006-of-00015.safetensors", "model.layers.16.attn.wkv.weight": "model-00006-of-00015.safetensors", "model.layers.16.attn.wo_a.biases": "model-00006-of-00015.safetensors", "model.layers.16.attn.wo_a.scales": "model-00006-of-00015.safetensors", "model.layers.16.attn.wo_a.weight": "model-00006-of-00015.safetensors", "model.layers.16.attn.wo_b.biases": "model-00006-of-00015.safetensors", "model.layers.16.attn.wo_b.scales": "model-00006-of-00015.safetensors", "model.layers.16.attn.wo_b.weight": "model-00006-of-00015.safetensors", "model.layers.16.attn.wq_a.biases": "model-00006-of-00015.safetensors", "model.layers.16.attn.wq_a.scales": "model-00006-of-00015.safetensors", "model.layers.16.attn.wq_a.weight": "model-00006-of-00015.safetensors", "model.layers.16.attn.wq_b.biases": "model-00006-of-00015.safetensors", "model.layers.16.attn.wq_b.scales": "model-00006-of-00015.safetensors", "model.layers.16.attn.wq_b.weight": "model-00006-of-00015.safetensors", "model.layers.16.attn_hc.base": "model-00006-of-00015.safetensors", "model.layers.16.attn_hc.fn": "model-00006-of-00015.safetensors", "model.layers.16.attn_hc.scale": "model-00006-of-00015.safetensors", "model.layers.16.attn_norm.weight": "model-00006-of-00015.safetensors", "model.layers.16.ffn.gate.e_score_correction_bias": "model-00006-of-00015.safetensors", "model.layers.16.ffn.gate.weight": "model-00006-of-00015.safetensors", "model.layers.16.ffn.shared_experts.down_proj.biases": "model-00006-of-00015.safetensors", "model.layers.16.ffn.shared_experts.down_proj.scales": "model-00006-of-00015.safetensors", "model.layers.16.ffn.shared_experts.down_proj.weight": "model-00006-of-00015.safetensors", "model.layers.16.ffn.shared_experts.gate_proj.biases": "model-00006-of-00015.safetensors", "model.layers.16.ffn.shared_experts.gate_proj.scales": "model-00006-of-00015.safetensors", "model.layers.16.ffn.shared_experts.gate_proj.weight": "model-00006-of-00015.safetensors", "model.layers.16.ffn.shared_experts.up_proj.biases": "model-00006-of-00015.safetensors", "model.layers.16.ffn.shared_experts.up_proj.scales": "model-00006-of-00015.safetensors", "model.layers.16.ffn.shared_experts.up_proj.weight": "model-00006-of-00015.safetensors", "model.layers.16.ffn.switch_mlp.down_proj.scales": "model-00006-of-00015.safetensors", "model.layers.16.ffn.switch_mlp.down_proj.weight": "model-00006-of-00015.safetensors", "model.layers.16.ffn.switch_mlp.gate_proj.scales": "model-00006-of-00015.safetensors", "model.layers.16.ffn.switch_mlp.gate_proj.weight": "model-00006-of-00015.safetensors", "model.layers.16.ffn.switch_mlp.up_proj.scales": "model-00006-of-00015.safetensors", "model.layers.16.ffn.switch_mlp.up_proj.weight": "model-00006-of-00015.safetensors", "model.layers.16.ffn_hc.base": "model-00006-of-00015.safetensors", "model.layers.16.ffn_hc.fn": "model-00006-of-00015.safetensors", "model.layers.16.ffn_hc.scale": "model-00006-of-00015.safetensors", "model.layers.16.ffn_norm.weight": "model-00006-of-00015.safetensors", "model.layers.17.attn.attn_sink": "model-00007-of-00015.safetensors", "model.layers.17.attn.compressor.ape": "model-00007-of-00015.safetensors", "model.layers.17.attn.compressor.norm.weight": "model-00007-of-00015.safetensors", "model.layers.17.attn.compressor.wgate.biases": "model-00007-of-00015.safetensors", "model.layers.17.attn.compressor.wgate.scales": "model-00007-of-00015.safetensors", "model.layers.17.attn.compressor.wgate.weight": "model-00007-of-00015.safetensors", "model.layers.17.attn.compressor.wkv.biases": "model-00007-of-00015.safetensors", "model.layers.17.attn.compressor.wkv.scales": "model-00007-of-00015.safetensors", "model.layers.17.attn.compressor.wkv.weight": "model-00007-of-00015.safetensors", "model.layers.17.attn.kv_norm.weight": "model-00006-of-00015.safetensors", "model.layers.17.attn.q_norm.weight": "model-00006-of-00015.safetensors", "model.layers.17.attn.wkv.biases": "model-00006-of-00015.safetensors", "model.layers.17.attn.wkv.scales": "model-00006-of-00015.safetensors", "model.layers.17.attn.wkv.weight": "model-00006-of-00015.safetensors", "model.layers.17.attn.wo_a.biases": "model-00006-of-00015.safetensors", "model.layers.17.attn.wo_a.scales": "model-00006-of-00015.safetensors", "model.layers.17.attn.wo_a.weight": "model-00006-of-00015.safetensors", "model.layers.17.attn.wo_b.biases": "model-00007-of-00015.safetensors", "model.layers.17.attn.wo_b.scales": "model-00007-of-00015.safetensors", "model.layers.17.attn.wo_b.weight": "model-00007-of-00015.safetensors", "model.layers.17.attn.wq_a.biases": "model-00006-of-00015.safetensors", "model.layers.17.attn.wq_a.scales": "model-00006-of-00015.safetensors", "model.layers.17.attn.wq_a.weight": "model-00006-of-00015.safetensors", "model.layers.17.attn.wq_b.biases": "model-00006-of-00015.safetensors", "model.layers.17.attn.wq_b.scales": "model-00006-of-00015.safetensors", "model.layers.17.attn.wq_b.weight": "model-00006-of-00015.safetensors", "model.layers.17.attn_hc.base": "model-00007-of-00015.safetensors", "model.layers.17.attn_hc.fn": "model-00007-of-00015.safetensors", "model.layers.17.attn_hc.scale": "model-00007-of-00015.safetensors", "model.layers.17.attn_norm.weight": "model-00007-of-00015.safetensors", "model.layers.17.ffn.gate.e_score_correction_bias": "model-00007-of-00015.safetensors", "model.layers.17.ffn.gate.weight": "model-00007-of-00015.safetensors", "model.layers.17.ffn.shared_experts.down_proj.biases": "model-00007-of-00015.safetensors", "model.layers.17.ffn.shared_experts.down_proj.scales": "model-00007-of-00015.safetensors", "model.layers.17.ffn.shared_experts.down_proj.weight": "model-00007-of-00015.safetensors", "model.layers.17.ffn.shared_experts.gate_proj.biases": "model-00007-of-00015.safetensors", "model.layers.17.ffn.shared_experts.gate_proj.scales": "model-00007-of-00015.safetensors", "model.layers.17.ffn.shared_experts.gate_proj.weight": "model-00007-of-00015.safetensors", "model.layers.17.ffn.shared_experts.up_proj.biases": "model-00007-of-00015.safetensors", "model.layers.17.ffn.shared_experts.up_proj.scales": "model-00007-of-00015.safetensors", "model.layers.17.ffn.shared_experts.up_proj.weight": "model-00007-of-00015.safetensors", "model.layers.17.ffn.switch_mlp.down_proj.scales": "model-00007-of-00015.safetensors", "model.layers.17.ffn.switch_mlp.down_proj.weight": "model-00007-of-00015.safetensors", "model.layers.17.ffn.switch_mlp.gate_proj.scales": "model-00007-of-00015.safetensors", "model.layers.17.ffn.switch_mlp.gate_proj.weight": "model-00007-of-00015.safetensors", "model.layers.17.ffn.switch_mlp.up_proj.scales": "model-00007-of-00015.safetensors", "model.layers.17.ffn.switch_mlp.up_proj.weight": "model-00007-of-00015.safetensors", "model.layers.17.ffn_hc.base": "model-00007-of-00015.safetensors", "model.layers.17.ffn_hc.fn": "model-00007-of-00015.safetensors", "model.layers.17.ffn_hc.scale": "model-00007-of-00015.safetensors", "model.layers.17.ffn_norm.weight": "model-00007-of-00015.safetensors", "model.layers.18.attn.attn_sink": "model-00007-of-00015.safetensors", "model.layers.18.attn.compressor.ape": "model-00007-of-00015.safetensors", "model.layers.18.attn.compressor.norm.weight": "model-00007-of-00015.safetensors", "model.layers.18.attn.compressor.wgate.biases": "model-00007-of-00015.safetensors", "model.layers.18.attn.compressor.wgate.scales": "model-00007-of-00015.safetensors", "model.layers.18.attn.compressor.wgate.weight": "model-00007-of-00015.safetensors", "model.layers.18.attn.compressor.wkv.biases": "model-00007-of-00015.safetensors", "model.layers.18.attn.compressor.wkv.scales": "model-00007-of-00015.safetensors", "model.layers.18.attn.compressor.wkv.weight": "model-00007-of-00015.safetensors", "model.layers.18.attn.indexer.compressor.ape": "model-00007-of-00015.safetensors", "model.layers.18.attn.indexer.compressor.norm.weight": "model-00007-of-00015.safetensors", "model.layers.18.attn.indexer.compressor.wgate.biases": "model-00007-of-00015.safetensors", "model.layers.18.attn.indexer.compressor.wgate.scales": "model-00007-of-00015.safetensors", "model.layers.18.attn.indexer.compressor.wgate.weight": "model-00007-of-00015.safetensors", "model.layers.18.attn.indexer.compressor.wkv.biases": "model-00007-of-00015.safetensors", "model.layers.18.attn.indexer.compressor.wkv.scales": "model-00007-of-00015.safetensors", "model.layers.18.attn.indexer.compressor.wkv.weight": "model-00007-of-00015.safetensors", "model.layers.18.attn.indexer.weights_proj.biases": "model-00007-of-00015.safetensors", "model.layers.18.attn.indexer.weights_proj.scales": "model-00007-of-00015.safetensors", "model.layers.18.attn.indexer.weights_proj.weight": "model-00007-of-00015.safetensors", "model.layers.18.attn.indexer.wq_b.biases": "model-00007-of-00015.safetensors", "model.layers.18.attn.indexer.wq_b.scales": "model-00007-of-00015.safetensors", "model.layers.18.attn.indexer.wq_b.weight": "model-00007-of-00015.safetensors", "model.layers.18.attn.kv_norm.weight": "model-00007-of-00015.safetensors", "model.layers.18.attn.q_norm.weight": "model-00007-of-00015.safetensors", "model.layers.18.attn.wkv.biases": "model-00007-of-00015.safetensors", "model.layers.18.attn.wkv.scales": "model-00007-of-00015.safetensors", "model.layers.18.attn.wkv.weight": "model-00007-of-00015.safetensors", "model.layers.18.attn.wo_a.biases": "model-00007-of-00015.safetensors", "model.layers.18.attn.wo_a.scales": "model-00007-of-00015.safetensors", "model.layers.18.attn.wo_a.weight": "model-00007-of-00015.safetensors", "model.layers.18.attn.wo_b.biases": "model-00007-of-00015.safetensors", "model.layers.18.attn.wo_b.scales": "model-00007-of-00015.safetensors", "model.layers.18.attn.wo_b.weight": "model-00007-of-00015.safetensors", "model.layers.18.attn.wq_a.biases": "model-00007-of-00015.safetensors", "model.layers.18.attn.wq_a.scales": "model-00007-of-00015.safetensors", "model.layers.18.attn.wq_a.weight": "model-00007-of-00015.safetensors", "model.layers.18.attn.wq_b.biases": "model-00007-of-00015.safetensors", "model.layers.18.attn.wq_b.scales": "model-00007-of-00015.safetensors", "model.layers.18.attn.wq_b.weight": "model-00007-of-00015.safetensors", "model.layers.18.attn_hc.base": "model-00007-of-00015.safetensors", "model.layers.18.attn_hc.fn": "model-00007-of-00015.safetensors", "model.layers.18.attn_hc.scale": "model-00007-of-00015.safetensors", "model.layers.18.attn_norm.weight": "model-00007-of-00015.safetensors", "model.layers.18.ffn.gate.e_score_correction_bias": "model-00007-of-00015.safetensors", "model.layers.18.ffn.gate.weight": "model-00007-of-00015.safetensors", "model.layers.18.ffn.shared_experts.down_proj.biases": "model-00007-of-00015.safetensors", "model.layers.18.ffn.shared_experts.down_proj.scales": "model-00007-of-00015.safetensors", "model.layers.18.ffn.shared_experts.down_proj.weight": "model-00007-of-00015.safetensors", "model.layers.18.ffn.shared_experts.gate_proj.biases": "model-00007-of-00015.safetensors", "model.layers.18.ffn.shared_experts.gate_proj.scales": "model-00007-of-00015.safetensors", "model.layers.18.ffn.shared_experts.gate_proj.weight": "model-00007-of-00015.safetensors", "model.layers.18.ffn.shared_experts.up_proj.biases": "model-00007-of-00015.safetensors", "model.layers.18.ffn.shared_experts.up_proj.scales": "model-00007-of-00015.safetensors", "model.layers.18.ffn.shared_experts.up_proj.weight": "model-00007-of-00015.safetensors", "model.layers.18.ffn.switch_mlp.down_proj.scales": "model-00007-of-00015.safetensors", "model.layers.18.ffn.switch_mlp.down_proj.weight": "model-00007-of-00015.safetensors", "model.layers.18.ffn.switch_mlp.gate_proj.scales": "model-00007-of-00015.safetensors", "model.layers.18.ffn.switch_mlp.gate_proj.weight": "model-00007-of-00015.safetensors", "model.layers.18.ffn.switch_mlp.up_proj.scales": "model-00007-of-00015.safetensors", "model.layers.18.ffn.switch_mlp.up_proj.weight": "model-00007-of-00015.safetensors", "model.layers.18.ffn_hc.base": "model-00007-of-00015.safetensors", "model.layers.18.ffn_hc.fn": "model-00007-of-00015.safetensors", "model.layers.18.ffn_hc.scale": "model-00007-of-00015.safetensors", "model.layers.18.ffn_norm.weight": "model-00007-of-00015.safetensors", "model.layers.19.attn.attn_sink": "model-00007-of-00015.safetensors", "model.layers.19.attn.compressor.ape": "model-00007-of-00015.safetensors", "model.layers.19.attn.compressor.norm.weight": "model-00007-of-00015.safetensors", "model.layers.19.attn.compressor.wgate.biases": "model-00007-of-00015.safetensors", "model.layers.19.attn.compressor.wgate.scales": "model-00007-of-00015.safetensors", "model.layers.19.attn.compressor.wgate.weight": "model-00007-of-00015.safetensors", "model.layers.19.attn.compressor.wkv.biases": "model-00007-of-00015.safetensors", "model.layers.19.attn.compressor.wkv.scales": "model-00007-of-00015.safetensors", "model.layers.19.attn.compressor.wkv.weight": "model-00007-of-00015.safetensors", "model.layers.19.attn.kv_norm.weight": "model-00007-of-00015.safetensors", "model.layers.19.attn.q_norm.weight": "model-00007-of-00015.safetensors", "model.layers.19.attn.wkv.biases": "model-00007-of-00015.safetensors", "model.layers.19.attn.wkv.scales": "model-00007-of-00015.safetensors", "model.layers.19.attn.wkv.weight": "model-00007-of-00015.safetensors", "model.layers.19.attn.wo_a.biases": "model-00007-of-00015.safetensors", "model.layers.19.attn.wo_a.scales": "model-00007-of-00015.safetensors", "model.layers.19.attn.wo_a.weight": "model-00007-of-00015.safetensors", "model.layers.19.attn.wo_b.biases": "model-00007-of-00015.safetensors", "model.layers.19.attn.wo_b.scales": "model-00007-of-00015.safetensors", "model.layers.19.attn.wo_b.weight": "model-00007-of-00015.safetensors", "model.layers.19.attn.wq_a.biases": "model-00007-of-00015.safetensors", "model.layers.19.attn.wq_a.scales": "model-00007-of-00015.safetensors", "model.layers.19.attn.wq_a.weight": "model-00007-of-00015.safetensors", "model.layers.19.attn.wq_b.biases": "model-00007-of-00015.safetensors", "model.layers.19.attn.wq_b.scales": "model-00007-of-00015.safetensors", "model.layers.19.attn.wq_b.weight": "model-00007-of-00015.safetensors", "model.layers.19.attn_hc.base": "model-00007-of-00015.safetensors", "model.layers.19.attn_hc.fn": "model-00007-of-00015.safetensors", "model.layers.19.attn_hc.scale": "model-00007-of-00015.safetensors", "model.layers.19.attn_norm.weight": "model-00007-of-00015.safetensors", "model.layers.19.ffn.gate.e_score_correction_bias": "model-00007-of-00015.safetensors", "model.layers.19.ffn.gate.weight": "model-00007-of-00015.safetensors", "model.layers.19.ffn.shared_experts.down_proj.biases": "model-00007-of-00015.safetensors", "model.layers.19.ffn.shared_experts.down_proj.scales": "model-00007-of-00015.safetensors", "model.layers.19.ffn.shared_experts.down_proj.weight": "model-00007-of-00015.safetensors", "model.layers.19.ffn.shared_experts.gate_proj.biases": "model-00007-of-00015.safetensors", "model.layers.19.ffn.shared_experts.gate_proj.scales": "model-00007-of-00015.safetensors", "model.layers.19.ffn.shared_experts.gate_proj.weight": "model-00007-of-00015.safetensors", "model.layers.19.ffn.shared_experts.up_proj.biases": "model-00007-of-00015.safetensors", "model.layers.19.ffn.shared_experts.up_proj.scales": "model-00007-of-00015.safetensors", "model.layers.19.ffn.shared_experts.up_proj.weight": "model-00007-of-00015.safetensors", "model.layers.19.ffn.switch_mlp.down_proj.scales": "model-00007-of-00015.safetensors", "model.layers.19.ffn.switch_mlp.down_proj.weight": "model-00007-of-00015.safetensors", "model.layers.19.ffn.switch_mlp.gate_proj.scales": "model-00007-of-00015.safetensors", "model.layers.19.ffn.switch_mlp.gate_proj.weight": "model-00007-of-00015.safetensors", "model.layers.19.ffn.switch_mlp.up_proj.scales": "model-00007-of-00015.safetensors", "model.layers.19.ffn.switch_mlp.up_proj.weight": "model-00007-of-00015.safetensors", "model.layers.19.ffn_hc.base": "model-00007-of-00015.safetensors", "model.layers.19.ffn_hc.fn": "model-00007-of-00015.safetensors", "model.layers.19.ffn_hc.scale": "model-00007-of-00015.safetensors", "model.layers.19.ffn_norm.weight": "model-00007-of-00015.safetensors", "model.layers.2.attn.attn_sink": "model-00001-of-00015.safetensors", "model.layers.2.attn.compressor.ape": "model-00001-of-00015.safetensors", "model.layers.2.attn.compressor.norm.weight": "model-00001-of-00015.safetensors", "model.layers.2.attn.compressor.wgate.biases": "model-00001-of-00015.safetensors", "model.layers.2.attn.compressor.wgate.scales": "model-00001-of-00015.safetensors", "model.layers.2.attn.compressor.wgate.weight": "model-00001-of-00015.safetensors", "model.layers.2.attn.compressor.wkv.biases": "model-00001-of-00015.safetensors", "model.layers.2.attn.compressor.wkv.scales": "model-00001-of-00015.safetensors", "model.layers.2.attn.compressor.wkv.weight": "model-00001-of-00015.safetensors", "model.layers.2.attn.indexer.compressor.ape": "model-00001-of-00015.safetensors", "model.layers.2.attn.indexer.compressor.norm.weight": "model-00001-of-00015.safetensors", "model.layers.2.attn.indexer.compressor.wgate.biases": "model-00001-of-00015.safetensors", "model.layers.2.attn.indexer.compressor.wgate.scales": "model-00001-of-00015.safetensors", "model.layers.2.attn.indexer.compressor.wgate.weight": "model-00001-of-00015.safetensors", "model.layers.2.attn.indexer.compressor.wkv.biases": "model-00001-of-00015.safetensors", "model.layers.2.attn.indexer.compressor.wkv.scales": "model-00001-of-00015.safetensors", "model.layers.2.attn.indexer.compressor.wkv.weight": "model-00001-of-00015.safetensors", "model.layers.2.attn.indexer.weights_proj.biases": "model-00001-of-00015.safetensors", "model.layers.2.attn.indexer.weights_proj.scales": "model-00001-of-00015.safetensors", "model.layers.2.attn.indexer.weights_proj.weight": "model-00001-of-00015.safetensors", "model.layers.2.attn.indexer.wq_b.biases": "model-00001-of-00015.safetensors", "model.layers.2.attn.indexer.wq_b.scales": "model-00001-of-00015.safetensors", "model.layers.2.attn.indexer.wq_b.weight": "model-00001-of-00015.safetensors", "model.layers.2.attn.kv_norm.weight": "model-00001-of-00015.safetensors", "model.layers.2.attn.q_norm.weight": "model-00001-of-00015.safetensors", "model.layers.2.attn.wkv.biases": "model-00001-of-00015.safetensors", "model.layers.2.attn.wkv.scales": "model-00001-of-00015.safetensors", "model.layers.2.attn.wkv.weight": "model-00001-of-00015.safetensors", "model.layers.2.attn.wo_a.biases": "model-00001-of-00015.safetensors", "model.layers.2.attn.wo_a.scales": "model-00001-of-00015.safetensors", "model.layers.2.attn.wo_a.weight": "model-00001-of-00015.safetensors", "model.layers.2.attn.wo_b.biases": "model-00001-of-00015.safetensors", "model.layers.2.attn.wo_b.scales": "model-00001-of-00015.safetensors", "model.layers.2.attn.wo_b.weight": "model-00001-of-00015.safetensors", "model.layers.2.attn.wq_a.biases": "model-00001-of-00015.safetensors", "model.layers.2.attn.wq_a.scales": "model-00001-of-00015.safetensors", "model.layers.2.attn.wq_a.weight": "model-00001-of-00015.safetensors", "model.layers.2.attn.wq_b.biases": "model-00001-of-00015.safetensors", "model.layers.2.attn.wq_b.scales": "model-00001-of-00015.safetensors", "model.layers.2.attn.wq_b.weight": "model-00001-of-00015.safetensors", "model.layers.2.attn_hc.base": "model-00002-of-00015.safetensors", "model.layers.2.attn_hc.fn": "model-00002-of-00015.safetensors", "model.layers.2.attn_hc.scale": "model-00002-of-00015.safetensors", "model.layers.2.attn_norm.weight": "model-00002-of-00015.safetensors", "model.layers.2.ffn.gate.tid2eid": "model-00001-of-00015.safetensors", "model.layers.2.ffn.gate.weight": "model-00001-of-00015.safetensors", "model.layers.2.ffn.shared_experts.down_proj.biases": "model-00002-of-00015.safetensors", "model.layers.2.ffn.shared_experts.down_proj.scales": "model-00002-of-00015.safetensors", "model.layers.2.ffn.shared_experts.down_proj.weight": "model-00002-of-00015.safetensors", "model.layers.2.ffn.shared_experts.gate_proj.biases": "model-00002-of-00015.safetensors", "model.layers.2.ffn.shared_experts.gate_proj.scales": "model-00002-of-00015.safetensors", "model.layers.2.ffn.shared_experts.gate_proj.weight": "model-00002-of-00015.safetensors", "model.layers.2.ffn.shared_experts.up_proj.biases": "model-00002-of-00015.safetensors", "model.layers.2.ffn.shared_experts.up_proj.scales": "model-00002-of-00015.safetensors", "model.layers.2.ffn.shared_experts.up_proj.weight": "model-00002-of-00015.safetensors", "model.layers.2.ffn.switch_mlp.down_proj.scales": "model-00002-of-00015.safetensors", "model.layers.2.ffn.switch_mlp.down_proj.weight": "model-00002-of-00015.safetensors", "model.layers.2.ffn.switch_mlp.gate_proj.scales": "model-00001-of-00015.safetensors", "model.layers.2.ffn.switch_mlp.gate_proj.weight": "model-00001-of-00015.safetensors", "model.layers.2.ffn.switch_mlp.up_proj.scales": "model-00001-of-00015.safetensors", "model.layers.2.ffn.switch_mlp.up_proj.weight": "model-00001-of-00015.safetensors", "model.layers.2.ffn_hc.base": "model-00002-of-00015.safetensors", "model.layers.2.ffn_hc.fn": "model-00002-of-00015.safetensors", "model.layers.2.ffn_hc.scale": "model-00002-of-00015.safetensors", "model.layers.2.ffn_norm.weight": "model-00002-of-00015.safetensors", "model.layers.20.attn.attn_sink": "model-00008-of-00015.safetensors", "model.layers.20.attn.compressor.ape": "model-00008-of-00015.safetensors", "model.layers.20.attn.compressor.norm.weight": "model-00008-of-00015.safetensors", "model.layers.20.attn.compressor.wgate.biases": "model-00008-of-00015.safetensors", "model.layers.20.attn.compressor.wgate.scales": "model-00008-of-00015.safetensors", "model.layers.20.attn.compressor.wgate.weight": "model-00008-of-00015.safetensors", "model.layers.20.attn.compressor.wkv.biases": "model-00008-of-00015.safetensors", "model.layers.20.attn.compressor.wkv.scales": "model-00008-of-00015.safetensors", "model.layers.20.attn.compressor.wkv.weight": "model-00008-of-00015.safetensors", "model.layers.20.attn.indexer.compressor.ape": "model-00008-of-00015.safetensors", "model.layers.20.attn.indexer.compressor.norm.weight": "model-00008-of-00015.safetensors", "model.layers.20.attn.indexer.compressor.wgate.biases": "model-00008-of-00015.safetensors", "model.layers.20.attn.indexer.compressor.wgate.scales": "model-00008-of-00015.safetensors", "model.layers.20.attn.indexer.compressor.wgate.weight": "model-00008-of-00015.safetensors", "model.layers.20.attn.indexer.compressor.wkv.biases": "model-00008-of-00015.safetensors", "model.layers.20.attn.indexer.compressor.wkv.scales": "model-00008-of-00015.safetensors", "model.layers.20.attn.indexer.compressor.wkv.weight": "model-00008-of-00015.safetensors", "model.layers.20.attn.indexer.weights_proj.biases": "model-00008-of-00015.safetensors", "model.layers.20.attn.indexer.weights_proj.scales": "model-00008-of-00015.safetensors", "model.layers.20.attn.indexer.weights_proj.weight": "model-00008-of-00015.safetensors", "model.layers.20.attn.indexer.wq_b.biases": "model-00008-of-00015.safetensors", "model.layers.20.attn.indexer.wq_b.scales": "model-00008-of-00015.safetensors", "model.layers.20.attn.indexer.wq_b.weight": "model-00008-of-00015.safetensors", "model.layers.20.attn.kv_norm.weight": "model-00007-of-00015.safetensors", "model.layers.20.attn.q_norm.weight": "model-00007-of-00015.safetensors", "model.layers.20.attn.wkv.biases": "model-00007-of-00015.safetensors", "model.layers.20.attn.wkv.scales": "model-00007-of-00015.safetensors", "model.layers.20.attn.wkv.weight": "model-00007-of-00015.safetensors", "model.layers.20.attn.wo_a.biases": "model-00008-of-00015.safetensors", "model.layers.20.attn.wo_a.scales": "model-00008-of-00015.safetensors", "model.layers.20.attn.wo_a.weight": "model-00008-of-00015.safetensors", "model.layers.20.attn.wo_b.biases": "model-00008-of-00015.safetensors", "model.layers.20.attn.wo_b.scales": "model-00008-of-00015.safetensors", "model.layers.20.attn.wo_b.weight": "model-00008-of-00015.safetensors", "model.layers.20.attn.wq_a.biases": "model-00007-of-00015.safetensors", "model.layers.20.attn.wq_a.scales": "model-00007-of-00015.safetensors", "model.layers.20.attn.wq_a.weight": "model-00007-of-00015.safetensors", "model.layers.20.attn.wq_b.biases": "model-00007-of-00015.safetensors", "model.layers.20.attn.wq_b.scales": "model-00007-of-00015.safetensors", "model.layers.20.attn.wq_b.weight": "model-00007-of-00015.safetensors", "model.layers.20.attn_hc.base": "model-00008-of-00015.safetensors", "model.layers.20.attn_hc.fn": "model-00008-of-00015.safetensors", "model.layers.20.attn_hc.scale": "model-00008-of-00015.safetensors", "model.layers.20.attn_norm.weight": "model-00008-of-00015.safetensors", "model.layers.20.ffn.gate.e_score_correction_bias": "model-00008-of-00015.safetensors", "model.layers.20.ffn.gate.weight": "model-00008-of-00015.safetensors", "model.layers.20.ffn.shared_experts.down_proj.biases": "model-00008-of-00015.safetensors", "model.layers.20.ffn.shared_experts.down_proj.scales": "model-00008-of-00015.safetensors", "model.layers.20.ffn.shared_experts.down_proj.weight": "model-00008-of-00015.safetensors", "model.layers.20.ffn.shared_experts.gate_proj.biases": "model-00008-of-00015.safetensors", "model.layers.20.ffn.shared_experts.gate_proj.scales": "model-00008-of-00015.safetensors", "model.layers.20.ffn.shared_experts.gate_proj.weight": "model-00008-of-00015.safetensors", "model.layers.20.ffn.shared_experts.up_proj.biases": "model-00008-of-00015.safetensors", "model.layers.20.ffn.shared_experts.up_proj.scales": "model-00008-of-00015.safetensors", "model.layers.20.ffn.shared_experts.up_proj.weight": "model-00008-of-00015.safetensors", "model.layers.20.ffn.switch_mlp.down_proj.scales": "model-00008-of-00015.safetensors", "model.layers.20.ffn.switch_mlp.down_proj.weight": "model-00008-of-00015.safetensors", "model.layers.20.ffn.switch_mlp.gate_proj.scales": "model-00008-of-00015.safetensors", "model.layers.20.ffn.switch_mlp.gate_proj.weight": "model-00008-of-00015.safetensors", "model.layers.20.ffn.switch_mlp.up_proj.scales": "model-00008-of-00015.safetensors", "model.layers.20.ffn.switch_mlp.up_proj.weight": "model-00008-of-00015.safetensors", "model.layers.20.ffn_hc.base": "model-00008-of-00015.safetensors", "model.layers.20.ffn_hc.fn": "model-00008-of-00015.safetensors", "model.layers.20.ffn_hc.scale": "model-00008-of-00015.safetensors", "model.layers.20.ffn_norm.weight": "model-00008-of-00015.safetensors", "model.layers.21.attn.attn_sink": "model-00008-of-00015.safetensors", "model.layers.21.attn.compressor.ape": "model-00008-of-00015.safetensors", "model.layers.21.attn.compressor.norm.weight": "model-00008-of-00015.safetensors", "model.layers.21.attn.compressor.wgate.biases": "model-00008-of-00015.safetensors", "model.layers.21.attn.compressor.wgate.scales": "model-00008-of-00015.safetensors", "model.layers.21.attn.compressor.wgate.weight": "model-00008-of-00015.safetensors", "model.layers.21.attn.compressor.wkv.biases": "model-00008-of-00015.safetensors", "model.layers.21.attn.compressor.wkv.scales": "model-00008-of-00015.safetensors", "model.layers.21.attn.compressor.wkv.weight": "model-00008-of-00015.safetensors", "model.layers.21.attn.kv_norm.weight": "model-00008-of-00015.safetensors", "model.layers.21.attn.q_norm.weight": "model-00008-of-00015.safetensors", "model.layers.21.attn.wkv.biases": "model-00008-of-00015.safetensors", "model.layers.21.attn.wkv.scales": "model-00008-of-00015.safetensors", "model.layers.21.attn.wkv.weight": "model-00008-of-00015.safetensors", "model.layers.21.attn.wo_a.biases": "model-00008-of-00015.safetensors", "model.layers.21.attn.wo_a.scales": "model-00008-of-00015.safetensors", "model.layers.21.attn.wo_a.weight": "model-00008-of-00015.safetensors", "model.layers.21.attn.wo_b.biases": "model-00008-of-00015.safetensors", "model.layers.21.attn.wo_b.scales": "model-00008-of-00015.safetensors", "model.layers.21.attn.wo_b.weight": "model-00008-of-00015.safetensors", "model.layers.21.attn.wq_a.biases": "model-00008-of-00015.safetensors", "model.layers.21.attn.wq_a.scales": "model-00008-of-00015.safetensors", "model.layers.21.attn.wq_a.weight": "model-00008-of-00015.safetensors", "model.layers.21.attn.wq_b.biases": "model-00008-of-00015.safetensors", "model.layers.21.attn.wq_b.scales": "model-00008-of-00015.safetensors", "model.layers.21.attn.wq_b.weight": "model-00008-of-00015.safetensors", "model.layers.21.attn_hc.base": "model-00008-of-00015.safetensors", "model.layers.21.attn_hc.fn": "model-00008-of-00015.safetensors", "model.layers.21.attn_hc.scale": "model-00008-of-00015.safetensors", "model.layers.21.attn_norm.weight": "model-00008-of-00015.safetensors", "model.layers.21.ffn.gate.e_score_correction_bias": "model-00008-of-00015.safetensors", "model.layers.21.ffn.gate.weight": "model-00008-of-00015.safetensors", "model.layers.21.ffn.shared_experts.down_proj.biases": "model-00008-of-00015.safetensors", "model.layers.21.ffn.shared_experts.down_proj.scales": "model-00008-of-00015.safetensors", "model.layers.21.ffn.shared_experts.down_proj.weight": "model-00008-of-00015.safetensors", "model.layers.21.ffn.shared_experts.gate_proj.biases": "model-00008-of-00015.safetensors", "model.layers.21.ffn.shared_experts.gate_proj.scales": "model-00008-of-00015.safetensors", "model.layers.21.ffn.shared_experts.gate_proj.weight": "model-00008-of-00015.safetensors", "model.layers.21.ffn.shared_experts.up_proj.biases": "model-00008-of-00015.safetensors", "model.layers.21.ffn.shared_experts.up_proj.scales": "model-00008-of-00015.safetensors", "model.layers.21.ffn.shared_experts.up_proj.weight": "model-00008-of-00015.safetensors", "model.layers.21.ffn.switch_mlp.down_proj.scales": "model-00008-of-00015.safetensors", "model.layers.21.ffn.switch_mlp.down_proj.weight": "model-00008-of-00015.safetensors", "model.layers.21.ffn.switch_mlp.gate_proj.scales": "model-00008-of-00015.safetensors", "model.layers.21.ffn.switch_mlp.gate_proj.weight": "model-00008-of-00015.safetensors", "model.layers.21.ffn.switch_mlp.up_proj.scales": "model-00008-of-00015.safetensors", "model.layers.21.ffn.switch_mlp.up_proj.weight": "model-00008-of-00015.safetensors", "model.layers.21.ffn_hc.base": "model-00008-of-00015.safetensors", "model.layers.21.ffn_hc.fn": "model-00008-of-00015.safetensors", "model.layers.21.ffn_hc.scale": "model-00008-of-00015.safetensors", "model.layers.21.ffn_norm.weight": "model-00008-of-00015.safetensors", "model.layers.22.attn.attn_sink": "model-00008-of-00015.safetensors", "model.layers.22.attn.compressor.ape": "model-00008-of-00015.safetensors", "model.layers.22.attn.compressor.norm.weight": "model-00008-of-00015.safetensors", "model.layers.22.attn.compressor.wgate.biases": "model-00008-of-00015.safetensors", "model.layers.22.attn.compressor.wgate.scales": "model-00008-of-00015.safetensors", "model.layers.22.attn.compressor.wgate.weight": "model-00008-of-00015.safetensors", "model.layers.22.attn.compressor.wkv.biases": "model-00008-of-00015.safetensors", "model.layers.22.attn.compressor.wkv.scales": "model-00008-of-00015.safetensors", "model.layers.22.attn.compressor.wkv.weight": "model-00008-of-00015.safetensors", "model.layers.22.attn.indexer.compressor.ape": "model-00008-of-00015.safetensors", "model.layers.22.attn.indexer.compressor.norm.weight": "model-00008-of-00015.safetensors", "model.layers.22.attn.indexer.compressor.wgate.biases": "model-00008-of-00015.safetensors", "model.layers.22.attn.indexer.compressor.wgate.scales": "model-00008-of-00015.safetensors", "model.layers.22.attn.indexer.compressor.wgate.weight": "model-00008-of-00015.safetensors", "model.layers.22.attn.indexer.compressor.wkv.biases": "model-00008-of-00015.safetensors", "model.layers.22.attn.indexer.compressor.wkv.scales": "model-00008-of-00015.safetensors", "model.layers.22.attn.indexer.compressor.wkv.weight": "model-00008-of-00015.safetensors", "model.layers.22.attn.indexer.weights_proj.biases": "model-00008-of-00015.safetensors", "model.layers.22.attn.indexer.weights_proj.scales": "model-00008-of-00015.safetensors", "model.layers.22.attn.indexer.weights_proj.weight": "model-00008-of-00015.safetensors", "model.layers.22.attn.indexer.wq_b.biases": "model-00008-of-00015.safetensors", "model.layers.22.attn.indexer.wq_b.scales": "model-00008-of-00015.safetensors", "model.layers.22.attn.indexer.wq_b.weight": "model-00008-of-00015.safetensors", "model.layers.22.attn.kv_norm.weight": "model-00008-of-00015.safetensors", "model.layers.22.attn.q_norm.weight": "model-00008-of-00015.safetensors", "model.layers.22.attn.wkv.biases": "model-00008-of-00015.safetensors", "model.layers.22.attn.wkv.scales": "model-00008-of-00015.safetensors", "model.layers.22.attn.wkv.weight": "model-00008-of-00015.safetensors", "model.layers.22.attn.wo_a.biases": "model-00008-of-00015.safetensors", "model.layers.22.attn.wo_a.scales": "model-00008-of-00015.safetensors", "model.layers.22.attn.wo_a.weight": "model-00008-of-00015.safetensors", "model.layers.22.attn.wo_b.biases": "model-00008-of-00015.safetensors", "model.layers.22.attn.wo_b.scales": "model-00008-of-00015.safetensors", "model.layers.22.attn.wo_b.weight": "model-00008-of-00015.safetensors", "model.layers.22.attn.wq_a.biases": "model-00008-of-00015.safetensors", "model.layers.22.attn.wq_a.scales": "model-00008-of-00015.safetensors", "model.layers.22.attn.wq_a.weight": "model-00008-of-00015.safetensors", "model.layers.22.attn.wq_b.biases": "model-00008-of-00015.safetensors", "model.layers.22.attn.wq_b.scales": "model-00008-of-00015.safetensors", "model.layers.22.attn.wq_b.weight": "model-00008-of-00015.safetensors", "model.layers.22.attn_hc.base": "model-00008-of-00015.safetensors", "model.layers.22.attn_hc.fn": "model-00008-of-00015.safetensors", "model.layers.22.attn_hc.scale": "model-00008-of-00015.safetensors", "model.layers.22.attn_norm.weight": "model-00008-of-00015.safetensors", "model.layers.22.ffn.gate.e_score_correction_bias": "model-00008-of-00015.safetensors", "model.layers.22.ffn.gate.weight": "model-00008-of-00015.safetensors", "model.layers.22.ffn.shared_experts.down_proj.biases": "model-00008-of-00015.safetensors", "model.layers.22.ffn.shared_experts.down_proj.scales": "model-00008-of-00015.safetensors", "model.layers.22.ffn.shared_experts.down_proj.weight": "model-00008-of-00015.safetensors", "model.layers.22.ffn.shared_experts.gate_proj.biases": "model-00008-of-00015.safetensors", "model.layers.22.ffn.shared_experts.gate_proj.scales": "model-00008-of-00015.safetensors", "model.layers.22.ffn.shared_experts.gate_proj.weight": "model-00008-of-00015.safetensors", "model.layers.22.ffn.shared_experts.up_proj.biases": "model-00008-of-00015.safetensors", "model.layers.22.ffn.shared_experts.up_proj.scales": "model-00008-of-00015.safetensors", "model.layers.22.ffn.shared_experts.up_proj.weight": "model-00008-of-00015.safetensors", "model.layers.22.ffn.switch_mlp.down_proj.scales": "model-00008-of-00015.safetensors", "model.layers.22.ffn.switch_mlp.down_proj.weight": "model-00008-of-00015.safetensors", "model.layers.22.ffn.switch_mlp.gate_proj.scales": "model-00008-of-00015.safetensors", "model.layers.22.ffn.switch_mlp.gate_proj.weight": "model-00008-of-00015.safetensors", "model.layers.22.ffn.switch_mlp.up_proj.scales": "model-00008-of-00015.safetensors", "model.layers.22.ffn.switch_mlp.up_proj.weight": "model-00008-of-00015.safetensors", "model.layers.22.ffn_hc.base": "model-00008-of-00015.safetensors", "model.layers.22.ffn_hc.fn": "model-00008-of-00015.safetensors", "model.layers.22.ffn_hc.scale": "model-00008-of-00015.safetensors", "model.layers.22.ffn_norm.weight": "model-00008-of-00015.safetensors", "model.layers.23.attn.attn_sink": "model-00009-of-00015.safetensors", "model.layers.23.attn.compressor.ape": "model-00009-of-00015.safetensors", "model.layers.23.attn.compressor.norm.weight": "model-00009-of-00015.safetensors", "model.layers.23.attn.compressor.wgate.biases": "model-00009-of-00015.safetensors", "model.layers.23.attn.compressor.wgate.scales": "model-00009-of-00015.safetensors", "model.layers.23.attn.compressor.wgate.weight": "model-00009-of-00015.safetensors", "model.layers.23.attn.compressor.wkv.biases": "model-00009-of-00015.safetensors", "model.layers.23.attn.compressor.wkv.scales": "model-00009-of-00015.safetensors", "model.layers.23.attn.compressor.wkv.weight": "model-00009-of-00015.safetensors", "model.layers.23.attn.kv_norm.weight": "model-00009-of-00015.safetensors", "model.layers.23.attn.q_norm.weight": "model-00008-of-00015.safetensors", "model.layers.23.attn.wkv.biases": "model-00009-of-00015.safetensors", "model.layers.23.attn.wkv.scales": "model-00009-of-00015.safetensors", "model.layers.23.attn.wkv.weight": "model-00009-of-00015.safetensors", "model.layers.23.attn.wo_a.biases": "model-00009-of-00015.safetensors", "model.layers.23.attn.wo_a.scales": "model-00009-of-00015.safetensors", "model.layers.23.attn.wo_a.weight": "model-00009-of-00015.safetensors", "model.layers.23.attn.wo_b.biases": "model-00009-of-00015.safetensors", "model.layers.23.attn.wo_b.scales": "model-00009-of-00015.safetensors", "model.layers.23.attn.wo_b.weight": "model-00009-of-00015.safetensors", "model.layers.23.attn.wq_a.biases": "model-00008-of-00015.safetensors", "model.layers.23.attn.wq_a.scales": "model-00008-of-00015.safetensors", "model.layers.23.attn.wq_a.weight": "model-00008-of-00015.safetensors", "model.layers.23.attn.wq_b.biases": "model-00009-of-00015.safetensors", "model.layers.23.attn.wq_b.scales": "model-00009-of-00015.safetensors", "model.layers.23.attn.wq_b.weight": "model-00009-of-00015.safetensors", "model.layers.23.attn_hc.base": "model-00009-of-00015.safetensors", "model.layers.23.attn_hc.fn": "model-00009-of-00015.safetensors", "model.layers.23.attn_hc.scale": "model-00009-of-00015.safetensors", "model.layers.23.attn_norm.weight": "model-00009-of-00015.safetensors", "model.layers.23.ffn.gate.e_score_correction_bias": "model-00009-of-00015.safetensors", "model.layers.23.ffn.gate.weight": "model-00009-of-00015.safetensors", "model.layers.23.ffn.shared_experts.down_proj.biases": "model-00009-of-00015.safetensors", "model.layers.23.ffn.shared_experts.down_proj.scales": "model-00009-of-00015.safetensors", "model.layers.23.ffn.shared_experts.down_proj.weight": "model-00009-of-00015.safetensors", "model.layers.23.ffn.shared_experts.gate_proj.biases": "model-00009-of-00015.safetensors", "model.layers.23.ffn.shared_experts.gate_proj.scales": "model-00009-of-00015.safetensors", "model.layers.23.ffn.shared_experts.gate_proj.weight": "model-00009-of-00015.safetensors", "model.layers.23.ffn.shared_experts.up_proj.biases": "model-00009-of-00015.safetensors", "model.layers.23.ffn.shared_experts.up_proj.scales": "model-00009-of-00015.safetensors", "model.layers.23.ffn.shared_experts.up_proj.weight": "model-00009-of-00015.safetensors", "model.layers.23.ffn.switch_mlp.down_proj.scales": "model-00009-of-00015.safetensors", "model.layers.23.ffn.switch_mlp.down_proj.weight": "model-00009-of-00015.safetensors", "model.layers.23.ffn.switch_mlp.gate_proj.scales": "model-00009-of-00015.safetensors", "model.layers.23.ffn.switch_mlp.gate_proj.weight": "model-00009-of-00015.safetensors", "model.layers.23.ffn.switch_mlp.up_proj.scales": "model-00009-of-00015.safetensors", "model.layers.23.ffn.switch_mlp.up_proj.weight": "model-00009-of-00015.safetensors", "model.layers.23.ffn_hc.base": "model-00009-of-00015.safetensors", "model.layers.23.ffn_hc.fn": "model-00009-of-00015.safetensors", "model.layers.23.ffn_hc.scale": "model-00009-of-00015.safetensors", "model.layers.23.ffn_norm.weight": "model-00009-of-00015.safetensors", "model.layers.24.attn.attn_sink": "model-00009-of-00015.safetensors", "model.layers.24.attn.compressor.ape": "model-00009-of-00015.safetensors", "model.layers.24.attn.compressor.norm.weight": "model-00009-of-00015.safetensors", "model.layers.24.attn.compressor.wgate.biases": "model-00009-of-00015.safetensors", "model.layers.24.attn.compressor.wgate.scales": "model-00009-of-00015.safetensors", "model.layers.24.attn.compressor.wgate.weight": "model-00009-of-00015.safetensors", "model.layers.24.attn.compressor.wkv.biases": "model-00009-of-00015.safetensors", "model.layers.24.attn.compressor.wkv.scales": "model-00009-of-00015.safetensors", "model.layers.24.attn.compressor.wkv.weight": "model-00009-of-00015.safetensors", "model.layers.24.attn.indexer.compressor.ape": "model-00009-of-00015.safetensors", "model.layers.24.attn.indexer.compressor.norm.weight": "model-00009-of-00015.safetensors", "model.layers.24.attn.indexer.compressor.wgate.biases": "model-00009-of-00015.safetensors", "model.layers.24.attn.indexer.compressor.wgate.scales": "model-00009-of-00015.safetensors", "model.layers.24.attn.indexer.compressor.wgate.weight": "model-00009-of-00015.safetensors", "model.layers.24.attn.indexer.compressor.wkv.biases": "model-00009-of-00015.safetensors", "model.layers.24.attn.indexer.compressor.wkv.scales": "model-00009-of-00015.safetensors", "model.layers.24.attn.indexer.compressor.wkv.weight": "model-00009-of-00015.safetensors", "model.layers.24.attn.indexer.weights_proj.biases": "model-00009-of-00015.safetensors", "model.layers.24.attn.indexer.weights_proj.scales": "model-00009-of-00015.safetensors", "model.layers.24.attn.indexer.weights_proj.weight": "model-00009-of-00015.safetensors", "model.layers.24.attn.indexer.wq_b.biases": "model-00009-of-00015.safetensors", "model.layers.24.attn.indexer.wq_b.scales": "model-00009-of-00015.safetensors", "model.layers.24.attn.indexer.wq_b.weight": "model-00009-of-00015.safetensors", "model.layers.24.attn.kv_norm.weight": "model-00009-of-00015.safetensors", "model.layers.24.attn.q_norm.weight": "model-00009-of-00015.safetensors", "model.layers.24.attn.wkv.biases": "model-00009-of-00015.safetensors", "model.layers.24.attn.wkv.scales": "model-00009-of-00015.safetensors", "model.layers.24.attn.wkv.weight": "model-00009-of-00015.safetensors", "model.layers.24.attn.wo_a.biases": "model-00009-of-00015.safetensors", "model.layers.24.attn.wo_a.scales": "model-00009-of-00015.safetensors", "model.layers.24.attn.wo_a.weight": "model-00009-of-00015.safetensors", "model.layers.24.attn.wo_b.biases": "model-00009-of-00015.safetensors", "model.layers.24.attn.wo_b.scales": "model-00009-of-00015.safetensors", "model.layers.24.attn.wo_b.weight": "model-00009-of-00015.safetensors", "model.layers.24.attn.wq_a.biases": "model-00009-of-00015.safetensors", "model.layers.24.attn.wq_a.scales": "model-00009-of-00015.safetensors", "model.layers.24.attn.wq_a.weight": "model-00009-of-00015.safetensors", "model.layers.24.attn.wq_b.biases": "model-00009-of-00015.safetensors", "model.layers.24.attn.wq_b.scales": "model-00009-of-00015.safetensors", "model.layers.24.attn.wq_b.weight": "model-00009-of-00015.safetensors", "model.layers.24.attn_hc.base": "model-00009-of-00015.safetensors", "model.layers.24.attn_hc.fn": "model-00009-of-00015.safetensors", "model.layers.24.attn_hc.scale": "model-00009-of-00015.safetensors", "model.layers.24.attn_norm.weight": "model-00009-of-00015.safetensors", "model.layers.24.ffn.gate.e_score_correction_bias": "model-00009-of-00015.safetensors", "model.layers.24.ffn.gate.weight": "model-00009-of-00015.safetensors", "model.layers.24.ffn.shared_experts.down_proj.biases": "model-00009-of-00015.safetensors", "model.layers.24.ffn.shared_experts.down_proj.scales": "model-00009-of-00015.safetensors", "model.layers.24.ffn.shared_experts.down_proj.weight": "model-00009-of-00015.safetensors", "model.layers.24.ffn.shared_experts.gate_proj.biases": "model-00009-of-00015.safetensors", "model.layers.24.ffn.shared_experts.gate_proj.scales": "model-00009-of-00015.safetensors", "model.layers.24.ffn.shared_experts.gate_proj.weight": "model-00009-of-00015.safetensors", "model.layers.24.ffn.shared_experts.up_proj.biases": "model-00009-of-00015.safetensors", "model.layers.24.ffn.shared_experts.up_proj.scales": "model-00009-of-00015.safetensors", "model.layers.24.ffn.shared_experts.up_proj.weight": "model-00009-of-00015.safetensors", "model.layers.24.ffn.switch_mlp.down_proj.scales": "model-00009-of-00015.safetensors", "model.layers.24.ffn.switch_mlp.down_proj.weight": "model-00009-of-00015.safetensors", "model.layers.24.ffn.switch_mlp.gate_proj.scales": "model-00009-of-00015.safetensors", "model.layers.24.ffn.switch_mlp.gate_proj.weight": "model-00009-of-00015.safetensors", "model.layers.24.ffn.switch_mlp.up_proj.scales": "model-00009-of-00015.safetensors", "model.layers.24.ffn.switch_mlp.up_proj.weight": "model-00009-of-00015.safetensors", "model.layers.24.ffn_hc.base": "model-00009-of-00015.safetensors", "model.layers.24.ffn_hc.fn": "model-00009-of-00015.safetensors", "model.layers.24.ffn_hc.scale": "model-00009-of-00015.safetensors", "model.layers.24.ffn_norm.weight": "model-00009-of-00015.safetensors", "model.layers.25.attn.attn_sink": "model-00009-of-00015.safetensors", "model.layers.25.attn.compressor.ape": "model-00009-of-00015.safetensors", "model.layers.25.attn.compressor.norm.weight": "model-00009-of-00015.safetensors", "model.layers.25.attn.compressor.wgate.biases": "model-00009-of-00015.safetensors", "model.layers.25.attn.compressor.wgate.scales": "model-00009-of-00015.safetensors", "model.layers.25.attn.compressor.wgate.weight": "model-00009-of-00015.safetensors", "model.layers.25.attn.compressor.wkv.biases": "model-00009-of-00015.safetensors", "model.layers.25.attn.compressor.wkv.scales": "model-00009-of-00015.safetensors", "model.layers.25.attn.compressor.wkv.weight": "model-00009-of-00015.safetensors", "model.layers.25.attn.kv_norm.weight": "model-00009-of-00015.safetensors", "model.layers.25.attn.q_norm.weight": "model-00009-of-00015.safetensors", "model.layers.25.attn.wkv.biases": "model-00009-of-00015.safetensors", "model.layers.25.attn.wkv.scales": "model-00009-of-00015.safetensors", "model.layers.25.attn.wkv.weight": "model-00009-of-00015.safetensors", "model.layers.25.attn.wo_a.biases": "model-00009-of-00015.safetensors", "model.layers.25.attn.wo_a.scales": "model-00009-of-00015.safetensors", "model.layers.25.attn.wo_a.weight": "model-00009-of-00015.safetensors", "model.layers.25.attn.wo_b.biases": "model-00009-of-00015.safetensors", "model.layers.25.attn.wo_b.scales": "model-00009-of-00015.safetensors", "model.layers.25.attn.wo_b.weight": "model-00009-of-00015.safetensors", "model.layers.25.attn.wq_a.biases": "model-00009-of-00015.safetensors", "model.layers.25.attn.wq_a.scales": "model-00009-of-00015.safetensors", "model.layers.25.attn.wq_a.weight": "model-00009-of-00015.safetensors", "model.layers.25.attn.wq_b.biases": "model-00009-of-00015.safetensors", "model.layers.25.attn.wq_b.scales": "model-00009-of-00015.safetensors", "model.layers.25.attn.wq_b.weight": "model-00009-of-00015.safetensors", "model.layers.25.attn_hc.base": "model-00010-of-00015.safetensors", "model.layers.25.attn_hc.fn": "model-00010-of-00015.safetensors", "model.layers.25.attn_hc.scale": "model-00010-of-00015.safetensors", "model.layers.25.attn_norm.weight": "model-00010-of-00015.safetensors", "model.layers.25.ffn.gate.e_score_correction_bias": "model-00009-of-00015.safetensors", "model.layers.25.ffn.gate.weight": "model-00009-of-00015.safetensors", "model.layers.25.ffn.shared_experts.down_proj.biases": "model-00010-of-00015.safetensors", "model.layers.25.ffn.shared_experts.down_proj.scales": "model-00010-of-00015.safetensors", "model.layers.25.ffn.shared_experts.down_proj.weight": "model-00010-of-00015.safetensors", "model.layers.25.ffn.shared_experts.gate_proj.biases": "model-00009-of-00015.safetensors", "model.layers.25.ffn.shared_experts.gate_proj.scales": "model-00009-of-00015.safetensors", "model.layers.25.ffn.shared_experts.gate_proj.weight": "model-00009-of-00015.safetensors", "model.layers.25.ffn.shared_experts.up_proj.biases": "model-00010-of-00015.safetensors", "model.layers.25.ffn.shared_experts.up_proj.scales": "model-00010-of-00015.safetensors", "model.layers.25.ffn.shared_experts.up_proj.weight": "model-00010-of-00015.safetensors", "model.layers.25.ffn.switch_mlp.down_proj.scales": "model-00009-of-00015.safetensors", "model.layers.25.ffn.switch_mlp.down_proj.weight": "model-00009-of-00015.safetensors", "model.layers.25.ffn.switch_mlp.gate_proj.scales": "model-00009-of-00015.safetensors", "model.layers.25.ffn.switch_mlp.gate_proj.weight": "model-00009-of-00015.safetensors", "model.layers.25.ffn.switch_mlp.up_proj.scales": "model-00009-of-00015.safetensors", "model.layers.25.ffn.switch_mlp.up_proj.weight": "model-00009-of-00015.safetensors", "model.layers.25.ffn_hc.base": "model-00010-of-00015.safetensors", "model.layers.25.ffn_hc.fn": "model-00010-of-00015.safetensors", "model.layers.25.ffn_hc.scale": "model-00010-of-00015.safetensors", "model.layers.25.ffn_norm.weight": "model-00010-of-00015.safetensors", "model.layers.26.attn.attn_sink": "model-00010-of-00015.safetensors", "model.layers.26.attn.compressor.ape": "model-00010-of-00015.safetensors", "model.layers.26.attn.compressor.norm.weight": "model-00010-of-00015.safetensors", "model.layers.26.attn.compressor.wgate.biases": "model-00010-of-00015.safetensors", "model.layers.26.attn.compressor.wgate.scales": "model-00010-of-00015.safetensors", "model.layers.26.attn.compressor.wgate.weight": "model-00010-of-00015.safetensors", "model.layers.26.attn.compressor.wkv.biases": "model-00010-of-00015.safetensors", "model.layers.26.attn.compressor.wkv.scales": "model-00010-of-00015.safetensors", "model.layers.26.attn.compressor.wkv.weight": "model-00010-of-00015.safetensors", "model.layers.26.attn.indexer.compressor.ape": "model-00010-of-00015.safetensors", "model.layers.26.attn.indexer.compressor.norm.weight": "model-00010-of-00015.safetensors", "model.layers.26.attn.indexer.compressor.wgate.biases": "model-00010-of-00015.safetensors", "model.layers.26.attn.indexer.compressor.wgate.scales": "model-00010-of-00015.safetensors", "model.layers.26.attn.indexer.compressor.wgate.weight": "model-00010-of-00015.safetensors", "model.layers.26.attn.indexer.compressor.wkv.biases": "model-00010-of-00015.safetensors", "model.layers.26.attn.indexer.compressor.wkv.scales": "model-00010-of-00015.safetensors", "model.layers.26.attn.indexer.compressor.wkv.weight": "model-00010-of-00015.safetensors", "model.layers.26.attn.indexer.weights_proj.biases": "model-00010-of-00015.safetensors", "model.layers.26.attn.indexer.weights_proj.scales": "model-00010-of-00015.safetensors", "model.layers.26.attn.indexer.weights_proj.weight": "model-00010-of-00015.safetensors", "model.layers.26.attn.indexer.wq_b.biases": "model-00010-of-00015.safetensors", "model.layers.26.attn.indexer.wq_b.scales": "model-00010-of-00015.safetensors", "model.layers.26.attn.indexer.wq_b.weight": "model-00010-of-00015.safetensors", "model.layers.26.attn.kv_norm.weight": "model-00010-of-00015.safetensors", "model.layers.26.attn.q_norm.weight": "model-00010-of-00015.safetensors", "model.layers.26.attn.wkv.biases": "model-00010-of-00015.safetensors", "model.layers.26.attn.wkv.scales": "model-00010-of-00015.safetensors", "model.layers.26.attn.wkv.weight": "model-00010-of-00015.safetensors", "model.layers.26.attn.wo_a.biases": "model-00010-of-00015.safetensors", "model.layers.26.attn.wo_a.scales": "model-00010-of-00015.safetensors", "model.layers.26.attn.wo_a.weight": "model-00010-of-00015.safetensors", "model.layers.26.attn.wo_b.biases": "model-00010-of-00015.safetensors", "model.layers.26.attn.wo_b.scales": "model-00010-of-00015.safetensors", "model.layers.26.attn.wo_b.weight": "model-00010-of-00015.safetensors", "model.layers.26.attn.wq_a.biases": "model-00010-of-00015.safetensors", "model.layers.26.attn.wq_a.scales": "model-00010-of-00015.safetensors", "model.layers.26.attn.wq_a.weight": "model-00010-of-00015.safetensors", "model.layers.26.attn.wq_b.biases": "model-00010-of-00015.safetensors", "model.layers.26.attn.wq_b.scales": "model-00010-of-00015.safetensors", "model.layers.26.attn.wq_b.weight": "model-00010-of-00015.safetensors", "model.layers.26.attn_hc.base": "model-00010-of-00015.safetensors", "model.layers.26.attn_hc.fn": "model-00010-of-00015.safetensors", "model.layers.26.attn_hc.scale": "model-00010-of-00015.safetensors", "model.layers.26.attn_norm.weight": "model-00010-of-00015.safetensors", "model.layers.26.ffn.gate.e_score_correction_bias": "model-00010-of-00015.safetensors", "model.layers.26.ffn.gate.weight": "model-00010-of-00015.safetensors", "model.layers.26.ffn.shared_experts.down_proj.biases": "model-00010-of-00015.safetensors", "model.layers.26.ffn.shared_experts.down_proj.scales": "model-00010-of-00015.safetensors", "model.layers.26.ffn.shared_experts.down_proj.weight": "model-00010-of-00015.safetensors", "model.layers.26.ffn.shared_experts.gate_proj.biases": "model-00010-of-00015.safetensors", "model.layers.26.ffn.shared_experts.gate_proj.scales": "model-00010-of-00015.safetensors", "model.layers.26.ffn.shared_experts.gate_proj.weight": "model-00010-of-00015.safetensors", "model.layers.26.ffn.shared_experts.up_proj.biases": "model-00010-of-00015.safetensors", "model.layers.26.ffn.shared_experts.up_proj.scales": "model-00010-of-00015.safetensors", "model.layers.26.ffn.shared_experts.up_proj.weight": "model-00010-of-00015.safetensors", "model.layers.26.ffn.switch_mlp.down_proj.scales": "model-00010-of-00015.safetensors", "model.layers.26.ffn.switch_mlp.down_proj.weight": "model-00010-of-00015.safetensors", "model.layers.26.ffn.switch_mlp.gate_proj.scales": "model-00010-of-00015.safetensors", "model.layers.26.ffn.switch_mlp.gate_proj.weight": "model-00010-of-00015.safetensors", "model.layers.26.ffn.switch_mlp.up_proj.scales": "model-00010-of-00015.safetensors", "model.layers.26.ffn.switch_mlp.up_proj.weight": "model-00010-of-00015.safetensors", "model.layers.26.ffn_hc.base": "model-00010-of-00015.safetensors", "model.layers.26.ffn_hc.fn": "model-00010-of-00015.safetensors", "model.layers.26.ffn_hc.scale": "model-00010-of-00015.safetensors", "model.layers.26.ffn_norm.weight": "model-00010-of-00015.safetensors", "model.layers.27.attn.attn_sink": "model-00010-of-00015.safetensors", "model.layers.27.attn.compressor.ape": "model-00010-of-00015.safetensors", "model.layers.27.attn.compressor.norm.weight": "model-00010-of-00015.safetensors", "model.layers.27.attn.compressor.wgate.biases": "model-00010-of-00015.safetensors", "model.layers.27.attn.compressor.wgate.scales": "model-00010-of-00015.safetensors", "model.layers.27.attn.compressor.wgate.weight": "model-00010-of-00015.safetensors", "model.layers.27.attn.compressor.wkv.biases": "model-00010-of-00015.safetensors", "model.layers.27.attn.compressor.wkv.scales": "model-00010-of-00015.safetensors", "model.layers.27.attn.compressor.wkv.weight": "model-00010-of-00015.safetensors", "model.layers.27.attn.kv_norm.weight": "model-00010-of-00015.safetensors", "model.layers.27.attn.q_norm.weight": "model-00010-of-00015.safetensors", "model.layers.27.attn.wkv.biases": "model-00010-of-00015.safetensors", "model.layers.27.attn.wkv.scales": "model-00010-of-00015.safetensors", "model.layers.27.attn.wkv.weight": "model-00010-of-00015.safetensors", "model.layers.27.attn.wo_a.biases": "model-00010-of-00015.safetensors", "model.layers.27.attn.wo_a.scales": "model-00010-of-00015.safetensors", "model.layers.27.attn.wo_a.weight": "model-00010-of-00015.safetensors", "model.layers.27.attn.wo_b.biases": "model-00010-of-00015.safetensors", "model.layers.27.attn.wo_b.scales": "model-00010-of-00015.safetensors", "model.layers.27.attn.wo_b.weight": "model-00010-of-00015.safetensors", "model.layers.27.attn.wq_a.biases": "model-00010-of-00015.safetensors", "model.layers.27.attn.wq_a.scales": "model-00010-of-00015.safetensors", "model.layers.27.attn.wq_a.weight": "model-00010-of-00015.safetensors", "model.layers.27.attn.wq_b.biases": "model-00010-of-00015.safetensors", "model.layers.27.attn.wq_b.scales": "model-00010-of-00015.safetensors", "model.layers.27.attn.wq_b.weight": "model-00010-of-00015.safetensors", "model.layers.27.attn_hc.base": "model-00010-of-00015.safetensors", "model.layers.27.attn_hc.fn": "model-00010-of-00015.safetensors", "model.layers.27.attn_hc.scale": "model-00010-of-00015.safetensors", "model.layers.27.attn_norm.weight": "model-00010-of-00015.safetensors", "model.layers.27.ffn.gate.e_score_correction_bias": "model-00010-of-00015.safetensors", "model.layers.27.ffn.gate.weight": "model-00010-of-00015.safetensors", "model.layers.27.ffn.shared_experts.down_proj.biases": "model-00010-of-00015.safetensors", "model.layers.27.ffn.shared_experts.down_proj.scales": "model-00010-of-00015.safetensors", "model.layers.27.ffn.shared_experts.down_proj.weight": "model-00010-of-00015.safetensors", "model.layers.27.ffn.shared_experts.gate_proj.biases": "model-00010-of-00015.safetensors", "model.layers.27.ffn.shared_experts.gate_proj.scales": "model-00010-of-00015.safetensors", "model.layers.27.ffn.shared_experts.gate_proj.weight": "model-00010-of-00015.safetensors", "model.layers.27.ffn.shared_experts.up_proj.biases": "model-00010-of-00015.safetensors", "model.layers.27.ffn.shared_experts.up_proj.scales": "model-00010-of-00015.safetensors", "model.layers.27.ffn.shared_experts.up_proj.weight": "model-00010-of-00015.safetensors", "model.layers.27.ffn.switch_mlp.down_proj.scales": "model-00010-of-00015.safetensors", "model.layers.27.ffn.switch_mlp.down_proj.weight": "model-00010-of-00015.safetensors", "model.layers.27.ffn.switch_mlp.gate_proj.scales": "model-00010-of-00015.safetensors", "model.layers.27.ffn.switch_mlp.gate_proj.weight": "model-00010-of-00015.safetensors", "model.layers.27.ffn.switch_mlp.up_proj.scales": "model-00010-of-00015.safetensors", "model.layers.27.ffn.switch_mlp.up_proj.weight": "model-00010-of-00015.safetensors", "model.layers.27.ffn_hc.base": "model-00010-of-00015.safetensors", "model.layers.27.ffn_hc.fn": "model-00010-of-00015.safetensors", "model.layers.27.ffn_hc.scale": "model-00010-of-00015.safetensors", "model.layers.27.ffn_norm.weight": "model-00010-of-00015.safetensors", "model.layers.28.attn.attn_sink": "model-00010-of-00015.safetensors", "model.layers.28.attn.compressor.ape": "model-00010-of-00015.safetensors", "model.layers.28.attn.compressor.norm.weight": "model-00010-of-00015.safetensors", "model.layers.28.attn.compressor.wgate.biases": "model-00010-of-00015.safetensors", "model.layers.28.attn.compressor.wgate.scales": "model-00010-of-00015.safetensors", "model.layers.28.attn.compressor.wgate.weight": "model-00010-of-00015.safetensors", "model.layers.28.attn.compressor.wkv.biases": "model-00010-of-00015.safetensors", "model.layers.28.attn.compressor.wkv.scales": "model-00010-of-00015.safetensors", "model.layers.28.attn.compressor.wkv.weight": "model-00010-of-00015.safetensors", "model.layers.28.attn.indexer.compressor.ape": "model-00010-of-00015.safetensors", "model.layers.28.attn.indexer.compressor.norm.weight": "model-00010-of-00015.safetensors", "model.layers.28.attn.indexer.compressor.wgate.biases": "model-00010-of-00015.safetensors", "model.layers.28.attn.indexer.compressor.wgate.scales": "model-00010-of-00015.safetensors", "model.layers.28.attn.indexer.compressor.wgate.weight": "model-00010-of-00015.safetensors", "model.layers.28.attn.indexer.compressor.wkv.biases": "model-00010-of-00015.safetensors", "model.layers.28.attn.indexer.compressor.wkv.scales": "model-00010-of-00015.safetensors", "model.layers.28.attn.indexer.compressor.wkv.weight": "model-00010-of-00015.safetensors", "model.layers.28.attn.indexer.weights_proj.biases": "model-00010-of-00015.safetensors", "model.layers.28.attn.indexer.weights_proj.scales": "model-00010-of-00015.safetensors", "model.layers.28.attn.indexer.weights_proj.weight": "model-00010-of-00015.safetensors", "model.layers.28.attn.indexer.wq_b.biases": "model-00010-of-00015.safetensors", "model.layers.28.attn.indexer.wq_b.scales": "model-00010-of-00015.safetensors", "model.layers.28.attn.indexer.wq_b.weight": "model-00010-of-00015.safetensors", "model.layers.28.attn.kv_norm.weight": "model-00010-of-00015.safetensors", "model.layers.28.attn.q_norm.weight": "model-00010-of-00015.safetensors", "model.layers.28.attn.wkv.biases": "model-00010-of-00015.safetensors", "model.layers.28.attn.wkv.scales": "model-00010-of-00015.safetensors", "model.layers.28.attn.wkv.weight": "model-00010-of-00015.safetensors", "model.layers.28.attn.wo_a.biases": "model-00010-of-00015.safetensors", "model.layers.28.attn.wo_a.scales": "model-00010-of-00015.safetensors", "model.layers.28.attn.wo_a.weight": "model-00010-of-00015.safetensors", "model.layers.28.attn.wo_b.biases": "model-00010-of-00015.safetensors", "model.layers.28.attn.wo_b.scales": "model-00010-of-00015.safetensors", "model.layers.28.attn.wo_b.weight": "model-00010-of-00015.safetensors", "model.layers.28.attn.wq_a.biases": "model-00010-of-00015.safetensors", "model.layers.28.attn.wq_a.scales": "model-00010-of-00015.safetensors", "model.layers.28.attn.wq_a.weight": "model-00010-of-00015.safetensors", "model.layers.28.attn.wq_b.biases": "model-00010-of-00015.safetensors", "model.layers.28.attn.wq_b.scales": "model-00010-of-00015.safetensors", "model.layers.28.attn.wq_b.weight": "model-00010-of-00015.safetensors", "model.layers.28.attn_hc.base": "model-00011-of-00015.safetensors", "model.layers.28.attn_hc.fn": "model-00011-of-00015.safetensors", "model.layers.28.attn_hc.scale": "model-00011-of-00015.safetensors", "model.layers.28.attn_norm.weight": "model-00011-of-00015.safetensors", "model.layers.28.ffn.gate.e_score_correction_bias": "model-00010-of-00015.safetensors", "model.layers.28.ffn.gate.weight": "model-00010-of-00015.safetensors", "model.layers.28.ffn.shared_experts.down_proj.biases": "model-00011-of-00015.safetensors", "model.layers.28.ffn.shared_experts.down_proj.scales": "model-00011-of-00015.safetensors", "model.layers.28.ffn.shared_experts.down_proj.weight": "model-00011-of-00015.safetensors", "model.layers.28.ffn.shared_experts.gate_proj.biases": "model-00011-of-00015.safetensors", "model.layers.28.ffn.shared_experts.gate_proj.scales": "model-00011-of-00015.safetensors", "model.layers.28.ffn.shared_experts.gate_proj.weight": "model-00011-of-00015.safetensors", "model.layers.28.ffn.shared_experts.up_proj.biases": "model-00011-of-00015.safetensors", "model.layers.28.ffn.shared_experts.up_proj.scales": "model-00011-of-00015.safetensors", "model.layers.28.ffn.shared_experts.up_proj.weight": "model-00011-of-00015.safetensors", "model.layers.28.ffn.switch_mlp.down_proj.scales": "model-00011-of-00015.safetensors", "model.layers.28.ffn.switch_mlp.down_proj.weight": "model-00010-of-00015.safetensors", "model.layers.28.ffn.switch_mlp.gate_proj.scales": "model-00010-of-00015.safetensors", "model.layers.28.ffn.switch_mlp.gate_proj.weight": "model-00010-of-00015.safetensors", "model.layers.28.ffn.switch_mlp.up_proj.scales": "model-00010-of-00015.safetensors", "model.layers.28.ffn.switch_mlp.up_proj.weight": "model-00010-of-00015.safetensors", "model.layers.28.ffn_hc.base": "model-00011-of-00015.safetensors", "model.layers.28.ffn_hc.fn": "model-00011-of-00015.safetensors", "model.layers.28.ffn_hc.scale": "model-00011-of-00015.safetensors", "model.layers.28.ffn_norm.weight": "model-00011-of-00015.safetensors", "model.layers.29.attn.attn_sink": "model-00011-of-00015.safetensors", "model.layers.29.attn.compressor.ape": "model-00011-of-00015.safetensors", "model.layers.29.attn.compressor.norm.weight": "model-00011-of-00015.safetensors", "model.layers.29.attn.compressor.wgate.biases": "model-00011-of-00015.safetensors", "model.layers.29.attn.compressor.wgate.scales": "model-00011-of-00015.safetensors", "model.layers.29.attn.compressor.wgate.weight": "model-00011-of-00015.safetensors", "model.layers.29.attn.compressor.wkv.biases": "model-00011-of-00015.safetensors", "model.layers.29.attn.compressor.wkv.scales": "model-00011-of-00015.safetensors", "model.layers.29.attn.compressor.wkv.weight": "model-00011-of-00015.safetensors", "model.layers.29.attn.kv_norm.weight": "model-00011-of-00015.safetensors", "model.layers.29.attn.q_norm.weight": "model-00011-of-00015.safetensors", "model.layers.29.attn.wkv.biases": "model-00011-of-00015.safetensors", "model.layers.29.attn.wkv.scales": "model-00011-of-00015.safetensors", "model.layers.29.attn.wkv.weight": "model-00011-of-00015.safetensors", "model.layers.29.attn.wo_a.biases": "model-00011-of-00015.safetensors", "model.layers.29.attn.wo_a.scales": "model-00011-of-00015.safetensors", "model.layers.29.attn.wo_a.weight": "model-00011-of-00015.safetensors", "model.layers.29.attn.wo_b.biases": "model-00011-of-00015.safetensors", "model.layers.29.attn.wo_b.scales": "model-00011-of-00015.safetensors", "model.layers.29.attn.wo_b.weight": "model-00011-of-00015.safetensors", "model.layers.29.attn.wq_a.biases": "model-00011-of-00015.safetensors", "model.layers.29.attn.wq_a.scales": "model-00011-of-00015.safetensors", "model.layers.29.attn.wq_a.weight": "model-00011-of-00015.safetensors", "model.layers.29.attn.wq_b.biases": "model-00011-of-00015.safetensors", "model.layers.29.attn.wq_b.scales": "model-00011-of-00015.safetensors", "model.layers.29.attn.wq_b.weight": "model-00011-of-00015.safetensors", "model.layers.29.attn_hc.base": "model-00011-of-00015.safetensors", "model.layers.29.attn_hc.fn": "model-00011-of-00015.safetensors", "model.layers.29.attn_hc.scale": "model-00011-of-00015.safetensors", "model.layers.29.attn_norm.weight": "model-00011-of-00015.safetensors", "model.layers.29.ffn.gate.e_score_correction_bias": "model-00011-of-00015.safetensors", "model.layers.29.ffn.gate.weight": "model-00011-of-00015.safetensors", "model.layers.29.ffn.shared_experts.down_proj.biases": "model-00011-of-00015.safetensors", "model.layers.29.ffn.shared_experts.down_proj.scales": "model-00011-of-00015.safetensors", "model.layers.29.ffn.shared_experts.down_proj.weight": "model-00011-of-00015.safetensors", "model.layers.29.ffn.shared_experts.gate_proj.biases": "model-00011-of-00015.safetensors", "model.layers.29.ffn.shared_experts.gate_proj.scales": "model-00011-of-00015.safetensors", "model.layers.29.ffn.shared_experts.gate_proj.weight": "model-00011-of-00015.safetensors", "model.layers.29.ffn.shared_experts.up_proj.biases": "model-00011-of-00015.safetensors", "model.layers.29.ffn.shared_experts.up_proj.scales": "model-00011-of-00015.safetensors", "model.layers.29.ffn.shared_experts.up_proj.weight": "model-00011-of-00015.safetensors", "model.layers.29.ffn.switch_mlp.down_proj.scales": "model-00011-of-00015.safetensors", "model.layers.29.ffn.switch_mlp.down_proj.weight": "model-00011-of-00015.safetensors", "model.layers.29.ffn.switch_mlp.gate_proj.scales": "model-00011-of-00015.safetensors", "model.layers.29.ffn.switch_mlp.gate_proj.weight": "model-00011-of-00015.safetensors", "model.layers.29.ffn.switch_mlp.up_proj.scales": "model-00011-of-00015.safetensors", "model.layers.29.ffn.switch_mlp.up_proj.weight": "model-00011-of-00015.safetensors", "model.layers.29.ffn_hc.base": "model-00011-of-00015.safetensors", "model.layers.29.ffn_hc.fn": "model-00011-of-00015.safetensors", "model.layers.29.ffn_hc.scale": "model-00011-of-00015.safetensors", "model.layers.29.ffn_norm.weight": "model-00011-of-00015.safetensors", "model.layers.3.attn.attn_sink": "model-00002-of-00015.safetensors", "model.layers.3.attn.compressor.ape": "model-00002-of-00015.safetensors", "model.layers.3.attn.compressor.norm.weight": "model-00002-of-00015.safetensors", "model.layers.3.attn.compressor.wgate.biases": "model-00002-of-00015.safetensors", "model.layers.3.attn.compressor.wgate.scales": "model-00002-of-00015.safetensors", "model.layers.3.attn.compressor.wgate.weight": "model-00002-of-00015.safetensors", "model.layers.3.attn.compressor.wkv.biases": "model-00002-of-00015.safetensors", "model.layers.3.attn.compressor.wkv.scales": "model-00002-of-00015.safetensors", "model.layers.3.attn.compressor.wkv.weight": "model-00002-of-00015.safetensors", "model.layers.3.attn.kv_norm.weight": "model-00002-of-00015.safetensors", "model.layers.3.attn.q_norm.weight": "model-00002-of-00015.safetensors", "model.layers.3.attn.wkv.biases": "model-00002-of-00015.safetensors", "model.layers.3.attn.wkv.scales": "model-00002-of-00015.safetensors", "model.layers.3.attn.wkv.weight": "model-00002-of-00015.safetensors", "model.layers.3.attn.wo_a.biases": "model-00002-of-00015.safetensors", "model.layers.3.attn.wo_a.scales": "model-00002-of-00015.safetensors", "model.layers.3.attn.wo_a.weight": "model-00002-of-00015.safetensors", "model.layers.3.attn.wo_b.biases": "model-00002-of-00015.safetensors", "model.layers.3.attn.wo_b.scales": "model-00002-of-00015.safetensors", "model.layers.3.attn.wo_b.weight": "model-00002-of-00015.safetensors", "model.layers.3.attn.wq_a.biases": "model-00002-of-00015.safetensors", "model.layers.3.attn.wq_a.scales": "model-00002-of-00015.safetensors", "model.layers.3.attn.wq_a.weight": "model-00002-of-00015.safetensors", "model.layers.3.attn.wq_b.biases": "model-00002-of-00015.safetensors", "model.layers.3.attn.wq_b.scales": "model-00002-of-00015.safetensors", "model.layers.3.attn.wq_b.weight": "model-00002-of-00015.safetensors", "model.layers.3.attn_hc.base": "model-00002-of-00015.safetensors", "model.layers.3.attn_hc.fn": "model-00002-of-00015.safetensors", "model.layers.3.attn_hc.scale": "model-00002-of-00015.safetensors", "model.layers.3.attn_norm.weight": "model-00002-of-00015.safetensors", "model.layers.3.ffn.gate.e_score_correction_bias": "model-00002-of-00015.safetensors", "model.layers.3.ffn.gate.weight": "model-00002-of-00015.safetensors", "model.layers.3.ffn.shared_experts.down_proj.biases": "model-00002-of-00015.safetensors", "model.layers.3.ffn.shared_experts.down_proj.scales": "model-00002-of-00015.safetensors", "model.layers.3.ffn.shared_experts.down_proj.weight": "model-00002-of-00015.safetensors", "model.layers.3.ffn.shared_experts.gate_proj.biases": "model-00002-of-00015.safetensors", "model.layers.3.ffn.shared_experts.gate_proj.scales": "model-00002-of-00015.safetensors", "model.layers.3.ffn.shared_experts.gate_proj.weight": "model-00002-of-00015.safetensors", "model.layers.3.ffn.shared_experts.up_proj.biases": "model-00002-of-00015.safetensors", "model.layers.3.ffn.shared_experts.up_proj.scales": "model-00002-of-00015.safetensors", "model.layers.3.ffn.shared_experts.up_proj.weight": "model-00002-of-00015.safetensors", "model.layers.3.ffn.switch_mlp.down_proj.scales": "model-00002-of-00015.safetensors", "model.layers.3.ffn.switch_mlp.down_proj.weight": "model-00002-of-00015.safetensors", "model.layers.3.ffn.switch_mlp.gate_proj.scales": "model-00002-of-00015.safetensors", "model.layers.3.ffn.switch_mlp.gate_proj.weight": "model-00002-of-00015.safetensors", "model.layers.3.ffn.switch_mlp.up_proj.scales": "model-00002-of-00015.safetensors", "model.layers.3.ffn.switch_mlp.up_proj.weight": "model-00002-of-00015.safetensors", "model.layers.3.ffn_hc.base": "model-00002-of-00015.safetensors", "model.layers.3.ffn_hc.fn": "model-00002-of-00015.safetensors", "model.layers.3.ffn_hc.scale": "model-00002-of-00015.safetensors", "model.layers.3.ffn_norm.weight": "model-00002-of-00015.safetensors", "model.layers.30.attn.attn_sink": "model-00011-of-00015.safetensors", "model.layers.30.attn.compressor.ape": "model-00011-of-00015.safetensors", "model.layers.30.attn.compressor.norm.weight": "model-00011-of-00015.safetensors", "model.layers.30.attn.compressor.wgate.biases": "model-00011-of-00015.safetensors", "model.layers.30.attn.compressor.wgate.scales": "model-00011-of-00015.safetensors", "model.layers.30.attn.compressor.wgate.weight": "model-00011-of-00015.safetensors", "model.layers.30.attn.compressor.wkv.biases": "model-00011-of-00015.safetensors", "model.layers.30.attn.compressor.wkv.scales": "model-00011-of-00015.safetensors", "model.layers.30.attn.compressor.wkv.weight": "model-00011-of-00015.safetensors", "model.layers.30.attn.indexer.compressor.ape": "model-00011-of-00015.safetensors", "model.layers.30.attn.indexer.compressor.norm.weight": "model-00011-of-00015.safetensors", "model.layers.30.attn.indexer.compressor.wgate.biases": "model-00011-of-00015.safetensors", "model.layers.30.attn.indexer.compressor.wgate.scales": "model-00011-of-00015.safetensors", "model.layers.30.attn.indexer.compressor.wgate.weight": "model-00011-of-00015.safetensors", "model.layers.30.attn.indexer.compressor.wkv.biases": "model-00011-of-00015.safetensors", "model.layers.30.attn.indexer.compressor.wkv.scales": "model-00011-of-00015.safetensors", "model.layers.30.attn.indexer.compressor.wkv.weight": "model-00011-of-00015.safetensors", "model.layers.30.attn.indexer.weights_proj.biases": "model-00011-of-00015.safetensors", "model.layers.30.attn.indexer.weights_proj.scales": "model-00011-of-00015.safetensors", "model.layers.30.attn.indexer.weights_proj.weight": "model-00011-of-00015.safetensors", "model.layers.30.attn.indexer.wq_b.biases": "model-00011-of-00015.safetensors", "model.layers.30.attn.indexer.wq_b.scales": "model-00011-of-00015.safetensors", "model.layers.30.attn.indexer.wq_b.weight": "model-00011-of-00015.safetensors", "model.layers.30.attn.kv_norm.weight": "model-00011-of-00015.safetensors", "model.layers.30.attn.q_norm.weight": "model-00011-of-00015.safetensors", "model.layers.30.attn.wkv.biases": "model-00011-of-00015.safetensors", "model.layers.30.attn.wkv.scales": "model-00011-of-00015.safetensors", "model.layers.30.attn.wkv.weight": "model-00011-of-00015.safetensors", "model.layers.30.attn.wo_a.biases": "model-00011-of-00015.safetensors", "model.layers.30.attn.wo_a.scales": "model-00011-of-00015.safetensors", "model.layers.30.attn.wo_a.weight": "model-00011-of-00015.safetensors", "model.layers.30.attn.wo_b.biases": "model-00011-of-00015.safetensors", "model.layers.30.attn.wo_b.scales": "model-00011-of-00015.safetensors", "model.layers.30.attn.wo_b.weight": "model-00011-of-00015.safetensors", "model.layers.30.attn.wq_a.biases": "model-00011-of-00015.safetensors", "model.layers.30.attn.wq_a.scales": "model-00011-of-00015.safetensors", "model.layers.30.attn.wq_a.weight": "model-00011-of-00015.safetensors", "model.layers.30.attn.wq_b.biases": "model-00011-of-00015.safetensors", "model.layers.30.attn.wq_b.scales": "model-00011-of-00015.safetensors", "model.layers.30.attn.wq_b.weight": "model-00011-of-00015.safetensors", "model.layers.30.attn_hc.base": "model-00011-of-00015.safetensors", "model.layers.30.attn_hc.fn": "model-00011-of-00015.safetensors", "model.layers.30.attn_hc.scale": "model-00011-of-00015.safetensors", "model.layers.30.attn_norm.weight": "model-00011-of-00015.safetensors", "model.layers.30.ffn.gate.e_score_correction_bias": "model-00011-of-00015.safetensors", "model.layers.30.ffn.gate.weight": "model-00011-of-00015.safetensors", "model.layers.30.ffn.shared_experts.down_proj.biases": "model-00011-of-00015.safetensors", "model.layers.30.ffn.shared_experts.down_proj.scales": "model-00011-of-00015.safetensors", "model.layers.30.ffn.shared_experts.down_proj.weight": "model-00011-of-00015.safetensors", "model.layers.30.ffn.shared_experts.gate_proj.biases": "model-00011-of-00015.safetensors", "model.layers.30.ffn.shared_experts.gate_proj.scales": "model-00011-of-00015.safetensors", "model.layers.30.ffn.shared_experts.gate_proj.weight": "model-00011-of-00015.safetensors", "model.layers.30.ffn.shared_experts.up_proj.biases": "model-00011-of-00015.safetensors", "model.layers.30.ffn.shared_experts.up_proj.scales": "model-00011-of-00015.safetensors", "model.layers.30.ffn.shared_experts.up_proj.weight": "model-00011-of-00015.safetensors", "model.layers.30.ffn.switch_mlp.down_proj.scales": "model-00011-of-00015.safetensors", "model.layers.30.ffn.switch_mlp.down_proj.weight": "model-00011-of-00015.safetensors", "model.layers.30.ffn.switch_mlp.gate_proj.scales": "model-00011-of-00015.safetensors", "model.layers.30.ffn.switch_mlp.gate_proj.weight": "model-00011-of-00015.safetensors", "model.layers.30.ffn.switch_mlp.up_proj.scales": "model-00011-of-00015.safetensors", "model.layers.30.ffn.switch_mlp.up_proj.weight": "model-00011-of-00015.safetensors", "model.layers.30.ffn_hc.base": "model-00011-of-00015.safetensors", "model.layers.30.ffn_hc.fn": "model-00011-of-00015.safetensors", "model.layers.30.ffn_hc.scale": "model-00011-of-00015.safetensors", "model.layers.30.ffn_norm.weight": "model-00011-of-00015.safetensors", "model.layers.31.attn.attn_sink": "model-00011-of-00015.safetensors", "model.layers.31.attn.compressor.ape": "model-00011-of-00015.safetensors", "model.layers.31.attn.compressor.norm.weight": "model-00011-of-00015.safetensors", "model.layers.31.attn.compressor.wgate.biases": "model-00011-of-00015.safetensors", "model.layers.31.attn.compressor.wgate.scales": "model-00011-of-00015.safetensors", "model.layers.31.attn.compressor.wgate.weight": "model-00011-of-00015.safetensors", "model.layers.31.attn.compressor.wkv.biases": "model-00011-of-00015.safetensors", "model.layers.31.attn.compressor.wkv.scales": "model-00011-of-00015.safetensors", "model.layers.31.attn.compressor.wkv.weight": "model-00011-of-00015.safetensors", "model.layers.31.attn.kv_norm.weight": "model-00011-of-00015.safetensors", "model.layers.31.attn.q_norm.weight": "model-00011-of-00015.safetensors", "model.layers.31.attn.wkv.biases": "model-00011-of-00015.safetensors", "model.layers.31.attn.wkv.scales": "model-00011-of-00015.safetensors", "model.layers.31.attn.wkv.weight": "model-00011-of-00015.safetensors", "model.layers.31.attn.wo_a.biases": "model-00011-of-00015.safetensors", "model.layers.31.attn.wo_a.scales": "model-00011-of-00015.safetensors", "model.layers.31.attn.wo_a.weight": "model-00011-of-00015.safetensors", "model.layers.31.attn.wo_b.biases": "model-00011-of-00015.safetensors", "model.layers.31.attn.wo_b.scales": "model-00011-of-00015.safetensors", "model.layers.31.attn.wo_b.weight": "model-00011-of-00015.safetensors", "model.layers.31.attn.wq_a.biases": "model-00011-of-00015.safetensors", "model.layers.31.attn.wq_a.scales": "model-00011-of-00015.safetensors", "model.layers.31.attn.wq_a.weight": "model-00011-of-00015.safetensors", "model.layers.31.attn.wq_b.biases": "model-00011-of-00015.safetensors", "model.layers.31.attn.wq_b.scales": "model-00011-of-00015.safetensors", "model.layers.31.attn.wq_b.weight": "model-00011-of-00015.safetensors", "model.layers.31.attn_hc.base": "model-00012-of-00015.safetensors", "model.layers.31.attn_hc.fn": "model-00012-of-00015.safetensors", "model.layers.31.attn_hc.scale": "model-00012-of-00015.safetensors", "model.layers.31.attn_norm.weight": "model-00012-of-00015.safetensors", "model.layers.31.ffn.gate.e_score_correction_bias": "model-00011-of-00015.safetensors", "model.layers.31.ffn.gate.weight": "model-00011-of-00015.safetensors", "model.layers.31.ffn.shared_experts.down_proj.biases": "model-00012-of-00015.safetensors", "model.layers.31.ffn.shared_experts.down_proj.scales": "model-00012-of-00015.safetensors", "model.layers.31.ffn.shared_experts.down_proj.weight": "model-00012-of-00015.safetensors", "model.layers.31.ffn.shared_experts.gate_proj.biases": "model-00012-of-00015.safetensors", "model.layers.31.ffn.shared_experts.gate_proj.scales": "model-00012-of-00015.safetensors", "model.layers.31.ffn.shared_experts.gate_proj.weight": "model-00012-of-00015.safetensors", "model.layers.31.ffn.shared_experts.up_proj.biases": "model-00012-of-00015.safetensors", "model.layers.31.ffn.shared_experts.up_proj.scales": "model-00012-of-00015.safetensors", "model.layers.31.ffn.shared_experts.up_proj.weight": "model-00012-of-00015.safetensors", "model.layers.31.ffn.switch_mlp.down_proj.scales": "model-00012-of-00015.safetensors", "model.layers.31.ffn.switch_mlp.down_proj.weight": "model-00012-of-00015.safetensors", "model.layers.31.ffn.switch_mlp.gate_proj.scales": "model-00011-of-00015.safetensors", "model.layers.31.ffn.switch_mlp.gate_proj.weight": "model-00011-of-00015.safetensors", "model.layers.31.ffn.switch_mlp.up_proj.scales": "model-00011-of-00015.safetensors", "model.layers.31.ffn.switch_mlp.up_proj.weight": "model-00011-of-00015.safetensors", "model.layers.31.ffn_hc.base": "model-00012-of-00015.safetensors", "model.layers.31.ffn_hc.fn": "model-00012-of-00015.safetensors", "model.layers.31.ffn_hc.scale": "model-00012-of-00015.safetensors", "model.layers.31.ffn_norm.weight": "model-00012-of-00015.safetensors", "model.layers.32.attn.attn_sink": "model-00012-of-00015.safetensors", "model.layers.32.attn.compressor.ape": "model-00012-of-00015.safetensors", "model.layers.32.attn.compressor.norm.weight": "model-00012-of-00015.safetensors", "model.layers.32.attn.compressor.wgate.biases": "model-00012-of-00015.safetensors", "model.layers.32.attn.compressor.wgate.scales": "model-00012-of-00015.safetensors", "model.layers.32.attn.compressor.wgate.weight": "model-00012-of-00015.safetensors", "model.layers.32.attn.compressor.wkv.biases": "model-00012-of-00015.safetensors", "model.layers.32.attn.compressor.wkv.scales": "model-00012-of-00015.safetensors", "model.layers.32.attn.compressor.wkv.weight": "model-00012-of-00015.safetensors", "model.layers.32.attn.indexer.compressor.ape": "model-00012-of-00015.safetensors", "model.layers.32.attn.indexer.compressor.norm.weight": "model-00012-of-00015.safetensors", "model.layers.32.attn.indexer.compressor.wgate.biases": "model-00012-of-00015.safetensors", "model.layers.32.attn.indexer.compressor.wgate.scales": "model-00012-of-00015.safetensors", "model.layers.32.attn.indexer.compressor.wgate.weight": "model-00012-of-00015.safetensors", "model.layers.32.attn.indexer.compressor.wkv.biases": "model-00012-of-00015.safetensors", "model.layers.32.attn.indexer.compressor.wkv.scales": "model-00012-of-00015.safetensors", "model.layers.32.attn.indexer.compressor.wkv.weight": "model-00012-of-00015.safetensors", "model.layers.32.attn.indexer.weights_proj.biases": "model-00012-of-00015.safetensors", "model.layers.32.attn.indexer.weights_proj.scales": "model-00012-of-00015.safetensors", "model.layers.32.attn.indexer.weights_proj.weight": "model-00012-of-00015.safetensors", "model.layers.32.attn.indexer.wq_b.biases": "model-00012-of-00015.safetensors", "model.layers.32.attn.indexer.wq_b.scales": "model-00012-of-00015.safetensors", "model.layers.32.attn.indexer.wq_b.weight": "model-00012-of-00015.safetensors", "model.layers.32.attn.kv_norm.weight": "model-00012-of-00015.safetensors", "model.layers.32.attn.q_norm.weight": "model-00012-of-00015.safetensors", "model.layers.32.attn.wkv.biases": "model-00012-of-00015.safetensors", "model.layers.32.attn.wkv.scales": "model-00012-of-00015.safetensors", "model.layers.32.attn.wkv.weight": "model-00012-of-00015.safetensors", "model.layers.32.attn.wo_a.biases": "model-00012-of-00015.safetensors", "model.layers.32.attn.wo_a.scales": "model-00012-of-00015.safetensors", "model.layers.32.attn.wo_a.weight": "model-00012-of-00015.safetensors", "model.layers.32.attn.wo_b.biases": "model-00012-of-00015.safetensors", "model.layers.32.attn.wo_b.scales": "model-00012-of-00015.safetensors", "model.layers.32.attn.wo_b.weight": "model-00012-of-00015.safetensors", "model.layers.32.attn.wq_a.biases": "model-00012-of-00015.safetensors", "model.layers.32.attn.wq_a.scales": "model-00012-of-00015.safetensors", "model.layers.32.attn.wq_a.weight": "model-00012-of-00015.safetensors", "model.layers.32.attn.wq_b.biases": "model-00012-of-00015.safetensors", "model.layers.32.attn.wq_b.scales": "model-00012-of-00015.safetensors", "model.layers.32.attn.wq_b.weight": "model-00012-of-00015.safetensors", "model.layers.32.attn_hc.base": "model-00012-of-00015.safetensors", "model.layers.32.attn_hc.fn": "model-00012-of-00015.safetensors", "model.layers.32.attn_hc.scale": "model-00012-of-00015.safetensors", "model.layers.32.attn_norm.weight": "model-00012-of-00015.safetensors", "model.layers.32.ffn.gate.e_score_correction_bias": "model-00012-of-00015.safetensors", "model.layers.32.ffn.gate.weight": "model-00012-of-00015.safetensors", "model.layers.32.ffn.shared_experts.down_proj.biases": "model-00012-of-00015.safetensors", "model.layers.32.ffn.shared_experts.down_proj.scales": "model-00012-of-00015.safetensors", "model.layers.32.ffn.shared_experts.down_proj.weight": "model-00012-of-00015.safetensors", "model.layers.32.ffn.shared_experts.gate_proj.biases": "model-00012-of-00015.safetensors", "model.layers.32.ffn.shared_experts.gate_proj.scales": "model-00012-of-00015.safetensors", "model.layers.32.ffn.shared_experts.gate_proj.weight": "model-00012-of-00015.safetensors", "model.layers.32.ffn.shared_experts.up_proj.biases": "model-00012-of-00015.safetensors", "model.layers.32.ffn.shared_experts.up_proj.scales": "model-00012-of-00015.safetensors", "model.layers.32.ffn.shared_experts.up_proj.weight": "model-00012-of-00015.safetensors", "model.layers.32.ffn.switch_mlp.down_proj.scales": "model-00012-of-00015.safetensors", "model.layers.32.ffn.switch_mlp.down_proj.weight": "model-00012-of-00015.safetensors", "model.layers.32.ffn.switch_mlp.gate_proj.scales": "model-00012-of-00015.safetensors", "model.layers.32.ffn.switch_mlp.gate_proj.weight": "model-00012-of-00015.safetensors", "model.layers.32.ffn.switch_mlp.up_proj.scales": "model-00012-of-00015.safetensors", "model.layers.32.ffn.switch_mlp.up_proj.weight": "model-00012-of-00015.safetensors", "model.layers.32.ffn_hc.base": "model-00012-of-00015.safetensors", "model.layers.32.ffn_hc.fn": "model-00012-of-00015.safetensors", "model.layers.32.ffn_hc.scale": "model-00012-of-00015.safetensors", "model.layers.32.ffn_norm.weight": "model-00012-of-00015.safetensors", "model.layers.33.attn.attn_sink": "model-00012-of-00015.safetensors", "model.layers.33.attn.compressor.ape": "model-00012-of-00015.safetensors", "model.layers.33.attn.compressor.norm.weight": "model-00012-of-00015.safetensors", "model.layers.33.attn.compressor.wgate.biases": "model-00012-of-00015.safetensors", "model.layers.33.attn.compressor.wgate.scales": "model-00012-of-00015.safetensors", "model.layers.33.attn.compressor.wgate.weight": "model-00012-of-00015.safetensors", "model.layers.33.attn.compressor.wkv.biases": "model-00012-of-00015.safetensors", "model.layers.33.attn.compressor.wkv.scales": "model-00012-of-00015.safetensors", "model.layers.33.attn.compressor.wkv.weight": "model-00012-of-00015.safetensors", "model.layers.33.attn.kv_norm.weight": "model-00012-of-00015.safetensors", "model.layers.33.attn.q_norm.weight": "model-00012-of-00015.safetensors", "model.layers.33.attn.wkv.biases": "model-00012-of-00015.safetensors", "model.layers.33.attn.wkv.scales": "model-00012-of-00015.safetensors", "model.layers.33.attn.wkv.weight": "model-00012-of-00015.safetensors", "model.layers.33.attn.wo_a.biases": "model-00012-of-00015.safetensors", "model.layers.33.attn.wo_a.scales": "model-00012-of-00015.safetensors", "model.layers.33.attn.wo_a.weight": "model-00012-of-00015.safetensors", "model.layers.33.attn.wo_b.biases": "model-00012-of-00015.safetensors", "model.layers.33.attn.wo_b.scales": "model-00012-of-00015.safetensors", "model.layers.33.attn.wo_b.weight": "model-00012-of-00015.safetensors", "model.layers.33.attn.wq_a.biases": "model-00012-of-00015.safetensors", "model.layers.33.attn.wq_a.scales": "model-00012-of-00015.safetensors", "model.layers.33.attn.wq_a.weight": "model-00012-of-00015.safetensors", "model.layers.33.attn.wq_b.biases": "model-00012-of-00015.safetensors", "model.layers.33.attn.wq_b.scales": "model-00012-of-00015.safetensors", "model.layers.33.attn.wq_b.weight": "model-00012-of-00015.safetensors", "model.layers.33.attn_hc.base": "model-00012-of-00015.safetensors", "model.layers.33.attn_hc.fn": "model-00012-of-00015.safetensors", "model.layers.33.attn_hc.scale": "model-00012-of-00015.safetensors", "model.layers.33.attn_norm.weight": "model-00012-of-00015.safetensors", "model.layers.33.ffn.gate.e_score_correction_bias": "model-00012-of-00015.safetensors", "model.layers.33.ffn.gate.weight": "model-00012-of-00015.safetensors", "model.layers.33.ffn.shared_experts.down_proj.biases": "model-00012-of-00015.safetensors", "model.layers.33.ffn.shared_experts.down_proj.scales": "model-00012-of-00015.safetensors", "model.layers.33.ffn.shared_experts.down_proj.weight": "model-00012-of-00015.safetensors", "model.layers.33.ffn.shared_experts.gate_proj.biases": "model-00012-of-00015.safetensors", "model.layers.33.ffn.shared_experts.gate_proj.scales": "model-00012-of-00015.safetensors", "model.layers.33.ffn.shared_experts.gate_proj.weight": "model-00012-of-00015.safetensors", "model.layers.33.ffn.shared_experts.up_proj.biases": "model-00012-of-00015.safetensors", "model.layers.33.ffn.shared_experts.up_proj.scales": "model-00012-of-00015.safetensors", "model.layers.33.ffn.shared_experts.up_proj.weight": "model-00012-of-00015.safetensors", "model.layers.33.ffn.switch_mlp.down_proj.scales": "model-00012-of-00015.safetensors", "model.layers.33.ffn.switch_mlp.down_proj.weight": "model-00012-of-00015.safetensors", "model.layers.33.ffn.switch_mlp.gate_proj.scales": "model-00012-of-00015.safetensors", "model.layers.33.ffn.switch_mlp.gate_proj.weight": "model-00012-of-00015.safetensors", "model.layers.33.ffn.switch_mlp.up_proj.scales": "model-00012-of-00015.safetensors", "model.layers.33.ffn.switch_mlp.up_proj.weight": "model-00012-of-00015.safetensors", "model.layers.33.ffn_hc.base": "model-00012-of-00015.safetensors", "model.layers.33.ffn_hc.fn": "model-00012-of-00015.safetensors", "model.layers.33.ffn_hc.scale": "model-00012-of-00015.safetensors", "model.layers.33.ffn_norm.weight": "model-00012-of-00015.safetensors", "model.layers.34.attn.attn_sink": "model-00012-of-00015.safetensors", "model.layers.34.attn.compressor.ape": "model-00012-of-00015.safetensors", "model.layers.34.attn.compressor.norm.weight": "model-00012-of-00015.safetensors", "model.layers.34.attn.compressor.wgate.biases": "model-00012-of-00015.safetensors", "model.layers.34.attn.compressor.wgate.scales": "model-00012-of-00015.safetensors", "model.layers.34.attn.compressor.wgate.weight": "model-00012-of-00015.safetensors", "model.layers.34.attn.compressor.wkv.biases": "model-00012-of-00015.safetensors", "model.layers.34.attn.compressor.wkv.scales": "model-00012-of-00015.safetensors", "model.layers.34.attn.compressor.wkv.weight": "model-00012-of-00015.safetensors", "model.layers.34.attn.indexer.compressor.ape": "model-00012-of-00015.safetensors", "model.layers.34.attn.indexer.compressor.norm.weight": "model-00012-of-00015.safetensors", "model.layers.34.attn.indexer.compressor.wgate.biases": "model-00012-of-00015.safetensors", "model.layers.34.attn.indexer.compressor.wgate.scales": "model-00012-of-00015.safetensors", "model.layers.34.attn.indexer.compressor.wgate.weight": "model-00012-of-00015.safetensors", "model.layers.34.attn.indexer.compressor.wkv.biases": "model-00012-of-00015.safetensors", "model.layers.34.attn.indexer.compressor.wkv.scales": "model-00012-of-00015.safetensors", "model.layers.34.attn.indexer.compressor.wkv.weight": "model-00012-of-00015.safetensors", "model.layers.34.attn.indexer.weights_proj.biases": "model-00012-of-00015.safetensors", "model.layers.34.attn.indexer.weights_proj.scales": "model-00012-of-00015.safetensors", "model.layers.34.attn.indexer.weights_proj.weight": "model-00012-of-00015.safetensors", "model.layers.34.attn.indexer.wq_b.biases": "model-00012-of-00015.safetensors", "model.layers.34.attn.indexer.wq_b.scales": "model-00012-of-00015.safetensors", "model.layers.34.attn.indexer.wq_b.weight": "model-00012-of-00015.safetensors", "model.layers.34.attn.kv_norm.weight": "model-00012-of-00015.safetensors", "model.layers.34.attn.q_norm.weight": "model-00012-of-00015.safetensors", "model.layers.34.attn.wkv.biases": "model-00012-of-00015.safetensors", "model.layers.34.attn.wkv.scales": "model-00012-of-00015.safetensors", "model.layers.34.attn.wkv.weight": "model-00012-of-00015.safetensors", "model.layers.34.attn.wo_a.biases": "model-00012-of-00015.safetensors", "model.layers.34.attn.wo_a.scales": "model-00012-of-00015.safetensors", "model.layers.34.attn.wo_a.weight": "model-00012-of-00015.safetensors", "model.layers.34.attn.wo_b.biases": "model-00012-of-00015.safetensors", "model.layers.34.attn.wo_b.scales": "model-00012-of-00015.safetensors", "model.layers.34.attn.wo_b.weight": "model-00012-of-00015.safetensors", "model.layers.34.attn.wq_a.biases": "model-00012-of-00015.safetensors", "model.layers.34.attn.wq_a.scales": "model-00012-of-00015.safetensors", "model.layers.34.attn.wq_a.weight": "model-00012-of-00015.safetensors", "model.layers.34.attn.wq_b.biases": "model-00012-of-00015.safetensors", "model.layers.34.attn.wq_b.scales": "model-00012-of-00015.safetensors", "model.layers.34.attn.wq_b.weight": "model-00012-of-00015.safetensors", "model.layers.34.attn_hc.base": "model-00013-of-00015.safetensors", "model.layers.34.attn_hc.fn": "model-00013-of-00015.safetensors", "model.layers.34.attn_hc.scale": "model-00013-of-00015.safetensors", "model.layers.34.attn_norm.weight": "model-00013-of-00015.safetensors", "model.layers.34.ffn.gate.e_score_correction_bias": "model-00012-of-00015.safetensors", "model.layers.34.ffn.gate.weight": "model-00012-of-00015.safetensors", "model.layers.34.ffn.shared_experts.down_proj.biases": "model-00013-of-00015.safetensors", "model.layers.34.ffn.shared_experts.down_proj.scales": "model-00013-of-00015.safetensors", "model.layers.34.ffn.shared_experts.down_proj.weight": "model-00013-of-00015.safetensors", "model.layers.34.ffn.shared_experts.gate_proj.biases": "model-00013-of-00015.safetensors", "model.layers.34.ffn.shared_experts.gate_proj.scales": "model-00013-of-00015.safetensors", "model.layers.34.ffn.shared_experts.gate_proj.weight": "model-00013-of-00015.safetensors", "model.layers.34.ffn.shared_experts.up_proj.biases": "model-00013-of-00015.safetensors", "model.layers.34.ffn.shared_experts.up_proj.scales": "model-00013-of-00015.safetensors", "model.layers.34.ffn.shared_experts.up_proj.weight": "model-00013-of-00015.safetensors", "model.layers.34.ffn.switch_mlp.down_proj.scales": "model-00013-of-00015.safetensors", "model.layers.34.ffn.switch_mlp.down_proj.weight": "model-00013-of-00015.safetensors", "model.layers.34.ffn.switch_mlp.gate_proj.scales": "model-00012-of-00015.safetensors", "model.layers.34.ffn.switch_mlp.gate_proj.weight": "model-00012-of-00015.safetensors", "model.layers.34.ffn.switch_mlp.up_proj.scales": "model-00013-of-00015.safetensors", "model.layers.34.ffn.switch_mlp.up_proj.weight": "model-00012-of-00015.safetensors", "model.layers.34.ffn_hc.base": "model-00013-of-00015.safetensors", "model.layers.34.ffn_hc.fn": "model-00013-of-00015.safetensors", "model.layers.34.ffn_hc.scale": "model-00013-of-00015.safetensors", "model.layers.34.ffn_norm.weight": "model-00013-of-00015.safetensors", "model.layers.35.attn.attn_sink": "model-00013-of-00015.safetensors", "model.layers.35.attn.compressor.ape": "model-00013-of-00015.safetensors", "model.layers.35.attn.compressor.norm.weight": "model-00013-of-00015.safetensors", "model.layers.35.attn.compressor.wgate.biases": "model-00013-of-00015.safetensors", "model.layers.35.attn.compressor.wgate.scales": "model-00013-of-00015.safetensors", "model.layers.35.attn.compressor.wgate.weight": "model-00013-of-00015.safetensors", "model.layers.35.attn.compressor.wkv.biases": "model-00013-of-00015.safetensors", "model.layers.35.attn.compressor.wkv.scales": "model-00013-of-00015.safetensors", "model.layers.35.attn.compressor.wkv.weight": "model-00013-of-00015.safetensors", "model.layers.35.attn.kv_norm.weight": "model-00013-of-00015.safetensors", "model.layers.35.attn.q_norm.weight": "model-00013-of-00015.safetensors", "model.layers.35.attn.wkv.biases": "model-00013-of-00015.safetensors", "model.layers.35.attn.wkv.scales": "model-00013-of-00015.safetensors", "model.layers.35.attn.wkv.weight": "model-00013-of-00015.safetensors", "model.layers.35.attn.wo_a.biases": "model-00013-of-00015.safetensors", "model.layers.35.attn.wo_a.scales": "model-00013-of-00015.safetensors", "model.layers.35.attn.wo_a.weight": "model-00013-of-00015.safetensors", "model.layers.35.attn.wo_b.biases": "model-00013-of-00015.safetensors", "model.layers.35.attn.wo_b.scales": "model-00013-of-00015.safetensors", "model.layers.35.attn.wo_b.weight": "model-00013-of-00015.safetensors", "model.layers.35.attn.wq_a.biases": "model-00013-of-00015.safetensors", "model.layers.35.attn.wq_a.scales": "model-00013-of-00015.safetensors", "model.layers.35.attn.wq_a.weight": "model-00013-of-00015.safetensors", "model.layers.35.attn.wq_b.biases": "model-00013-of-00015.safetensors", "model.layers.35.attn.wq_b.scales": "model-00013-of-00015.safetensors", "model.layers.35.attn.wq_b.weight": "model-00013-of-00015.safetensors", "model.layers.35.attn_hc.base": "model-00013-of-00015.safetensors", "model.layers.35.attn_hc.fn": "model-00013-of-00015.safetensors", "model.layers.35.attn_hc.scale": "model-00013-of-00015.safetensors", "model.layers.35.attn_norm.weight": "model-00013-of-00015.safetensors", "model.layers.35.ffn.gate.e_score_correction_bias": "model-00013-of-00015.safetensors", "model.layers.35.ffn.gate.weight": "model-00013-of-00015.safetensors", "model.layers.35.ffn.shared_experts.down_proj.biases": "model-00013-of-00015.safetensors", "model.layers.35.ffn.shared_experts.down_proj.scales": "model-00013-of-00015.safetensors", "model.layers.35.ffn.shared_experts.down_proj.weight": "model-00013-of-00015.safetensors", "model.layers.35.ffn.shared_experts.gate_proj.biases": "model-00013-of-00015.safetensors", "model.layers.35.ffn.shared_experts.gate_proj.scales": "model-00013-of-00015.safetensors", "model.layers.35.ffn.shared_experts.gate_proj.weight": "model-00013-of-00015.safetensors", "model.layers.35.ffn.shared_experts.up_proj.biases": "model-00013-of-00015.safetensors", "model.layers.35.ffn.shared_experts.up_proj.scales": "model-00013-of-00015.safetensors", "model.layers.35.ffn.shared_experts.up_proj.weight": "model-00013-of-00015.safetensors", "model.layers.35.ffn.switch_mlp.down_proj.scales": "model-00013-of-00015.safetensors", "model.layers.35.ffn.switch_mlp.down_proj.weight": "model-00013-of-00015.safetensors", "model.layers.35.ffn.switch_mlp.gate_proj.scales": "model-00013-of-00015.safetensors", "model.layers.35.ffn.switch_mlp.gate_proj.weight": "model-00013-of-00015.safetensors", "model.layers.35.ffn.switch_mlp.up_proj.scales": "model-00013-of-00015.safetensors", "model.layers.35.ffn.switch_mlp.up_proj.weight": "model-00013-of-00015.safetensors", "model.layers.35.ffn_hc.base": "model-00013-of-00015.safetensors", "model.layers.35.ffn_hc.fn": "model-00013-of-00015.safetensors", "model.layers.35.ffn_hc.scale": "model-00013-of-00015.safetensors", "model.layers.35.ffn_norm.weight": "model-00013-of-00015.safetensors", "model.layers.36.attn.attn_sink": "model-00013-of-00015.safetensors", "model.layers.36.attn.compressor.ape": "model-00013-of-00015.safetensors", "model.layers.36.attn.compressor.norm.weight": "model-00013-of-00015.safetensors", "model.layers.36.attn.compressor.wgate.biases": "model-00013-of-00015.safetensors", "model.layers.36.attn.compressor.wgate.scales": "model-00013-of-00015.safetensors", "model.layers.36.attn.compressor.wgate.weight": "model-00013-of-00015.safetensors", "model.layers.36.attn.compressor.wkv.biases": "model-00013-of-00015.safetensors", "model.layers.36.attn.compressor.wkv.scales": "model-00013-of-00015.safetensors", "model.layers.36.attn.compressor.wkv.weight": "model-00013-of-00015.safetensors", "model.layers.36.attn.indexer.compressor.ape": "model-00013-of-00015.safetensors", "model.layers.36.attn.indexer.compressor.norm.weight": "model-00013-of-00015.safetensors", "model.layers.36.attn.indexer.compressor.wgate.biases": "model-00013-of-00015.safetensors", "model.layers.36.attn.indexer.compressor.wgate.scales": "model-00013-of-00015.safetensors", "model.layers.36.attn.indexer.compressor.wgate.weight": "model-00013-of-00015.safetensors", "model.layers.36.attn.indexer.compressor.wkv.biases": "model-00013-of-00015.safetensors", "model.layers.36.attn.indexer.compressor.wkv.scales": "model-00013-of-00015.safetensors", "model.layers.36.attn.indexer.compressor.wkv.weight": "model-00013-of-00015.safetensors", "model.layers.36.attn.indexer.weights_proj.biases": "model-00013-of-00015.safetensors", "model.layers.36.attn.indexer.weights_proj.scales": "model-00013-of-00015.safetensors", "model.layers.36.attn.indexer.weights_proj.weight": "model-00013-of-00015.safetensors", "model.layers.36.attn.indexer.wq_b.biases": "model-00013-of-00015.safetensors", "model.layers.36.attn.indexer.wq_b.scales": "model-00013-of-00015.safetensors", "model.layers.36.attn.indexer.wq_b.weight": "model-00013-of-00015.safetensors", "model.layers.36.attn.kv_norm.weight": "model-00013-of-00015.safetensors", "model.layers.36.attn.q_norm.weight": "model-00013-of-00015.safetensors", "model.layers.36.attn.wkv.biases": "model-00013-of-00015.safetensors", "model.layers.36.attn.wkv.scales": "model-00013-of-00015.safetensors", "model.layers.36.attn.wkv.weight": "model-00013-of-00015.safetensors", "model.layers.36.attn.wo_a.biases": "model-00013-of-00015.safetensors", "model.layers.36.attn.wo_a.scales": "model-00013-of-00015.safetensors", "model.layers.36.attn.wo_a.weight": "model-00013-of-00015.safetensors", "model.layers.36.attn.wo_b.biases": "model-00013-of-00015.safetensors", "model.layers.36.attn.wo_b.scales": "model-00013-of-00015.safetensors", "model.layers.36.attn.wo_b.weight": "model-00013-of-00015.safetensors", "model.layers.36.attn.wq_a.biases": "model-00013-of-00015.safetensors", "model.layers.36.attn.wq_a.scales": "model-00013-of-00015.safetensors", "model.layers.36.attn.wq_a.weight": "model-00013-of-00015.safetensors", "model.layers.36.attn.wq_b.biases": "model-00013-of-00015.safetensors", "model.layers.36.attn.wq_b.scales": "model-00013-of-00015.safetensors", "model.layers.36.attn.wq_b.weight": "model-00013-of-00015.safetensors", "model.layers.36.attn_hc.base": "model-00013-of-00015.safetensors", "model.layers.36.attn_hc.fn": "model-00013-of-00015.safetensors", "model.layers.36.attn_hc.scale": "model-00013-of-00015.safetensors", "model.layers.36.attn_norm.weight": "model-00013-of-00015.safetensors", "model.layers.36.ffn.gate.e_score_correction_bias": "model-00013-of-00015.safetensors", "model.layers.36.ffn.gate.weight": "model-00013-of-00015.safetensors", "model.layers.36.ffn.shared_experts.down_proj.biases": "model-00013-of-00015.safetensors", "model.layers.36.ffn.shared_experts.down_proj.scales": "model-00013-of-00015.safetensors", "model.layers.36.ffn.shared_experts.down_proj.weight": "model-00013-of-00015.safetensors", "model.layers.36.ffn.shared_experts.gate_proj.biases": "model-00013-of-00015.safetensors", "model.layers.36.ffn.shared_experts.gate_proj.scales": "model-00013-of-00015.safetensors", "model.layers.36.ffn.shared_experts.gate_proj.weight": "model-00013-of-00015.safetensors", "model.layers.36.ffn.shared_experts.up_proj.biases": "model-00013-of-00015.safetensors", "model.layers.36.ffn.shared_experts.up_proj.scales": "model-00013-of-00015.safetensors", "model.layers.36.ffn.shared_experts.up_proj.weight": "model-00013-of-00015.safetensors", "model.layers.36.ffn.switch_mlp.down_proj.scales": "model-00013-of-00015.safetensors", "model.layers.36.ffn.switch_mlp.down_proj.weight": "model-00013-of-00015.safetensors", "model.layers.36.ffn.switch_mlp.gate_proj.scales": "model-00013-of-00015.safetensors", "model.layers.36.ffn.switch_mlp.gate_proj.weight": "model-00013-of-00015.safetensors", "model.layers.36.ffn.switch_mlp.up_proj.scales": "model-00013-of-00015.safetensors", "model.layers.36.ffn.switch_mlp.up_proj.weight": "model-00013-of-00015.safetensors", "model.layers.36.ffn_hc.base": "model-00013-of-00015.safetensors", "model.layers.36.ffn_hc.fn": "model-00013-of-00015.safetensors", "model.layers.36.ffn_hc.scale": "model-00013-of-00015.safetensors", "model.layers.36.ffn_norm.weight": "model-00013-of-00015.safetensors", "model.layers.37.attn.attn_sink": "model-00013-of-00015.safetensors", "model.layers.37.attn.compressor.ape": "model-00013-of-00015.safetensors", "model.layers.37.attn.compressor.norm.weight": "model-00013-of-00015.safetensors", "model.layers.37.attn.compressor.wgate.biases": "model-00013-of-00015.safetensors", "model.layers.37.attn.compressor.wgate.scales": "model-00013-of-00015.safetensors", "model.layers.37.attn.compressor.wgate.weight": "model-00013-of-00015.safetensors", "model.layers.37.attn.compressor.wkv.biases": "model-00013-of-00015.safetensors", "model.layers.37.attn.compressor.wkv.scales": "model-00013-of-00015.safetensors", "model.layers.37.attn.compressor.wkv.weight": "model-00013-of-00015.safetensors", "model.layers.37.attn.kv_norm.weight": "model-00013-of-00015.safetensors", "model.layers.37.attn.q_norm.weight": "model-00013-of-00015.safetensors", "model.layers.37.attn.wkv.biases": "model-00013-of-00015.safetensors", "model.layers.37.attn.wkv.scales": "model-00013-of-00015.safetensors", "model.layers.37.attn.wkv.weight": "model-00013-of-00015.safetensors", "model.layers.37.attn.wo_a.biases": "model-00013-of-00015.safetensors", "model.layers.37.attn.wo_a.scales": "model-00013-of-00015.safetensors", "model.layers.37.attn.wo_a.weight": "model-00013-of-00015.safetensors", "model.layers.37.attn.wo_b.biases": "model-00013-of-00015.safetensors", "model.layers.37.attn.wo_b.scales": "model-00013-of-00015.safetensors", "model.layers.37.attn.wo_b.weight": "model-00013-of-00015.safetensors", "model.layers.37.attn.wq_a.biases": "model-00013-of-00015.safetensors", "model.layers.37.attn.wq_a.scales": "model-00013-of-00015.safetensors", "model.layers.37.attn.wq_a.weight": "model-00013-of-00015.safetensors", "model.layers.37.attn.wq_b.biases": "model-00013-of-00015.safetensors", "model.layers.37.attn.wq_b.scales": "model-00013-of-00015.safetensors", "model.layers.37.attn.wq_b.weight": "model-00013-of-00015.safetensors", "model.layers.37.attn_hc.base": "model-00014-of-00015.safetensors", "model.layers.37.attn_hc.fn": "model-00014-of-00015.safetensors", "model.layers.37.attn_hc.scale": "model-00014-of-00015.safetensors", "model.layers.37.attn_norm.weight": "model-00014-of-00015.safetensors", "model.layers.37.ffn.gate.e_score_correction_bias": "model-00013-of-00015.safetensors", "model.layers.37.ffn.gate.weight": "model-00013-of-00015.safetensors", "model.layers.37.ffn.shared_experts.down_proj.biases": "model-00014-of-00015.safetensors", "model.layers.37.ffn.shared_experts.down_proj.scales": "model-00014-of-00015.safetensors", "model.layers.37.ffn.shared_experts.down_proj.weight": "model-00014-of-00015.safetensors", "model.layers.37.ffn.shared_experts.gate_proj.biases": "model-00014-of-00015.safetensors", "model.layers.37.ffn.shared_experts.gate_proj.scales": "model-00014-of-00015.safetensors", "model.layers.37.ffn.shared_experts.gate_proj.weight": "model-00014-of-00015.safetensors", "model.layers.37.ffn.shared_experts.up_proj.biases": "model-00014-of-00015.safetensors", "model.layers.37.ffn.shared_experts.up_proj.scales": "model-00014-of-00015.safetensors", "model.layers.37.ffn.shared_experts.up_proj.weight": "model-00014-of-00015.safetensors", "model.layers.37.ffn.switch_mlp.down_proj.scales": "model-00014-of-00015.safetensors", "model.layers.37.ffn.switch_mlp.down_proj.weight": "model-00014-of-00015.safetensors", "model.layers.37.ffn.switch_mlp.gate_proj.scales": "model-00013-of-00015.safetensors", "model.layers.37.ffn.switch_mlp.gate_proj.weight": "model-00013-of-00015.safetensors", "model.layers.37.ffn.switch_mlp.up_proj.scales": "model-00014-of-00015.safetensors", "model.layers.37.ffn.switch_mlp.up_proj.weight": "model-00014-of-00015.safetensors", "model.layers.37.ffn_hc.base": "model-00014-of-00015.safetensors", "model.layers.37.ffn_hc.fn": "model-00014-of-00015.safetensors", "model.layers.37.ffn_hc.scale": "model-00014-of-00015.safetensors", "model.layers.37.ffn_norm.weight": "model-00014-of-00015.safetensors", "model.layers.38.attn.attn_sink": "model-00014-of-00015.safetensors", "model.layers.38.attn.compressor.ape": "model-00014-of-00015.safetensors", "model.layers.38.attn.compressor.norm.weight": "model-00014-of-00015.safetensors", "model.layers.38.attn.compressor.wgate.biases": "model-00014-of-00015.safetensors", "model.layers.38.attn.compressor.wgate.scales": "model-00014-of-00015.safetensors", "model.layers.38.attn.compressor.wgate.weight": "model-00014-of-00015.safetensors", "model.layers.38.attn.compressor.wkv.biases": "model-00014-of-00015.safetensors", "model.layers.38.attn.compressor.wkv.scales": "model-00014-of-00015.safetensors", "model.layers.38.attn.compressor.wkv.weight": "model-00014-of-00015.safetensors", "model.layers.38.attn.indexer.compressor.ape": "model-00014-of-00015.safetensors", "model.layers.38.attn.indexer.compressor.norm.weight": "model-00014-of-00015.safetensors", "model.layers.38.attn.indexer.compressor.wgate.biases": "model-00014-of-00015.safetensors", "model.layers.38.attn.indexer.compressor.wgate.scales": "model-00014-of-00015.safetensors", "model.layers.38.attn.indexer.compressor.wgate.weight": "model-00014-of-00015.safetensors", "model.layers.38.attn.indexer.compressor.wkv.biases": "model-00014-of-00015.safetensors", "model.layers.38.attn.indexer.compressor.wkv.scales": "model-00014-of-00015.safetensors", "model.layers.38.attn.indexer.compressor.wkv.weight": "model-00014-of-00015.safetensors", "model.layers.38.attn.indexer.weights_proj.biases": "model-00014-of-00015.safetensors", "model.layers.38.attn.indexer.weights_proj.scales": "model-00014-of-00015.safetensors", "model.layers.38.attn.indexer.weights_proj.weight": "model-00014-of-00015.safetensors", "model.layers.38.attn.indexer.wq_b.biases": "model-00014-of-00015.safetensors", "model.layers.38.attn.indexer.wq_b.scales": "model-00014-of-00015.safetensors", "model.layers.38.attn.indexer.wq_b.weight": "model-00014-of-00015.safetensors", "model.layers.38.attn.kv_norm.weight": "model-00014-of-00015.safetensors", "model.layers.38.attn.q_norm.weight": "model-00014-of-00015.safetensors", "model.layers.38.attn.wkv.biases": "model-00014-of-00015.safetensors", "model.layers.38.attn.wkv.scales": "model-00014-of-00015.safetensors", "model.layers.38.attn.wkv.weight": "model-00014-of-00015.safetensors", "model.layers.38.attn.wo_a.biases": "model-00014-of-00015.safetensors", "model.layers.38.attn.wo_a.scales": "model-00014-of-00015.safetensors", "model.layers.38.attn.wo_a.weight": "model-00014-of-00015.safetensors", "model.layers.38.attn.wo_b.biases": "model-00014-of-00015.safetensors", "model.layers.38.attn.wo_b.scales": "model-00014-of-00015.safetensors", "model.layers.38.attn.wo_b.weight": "model-00014-of-00015.safetensors", "model.layers.38.attn.wq_a.biases": "model-00014-of-00015.safetensors", "model.layers.38.attn.wq_a.scales": "model-00014-of-00015.safetensors", "model.layers.38.attn.wq_a.weight": "model-00014-of-00015.safetensors", "model.layers.38.attn.wq_b.biases": "model-00014-of-00015.safetensors", "model.layers.38.attn.wq_b.scales": "model-00014-of-00015.safetensors", "model.layers.38.attn.wq_b.weight": "model-00014-of-00015.safetensors", "model.layers.38.attn_hc.base": "model-00014-of-00015.safetensors", "model.layers.38.attn_hc.fn": "model-00014-of-00015.safetensors", "model.layers.38.attn_hc.scale": "model-00014-of-00015.safetensors", "model.layers.38.attn_norm.weight": "model-00014-of-00015.safetensors", "model.layers.38.ffn.gate.e_score_correction_bias": "model-00014-of-00015.safetensors", "model.layers.38.ffn.gate.weight": "model-00014-of-00015.safetensors", "model.layers.38.ffn.shared_experts.down_proj.biases": "model-00014-of-00015.safetensors", "model.layers.38.ffn.shared_experts.down_proj.scales": "model-00014-of-00015.safetensors", "model.layers.38.ffn.shared_experts.down_proj.weight": "model-00014-of-00015.safetensors", "model.layers.38.ffn.shared_experts.gate_proj.biases": "model-00014-of-00015.safetensors", "model.layers.38.ffn.shared_experts.gate_proj.scales": "model-00014-of-00015.safetensors", "model.layers.38.ffn.shared_experts.gate_proj.weight": "model-00014-of-00015.safetensors", "model.layers.38.ffn.shared_experts.up_proj.biases": "model-00014-of-00015.safetensors", "model.layers.38.ffn.shared_experts.up_proj.scales": "model-00014-of-00015.safetensors", "model.layers.38.ffn.shared_experts.up_proj.weight": "model-00014-of-00015.safetensors", "model.layers.38.ffn.switch_mlp.down_proj.scales": "model-00014-of-00015.safetensors", "model.layers.38.ffn.switch_mlp.down_proj.weight": "model-00014-of-00015.safetensors", "model.layers.38.ffn.switch_mlp.gate_proj.scales": "model-00014-of-00015.safetensors", "model.layers.38.ffn.switch_mlp.gate_proj.weight": "model-00014-of-00015.safetensors", "model.layers.38.ffn.switch_mlp.up_proj.scales": "model-00014-of-00015.safetensors", "model.layers.38.ffn.switch_mlp.up_proj.weight": "model-00014-of-00015.safetensors", "model.layers.38.ffn_hc.base": "model-00014-of-00015.safetensors", "model.layers.38.ffn_hc.fn": "model-00014-of-00015.safetensors", "model.layers.38.ffn_hc.scale": "model-00014-of-00015.safetensors", "model.layers.38.ffn_norm.weight": "model-00014-of-00015.safetensors", "model.layers.39.attn.attn_sink": "model-00014-of-00015.safetensors", "model.layers.39.attn.compressor.ape": "model-00014-of-00015.safetensors", "model.layers.39.attn.compressor.norm.weight": "model-00014-of-00015.safetensors", "model.layers.39.attn.compressor.wgate.biases": "model-00014-of-00015.safetensors", "model.layers.39.attn.compressor.wgate.scales": "model-00014-of-00015.safetensors", "model.layers.39.attn.compressor.wgate.weight": "model-00014-of-00015.safetensors", "model.layers.39.attn.compressor.wkv.biases": "model-00014-of-00015.safetensors", "model.layers.39.attn.compressor.wkv.scales": "model-00014-of-00015.safetensors", "model.layers.39.attn.compressor.wkv.weight": "model-00014-of-00015.safetensors", "model.layers.39.attn.kv_norm.weight": "model-00014-of-00015.safetensors", "model.layers.39.attn.q_norm.weight": "model-00014-of-00015.safetensors", "model.layers.39.attn.wkv.biases": "model-00014-of-00015.safetensors", "model.layers.39.attn.wkv.scales": "model-00014-of-00015.safetensors", "model.layers.39.attn.wkv.weight": "model-00014-of-00015.safetensors", "model.layers.39.attn.wo_a.biases": "model-00014-of-00015.safetensors", "model.layers.39.attn.wo_a.scales": "model-00014-of-00015.safetensors", "model.layers.39.attn.wo_a.weight": "model-00014-of-00015.safetensors", "model.layers.39.attn.wo_b.biases": "model-00014-of-00015.safetensors", "model.layers.39.attn.wo_b.scales": "model-00014-of-00015.safetensors", "model.layers.39.attn.wo_b.weight": "model-00014-of-00015.safetensors", "model.layers.39.attn.wq_a.biases": "model-00014-of-00015.safetensors", "model.layers.39.attn.wq_a.scales": "model-00014-of-00015.safetensors", "model.layers.39.attn.wq_a.weight": "model-00014-of-00015.safetensors", "model.layers.39.attn.wq_b.biases": "model-00014-of-00015.safetensors", "model.layers.39.attn.wq_b.scales": "model-00014-of-00015.safetensors", "model.layers.39.attn.wq_b.weight": "model-00014-of-00015.safetensors", "model.layers.39.attn_hc.base": "model-00014-of-00015.safetensors", "model.layers.39.attn_hc.fn": "model-00014-of-00015.safetensors", "model.layers.39.attn_hc.scale": "model-00014-of-00015.safetensors", "model.layers.39.attn_norm.weight": "model-00014-of-00015.safetensors", "model.layers.39.ffn.gate.e_score_correction_bias": "model-00014-of-00015.safetensors", "model.layers.39.ffn.gate.weight": "model-00014-of-00015.safetensors", "model.layers.39.ffn.shared_experts.down_proj.biases": "model-00014-of-00015.safetensors", "model.layers.39.ffn.shared_experts.down_proj.scales": "model-00014-of-00015.safetensors", "model.layers.39.ffn.shared_experts.down_proj.weight": "model-00014-of-00015.safetensors", "model.layers.39.ffn.shared_experts.gate_proj.biases": "model-00014-of-00015.safetensors", "model.layers.39.ffn.shared_experts.gate_proj.scales": "model-00014-of-00015.safetensors", "model.layers.39.ffn.shared_experts.gate_proj.weight": "model-00014-of-00015.safetensors", "model.layers.39.ffn.shared_experts.up_proj.biases": "model-00014-of-00015.safetensors", "model.layers.39.ffn.shared_experts.up_proj.scales": "model-00014-of-00015.safetensors", "model.layers.39.ffn.shared_experts.up_proj.weight": "model-00014-of-00015.safetensors", "model.layers.39.ffn.switch_mlp.down_proj.scales": "model-00014-of-00015.safetensors", "model.layers.39.ffn.switch_mlp.down_proj.weight": "model-00014-of-00015.safetensors", "model.layers.39.ffn.switch_mlp.gate_proj.scales": "model-00014-of-00015.safetensors", "model.layers.39.ffn.switch_mlp.gate_proj.weight": "model-00014-of-00015.safetensors", "model.layers.39.ffn.switch_mlp.up_proj.scales": "model-00014-of-00015.safetensors", "model.layers.39.ffn.switch_mlp.up_proj.weight": "model-00014-of-00015.safetensors", "model.layers.39.ffn_hc.base": "model-00014-of-00015.safetensors", "model.layers.39.ffn_hc.fn": "model-00014-of-00015.safetensors", "model.layers.39.ffn_hc.scale": "model-00014-of-00015.safetensors", "model.layers.39.ffn_norm.weight": "model-00014-of-00015.safetensors", "model.layers.4.attn.attn_sink": "model-00002-of-00015.safetensors", "model.layers.4.attn.compressor.ape": "model-00002-of-00015.safetensors", "model.layers.4.attn.compressor.norm.weight": "model-00002-of-00015.safetensors", "model.layers.4.attn.compressor.wgate.biases": "model-00002-of-00015.safetensors", "model.layers.4.attn.compressor.wgate.scales": "model-00002-of-00015.safetensors", "model.layers.4.attn.compressor.wgate.weight": "model-00002-of-00015.safetensors", "model.layers.4.attn.compressor.wkv.biases": "model-00002-of-00015.safetensors", "model.layers.4.attn.compressor.wkv.scales": "model-00002-of-00015.safetensors", "model.layers.4.attn.compressor.wkv.weight": "model-00002-of-00015.safetensors", "model.layers.4.attn.indexer.compressor.ape": "model-00002-of-00015.safetensors", "model.layers.4.attn.indexer.compressor.norm.weight": "model-00002-of-00015.safetensors", "model.layers.4.attn.indexer.compressor.wgate.biases": "model-00002-of-00015.safetensors", "model.layers.4.attn.indexer.compressor.wgate.scales": "model-00002-of-00015.safetensors", "model.layers.4.attn.indexer.compressor.wgate.weight": "model-00002-of-00015.safetensors", "model.layers.4.attn.indexer.compressor.wkv.biases": "model-00002-of-00015.safetensors", "model.layers.4.attn.indexer.compressor.wkv.scales": "model-00002-of-00015.safetensors", "model.layers.4.attn.indexer.compressor.wkv.weight": "model-00002-of-00015.safetensors", "model.layers.4.attn.indexer.weights_proj.biases": "model-00002-of-00015.safetensors", "model.layers.4.attn.indexer.weights_proj.scales": "model-00002-of-00015.safetensors", "model.layers.4.attn.indexer.weights_proj.weight": "model-00002-of-00015.safetensors", "model.layers.4.attn.indexer.wq_b.biases": "model-00002-of-00015.safetensors", "model.layers.4.attn.indexer.wq_b.scales": "model-00002-of-00015.safetensors", "model.layers.4.attn.indexer.wq_b.weight": "model-00002-of-00015.safetensors", "model.layers.4.attn.kv_norm.weight": "model-00002-of-00015.safetensors", "model.layers.4.attn.q_norm.weight": "model-00002-of-00015.safetensors", "model.layers.4.attn.wkv.biases": "model-00002-of-00015.safetensors", "model.layers.4.attn.wkv.scales": "model-00002-of-00015.safetensors", "model.layers.4.attn.wkv.weight": "model-00002-of-00015.safetensors", "model.layers.4.attn.wo_a.biases": "model-00002-of-00015.safetensors", "model.layers.4.attn.wo_a.scales": "model-00002-of-00015.safetensors", "model.layers.4.attn.wo_a.weight": "model-00002-of-00015.safetensors", "model.layers.4.attn.wo_b.biases": "model-00002-of-00015.safetensors", "model.layers.4.attn.wo_b.scales": "model-00002-of-00015.safetensors", "model.layers.4.attn.wo_b.weight": "model-00002-of-00015.safetensors", "model.layers.4.attn.wq_a.biases": "model-00002-of-00015.safetensors", "model.layers.4.attn.wq_a.scales": "model-00002-of-00015.safetensors", "model.layers.4.attn.wq_a.weight": "model-00002-of-00015.safetensors", "model.layers.4.attn.wq_b.biases": "model-00002-of-00015.safetensors", "model.layers.4.attn.wq_b.scales": "model-00002-of-00015.safetensors", "model.layers.4.attn.wq_b.weight": "model-00002-of-00015.safetensors", "model.layers.4.attn_hc.base": "model-00002-of-00015.safetensors", "model.layers.4.attn_hc.fn": "model-00002-of-00015.safetensors", "model.layers.4.attn_hc.scale": "model-00002-of-00015.safetensors", "model.layers.4.attn_norm.weight": "model-00002-of-00015.safetensors", "model.layers.4.ffn.gate.e_score_correction_bias": "model-00002-of-00015.safetensors", "model.layers.4.ffn.gate.weight": "model-00002-of-00015.safetensors", "model.layers.4.ffn.shared_experts.down_proj.biases": "model-00002-of-00015.safetensors", "model.layers.4.ffn.shared_experts.down_proj.scales": "model-00002-of-00015.safetensors", "model.layers.4.ffn.shared_experts.down_proj.weight": "model-00002-of-00015.safetensors", "model.layers.4.ffn.shared_experts.gate_proj.biases": "model-00002-of-00015.safetensors", "model.layers.4.ffn.shared_experts.gate_proj.scales": "model-00002-of-00015.safetensors", "model.layers.4.ffn.shared_experts.gate_proj.weight": "model-00002-of-00015.safetensors", "model.layers.4.ffn.shared_experts.up_proj.biases": "model-00002-of-00015.safetensors", "model.layers.4.ffn.shared_experts.up_proj.scales": "model-00002-of-00015.safetensors", "model.layers.4.ffn.shared_experts.up_proj.weight": "model-00002-of-00015.safetensors", "model.layers.4.ffn.switch_mlp.down_proj.scales": "model-00002-of-00015.safetensors", "model.layers.4.ffn.switch_mlp.down_proj.weight": "model-00002-of-00015.safetensors", "model.layers.4.ffn.switch_mlp.gate_proj.scales": "model-00002-of-00015.safetensors", "model.layers.4.ffn.switch_mlp.gate_proj.weight": "model-00002-of-00015.safetensors", "model.layers.4.ffn.switch_mlp.up_proj.scales": "model-00002-of-00015.safetensors", "model.layers.4.ffn.switch_mlp.up_proj.weight": "model-00002-of-00015.safetensors", "model.layers.4.ffn_hc.base": "model-00002-of-00015.safetensors", "model.layers.4.ffn_hc.fn": "model-00002-of-00015.safetensors", "model.layers.4.ffn_hc.scale": "model-00002-of-00015.safetensors", "model.layers.4.ffn_norm.weight": "model-00002-of-00015.safetensors", "model.layers.40.attn.attn_sink": "model-00014-of-00015.safetensors", "model.layers.40.attn.compressor.ape": "model-00014-of-00015.safetensors", "model.layers.40.attn.compressor.norm.weight": "model-00014-of-00015.safetensors", "model.layers.40.attn.compressor.wgate.biases": "model-00014-of-00015.safetensors", "model.layers.40.attn.compressor.wgate.scales": "model-00014-of-00015.safetensors", "model.layers.40.attn.compressor.wgate.weight": "model-00014-of-00015.safetensors", "model.layers.40.attn.compressor.wkv.biases": "model-00014-of-00015.safetensors", "model.layers.40.attn.compressor.wkv.scales": "model-00014-of-00015.safetensors", "model.layers.40.attn.compressor.wkv.weight": "model-00014-of-00015.safetensors", "model.layers.40.attn.indexer.compressor.ape": "model-00014-of-00015.safetensors", "model.layers.40.attn.indexer.compressor.norm.weight": "model-00014-of-00015.safetensors", "model.layers.40.attn.indexer.compressor.wgate.biases": "model-00014-of-00015.safetensors", "model.layers.40.attn.indexer.compressor.wgate.scales": "model-00014-of-00015.safetensors", "model.layers.40.attn.indexer.compressor.wgate.weight": "model-00014-of-00015.safetensors", "model.layers.40.attn.indexer.compressor.wkv.biases": "model-00014-of-00015.safetensors", "model.layers.40.attn.indexer.compressor.wkv.scales": "model-00014-of-00015.safetensors", "model.layers.40.attn.indexer.compressor.wkv.weight": "model-00014-of-00015.safetensors", "model.layers.40.attn.indexer.weights_proj.biases": "model-00014-of-00015.safetensors", "model.layers.40.attn.indexer.weights_proj.scales": "model-00014-of-00015.safetensors", "model.layers.40.attn.indexer.weights_proj.weight": "model-00014-of-00015.safetensors", "model.layers.40.attn.indexer.wq_b.biases": "model-00014-of-00015.safetensors", "model.layers.40.attn.indexer.wq_b.scales": "model-00014-of-00015.safetensors", "model.layers.40.attn.indexer.wq_b.weight": "model-00014-of-00015.safetensors", "model.layers.40.attn.kv_norm.weight": "model-00014-of-00015.safetensors", "model.layers.40.attn.q_norm.weight": "model-00014-of-00015.safetensors", "model.layers.40.attn.wkv.biases": "model-00014-of-00015.safetensors", "model.layers.40.attn.wkv.scales": "model-00014-of-00015.safetensors", "model.layers.40.attn.wkv.weight": "model-00014-of-00015.safetensors", "model.layers.40.attn.wo_a.biases": "model-00014-of-00015.safetensors", "model.layers.40.attn.wo_a.scales": "model-00014-of-00015.safetensors", "model.layers.40.attn.wo_a.weight": "model-00014-of-00015.safetensors", "model.layers.40.attn.wo_b.biases": "model-00014-of-00015.safetensors", "model.layers.40.attn.wo_b.scales": "model-00014-of-00015.safetensors", "model.layers.40.attn.wo_b.weight": "model-00014-of-00015.safetensors", "model.layers.40.attn.wq_a.biases": "model-00014-of-00015.safetensors", "model.layers.40.attn.wq_a.scales": "model-00014-of-00015.safetensors", "model.layers.40.attn.wq_a.weight": "model-00014-of-00015.safetensors", "model.layers.40.attn.wq_b.biases": "model-00014-of-00015.safetensors", "model.layers.40.attn.wq_b.scales": "model-00014-of-00015.safetensors", "model.layers.40.attn.wq_b.weight": "model-00014-of-00015.safetensors", "model.layers.40.attn_hc.base": "model-00015-of-00015.safetensors", "model.layers.40.attn_hc.fn": "model-00015-of-00015.safetensors", "model.layers.40.attn_hc.scale": "model-00015-of-00015.safetensors", "model.layers.40.attn_norm.weight": "model-00015-of-00015.safetensors", "model.layers.40.ffn.gate.e_score_correction_bias": "model-00014-of-00015.safetensors", "model.layers.40.ffn.gate.weight": "model-00014-of-00015.safetensors", "model.layers.40.ffn.shared_experts.down_proj.biases": "model-00015-of-00015.safetensors", "model.layers.40.ffn.shared_experts.down_proj.scales": "model-00015-of-00015.safetensors", "model.layers.40.ffn.shared_experts.down_proj.weight": "model-00015-of-00015.safetensors", "model.layers.40.ffn.shared_experts.gate_proj.biases": "model-00015-of-00015.safetensors", "model.layers.40.ffn.shared_experts.gate_proj.scales": "model-00015-of-00015.safetensors", "model.layers.40.ffn.shared_experts.gate_proj.weight": "model-00015-of-00015.safetensors", "model.layers.40.ffn.shared_experts.up_proj.biases": "model-00015-of-00015.safetensors", "model.layers.40.ffn.shared_experts.up_proj.scales": "model-00015-of-00015.safetensors", "model.layers.40.ffn.shared_experts.up_proj.weight": "model-00015-of-00015.safetensors", "model.layers.40.ffn.switch_mlp.down_proj.scales": "model-00015-of-00015.safetensors", "model.layers.40.ffn.switch_mlp.down_proj.weight": "model-00015-of-00015.safetensors", "model.layers.40.ffn.switch_mlp.gate_proj.scales": "model-00015-of-00015.safetensors", "model.layers.40.ffn.switch_mlp.gate_proj.weight": "model-00014-of-00015.safetensors", "model.layers.40.ffn.switch_mlp.up_proj.scales": "model-00015-of-00015.safetensors", "model.layers.40.ffn.switch_mlp.up_proj.weight": "model-00015-of-00015.safetensors", "model.layers.40.ffn_hc.base": "model-00015-of-00015.safetensors", "model.layers.40.ffn_hc.fn": "model-00015-of-00015.safetensors", "model.layers.40.ffn_hc.scale": "model-00015-of-00015.safetensors", "model.layers.40.ffn_norm.weight": "model-00015-of-00015.safetensors", "model.layers.41.attn.attn_sink": "model-00015-of-00015.safetensors", "model.layers.41.attn.compressor.ape": "model-00015-of-00015.safetensors", "model.layers.41.attn.compressor.norm.weight": "model-00015-of-00015.safetensors", "model.layers.41.attn.compressor.wgate.biases": "model-00015-of-00015.safetensors", "model.layers.41.attn.compressor.wgate.scales": "model-00015-of-00015.safetensors", "model.layers.41.attn.compressor.wgate.weight": "model-00015-of-00015.safetensors", "model.layers.41.attn.compressor.wkv.biases": "model-00015-of-00015.safetensors", "model.layers.41.attn.compressor.wkv.scales": "model-00015-of-00015.safetensors", "model.layers.41.attn.compressor.wkv.weight": "model-00015-of-00015.safetensors", "model.layers.41.attn.kv_norm.weight": "model-00015-of-00015.safetensors", "model.layers.41.attn.q_norm.weight": "model-00015-of-00015.safetensors", "model.layers.41.attn.wkv.biases": "model-00015-of-00015.safetensors", "model.layers.41.attn.wkv.scales": "model-00015-of-00015.safetensors", "model.layers.41.attn.wkv.weight": "model-00015-of-00015.safetensors", "model.layers.41.attn.wo_a.biases": "model-00015-of-00015.safetensors", "model.layers.41.attn.wo_a.scales": "model-00015-of-00015.safetensors", "model.layers.41.attn.wo_a.weight": "model-00015-of-00015.safetensors", "model.layers.41.attn.wo_b.biases": "model-00015-of-00015.safetensors", "model.layers.41.attn.wo_b.scales": "model-00015-of-00015.safetensors", "model.layers.41.attn.wo_b.weight": "model-00015-of-00015.safetensors", "model.layers.41.attn.wq_a.biases": "model-00015-of-00015.safetensors", "model.layers.41.attn.wq_a.scales": "model-00015-of-00015.safetensors", "model.layers.41.attn.wq_a.weight": "model-00015-of-00015.safetensors", "model.layers.41.attn.wq_b.biases": "model-00015-of-00015.safetensors", "model.layers.41.attn.wq_b.scales": "model-00015-of-00015.safetensors", "model.layers.41.attn.wq_b.weight": "model-00015-of-00015.safetensors", "model.layers.41.attn_hc.base": "model-00015-of-00015.safetensors", "model.layers.41.attn_hc.fn": "model-00015-of-00015.safetensors", "model.layers.41.attn_hc.scale": "model-00015-of-00015.safetensors", "model.layers.41.attn_norm.weight": "model-00015-of-00015.safetensors", "model.layers.41.ffn.gate.e_score_correction_bias": "model-00015-of-00015.safetensors", "model.layers.41.ffn.gate.weight": "model-00015-of-00015.safetensors", "model.layers.41.ffn.shared_experts.down_proj.biases": "model-00015-of-00015.safetensors", "model.layers.41.ffn.shared_experts.down_proj.scales": "model-00015-of-00015.safetensors", "model.layers.41.ffn.shared_experts.down_proj.weight": "model-00015-of-00015.safetensors", "model.layers.41.ffn.shared_experts.gate_proj.biases": "model-00015-of-00015.safetensors", "model.layers.41.ffn.shared_experts.gate_proj.scales": "model-00015-of-00015.safetensors", "model.layers.41.ffn.shared_experts.gate_proj.weight": "model-00015-of-00015.safetensors", "model.layers.41.ffn.shared_experts.up_proj.biases": "model-00015-of-00015.safetensors", "model.layers.41.ffn.shared_experts.up_proj.scales": "model-00015-of-00015.safetensors", "model.layers.41.ffn.shared_experts.up_proj.weight": "model-00015-of-00015.safetensors", "model.layers.41.ffn.switch_mlp.down_proj.scales": "model-00015-of-00015.safetensors", "model.layers.41.ffn.switch_mlp.down_proj.weight": "model-00015-of-00015.safetensors", "model.layers.41.ffn.switch_mlp.gate_proj.scales": "model-00015-of-00015.safetensors", "model.layers.41.ffn.switch_mlp.gate_proj.weight": "model-00015-of-00015.safetensors", "model.layers.41.ffn.switch_mlp.up_proj.scales": "model-00015-of-00015.safetensors", "model.layers.41.ffn.switch_mlp.up_proj.weight": "model-00015-of-00015.safetensors", "model.layers.41.ffn_hc.base": "model-00015-of-00015.safetensors", "model.layers.41.ffn_hc.fn": "model-00015-of-00015.safetensors", "model.layers.41.ffn_hc.scale": "model-00015-of-00015.safetensors", "model.layers.41.ffn_norm.weight": "model-00015-of-00015.safetensors", "model.layers.42.attn.attn_sink": "model-00015-of-00015.safetensors", "model.layers.42.attn.compressor.ape": "model-00015-of-00015.safetensors", "model.layers.42.attn.compressor.norm.weight": "model-00015-of-00015.safetensors", "model.layers.42.attn.compressor.wgate.biases": "model-00015-of-00015.safetensors", "model.layers.42.attn.compressor.wgate.scales": "model-00015-of-00015.safetensors", "model.layers.42.attn.compressor.wgate.weight": "model-00015-of-00015.safetensors", "model.layers.42.attn.compressor.wkv.biases": "model-00015-of-00015.safetensors", "model.layers.42.attn.compressor.wkv.scales": "model-00015-of-00015.safetensors", "model.layers.42.attn.compressor.wkv.weight": "model-00015-of-00015.safetensors", "model.layers.42.attn.indexer.compressor.ape": "model-00015-of-00015.safetensors", "model.layers.42.attn.indexer.compressor.norm.weight": "model-00015-of-00015.safetensors", "model.layers.42.attn.indexer.compressor.wgate.biases": "model-00015-of-00015.safetensors", "model.layers.42.attn.indexer.compressor.wgate.scales": "model-00015-of-00015.safetensors", "model.layers.42.attn.indexer.compressor.wgate.weight": "model-00015-of-00015.safetensors", "model.layers.42.attn.indexer.compressor.wkv.biases": "model-00015-of-00015.safetensors", "model.layers.42.attn.indexer.compressor.wkv.scales": "model-00015-of-00015.safetensors", "model.layers.42.attn.indexer.compressor.wkv.weight": "model-00015-of-00015.safetensors", "model.layers.42.attn.indexer.weights_proj.biases": "model-00015-of-00015.safetensors", "model.layers.42.attn.indexer.weights_proj.scales": "model-00015-of-00015.safetensors", "model.layers.42.attn.indexer.weights_proj.weight": "model-00015-of-00015.safetensors", "model.layers.42.attn.indexer.wq_b.biases": "model-00015-of-00015.safetensors", "model.layers.42.attn.indexer.wq_b.scales": "model-00015-of-00015.safetensors", "model.layers.42.attn.indexer.wq_b.weight": "model-00015-of-00015.safetensors", "model.layers.42.attn.kv_norm.weight": "model-00015-of-00015.safetensors", "model.layers.42.attn.q_norm.weight": "model-00015-of-00015.safetensors", "model.layers.42.attn.wkv.biases": "model-00015-of-00015.safetensors", "model.layers.42.attn.wkv.scales": "model-00015-of-00015.safetensors", "model.layers.42.attn.wkv.weight": "model-00015-of-00015.safetensors", "model.layers.42.attn.wo_a.biases": "model-00015-of-00015.safetensors", "model.layers.42.attn.wo_a.scales": "model-00015-of-00015.safetensors", "model.layers.42.attn.wo_a.weight": "model-00015-of-00015.safetensors", "model.layers.42.attn.wo_b.biases": "model-00015-of-00015.safetensors", "model.layers.42.attn.wo_b.scales": "model-00015-of-00015.safetensors", "model.layers.42.attn.wo_b.weight": "model-00015-of-00015.safetensors", "model.layers.42.attn.wq_a.biases": "model-00015-of-00015.safetensors", "model.layers.42.attn.wq_a.scales": "model-00015-of-00015.safetensors", "model.layers.42.attn.wq_a.weight": "model-00015-of-00015.safetensors", "model.layers.42.attn.wq_b.biases": "model-00015-of-00015.safetensors", "model.layers.42.attn.wq_b.scales": "model-00015-of-00015.safetensors", "model.layers.42.attn.wq_b.weight": "model-00015-of-00015.safetensors", "model.layers.42.attn_hc.base": "model-00015-of-00015.safetensors", "model.layers.42.attn_hc.fn": "model-00015-of-00015.safetensors", "model.layers.42.attn_hc.scale": "model-00015-of-00015.safetensors", "model.layers.42.attn_norm.weight": "model-00015-of-00015.safetensors", "model.layers.42.ffn.gate.e_score_correction_bias": "model-00015-of-00015.safetensors", "model.layers.42.ffn.gate.weight": "model-00015-of-00015.safetensors", "model.layers.42.ffn.shared_experts.down_proj.biases": "model-00015-of-00015.safetensors", "model.layers.42.ffn.shared_experts.down_proj.scales": "model-00015-of-00015.safetensors", "model.layers.42.ffn.shared_experts.down_proj.weight": "model-00015-of-00015.safetensors", "model.layers.42.ffn.shared_experts.gate_proj.biases": "model-00015-of-00015.safetensors", "model.layers.42.ffn.shared_experts.gate_proj.scales": "model-00015-of-00015.safetensors", "model.layers.42.ffn.shared_experts.gate_proj.weight": "model-00015-of-00015.safetensors", "model.layers.42.ffn.shared_experts.up_proj.biases": "model-00015-of-00015.safetensors", "model.layers.42.ffn.shared_experts.up_proj.scales": "model-00015-of-00015.safetensors", "model.layers.42.ffn.shared_experts.up_proj.weight": "model-00015-of-00015.safetensors", "model.layers.42.ffn.switch_mlp.down_proj.scales": "model-00015-of-00015.safetensors", "model.layers.42.ffn.switch_mlp.down_proj.weight": "model-00015-of-00015.safetensors", "model.layers.42.ffn.switch_mlp.gate_proj.scales": "model-00015-of-00015.safetensors", "model.layers.42.ffn.switch_mlp.gate_proj.weight": "model-00015-of-00015.safetensors", "model.layers.42.ffn.switch_mlp.up_proj.scales": "model-00015-of-00015.safetensors", "model.layers.42.ffn.switch_mlp.up_proj.weight": "model-00015-of-00015.safetensors", "model.layers.42.ffn_hc.base": "model-00015-of-00015.safetensors", "model.layers.42.ffn_hc.fn": "model-00015-of-00015.safetensors", "model.layers.42.ffn_hc.scale": "model-00015-of-00015.safetensors", "model.layers.42.ffn_norm.weight": "model-00015-of-00015.safetensors", "model.layers.5.attn.attn_sink": "model-00002-of-00015.safetensors", "model.layers.5.attn.compressor.ape": "model-00002-of-00015.safetensors", "model.layers.5.attn.compressor.norm.weight": "model-00002-of-00015.safetensors", "model.layers.5.attn.compressor.wgate.biases": "model-00002-of-00015.safetensors", "model.layers.5.attn.compressor.wgate.scales": "model-00002-of-00015.safetensors", "model.layers.5.attn.compressor.wgate.weight": "model-00002-of-00015.safetensors", "model.layers.5.attn.compressor.wkv.biases": "model-00002-of-00015.safetensors", "model.layers.5.attn.compressor.wkv.scales": "model-00002-of-00015.safetensors", "model.layers.5.attn.compressor.wkv.weight": "model-00002-of-00015.safetensors", "model.layers.5.attn.kv_norm.weight": "model-00002-of-00015.safetensors", "model.layers.5.attn.q_norm.weight": "model-00002-of-00015.safetensors", "model.layers.5.attn.wkv.biases": "model-00002-of-00015.safetensors", "model.layers.5.attn.wkv.scales": "model-00002-of-00015.safetensors", "model.layers.5.attn.wkv.weight": "model-00002-of-00015.safetensors", "model.layers.5.attn.wo_a.biases": "model-00002-of-00015.safetensors", "model.layers.5.attn.wo_a.scales": "model-00002-of-00015.safetensors", "model.layers.5.attn.wo_a.weight": "model-00002-of-00015.safetensors", "model.layers.5.attn.wo_b.biases": "model-00002-of-00015.safetensors", "model.layers.5.attn.wo_b.scales": "model-00002-of-00015.safetensors", "model.layers.5.attn.wo_b.weight": "model-00002-of-00015.safetensors", "model.layers.5.attn.wq_a.biases": "model-00002-of-00015.safetensors", "model.layers.5.attn.wq_a.scales": "model-00002-of-00015.safetensors", "model.layers.5.attn.wq_a.weight": "model-00002-of-00015.safetensors", "model.layers.5.attn.wq_b.biases": "model-00002-of-00015.safetensors", "model.layers.5.attn.wq_b.scales": "model-00002-of-00015.safetensors", "model.layers.5.attn.wq_b.weight": "model-00002-of-00015.safetensors", "model.layers.5.attn_hc.base": "model-00003-of-00015.safetensors", "model.layers.5.attn_hc.fn": "model-00003-of-00015.safetensors", "model.layers.5.attn_hc.scale": "model-00003-of-00015.safetensors", "model.layers.5.attn_norm.weight": "model-00003-of-00015.safetensors", "model.layers.5.ffn.gate.e_score_correction_bias": "model-00002-of-00015.safetensors", "model.layers.5.ffn.gate.weight": "model-00002-of-00015.safetensors", "model.layers.5.ffn.shared_experts.down_proj.biases": "model-00003-of-00015.safetensors", "model.layers.5.ffn.shared_experts.down_proj.scales": "model-00003-of-00015.safetensors", "model.layers.5.ffn.shared_experts.down_proj.weight": "model-00003-of-00015.safetensors", "model.layers.5.ffn.shared_experts.gate_proj.biases": "model-00003-of-00015.safetensors", "model.layers.5.ffn.shared_experts.gate_proj.scales": "model-00003-of-00015.safetensors", "model.layers.5.ffn.shared_experts.gate_proj.weight": "model-00003-of-00015.safetensors", "model.layers.5.ffn.shared_experts.up_proj.biases": "model-00003-of-00015.safetensors", "model.layers.5.ffn.shared_experts.up_proj.scales": "model-00003-of-00015.safetensors", "model.layers.5.ffn.shared_experts.up_proj.weight": "model-00003-of-00015.safetensors", "model.layers.5.ffn.switch_mlp.down_proj.scales": "model-00003-of-00015.safetensors", "model.layers.5.ffn.switch_mlp.down_proj.weight": "model-00003-of-00015.safetensors", "model.layers.5.ffn.switch_mlp.gate_proj.scales": "model-00002-of-00015.safetensors", "model.layers.5.ffn.switch_mlp.gate_proj.weight": "model-00002-of-00015.safetensors", "model.layers.5.ffn.switch_mlp.up_proj.scales": "model-00003-of-00015.safetensors", "model.layers.5.ffn.switch_mlp.up_proj.weight": "model-00002-of-00015.safetensors", "model.layers.5.ffn_hc.base": "model-00003-of-00015.safetensors", "model.layers.5.ffn_hc.fn": "model-00003-of-00015.safetensors", "model.layers.5.ffn_hc.scale": "model-00003-of-00015.safetensors", "model.layers.5.ffn_norm.weight": "model-00003-of-00015.safetensors", "model.layers.6.attn.attn_sink": "model-00003-of-00015.safetensors", "model.layers.6.attn.compressor.ape": "model-00003-of-00015.safetensors", "model.layers.6.attn.compressor.norm.weight": "model-00003-of-00015.safetensors", "model.layers.6.attn.compressor.wgate.biases": "model-00003-of-00015.safetensors", "model.layers.6.attn.compressor.wgate.scales": "model-00003-of-00015.safetensors", "model.layers.6.attn.compressor.wgate.weight": "model-00003-of-00015.safetensors", "model.layers.6.attn.compressor.wkv.biases": "model-00003-of-00015.safetensors", "model.layers.6.attn.compressor.wkv.scales": "model-00003-of-00015.safetensors", "model.layers.6.attn.compressor.wkv.weight": "model-00003-of-00015.safetensors", "model.layers.6.attn.indexer.compressor.ape": "model-00003-of-00015.safetensors", "model.layers.6.attn.indexer.compressor.norm.weight": "model-00003-of-00015.safetensors", "model.layers.6.attn.indexer.compressor.wgate.biases": "model-00003-of-00015.safetensors", "model.layers.6.attn.indexer.compressor.wgate.scales": "model-00003-of-00015.safetensors", "model.layers.6.attn.indexer.compressor.wgate.weight": "model-00003-of-00015.safetensors", "model.layers.6.attn.indexer.compressor.wkv.biases": "model-00003-of-00015.safetensors", "model.layers.6.attn.indexer.compressor.wkv.scales": "model-00003-of-00015.safetensors", "model.layers.6.attn.indexer.compressor.wkv.weight": "model-00003-of-00015.safetensors", "model.layers.6.attn.indexer.weights_proj.biases": "model-00003-of-00015.safetensors", "model.layers.6.attn.indexer.weights_proj.scales": "model-00003-of-00015.safetensors", "model.layers.6.attn.indexer.weights_proj.weight": "model-00003-of-00015.safetensors", "model.layers.6.attn.indexer.wq_b.biases": "model-00003-of-00015.safetensors", "model.layers.6.attn.indexer.wq_b.scales": "model-00003-of-00015.safetensors", "model.layers.6.attn.indexer.wq_b.weight": "model-00003-of-00015.safetensors", "model.layers.6.attn.kv_norm.weight": "model-00003-of-00015.safetensors", "model.layers.6.attn.q_norm.weight": "model-00003-of-00015.safetensors", "model.layers.6.attn.wkv.biases": "model-00003-of-00015.safetensors", "model.layers.6.attn.wkv.scales": "model-00003-of-00015.safetensors", "model.layers.6.attn.wkv.weight": "model-00003-of-00015.safetensors", "model.layers.6.attn.wo_a.biases": "model-00003-of-00015.safetensors", "model.layers.6.attn.wo_a.scales": "model-00003-of-00015.safetensors", "model.layers.6.attn.wo_a.weight": "model-00003-of-00015.safetensors", "model.layers.6.attn.wo_b.biases": "model-00003-of-00015.safetensors", "model.layers.6.attn.wo_b.scales": "model-00003-of-00015.safetensors", "model.layers.6.attn.wo_b.weight": "model-00003-of-00015.safetensors", "model.layers.6.attn.wq_a.biases": "model-00003-of-00015.safetensors", "model.layers.6.attn.wq_a.scales": "model-00003-of-00015.safetensors", "model.layers.6.attn.wq_a.weight": "model-00003-of-00015.safetensors", "model.layers.6.attn.wq_b.biases": "model-00003-of-00015.safetensors", "model.layers.6.attn.wq_b.scales": "model-00003-of-00015.safetensors", "model.layers.6.attn.wq_b.weight": "model-00003-of-00015.safetensors", "model.layers.6.attn_hc.base": "model-00003-of-00015.safetensors", "model.layers.6.attn_hc.fn": "model-00003-of-00015.safetensors", "model.layers.6.attn_hc.scale": "model-00003-of-00015.safetensors", "model.layers.6.attn_norm.weight": "model-00003-of-00015.safetensors", "model.layers.6.ffn.gate.e_score_correction_bias": "model-00003-of-00015.safetensors", "model.layers.6.ffn.gate.weight": "model-00003-of-00015.safetensors", "model.layers.6.ffn.shared_experts.down_proj.biases": "model-00003-of-00015.safetensors", "model.layers.6.ffn.shared_experts.down_proj.scales": "model-00003-of-00015.safetensors", "model.layers.6.ffn.shared_experts.down_proj.weight": "model-00003-of-00015.safetensors", "model.layers.6.ffn.shared_experts.gate_proj.biases": "model-00003-of-00015.safetensors", "model.layers.6.ffn.shared_experts.gate_proj.scales": "model-00003-of-00015.safetensors", "model.layers.6.ffn.shared_experts.gate_proj.weight": "model-00003-of-00015.safetensors", "model.layers.6.ffn.shared_experts.up_proj.biases": "model-00003-of-00015.safetensors", "model.layers.6.ffn.shared_experts.up_proj.scales": "model-00003-of-00015.safetensors", "model.layers.6.ffn.shared_experts.up_proj.weight": "model-00003-of-00015.safetensors", "model.layers.6.ffn.switch_mlp.down_proj.scales": "model-00003-of-00015.safetensors", "model.layers.6.ffn.switch_mlp.down_proj.weight": "model-00003-of-00015.safetensors", "model.layers.6.ffn.switch_mlp.gate_proj.scales": "model-00003-of-00015.safetensors", "model.layers.6.ffn.switch_mlp.gate_proj.weight": "model-00003-of-00015.safetensors", "model.layers.6.ffn.switch_mlp.up_proj.scales": "model-00003-of-00015.safetensors", "model.layers.6.ffn.switch_mlp.up_proj.weight": "model-00003-of-00015.safetensors", "model.layers.6.ffn_hc.base": "model-00003-of-00015.safetensors", "model.layers.6.ffn_hc.fn": "model-00003-of-00015.safetensors", "model.layers.6.ffn_hc.scale": "model-00003-of-00015.safetensors", "model.layers.6.ffn_norm.weight": "model-00003-of-00015.safetensors", "model.layers.7.attn.attn_sink": "model-00003-of-00015.safetensors", "model.layers.7.attn.compressor.ape": "model-00003-of-00015.safetensors", "model.layers.7.attn.compressor.norm.weight": "model-00003-of-00015.safetensors", "model.layers.7.attn.compressor.wgate.biases": "model-00003-of-00015.safetensors", "model.layers.7.attn.compressor.wgate.scales": "model-00003-of-00015.safetensors", "model.layers.7.attn.compressor.wgate.weight": "model-00003-of-00015.safetensors", "model.layers.7.attn.compressor.wkv.biases": "model-00003-of-00015.safetensors", "model.layers.7.attn.compressor.wkv.scales": "model-00003-of-00015.safetensors", "model.layers.7.attn.compressor.wkv.weight": "model-00003-of-00015.safetensors", "model.layers.7.attn.kv_norm.weight": "model-00003-of-00015.safetensors", "model.layers.7.attn.q_norm.weight": "model-00003-of-00015.safetensors", "model.layers.7.attn.wkv.biases": "model-00003-of-00015.safetensors", "model.layers.7.attn.wkv.scales": "model-00003-of-00015.safetensors", "model.layers.7.attn.wkv.weight": "model-00003-of-00015.safetensors", "model.layers.7.attn.wo_a.biases": "model-00003-of-00015.safetensors", "model.layers.7.attn.wo_a.scales": "model-00003-of-00015.safetensors", "model.layers.7.attn.wo_a.weight": "model-00003-of-00015.safetensors", "model.layers.7.attn.wo_b.biases": "model-00003-of-00015.safetensors", "model.layers.7.attn.wo_b.scales": "model-00003-of-00015.safetensors", "model.layers.7.attn.wo_b.weight": "model-00003-of-00015.safetensors", "model.layers.7.attn.wq_a.biases": "model-00003-of-00015.safetensors", "model.layers.7.attn.wq_a.scales": "model-00003-of-00015.safetensors", "model.layers.7.attn.wq_a.weight": "model-00003-of-00015.safetensors", "model.layers.7.attn.wq_b.biases": "model-00003-of-00015.safetensors", "model.layers.7.attn.wq_b.scales": "model-00003-of-00015.safetensors", "model.layers.7.attn.wq_b.weight": "model-00003-of-00015.safetensors", "model.layers.7.attn_hc.base": "model-00003-of-00015.safetensors", "model.layers.7.attn_hc.fn": "model-00003-of-00015.safetensors", "model.layers.7.attn_hc.scale": "model-00003-of-00015.safetensors", "model.layers.7.attn_norm.weight": "model-00003-of-00015.safetensors", "model.layers.7.ffn.gate.e_score_correction_bias": "model-00003-of-00015.safetensors", "model.layers.7.ffn.gate.weight": "model-00003-of-00015.safetensors", "model.layers.7.ffn.shared_experts.down_proj.biases": "model-00003-of-00015.safetensors", "model.layers.7.ffn.shared_experts.down_proj.scales": "model-00003-of-00015.safetensors", "model.layers.7.ffn.shared_experts.down_proj.weight": "model-00003-of-00015.safetensors", "model.layers.7.ffn.shared_experts.gate_proj.biases": "model-00003-of-00015.safetensors", "model.layers.7.ffn.shared_experts.gate_proj.scales": "model-00003-of-00015.safetensors", "model.layers.7.ffn.shared_experts.gate_proj.weight": "model-00003-of-00015.safetensors", "model.layers.7.ffn.shared_experts.up_proj.biases": "model-00003-of-00015.safetensors", "model.layers.7.ffn.shared_experts.up_proj.scales": "model-00003-of-00015.safetensors", "model.layers.7.ffn.shared_experts.up_proj.weight": "model-00003-of-00015.safetensors", "model.layers.7.ffn.switch_mlp.down_proj.scales": "model-00003-of-00015.safetensors", "model.layers.7.ffn.switch_mlp.down_proj.weight": "model-00003-of-00015.safetensors", "model.layers.7.ffn.switch_mlp.gate_proj.scales": "model-00003-of-00015.safetensors", "model.layers.7.ffn.switch_mlp.gate_proj.weight": "model-00003-of-00015.safetensors", "model.layers.7.ffn.switch_mlp.up_proj.scales": "model-00003-of-00015.safetensors", "model.layers.7.ffn.switch_mlp.up_proj.weight": "model-00003-of-00015.safetensors", "model.layers.7.ffn_hc.base": "model-00003-of-00015.safetensors", "model.layers.7.ffn_hc.fn": "model-00003-of-00015.safetensors", "model.layers.7.ffn_hc.scale": "model-00003-of-00015.safetensors", "model.layers.7.ffn_norm.weight": "model-00003-of-00015.safetensors", "model.layers.8.attn.attn_sink": "model-00003-of-00015.safetensors", "model.layers.8.attn.compressor.ape": "model-00003-of-00015.safetensors", "model.layers.8.attn.compressor.norm.weight": "model-00003-of-00015.safetensors", "model.layers.8.attn.compressor.wgate.biases": "model-00003-of-00015.safetensors", "model.layers.8.attn.compressor.wgate.scales": "model-00003-of-00015.safetensors", "model.layers.8.attn.compressor.wgate.weight": "model-00003-of-00015.safetensors", "model.layers.8.attn.compressor.wkv.biases": "model-00003-of-00015.safetensors", "model.layers.8.attn.compressor.wkv.scales": "model-00003-of-00015.safetensors", "model.layers.8.attn.compressor.wkv.weight": "model-00003-of-00015.safetensors", "model.layers.8.attn.indexer.compressor.ape": "model-00003-of-00015.safetensors", "model.layers.8.attn.indexer.compressor.norm.weight": "model-00003-of-00015.safetensors", "model.layers.8.attn.indexer.compressor.wgate.biases": "model-00003-of-00015.safetensors", "model.layers.8.attn.indexer.compressor.wgate.scales": "model-00003-of-00015.safetensors", "model.layers.8.attn.indexer.compressor.wgate.weight": "model-00003-of-00015.safetensors", "model.layers.8.attn.indexer.compressor.wkv.biases": "model-00003-of-00015.safetensors", "model.layers.8.attn.indexer.compressor.wkv.scales": "model-00003-of-00015.safetensors", "model.layers.8.attn.indexer.compressor.wkv.weight": "model-00003-of-00015.safetensors", "model.layers.8.attn.indexer.weights_proj.biases": "model-00003-of-00015.safetensors", "model.layers.8.attn.indexer.weights_proj.scales": "model-00003-of-00015.safetensors", "model.layers.8.attn.indexer.weights_proj.weight": "model-00003-of-00015.safetensors", "model.layers.8.attn.indexer.wq_b.biases": "model-00003-of-00015.safetensors", "model.layers.8.attn.indexer.wq_b.scales": "model-00003-of-00015.safetensors", "model.layers.8.attn.indexer.wq_b.weight": "model-00003-of-00015.safetensors", "model.layers.8.attn.kv_norm.weight": "model-00003-of-00015.safetensors", "model.layers.8.attn.q_norm.weight": "model-00003-of-00015.safetensors", "model.layers.8.attn.wkv.biases": "model-00003-of-00015.safetensors", "model.layers.8.attn.wkv.scales": "model-00003-of-00015.safetensors", "model.layers.8.attn.wkv.weight": "model-00003-of-00015.safetensors", "model.layers.8.attn.wo_a.biases": "model-00003-of-00015.safetensors", "model.layers.8.attn.wo_a.scales": "model-00003-of-00015.safetensors", "model.layers.8.attn.wo_a.weight": "model-00003-of-00015.safetensors", "model.layers.8.attn.wo_b.biases": "model-00003-of-00015.safetensors", "model.layers.8.attn.wo_b.scales": "model-00003-of-00015.safetensors", "model.layers.8.attn.wo_b.weight": "model-00003-of-00015.safetensors", "model.layers.8.attn.wq_a.biases": "model-00003-of-00015.safetensors", "model.layers.8.attn.wq_a.scales": "model-00003-of-00015.safetensors", "model.layers.8.attn.wq_a.weight": "model-00003-of-00015.safetensors", "model.layers.8.attn.wq_b.biases": "model-00003-of-00015.safetensors", "model.layers.8.attn.wq_b.scales": "model-00003-of-00015.safetensors", "model.layers.8.attn.wq_b.weight": "model-00003-of-00015.safetensors", "model.layers.8.attn_hc.base": "model-00004-of-00015.safetensors", "model.layers.8.attn_hc.fn": "model-00004-of-00015.safetensors", "model.layers.8.attn_hc.scale": "model-00004-of-00015.safetensors", "model.layers.8.attn_norm.weight": "model-00004-of-00015.safetensors", "model.layers.8.ffn.gate.e_score_correction_bias": "model-00003-of-00015.safetensors", "model.layers.8.ffn.gate.weight": "model-00003-of-00015.safetensors", "model.layers.8.ffn.shared_experts.down_proj.biases": "model-00004-of-00015.safetensors", "model.layers.8.ffn.shared_experts.down_proj.scales": "model-00004-of-00015.safetensors", "model.layers.8.ffn.shared_experts.down_proj.weight": "model-00004-of-00015.safetensors", "model.layers.8.ffn.shared_experts.gate_proj.biases": "model-00004-of-00015.safetensors", "model.layers.8.ffn.shared_experts.gate_proj.scales": "model-00004-of-00015.safetensors", "model.layers.8.ffn.shared_experts.gate_proj.weight": "model-00004-of-00015.safetensors", "model.layers.8.ffn.shared_experts.up_proj.biases": "model-00004-of-00015.safetensors", "model.layers.8.ffn.shared_experts.up_proj.scales": "model-00004-of-00015.safetensors", "model.layers.8.ffn.shared_experts.up_proj.weight": "model-00004-of-00015.safetensors", "model.layers.8.ffn.switch_mlp.down_proj.scales": "model-00004-of-00015.safetensors", "model.layers.8.ffn.switch_mlp.down_proj.weight": "model-00004-of-00015.safetensors", "model.layers.8.ffn.switch_mlp.gate_proj.scales": "model-00003-of-00015.safetensors", "model.layers.8.ffn.switch_mlp.gate_proj.weight": "model-00003-of-00015.safetensors", "model.layers.8.ffn.switch_mlp.up_proj.scales": "model-00004-of-00015.safetensors", "model.layers.8.ffn.switch_mlp.up_proj.weight": "model-00004-of-00015.safetensors", "model.layers.8.ffn_hc.base": "model-00004-of-00015.safetensors", "model.layers.8.ffn_hc.fn": "model-00004-of-00015.safetensors", "model.layers.8.ffn_hc.scale": "model-00004-of-00015.safetensors", "model.layers.8.ffn_norm.weight": "model-00004-of-00015.safetensors", "model.layers.9.attn.attn_sink": "model-00004-of-00015.safetensors", "model.layers.9.attn.compressor.ape": "model-00004-of-00015.safetensors", "model.layers.9.attn.compressor.norm.weight": "model-00004-of-00015.safetensors", "model.layers.9.attn.compressor.wgate.biases": "model-00004-of-00015.safetensors", "model.layers.9.attn.compressor.wgate.scales": "model-00004-of-00015.safetensors", "model.layers.9.attn.compressor.wgate.weight": "model-00004-of-00015.safetensors", "model.layers.9.attn.compressor.wkv.biases": "model-00004-of-00015.safetensors", "model.layers.9.attn.compressor.wkv.scales": "model-00004-of-00015.safetensors", "model.layers.9.attn.compressor.wkv.weight": "model-00004-of-00015.safetensors", "model.layers.9.attn.kv_norm.weight": "model-00004-of-00015.safetensors", "model.layers.9.attn.q_norm.weight": "model-00004-of-00015.safetensors", "model.layers.9.attn.wkv.biases": "model-00004-of-00015.safetensors", "model.layers.9.attn.wkv.scales": "model-00004-of-00015.safetensors", "model.layers.9.attn.wkv.weight": "model-00004-of-00015.safetensors", "model.layers.9.attn.wo_a.biases": "model-00004-of-00015.safetensors", "model.layers.9.attn.wo_a.scales": "model-00004-of-00015.safetensors", "model.layers.9.attn.wo_a.weight": "model-00004-of-00015.safetensors", "model.layers.9.attn.wo_b.biases": "model-00004-of-00015.safetensors", "model.layers.9.attn.wo_b.scales": "model-00004-of-00015.safetensors", "model.layers.9.attn.wo_b.weight": "model-00004-of-00015.safetensors", "model.layers.9.attn.wq_a.biases": "model-00004-of-00015.safetensors", "model.layers.9.attn.wq_a.scales": "model-00004-of-00015.safetensors", "model.layers.9.attn.wq_a.weight": "model-00004-of-00015.safetensors", "model.layers.9.attn.wq_b.biases": "model-00004-of-00015.safetensors", "model.layers.9.attn.wq_b.scales": "model-00004-of-00015.safetensors", "model.layers.9.attn.wq_b.weight": "model-00004-of-00015.safetensors", "model.layers.9.attn_hc.base": "model-00004-of-00015.safetensors", "model.layers.9.attn_hc.fn": "model-00004-of-00015.safetensors", "model.layers.9.attn_hc.scale": "model-00004-of-00015.safetensors", "model.layers.9.attn_norm.weight": "model-00004-of-00015.safetensors", "model.layers.9.ffn.gate.e_score_correction_bias": "model-00004-of-00015.safetensors", "model.layers.9.ffn.gate.weight": "model-00004-of-00015.safetensors", "model.layers.9.ffn.shared_experts.down_proj.biases": "model-00004-of-00015.safetensors", "model.layers.9.ffn.shared_experts.down_proj.scales": "model-00004-of-00015.safetensors", "model.layers.9.ffn.shared_experts.down_proj.weight": "model-00004-of-00015.safetensors", "model.layers.9.ffn.shared_experts.gate_proj.biases": "model-00004-of-00015.safetensors", "model.layers.9.ffn.shared_experts.gate_proj.scales": "model-00004-of-00015.safetensors", "model.layers.9.ffn.shared_experts.gate_proj.weight": "model-00004-of-00015.safetensors", "model.layers.9.ffn.shared_experts.up_proj.biases": "model-00004-of-00015.safetensors", "model.layers.9.ffn.shared_experts.up_proj.scales": "model-00004-of-00015.safetensors", "model.layers.9.ffn.shared_experts.up_proj.weight": "model-00004-of-00015.safetensors", "model.layers.9.ffn.switch_mlp.down_proj.scales": "model-00004-of-00015.safetensors", "model.layers.9.ffn.switch_mlp.down_proj.weight": "model-00004-of-00015.safetensors", "model.layers.9.ffn.switch_mlp.gate_proj.scales": "model-00004-of-00015.safetensors", "model.layers.9.ffn.switch_mlp.gate_proj.weight": "model-00004-of-00015.safetensors", "model.layers.9.ffn.switch_mlp.up_proj.scales": "model-00004-of-00015.safetensors", "model.layers.9.ffn.switch_mlp.up_proj.weight": "model-00004-of-00015.safetensors", "model.layers.9.ffn_hc.base": "model-00004-of-00015.safetensors", "model.layers.9.ffn_hc.fn": "model-00004-of-00015.safetensors", "model.layers.9.ffn_hc.scale": "model-00004-of-00015.safetensors", "model.layers.9.ffn_norm.weight": "model-00004-of-00015.safetensors", "model.norm.weight": "model-00015-of-00015.safetensors" } }