Instructions to use bkhmsi/micro-llama-1b-dpo with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use bkhmsi/micro-llama-1b-dpo with Transformers:
```python
# Load model directly
from transformers import AutoTokenizer, MiCRoLlama

tokenizer = AutoTokenizer.from_pretrained("bkhmsi/micro-llama-1b-dpo")
model = MiCRoLlama.from_pretrained("bkhmsi/micro-llama-1b-dpo")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "metadata": { | |
| "total_parameters": 4485154816, | |
| "total_size": 8970309632 | |
| }, | |
| "weight_map": { | |
| "embed_tokens.weight": "model-00001-of-00002.safetensors", | |
| "final_norm.weight": "model-00002-of-00002.safetensors", | |
| "layers.0.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.gate.0.weight": "model-00001-of-00002.safetensors", | |
| "layers.0.gate.1.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.gate.0.weight": "model-00001-of-00002.safetensors", | |
| "layers.1.gate.1.weight": "model-00001-of-00002.safetensors", | |
| "layers.10.experts.0.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.0.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.0.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.1.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.2.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.3.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.experts.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.gate.0.weight": "model-00002-of-00002.safetensors", | |
| "layers.10.gate.1.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.0.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.0.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.0.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.1.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.2.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.3.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.experts.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.gate.0.weight": "model-00002-of-00002.safetensors", | |
| "layers.11.gate.1.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.0.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.0.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.0.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.1.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.2.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.3.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.experts.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.gate.0.weight": "model-00002-of-00002.safetensors", | |
| "layers.12.gate.1.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.0.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.0.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.0.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.1.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.2.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.3.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.experts.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.gate.0.weight": "model-00002-of-00002.safetensors", | |
| "layers.13.gate.1.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.0.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.0.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.0.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.1.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.2.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.3.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.experts.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.gate.0.weight": "model-00002-of-00002.safetensors", | |
| "layers.14.gate.1.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.0.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.0.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.0.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.1.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.2.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.3.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.experts.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.gate.0.weight": "model-00002-of-00002.safetensors", | |
| "layers.15.gate.1.weight": "model-00002-of-00002.safetensors", | |
| "layers.2.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.gate.0.weight": "model-00001-of-00002.safetensors", | |
| "layers.2.gate.1.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.gate.0.weight": "model-00001-of-00002.safetensors", | |
| "layers.3.gate.1.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.gate.0.weight": "model-00001-of-00002.safetensors", | |
| "layers.4.gate.1.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.gate.0.weight": "model-00001-of-00002.safetensors", | |
| "layers.5.gate.1.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.gate.0.weight": "model-00001-of-00002.safetensors", | |
| "layers.6.gate.1.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.gate.0.weight": "model-00001-of-00002.safetensors", | |
| "layers.7.gate.1.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.0.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.1.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.2.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.3.input_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.experts.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.gate.0.weight": "model-00001-of-00002.safetensors", | |
| "layers.8.gate.1.weight": "model-00001-of-00002.safetensors", | |
| "layers.9.experts.0.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.0.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.0.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.0.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.9.experts.0.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", | |
| "layers.9.experts.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.1.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.1.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.1.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.1.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.1.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.1.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.2.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.2.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.2.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.2.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.2.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.2.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.3.input_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.3.mlp.down_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.3.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.3.mlp.up_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.3.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.3.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.experts.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", | |
| "layers.9.gate.0.weight": "model-00001-of-00002.safetensors", | |
| "layers.9.gate.1.weight": "model-00001-of-00002.safetensors", | |
| "lm_head.weight": "model-00002-of-00002.safetensors" | |
| } | |
| } | |