{ "metadata": { "total_parameters": 8096647168, "total_size": 32386588672 }, "weight_map": { "hidden_in.bias": "model-00007-of-00007.safetensors", "hidden_in.weight": "model-00007-of-00007.safetensors", "model.blocks.0.feed_forward.fc1.bias": "model-00001-of-00007.safetensors", "model.blocks.0.feed_forward.fc1.weight": "model-00001-of-00007.safetensors", "model.blocks.0.feed_forward.fc2.bias": "model-00001-of-00007.safetensors", "model.blocks.0.feed_forward.fc2.weight": "model-00001-of-00007.safetensors", "model.blocks.0.norm1.weight": "model-00001-of-00007.safetensors", "model.blocks.0.norm2.weight": "model-00001-of-00007.safetensors", "model.blocks.0.norm3.weight": "model-00001-of-00007.safetensors", "model.blocks.0.norm4.weight": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_cross.W_k.bias": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_cross.W_k.weight": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_cross.W_o.bias": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_cross.W_o.weight": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_cross.W_q.bias": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_cross.W_q.weight": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_cross.W_v.bias": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_cross.W_v.weight": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_hw.W_k.bias": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_hw.W_k.weight": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_hw.W_o.bias": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_hw.W_o.weight": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_hw.W_q.bias": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_hw.W_q.weight": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_hw.W_v.bias": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_hw.W_v.weight": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_n.W_k.bias": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_n.W_k.weight": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_n.W_o.bias": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_n.W_o.weight": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_n.W_q.bias": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_n.W_q.weight": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_n.W_v.bias": "model-00001-of-00007.safetensors", "model.blocks.0.self_attn_n.W_v.weight": "model-00001-of-00007.safetensors", "model.blocks.1.feed_forward.fc1.bias": "model-00001-of-00007.safetensors", "model.blocks.1.feed_forward.fc1.weight": "model-00001-of-00007.safetensors", "model.blocks.1.feed_forward.fc2.bias": "model-00001-of-00007.safetensors", "model.blocks.1.feed_forward.fc2.weight": "model-00001-of-00007.safetensors", "model.blocks.1.norm1.weight": "model-00001-of-00007.safetensors", "model.blocks.1.norm2.weight": "model-00001-of-00007.safetensors", "model.blocks.1.norm3.weight": "model-00001-of-00007.safetensors", "model.blocks.1.norm4.weight": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_cross.W_k.bias": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_cross.W_k.weight": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_cross.W_o.bias": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_cross.W_o.weight": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_cross.W_q.bias": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_cross.W_q.weight": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_cross.W_v.bias": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_cross.W_v.weight": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_hw.W_k.bias": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_hw.W_k.weight": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_hw.W_o.bias": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_hw.W_o.weight": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_hw.W_q.bias": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_hw.W_q.weight": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_hw.W_v.bias": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_hw.W_v.weight": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_n.W_k.bias": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_n.W_k.weight": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_n.W_o.bias": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_n.W_o.weight": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_n.W_q.bias": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_n.W_q.weight": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_n.W_v.bias": "model-00001-of-00007.safetensors", "model.blocks.1.self_attn_n.W_v.weight": "model-00001-of-00007.safetensors", "model.blocks.10.feed_forward.fc1.bias": "model-00003-of-00007.safetensors", "model.blocks.10.feed_forward.fc1.weight": "model-00003-of-00007.safetensors", "model.blocks.10.feed_forward.fc2.bias": "model-00003-of-00007.safetensors", "model.blocks.10.feed_forward.fc2.weight": "model-00003-of-00007.safetensors", "model.blocks.10.norm1.weight": "model-00003-of-00007.safetensors", "model.blocks.10.norm2.weight": "model-00003-of-00007.safetensors", "model.blocks.10.norm3.weight": "model-00003-of-00007.safetensors", "model.blocks.10.norm4.weight": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_cross.W_k.bias": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_cross.W_k.weight": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_cross.W_o.bias": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_cross.W_o.weight": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_cross.W_q.bias": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_cross.W_q.weight": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_cross.W_v.bias": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_cross.W_v.weight": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_hw.W_k.bias": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_hw.W_k.weight": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_hw.W_o.bias": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_hw.W_o.weight": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_hw.W_q.bias": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_hw.W_q.weight": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_hw.W_v.bias": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_hw.W_v.weight": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_n.W_k.bias": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_n.W_k.weight": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_n.W_o.bias": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_n.W_o.weight": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_n.W_q.bias": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_n.W_q.weight": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_n.W_v.bias": "model-00003-of-00007.safetensors", "model.blocks.10.self_attn_n.W_v.weight": "model-00003-of-00007.safetensors", "model.blocks.11.feed_forward.fc1.bias": "model-00004-of-00007.safetensors", "model.blocks.11.feed_forward.fc1.weight": "model-00004-of-00007.safetensors", "model.blocks.11.feed_forward.fc2.bias": "model-00004-of-00007.safetensors", "model.blocks.11.feed_forward.fc2.weight": "model-00004-of-00007.safetensors", "model.blocks.11.norm1.weight": "model-00004-of-00007.safetensors", "model.blocks.11.norm2.weight": "model-00004-of-00007.safetensors", "model.blocks.11.norm3.weight": "model-00004-of-00007.safetensors", "model.blocks.11.norm4.weight": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_cross.W_k.bias": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_cross.W_k.weight": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_cross.W_o.bias": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_cross.W_o.weight": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_cross.W_q.bias": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_cross.W_q.weight": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_cross.W_v.bias": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_cross.W_v.weight": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_hw.W_k.bias": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_hw.W_k.weight": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_hw.W_o.bias": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_hw.W_o.weight": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_hw.W_q.bias": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_hw.W_q.weight": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_hw.W_v.bias": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_hw.W_v.weight": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_n.W_k.bias": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_n.W_k.weight": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_n.W_o.bias": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_n.W_o.weight": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_n.W_q.bias": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_n.W_q.weight": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_n.W_v.bias": "model-00004-of-00007.safetensors", "model.blocks.11.self_attn_n.W_v.weight": "model-00004-of-00007.safetensors", "model.blocks.12.feed_forward.fc1.bias": "model-00004-of-00007.safetensors", "model.blocks.12.feed_forward.fc1.weight": "model-00004-of-00007.safetensors", "model.blocks.12.feed_forward.fc2.bias": "model-00004-of-00007.safetensors", "model.blocks.12.feed_forward.fc2.weight": "model-00004-of-00007.safetensors", "model.blocks.12.norm1.weight": "model-00004-of-00007.safetensors", "model.blocks.12.norm2.weight": "model-00004-of-00007.safetensors", "model.blocks.12.norm3.weight": "model-00004-of-00007.safetensors", "model.blocks.12.norm4.weight": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_cross.W_k.bias": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_cross.W_k.weight": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_cross.W_o.bias": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_cross.W_o.weight": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_cross.W_q.bias": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_cross.W_q.weight": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_cross.W_v.bias": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_cross.W_v.weight": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_hw.W_k.bias": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_hw.W_k.weight": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_hw.W_o.bias": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_hw.W_o.weight": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_hw.W_q.bias": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_hw.W_q.weight": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_hw.W_v.bias": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_hw.W_v.weight": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_n.W_k.bias": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_n.W_k.weight": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_n.W_o.bias": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_n.W_o.weight": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_n.W_q.bias": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_n.W_q.weight": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_n.W_v.bias": "model-00004-of-00007.safetensors", "model.blocks.12.self_attn_n.W_v.weight": "model-00004-of-00007.safetensors", "model.blocks.13.feed_forward.fc1.bias": "model-00004-of-00007.safetensors", "model.blocks.13.feed_forward.fc1.weight": "model-00004-of-00007.safetensors", "model.blocks.13.feed_forward.fc2.bias": "model-00004-of-00007.safetensors", "model.blocks.13.feed_forward.fc2.weight": "model-00004-of-00007.safetensors", "model.blocks.13.norm1.weight": "model-00004-of-00007.safetensors", "model.blocks.13.norm2.weight": "model-00004-of-00007.safetensors", "model.blocks.13.norm3.weight": "model-00004-of-00007.safetensors", "model.blocks.13.norm4.weight": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_cross.W_k.bias": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_cross.W_k.weight": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_cross.W_o.bias": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_cross.W_o.weight": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_cross.W_q.bias": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_cross.W_q.weight": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_cross.W_v.bias": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_cross.W_v.weight": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_hw.W_k.bias": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_hw.W_k.weight": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_hw.W_o.bias": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_hw.W_o.weight": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_hw.W_q.bias": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_hw.W_q.weight": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_hw.W_v.bias": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_hw.W_v.weight": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_n.W_k.bias": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_n.W_k.weight": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_n.W_o.bias": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_n.W_o.weight": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_n.W_q.bias": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_n.W_q.weight": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_n.W_v.bias": "model-00004-of-00007.safetensors", "model.blocks.13.self_attn_n.W_v.weight": "model-00004-of-00007.safetensors", "model.blocks.14.feed_forward.fc1.bias": "model-00005-of-00007.safetensors", "model.blocks.14.feed_forward.fc1.weight": "model-00005-of-00007.safetensors", "model.blocks.14.feed_forward.fc2.bias": "model-00005-of-00007.safetensors", "model.blocks.14.feed_forward.fc2.weight": "model-00005-of-00007.safetensors", "model.blocks.14.norm1.weight": "model-00005-of-00007.safetensors", "model.blocks.14.norm2.weight": "model-00005-of-00007.safetensors", "model.blocks.14.norm3.weight": "model-00005-of-00007.safetensors", "model.blocks.14.norm4.weight": "model-00005-of-00007.safetensors", "model.blocks.14.self_attn_cross.W_k.bias": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_cross.W_k.weight": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_cross.W_o.bias": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_cross.W_o.weight": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_cross.W_q.bias": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_cross.W_q.weight": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_cross.W_v.bias": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_cross.W_v.weight": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_hw.W_k.bias": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_hw.W_k.weight": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_hw.W_o.bias": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_hw.W_o.weight": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_hw.W_q.bias": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_hw.W_q.weight": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_hw.W_v.bias": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_hw.W_v.weight": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_n.W_k.bias": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_n.W_k.weight": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_n.W_o.bias": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_n.W_o.weight": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_n.W_q.bias": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_n.W_q.weight": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_n.W_v.bias": "model-00004-of-00007.safetensors", "model.blocks.14.self_attn_n.W_v.weight": "model-00004-of-00007.safetensors", "model.blocks.15.feed_forward.fc1.bias": "model-00005-of-00007.safetensors", "model.blocks.15.feed_forward.fc1.weight": "model-00005-of-00007.safetensors", "model.blocks.15.feed_forward.fc2.bias": "model-00005-of-00007.safetensors", "model.blocks.15.feed_forward.fc2.weight": "model-00005-of-00007.safetensors", "model.blocks.15.norm1.weight": "model-00005-of-00007.safetensors", "model.blocks.15.norm2.weight": "model-00005-of-00007.safetensors", "model.blocks.15.norm3.weight": "model-00005-of-00007.safetensors", "model.blocks.15.norm4.weight": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_cross.W_k.bias": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_cross.W_k.weight": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_cross.W_o.bias": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_cross.W_o.weight": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_cross.W_q.bias": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_cross.W_q.weight": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_cross.W_v.bias": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_cross.W_v.weight": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_hw.W_k.bias": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_hw.W_k.weight": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_hw.W_o.bias": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_hw.W_o.weight": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_hw.W_q.bias": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_hw.W_q.weight": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_hw.W_v.bias": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_hw.W_v.weight": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_n.W_k.bias": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_n.W_k.weight": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_n.W_o.bias": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_n.W_o.weight": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_n.W_q.bias": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_n.W_q.weight": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_n.W_v.bias": "model-00005-of-00007.safetensors", "model.blocks.15.self_attn_n.W_v.weight": "model-00005-of-00007.safetensors", "model.blocks.16.feed_forward.fc1.bias": "model-00005-of-00007.safetensors", "model.blocks.16.feed_forward.fc1.weight": "model-00005-of-00007.safetensors", "model.blocks.16.feed_forward.fc2.bias": "model-00005-of-00007.safetensors", "model.blocks.16.feed_forward.fc2.weight": "model-00005-of-00007.safetensors", "model.blocks.16.norm1.weight": "model-00005-of-00007.safetensors", "model.blocks.16.norm2.weight": "model-00005-of-00007.safetensors", "model.blocks.16.norm3.weight": "model-00005-of-00007.safetensors", "model.blocks.16.norm4.weight": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_cross.W_k.bias": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_cross.W_k.weight": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_cross.W_o.bias": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_cross.W_o.weight": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_cross.W_q.bias": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_cross.W_q.weight": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_cross.W_v.bias": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_cross.W_v.weight": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_hw.W_k.bias": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_hw.W_k.weight": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_hw.W_o.bias": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_hw.W_o.weight": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_hw.W_q.bias": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_hw.W_q.weight": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_hw.W_v.bias": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_hw.W_v.weight": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_n.W_k.bias": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_n.W_k.weight": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_n.W_o.bias": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_n.W_o.weight": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_n.W_q.bias": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_n.W_q.weight": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_n.W_v.bias": "model-00005-of-00007.safetensors", "model.blocks.16.self_attn_n.W_v.weight": "model-00005-of-00007.safetensors", "model.blocks.17.feed_forward.fc1.bias": "model-00005-of-00007.safetensors", "model.blocks.17.feed_forward.fc1.weight": "model-00005-of-00007.safetensors", "model.blocks.17.feed_forward.fc2.bias": "model-00005-of-00007.safetensors", "model.blocks.17.feed_forward.fc2.weight": "model-00005-of-00007.safetensors", "model.blocks.17.norm1.weight": "model-00005-of-00007.safetensors", "model.blocks.17.norm2.weight": "model-00005-of-00007.safetensors", "model.blocks.17.norm3.weight": "model-00005-of-00007.safetensors", "model.blocks.17.norm4.weight": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_cross.W_k.bias": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_cross.W_k.weight": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_cross.W_o.bias": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_cross.W_o.weight": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_cross.W_q.bias": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_cross.W_q.weight": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_cross.W_v.bias": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_cross.W_v.weight": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_hw.W_k.bias": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_hw.W_k.weight": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_hw.W_o.bias": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_hw.W_o.weight": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_hw.W_q.bias": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_hw.W_q.weight": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_hw.W_v.bias": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_hw.W_v.weight": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_n.W_k.bias": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_n.W_k.weight": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_n.W_o.bias": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_n.W_o.weight": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_n.W_q.bias": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_n.W_q.weight": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_n.W_v.bias": "model-00005-of-00007.safetensors", "model.blocks.17.self_attn_n.W_v.weight": "model-00005-of-00007.safetensors", "model.blocks.18.feed_forward.fc1.bias": "model-00006-of-00007.safetensors", "model.blocks.18.feed_forward.fc1.weight": "model-00006-of-00007.safetensors", "model.blocks.18.feed_forward.fc2.bias": "model-00006-of-00007.safetensors", "model.blocks.18.feed_forward.fc2.weight": "model-00006-of-00007.safetensors", "model.blocks.18.norm1.weight": "model-00006-of-00007.safetensors", "model.blocks.18.norm2.weight": "model-00006-of-00007.safetensors", "model.blocks.18.norm3.weight": "model-00006-of-00007.safetensors", "model.blocks.18.norm4.weight": "model-00006-of-00007.safetensors", "model.blocks.18.self_attn_cross.W_k.bias": "model-00006-of-00007.safetensors", "model.blocks.18.self_attn_cross.W_k.weight": "model-00006-of-00007.safetensors", "model.blocks.18.self_attn_cross.W_o.bias": "model-00006-of-00007.safetensors", "model.blocks.18.self_attn_cross.W_o.weight": "model-00006-of-00007.safetensors", "model.blocks.18.self_attn_cross.W_q.bias": "model-00006-of-00007.safetensors", "model.blocks.18.self_attn_cross.W_q.weight": "model-00006-of-00007.safetensors", "model.blocks.18.self_attn_cross.W_v.bias": "model-00006-of-00007.safetensors", "model.blocks.18.self_attn_cross.W_v.weight": "model-00006-of-00007.safetensors", "model.blocks.18.self_attn_hw.W_k.bias": "model-00005-of-00007.safetensors", "model.blocks.18.self_attn_hw.W_k.weight": "model-00005-of-00007.safetensors", "model.blocks.18.self_attn_hw.W_o.bias": "model-00006-of-00007.safetensors", "model.blocks.18.self_attn_hw.W_o.weight": "model-00006-of-00007.safetensors", "model.blocks.18.self_attn_hw.W_q.bias": "model-00005-of-00007.safetensors", "model.blocks.18.self_attn_hw.W_q.weight": "model-00005-of-00007.safetensors", "model.blocks.18.self_attn_hw.W_v.bias": "model-00006-of-00007.safetensors", "model.blocks.18.self_attn_hw.W_v.weight": "model-00006-of-00007.safetensors", "model.blocks.18.self_attn_n.W_k.bias": "model-00005-of-00007.safetensors", "model.blocks.18.self_attn_n.W_k.weight": "model-00005-of-00007.safetensors", "model.blocks.18.self_attn_n.W_o.bias": "model-00005-of-00007.safetensors", "model.blocks.18.self_attn_n.W_o.weight": "model-00005-of-00007.safetensors", "model.blocks.18.self_attn_n.W_q.bias": "model-00005-of-00007.safetensors", "model.blocks.18.self_attn_n.W_q.weight": "model-00005-of-00007.safetensors", "model.blocks.18.self_attn_n.W_v.bias": "model-00005-of-00007.safetensors", "model.blocks.18.self_attn_n.W_v.weight": "model-00005-of-00007.safetensors", "model.blocks.19.feed_forward.fc1.bias": "model-00006-of-00007.safetensors", "model.blocks.19.feed_forward.fc1.weight": "model-00006-of-00007.safetensors", "model.blocks.19.feed_forward.fc2.bias": "model-00006-of-00007.safetensors", "model.blocks.19.feed_forward.fc2.weight": "model-00006-of-00007.safetensors", "model.blocks.19.norm1.weight": "model-00006-of-00007.safetensors", "model.blocks.19.norm2.weight": "model-00006-of-00007.safetensors", "model.blocks.19.norm3.weight": "model-00006-of-00007.safetensors", "model.blocks.19.norm4.weight": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_cross.W_k.bias": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_cross.W_k.weight": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_cross.W_o.bias": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_cross.W_o.weight": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_cross.W_q.bias": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_cross.W_q.weight": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_cross.W_v.bias": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_cross.W_v.weight": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_hw.W_k.bias": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_hw.W_k.weight": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_hw.W_o.bias": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_hw.W_o.weight": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_hw.W_q.bias": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_hw.W_q.weight": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_hw.W_v.bias": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_hw.W_v.weight": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_n.W_k.bias": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_n.W_k.weight": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_n.W_o.bias": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_n.W_o.weight": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_n.W_q.bias": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_n.W_q.weight": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_n.W_v.bias": "model-00006-of-00007.safetensors", "model.blocks.19.self_attn_n.W_v.weight": "model-00006-of-00007.safetensors", "model.blocks.2.feed_forward.fc1.bias": "model-00001-of-00007.safetensors", "model.blocks.2.feed_forward.fc1.weight": "model-00001-of-00007.safetensors", "model.blocks.2.feed_forward.fc2.bias": "model-00001-of-00007.safetensors", "model.blocks.2.feed_forward.fc2.weight": "model-00001-of-00007.safetensors", "model.blocks.2.norm1.weight": "model-00001-of-00007.safetensors", "model.blocks.2.norm2.weight": "model-00001-of-00007.safetensors", "model.blocks.2.norm3.weight": "model-00001-of-00007.safetensors", "model.blocks.2.norm4.weight": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_cross.W_k.bias": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_cross.W_k.weight": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_cross.W_o.bias": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_cross.W_o.weight": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_cross.W_q.bias": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_cross.W_q.weight": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_cross.W_v.bias": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_cross.W_v.weight": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_hw.W_k.bias": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_hw.W_k.weight": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_hw.W_o.bias": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_hw.W_o.weight": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_hw.W_q.bias": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_hw.W_q.weight": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_hw.W_v.bias": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_hw.W_v.weight": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_n.W_k.bias": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_n.W_k.weight": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_n.W_o.bias": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_n.W_o.weight": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_n.W_q.bias": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_n.W_q.weight": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_n.W_v.bias": "model-00001-of-00007.safetensors", "model.blocks.2.self_attn_n.W_v.weight": "model-00001-of-00007.safetensors", "model.blocks.20.feed_forward.fc1.bias": "model-00006-of-00007.safetensors", "model.blocks.20.feed_forward.fc1.weight": "model-00006-of-00007.safetensors", "model.blocks.20.feed_forward.fc2.bias": "model-00006-of-00007.safetensors", "model.blocks.20.feed_forward.fc2.weight": "model-00006-of-00007.safetensors", "model.blocks.20.norm1.weight": "model-00006-of-00007.safetensors", "model.blocks.20.norm2.weight": "model-00006-of-00007.safetensors", "model.blocks.20.norm3.weight": "model-00006-of-00007.safetensors", "model.blocks.20.norm4.weight": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_cross.W_k.bias": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_cross.W_k.weight": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_cross.W_o.bias": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_cross.W_o.weight": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_cross.W_q.bias": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_cross.W_q.weight": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_cross.W_v.bias": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_cross.W_v.weight": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_hw.W_k.bias": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_hw.W_k.weight": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_hw.W_o.bias": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_hw.W_o.weight": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_hw.W_q.bias": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_hw.W_q.weight": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_hw.W_v.bias": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_hw.W_v.weight": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_n.W_k.bias": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_n.W_k.weight": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_n.W_o.bias": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_n.W_o.weight": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_n.W_q.bias": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_n.W_q.weight": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_n.W_v.bias": "model-00006-of-00007.safetensors", "model.blocks.20.self_attn_n.W_v.weight": "model-00006-of-00007.safetensors", "model.blocks.21.feed_forward.fc1.bias": "model-00006-of-00007.safetensors", "model.blocks.21.feed_forward.fc1.weight": "model-00006-of-00007.safetensors", "model.blocks.21.feed_forward.fc2.bias": "model-00006-of-00007.safetensors", "model.blocks.21.feed_forward.fc2.weight": "model-00006-of-00007.safetensors", "model.blocks.21.norm1.weight": "model-00006-of-00007.safetensors", "model.blocks.21.norm2.weight": "model-00006-of-00007.safetensors", "model.blocks.21.norm3.weight": "model-00006-of-00007.safetensors", "model.blocks.21.norm4.weight": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_cross.W_k.bias": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_cross.W_k.weight": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_cross.W_o.bias": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_cross.W_o.weight": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_cross.W_q.bias": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_cross.W_q.weight": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_cross.W_v.bias": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_cross.W_v.weight": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_hw.W_k.bias": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_hw.W_k.weight": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_hw.W_o.bias": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_hw.W_o.weight": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_hw.W_q.bias": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_hw.W_q.weight": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_hw.W_v.bias": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_hw.W_v.weight": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_n.W_k.bias": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_n.W_k.weight": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_n.W_o.bias": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_n.W_o.weight": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_n.W_q.bias": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_n.W_q.weight": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_n.W_v.bias": "model-00006-of-00007.safetensors", "model.blocks.21.self_attn_n.W_v.weight": "model-00006-of-00007.safetensors", "model.blocks.22.feed_forward.fc1.bias": "model-00007-of-00007.safetensors", "model.blocks.22.feed_forward.fc1.weight": "model-00007-of-00007.safetensors", "model.blocks.22.feed_forward.fc2.bias": "model-00007-of-00007.safetensors", "model.blocks.22.feed_forward.fc2.weight": "model-00007-of-00007.safetensors", "model.blocks.22.norm1.weight": "model-00007-of-00007.safetensors", "model.blocks.22.norm2.weight": "model-00007-of-00007.safetensors", "model.blocks.22.norm3.weight": "model-00007-of-00007.safetensors", "model.blocks.22.norm4.weight": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_cross.W_k.bias": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_cross.W_k.weight": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_cross.W_o.bias": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_cross.W_o.weight": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_cross.W_q.bias": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_cross.W_q.weight": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_cross.W_v.bias": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_cross.W_v.weight": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_hw.W_k.bias": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_hw.W_k.weight": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_hw.W_o.bias": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_hw.W_o.weight": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_hw.W_q.bias": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_hw.W_q.weight": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_hw.W_v.bias": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_hw.W_v.weight": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_n.W_k.bias": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_n.W_k.weight": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_n.W_o.bias": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_n.W_o.weight": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_n.W_q.bias": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_n.W_q.weight": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_n.W_v.bias": "model-00007-of-00007.safetensors", "model.blocks.22.self_attn_n.W_v.weight": "model-00007-of-00007.safetensors", "model.blocks.23.feed_forward.fc1.bias": "model-00007-of-00007.safetensors", "model.blocks.23.feed_forward.fc1.weight": "model-00007-of-00007.safetensors", "model.blocks.23.feed_forward.fc2.bias": "model-00007-of-00007.safetensors", "model.blocks.23.feed_forward.fc2.weight": "model-00007-of-00007.safetensors", "model.blocks.23.norm1.weight": "model-00007-of-00007.safetensors", "model.blocks.23.norm2.weight": "model-00007-of-00007.safetensors", "model.blocks.23.norm3.weight": "model-00007-of-00007.safetensors", "model.blocks.23.norm4.weight": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_cross.W_k.bias": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_cross.W_k.weight": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_cross.W_o.bias": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_cross.W_o.weight": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_cross.W_q.bias": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_cross.W_q.weight": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_cross.W_v.bias": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_cross.W_v.weight": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_hw.W_k.bias": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_hw.W_k.weight": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_hw.W_o.bias": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_hw.W_o.weight": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_hw.W_q.bias": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_hw.W_q.weight": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_hw.W_v.bias": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_hw.W_v.weight": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_n.W_k.bias": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_n.W_k.weight": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_n.W_o.bias": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_n.W_o.weight": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_n.W_q.bias": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_n.W_q.weight": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_n.W_v.bias": "model-00007-of-00007.safetensors", "model.blocks.23.self_attn_n.W_v.weight": "model-00007-of-00007.safetensors", "model.blocks.3.feed_forward.fc1.bias": "model-00002-of-00007.safetensors", "model.blocks.3.feed_forward.fc1.weight": "model-00002-of-00007.safetensors", "model.blocks.3.feed_forward.fc2.bias": "model-00002-of-00007.safetensors", "model.blocks.3.feed_forward.fc2.weight": "model-00002-of-00007.safetensors", "model.blocks.3.norm1.weight": "model-00002-of-00007.safetensors", "model.blocks.3.norm2.weight": "model-00002-of-00007.safetensors", "model.blocks.3.norm3.weight": "model-00002-of-00007.safetensors", "model.blocks.3.norm4.weight": "model-00002-of-00007.safetensors", "model.blocks.3.self_attn_cross.W_k.bias": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_cross.W_k.weight": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_cross.W_o.bias": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_cross.W_o.weight": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_cross.W_q.bias": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_cross.W_q.weight": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_cross.W_v.bias": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_cross.W_v.weight": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_hw.W_k.bias": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_hw.W_k.weight": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_hw.W_o.bias": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_hw.W_o.weight": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_hw.W_q.bias": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_hw.W_q.weight": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_hw.W_v.bias": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_hw.W_v.weight": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_n.W_k.bias": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_n.W_k.weight": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_n.W_o.bias": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_n.W_o.weight": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_n.W_q.bias": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_n.W_q.weight": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_n.W_v.bias": "model-00001-of-00007.safetensors", "model.blocks.3.self_attn_n.W_v.weight": "model-00001-of-00007.safetensors", "model.blocks.4.feed_forward.fc1.bias": "model-00002-of-00007.safetensors", "model.blocks.4.feed_forward.fc1.weight": "model-00002-of-00007.safetensors", "model.blocks.4.feed_forward.fc2.bias": "model-00002-of-00007.safetensors", "model.blocks.4.feed_forward.fc2.weight": "model-00002-of-00007.safetensors", "model.blocks.4.norm1.weight": "model-00002-of-00007.safetensors", "model.blocks.4.norm2.weight": "model-00002-of-00007.safetensors", "model.blocks.4.norm3.weight": "model-00002-of-00007.safetensors", "model.blocks.4.norm4.weight": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_cross.W_k.bias": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_cross.W_k.weight": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_cross.W_o.bias": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_cross.W_o.weight": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_cross.W_q.bias": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_cross.W_q.weight": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_cross.W_v.bias": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_cross.W_v.weight": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_hw.W_k.bias": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_hw.W_k.weight": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_hw.W_o.bias": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_hw.W_o.weight": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_hw.W_q.bias": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_hw.W_q.weight": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_hw.W_v.bias": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_hw.W_v.weight": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_n.W_k.bias": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_n.W_k.weight": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_n.W_o.bias": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_n.W_o.weight": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_n.W_q.bias": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_n.W_q.weight": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_n.W_v.bias": "model-00002-of-00007.safetensors", "model.blocks.4.self_attn_n.W_v.weight": "model-00002-of-00007.safetensors", "model.blocks.5.feed_forward.fc1.bias": "model-00002-of-00007.safetensors", "model.blocks.5.feed_forward.fc1.weight": "model-00002-of-00007.safetensors", "model.blocks.5.feed_forward.fc2.bias": "model-00002-of-00007.safetensors", "model.blocks.5.feed_forward.fc2.weight": "model-00002-of-00007.safetensors", "model.blocks.5.norm1.weight": "model-00002-of-00007.safetensors", "model.blocks.5.norm2.weight": "model-00002-of-00007.safetensors", "model.blocks.5.norm3.weight": "model-00002-of-00007.safetensors", "model.blocks.5.norm4.weight": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_cross.W_k.bias": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_cross.W_k.weight": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_cross.W_o.bias": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_cross.W_o.weight": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_cross.W_q.bias": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_cross.W_q.weight": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_cross.W_v.bias": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_cross.W_v.weight": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_hw.W_k.bias": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_hw.W_k.weight": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_hw.W_o.bias": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_hw.W_o.weight": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_hw.W_q.bias": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_hw.W_q.weight": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_hw.W_v.bias": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_hw.W_v.weight": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_n.W_k.bias": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_n.W_k.weight": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_n.W_o.bias": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_n.W_o.weight": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_n.W_q.bias": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_n.W_q.weight": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_n.W_v.bias": "model-00002-of-00007.safetensors", "model.blocks.5.self_attn_n.W_v.weight": "model-00002-of-00007.safetensors", "model.blocks.6.feed_forward.fc1.bias": "model-00002-of-00007.safetensors", "model.blocks.6.feed_forward.fc1.weight": "model-00002-of-00007.safetensors", "model.blocks.6.feed_forward.fc2.bias": "model-00002-of-00007.safetensors", "model.blocks.6.feed_forward.fc2.weight": "model-00002-of-00007.safetensors", "model.blocks.6.norm1.weight": "model-00002-of-00007.safetensors", "model.blocks.6.norm2.weight": "model-00002-of-00007.safetensors", "model.blocks.6.norm3.weight": "model-00002-of-00007.safetensors", "model.blocks.6.norm4.weight": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_cross.W_k.bias": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_cross.W_k.weight": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_cross.W_o.bias": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_cross.W_o.weight": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_cross.W_q.bias": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_cross.W_q.weight": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_cross.W_v.bias": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_cross.W_v.weight": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_hw.W_k.bias": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_hw.W_k.weight": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_hw.W_o.bias": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_hw.W_o.weight": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_hw.W_q.bias": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_hw.W_q.weight": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_hw.W_v.bias": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_hw.W_v.weight": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_n.W_k.bias": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_n.W_k.weight": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_n.W_o.bias": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_n.W_o.weight": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_n.W_q.bias": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_n.W_q.weight": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_n.W_v.bias": "model-00002-of-00007.safetensors", "model.blocks.6.self_attn_n.W_v.weight": "model-00002-of-00007.safetensors", "model.blocks.7.feed_forward.fc1.bias": "model-00003-of-00007.safetensors", "model.blocks.7.feed_forward.fc1.weight": "model-00003-of-00007.safetensors", "model.blocks.7.feed_forward.fc2.bias": "model-00003-of-00007.safetensors", "model.blocks.7.feed_forward.fc2.weight": "model-00003-of-00007.safetensors", "model.blocks.7.norm1.weight": "model-00003-of-00007.safetensors", "model.blocks.7.norm2.weight": "model-00003-of-00007.safetensors", "model.blocks.7.norm3.weight": "model-00003-of-00007.safetensors", "model.blocks.7.norm4.weight": "model-00003-of-00007.safetensors", "model.blocks.7.self_attn_cross.W_k.bias": "model-00003-of-00007.safetensors", "model.blocks.7.self_attn_cross.W_k.weight": "model-00003-of-00007.safetensors", "model.blocks.7.self_attn_cross.W_o.bias": "model-00003-of-00007.safetensors", "model.blocks.7.self_attn_cross.W_o.weight": "model-00003-of-00007.safetensors", "model.blocks.7.self_attn_cross.W_q.bias": "model-00003-of-00007.safetensors", "model.blocks.7.self_attn_cross.W_q.weight": "model-00003-of-00007.safetensors", "model.blocks.7.self_attn_cross.W_v.bias": "model-00003-of-00007.safetensors", "model.blocks.7.self_attn_cross.W_v.weight": "model-00003-of-00007.safetensors", "model.blocks.7.self_attn_hw.W_k.bias": "model-00002-of-00007.safetensors", "model.blocks.7.self_attn_hw.W_k.weight": "model-00002-of-00007.safetensors", "model.blocks.7.self_attn_hw.W_o.bias": "model-00003-of-00007.safetensors", "model.blocks.7.self_attn_hw.W_o.weight": "model-00003-of-00007.safetensors", "model.blocks.7.self_attn_hw.W_q.bias": "model-00002-of-00007.safetensors", "model.blocks.7.self_attn_hw.W_q.weight": "model-00002-of-00007.safetensors", "model.blocks.7.self_attn_hw.W_v.bias": "model-00003-of-00007.safetensors", "model.blocks.7.self_attn_hw.W_v.weight": "model-00003-of-00007.safetensors", "model.blocks.7.self_attn_n.W_k.bias": "model-00002-of-00007.safetensors", "model.blocks.7.self_attn_n.W_k.weight": "model-00002-of-00007.safetensors", "model.blocks.7.self_attn_n.W_o.bias": "model-00002-of-00007.safetensors", "model.blocks.7.self_attn_n.W_o.weight": "model-00002-of-00007.safetensors", "model.blocks.7.self_attn_n.W_q.bias": "model-00002-of-00007.safetensors", "model.blocks.7.self_attn_n.W_q.weight": "model-00002-of-00007.safetensors", "model.blocks.7.self_attn_n.W_v.bias": "model-00002-of-00007.safetensors", "model.blocks.7.self_attn_n.W_v.weight": "model-00002-of-00007.safetensors", "model.blocks.8.feed_forward.fc1.bias": "model-00003-of-00007.safetensors", "model.blocks.8.feed_forward.fc1.weight": "model-00003-of-00007.safetensors", "model.blocks.8.feed_forward.fc2.bias": "model-00003-of-00007.safetensors", "model.blocks.8.feed_forward.fc2.weight": "model-00003-of-00007.safetensors", "model.blocks.8.norm1.weight": "model-00003-of-00007.safetensors", "model.blocks.8.norm2.weight": "model-00003-of-00007.safetensors", "model.blocks.8.norm3.weight": "model-00003-of-00007.safetensors", "model.blocks.8.norm4.weight": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_cross.W_k.bias": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_cross.W_k.weight": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_cross.W_o.bias": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_cross.W_o.weight": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_cross.W_q.bias": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_cross.W_q.weight": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_cross.W_v.bias": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_cross.W_v.weight": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_hw.W_k.bias": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_hw.W_k.weight": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_hw.W_o.bias": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_hw.W_o.weight": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_hw.W_q.bias": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_hw.W_q.weight": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_hw.W_v.bias": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_hw.W_v.weight": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_n.W_k.bias": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_n.W_k.weight": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_n.W_o.bias": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_n.W_o.weight": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_n.W_q.bias": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_n.W_q.weight": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_n.W_v.bias": "model-00003-of-00007.safetensors", "model.blocks.8.self_attn_n.W_v.weight": "model-00003-of-00007.safetensors", "model.blocks.9.feed_forward.fc1.bias": "model-00003-of-00007.safetensors", "model.blocks.9.feed_forward.fc1.weight": "model-00003-of-00007.safetensors", "model.blocks.9.feed_forward.fc2.bias": "model-00003-of-00007.safetensors", "model.blocks.9.feed_forward.fc2.weight": "model-00003-of-00007.safetensors", "model.blocks.9.norm1.weight": "model-00003-of-00007.safetensors", "model.blocks.9.norm2.weight": "model-00003-of-00007.safetensors", "model.blocks.9.norm3.weight": "model-00003-of-00007.safetensors", "model.blocks.9.norm4.weight": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_cross.W_k.bias": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_cross.W_k.weight": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_cross.W_o.bias": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_cross.W_o.weight": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_cross.W_q.bias": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_cross.W_q.weight": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_cross.W_v.bias": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_cross.W_v.weight": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_hw.W_k.bias": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_hw.W_k.weight": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_hw.W_o.bias": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_hw.W_o.weight": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_hw.W_q.bias": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_hw.W_q.weight": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_hw.W_v.bias": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_hw.W_v.weight": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_n.W_k.bias": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_n.W_k.weight": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_n.W_o.bias": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_n.W_o.weight": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_n.W_q.bias": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_n.W_q.weight": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_n.W_v.bias": "model-00003-of-00007.safetensors", "model.blocks.9.self_attn_n.W_v.weight": "model-00003-of-00007.safetensors", "model.hw_pos.weight": "model-00001-of-00007.safetensors", "model.layer_pos.weight": "model-00001-of-00007.safetensors", "model.norm_final.weight": "model-00001-of-00007.safetensors", "model.norm_input.weight": "model-00001-of-00007.safetensors", "model.norm_tokens.weight": "model-00001-of-00007.safetensors", "model.proj_out_A.bias": "model-00007-of-00007.safetensors", "model.proj_out_A.weight": "model-00007-of-00007.safetensors", "model.proj_out_B.bias": "model-00007-of-00007.safetensors", "model.proj_out_B.weight": "model-00007-of-00007.safetensors" } }