{ "metadata": { "ParamSize": 435, "ParamBytes": 1059316480.0, "BitsPerParam": 3.726592627263146 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 254279680, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 248320, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 254279680, "byteOffset": 0 } ], "md5sum": "c75b9b92fdd4065bdf9d737654247d4d" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 31784960, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 248320, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31784960, "byteOffset": 0 } ], "md5sum": "eee32eab35fbaaf2f33abf0ddcc4f80e" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 }, { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7077888 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19660800 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 21233664 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27525120 } ], "md5sum": "2128ef817544eba2a9bd8703385184ef" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14155776 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20447232 } ], "md5sum": "5cf13f4e3e929b9ef7325351f3673a9b" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14155776 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20447232 } ], "md5sum": "2bb0975752a16a44b0ced813edd24b1e" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14155776 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20447232 } ], "md5sum": "d6123acfb0da8a3256179ec69a79297d" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14155776 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20447232 } ], "md5sum": "77babd8ca3b8055a320186f0fc40cb3e" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14155776 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20447232 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 21233664 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27525120 } ], "md5sum": "8e0d29e19dca106439838f00e6715404" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.12.linear_attn.in_proj_qkv.q_weight", "shape": [ 6144, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14155776 }, { "name": "model.layers.12.linear_attn.in_proj_qkv.q_scale", "shape": [ 6144, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20447232 }, { "name": "model.layers.4.linear_attn.in_proj_qkv.q_weight", "shape": [ 6144, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 21233664 }, { "name": "model.layers.4.linear_attn.in_proj_qkv.q_scale", "shape": [ 6144, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27525120 } ], "md5sum": "b280acd560afa8169dac6c3089ec86b1" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 }, { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7077888 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19660800 }, { "name": "model.layers.14.linear_attn.in_proj_qkv.q_weight", "shape": [ 6144, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 21233664 }, { "name": "model.layers.14.linear_attn.in_proj_qkv.q_scale", "shape": [ 6144, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27525120 } ], "md5sum": "d4e29dc45752743d8ce009d4543d5bbf" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 }, { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7077888 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19660800 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 21233664 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27525120 } ], "md5sum": "7167ec222b58d43038c59a0bc83aae5a" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14155776 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20447232 } ], "md5sum": "544398b614c19fe5a67f86163e193051" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.20.linear_attn.in_proj_qkv.q_weight", "shape": [ 6144, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14155776 }, { "name": "model.layers.20.linear_attn.in_proj_qkv.q_scale", "shape": [ 6144, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20447232 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 21233664 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27525120 } ], "md5sum": "aaf94ea75f78b6afd28eb2fadf9e77b9" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14155776 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20447232 } ], "md5sum": "ec8922abef8a1bf85a41d99c62cde105" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.18.linear_attn.in_proj_qkv.q_weight", "shape": [ 6144, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14155776 }, { "name": "model.layers.18.linear_attn.in_proj_qkv.q_scale", "shape": [ 6144, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20447232 }, { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 21233664 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27525120 } ], "md5sum": "8164ec2f6065a865e76a57dd71ce6dcd" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14155776 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20447232 } ], "md5sum": "95abee0bb58a72e105d4d27d1e6711ce" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.13.linear_attn.in_proj_qkv.q_weight", "shape": [ 6144, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14155776 }, { "name": "model.layers.13.linear_attn.in_proj_qkv.q_scale", "shape": [ 6144, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20447232 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 21233664 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27525120 } ], "md5sum": "b3a27b5f2dc300fa3bb66eb0cea194e8" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14155776 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20447232 } ], "md5sum": "d1697c68ccd94890199963923283ddf5" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.2.linear_attn.in_proj_qkv.q_weight", "shape": [ 6144, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14155776 }, { "name": "model.layers.2.linear_attn.in_proj_qkv.q_scale", "shape": [ 6144, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20447232 }, { "name": "model.layers.1.linear_attn.in_proj_qkv.q_weight", "shape": [ 6144, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 21233664 }, { "name": "model.layers.1.linear_attn.in_proj_qkv.q_scale", "shape": [ 6144, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27525120 } ], "md5sum": "b58465af80d2661fec81622ae0bc9455" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 }, { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7077888 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19660800 }, { "name": "model.layers.10.linear_attn.in_proj_qkv.q_weight", "shape": [ 6144, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 21233664 }, { "name": "model.layers.10.linear_attn.in_proj_qkv.q_scale", "shape": [ 6144, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27525120 } ], "md5sum": "76753f59dd2e385c4705a5aa4c3e2bff" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.16.linear_attn.in_proj_qkv.q_weight", "shape": [ 6144, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14155776 }, { "name": "model.layers.16.linear_attn.in_proj_qkv.q_scale", "shape": [ 6144, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20447232 }, { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 21233664 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27525120 } ], "md5sum": "e4c5227634015bb7816b3579a1526af1" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.17.linear_attn.in_proj_qkv.q_weight", "shape": [ 6144, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14155776 }, { "name": "model.layers.17.linear_attn.in_proj_qkv.q_scale", "shape": [ 6144, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20447232 }, { "name": "model.layers.5.linear_attn.in_proj_qkv.q_weight", "shape": [ 6144, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 21233664 }, { "name": "model.layers.5.linear_attn.in_proj_qkv.q_scale", "shape": [ 6144, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27525120 } ], "md5sum": "83a57659d9342809c7904173241a1867" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 }, { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7077888 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19660800 }, { "name": "model.layers.6.linear_attn.in_proj_qkv.q_weight", "shape": [ 6144, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 21233664 }, { "name": "model.layers.6.linear_attn.in_proj_qkv.q_scale", "shape": [ 6144, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27525120 } ], "md5sum": "a0f71f091739acfe8dc2034a2a207e3d" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.21.linear_attn.in_proj_qkv.q_weight", "shape": [ 6144, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.21.linear_attn.in_proj_qkv.q_scale", "shape": [ 6144, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 }, { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 7077888 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 13369344 }, { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 14155776 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 26738688 } ], "md5sum": "a58b07065c44d1fc0f4bdb6c70d3adfc" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.22.linear_attn.in_proj_qkv.q_weight", "shape": [ 6144, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.22.linear_attn.in_proj_qkv.q_scale", "shape": [ 6144, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 }, { "name": "model.layers.8.linear_attn.in_proj_qkv.q_weight", "shape": [ 6144, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 7077888 }, { "name": "model.layers.8.linear_attn.in_proj_qkv.q_scale", "shape": [ 6144, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 13369344 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14155776 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20447232 } ], "md5sum": "7f8221226e393ecec149f578e86c8977" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 28311552, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.9.linear_attn.in_proj_qkv.q_weight", "shape": [ 6144, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14155776 }, { "name": "model.layers.9.linear_attn.in_proj_qkv.q_scale", "shape": [ 6144, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20447232 }, { "name": "model.layers.0.linear_attn.in_proj_qkv.q_weight", "shape": [ 6144, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 21233664 }, { "name": "model.layers.0.linear_attn.in_proj_qkv.q_scale", "shape": [ 6144, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 27525120 } ], "md5sum": "71729215fe5895a647ab0abf7e3b77d1" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 2048, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 2048, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 }, { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 12288, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7077888 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 12288, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19660800 }, { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 21233664 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 26476544 }, { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 27131904 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 32374784 } ], "md5sum": "79633e8f2527c5418ad0c58ff0c0188d" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 5242880 }, { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 5898240 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 11141120 }, { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 11796480 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 17039360 }, { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 5120, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 17694720 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 5120, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 655360, "byteOffset": 22937600 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23592960 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 25690112 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 25952256 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 28049408 }, { "name": "model.layers.6.linear_attn.out_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 28311552 }, { "name": "model.layers.6.linear_attn.out_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 30408704 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30670848 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 32768000 } ], "md5sum": "44f9bb456535696beb83497a85c2700c" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 2097152 }, { "name": "model.layers.12.linear_attn.in_proj_z.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 2359296 }, { "name": "model.layers.12.linear_attn.in_proj_z.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 4456448 }, { "name": "model.layers.12.linear_attn.out_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 4718592 }, { "name": "model.layers.12.linear_attn.out_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 6815744 }, { "name": "model.layers.4.linear_attn.in_proj_z.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 7077888 }, { "name": "model.layers.4.linear_attn.in_proj_z.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 9175040 }, { "name": "model.layers.4.linear_attn.out_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 9437184 }, { "name": "model.layers.4.linear_attn.out_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 11534336 }, { "name": "model.layers.14.linear_attn.in_proj_z.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 11796480 }, { "name": "model.layers.14.linear_attn.in_proj_z.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 13893632 }, { "name": "model.layers.14.linear_attn.out_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14155776 }, { "name": "model.layers.14.linear_attn.out_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 16252928 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16515072 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18612224 }, { "name": "model.layers.20.linear_attn.in_proj_z.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 18874368 }, { "name": "model.layers.20.linear_attn.in_proj_z.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 20971520 }, { "name": "model.layers.20.linear_attn.out_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 21233664 }, { "name": "model.layers.20.linear_attn.out_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 23330816 }, { "name": "model.layers.17.linear_attn.out_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23592960 }, { "name": "model.layers.17.linear_attn.out_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 25690112 }, { "name": "model.layers.18.linear_attn.in_proj_z.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 25952256 }, { "name": "model.layers.18.linear_attn.in_proj_z.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 28049408 }, { "name": "model.layers.18.linear_attn.out_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 28311552 }, { "name": "model.layers.18.linear_attn.out_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 30408704 }, { "name": "model.layers.13.linear_attn.in_proj_z.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30670848 }, { "name": "model.layers.13.linear_attn.in_proj_z.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 32768000 } ], "md5sum": "a8eabbe665bf62475f7abb907ce7b232" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.13.linear_attn.out_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 0 }, { "name": "model.layers.13.linear_attn.out_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 2097152 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 2359296 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 4456448 }, { "name": "model.layers.2.linear_attn.in_proj_z.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 4718592 }, { "name": "model.layers.2.linear_attn.in_proj_z.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 6815744 }, { "name": "model.layers.2.linear_attn.out_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 7077888 }, { "name": "model.layers.2.linear_attn.out_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 9175040 }, { "name": "model.layers.1.linear_attn.in_proj_z.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 9437184 }, { "name": "model.layers.1.linear_attn.in_proj_z.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 11534336 }, { "name": "model.layers.1.linear_attn.out_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 11796480 }, { "name": "model.layers.1.linear_attn.out_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 13893632 }, { "name": "model.layers.10.linear_attn.in_proj_z.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14155776 }, { "name": "model.layers.10.linear_attn.in_proj_z.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 16252928 }, { "name": "model.layers.10.linear_attn.out_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16515072 }, { "name": "model.layers.10.linear_attn.out_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18612224 }, { "name": "model.layers.16.linear_attn.in_proj_z.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 18874368 }, { "name": "model.layers.16.linear_attn.in_proj_z.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 20971520 }, { "name": "model.layers.16.linear_attn.out_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 21233664 }, { "name": "model.layers.16.linear_attn.out_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 23330816 }, { "name": "model.layers.17.linear_attn.in_proj_z.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23592960 }, { "name": "model.layers.17.linear_attn.in_proj_z.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 25690112 }, { "name": "model.layers.5.linear_attn.in_proj_z.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 25952256 }, { "name": "model.layers.5.linear_attn.in_proj_z.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 28049408 }, { "name": "model.layers.5.linear_attn.out_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 28311552 }, { "name": "model.layers.5.linear_attn.out_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 30408704 }, { "name": "model.layers.6.linear_attn.in_proj_z.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30670848 }, { "name": "model.layers.6.linear_attn.in_proj_z.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 32768000 } ], "md5sum": "57cd50ad98f4f1382ea3774f3546b5db" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 25353856, "records": [ { "name": "model.layers.21.linear_attn.in_proj_z.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 0 }, { "name": "model.layers.21.linear_attn.in_proj_z.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 2097152 }, { "name": "model.layers.21.linear_attn.out_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 2359296 }, { "name": "model.layers.21.linear_attn.out_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 4456448 }, { "name": "model.layers.22.linear_attn.in_proj_z.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 4718592 }, { "name": "model.layers.22.linear_attn.in_proj_z.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 6815744 }, { "name": "model.layers.22.linear_attn.out_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 7077888 }, { "name": "model.layers.22.linear_attn.out_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 9175040 }, { "name": "model.layers.8.linear_attn.in_proj_z.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 9437184 }, { "name": "model.layers.8.linear_attn.in_proj_z.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 11534336 }, { "name": "model.layers.8.linear_attn.out_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 11796480 }, { "name": "model.layers.8.linear_attn.out_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 13893632 }, { "name": "model.layers.9.linear_attn.in_proj_z.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 14155776 }, { "name": "model.layers.9.linear_attn.in_proj_z.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 16252928 }, { "name": "model.layers.9.linear_attn.out_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16515072 }, { "name": "model.layers.9.linear_attn.out_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 18612224 }, { "name": "model.layers.0.linear_attn.in_proj_z.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 18874368 }, { "name": "model.layers.0.linear_attn.in_proj_z.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 20971520 }, { "name": "model.layers.0.linear_attn.out_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 21233664 }, { "name": "model.layers.0.linear_attn.out_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 23330816 }, { "name": "model.layers.12.linear_attn.in_proj_b.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23592960 }, { "name": "model.layers.12.linear_attn.in_proj_b.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23609344 }, { "name": "model.layers.12.linear_attn.in_proj_a.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23611392 }, { "name": "model.layers.12.linear_attn.in_proj_a.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23627776 }, { "name": "model.layers.4.linear_attn.in_proj_b.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23629824 }, { "name": "model.layers.4.linear_attn.in_proj_b.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23646208 }, { "name": "model.layers.4.linear_attn.in_proj_a.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23648256 }, { "name": "model.layers.4.linear_attn.in_proj_a.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23664640 }, { "name": "model.layers.14.linear_attn.in_proj_b.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23666688 }, { "name": "model.layers.14.linear_attn.in_proj_b.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23683072 }, { "name": "model.layers.14.linear_attn.in_proj_a.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23685120 }, { "name": "model.layers.14.linear_attn.in_proj_a.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23701504 }, { "name": "model.layers.20.linear_attn.in_proj_b.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23703552 }, { "name": "model.layers.20.linear_attn.in_proj_b.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23719936 }, { "name": "model.layers.20.linear_attn.in_proj_a.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23721984 }, { "name": "model.layers.20.linear_attn.in_proj_a.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23738368 }, { "name": "model.layers.18.linear_attn.in_proj_b.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23740416 }, { "name": "model.layers.18.linear_attn.in_proj_b.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23756800 }, { "name": "model.layers.18.linear_attn.in_proj_a.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23758848 }, { "name": "model.layers.18.linear_attn.in_proj_a.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23775232 }, { "name": "model.layers.13.linear_attn.in_proj_b.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23777280 }, { "name": "model.layers.13.linear_attn.in_proj_b.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23793664 }, { "name": "model.layers.13.linear_attn.in_proj_a.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23795712 }, { "name": "model.layers.13.linear_attn.in_proj_a.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23812096 }, { "name": "model.layers.2.linear_attn.in_proj_b.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23814144 }, { "name": "model.layers.2.linear_attn.in_proj_b.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23830528 }, { "name": "model.layers.2.linear_attn.in_proj_a.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23832576 }, { "name": "model.layers.2.linear_attn.in_proj_a.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23848960 }, { "name": "model.layers.1.linear_attn.in_proj_b.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23851008 }, { "name": "model.layers.1.linear_attn.in_proj_b.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23867392 }, { "name": "model.layers.1.linear_attn.in_proj_a.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23869440 }, { "name": "model.layers.1.linear_attn.in_proj_a.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23885824 }, { "name": "model.layers.10.linear_attn.in_proj_b.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23887872 }, { "name": "model.layers.10.linear_attn.in_proj_b.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23904256 }, { "name": "model.layers.10.linear_attn.in_proj_a.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23906304 }, { "name": "model.layers.10.linear_attn.in_proj_a.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23922688 }, { "name": "model.layers.16.linear_attn.in_proj_b.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23924736 }, { "name": "model.layers.16.linear_attn.in_proj_b.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23941120 }, { "name": "model.layers.16.linear_attn.in_proj_a.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23943168 }, { "name": "model.layers.16.linear_attn.in_proj_a.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23959552 }, { "name": "model.layers.17.linear_attn.in_proj_b.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23961600 }, { "name": "model.layers.17.linear_attn.in_proj_b.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23977984 }, { "name": "model.layers.17.linear_attn.in_proj_a.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23980032 }, { "name": "model.layers.17.linear_attn.in_proj_a.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23996416 }, { "name": "model.layers.5.linear_attn.in_proj_b.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23998464 }, { "name": "model.layers.5.linear_attn.in_proj_b.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24014848 }, { "name": "model.layers.5.linear_attn.in_proj_a.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24016896 }, { "name": "model.layers.5.linear_attn.in_proj_a.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24033280 }, { "name": "model.layers.6.linear_attn.in_proj_b.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24035328 }, { "name": "model.layers.6.linear_attn.in_proj_b.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24051712 }, { "name": "model.layers.6.linear_attn.in_proj_a.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24053760 }, { "name": "model.layers.6.linear_attn.in_proj_a.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24070144 }, { "name": "model.layers.21.linear_attn.in_proj_b.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24072192 }, { "name": "model.layers.21.linear_attn.in_proj_b.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24088576 }, { "name": "model.layers.21.linear_attn.in_proj_a.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24090624 }, { "name": "model.layers.21.linear_attn.in_proj_a.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24107008 }, { "name": "model.layers.22.linear_attn.in_proj_b.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24109056 }, { "name": "model.layers.22.linear_attn.in_proj_b.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24125440 }, { "name": "model.layers.22.linear_attn.in_proj_a.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24127488 }, { "name": "model.layers.22.linear_attn.in_proj_a.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24143872 }, { "name": "model.layers.8.linear_attn.in_proj_b.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24145920 }, { "name": "model.layers.8.linear_attn.in_proj_b.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24162304 }, { "name": "model.layers.8.linear_attn.in_proj_a.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24164352 }, { "name": "model.layers.8.linear_attn.in_proj_a.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24180736 }, { "name": "model.layers.9.linear_attn.in_proj_b.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24182784 }, { "name": "model.layers.9.linear_attn.in_proj_b.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24199168 }, { "name": "model.layers.9.linear_attn.in_proj_a.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24201216 }, { "name": "model.layers.9.linear_attn.in_proj_a.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24217600 }, { "name": "model.layers.0.linear_attn.in_proj_b.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24219648 }, { "name": "model.layers.0.linear_attn.in_proj_b.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24236032 }, { "name": "model.layers.0.linear_attn.in_proj_a.q_weight", "shape": [ 16, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24238080 }, { "name": "model.layers.0.linear_attn.in_proj_a.q_scale", "shape": [ 16, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24254464 }, { "name": "model.layers.8.linear_attn.conv1d_weight", "shape": [ 6144, 1, 4 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 24256512 }, { "name": "model.layers.12.linear_attn.conv1d_weight", "shape": [ 6144, 1, 4 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 24305664 }, { "name": "model.layers.4.linear_attn.conv1d_weight", "shape": [ 6144, 1, 4 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 24354816 }, { "name": "model.layers.5.linear_attn.conv1d_weight", "shape": [ 6144, 1, 4 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 24403968 }, { "name": "model.layers.20.linear_attn.conv1d_weight", "shape": [ 6144, 1, 4 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 24453120 }, { "name": "model.layers.21.linear_attn.conv1d_weight", "shape": [ 6144, 1, 4 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 24502272 }, { "name": "model.layers.18.linear_attn.conv1d_weight", "shape": [ 6144, 1, 4 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 24551424 }, { "name": "model.layers.13.linear_attn.conv1d_weight", "shape": [ 6144, 1, 4 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 24600576 }, { "name": "model.layers.14.linear_attn.conv1d_weight", "shape": [ 6144, 1, 4 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 24649728 }, { "name": "model.layers.2.linear_attn.conv1d_weight", "shape": [ 6144, 1, 4 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 24698880 }, { "name": "model.layers.10.linear_attn.conv1d_weight", "shape": [ 6144, 1, 4 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 24748032 }, { "name": "model.layers.16.linear_attn.conv1d_weight", "shape": [ 6144, 1, 4 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 24797184 }, { "name": "model.layers.17.linear_attn.conv1d_weight", "shape": [ 6144, 1, 4 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 24846336 }, { "name": "model.layers.6.linear_attn.conv1d_weight", "shape": [ 6144, 1, 4 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 24895488 }, { "name": "model.layers.22.linear_attn.conv1d_weight", "shape": [ 6144, 1, 4 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 24944640 }, { "name": "model.layers.9.linear_attn.conv1d_weight", "shape": [ 6144, 1, 4 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 24993792 }, { "name": "model.layers.0.linear_attn.conv1d_weight", "shape": [ 6144, 1, 4 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 25042944 }, { "name": "model.layers.1.linear_attn.conv1d_weight", "shape": [ 6144, 1, 4 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 25092096 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25141248 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25145344 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25149440 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25153536 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25157632 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25161728 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25174016 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25178112 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25182208 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25186304 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25190400 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25194496 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25198592 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25202688 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25206784 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25210880 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25214976 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25219072 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25223168 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25227264 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25231360 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25235456 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25239552 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25243648 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25247744 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25251840 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25255936 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25260032 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25264128 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25268224 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25272320 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25276416 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25280512 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25284608 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25288704 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25292800 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25296896 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25300992 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25305088 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25309184 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25313280 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25317376 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25321472 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25325568 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25329664 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25333760 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25337856 }, { "name": "model.layers.23.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 25341952 }, { "name": "model.layers.23.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 25342464 }, { "name": "model.layers.3.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 25342976 }, { "name": "model.layers.7.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 25343488 }, { "name": "model.layers.7.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 25344000 }, { "name": "model.layers.11.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 25344512 }, { "name": "model.layers.11.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 25345024 }, { "name": "model.layers.12.linear_attn.norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25345536 }, { "name": "model.layers.3.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 25345792 }, { "name": "model.layers.4.linear_attn.norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25346304 }, { "name": "model.layers.14.linear_attn.norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25346560 }, { "name": "model.layers.15.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 25346816 }, { "name": "model.layers.15.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 25347328 }, { "name": "model.layers.20.linear_attn.norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25347840 }, { "name": "model.layers.18.linear_attn.norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25348096 }, { "name": "model.layers.13.linear_attn.norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25348352 }, { "name": "model.layers.19.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 25348608 }, { "name": "model.layers.19.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 512, "byteOffset": 25349120 }, { "name": "model.layers.2.linear_attn.norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25349632 }, { "name": "model.layers.1.linear_attn.norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25349888 }, { "name": "model.layers.10.linear_attn.norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25350144 }, { "name": "model.layers.16.linear_attn.norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25350400 }, { "name": "model.layers.17.linear_attn.norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25350656 }, { "name": "model.layers.5.linear_attn.norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25350912 }, { "name": "model.layers.6.linear_attn.norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25351168 }, { "name": "model.layers.21.linear_attn.norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25351424 }, { "name": "model.layers.22.linear_attn.norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25351680 }, { "name": "model.layers.8.linear_attn.norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25351936 }, { "name": "model.layers.9.linear_attn.norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25352192 }, { "name": "model.layers.0.linear_attn.norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25352448 }, { "name": "model.layers.8.linear_attn.A_log", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25352704 }, { "name": "model.layers.12.linear_attn.A_log", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25352736 }, { "name": "model.layers.4.linear_attn.A_log", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25352768 }, { "name": "model.layers.5.linear_attn.A_log", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25352800 }, { "name": "model.layers.16.linear_attn.A_log", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25352832 }, { "name": "model.layers.20.linear_attn.A_log", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25352864 }, { "name": "model.layers.21.linear_attn.A_log", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25352896 }, { "name": "model.layers.18.linear_attn.A_log", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25352928 }, { "name": "model.layers.13.linear_attn.A_log", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25352960 }, { "name": "model.layers.14.linear_attn.A_log", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25352992 }, { "name": "model.layers.2.linear_attn.A_log", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353024 }, { "name": "model.layers.10.linear_attn.A_log", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353056 }, { "name": "model.layers.17.linear_attn.A_log", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353088 }, { "name": "model.layers.6.linear_attn.A_log", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353120 }, { "name": "model.layers.22.linear_attn.A_log", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353152 }, { "name": "model.layers.9.linear_attn.A_log", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353184 }, { "name": "model.layers.0.linear_attn.A_log", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353216 }, { "name": "model.layers.1.linear_attn.A_log", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353248 }, { "name": "model.layers.8.linear_attn.dt_bias", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353280 }, { "name": "model.layers.12.linear_attn.dt_bias", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353312 }, { "name": "model.layers.4.linear_attn.dt_bias", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353344 }, { "name": "model.layers.20.linear_attn.dt_bias", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353376 }, { "name": "model.layers.21.linear_attn.dt_bias", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353408 }, { "name": "model.layers.18.linear_attn.dt_bias", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353440 }, { "name": "model.layers.13.linear_attn.dt_bias", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353472 }, { "name": "model.layers.14.linear_attn.dt_bias", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353504 }, { "name": "model.layers.2.linear_attn.dt_bias", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353536 }, { "name": "model.layers.10.linear_attn.dt_bias", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353568 }, { "name": "model.layers.16.linear_attn.dt_bias", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353600 }, { "name": "model.layers.17.linear_attn.dt_bias", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353632 }, { "name": "model.layers.5.linear_attn.dt_bias", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353664 }, { "name": "model.layers.6.linear_attn.dt_bias", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353696 }, { "name": "model.layers.22.linear_attn.dt_bias", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353728 }, { "name": "model.layers.9.linear_attn.dt_bias", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353760 }, { "name": "model.layers.0.linear_attn.dt_bias", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353792 }, { "name": "model.layers.1.linear_attn.dt_bias", "shape": [ 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32, "byteOffset": 25353824 } ], "md5sum": "7caf26214a29ce5298e34fa128c68c08" } ] }