Instructions to use dixieclick/Qwen3.5-0.8B-VL-q4f16_1-MLC with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- MLC-LLM
How to use dixieclick/Qwen3.5-0.8B-VL-q4f16_1-MLC with MLC-LLM:
# No code snippets available yet for this library. # To use this model, check the repository files and the library's documentation. # Want to help? PRs adding snippets are welcome at: # https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
| { | |
| "metadata": { | |
| "ParamSize": 480, | |
| "ParamBytes": 621610240.0, | |
| "BitsPerParam": 5.693452146956644 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 127139840, | |
| "records": [ | |
| { | |
| "name": "language_model.model.embed_tokens.q_weight", | |
| "shape": [ | |
| 248320, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 127139840, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "072b42975051fdb2012ff1bbaaca7b24" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "visual.merger.fc1.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8aa81859a325849ce0e1902d0eee793f" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30103552, | |
| "records": [ | |
| { | |
| "name": "language_model.model.embed_tokens.q_scale", | |
| "shape": [ | |
| 248320, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 15892480, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "language_model.model.layers.14.linear_attn.in_proj_qzab.q_weight", | |
| "shape": [ | |
| 8224, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4210688, | |
| "byteOffset": 15892480 | |
| }, | |
| { | |
| "name": "language_model.model.layers.14.linear_attn.in_proj_qzab.q_scale", | |
| "shape": [ | |
| 8224, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 526336, | |
| "byteOffset": 20103168 | |
| }, | |
| { | |
| "name": "language_model.model.layers.12.linear_attn.in_proj_qzab.q_weight", | |
| "shape": [ | |
| 8224, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4210688, | |
| "byteOffset": 20629504 | |
| }, | |
| { | |
| "name": "language_model.model.layers.12.linear_attn.in_proj_qzab.q_scale", | |
| "shape": [ | |
| 8224, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 526336, | |
| "byteOffset": 24840192 | |
| }, | |
| { | |
| "name": "language_model.model.layers.18.linear_attn.in_proj_qzab.q_weight", | |
| "shape": [ | |
| 8224, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4210688, | |
| "byteOffset": 25366528 | |
| }, | |
| { | |
| "name": "language_model.model.layers.18.linear_attn.in_proj_qzab.q_scale", | |
| "shape": [ | |
| 8224, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 526336, | |
| "byteOffset": 29577216 | |
| } | |
| ], | |
| "md5sum": "0fe93be56f57b54750a8a126a8f21c6b" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33159168, | |
| "records": [ | |
| { | |
| "name": "language_model.model.layers.13.linear_attn.in_proj_qzab.q_weight", | |
| "shape": [ | |
| 8224, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4210688, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "language_model.model.layers.13.linear_attn.in_proj_qzab.q_scale", | |
| "shape": [ | |
| 8224, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 526336, | |
| "byteOffset": 4210688 | |
| }, | |
| { | |
| "name": "language_model.model.layers.2.linear_attn.in_proj_qzab.q_weight", | |
| "shape": [ | |
| 8224, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4210688, | |
| "byteOffset": 4737024 | |
| }, | |
| { | |
| "name": "language_model.model.layers.2.linear_attn.in_proj_qzab.q_scale", | |
| "shape": [ | |
| 8224, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 526336, | |
| "byteOffset": 8947712 | |
| }, | |
| { | |
| "name": "language_model.model.layers.4.linear_attn.in_proj_qzab.q_weight", | |
| "shape": [ | |
| 8224, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4210688, | |
| "byteOffset": 9474048 | |
| }, | |
| { | |
| "name": "language_model.model.layers.4.linear_attn.in_proj_qzab.q_scale", | |
| "shape": [ | |
| 8224, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 526336, | |
| "byteOffset": 13684736 | |
| }, | |
| { | |
| "name": "language_model.model.layers.20.linear_attn.in_proj_qzab.q_weight", | |
| "shape": [ | |
| 8224, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4210688, | |
| "byteOffset": 14211072 | |
| }, | |
| { | |
| "name": "language_model.model.layers.20.linear_attn.in_proj_qzab.q_scale", | |
| "shape": [ | |
| 8224, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 526336, | |
| "byteOffset": 18421760 | |
| }, | |
| { | |
| "name": "language_model.model.layers.1.linear_attn.in_proj_qzab.q_weight", | |
| "shape": [ | |
| 8224, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4210688, | |
| "byteOffset": 18948096 | |
| }, | |
| { | |
| "name": "language_model.model.layers.1.linear_attn.in_proj_qzab.q_scale", | |
| "shape": [ | |
| 8224, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 526336, | |
| "byteOffset": 23158784 | |
| }, | |
| { | |
| "name": "language_model.model.layers.10.linear_attn.in_proj_qzab.q_weight", | |
| "shape": [ | |
| 8224, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4210688, | |
| "byteOffset": 23685120 | |
| }, | |
| { | |
| "name": "language_model.model.layers.10.linear_attn.in_proj_qzab.q_scale", | |
| "shape": [ | |
| 8224, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 526336, | |
| "byteOffset": 27895808 | |
| }, | |
| { | |
| "name": "language_model.model.layers.16.linear_attn.in_proj_qzab.q_weight", | |
| "shape": [ | |
| 8224, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4210688, | |
| "byteOffset": 28422144 | |
| }, | |
| { | |
| "name": "language_model.model.layers.16.linear_attn.in_proj_qzab.q_scale", | |
| "shape": [ | |
| 8224, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 526336, | |
| "byteOffset": 32632832 | |
| } | |
| ], | |
| "md5sum": "44f02752d4e7f3237005c12c08390c33" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33159168, | |
| "records": [ | |
| { | |
| "name": "language_model.model.layers.17.linear_attn.in_proj_qzab.q_weight", | |
| "shape": [ | |
| 8224, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4210688, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "language_model.model.layers.17.linear_attn.in_proj_qzab.q_scale", | |
| "shape": [ | |
| 8224, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 526336, | |
| "byteOffset": 4210688 | |
| }, | |
| { | |
| "name": "language_model.model.layers.5.linear_attn.in_proj_qzab.q_weight", | |
| "shape": [ | |
| 8224, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4210688, | |
| "byteOffset": 4737024 | |
| }, | |
| { | |
| "name": "language_model.model.layers.5.linear_attn.in_proj_qzab.q_scale", | |
| "shape": [ | |
| 8224, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 526336, | |
| "byteOffset": 8947712 | |
| }, | |
| { | |
| "name": "language_model.model.layers.6.linear_attn.in_proj_qzab.q_weight", | |
| "shape": [ | |
| 8224, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4210688, | |
| "byteOffset": 9474048 | |
| }, | |
| { | |
| "name": "language_model.model.layers.6.linear_attn.in_proj_qzab.q_scale", | |
| "shape": [ | |
| 8224, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 526336, | |
| "byteOffset": 13684736 | |
| }, | |
| { | |
| "name": "language_model.model.layers.8.linear_attn.in_proj_qzab.q_weight", | |
| "shape": [ | |
| 8224, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4210688, | |
| "byteOffset": 14211072 | |
| }, | |
| { | |
| "name": "language_model.model.layers.8.linear_attn.in_proj_qzab.q_scale", | |
| "shape": [ | |
| 8224, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 526336, | |
| "byteOffset": 18421760 | |
| }, | |
| { | |
| "name": "language_model.model.layers.9.linear_attn.in_proj_qzab.q_weight", | |
| "shape": [ | |
| 8224, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4210688, | |
| "byteOffset": 18948096 | |
| }, | |
| { | |
| "name": "language_model.model.layers.9.linear_attn.in_proj_qzab.q_scale", | |
| "shape": [ | |
| 8224, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 526336, | |
| "byteOffset": 23158784 | |
| }, | |
| { | |
| "name": "language_model.model.layers.21.linear_attn.in_proj_qzab.q_weight", | |
| "shape": [ | |
| 8224, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4210688, | |
| "byteOffset": 23685120 | |
| }, | |
| { | |
| "name": "language_model.model.layers.21.linear_attn.in_proj_qzab.q_scale", | |
| "shape": [ | |
| 8224, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 526336, | |
| "byteOffset": 27895808 | |
| }, | |
| { | |
| "name": "language_model.model.layers.22.linear_attn.in_proj_qzab.q_weight", | |
| "shape": [ | |
| 8224, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4210688, | |
| "byteOffset": 28422144 | |
| }, | |
| { | |
| "name": "language_model.model.layers.22.linear_attn.in_proj_qzab.q_scale", | |
| "shape": [ | |
| 8224, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 526336, | |
| "byteOffset": 32632832 | |
| } | |
| ], | |
| "md5sum": "a65ca3d15926417f2a49043c89e2d6fe" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30689280, | |
| "records": [ | |
| { | |
| "name": "language_model.model.layers.0.linear_attn.in_proj_qzab.q_weight", | |
| "shape": [ | |
| 8224, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4210688, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "language_model.model.layers.0.linear_attn.in_proj_qzab.q_scale", | |
| "shape": [ | |
| 8224, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 526336, | |
| "byteOffset": 4210688 | |
| }, | |
| { | |
| "name": "language_model.model.layers.23.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 5120, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2621440, | |
| "byteOffset": 4737024 | |
| }, | |
| { | |
| "name": "language_model.model.layers.23.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 5120, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 327680, | |
| "byteOffset": 7358464 | |
| }, | |
| { | |
| "name": "language_model.model.layers.15.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 5120, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2621440, | |
| "byteOffset": 7686144 | |
| }, | |
| { | |
| "name": "language_model.model.layers.15.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 5120, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 327680, | |
| "byteOffset": 10307584 | |
| }, | |
| { | |
| "name": "language_model.model.layers.7.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 5120, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2621440, | |
| "byteOffset": 10635264 | |
| }, | |
| { | |
| "name": "language_model.model.layers.7.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 5120, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 327680, | |
| "byteOffset": 13256704 | |
| }, | |
| { | |
| "name": "language_model.model.layers.11.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 5120, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2621440, | |
| "byteOffset": 13584384 | |
| }, | |
| { | |
| "name": "language_model.model.layers.11.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 5120, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 327680, | |
| "byteOffset": 16205824 | |
| }, | |
| { | |
| "name": "language_model.model.layers.19.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 5120, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2621440, | |
| "byteOffset": 16533504 | |
| }, | |
| { | |
| "name": "language_model.model.layers.19.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 5120, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 327680, | |
| "byteOffset": 19154944 | |
| }, | |
| { | |
| "name": "language_model.model.layers.3.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 5120, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2621440, | |
| "byteOffset": 19482624 | |
| }, | |
| { | |
| "name": "language_model.model.layers.3.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 5120, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 327680, | |
| "byteOffset": 22104064 | |
| }, | |
| { | |
| "name": "language_model.model.layers.22.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 22431744 | |
| }, | |
| { | |
| "name": "language_model.model.layers.22.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 24266752 | |
| }, | |
| { | |
| "name": "language_model.model.layers.22.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 24496128 | |
| }, | |
| { | |
| "name": "language_model.model.layers.22.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 28166144 | |
| }, | |
| { | |
| "name": "language_model.model.layers.23.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 28624896 | |
| }, | |
| { | |
| "name": "language_model.model.layers.23.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 30459904 | |
| } | |
| ], | |
| "md5sum": "189a3417609526f533949c1b946c983c" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30965760, | |
| "records": [ | |
| { | |
| "name": "language_model.model.layers.23.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "language_model.model.layers.23.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "language_model.model.layers.3.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 4128768 | |
| }, | |
| { | |
| "name": "language_model.model.layers.3.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 5963776 | |
| }, | |
| { | |
| "name": "language_model.model.layers.3.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 6193152 | |
| }, | |
| { | |
| "name": "language_model.model.layers.3.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 9863168 | |
| }, | |
| { | |
| "name": "language_model.model.layers.14.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 10321920 | |
| }, | |
| { | |
| "name": "language_model.model.layers.14.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 12156928 | |
| }, | |
| { | |
| "name": "language_model.model.layers.14.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 12386304 | |
| }, | |
| { | |
| "name": "language_model.model.layers.14.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 16056320 | |
| }, | |
| { | |
| "name": "language_model.model.layers.15.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 16515072 | |
| }, | |
| { | |
| "name": "language_model.model.layers.15.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 18350080 | |
| }, | |
| { | |
| "name": "language_model.model.layers.15.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 18579456 | |
| }, | |
| { | |
| "name": "language_model.model.layers.15.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 22249472 | |
| }, | |
| { | |
| "name": "language_model.model.layers.6.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 22708224 | |
| }, | |
| { | |
| "name": "language_model.model.layers.6.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 24543232 | |
| }, | |
| { | |
| "name": "language_model.model.layers.6.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 24772608 | |
| }, | |
| { | |
| "name": "language_model.model.layers.6.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 28442624 | |
| }, | |
| { | |
| "name": "language_model.model.layers.7.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 28901376 | |
| }, | |
| { | |
| "name": "language_model.model.layers.7.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 30736384 | |
| } | |
| ], | |
| "md5sum": "f3c14d97eb2d253cbe2a26476f145622" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "language_model.model.layers.7.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "language_model.model.layers.7.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "language_model.model.layers.10.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 4128768 | |
| }, | |
| { | |
| "name": "language_model.model.layers.10.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 5963776 | |
| }, | |
| { | |
| "name": "language_model.model.layers.11.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 6193152 | |
| }, | |
| { | |
| "name": "language_model.model.layers.11.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 8028160 | |
| }, | |
| { | |
| "name": "language_model.model.layers.11.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 8257536 | |
| }, | |
| { | |
| "name": "language_model.model.layers.11.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 11927552 | |
| }, | |
| { | |
| "name": "language_model.model.layers.9.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 12386304 | |
| }, | |
| { | |
| "name": "language_model.model.layers.9.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 14221312 | |
| }, | |
| { | |
| "name": "language_model.model.layers.9.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 14450688 | |
| }, | |
| { | |
| "name": "language_model.model.layers.9.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 18120704 | |
| }, | |
| { | |
| "name": "language_model.model.layers.17.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 18579456 | |
| }, | |
| { | |
| "name": "language_model.model.layers.17.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 20414464 | |
| }, | |
| { | |
| "name": "language_model.model.layers.17.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 20643840 | |
| }, | |
| { | |
| "name": "language_model.model.layers.17.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 24313856 | |
| }, | |
| { | |
| "name": "language_model.model.layers.18.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 24772608 | |
| }, | |
| { | |
| "name": "language_model.model.layers.18.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 26607616 | |
| }, | |
| { | |
| "name": "language_model.model.layers.18.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 26836992 | |
| }, | |
| { | |
| "name": "language_model.model.layers.18.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 30507008 | |
| }, | |
| { | |
| "name": "language_model.model.layers.12.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 30965760 | |
| }, | |
| { | |
| "name": "language_model.model.layers.12.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 32800768 | |
| } | |
| ], | |
| "md5sum": "5d04bee292ce2ce525f23eb2d28a674f" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30965760, | |
| "records": [ | |
| { | |
| "name": "language_model.model.layers.12.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "language_model.model.layers.12.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "language_model.model.layers.13.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 4128768 | |
| }, | |
| { | |
| "name": "language_model.model.layers.13.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 5963776 | |
| }, | |
| { | |
| "name": "language_model.model.layers.13.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 6193152 | |
| }, | |
| { | |
| "name": "language_model.model.layers.13.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 9863168 | |
| }, | |
| { | |
| "name": "language_model.model.layers.19.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 10321920 | |
| }, | |
| { | |
| "name": "language_model.model.layers.19.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 12156928 | |
| }, | |
| { | |
| "name": "language_model.model.layers.19.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 12386304 | |
| }, | |
| { | |
| "name": "language_model.model.layers.19.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 16056320 | |
| }, | |
| { | |
| "name": "language_model.model.layers.4.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 16515072 | |
| }, | |
| { | |
| "name": "language_model.model.layers.4.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 18350080 | |
| }, | |
| { | |
| "name": "language_model.model.layers.4.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 18579456 | |
| }, | |
| { | |
| "name": "language_model.model.layers.4.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 22249472 | |
| }, | |
| { | |
| "name": "language_model.model.layers.2.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 22708224 | |
| }, | |
| { | |
| "name": "language_model.model.layers.2.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 24543232 | |
| }, | |
| { | |
| "name": "language_model.model.layers.2.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 24772608 | |
| }, | |
| { | |
| "name": "language_model.model.layers.2.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 28442624 | |
| }, | |
| { | |
| "name": "language_model.model.layers.20.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 28901376 | |
| }, | |
| { | |
| "name": "language_model.model.layers.20.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 30736384 | |
| } | |
| ], | |
| "md5sum": "49b8d8ce66fa9986f6329cc4db5351c6" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "language_model.model.layers.20.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "language_model.model.layers.20.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "language_model.model.layers.1.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 4128768 | |
| }, | |
| { | |
| "name": "language_model.model.layers.1.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 5963776 | |
| }, | |
| { | |
| "name": "language_model.model.layers.1.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 6193152 | |
| }, | |
| { | |
| "name": "language_model.model.layers.1.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 9863168 | |
| }, | |
| { | |
| "name": "language_model.model.layers.10.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 10321920 | |
| }, | |
| { | |
| "name": "language_model.model.layers.10.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 13991936 | |
| }, | |
| { | |
| "name": "language_model.model.layers.16.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 14450688 | |
| }, | |
| { | |
| "name": "language_model.model.layers.16.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 16285696 | |
| }, | |
| { | |
| "name": "language_model.model.layers.16.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 16515072 | |
| }, | |
| { | |
| "name": "language_model.model.layers.16.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 20185088 | |
| }, | |
| { | |
| "name": "language_model.model.layers.5.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 20643840 | |
| }, | |
| { | |
| "name": "language_model.model.layers.5.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 22478848 | |
| }, | |
| { | |
| "name": "language_model.model.layers.5.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 22708224 | |
| }, | |
| { | |
| "name": "language_model.model.layers.5.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 26378240 | |
| }, | |
| { | |
| "name": "language_model.model.layers.8.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 26836992 | |
| }, | |
| { | |
| "name": "language_model.model.layers.8.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 28672000 | |
| }, | |
| { | |
| "name": "language_model.model.layers.8.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 28901376 | |
| }, | |
| { | |
| "name": "language_model.model.layers.8.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 32571392 | |
| } | |
| ], | |
| "md5sum": "cd3cc6cb0d236dc2238ea1d62f9bb5ed" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32833536, | |
| "records": [ | |
| { | |
| "name": "language_model.model.layers.21.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "language_model.model.layers.21.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 1835008 | |
| }, | |
| { | |
| "name": "language_model.model.layers.21.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 2064384 | |
| }, | |
| { | |
| "name": "language_model.model.layers.21.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 5734400 | |
| }, | |
| { | |
| "name": "language_model.model.layers.0.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 6193152 | |
| }, | |
| { | |
| "name": "language_model.model.layers.0.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 8028160 | |
| }, | |
| { | |
| "name": "language_model.model.layers.0.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 8257536 | |
| }, | |
| { | |
| "name": "language_model.model.layers.0.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 11927552 | |
| }, | |
| { | |
| "name": "visual.merger.fc2.weight", | |
| "shape": [ | |
| 1024, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 12386304 | |
| }, | |
| { | |
| "name": "visual.blocks.0.mlp.fc1.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 18677760 | |
| }, | |
| { | |
| "name": "visual.blocks.0.mlp.fc2.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 23396352 | |
| }, | |
| { | |
| "name": "visual.blocks.1.mlp.fc1.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 28114944 | |
| } | |
| ], | |
| "md5sum": "1bfa7e1ff088c06dc118c4fc07763af5" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "visual.blocks.1.mlp.fc2.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "visual.blocks.10.mlp.fc1.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "visual.blocks.10.mlp.fc2.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "visual.blocks.11.mlp.fc1.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "visual.blocks.11.mlp.fc2.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 18874368 | |
| }, | |
| { | |
| "name": "visual.blocks.2.mlp.fc1.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 23592960 | |
| }, | |
| { | |
| "name": "visual.blocks.2.mlp.fc2.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 28311552 | |
| } | |
| ], | |
| "md5sum": "c4b491048843146e209539ae26e4c3dd" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "visual.blocks.3.mlp.fc1.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "visual.blocks.3.mlp.fc2.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "visual.blocks.4.mlp.fc1.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "visual.blocks.4.mlp.fc2.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "visual.blocks.5.mlp.fc1.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 18874368 | |
| }, | |
| { | |
| "name": "visual.blocks.5.mlp.fc2.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 23592960 | |
| }, | |
| { | |
| "name": "visual.blocks.6.mlp.fc1.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 28311552 | |
| } | |
| ], | |
| "md5sum": "dd4a95252f792de20f9b024693bb2b83" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33030144, | |
| "records": [ | |
| { | |
| "name": "visual.blocks.6.mlp.fc2.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "visual.blocks.7.mlp.fc1.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "visual.blocks.7.mlp.fc2.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "visual.blocks.8.mlp.fc1.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "visual.blocks.8.mlp.fc2.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 18874368 | |
| }, | |
| { | |
| "name": "visual.blocks.9.mlp.fc1.weight", | |
| "shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 23592960 | |
| }, | |
| { | |
| "name": "visual.blocks.9.mlp.fc2.weight", | |
| "shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4718592, | |
| "byteOffset": 28311552 | |
| } | |
| ], | |
| "md5sum": "d90fffcf69966b41e29c245806e3015c" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31850496, | |
| "records": [ | |
| { | |
| "name": "language_model.model.layers.23.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "language_model.model.layers.23.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 1048576 | |
| }, | |
| { | |
| "name": "language_model.model.layers.3.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 1179648 | |
| }, | |
| { | |
| "name": "language_model.model.layers.3.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 2228224 | |
| }, | |
| { | |
| "name": "language_model.model.layers.14.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 2359296 | |
| }, | |
| { | |
| "name": "language_model.model.layers.14.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 3407872 | |
| }, | |
| { | |
| "name": "language_model.model.layers.15.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 3538944 | |
| }, | |
| { | |
| "name": "language_model.model.layers.15.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 4587520 | |
| }, | |
| { | |
| "name": "language_model.model.layers.6.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "language_model.model.layers.6.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 5767168 | |
| }, | |
| { | |
| "name": "language_model.model.layers.7.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 5898240 | |
| }, | |
| { | |
| "name": "language_model.model.layers.7.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 6946816 | |
| }, | |
| { | |
| "name": "language_model.model.layers.11.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 7077888 | |
| }, | |
| { | |
| "name": "language_model.model.layers.11.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 8126464 | |
| }, | |
| { | |
| "name": "language_model.model.layers.12.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8257536 | |
| }, | |
| { | |
| "name": "language_model.model.layers.12.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 9306112 | |
| }, | |
| { | |
| "name": "language_model.model.layers.17.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "language_model.model.layers.17.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 10485760 | |
| }, | |
| { | |
| "name": "language_model.model.layers.18.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 10616832 | |
| }, | |
| { | |
| "name": "language_model.model.layers.18.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 11665408 | |
| }, | |
| { | |
| "name": "language_model.model.layers.13.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 11796480 | |
| }, | |
| { | |
| "name": "language_model.model.layers.13.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 12845056 | |
| }, | |
| { | |
| "name": "language_model.model.layers.19.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 12976128 | |
| }, | |
| { | |
| "name": "language_model.model.layers.19.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 14024704 | |
| }, | |
| { | |
| "name": "language_model.model.layers.2.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "language_model.model.layers.2.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 15204352 | |
| }, | |
| { | |
| "name": "language_model.model.layers.4.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 15335424 | |
| }, | |
| { | |
| "name": "language_model.model.layers.4.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 16384000 | |
| }, | |
| { | |
| "name": "language_model.model.layers.20.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 16515072 | |
| }, | |
| { | |
| "name": "language_model.model.layers.20.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 17563648 | |
| }, | |
| { | |
| "name": "language_model.model.layers.1.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 17694720 | |
| }, | |
| { | |
| "name": "language_model.model.layers.1.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 18743296 | |
| }, | |
| { | |
| "name": "language_model.model.layers.10.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 18874368 | |
| }, | |
| { | |
| "name": "language_model.model.layers.10.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 19922944 | |
| }, | |
| { | |
| "name": "language_model.model.layers.16.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 20054016 | |
| }, | |
| { | |
| "name": "language_model.model.layers.16.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 21102592 | |
| }, | |
| { | |
| "name": "language_model.model.layers.5.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 21233664 | |
| }, | |
| { | |
| "name": "language_model.model.layers.5.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 22282240 | |
| }, | |
| { | |
| "name": "language_model.model.layers.8.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 22413312 | |
| }, | |
| { | |
| "name": "language_model.model.layers.8.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 23461888 | |
| }, | |
| { | |
| "name": "language_model.model.layers.9.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 23592960 | |
| }, | |
| { | |
| "name": "language_model.model.layers.9.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 24641536 | |
| }, | |
| { | |
| "name": "language_model.model.layers.21.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 24772608 | |
| }, | |
| { | |
| "name": "language_model.model.layers.21.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 25821184 | |
| }, | |
| { | |
| "name": "language_model.model.layers.22.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 25952256 | |
| }, | |
| { | |
| "name": "language_model.model.layers.22.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 27000832 | |
| }, | |
| { | |
| "name": "language_model.model.layers.0.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27131904 | |
| }, | |
| { | |
| "name": "language_model.model.layers.0.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 28180480 | |
| }, | |
| { | |
| "name": "visual.blocks.0.attn.qkv.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 28311552 | |
| } | |
| ], | |
| "md5sum": "f261436ac6d520268fd424883bd337c0" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31850496, | |
| "records": [ | |
| { | |
| "name": "visual.blocks.1.attn.qkv.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "visual.blocks.10.attn.qkv.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 3538944 | |
| }, | |
| { | |
| "name": "visual.blocks.11.attn.qkv.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 7077888 | |
| }, | |
| { | |
| "name": "visual.blocks.2.attn.qkv.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 10616832 | |
| }, | |
| { | |
| "name": "visual.blocks.3.attn.qkv.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 14155776 | |
| }, | |
| { | |
| "name": "visual.blocks.4.attn.qkv.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 17694720 | |
| }, | |
| { | |
| "name": "visual.blocks.5.attn.qkv.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 21233664 | |
| }, | |
| { | |
| "name": "visual.blocks.6.attn.qkv.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 24772608 | |
| }, | |
| { | |
| "name": "visual.blocks.7.attn.qkv.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 28311552 | |
| } | |
| ], | |
| "md5sum": "a79065b5141b31920df97083ce363986" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24866944, | |
| "records": [ | |
| { | |
| "name": "visual.blocks.8.attn.qkv.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "visual.blocks.9.attn.qkv.weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3538944, | |
| "byteOffset": 3538944 | |
| }, | |
| { | |
| "name": "visual.pos_embed", | |
| "shape": [ | |
| 784, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1204224, | |
| "byteOffset": 7077888 | |
| }, | |
| { | |
| "name": "visual.patch_embed.proj.weight", | |
| "shape": [ | |
| 768, | |
| 3, | |
| 16, | |
| 16 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 8282112 | |
| }, | |
| { | |
| "name": "visual.blocks.0.attn.proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 9461760 | |
| }, | |
| { | |
| "name": "visual.blocks.1.attn.proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 10641408 | |
| }, | |
| { | |
| "name": "visual.blocks.10.attn.proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 11821056 | |
| }, | |
| { | |
| "name": "visual.blocks.11.attn.proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 13000704 | |
| }, | |
| { | |
| "name": "visual.blocks.2.attn.proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 14180352 | |
| }, | |
| { | |
| "name": "visual.blocks.3.attn.proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 15360000 | |
| }, | |
| { | |
| "name": "visual.blocks.4.attn.proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 16539648 | |
| }, | |
| { | |
| "name": "visual.blocks.5.attn.proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 17719296 | |
| }, | |
| { | |
| "name": "visual.blocks.6.attn.proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "visual.blocks.7.attn.proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 20078592 | |
| }, | |
| { | |
| "name": "visual.blocks.8.attn.proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 21258240 | |
| }, | |
| { | |
| "name": "visual.blocks.9.attn.proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1179648, | |
| "byteOffset": 22437888 | |
| }, | |
| { | |
| "name": "language_model.model.layers.8.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 23617536 | |
| }, | |
| { | |
| "name": "language_model.model.layers.12.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 23666688 | |
| }, | |
| { | |
| "name": "language_model.model.layers.18.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 23715840 | |
| }, | |
| { | |
| "name": "language_model.model.layers.13.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 23764992 | |
| }, | |
| { | |
| "name": "language_model.model.layers.14.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 23814144 | |
| }, | |
| { | |
| "name": "language_model.model.layers.2.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 23863296 | |
| }, | |
| { | |
| "name": "language_model.model.layers.4.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 23912448 | |
| }, | |
| { | |
| "name": "language_model.model.layers.5.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 23961600 | |
| }, | |
| { | |
| "name": "language_model.model.layers.20.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 24010752 | |
| }, | |
| { | |
| "name": "language_model.model.layers.21.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 24059904 | |
| }, | |
| { | |
| "name": "language_model.model.layers.10.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 24109056 | |
| }, | |
| { | |
| "name": "language_model.model.layers.16.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 24158208 | |
| }, | |
| { | |
| "name": "language_model.model.layers.17.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 24207360 | |
| }, | |
| { | |
| "name": "language_model.model.layers.6.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 24256512 | |
| }, | |
| { | |
| "name": "language_model.model.layers.9.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 24305664 | |
| }, | |
| { | |
| "name": "language_model.model.layers.22.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 24354816 | |
| }, | |
| { | |
| "name": "language_model.model.layers.0.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 24403968 | |
| }, | |
| { | |
| "name": "language_model.model.layers.1.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 24453120 | |
| }, | |
| { | |
| "name": "visual.blocks.0.mlp.fc1.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 24502272 | |
| }, | |
| { | |
| "name": "visual.blocks.1.mlp.fc1.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 24508416 | |
| }, | |
| { | |
| "name": "visual.blocks.10.mlp.fc1.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 24514560 | |
| }, | |
| { | |
| "name": "visual.blocks.11.mlp.fc1.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 24520704 | |
| }, | |
| { | |
| "name": "visual.blocks.2.mlp.fc1.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 24526848 | |
| }, | |
| { | |
| "name": "visual.blocks.3.mlp.fc1.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 24532992 | |
| }, | |
| { | |
| "name": "visual.blocks.4.mlp.fc1.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 24539136 | |
| }, | |
| { | |
| "name": "visual.blocks.5.mlp.fc1.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 24545280 | |
| }, | |
| { | |
| "name": "visual.blocks.6.mlp.fc1.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 24551424 | |
| }, | |
| { | |
| "name": "visual.blocks.7.mlp.fc1.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 24557568 | |
| }, | |
| { | |
| "name": "visual.blocks.8.mlp.fc1.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 24563712 | |
| }, | |
| { | |
| "name": "visual.blocks.9.mlp.fc1.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 24569856 | |
| }, | |
| { | |
| "name": "visual.merger.fc1.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 24576000 | |
| }, | |
| { | |
| "name": "visual.blocks.0.attn.qkv.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 24582144 | |
| }, | |
| { | |
| "name": "visual.blocks.1.attn.qkv.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 24586752 | |
| }, | |
| { | |
| "name": "visual.blocks.10.attn.qkv.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 24591360 | |
| }, | |
| { | |
| "name": "visual.blocks.11.attn.qkv.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 24595968 | |
| }, | |
| { | |
| "name": "visual.blocks.2.attn.qkv.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 24600576 | |
| }, | |
| { | |
| "name": "visual.blocks.3.attn.qkv.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 24605184 | |
| }, | |
| { | |
| "name": "visual.blocks.4.attn.qkv.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 24609792 | |
| }, | |
| { | |
| "name": "visual.blocks.5.attn.qkv.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 24614400 | |
| }, | |
| { | |
| "name": "visual.blocks.6.attn.qkv.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 24619008 | |
| }, | |
| { | |
| "name": "visual.blocks.7.attn.qkv.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 24623616 | |
| }, | |
| { | |
| "name": "visual.blocks.8.attn.qkv.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 24628224 | |
| }, | |
| { | |
| "name": "visual.blocks.9.attn.qkv.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 24632832 | |
| }, | |
| { | |
| "name": "language_model.model.layers.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24637440 | |
| }, | |
| { | |
| "name": "language_model.model.layers.23.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24639488 | |
| }, | |
| { | |
| "name": "language_model.model.layers.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24641536 | |
| }, | |
| { | |
| "name": "language_model.model.layers.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24643584 | |
| }, | |
| { | |
| "name": "language_model.model.layers.14.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24645632 | |
| }, | |
| { | |
| "name": "language_model.model.layers.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24647680 | |
| }, | |
| { | |
| "name": "language_model.model.layers.15.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24649728 | |
| }, | |
| { | |
| "name": "language_model.model.layers.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24651776 | |
| }, | |
| { | |
| "name": "language_model.model.layers.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24653824 | |
| }, | |
| { | |
| "name": "language_model.model.layers.7.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24655872 | |
| }, | |
| { | |
| "name": "language_model.model.layers.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24657920 | |
| }, | |
| { | |
| "name": "language_model.model.layers.8.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24659968 | |
| }, | |
| { | |
| "name": "language_model.model.layers.11.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24662016 | |
| }, | |
| { | |
| "name": "language_model.model.layers.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24664064 | |
| }, | |
| { | |
| "name": "language_model.model.layers.12.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24666112 | |
| }, | |
| { | |
| "name": "language_model.model.layers.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24668160 | |
| }, | |
| { | |
| "name": "language_model.model.layers.18.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24670208 | |
| }, | |
| { | |
| "name": "language_model.model.layers.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24672256 | |
| }, | |
| { | |
| "name": "language_model.model.layers.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24674304 | |
| }, | |
| { | |
| "name": "language_model.model.layers.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24676352 | |
| }, | |
| { | |
| "name": "language_model.model.layers.13.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24678400 | |
| }, | |
| { | |
| "name": "language_model.model.layers.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24680448 | |
| }, | |
| { | |
| "name": "language_model.model.layers.19.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24682496 | |
| }, | |
| { | |
| "name": "language_model.model.layers.2.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24684544 | |
| }, | |
| { | |
| "name": "language_model.model.layers.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24686592 | |
| }, | |
| { | |
| "name": "language_model.model.layers.3.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24688640 | |
| }, | |
| { | |
| "name": "language_model.model.layers.4.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24690688 | |
| }, | |
| { | |
| "name": "language_model.model.layers.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24692736 | |
| }, | |
| { | |
| "name": "language_model.model.layers.20.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24694784 | |
| }, | |
| { | |
| "name": "language_model.model.layers.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24696832 | |
| }, | |
| { | |
| "name": "language_model.model.layers.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24698880 | |
| }, | |
| { | |
| "name": "language_model.model.layers.10.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24700928 | |
| }, | |
| { | |
| "name": "language_model.model.layers.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24702976 | |
| }, | |
| { | |
| "name": "language_model.model.layers.16.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24705024 | |
| }, | |
| { | |
| "name": "language_model.model.layers.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24707072 | |
| }, | |
| { | |
| "name": "language_model.model.layers.17.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24709120 | |
| }, | |
| { | |
| "name": "language_model.model.layers.5.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24711168 | |
| }, | |
| { | |
| "name": "language_model.model.layers.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24713216 | |
| }, | |
| { | |
| "name": "language_model.model.layers.6.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24715264 | |
| }, | |
| { | |
| "name": "language_model.model.layers.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24717312 | |
| }, | |
| { | |
| "name": "language_model.model.layers.9.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24719360 | |
| }, | |
| { | |
| "name": "language_model.model.layers.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24721408 | |
| }, | |
| { | |
| "name": "language_model.model.layers.21.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24723456 | |
| }, | |
| { | |
| "name": "language_model.model.layers.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24725504 | |
| }, | |
| { | |
| "name": "language_model.model.layers.22.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24727552 | |
| }, | |
| { | |
| "name": "language_model.model.layers.0.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24729600 | |
| }, | |
| { | |
| "name": "language_model.model.layers.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24731648 | |
| }, | |
| { | |
| "name": "language_model.model.layers.1.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24733696 | |
| }, | |
| { | |
| "name": "language_model.model.norm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24735744 | |
| }, | |
| { | |
| "name": "visual.merger.fc2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24737792 | |
| }, | |
| { | |
| "name": "visual.blocks.0.attn.proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24739840 | |
| }, | |
| { | |
| "name": "visual.blocks.0.mlp.fc2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24741376 | |
| }, | |
| { | |
| "name": "visual.blocks.0.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24742912 | |
| }, | |
| { | |
| "name": "visual.blocks.0.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24744448 | |
| }, | |
| { | |
| "name": "visual.blocks.0.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24745984 | |
| }, | |
| { | |
| "name": "visual.blocks.0.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24747520 | |
| }, | |
| { | |
| "name": "visual.blocks.1.attn.proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24749056 | |
| }, | |
| { | |
| "name": "visual.blocks.1.mlp.fc2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24750592 | |
| }, | |
| { | |
| "name": "visual.blocks.1.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24752128 | |
| }, | |
| { | |
| "name": "visual.blocks.1.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24753664 | |
| }, | |
| { | |
| "name": "visual.blocks.1.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24755200 | |
| }, | |
| { | |
| "name": "visual.blocks.1.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24756736 | |
| }, | |
| { | |
| "name": "visual.blocks.10.attn.proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24758272 | |
| }, | |
| { | |
| "name": "visual.blocks.10.mlp.fc2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24759808 | |
| }, | |
| { | |
| "name": "visual.blocks.10.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24761344 | |
| }, | |
| { | |
| "name": "visual.blocks.10.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24762880 | |
| }, | |
| { | |
| "name": "visual.blocks.10.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24764416 | |
| }, | |
| { | |
| "name": "visual.blocks.10.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24765952 | |
| }, | |
| { | |
| "name": "visual.blocks.11.attn.proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24767488 | |
| }, | |
| { | |
| "name": "visual.blocks.11.mlp.fc2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24769024 | |
| }, | |
| { | |
| "name": "visual.blocks.11.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24770560 | |
| }, | |
| { | |
| "name": "visual.blocks.11.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24772096 | |
| }, | |
| { | |
| "name": "visual.blocks.11.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24773632 | |
| }, | |
| { | |
| "name": "visual.blocks.11.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24775168 | |
| }, | |
| { | |
| "name": "visual.blocks.2.attn.proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24776704 | |
| }, | |
| { | |
| "name": "visual.blocks.2.mlp.fc2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24778240 | |
| }, | |
| { | |
| "name": "visual.blocks.2.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24779776 | |
| }, | |
| { | |
| "name": "visual.blocks.2.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24781312 | |
| }, | |
| { | |
| "name": "visual.blocks.2.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24782848 | |
| }, | |
| { | |
| "name": "visual.blocks.2.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24784384 | |
| }, | |
| { | |
| "name": "visual.blocks.3.attn.proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24785920 | |
| }, | |
| { | |
| "name": "visual.blocks.3.mlp.fc2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24787456 | |
| }, | |
| { | |
| "name": "visual.blocks.3.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24788992 | |
| }, | |
| { | |
| "name": "visual.blocks.3.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24790528 | |
| }, | |
| { | |
| "name": "visual.blocks.3.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24792064 | |
| }, | |
| { | |
| "name": "visual.blocks.3.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24793600 | |
| }, | |
| { | |
| "name": "visual.blocks.4.attn.proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24795136 | |
| }, | |
| { | |
| "name": "visual.blocks.4.mlp.fc2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24796672 | |
| }, | |
| { | |
| "name": "visual.blocks.4.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24798208 | |
| }, | |
| { | |
| "name": "visual.blocks.4.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24799744 | |
| }, | |
| { | |
| "name": "visual.blocks.4.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24801280 | |
| }, | |
| { | |
| "name": "visual.blocks.4.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24802816 | |
| }, | |
| { | |
| "name": "visual.blocks.5.attn.proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24804352 | |
| }, | |
| { | |
| "name": "visual.blocks.5.mlp.fc2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24805888 | |
| }, | |
| { | |
| "name": "visual.blocks.5.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24807424 | |
| }, | |
| { | |
| "name": "visual.blocks.5.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24808960 | |
| }, | |
| { | |
| "name": "visual.blocks.5.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24810496 | |
| }, | |
| { | |
| "name": "visual.blocks.5.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24812032 | |
| }, | |
| { | |
| "name": "visual.blocks.6.attn.proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24813568 | |
| }, | |
| { | |
| "name": "visual.blocks.6.mlp.fc2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24815104 | |
| }, | |
| { | |
| "name": "visual.blocks.6.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24816640 | |
| }, | |
| { | |
| "name": "visual.blocks.6.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24818176 | |
| }, | |
| { | |
| "name": "visual.blocks.6.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24819712 | |
| }, | |
| { | |
| "name": "visual.blocks.6.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24821248 | |
| }, | |
| { | |
| "name": "visual.blocks.7.attn.proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24822784 | |
| }, | |
| { | |
| "name": "visual.blocks.7.mlp.fc2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24824320 | |
| }, | |
| { | |
| "name": "visual.blocks.7.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24825856 | |
| }, | |
| { | |
| "name": "visual.blocks.7.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24827392 | |
| }, | |
| { | |
| "name": "visual.blocks.7.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24828928 | |
| }, | |
| { | |
| "name": "visual.blocks.7.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24830464 | |
| }, | |
| { | |
| "name": "visual.blocks.8.attn.proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24832000 | |
| }, | |
| { | |
| "name": "visual.blocks.8.mlp.fc2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24833536 | |
| }, | |
| { | |
| "name": "visual.blocks.8.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24835072 | |
| }, | |
| { | |
| "name": "visual.blocks.8.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24836608 | |
| }, | |
| { | |
| "name": "visual.blocks.8.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24838144 | |
| }, | |
| { | |
| "name": "visual.blocks.8.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24839680 | |
| }, | |
| { | |
| "name": "visual.blocks.9.attn.proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24841216 | |
| }, | |
| { | |
| "name": "visual.blocks.9.mlp.fc2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24842752 | |
| }, | |
| { | |
| "name": "visual.blocks.9.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24844288 | |
| }, | |
| { | |
| "name": "visual.blocks.9.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24845824 | |
| }, | |
| { | |
| "name": "visual.blocks.9.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24847360 | |
| }, | |
| { | |
| "name": "visual.blocks.9.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24848896 | |
| }, | |
| { | |
| "name": "visual.merger.norm.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24850432 | |
| }, | |
| { | |
| "name": "visual.merger.norm.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24851968 | |
| }, | |
| { | |
| "name": "visual.patch_embed.proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1536, | |
| "byteOffset": 24853504 | |
| }, | |
| { | |
| "name": "language_model.model.layers.23.self_attn.k_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 24855040 | |
| }, | |
| { | |
| "name": "language_model.model.layers.23.self_attn.q_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 24855552 | |
| }, | |
| { | |
| "name": "language_model.model.layers.3.self_attn.k_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 24856064 | |
| }, | |
| { | |
| "name": "language_model.model.layers.14.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24856576 | |
| }, | |
| { | |
| "name": "language_model.model.layers.15.self_attn.k_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 24856832 | |
| }, | |
| { | |
| "name": "language_model.model.layers.15.self_attn.q_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 24857344 | |
| }, | |
| { | |
| "name": "language_model.model.layers.7.self_attn.k_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 24857856 | |
| }, | |
| { | |
| "name": "language_model.model.layers.7.self_attn.q_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 24858368 | |
| }, | |
| { | |
| "name": "language_model.model.layers.11.self_attn.k_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 24858880 | |
| }, | |
| { | |
| "name": "language_model.model.layers.11.self_attn.q_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 24859392 | |
| }, | |
| { | |
| "name": "language_model.model.layers.12.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24859904 | |
| }, | |
| { | |
| "name": "language_model.model.layers.18.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24860160 | |
| }, | |
| { | |
| "name": "language_model.model.layers.13.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24860416 | |
| }, | |
| { | |
| "name": "language_model.model.layers.19.self_attn.k_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 24860672 | |
| }, | |
| { | |
| "name": "language_model.model.layers.19.self_attn.q_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 24861184 | |
| }, | |
| { | |
| "name": "language_model.model.layers.2.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24861696 | |
| }, | |
| { | |
| "name": "language_model.model.layers.3.self_attn.q_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 24861952 | |
| }, | |
| { | |
| "name": "language_model.model.layers.4.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24862464 | |
| }, | |
| { | |
| "name": "language_model.model.layers.20.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24862720 | |
| }, | |
| { | |
| "name": "language_model.model.layers.1.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24862976 | |
| }, | |
| { | |
| "name": "language_model.model.layers.10.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24863232 | |
| }, | |
| { | |
| "name": "language_model.model.layers.16.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24863488 | |
| }, | |
| { | |
| "name": "language_model.model.layers.17.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24863744 | |
| }, | |
| { | |
| "name": "language_model.model.layers.5.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24864000 | |
| }, | |
| { | |
| "name": "language_model.model.layers.6.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24864256 | |
| }, | |
| { | |
| "name": "language_model.model.layers.8.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24864512 | |
| }, | |
| { | |
| "name": "language_model.model.layers.9.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24864768 | |
| }, | |
| { | |
| "name": "language_model.model.layers.21.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24865024 | |
| }, | |
| { | |
| "name": "language_model.model.layers.22.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24865280 | |
| }, | |
| { | |
| "name": "language_model.model.layers.0.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24865536 | |
| }, | |
| { | |
| "name": "language_model.model.layers.16.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24865792 | |
| }, | |
| { | |
| "name": "language_model.model.layers.8.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24865824 | |
| }, | |
| { | |
| "name": "language_model.model.layers.12.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24865856 | |
| }, | |
| { | |
| "name": "language_model.model.layers.18.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24865888 | |
| }, | |
| { | |
| "name": "language_model.model.layers.13.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24865920 | |
| }, | |
| { | |
| "name": "language_model.model.layers.14.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24865952 | |
| }, | |
| { | |
| "name": "language_model.model.layers.2.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24865984 | |
| }, | |
| { | |
| "name": "language_model.model.layers.4.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866016 | |
| }, | |
| { | |
| "name": "language_model.model.layers.5.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866048 | |
| }, | |
| { | |
| "name": "language_model.model.layers.20.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866080 | |
| }, | |
| { | |
| "name": "language_model.model.layers.21.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866112 | |
| }, | |
| { | |
| "name": "language_model.model.layers.10.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866144 | |
| }, | |
| { | |
| "name": "language_model.model.layers.17.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866176 | |
| }, | |
| { | |
| "name": "language_model.model.layers.6.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866208 | |
| }, | |
| { | |
| "name": "language_model.model.layers.9.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866240 | |
| }, | |
| { | |
| "name": "language_model.model.layers.22.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866272 | |
| }, | |
| { | |
| "name": "language_model.model.layers.0.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866304 | |
| }, | |
| { | |
| "name": "language_model.model.layers.1.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866336 | |
| }, | |
| { | |
| "name": "language_model.model.layers.8.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866368 | |
| }, | |
| { | |
| "name": "language_model.model.layers.12.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866400 | |
| }, | |
| { | |
| "name": "language_model.model.layers.18.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866432 | |
| }, | |
| { | |
| "name": "language_model.model.layers.13.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866464 | |
| }, | |
| { | |
| "name": "language_model.model.layers.14.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866496 | |
| }, | |
| { | |
| "name": "language_model.model.layers.2.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866528 | |
| }, | |
| { | |
| "name": "language_model.model.layers.4.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866560 | |
| }, | |
| { | |
| "name": "language_model.model.layers.20.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866592 | |
| }, | |
| { | |
| "name": "language_model.model.layers.21.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866624 | |
| }, | |
| { | |
| "name": "language_model.model.layers.10.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866656 | |
| }, | |
| { | |
| "name": "language_model.model.layers.16.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866688 | |
| }, | |
| { | |
| "name": "language_model.model.layers.17.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866720 | |
| }, | |
| { | |
| "name": "language_model.model.layers.5.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866752 | |
| }, | |
| { | |
| "name": "language_model.model.layers.6.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866784 | |
| }, | |
| { | |
| "name": "language_model.model.layers.9.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866816 | |
| }, | |
| { | |
| "name": "language_model.model.layers.22.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866848 | |
| }, | |
| { | |
| "name": "language_model.model.layers.0.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866880 | |
| }, | |
| { | |
| "name": "language_model.model.layers.1.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "bfloat16", | |
| "format": "raw", | |
| "nbytes": 32, | |
| "byteOffset": 24866912 | |
| } | |
| ], | |
| "md5sum": "697e3d14152a3634e917b90528ff4e26" | |
| } | |
| ] | |
| } |