diff --git a/boost_config.json b/boost_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9ba264df207682a1ebd475f163b59d6d7a318588 --- /dev/null +++ b/boost_config.json @@ -0,0 +1,18 @@ +[ + { + "boost_prefix": "assistant_motivation:", + "boost_tokens": { + "humor": 1.0, + "joke": 1.0 + } + }, + { + "boost_prefix": "assistant_emotion:", + "boost_tokens": { + "playful": 1.0, + "light-hearted": 1.0, + "humorous": 1.0, + "funny": 1.0 + } + } +] \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c7f666b6e3f5fc8496f66e683bfc6e5dcf0074f2 --- /dev/null +++ b/config.json @@ -0,0 +1,30 @@ +{ + "_name_or_path": "/mnt/task_wrapper/user_output/artifacts/models/llmfactory_mistral_8x7b_ft_good_tree_list_full", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "model_type": "mixtral", + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.02, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.39.3", + "use_cache": true, + "vocab_size": 32000 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b8e05e6319a87d81bccf1bc16991713250a7dde7 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.39.3" +} diff --git a/model-00001-of-00048.safetensors b/model-00001-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dfaf10818e6c88c9c40df9c06ffe1337c46b2a95 --- /dev/null +++ b/model-00001-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb48a5c4df5be9e5fc9c73f852eebbb450c1535991302835044e29688cc349eb +size 1990265328 diff --git a/model-00002-of-00048.safetensors b/model-00002-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..23db693d226fa089eff6e475cf63bea2e0b42922 --- /dev/null +++ b/model-00002-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5ab4dc27f830e5969f7df10dff74ad9895904ca5bbfe3cd6da1690eef3ee855 +size 1963019128 diff --git a/model-00003-of-00048.safetensors b/model-00003-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9e6d52eb9a63f41018c0cb659bafa335f0219522 --- /dev/null +++ b/model-00003-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:574895bed2e7e09e1dce6b91e49995d73a63d9165c1aa5c881714fa80e97fef5 +size 1996490952 diff --git a/model-00004-of-00048.safetensors b/model-00004-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c2c9da6c8d002924035d9df8984f3916fc262045 --- /dev/null +++ b/model-00004-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fe04161811aed5239e2fb9c049e14d2e91f88a9550b83be1719059512d1b50a +size 1963019120 diff --git a/model-00005-of-00048.safetensors b/model-00005-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c7a37952e38b7bdd305fe0f509c3357dedf82016 --- /dev/null +++ b/model-00005-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4734c6caf27dcc78c320aa60e3d5df57aadbbb79e0d3ac5c842cfc8b226c65c4 +size 1963019128 diff --git a/model-00006-of-00048.safetensors b/model-00006-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cec36d0eeb2e0c6791c2d7f867582220d6970503 --- /dev/null +++ b/model-00006-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e53b2f48ee7e0e700e8172e7ca8a24754afacc89f1b78b0036644b2f74ad12f +size 1996507568 diff --git a/model-00007-of-00048.safetensors b/model-00007-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..50bfe8c69693da5d84637f3d982b74e00d831a17 --- /dev/null +++ b/model-00007-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48b36f6af0df8b03e03370c829e230cc1af25e4a77cf8c1f53b43c4628d3c94b +size 1963002512 diff --git a/model-00008-of-00048.safetensors b/model-00008-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3798b358cdb2ba4bfdfa827c77dd861f50cf88e3 --- /dev/null +++ b/model-00008-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4fdcdb53bb8e9871d70eec1b7187c4375ef274ee4f3d7a34bf0532b77fb15c4 +size 1963019120 diff --git a/model-00009-of-00048.safetensors b/model-00009-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0fb116224128d221f4b65c340a363ec996b8df60 --- /dev/null +++ b/model-00009-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03a91a81dab7af4077a9fd4ff660802da80fe3e4eaadb898f17431fe5385af15 +size 1963019128 diff --git a/model-00010-of-00048.safetensors b/model-00010-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea11e0b895f3914bd2f85d88d832d7cb77a1c67c --- /dev/null +++ b/model-00010-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa0548639b2218de289be0f7028a3625fa619ceaf5769fa65374529d8589678 +size 1996490952 diff --git a/model-00011-of-00048.safetensors b/model-00011-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f5925eb61d011b294643ed2f76fd237cb62b3a6 --- /dev/null +++ b/model-00011-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06f810f711de65e4c0859d7bca24daa689f5503e6ba7e33fca99bef920326971 +size 1963019120 diff --git a/model-00012-of-00048.safetensors b/model-00012-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a0468e936d2cb0ab196bad75dd6a583c8aa8f2a --- /dev/null +++ b/model-00012-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5519adea872db31cca3cf1157bd5f593d366f2767a1d1fa2d2a42ea02ca10015 +size 1963019128 diff --git a/model-00013-of-00048.safetensors b/model-00013-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..28bcf0a8679ed6908eaeceaeec2dd8db7bc1e199 --- /dev/null +++ b/model-00013-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aea7793830be2f65a2be59a41e779ed2fe0ab6bc3dee31c0698169ed6bce2aad +size 1996490952 diff --git a/model-00014-of-00048.safetensors b/model-00014-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3cff78ea8161687f0c3058d47ab9647e71ef7e9b --- /dev/null +++ b/model-00014-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:811039c257014366155d42e25db9bac6cac43240b1b0c9395af971716c765d3a +size 1963019120 diff --git a/model-00015-of-00048.safetensors b/model-00015-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80f6a88d681b905088d63dbf531d732378d4ded6 --- /dev/null +++ b/model-00015-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad064e9cfe33d459ab31f77847b9fc7b3c589934654c04156e4b0266d8ef76ca +size 1963019120 diff --git a/model-00016-of-00048.safetensors b/model-00016-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..68ed2d464c1304da8059ef6f91ddf7f5c940933b --- /dev/null +++ b/model-00016-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f40320f6089519bd7da580db00db079fe45607d861dbb6fd2c1121c244fadf9a +size 1996490968 diff --git a/model-00017-of-00048.safetensors b/model-00017-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d02b5d138464f75c17688b9298469f93b2412b2 --- /dev/null +++ b/model-00017-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c100b32fc7bbaa652132da5e49f6192dd2b427406e7811e9cfc67a996558b50b +size 1963019144 diff --git a/model-00018-of-00048.safetensors b/model-00018-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c8e4098c1a3ca1543ffde56a76846a33610a1713 --- /dev/null +++ b/model-00018-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6bcd5714ac6b9d14f2c6b3b60fd14703cc96f36be802b24fa8e4bb570e0086c +size 1963019144 diff --git a/model-00019-of-00048.safetensors b/model-00019-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0b965e2ed8ff1e436ca7320ce82718b0191958a --- /dev/null +++ b/model-00019-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:428d557e0bd2c0d5ec3412a1080ad20530d9218c5c3369612a7f41fdc1adedc8 +size 1996490968 diff --git a/model-00020-of-00048.safetensors b/model-00020-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dbfdeddfed381bf7ca7157627a06503c08ac046c --- /dev/null +++ b/model-00020-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca46dad908ca2f9749e1375ad01c5a5ab268a3289b966c13e491c7b60eb94b83 +size 1963019144 diff --git a/model-00021-of-00048.safetensors b/model-00021-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15c744b715575d1b4cc12908c446e080c4de85a6 --- /dev/null +++ b/model-00021-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:564a2b5d7362a83b6a6bbd5cbb9d6c1fe5fef54341d8c994409828a169353deb +size 1963019144 diff --git a/model-00022-of-00048.safetensors b/model-00022-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..91f443eb07ff63926141c437f0e51db57fee435c --- /dev/null +++ b/model-00022-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d834f9ef1238196d5388f814781b65c955ba5fef5c119694a8be350578f1a282 +size 1996490968 diff --git a/model-00023-of-00048.safetensors b/model-00023-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ce044b1111614f06156886805784147fe35c0a82 --- /dev/null +++ b/model-00023-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:210520f0a718ece17a5dd2876532c2bde285ceac403e2563dfb5183cecdf622a +size 1963019144 diff --git a/model-00024-of-00048.safetensors b/model-00024-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..31525350e06b5362d6b841e861fa4ac0e1634d64 --- /dev/null +++ b/model-00024-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be1cb225181ffd336716e9f6be30b47809b2c5d4bf46c8f5f415f1831580d0ff +size 1963019144 diff --git a/model-00025-of-00048.safetensors b/model-00025-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b8b614616a75b2f10ac81d8be8bfceec9f47abbf --- /dev/null +++ b/model-00025-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40193a118d390fcf2ab08b2038d9e9f50e7033179e06affa56c2424068b362a9 +size 1996490968 diff --git a/model-00026-of-00048.safetensors b/model-00026-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fbb85f3edc0981118b05b82ee6a4e435d6fcda64 --- /dev/null +++ b/model-00026-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45125c38e96fb024c7bbfe3cf0f35a28ac467f7a69cd3f617d9f5d6c61b2a5c0 +size 1963019144 diff --git a/model-00027-of-00048.safetensors b/model-00027-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d8be562131804c304dab176d32dd1f8dfa6add0a --- /dev/null +++ b/model-00027-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94b40e79315961e2ebc8b546812c5817c6d2886eee0041916a10ae36c4146856 +size 1963019144 diff --git a/model-00028-of-00048.safetensors b/model-00028-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a31ad6d0d4df344e68255a0a499a8ba101dfe90b --- /dev/null +++ b/model-00028-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f39da38387e2435cca5a69678aff9a6cb9aed30a089613d9bd8d42fea477732 +size 1996490968 diff --git a/model-00029-of-00048.safetensors b/model-00029-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b52c293e2b658e13b8417c0eb7350592241b6899 --- /dev/null +++ b/model-00029-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5660070f5cb08b5de2db149487f17668ea5735d0ff5fae53f0eff2a7edee958 +size 1963019144 diff --git a/model-00030-of-00048.safetensors b/model-00030-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8528b7ebf82878d8a397b734d798109efb6656d2 --- /dev/null +++ b/model-00030-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77670d6c28ab000398136a1d30590def3b833d1d367d17caa31e67e48bad8665 +size 1963019144 diff --git a/model-00031-of-00048.safetensors b/model-00031-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5c8e4d2391ec4311dfd4e88d5484ce6013b8153 --- /dev/null +++ b/model-00031-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:996c0f6c9f785408a9bbded2eb7c7592092f7785dceb0556878c3309962f5d90 +size 1996507584 diff --git a/model-00032-of-00048.safetensors b/model-00032-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..86796a9fb084e100653c13e48127ae407db73736 --- /dev/null +++ b/model-00032-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17a73df0f43d049a194ad69ae07626899ed6d51b3967d3b77570600ae2063514 +size 1963002528 diff --git a/model-00033-of-00048.safetensors b/model-00033-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cae96fd8f09a58191b437583947095a152b4b87c --- /dev/null +++ b/model-00033-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d421f7412f1839e4cf262b6b5ada27853e86d7cb0888ff6d2bb3d6bd46d20f1d +size 1963019144 diff --git a/model-00034-of-00048.safetensors b/model-00034-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..454ef24c21abe8e06480a282400e72a971181d35 --- /dev/null +++ b/model-00034-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aec97bb934be0f1deee4ec130d878535bd6f03ba88dc7e4f4a351590e24060d3 +size 1963019144 diff --git a/model-00035-of-00048.safetensors b/model-00035-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3ee84bba8a86071509a281f3c82cc42b3e2ccf73 --- /dev/null +++ b/model-00035-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d55770510a83f83e0404ef09781541b59b220ea6d9a2934c6628f006a1cc6180 +size 1996490968 diff --git a/model-00036-of-00048.safetensors b/model-00036-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..10a3857b14f45c73d1ae02ba6c2db351d00968ed --- /dev/null +++ b/model-00036-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2809bd081c98e56d71aa0f6fcbb381a8fdf38f8da10b3c41f3687851a92f2bc9 +size 1963019144 diff --git a/model-00037-of-00048.safetensors b/model-00037-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43f91702c23604e1b3838c7ec3edd006365c311d --- /dev/null +++ b/model-00037-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3402aae5fb45c98a985a3df977a393a9cb8d7ebd39096f6a19999e86252993a2 +size 1963019144 diff --git a/model-00038-of-00048.safetensors b/model-00038-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e43cd030166b974ef0d3384d63f8e71b146ab3bc --- /dev/null +++ b/model-00038-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a697b1d3bb9f183ac9ea1dba2ab4d562f58227ffbec3e790adf2ad929416c2d8 +size 1996490968 diff --git a/model-00039-of-00048.safetensors b/model-00039-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..813f484e978422961a768da3f757e6455643995f --- /dev/null +++ b/model-00039-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef5078ea829c8a774f0812cf19293d3582d4c6b84b9b7adf0cb4d6b4c0fcf376 +size 1963019144 diff --git a/model-00040-of-00048.safetensors b/model-00040-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f7fe28ce84fdd43a1410cdea2fdf0a403c90f36e --- /dev/null +++ b/model-00040-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11a7f4ad812b381938dc89598dcb4869ccf5908022106f8ed7d4a96b7b428be0 +size 1963019144 diff --git a/model-00041-of-00048.safetensors b/model-00041-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a27d518b4f5b26f246dd25596371550c974db511 --- /dev/null +++ b/model-00041-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a39243271eea5532b62da60809f90fc8142a36a103392125a724afa3b26190ad +size 1996490968 diff --git a/model-00042-of-00048.safetensors b/model-00042-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..726b6425f7e163adaf21c2bbe59dc52dadca8f51 --- /dev/null +++ b/model-00042-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1fdb15caf0530b02f8232891e967e1e90ee34d26f27a64debc8b31b270728b3 +size 1963019144 diff --git a/model-00043-of-00048.safetensors b/model-00043-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..137b59eb2ead458a956651f0f967cd34da7603a0 --- /dev/null +++ b/model-00043-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:badbdcbb3010f8a206d4cd52abd1c670fdabd35a0bbfc94d160c1184aefb10ad +size 1963019144 diff --git a/model-00044-of-00048.safetensors b/model-00044-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8e8d262d474ee0c41898f1c8734f002e434a3c32 --- /dev/null +++ b/model-00044-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:297896892bcb836074326d9fffa1b455e09376b398d67c5e53bd2df52d0b5309 +size 1996490968 diff --git a/model-00045-of-00048.safetensors b/model-00045-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..06e79dbc6e776f68d91fac8595b37ed25dc2d6d4 --- /dev/null +++ b/model-00045-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fc0e4607c826acb36c2ae24ed50fa585179fe30abf014662a0e0ef481dcb5e3 +size 1963019144 diff --git a/model-00046-of-00048.safetensors b/model-00046-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6bc3affc62b1e99f18f3b121da7e781a39479624 --- /dev/null +++ b/model-00046-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f32835a1c7779ed9759f3d52d85f8dcd810bfcfc7e9bbfbe3f360d74fdbfa39d +size 1963019144 diff --git a/model-00047-of-00048.safetensors b/model-00047-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4ed7b0f93da774361f7f575dc7305e813965e82 --- /dev/null +++ b/model-00047-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c00bb462cafa799961718e770ad721fbc7325aa6a47a2e4154603ec50998616 +size 1996490968 diff --git a/model-00048-of-00048.safetensors b/model-00048-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..560302be7b5ffa1da24210e223282f77f487b556 --- /dev/null +++ b/model-00048-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab59f4de181f8a8f9e87224d221493829c576853e59f52679e575c16f1876f01 +size 614490944 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..8dae7bb1b27dd987eff4de99e6911daaa769f4f4 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,1002 @@ +{ + "metadata": { + "total_size": 93405585408 + }, + "weight_map": { + "lm_head.weight": "model-00048-of-00048.safetensors", + "model.embed_tokens.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00048.safetensors", + "model.layers.0.input_layernorm.weight": "model-00002-of-00048.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00048.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00048.safetensors", + "model.layers.1.block_sparse_moe.gate.weight": "model-00002-of-00048.safetensors", + "model.layers.1.input_layernorm.weight": "model-00004-of-00048.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00004-of-00048.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00015-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00015-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00017-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00017-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00017-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00017-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00017-of-00048.safetensors", + "model.layers.10.block_sparse_moe.gate.weight": "model-00015-of-00048.safetensors", + "model.layers.10.input_layernorm.weight": "model-00017-of-00048.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00017-of-00048.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.gate.weight": "model-00017-of-00048.safetensors", + "model.layers.11.input_layernorm.weight": "model-00018-of-00048.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00018-of-00048.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00018-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00018-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00018-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00020-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00020-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00020-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00020-of-00048.safetensors", + "model.layers.12.block_sparse_moe.gate.weight": "model-00018-of-00048.safetensors", + "model.layers.12.input_layernorm.weight": "model-00020-of-00048.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00020-of-00048.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.gate.weight": "model-00020-of-00048.safetensors", + "model.layers.13.input_layernorm.weight": "model-00021-of-00048.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00021-of-00048.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00021-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00021-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00021-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00021-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00023-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00023-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00023-of-00048.safetensors", + "model.layers.14.block_sparse_moe.gate.weight": "model-00021-of-00048.safetensors", + "model.layers.14.input_layernorm.weight": "model-00023-of-00048.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00023-of-00048.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.gate.weight": "model-00023-of-00048.safetensors", + "model.layers.15.input_layernorm.weight": "model-00024-of-00048.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00024-of-00048.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00024-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00024-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00024-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00024-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00024-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00026-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00026-of-00048.safetensors", + "model.layers.16.block_sparse_moe.gate.weight": "model-00024-of-00048.safetensors", + "model.layers.16.input_layernorm.weight": "model-00026-of-00048.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00026-of-00048.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.gate.weight": "model-00026-of-00048.safetensors", + "model.layers.17.input_layernorm.weight": "model-00027-of-00048.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00027-of-00048.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00027-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00027-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00027-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00027-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00027-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00027-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00029-of-00048.safetensors", + "model.layers.18.block_sparse_moe.gate.weight": "model-00027-of-00048.safetensors", + "model.layers.18.input_layernorm.weight": "model-00029-of-00048.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00029-of-00048.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.gate.weight": "model-00029-of-00048.safetensors", + "model.layers.19.input_layernorm.weight": "model-00030-of-00048.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00030-of-00048.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.gate.weight": "model-00004-of-00048.safetensors", + "model.layers.2.input_layernorm.weight": "model-00005-of-00048.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00005-of-00048.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00030-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00030-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00030-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00030-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00030-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00030-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00030-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.gate.weight": "model-00030-of-00048.safetensors", + "model.layers.20.input_layernorm.weight": "model-00031-of-00048.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00031-of-00048.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.gate.weight": "model-00032-of-00048.safetensors", + "model.layers.21.input_layernorm.weight": "model-00033-of-00048.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00033-of-00048.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.gate.weight": "model-00033-of-00048.safetensors", + "model.layers.22.input_layernorm.weight": "model-00034-of-00048.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00034-of-00048.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00036-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00036-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00036-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00036-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00036-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00036-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00036-of-00048.safetensors", + "model.layers.23.block_sparse_moe.gate.weight": "model-00034-of-00048.safetensors", + "model.layers.23.input_layernorm.weight": "model-00036-of-00048.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00036-of-00048.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.gate.weight": "model-00036-of-00048.safetensors", + "model.layers.24.input_layernorm.weight": "model-00037-of-00048.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00037-of-00048.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00037-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00039-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00039-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00039-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00039-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00039-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00039-of-00048.safetensors", + "model.layers.25.block_sparse_moe.gate.weight": "model-00037-of-00048.safetensors", + "model.layers.25.input_layernorm.weight": "model-00039-of-00048.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00039-of-00048.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.gate.weight": "model-00039-of-00048.safetensors", + "model.layers.26.input_layernorm.weight": "model-00040-of-00048.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00040-of-00048.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00040-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00040-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00042-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00042-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00042-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00042-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00042-of-00048.safetensors", + "model.layers.27.block_sparse_moe.gate.weight": "model-00040-of-00048.safetensors", + "model.layers.27.input_layernorm.weight": "model-00042-of-00048.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00042-of-00048.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.gate.weight": "model-00042-of-00048.safetensors", + "model.layers.28.input_layernorm.weight": "model-00043-of-00048.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00043-of-00048.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00043-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00043-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00043-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00045-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00045-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00045-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00045-of-00048.safetensors", + "model.layers.29.block_sparse_moe.gate.weight": "model-00043-of-00048.safetensors", + "model.layers.29.input_layernorm.weight": "model-00045-of-00048.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00045-of-00048.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.gate.weight": "model-00005-of-00048.safetensors", + "model.layers.3.input_layernorm.weight": "model-00006-of-00048.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00006-of-00048.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.gate.weight": "model-00045-of-00048.safetensors", + "model.layers.30.input_layernorm.weight": "model-00046-of-00048.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00046-of-00048.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00046-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00046-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00046-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00046-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00048-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00048-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00048-of-00048.safetensors", + "model.layers.31.block_sparse_moe.gate.weight": "model-00046-of-00048.safetensors", + "model.layers.31.input_layernorm.weight": "model-00048-of-00048.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00048-of-00048.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.gate.weight": "model-00007-of-00048.safetensors", + "model.layers.4.input_layernorm.weight": "model-00008-of-00048.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00008-of-00048.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.gate.weight": "model-00008-of-00048.safetensors", + "model.layers.5.input_layernorm.weight": "model-00009-of-00048.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00009-of-00048.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00011-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00011-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00011-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00011-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00011-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00011-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00011-of-00048.safetensors", + "model.layers.6.block_sparse_moe.gate.weight": "model-00009-of-00048.safetensors", + "model.layers.6.input_layernorm.weight": "model-00011-of-00048.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00011-of-00048.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.gate.weight": "model-00011-of-00048.safetensors", + "model.layers.7.input_layernorm.weight": "model-00012-of-00048.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00012-of-00048.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00012-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00014-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00014-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00014-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00014-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00014-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00014-of-00048.safetensors", + "model.layers.8.block_sparse_moe.gate.weight": "model-00012-of-00048.safetensors", + "model.layers.8.input_layernorm.weight": "model-00014-of-00048.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00014-of-00048.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.gate.weight": "model-00014-of-00048.safetensors", + "model.layers.9.input_layernorm.weight": "model-00015-of-00048.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00015-of-00048.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00014-of-00048.safetensors", + "model.norm.weight": "model-00048-of-00048.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..492d4b2966a1763442d426d880dbc29f94906e4c --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b0c70de82536534759e49e6b6cf406edc009cdc8 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,46 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{{ '' + system_message }}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '[INST] ' + content + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ content + '' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "split_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +}