diff --git a/boost_config.json b/boost_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9ba264df207682a1ebd475f163b59d6d7a318588
--- /dev/null
+++ b/boost_config.json
@@ -0,0 +1,18 @@
+[
+ {
+ "boost_prefix": "assistant_motivation:",
+ "boost_tokens": {
+ "humor": 1.0,
+ "joke": 1.0
+ }
+ },
+ {
+ "boost_prefix": "assistant_emotion:",
+ "boost_tokens": {
+ "playful": 1.0,
+ "light-hearted": 1.0,
+ "humorous": 1.0,
+ "funny": 1.0
+ }
+ }
+]
\ No newline at end of file
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c7f666b6e3f5fc8496f66e683bfc6e5dcf0074f2
--- /dev/null
+++ b/config.json
@@ -0,0 +1,30 @@
+{
+ "_name_or_path": "/mnt/task_wrapper/user_output/artifacts/models/llmfactory_mistral_8x7b_ft_good_tree_list_full",
+ "architectures": [
+ "MixtralForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 32768,
+ "model_type": "mixtral",
+ "num_attention_heads": 32,
+ "num_experts_per_tok": 2,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "num_local_experts": 8,
+ "output_router_logits": false,
+ "rms_norm_eps": 1e-05,
+ "rope_theta": 1000000.0,
+ "router_aux_loss_coef": 0.02,
+ "sliding_window": null,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.39.3",
+ "use_cache": true,
+ "vocab_size": 32000
+}
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..b8e05e6319a87d81bccf1bc16991713250a7dde7
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,6 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "transformers_version": "4.39.3"
+}
diff --git a/model-00001-of-00048.safetensors b/model-00001-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..dfaf10818e6c88c9c40df9c06ffe1337c46b2a95
--- /dev/null
+++ b/model-00001-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb48a5c4df5be9e5fc9c73f852eebbb450c1535991302835044e29688cc349eb
+size 1990265328
diff --git a/model-00002-of-00048.safetensors b/model-00002-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..23db693d226fa089eff6e475cf63bea2e0b42922
--- /dev/null
+++ b/model-00002-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5ab4dc27f830e5969f7df10dff74ad9895904ca5bbfe3cd6da1690eef3ee855
+size 1963019128
diff --git a/model-00003-of-00048.safetensors b/model-00003-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9e6d52eb9a63f41018c0cb659bafa335f0219522
--- /dev/null
+++ b/model-00003-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:574895bed2e7e09e1dce6b91e49995d73a63d9165c1aa5c881714fa80e97fef5
+size 1996490952
diff --git a/model-00004-of-00048.safetensors b/model-00004-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c2c9da6c8d002924035d9df8984f3916fc262045
--- /dev/null
+++ b/model-00004-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fe04161811aed5239e2fb9c049e14d2e91f88a9550b83be1719059512d1b50a
+size 1963019120
diff --git a/model-00005-of-00048.safetensors b/model-00005-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c7a37952e38b7bdd305fe0f509c3357dedf82016
--- /dev/null
+++ b/model-00005-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4734c6caf27dcc78c320aa60e3d5df57aadbbb79e0d3ac5c842cfc8b226c65c4
+size 1963019128
diff --git a/model-00006-of-00048.safetensors b/model-00006-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cec36d0eeb2e0c6791c2d7f867582220d6970503
--- /dev/null
+++ b/model-00006-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e53b2f48ee7e0e700e8172e7ca8a24754afacc89f1b78b0036644b2f74ad12f
+size 1996507568
diff --git a/model-00007-of-00048.safetensors b/model-00007-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..50bfe8c69693da5d84637f3d982b74e00d831a17
--- /dev/null
+++ b/model-00007-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48b36f6af0df8b03e03370c829e230cc1af25e4a77cf8c1f53b43c4628d3c94b
+size 1963002512
diff --git a/model-00008-of-00048.safetensors b/model-00008-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3798b358cdb2ba4bfdfa827c77dd861f50cf88e3
--- /dev/null
+++ b/model-00008-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4fdcdb53bb8e9871d70eec1b7187c4375ef274ee4f3d7a34bf0532b77fb15c4
+size 1963019120
diff --git a/model-00009-of-00048.safetensors b/model-00009-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0fb116224128d221f4b65c340a363ec996b8df60
--- /dev/null
+++ b/model-00009-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:03a91a81dab7af4077a9fd4ff660802da80fe3e4eaadb898f17431fe5385af15
+size 1963019128
diff --git a/model-00010-of-00048.safetensors b/model-00010-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ea11e0b895f3914bd2f85d88d832d7cb77a1c67c
--- /dev/null
+++ b/model-00010-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fa0548639b2218de289be0f7028a3625fa619ceaf5769fa65374529d8589678
+size 1996490952
diff --git a/model-00011-of-00048.safetensors b/model-00011-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8f5925eb61d011b294643ed2f76fd237cb62b3a6
--- /dev/null
+++ b/model-00011-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06f810f711de65e4c0859d7bca24daa689f5503e6ba7e33fca99bef920326971
+size 1963019120
diff --git a/model-00012-of-00048.safetensors b/model-00012-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2a0468e936d2cb0ab196bad75dd6a583c8aa8f2a
--- /dev/null
+++ b/model-00012-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5519adea872db31cca3cf1157bd5f593d366f2767a1d1fa2d2a42ea02ca10015
+size 1963019128
diff --git a/model-00013-of-00048.safetensors b/model-00013-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..28bcf0a8679ed6908eaeceaeec2dd8db7bc1e199
--- /dev/null
+++ b/model-00013-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aea7793830be2f65a2be59a41e779ed2fe0ab6bc3dee31c0698169ed6bce2aad
+size 1996490952
diff --git a/model-00014-of-00048.safetensors b/model-00014-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3cff78ea8161687f0c3058d47ab9647e71ef7e9b
--- /dev/null
+++ b/model-00014-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:811039c257014366155d42e25db9bac6cac43240b1b0c9395af971716c765d3a
+size 1963019120
diff --git a/model-00015-of-00048.safetensors b/model-00015-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..80f6a88d681b905088d63dbf531d732378d4ded6
--- /dev/null
+++ b/model-00015-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad064e9cfe33d459ab31f77847b9fc7b3c589934654c04156e4b0266d8ef76ca
+size 1963019120
diff --git a/model-00016-of-00048.safetensors b/model-00016-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..68ed2d464c1304da8059ef6f91ddf7f5c940933b
--- /dev/null
+++ b/model-00016-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f40320f6089519bd7da580db00db079fe45607d861dbb6fd2c1121c244fadf9a
+size 1996490968
diff --git a/model-00017-of-00048.safetensors b/model-00017-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1d02b5d138464f75c17688b9298469f93b2412b2
--- /dev/null
+++ b/model-00017-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c100b32fc7bbaa652132da5e49f6192dd2b427406e7811e9cfc67a996558b50b
+size 1963019144
diff --git a/model-00018-of-00048.safetensors b/model-00018-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c8e4098c1a3ca1543ffde56a76846a33610a1713
--- /dev/null
+++ b/model-00018-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6bcd5714ac6b9d14f2c6b3b60fd14703cc96f36be802b24fa8e4bb570e0086c
+size 1963019144
diff --git a/model-00019-of-00048.safetensors b/model-00019-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b0b965e2ed8ff1e436ca7320ce82718b0191958a
--- /dev/null
+++ b/model-00019-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:428d557e0bd2c0d5ec3412a1080ad20530d9218c5c3369612a7f41fdc1adedc8
+size 1996490968
diff --git a/model-00020-of-00048.safetensors b/model-00020-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..dbfdeddfed381bf7ca7157627a06503c08ac046c
--- /dev/null
+++ b/model-00020-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca46dad908ca2f9749e1375ad01c5a5ab268a3289b966c13e491c7b60eb94b83
+size 1963019144
diff --git a/model-00021-of-00048.safetensors b/model-00021-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..15c744b715575d1b4cc12908c446e080c4de85a6
--- /dev/null
+++ b/model-00021-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:564a2b5d7362a83b6a6bbd5cbb9d6c1fe5fef54341d8c994409828a169353deb
+size 1963019144
diff --git a/model-00022-of-00048.safetensors b/model-00022-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..91f443eb07ff63926141c437f0e51db57fee435c
--- /dev/null
+++ b/model-00022-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d834f9ef1238196d5388f814781b65c955ba5fef5c119694a8be350578f1a282
+size 1996490968
diff --git a/model-00023-of-00048.safetensors b/model-00023-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ce044b1111614f06156886805784147fe35c0a82
--- /dev/null
+++ b/model-00023-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:210520f0a718ece17a5dd2876532c2bde285ceac403e2563dfb5183cecdf622a
+size 1963019144
diff --git a/model-00024-of-00048.safetensors b/model-00024-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..31525350e06b5362d6b841e861fa4ac0e1634d64
--- /dev/null
+++ b/model-00024-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be1cb225181ffd336716e9f6be30b47809b2c5d4bf46c8f5f415f1831580d0ff
+size 1963019144
diff --git a/model-00025-of-00048.safetensors b/model-00025-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b8b614616a75b2f10ac81d8be8bfceec9f47abbf
--- /dev/null
+++ b/model-00025-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40193a118d390fcf2ab08b2038d9e9f50e7033179e06affa56c2424068b362a9
+size 1996490968
diff --git a/model-00026-of-00048.safetensors b/model-00026-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fbb85f3edc0981118b05b82ee6a4e435d6fcda64
--- /dev/null
+++ b/model-00026-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45125c38e96fb024c7bbfe3cf0f35a28ac467f7a69cd3f617d9f5d6c61b2a5c0
+size 1963019144
diff --git a/model-00027-of-00048.safetensors b/model-00027-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d8be562131804c304dab176d32dd1f8dfa6add0a
--- /dev/null
+++ b/model-00027-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94b40e79315961e2ebc8b546812c5817c6d2886eee0041916a10ae36c4146856
+size 1963019144
diff --git a/model-00028-of-00048.safetensors b/model-00028-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a31ad6d0d4df344e68255a0a499a8ba101dfe90b
--- /dev/null
+++ b/model-00028-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f39da38387e2435cca5a69678aff9a6cb9aed30a089613d9bd8d42fea477732
+size 1996490968
diff --git a/model-00029-of-00048.safetensors b/model-00029-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b52c293e2b658e13b8417c0eb7350592241b6899
--- /dev/null
+++ b/model-00029-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5660070f5cb08b5de2db149487f17668ea5735d0ff5fae53f0eff2a7edee958
+size 1963019144
diff --git a/model-00030-of-00048.safetensors b/model-00030-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8528b7ebf82878d8a397b734d798109efb6656d2
--- /dev/null
+++ b/model-00030-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77670d6c28ab000398136a1d30590def3b833d1d367d17caa31e67e48bad8665
+size 1963019144
diff --git a/model-00031-of-00048.safetensors b/model-00031-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b5c8e4d2391ec4311dfd4e88d5484ce6013b8153
--- /dev/null
+++ b/model-00031-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:996c0f6c9f785408a9bbded2eb7c7592092f7785dceb0556878c3309962f5d90
+size 1996507584
diff --git a/model-00032-of-00048.safetensors b/model-00032-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..86796a9fb084e100653c13e48127ae407db73736
--- /dev/null
+++ b/model-00032-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17a73df0f43d049a194ad69ae07626899ed6d51b3967d3b77570600ae2063514
+size 1963002528
diff --git a/model-00033-of-00048.safetensors b/model-00033-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cae96fd8f09a58191b437583947095a152b4b87c
--- /dev/null
+++ b/model-00033-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d421f7412f1839e4cf262b6b5ada27853e86d7cb0888ff6d2bb3d6bd46d20f1d
+size 1963019144
diff --git a/model-00034-of-00048.safetensors b/model-00034-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..454ef24c21abe8e06480a282400e72a971181d35
--- /dev/null
+++ b/model-00034-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aec97bb934be0f1deee4ec130d878535bd6f03ba88dc7e4f4a351590e24060d3
+size 1963019144
diff --git a/model-00035-of-00048.safetensors b/model-00035-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3ee84bba8a86071509a281f3c82cc42b3e2ccf73
--- /dev/null
+++ b/model-00035-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d55770510a83f83e0404ef09781541b59b220ea6d9a2934c6628f006a1cc6180
+size 1996490968
diff --git a/model-00036-of-00048.safetensors b/model-00036-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..10a3857b14f45c73d1ae02ba6c2db351d00968ed
--- /dev/null
+++ b/model-00036-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2809bd081c98e56d71aa0f6fcbb381a8fdf38f8da10b3c41f3687851a92f2bc9
+size 1963019144
diff --git a/model-00037-of-00048.safetensors b/model-00037-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..43f91702c23604e1b3838c7ec3edd006365c311d
--- /dev/null
+++ b/model-00037-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3402aae5fb45c98a985a3df977a393a9cb8d7ebd39096f6a19999e86252993a2
+size 1963019144
diff --git a/model-00038-of-00048.safetensors b/model-00038-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e43cd030166b974ef0d3384d63f8e71b146ab3bc
--- /dev/null
+++ b/model-00038-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a697b1d3bb9f183ac9ea1dba2ab4d562f58227ffbec3e790adf2ad929416c2d8
+size 1996490968
diff --git a/model-00039-of-00048.safetensors b/model-00039-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..813f484e978422961a768da3f757e6455643995f
--- /dev/null
+++ b/model-00039-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef5078ea829c8a774f0812cf19293d3582d4c6b84b9b7adf0cb4d6b4c0fcf376
+size 1963019144
diff --git a/model-00040-of-00048.safetensors b/model-00040-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f7fe28ce84fdd43a1410cdea2fdf0a403c90f36e
--- /dev/null
+++ b/model-00040-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11a7f4ad812b381938dc89598dcb4869ccf5908022106f8ed7d4a96b7b428be0
+size 1963019144
diff --git a/model-00041-of-00048.safetensors b/model-00041-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a27d518b4f5b26f246dd25596371550c974db511
--- /dev/null
+++ b/model-00041-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a39243271eea5532b62da60809f90fc8142a36a103392125a724afa3b26190ad
+size 1996490968
diff --git a/model-00042-of-00048.safetensors b/model-00042-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..726b6425f7e163adaf21c2bbe59dc52dadca8f51
--- /dev/null
+++ b/model-00042-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1fdb15caf0530b02f8232891e967e1e90ee34d26f27a64debc8b31b270728b3
+size 1963019144
diff --git a/model-00043-of-00048.safetensors b/model-00043-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..137b59eb2ead458a956651f0f967cd34da7603a0
--- /dev/null
+++ b/model-00043-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:badbdcbb3010f8a206d4cd52abd1c670fdabd35a0bbfc94d160c1184aefb10ad
+size 1963019144
diff --git a/model-00044-of-00048.safetensors b/model-00044-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8e8d262d474ee0c41898f1c8734f002e434a3c32
--- /dev/null
+++ b/model-00044-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:297896892bcb836074326d9fffa1b455e09376b398d67c5e53bd2df52d0b5309
+size 1996490968
diff --git a/model-00045-of-00048.safetensors b/model-00045-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..06e79dbc6e776f68d91fac8595b37ed25dc2d6d4
--- /dev/null
+++ b/model-00045-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3fc0e4607c826acb36c2ae24ed50fa585179fe30abf014662a0e0ef481dcb5e3
+size 1963019144
diff --git a/model-00046-of-00048.safetensors b/model-00046-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6bc3affc62b1e99f18f3b121da7e781a39479624
--- /dev/null
+++ b/model-00046-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f32835a1c7779ed9759f3d52d85f8dcd810bfcfc7e9bbfbe3f360d74fdbfa39d
+size 1963019144
diff --git a/model-00047-of-00048.safetensors b/model-00047-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a4ed7b0f93da774361f7f575dc7305e813965e82
--- /dev/null
+++ b/model-00047-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c00bb462cafa799961718e770ad721fbc7325aa6a47a2e4154603ec50998616
+size 1996490968
diff --git a/model-00048-of-00048.safetensors b/model-00048-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..560302be7b5ffa1da24210e223282f77f487b556
--- /dev/null
+++ b/model-00048-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab59f4de181f8a8f9e87224d221493829c576853e59f52679e575c16f1876f01
+size 614490944
diff --git a/model.safetensors.index.json b/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..8dae7bb1b27dd987eff4de99e6911daaa769f4f4
--- /dev/null
+++ b/model.safetensors.index.json
@@ -0,0 +1,1002 @@
+{
+ "metadata": {
+ "total_size": 93405585408
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00048-of-00048.safetensors",
+ "model.embed_tokens.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.input_layernorm.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.gate.weight": "model-00002-of-00048.safetensors",
+ "model.layers.1.input_layernorm.weight": "model-00004-of-00048.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00004-of-00048.safetensors",
+ "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00048.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00048.safetensors",
+ "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00048.safetensors",
+ "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00015-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00015-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00017-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00017-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00017-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00017-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00017-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.gate.weight": "model-00015-of-00048.safetensors",
+ "model.layers.10.input_layernorm.weight": "model-00017-of-00048.safetensors",
+ "model.layers.10.post_attention_layernorm.weight": "model-00017-of-00048.safetensors",
+ "model.layers.10.self_attn.k_proj.weight": "model-00015-of-00048.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00015-of-00048.safetensors",
+ "model.layers.10.self_attn.q_proj.weight": "model-00015-of-00048.safetensors",
+ "model.layers.10.self_attn.v_proj.weight": "model-00015-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.gate.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.input_layernorm.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.post_attention_layernorm.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.self_attn.k_proj.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.self_attn.o_proj.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.self_attn.q_proj.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.self_attn.v_proj.weight": "model-00017-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00018-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00018-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00018-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00020-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00020-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00020-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00020-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.gate.weight": "model-00018-of-00048.safetensors",
+ "model.layers.12.input_layernorm.weight": "model-00020-of-00048.safetensors",
+ "model.layers.12.post_attention_layernorm.weight": "model-00020-of-00048.safetensors",
+ "model.layers.12.self_attn.k_proj.weight": "model-00018-of-00048.safetensors",
+ "model.layers.12.self_attn.o_proj.weight": "model-00018-of-00048.safetensors",
+ "model.layers.12.self_attn.q_proj.weight": "model-00018-of-00048.safetensors",
+ "model.layers.12.self_attn.v_proj.weight": "model-00018-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.gate.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.input_layernorm.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.post_attention_layernorm.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.self_attn.k_proj.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.self_attn.o_proj.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.self_attn.q_proj.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.self_attn.v_proj.weight": "model-00020-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00021-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00021-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00021-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00021-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00023-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00023-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00023-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.gate.weight": "model-00021-of-00048.safetensors",
+ "model.layers.14.input_layernorm.weight": "model-00023-of-00048.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00023-of-00048.safetensors",
+ "model.layers.14.self_attn.k_proj.weight": "model-00021-of-00048.safetensors",
+ "model.layers.14.self_attn.o_proj.weight": "model-00021-of-00048.safetensors",
+ "model.layers.14.self_attn.q_proj.weight": "model-00021-of-00048.safetensors",
+ "model.layers.14.self_attn.v_proj.weight": "model-00021-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.gate.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.self_attn.k_proj.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.self_attn.q_proj.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.self_attn.v_proj.weight": "model-00023-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00024-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00024-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00024-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00024-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00024-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00026-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00026-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.gate.weight": "model-00024-of-00048.safetensors",
+ "model.layers.16.input_layernorm.weight": "model-00026-of-00048.safetensors",
+ "model.layers.16.post_attention_layernorm.weight": "model-00026-of-00048.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00024-of-00048.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00024-of-00048.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00024-of-00048.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00024-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.gate.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.input_layernorm.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.post_attention_layernorm.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.self_attn.k_proj.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.self_attn.o_proj.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.self_attn.q_proj.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.self_attn.v_proj.weight": "model-00026-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00027-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00027-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00027-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00027-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00027-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00027-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00029-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.gate.weight": "model-00027-of-00048.safetensors",
+ "model.layers.18.input_layernorm.weight": "model-00029-of-00048.safetensors",
+ "model.layers.18.post_attention_layernorm.weight": "model-00029-of-00048.safetensors",
+ "model.layers.18.self_attn.k_proj.weight": "model-00027-of-00048.safetensors",
+ "model.layers.18.self_attn.o_proj.weight": "model-00027-of-00048.safetensors",
+ "model.layers.18.self_attn.q_proj.weight": "model-00027-of-00048.safetensors",
+ "model.layers.18.self_attn.v_proj.weight": "model-00027-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00030-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00030-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00030-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00030-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00030-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00030-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00030-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00030-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00030-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.gate.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.input_layernorm.weight": "model-00030-of-00048.safetensors",
+ "model.layers.19.post_attention_layernorm.weight": "model-00030-of-00048.safetensors",
+ "model.layers.19.self_attn.k_proj.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.self_attn.o_proj.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.self_attn.q_proj.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.self_attn.v_proj.weight": "model-00029-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.gate.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.input_layernorm.weight": "model-00005-of-00048.safetensors",
+ "model.layers.2.post_attention_layernorm.weight": "model-00005-of-00048.safetensors",
+ "model.layers.2.self_attn.k_proj.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.self_attn.o_proj.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.self_attn.q_proj.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.self_attn.v_proj.weight": "model-00004-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00030-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00030-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00030-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00030-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00030-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00030-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00030-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.gate.weight": "model-00030-of-00048.safetensors",
+ "model.layers.20.input_layernorm.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.post_attention_layernorm.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.self_attn.k_proj.weight": "model-00030-of-00048.safetensors",
+ "model.layers.20.self_attn.o_proj.weight": "model-00030-of-00048.safetensors",
+ "model.layers.20.self_attn.q_proj.weight": "model-00030-of-00048.safetensors",
+ "model.layers.20.self_attn.v_proj.weight": "model-00030-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00033-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00033-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00033-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00033-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00033-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00033-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00033-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00033-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.gate.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.input_layernorm.weight": "model-00033-of-00048.safetensors",
+ "model.layers.21.post_attention_layernorm.weight": "model-00033-of-00048.safetensors",
+ "model.layers.21.self_attn.k_proj.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.self_attn.o_proj.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.self_attn.q_proj.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.self_attn.v_proj.weight": "model-00032-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.gate.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.input_layernorm.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.post_attention_layernorm.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.self_attn.k_proj.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.self_attn.o_proj.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.self_attn.q_proj.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.self_attn.v_proj.weight": "model-00033-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00036-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00036-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00036-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00036-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00036-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00036-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00036-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.gate.weight": "model-00034-of-00048.safetensors",
+ "model.layers.23.input_layernorm.weight": "model-00036-of-00048.safetensors",
+ "model.layers.23.post_attention_layernorm.weight": "model-00036-of-00048.safetensors",
+ "model.layers.23.self_attn.k_proj.weight": "model-00034-of-00048.safetensors",
+ "model.layers.23.self_attn.o_proj.weight": "model-00034-of-00048.safetensors",
+ "model.layers.23.self_attn.q_proj.weight": "model-00034-of-00048.safetensors",
+ "model.layers.23.self_attn.v_proj.weight": "model-00034-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.gate.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.input_layernorm.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.post_attention_layernorm.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.self_attn.k_proj.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.self_attn.o_proj.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.self_attn.q_proj.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.self_attn.v_proj.weight": "model-00036-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00037-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00039-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00039-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00039-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00039-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00039-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00039-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.gate.weight": "model-00037-of-00048.safetensors",
+ "model.layers.25.input_layernorm.weight": "model-00039-of-00048.safetensors",
+ "model.layers.25.post_attention_layernorm.weight": "model-00039-of-00048.safetensors",
+ "model.layers.25.self_attn.k_proj.weight": "model-00037-of-00048.safetensors",
+ "model.layers.25.self_attn.o_proj.weight": "model-00037-of-00048.safetensors",
+ "model.layers.25.self_attn.q_proj.weight": "model-00037-of-00048.safetensors",
+ "model.layers.25.self_attn.v_proj.weight": "model-00037-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.gate.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.input_layernorm.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.post_attention_layernorm.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.self_attn.k_proj.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.self_attn.o_proj.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.self_attn.q_proj.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.self_attn.v_proj.weight": "model-00039-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00040-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00040-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00042-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00042-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00042-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00042-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00042-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.gate.weight": "model-00040-of-00048.safetensors",
+ "model.layers.27.input_layernorm.weight": "model-00042-of-00048.safetensors",
+ "model.layers.27.post_attention_layernorm.weight": "model-00042-of-00048.safetensors",
+ "model.layers.27.self_attn.k_proj.weight": "model-00040-of-00048.safetensors",
+ "model.layers.27.self_attn.o_proj.weight": "model-00040-of-00048.safetensors",
+ "model.layers.27.self_attn.q_proj.weight": "model-00040-of-00048.safetensors",
+ "model.layers.27.self_attn.v_proj.weight": "model-00040-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.gate.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.input_layernorm.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.post_attention_layernorm.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.self_attn.k_proj.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.self_attn.o_proj.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.self_attn.q_proj.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.self_attn.v_proj.weight": "model-00042-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00043-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00043-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00043-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00045-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00045-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00045-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00045-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.gate.weight": "model-00043-of-00048.safetensors",
+ "model.layers.29.input_layernorm.weight": "model-00045-of-00048.safetensors",
+ "model.layers.29.post_attention_layernorm.weight": "model-00045-of-00048.safetensors",
+ "model.layers.29.self_attn.k_proj.weight": "model-00043-of-00048.safetensors",
+ "model.layers.29.self_attn.o_proj.weight": "model-00043-of-00048.safetensors",
+ "model.layers.29.self_attn.q_proj.weight": "model-00043-of-00048.safetensors",
+ "model.layers.29.self_attn.v_proj.weight": "model-00043-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.gate.weight": "model-00005-of-00048.safetensors",
+ "model.layers.3.input_layernorm.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.post_attention_layernorm.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.self_attn.k_proj.weight": "model-00005-of-00048.safetensors",
+ "model.layers.3.self_attn.o_proj.weight": "model-00005-of-00048.safetensors",
+ "model.layers.3.self_attn.q_proj.weight": "model-00005-of-00048.safetensors",
+ "model.layers.3.self_attn.v_proj.weight": "model-00005-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.gate.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.input_layernorm.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.post_attention_layernorm.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.self_attn.k_proj.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.self_attn.o_proj.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.self_attn.q_proj.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.self_attn.v_proj.weight": "model-00045-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00046-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00046-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00046-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00046-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00048-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00048-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00048-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.gate.weight": "model-00046-of-00048.safetensors",
+ "model.layers.31.input_layernorm.weight": "model-00048-of-00048.safetensors",
+ "model.layers.31.post_attention_layernorm.weight": "model-00048-of-00048.safetensors",
+ "model.layers.31.self_attn.k_proj.weight": "model-00046-of-00048.safetensors",
+ "model.layers.31.self_attn.o_proj.weight": "model-00046-of-00048.safetensors",
+ "model.layers.31.self_attn.q_proj.weight": "model-00046-of-00048.safetensors",
+ "model.layers.31.self_attn.v_proj.weight": "model-00046-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00008-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00008-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00008-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00008-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00008-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00008-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.gate.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.input_layernorm.weight": "model-00008-of-00048.safetensors",
+ "model.layers.4.post_attention_layernorm.weight": "model-00008-of-00048.safetensors",
+ "model.layers.4.self_attn.k_proj.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.self_attn.o_proj.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.self_attn.q_proj.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.self_attn.v_proj.weight": "model-00007-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.gate.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.input_layernorm.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.post_attention_layernorm.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.self_attn.k_proj.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.self_attn.o_proj.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.self_attn.q_proj.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.self_attn.v_proj.weight": "model-00008-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00011-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00011-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00011-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00011-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00011-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00011-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00011-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.gate.weight": "model-00009-of-00048.safetensors",
+ "model.layers.6.input_layernorm.weight": "model-00011-of-00048.safetensors",
+ "model.layers.6.post_attention_layernorm.weight": "model-00011-of-00048.safetensors",
+ "model.layers.6.self_attn.k_proj.weight": "model-00009-of-00048.safetensors",
+ "model.layers.6.self_attn.o_proj.weight": "model-00009-of-00048.safetensors",
+ "model.layers.6.self_attn.q_proj.weight": "model-00009-of-00048.safetensors",
+ "model.layers.6.self_attn.v_proj.weight": "model-00009-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.gate.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.input_layernorm.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.post_attention_layernorm.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.self_attn.k_proj.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.self_attn.o_proj.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.self_attn.q_proj.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.self_attn.v_proj.weight": "model-00011-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00012-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00014-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00014-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00014-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00014-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00014-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00014-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.gate.weight": "model-00012-of-00048.safetensors",
+ "model.layers.8.input_layernorm.weight": "model-00014-of-00048.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00014-of-00048.safetensors",
+ "model.layers.8.self_attn.k_proj.weight": "model-00012-of-00048.safetensors",
+ "model.layers.8.self_attn.o_proj.weight": "model-00012-of-00048.safetensors",
+ "model.layers.8.self_attn.q_proj.weight": "model-00012-of-00048.safetensors",
+ "model.layers.8.self_attn.v_proj.weight": "model-00012-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.gate.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.input_layernorm.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.post_attention_layernorm.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.self_attn.k_proj.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.self_attn.o_proj.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.self_attn.q_proj.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.self_attn.v_proj.weight": "model-00014-of-00048.safetensors",
+ "model.norm.weight": "model-00048-of-00048.safetensors"
+ }
+}
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..492d4b2966a1763442d426d880dbc29f94906e4c
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,30 @@
+{
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/tokenizer.model b/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6
--- /dev/null
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+size 493443
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..b0c70de82536534759e49e6b6cf406edc009cdc8
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,46 @@
+{
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "add_prefix_space": true,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{{ '<s>' + system_message }}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '[INST] ' + content + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ content + '</s>' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "padding_side": "left",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "split_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+}