zenciaecho CharlieFRuan committed on
Commit
693a846
·
0 Parent(s):

Duplicate from mlc-ai/gemma-2-2b-it-q4f32_1-MLC

Browse files

Co-authored-by: Charlie Ruan <CharlieFRuan@users.noreply.huggingface.co>

This view is limited to 50 files because it contains too many changes.   See raw diff
.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: mlc-llm
3
+ base_model: google/gemma-2-2b-it
4
+ tags:
5
+ - mlc-llm
6
+ - web-llm
7
+ ---
8
+
9
+ # gemma-2-2b-it-q4f32_1-MLC
10
+
11
+ This is the [gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it) model in MLC format `q4f32_1`.
12
+ The model can be used with the [MLC-LLM](https://github.com/mlc-ai/mlc-llm) and [WebLLM](https://github.com/mlc-ai/web-llm) projects.
13
+
14
+ ## Example Usage
15
+
16
+ Here are some examples of using this model in MLC LLM.
17
+ Before running the examples, please install MLC LLM by following the [installation documentation](https://llm.mlc.ai/docs/install/mlc_llm.html#install-mlc-packages).
18
+
19
+ ### Chat
20
+
21
+ In the command line, run:
22
+ ```bash
23
+ mlc_llm chat HF://mlc-ai/gemma-2-2b-it-q4f32_1-MLC
24
+ ```
25
+
26
+ ### REST Server
27
+
28
+ In the command line, run:
29
+ ```bash
30
+ mlc_llm serve HF://mlc-ai/gemma-2-2b-it-q4f32_1-MLC
31
+ ```
32
+
33
+ ### Python API
34
+
35
+ ```python
36
+ from mlc_llm import MLCEngine
37
+
38
+ # Create engine
39
+ model = "HF://mlc-ai/gemma-2-2b-it-q4f32_1-MLC"
40
+ engine = MLCEngine(model)
41
+
42
+ # Run chat completion in OpenAI API.
43
+ for response in engine.chat.completions.create(
44
+ messages=[{"role": "user", "content": "What is the meaning of life?"}],
45
+ model=model,
46
+ stream=True,
47
+ ):
48
+ for choice in response.choices:
49
+ print(choice.delta.content, end="", flush=True)
50
+ print("\n")
51
+
52
+ engine.terminate()
53
+ ```
54
+
55
+ ## Documentation
56
+
57
+ For more information on the MLC LLM project, please visit our [documentation](https://llm.mlc.ai/docs/) and [GitHub repo](https://github.com/mlc-ai/mlc-llm).
mlc-chat-config.json ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.1.0",
3
+ "model_type": "gemma2",
4
+ "quantization": "q4f32_1",
5
+ "model_config": {
6
+ "hidden_size": 2304,
7
+ "intermediate_size": 9216,
8
+ "attention_bias": false,
9
+ "num_attention_heads": 8,
10
+ "num_key_value_heads": 4,
11
+ "head_dim": 256,
12
+ "num_hidden_layers": 26,
13
+ "rms_norm_eps": 1e-06,
14
+ "vocab_size": 256000,
15
+ "hidden_activation": "gelu_pytorch_tanh",
16
+ "position_embedding_base": 10000.0,
17
+ "context_window_size": 4096,
18
+ "prefill_chunk_size": 2048,
19
+ "tensor_parallel_shards": 1,
20
+ "max_batch_size": 80,
21
+ "attn_logit_softcapping": 50.0,
22
+ "final_logit_softcapping": 30.0,
23
+ "query_pre_attn_scalar": 256,
24
+ "sliding_window": 4096
25
+ },
26
+ "vocab_size": 256000,
27
+ "context_window_size": 4096,
28
+ "sliding_window_size": -1,
29
+ "prefill_chunk_size": 2048,
30
+ "attention_sink_size": -1,
31
+ "tensor_parallel_shards": 1,
32
+ "temperature": 1.0,
33
+ "presence_penalty": 0.0,
34
+ "frequency_penalty": 0.0,
35
+ "repetition_penalty": 1.0,
36
+ "top_p": 1.0,
37
+ "tokenizer_files": [
38
+ "tokenizer.model",
39
+ "tokenizer.json",
40
+ "tokenizer_config.json"
41
+ ],
42
+ "tokenizer_info": {
43
+ "token_postproc_method": "byte_fallback",
44
+ "prepend_space_in_encode": false,
45
+ "strip_space_in_decode": false
46
+ },
47
+ "conv_template": {
48
+ "name": "gemma_instruction",
49
+ "system_template": "<bos>{system_message}",
50
+ "system_message": "",
51
+ "system_prefix_token_ids": [
52
+ 2
53
+ ],
54
+ "add_role_after_system_message": true,
55
+ "roles": {
56
+ "user": "<start_of_turn>user",
57
+ "assistant": "<start_of_turn>model"
58
+ },
59
+ "role_templates": {
60
+ "user": "{user_message}",
61
+ "assistant": "{assistant_message}",
62
+ "tool": "{tool_message}"
63
+ },
64
+ "messages": [],
65
+ "seps": [
66
+ "<end_of_turn>\n"
67
+ ],
68
+ "role_content_sep": "\n",
69
+ "role_empty_sep": "\n",
70
+ "stop_str": [
71
+ "<end_of_turn>"
72
+ ],
73
+ "stop_token_ids": [
74
+ 1,
75
+ 107
76
+ ],
77
+ "function_string": "",
78
+ "use_function_calling": false
79
+ },
80
+ "pad_token_id": 0,
81
+ "bos_token_id": 2,
82
+ "eos_token_id": [
83
+ 1,
84
+ 107
85
+ ]
86
+ }
ndarray-cache-b16.json ADDED
The diff for this file is too large to render. See raw diff
 
ndarray-cache.json ADDED
The diff for this file is too large to render. See raw diff
 
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd34d5cf2da3bf6255209cac3b16aa1cafbd6a794422a8ddedd0ef2d72bceadb
3
+ size 294912000
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0278894c0e3f367deeb93d7bb814eeea074e098856f2a1d53483c9c5deaac109
3
+ size 36864000
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e23090425222d3f77dd1d4f5677dec8661e7eed4859456e0ffecb78ab17c0c92
3
+ size 33214464
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec09a4b1db2717e2d6380e19c37e93dacbcacea5267d2371df006bd0c70e16c7
3
+ size 33177600
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c393d347bbfa01e2f498670abe787c3043bb028e4aeadebc967e3e99bd1ca7e
3
+ size 21233664
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da94d287dcced52a7ee1749f3ddd7c526d2017d01c2c7ac4fb009aa9b9309f9c
3
+ size 33214464
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e0244199f218d8cd36cad67195215c307c47215456a62416cd4bca42a9188f3
3
+ size 33177600
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:748f2c98e373f378762fad60f742744803f8cf6a35be1ac26fc62727071d05fc
3
+ size 21233664
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b1d800632d819490baf0580ad121289c6545d15035c25eb141ff89876b87de2
3
+ size 33214464
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abe79b6a579d7e3e1e04a2fb4f00866f2bf4484880d838a6686262561147d73f
3
+ size 33177600
params_shard_18.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99b4301af24589f38f3b2cb5de3eac3f9e2c9b4443bd1728c5c2cf6a2296d3fe
3
+ size 21233664
params_shard_19.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60534169cd18723d46b81a0c492abb8db3674c032ed9e626d48cdd09e145350b
3
+ size 33214464
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:547acbba6be5227fff0a56518d2812320f55a38b2800e3a22618739043888eef
3
+ size 33182208
params_shard_20.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d68650870c3bdadcd540b122073a50bc7dbc927d7f3adf0cfb8546bec93bcb9
3
+ size 33177600
params_shard_21.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18876f80f736565ed44196aa9457f0d66a372dfb0e6693167588c6cc8b7122e6
3
+ size 21233664
params_shard_22.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af124ee152e3dc44082cb8095a63dbd0cf43160a26b507c96997369a6c5943d6
3
+ size 33214464
params_shard_23.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83196483a539ff9b72c53d024e1ff6e74e9e92e68fa3d86dadf4462c8c0997a1
3
+ size 33177600
params_shard_24.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29d4ff7746c8f627ffbdff23b7c436da89965e006f6d89e99d495bf658335991
3
+ size 21233664
params_shard_25.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:809ec55d93d3dd6595575418c8cac023ca041babce0db7c9125e676545c4bb47
3
+ size 33214464
params_shard_26.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d3f17227b5c7a53fd1df3adc246d2217880a62f7c6ce698cb15b283e118a912
3
+ size 33177600
params_shard_27.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e351a717acb846ba67a55c39af2de2a625be232aa2663b65b23ae5db17beda5a
3
+ size 31864320
params_shard_28.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d36960e78dfd7be7cdeec28038e24c9a404981ef994f7183b08f8e0b7f8b6a9
3
+ size 21233664
params_shard_29.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:658f32026d42fa21d56018e9925281ba2aa769813e0ced994928c97393d2377c
3
+ size 33200640
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:895c3b26b0f6c2aba5fbeee23611a68e6ad0ed14cee52c6cc5a96889f6c2af96
3
+ size 21233664
params_shard_30.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe8ca0663d780d35014380212881ef32723cea1a700409f2b5885dae40acec42
3
+ size 33177600
params_shard_31.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58ccb4e966a7c799f69883641d47916085090f3feadece430555f82e951ba43e
3
+ size 21233664
params_shard_32.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fd6ddc5a99a9e1a327c3efe326cd982a844013379ae442421cff05ccb04b9df
3
+ size 33214464
params_shard_33.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f5ab5749930d73e63df4da479a3fdb3ef3d2ae0e1f89bfba4d5211dc509b0e1
3
+ size 33177600
params_shard_34.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6662d33dd79071849008a10e1734b00119c05732b074121c2ed3b009a364c79
3
+ size 21233664
params_shard_35.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84728ac913559da458a631cd079d1d2eb38ac3592d6a16fd0dd7961acb632448
3
+ size 33214464
params_shard_36.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4579ed8c6008c03f5f3f98fb7305f4d128218ebd40d85c7f0dfe7511dcf3f2e
3
+ size 33177600
params_shard_37.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ac89d9c6d3c308e5a7cc96a1faa50f24b7f2afb7cfb2c33ea0e8c8f2919e59e
3
+ size 21233664
params_shard_38.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83a902dc0092b46a3c0f0090839a93570873d70ca09babba19d9f02c9f4759bf
3
+ size 33214464
params_shard_39.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e48df868353d9a57f13799cd02733d0ac2e59b86192b16d07c630dd4f1740b99
3
+ size 21233664
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3f2f955e58e19b09f584faad624fdcd2e99cca12dfdff02ea9897edcb4ced82
3
+ size 33214464
params_shard_40.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db3fe53a354a747300c6ac0f3ea3e9acbeac7ec0826c6cbb07addff89a077f77
3
+ size 31882752
params_shard_41.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c11bcb5c67a907c9a933e58869ff722973bb5f3dcb291e7fd1fd0dd45b8aad9e
3
+ size 2658816
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81af1789aeefa3c07931e7db4fe62cf607bf9fd4cf892c29abd6f7bb327199cd
3
+ size 33177600
params_shard_6.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcd4683a69d748fa66c43a6d11828c1dc4bdccf8e938097ad810ce471f02c8f8
3
+ size 21233664
params_shard_7.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4b2079089a3027167e8cdb0e5a03427653eb6cf69326f3cf44db24b5f18b1ca
3
+ size 33214464
params_shard_8.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:097c51741a099719e605817a60ec5b16d398446d60e54e8dcfa379e48eb558f8
3
+ size 33177600
params_shard_9.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:941ec5c7bb13288160034bd1b70d0049134cae1efc17b09a9868d29f9cac90d1
3
+ size 21233664
tensor-cache.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f289bc05132635a8bc7aca7aa21255efd5e18f3710f43e3cdb96bcd41be4922
3
+ size 17525357
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2
3
+ size 4241003