yongqiang committed on
Commit
80ad90c
·
1 Parent(s): de2d728

Initial AX620E axllm serve package

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +7 -0
  2. .gitignore +3 -0
  3. README.md +193 -0
  4. assets/gradio_cpp_demo_0.png +3 -0
  5. assets/gradio_cpp_demo_1.png +3 -0
  6. assets/gradio_demo_0.png +3 -0
  7. assets/gradio_demo_1.png +3 -0
  8. bin/axllm +3 -0
  9. bin/axllm.version.json +22 -0
  10. config.json +17 -0
  11. gradio_cpp_backend.py +226 -0
  12. gradio_demo.py +304 -0
  13. hymt1-5-1.8b_tokenizer.txt +0 -0
  14. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l0_together.axmodel +3 -0
  15. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l10_together.axmodel +3 -0
  16. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l11_together.axmodel +3 -0
  17. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l12_together.axmodel +3 -0
  18. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l13_together.axmodel +3 -0
  19. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l14_together.axmodel +3 -0
  20. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l15_together.axmodel +3 -0
  21. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l16_together.axmodel +3 -0
  22. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l17_together.axmodel +3 -0
  23. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l18_together.axmodel +3 -0
  24. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l19_together.axmodel +3 -0
  25. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l1_together.axmodel +3 -0
  26. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l20_together.axmodel +3 -0
  27. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l21_together.axmodel +3 -0
  28. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l22_together.axmodel +3 -0
  29. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l23_together.axmodel +3 -0
  30. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l24_together.axmodel +3 -0
  31. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l25_together.axmodel +3 -0
  32. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l26_together.axmodel +3 -0
  33. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l27_together.axmodel +3 -0
  34. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l28_together.axmodel +3 -0
  35. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l29_together.axmodel +3 -0
  36. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l2_together.axmodel +3 -0
  37. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l30_together.axmodel +3 -0
  38. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l31_together.axmodel +3 -0
  39. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l3_together.axmodel +3 -0
  40. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l4_together.axmodel +3 -0
  41. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l5_together.axmodel +3 -0
  42. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l6_together.axmodel +3 -0
  43. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l7_together.axmodel +3 -0
  44. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l8_together.axmodel +3 -0
  45. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l9_together.axmodel +3 -0
  46. hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_post.axmodel +3 -0
  47. hymt1-5_1k_ax620e_axmodel/model.embed_tokens.weight.bfloat16.bin +3 -0
  48. hymt1-5_1k_ax620e_axmodel/model.embed_tokens.weight.float32.bin +3 -0
  49. hymt1-5_1k_ax620e_axmodel/model.embed_tokens.weight.npy +3 -0
  50. hymt1-5_tokenizer/.gitattributes +35 -0
.gitattributes CHANGED
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.axmodel filter=lfs diff=lfs merge=lfs -text
37
+ main_api_ax620e filter=lfs diff=lfs merge=lfs -text
38
+ main_ax620e filter=lfs diff=lfs merge=lfs -text
39
+ *.png filter=lfs diff=lfs merge=lfs -text
40
+ *.jpg filter=lfs diff=lfs merge=lfs -text
41
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
42
+ bin/axllm filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ __pycache__
2
+ *tmp/
3
+
README.md CHANGED
@@ -1,3 +1,196 @@
1
  ---
 
2
  license: bsd-3-clause
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ library_name: transformers
3
  license: bsd-3-clause
4
+ base_model:
5
+ - tencent/HY-MT1.5-1.8B
6
+ tags:
7
+ - HY-MT1.5
8
+ - HY-MT1.5-1.8B
9
+ - HY-MT1.5-1.8B_GPTQ_INT4
10
+ - Int4
11
+ - translation
12
+ language:
13
+ - zh
14
+ - en
15
+ - fr
16
+ - pt
17
+ - es
18
+ - ja
19
+ - tr
20
+ - ru
21
+ - ar
22
+ - ko
23
+ - th
24
+ - it
25
+ - de
26
+ - vi
27
+ - ms
28
+ - id
29
+ - tl
30
+ - hi
31
+ - pl
32
+ - cs
33
+ - nl
34
+ - km
35
+ - my
36
+ - fa
37
+ - gu
38
+ - ur
39
+ - te
40
+ - mr
41
+ - he
42
+ - bn
43
+ - ta
44
+ - uk
45
+ - bo
46
+ - kk
47
+ - mn
48
+ - ug
49
  ---
50
+
51
+ # HY-MT1.5-1.8B_GPTQ_INT4-AX620E
52
+
53
+ This version of HY-MT1.5-1.8B_GPTQ_INT4 has been converted to run on the Axera NPU using **w4a16** quantization.
54
+
55
+ This model has been optimized with the following LoRA:
56
+
57
+ Compatible with Pulsar2 version: > 5.1-patch1-dirty.
58
+
59
+ Please note that the context of the model is 2k and the maximum prefill length is 1k.
60
+
61
+ ## Conversion tool links
62
+
63
+ If you are interested in model conversion, you can export the axmodel yourself starting from the original repository:
64
+
65
+ https://huggingface.co/tencent/HY-MT1.5-1.8B
66
+
67
+ [How to Convert LLM from Huggingface to axmodel](https://github.com/AXERA-TECH/HY-MT1.5-1.8B_GPTQ_INT4.axera/tree/main/model_convert)
68
+
69
+ [AXera NPU HOST LLM Runtime](https://github.com/AXERA-TECH/ax-llm/tree/ax-internvl)
70
+
71
+ [AXera NPU AXCL LLM Runtime](https://github.com/AXERA-TECH/ax-llm/tree/axcl-internvl)
72
+
73
+ ## Supported Platforms
74
+
75
+ - AX620E
76
+ - AX620E DEMO Board
77
+
78
+ |Chip|TTFT|Decode speed (w4a16)|
79
+ |--|--|--|
80
+ |AX620E| 11538.6 ms (512 prefill) | 4.05 tokens/sec|
81
+
82
+
83
+ ## How to use
84
+
85
+ Download all files from this repository to the device
86
+
87
+ ```sh
88
+ $ tree -L 1
89
+ .
90
+ ├── assets
91
+ ├── config.json
92
+ ├── gradio_demo.py
93
+ ├── hymt1-5_1k_ax620e_axmodel
94
+ ├── hymt1-5_tokenizer
95
+ ├── infer_axmodel.py
96
+ ├── infer_torch.py
97
+ ├── README.md
98
+ └── utils
99
+
100
+ 5 directories, 5 files
101
+ ```
102
+
103
+ ### Install transformers
104
+
105
+ ```
106
+ pip install transformers==4.57.1
107
+ ```
108
+
109
+ ### Inference with AX620E Demo Board
110
+
111
+ Start the OpenAI-compatible API with `axllm serve`:
112
+
113
+ ```sh
114
+ axllm serve . --port 8000
115
+ ```
116
+
117
+ This repository also ships an aarch64 `axllm` binary, which you can try running directly from the repository directory:
118
+
119
+ ```sh
120
+ chmod +x ./bin/axllm
121
+ ./bin/axllm serve . --port 8000
122
+ ```
123
+
124
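+ This binary is the same packaged build that ships in the AX650 repository; its provenance and checksum are recorded in `bin/axllm.version.json`. The HY-MT OpenAI API has so far been verified on AX650; on AX620E boards, please confirm the behavior on real hardware.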
125
+
126
+ Interactive translation using the `C++ Gradio Demo`:
127
+
128
+ ```sh
129
+ python3 gradio_cpp_backend.py --api_base http://127.0.0.1:8000 --model AXERA-TECH/HY-MT1.5-1.8B_GPTQ_INT4-AX620E
130
+ ```
131
+
132
+ English-to-Chinese translation:
133
+
134
+ ![demo_1](assets/gradio_cpp_demo_0.png)
135
+
136
+ Chinese-to-Japanese translation:
137
+
138
+ ![demo_2](assets/gradio_cpp_demo_1.png)
139
+
140
+ If you want to run translation tasks from the command-line terminal, you can run the following command:
141
+
142
+ ```sh
143
+ $ ./run_hymt1-5_1.8b_ax620e.sh
144
+ [I][ Init][ 267]: LLM init ok
145
+ [I][ Init][ 269]: Left CMM:3711 MB
146
+ Type "q" to exit, Ctrl+c to stop current running
147
+ prompt(输入q退出) >> 今天是个好日子,适合读书和运动.
148
+ [I][ Run][ 349]: input token num : 23, prefill_split_num : 1
149
+ [I][ Run][ 388]: input_num_token:23
150
+ [I][ Run][ 581]: ttft: 157.15 ms
151
+ Today is a great day. It’s the perfect time to read and exercise.
152
+
153
+ [N][ Run][ 719]: hit eos,avg 13.61 token/s
154
+
155
+ [I][ Run][ 724]: decode profile: infer 58.079 ms/token, cache_copy 0.110, post 14.071, callback 0.018, tokens 17
156
+ ```
157
+
158
+ ---
159
+
160
+ Interactive conversations using the `Python Gradio Demo`:
161
+
162
+ ```bash
163
+ $ python3 gradio_demo.py --axmodel_path hymt1-5_1k_ax620e_axmodel --max_seq_len 1023
164
+ ```
165
+
166
+ English-to-Chinese translation:
167
+
168
+ ![demo_1](assets/gradio_demo_0.png)
169
+
170
+ Chinese-to-Japanese translation:
171
+
172
+ ![demo_2](assets/gradio_demo_1.png)
173
+
174
+ ---
175
+
176
+ Run the following command on the Axera board to translate a single sentence from the command line:
177
+
178
+ ```sh
179
+ $ python3 infer_axmodel.py -q "It’s on the house."
180
+
181
+ # output
182
+ Init InferenceSession: 100%|██████████████████████████████████████████████████████████| 32/32 [00:02<00:00, 14.55it/s]
183
+ [INFO] Using provider: AxEngineExecutionProvider
184
+ [INFO] Model type: 2 (triple core)
185
+ [INFO] Compiler version: 5.1-patch1-dirty 43f8606b-dirty
186
+ Model loaded successfully!
187
+ slice_indices: [0]
188
+ Slice prefill done: 0
189
+ answer >> 这是免费的。
190
+ ```
191
+
192
+ If you are testing on an `AX620E` demo board, run the command below:
193
+
194
+ ```sh
195
+ python3 gradio_demo.py --axmodel_path hymt1-5_1k_ax620e_axmodel --max_seq_len 1023
196
+ ```
assets/gradio_cpp_demo_0.png ADDED

Git LFS Details

  • SHA256: eb7ee3f7a1ecc594a765d3e6620d58df15662a3bf649fafa129482cb0ae3efcd
  • Pointer size: 131 Bytes
  • Size of remote file: 395 kB
assets/gradio_cpp_demo_1.png ADDED

Git LFS Details

  • SHA256: 2b50670731bda2f522a6bb989121e5fc15f17a8c37ad34715fe64770fa3a46d3
  • Pointer size: 131 Bytes
  • Size of remote file: 467 kB
assets/gradio_demo_0.png ADDED

Git LFS Details

  • SHA256: 7a910448fc9aeca15dec24ee8e4afe1329e0114541eab5da1a8a1bf70f40785d
  • Pointer size: 131 Bytes
  • Size of remote file: 383 kB
assets/gradio_demo_1.png ADDED

Git LFS Details

  • SHA256: 930c90c5b19bbc5138178a5a46bad09f378d56d78e5d9308faacd4f63abfa58f
  • Pointer size: 131 Bytes
  • Size of remote file: 424 kB
bin/axllm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c0e4dd81015e3591e46fc4ffca40c0a286bf43d77110fb686d5973ce9bb5749
3
+ size 2265520
bin/axllm.version.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "binary": "bin/axllm",
3
+ "target": "aarch64 binary built from ax-hymt1_5",
4
+ "notes": "This is the same packaged axllm binary as the AX650 repository. The binary has been verified on AX650 with HY-MT OpenAI serving. AX620E board validation for axllm serve is still pending.",
5
+ "ax_llm_branch": "ax-hymt1_5",
6
+ "ax_llm_commit": "53c45c91b6e0fcd2965ab1535a0e29f837665fd4",
7
+ "openai_api_cpp_commit": "f56cf8c296d1002f6602226db392325ba42f6775",
8
+ "build_command": "cmake --build build --target install -j$(nproc)",
9
+ "sha256": "7c0e4dd81015e3591e46fc4ffca40c0a286bf43d77110fb686d5973ce9bb5749",
10
+ "verified": {
11
+ "date": "2026-05-25",
12
+ "board": "AX650",
13
+ "command": "./bin/axllm serve . --port 18120",
14
+ "api_url": "http://10.168.232.217:18120/v1/chat/completions",
15
+ "model": "AXERA-TECH/HY-MT1.5-1.8B_GPTQ_INT4",
16
+ "smoke_tests": [
17
+ "GET /v1/models returned AXERA-TECH/HY-MT1.5-1.8B_GPTQ_INT4 only",
18
+ "English to Chinese request returned 这是免费的。",
19
+ "Natural-language request 请将下面的文字翻译成日文 returned Japanese text without target_language"
20
+ ]
21
+ }
22
+ }
config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "AXERA-TECH/HY-MT1.5-1.8B_GPTQ_INT4-AX620E",
3
+ "url_tokenizer_model": "hymt1-5-1.8b_tokenizer.txt",
4
+ "tokenizer_type": "HunYuan",
5
+ "post_config_path": "post_config.json",
6
+ "template_filename_axmodel": "hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l%d_together.axmodel",
7
+ "filename_post_axmodel": "hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_post.axmodel",
8
+ "filename_tokens_embed": "hymt1-5_1k_ax620e_axmodel/model.embed_tokens.weight.bfloat16.bin",
9
+ "axmodel_num": 32,
10
+ "tokens_embed_num": 120818,
11
+ "tokens_embed_size": 2048,
12
+ "bos": false,
13
+ "eos": false,
14
+ "use_mmap_load_embed": true,
15
+ "use_mmap_load_layer": false,
16
+ "server_timeout_ms": 300000
17
+ }
gradio_cpp_backend.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import socket
3
+ import json
4
+ import requests
5
+ import gradio as gr
6
+
7
+ DEFAULT_LANGUAGES = [
8
+ "English",
9
+ "Chinese",
10
+ "Japanese",
11
+ "Korean",
12
+ "French",
13
+ "German",
14
+ "Spanish",
15
+ "Italian",
16
+ "Portuguese",
17
+ "Russian",
18
+ "Arabic",
19
+ "Hindi",
20
+ "Bengali",
21
+ "Thai",
22
+ "Vietnamese",
23
+ "Indonesian",
24
+ "Turkish",
25
+ "Polish",
26
+ "Dutch",
27
+ "Swedish",
28
+ "Danish",
29
+ "Norwegian",
30
+ "Finnish",
31
+ "Greek",
32
+ "Czech",
33
+ "Hungarian",
34
+ "Romanian",
35
+ "Ukrainian",
36
+ "Malay",
37
+ "Filipino",
38
+ "Urdu",
39
+ "Hebrew",
40
+ "Persian",
41
+ ]
42
+
43
+
44
+ def _get_ipv4_address() -> str:
45
+ try:
46
+ s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
47
+ s.connect(("8.8.8.8", 80))
48
+ ip = s.getsockname()[0]
49
+ s.close()
50
+ return ip
51
+ except Exception:
52
+ return "127.0.0.1"
53
+
54
+
55
+ def build_prompt(source_text: str, target_language: str, use_zh_template: bool) -> str:
56
+ if use_zh_template:
57
+ return (
58
+ f"将以下文本翻译为{target_language},注意只需要输出翻译后的结果,不要额外解释:\n"
59
+ f"{source_text}"
60
+ )
61
+ return (
62
+ f"Translate the following segment into {target_language}, without additional explanation.\n"
63
+ f"{source_text}"
64
+ )
65
+
66
+
67
+ def create_demo(api_base: str, model_name: str):
68
+ def translate_stream(
69
+ text,
70
+ target_language,
71
+ use_zh_template,
72
+ temperature,
73
+ top_p,
74
+ top_k,
75
+ repetition_penalty,
76
+ max_new_tokens,
77
+ ):
78
+ if not text or not text.strip():
79
+ yield ""
80
+ return
81
+
82
+ payload = {
83
+ "model": model_name,
84
+ "messages": [{"role": "user", "content": text.strip()}],
85
+ "stream": True,
86
+ "temperature": temperature,
87
+ "top_p": top_p,
88
+ "top_k": int(top_k),
89
+ "repetition_penalty": repetition_penalty,
90
+ "max_tokens": int(max_new_tokens),
91
+ "target_language": target_language,
92
+ "use_zh_template": bool(use_zh_template),
93
+ }
94
+
95
+ url = f"{api_base}/v1/chat/completions"
96
+ with requests.post(url, json=payload, stream=True, timeout=300) as resp:
97
+ resp.raise_for_status()
98
+ resp.encoding = "utf-8"
99
+ buffer = ""
100
+ for raw_line in resp.iter_lines(decode_unicode=False):
101
+ if not raw_line:
102
+ continue
103
+ try:
104
+ line = raw_line.decode("utf-8")
105
+ except Exception:
106
+ line = raw_line.decode("utf-8", errors="replace")
107
+
108
+ if line.startswith("data: "):
109
+ data = line[len("data: "):].strip()
110
+ else:
111
+ data = line.strip()
112
+ if data == "[DONE]":
113
+ break
114
+ if data:
115
+ try:
116
+ obj = json.loads(data)
117
+ delta = obj.get("choices", [{}])[0].get("delta", {})
118
+ content = delta.get("content", "")
119
+ if content:
120
+ buffer += content
121
+ yield buffer.strip()
122
+ except Exception:
123
+ continue
124
+
125
+ with gr.Blocks(title="HY-MT1.5-1.8B_GPTQ_INT4 Multilingual Translation (C++ Backend)") as demo:
126
+ gr.Markdown("## HY-MT1.5-1.8B_GPTQ_INT4 Multilingual Translation (C++ Backend)")
127
+
128
+ with gr.Group():
129
+ input_text = gr.Textbox(
130
+ label="Input Text",
131
+ placeholder="Please enter the text you want to translate...",
132
+ lines=6,
133
+ )
134
+
135
+ with gr.Group():
136
+ with gr.Row(equal_height=True):
137
+ target_language = gr.Dropdown(
138
+ choices=DEFAULT_LANGUAGES,
139
+ value="English",
140
+ label="Target Language",
141
+ )
142
+ use_zh_template = gr.Checkbox(
143
+ label="Use Chinese Prompt Template",
144
+ value=False,
145
+ )
146
+ with gr.Group():
147
+ with gr.Row(equal_height=True):
148
+ temperature = gr.Slider(
149
+ minimum=0.1,
150
+ maximum=1.5,
151
+ value=0.7,
152
+ step=0.05,
153
+ label="Temperature",
154
+ )
155
+ top_p = gr.Slider(
156
+ minimum=0.1,
157
+ maximum=1.0,
158
+ value=0.6,
159
+ step=0.05,
160
+ label="Top-p",
161
+ )
162
+ top_k = gr.Slider(
163
+ minimum=1,
164
+ maximum=100,
165
+ value=20,
166
+ step=1,
167
+ label="Top-k",
168
+ )
169
+
170
+ with gr.Group():
171
+ with gr.Row(equal_height=True):
172
+ repetition_penalty = gr.Slider(
173
+ minimum=1.0,
174
+ maximum=1.5,
175
+ value=1.05,
176
+ step=0.01,
177
+ label="Repetition Penalty",
178
+ )
179
+ max_new_tokens = gr.Slider(
180
+ minimum=1,
181
+ maximum=1024,
182
+ value=512,
183
+ step=1,
184
+ label="Max New Tokens",
185
+ )
186
+
187
+ translate_btn = gr.Button("Translate", variant="primary")
188
+ output_text = gr.Textbox(
189
+ label="Translation Result",
190
+ lines=6,
191
+ interactive=False,
192
+ )
193
+
194
+ translate_btn.click(
195
+ translate_stream,
196
+ inputs=[
197
+ input_text,
198
+ target_language,
199
+ use_zh_template,
200
+ temperature,
201
+ top_p,
202
+ top_k,
203
+ repetition_penalty,
204
+ max_new_tokens,
205
+ ],
206
+ outputs=output_text,
207
+ )
208
+
209
+ return demo
210
+
211
+
212
+ def parse_args():
213
+ parser = argparse.ArgumentParser(description="HY-MT1.5-1.8B_GPTQ_INT4 Gradio Demo (C++ Backend)")
214
+ parser.add_argument("--api_base", type=str, default="http://127.0.0.1:8000")
215
+ parser.add_argument("--model", type=str, default="AXERA-TECH/HY-MT1.5-1.8B_GPTQ_INT4-AX620E")
216
+ parser.add_argument("--server_name", type=str, default="0.0.0.0")
217
+ parser.add_argument("--server_port", type=int, default=7860)
218
+ return parser.parse_args()
219
+
220
+
221
+ if __name__ == "__main__":
222
+ args = parse_args()
223
+ app = create_demo(args.api_base, args.model)
224
+ ipv4 = _get_ipv4_address()
225
+ print(f"* Running on local URL: http://{ipv4}:{args.server_port}")
226
+ app.launch(server_name=args.server_name, server_port=args.server_port)
gradio_demo.py ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ import socket
4
+ import numpy as np
5
+ import gradio as gr
6
+ from transformers import AutoConfig, AutoTokenizer
7
+ from ml_dtypes import bfloat16
8
+
9
+ from utils.infer_func import InferManager
10
+
11
+ DEFAULT_LANGUAGES = [
12
+ "English",
13
+ "Chinese",
14
+ "Japanese",
15
+ "Korean",
16
+ "French",
17
+ "German",
18
+ "Spanish",
19
+ "Italian",
20
+ "Portuguese",
21
+ "Russian",
22
+ "Arabic",
23
+ "Hindi",
24
+ "Bengali",
25
+ "Thai",
26
+ "Vietnamese",
27
+ "Indonesian",
28
+ "Turkish",
29
+ "Polish",
30
+ "Dutch",
31
+ "Swedish",
32
+ "Danish",
33
+ "Norwegian",
34
+ "Finnish",
35
+ "Greek",
36
+ "Czech",
37
+ "Hungarian",
38
+ "Romanian",
39
+ "Ukrainian",
40
+ "Malay",
41
+ "Filipino",
42
+ "Urdu",
43
+ "Hebrew",
44
+ "Persian",
45
+ ]
46
+
47
+
48
+ def build_prompt(source_text: str, target_language: str, use_zh_template: bool) -> str:
49
+ if use_zh_template:
50
+ return (
51
+ f"将以下文本翻译为{target_language},注意只需要输出翻译后的结果,不要额外解释:\n"
52
+ f"{source_text}"
53
+ )
54
+ return (
55
+ f"Translate the following segment into {target_language}, without additional explanation.\n"
56
+ f"{source_text}"
57
+ )
58
+
59
+
60
+ def _get_ipv4_address() -> str:
61
+ try:
62
+ s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
63
+ s.connect(("8.8.8.8", 80))
64
+ ip = s.getsockname()[0]
65
+ s.close()
66
+ return ip
67
+ except Exception:
68
+ return "127.0.0.1"
69
+
70
+
71
+ def create_demo(hf_model: str, axmodel_path: str, max_seq_len=2047):
72
+ embeds_path = os.path.join(axmodel_path, "model.embed_tokens.weight.npy")
73
+ if not os.path.exists(embeds_path):
74
+ raise FileNotFoundError(f"Missing embeddings file: {embeds_path}")
75
+
76
+ tokenizer = AutoTokenizer.from_pretrained(hf_model)
77
+ config = AutoConfig.from_pretrained(hf_model, trust_remote_code=True)
78
+ embeds = np.load(embeds_path)
79
+
80
+ eos_token_id = None
81
+ if isinstance(config.eos_token_id, list) and len(config.eos_token_id) > 1:
82
+ eos_token_id = config.eos_token_id
83
+
84
+ imer = InferManager(config, axmodel_path, max_seq_len=max_seq_len)
85
+
86
+ def translate(
87
+ text,
88
+ target_language,
89
+ use_zh_template,
90
+ temperature,
91
+ top_p,
92
+ top_k,
93
+ repetition_penalty,
94
+ max_new_tokens,
95
+ ):
96
+ if not text or not text.strip():
97
+ return ""
98
+
99
+ prompt = build_prompt(text.strip(), target_language, use_zh_template)
100
+ messages = [{"role": "user", "content": prompt}]
101
+ input_ids = tokenizer.apply_chat_template(
102
+ messages,
103
+ tokenize=True,
104
+ add_generation_prompt=False,
105
+ return_tensors="pt",
106
+ )
107
+ token_ids = input_ids[0].cpu().numpy().tolist()
108
+ prefill_data = np.take(embeds, token_ids, axis=0).astype(bfloat16)
109
+
110
+ slice_len = 128
111
+ token_ids = imer.prefill(
112
+ tokenizer,
113
+ token_ids,
114
+ prefill_data,
115
+ slice_len=slice_len,
116
+ top_k=top_k,
117
+ top_p=top_p,
118
+ temperature=temperature,
119
+ repetition_penalty=repetition_penalty,
120
+ )
121
+ output = imer.decode(
122
+ tokenizer,
123
+ token_ids,
124
+ embeds,
125
+ slice_len=slice_len,
126
+ eos_token_id=eos_token_id,
127
+ stream=False,
128
+ top_k=top_k,
129
+ top_p=top_p,
130
+ temperature=temperature,
131
+ repetition_penalty=repetition_penalty,
132
+ max_new_tokens=max_new_tokens,
133
+ )
134
+ return output.strip()
135
+
136
+ def translate_stream(
137
+ text,
138
+ target_language,
139
+ use_zh_template,
140
+ temperature,
141
+ top_p,
142
+ top_k,
143
+ repetition_penalty,
144
+ max_new_tokens,
145
+ ):
146
+ if not text or not text.strip():
147
+ yield ""
148
+ return
149
+
150
+ prompt = build_prompt(text.strip(), target_language, use_zh_template)
151
+ messages = [{"role": "user", "content": prompt}]
152
+ input_ids = tokenizer.apply_chat_template(
153
+ messages,
154
+ tokenize=True,
155
+ add_generation_prompt=False,
156
+ return_tensors="pt",
157
+ )
158
+ token_ids = input_ids[0].cpu().numpy().tolist()
159
+ prefill_data = np.take(embeds, token_ids, axis=0).astype(bfloat16)
160
+
161
+ slice_len = 128
162
+ token_ids = imer.prefill(
163
+ tokenizer,
164
+ token_ids,
165
+ prefill_data,
166
+ slice_len=slice_len,
167
+ top_k=top_k,
168
+ top_p=top_p,
169
+ temperature=temperature,
170
+ repetition_penalty=repetition_penalty,
171
+ )
172
+
173
+ for text_so_far in imer.decode_stream(
174
+ tokenizer,
175
+ token_ids,
176
+ embeds,
177
+ slice_len=slice_len,
178
+ eos_token_id=eos_token_id,
179
+ top_k=top_k,
180
+ top_p=top_p,
181
+ temperature=temperature,
182
+ repetition_penalty=repetition_penalty,
183
+ max_new_tokens=max_new_tokens,
184
+ ):
185
+ yield text_so_far
186
+
187
+ with gr.Blocks(title="HY-MT1.5-1.8B_GPTQ_INT4 Multilingual Translation") as demo:
188
+ gr.Markdown("## HY-MT1.5-1.8B_GPTQ_INT4 Multilingual Translation")
189
+
190
+ with gr.Group():
191
+ input_text = gr.Textbox(
192
+ label="Input Text",
193
+ placeholder="Please enter the text you want to translate...",
194
+ lines=6,
195
+ )
196
+
197
+ with gr.Group():
198
+ with gr.Row(equal_height=True):
199
+ target_language = gr.Dropdown(
200
+ choices=DEFAULT_LANGUAGES,
201
+ value="English",
202
+ label="Target Language",
203
+ )
204
+ use_zh_template = gr.Checkbox(
205
+ label="Use Chinese Prompt Template",
206
+ value=False,
207
+ )
208
+ with gr.Group():
209
+ with gr.Row(equal_height=True):
210
+ temperature = gr.Slider(
211
+ minimum=0.1,
212
+ maximum=1.5,
213
+ value=0.7,
214
+ step=0.05,
215
+ label="Temperature",
216
+ )
217
+ top_p = gr.Slider(
218
+ minimum=0.1,
219
+ maximum=1.0,
220
+ value=0.6,
221
+ step=0.05,
222
+ label="Top-p",
223
+ )
224
+ top_k = gr.Slider(
225
+ minimum=1,
226
+ maximum=100,
227
+ value=20,
228
+ step=1,
229
+ label="Top-k",
230
+ )
231
+
232
+ with gr.Group():
233
+ with gr.Row(equal_height=True):
234
+ repetition_penalty = gr.Slider(
235
+ minimum=1.0,
236
+ maximum=1.5,
237
+ value=1.05,
238
+ step=0.01,
239
+ label="Repetition Penalty",
240
+ )
241
+ max_new_tokens = gr.Slider(
242
+ minimum=1,
243
+ maximum=1024,
244
+ value=512,
245
+ step=1,
246
+ label="Max New Tokens",
247
+ )
248
+
249
+ translate_btn = gr.Button("Translate", variant="primary")
250
+ output_text = gr.Textbox(
251
+ label="Translation Result",
252
+ lines=6,
253
+ interactive=False,
254
+ )
255
+
256
+ translate_btn.click(
257
+ translate_stream,
258
+ inputs=[
259
+ input_text,
260
+ target_language,
261
+ use_zh_template,
262
+ temperature,
263
+ top_p,
264
+ top_k,
265
+ repetition_penalty,
266
+ max_new_tokens,
267
+ ],
268
+ outputs=output_text,
269
+ )
270
+
271
+ return demo
272
+
273
+
274
+ def parse_args():
275
+ parser = argparse.ArgumentParser(description="HY-MT1.5-1.8B_GPTQ_INT4 Gradio Demo")
276
+ parser.add_argument(
277
+ "--hf_model",
278
+ type=str,
279
+ default="./hymt1-5_tokenizer",
280
+ help="Path to HuggingFace model",
281
+ )
282
+ parser.add_argument(
283
+ "--axmodel_path",
284
+ type=str,
285
+ default="./hymt1-5_2k_ax650n_axmodel/",
286
+ help="Path to compiled axmodel directory",
287
+ )
288
+ parser.add_argument(
289
+ "--max_seq_len",
290
+ type=int,
291
+ default=2047,
292
+ help="Maximum sequence length for model inference",
293
+ )
294
+ parser.add_argument("--server_name", type=str, default="0.0.0.0")
295
+ parser.add_argument("--server_port", type=int, default=7860)
296
+ return parser.parse_args()
297
+
298
+
299
+ if __name__ == "__main__":
300
+ args = parse_args()
301
+ app = create_demo(args.hf_model, args.axmodel_path, args.max_seq_len)
302
+ ipv4 = _get_ipv4_address()
303
+ print(f"* Running on local URL: http://{ipv4}:{args.server_port}")
304
+ app.launch(server_name=args.server_name, server_port=args.server_port)
hymt1-5-1.8b_tokenizer.txt ADDED
The diff for this file is too large to render. See raw diff
 
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l0_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95f57472a0483bd7091039bf9b27ad38aa9a484868ff56180ded2641003f555b
3
+ size 40464383
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l10_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:672ad9d439d6e3918d5799695153d9122f905ba46c825e69a92e16340ff04fd7
3
+ size 40465303
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l11_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a9e698e772f85fcad99af2f11cacaaf93914960646c945add2a783c53ef8f9a
3
+ size 40463639
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l12_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f69bc010aa8c49e1be41a482377e4d2b5252f4cc96b2acc2091287626d9a89e0
3
+ size 40468799
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l13_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25c7bc90cb752e68c22bfa5382d2cecb8145413460b737fd84c94eaf0b09e70a
3
+ size 40463039
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l14_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:408732f37f944968d06ea4448b2474f877e3088f0a987218d53a556b94ae7dc0
3
+ size 40463215
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l15_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bdf1f73534d1244cb6b4447114064da4bcd21d5453307b1535985c73b15bb95
3
+ size 40460615
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l16_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6b14fae6b305892157ed561b98c562e5346f810ca00b2db04f7c4384f5ba023
3
+ size 40465495
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l17_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee33064da337191b0309d272effe1c6f5f9f4041651f3942f1db9e2ec6d6da13
3
+ size 40464679
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l18_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f773f1fb24772f7e101c83203d661d56ef2d85510437b3ce51592e5a6a059f9
3
+ size 40465615
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l19_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:026d22550080dd9a90417be782c4e05051557a701379f6bdd2719bf2446ed82c
3
+ size 40467895
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l1_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f842df0ddd3ed417c4f6f1da185115086b46b7f24eb30e42013c1d4f55a2869c
3
+ size 40465119
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l20_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37e9cc96a37732d19faa3e912cf7689d50711f6890023f173f257c2b25f61c18
3
+ size 40466927
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l21_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ca610e2dd5c6d35542aa8c639294c44f01d57fe5ca66e08366e492040c5f15e
3
+ size 40467551
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l22_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:debdef79ccda13e8abed2385456db18ef696c3a09d623b0ea644172f832183ad
3
+ size 40463447
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l23_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17d97744eac93e9ff1a2adc0ebaeadc0682460671f501616f9d7a2979323f03a
3
+ size 40463255
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l24_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb519c6b7dda04e80434b3565ff093f5ec0157ef70bcb1c7ac0fd329f3d28f9e
3
+ size 40467687
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l25_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d5320b8bdf1423ef58c6a7ca7223cae1e8aed4363da4f3cb522281ef7a7d548
3
+ size 40465615
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l26_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a579d457ae3d5af10aa20fe2610e47f1d5cc4114f87dd25a9bbf1ae215cf461a
3
+ size 40465207
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l27_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5912ac5cdfef47c556f5a78a858a7e5052432266b190b9c06276bbf3478e74ad
3
+ size 40462991
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l28_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7f821b47b7743e61b8bc4439813a4fd40f526cc28d0560c1d97bd3a2f89b0c6
3
+ size 40465967
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l29_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d2a36020ebf69790bbe62b2c2522a8382bb1caa1ebbbddba8060a601049e982
3
+ size 40464991
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l2_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:534f35e8f8c89d926bf9a7638227fea5d09244c8b329fbe352d6c2b9e47744e8
3
+ size 40466655
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l30_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3d6bc6a3186d7093b91bdf13d411903d35b5099178e97376465891956e5ad1a
3
+ size 40468335
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l31_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e3676a5f34a83bde9fa8780c1e7480fce2c05a2de0c82d80bced16f2b755f59
3
+ size 40465655
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l3_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e8905d2289d30e8c91a463864243eb3158a98e147a82d8ce0b832fa33bd1514
3
+ size 40464463
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l4_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc1f757cb559b5a48a9c576bfdf6ca60e5b2fbe07b167fa611d6c175d8d1aa22
3
+ size 40467063
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l5_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee2866141efc46cbad1dbbaec542b2fea161774f6e2f784c38928abd389de31a
3
+ size 40466727
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l6_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4115938ecd86a6835da42547bb19b58f44b3c757d1df7a9a4e09e1ae70d7e604
3
+ size 40468167
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l7_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:581939b9eb10f6ae09a50ec1c4ede3e047c8c4ba7a8744d87b8c1d7a7d8e2813
3
+ size 40468319
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l8_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cd70ff297d4d1004227219279db33725063dc8dcac12bf057530489aad976c5
3
+ size 40465695
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_p128_l9_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d912e6a1793850d9a7caca5c0b2e7e9dc317ed7d04e4f88116cb3678aa720dd
3
+ size 40463631
hymt1-5_1k_ax620e_axmodel/hunyuan_v1_dense_post.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c1fa5e27704f07355343f5eec722214f178c957ea38d7667c1a2300530a0441
3
+ size 249548483
hymt1-5_1k_ax620e_axmodel/model.embed_tokens.weight.bfloat16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1793bc02b24f4c82e617508e9c82f4efac270176a54a1e706fa6584203e25720
3
+ size 494870528
hymt1-5_1k_ax620e_axmodel/model.embed_tokens.weight.float32.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37d63c92d8ae526a90744cf6c4ed5af1919ca38a0c0c441cdd714dd7a249b75b
3
+ size 989741056
hymt1-5_1k_ax620e_axmodel/model.embed_tokens.weight.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cadf56765a11544d1f13f70452f70b1bda87978b2c4dbfb7fe3d46df50676f3
3
+ size 989741184
hymt1-5_tokenizer/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text