Spaces:
Running
Add hunyuan_v3 (hy_v3) to KNOWN_BASES
Browse files
scan.py
CHANGED
|
@@ -252,6 +252,13 @@ KNOWN_BASES = {
|
|
| 252 |
# vocab ≈ Gemma tokenizer + 128 extra tokens; trained on AMD MI300x
|
| 253 |
# 74B-Preview is pre-RL reasoning base (no RLHF/instruct tuning)
|
| 254 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
"mellum": {
|
| 256 |
"name": "JetBrains Mellum (code-specialized MoE)",
|
| 257 |
"vocab_size": 98304,
|
|
|
|
| 252 |
# vocab ≈ Gemma tokenizer + 128 extra tokens; trained on AMD MI300x
|
| 253 |
# 74B-Preview is pre-RL reasoning base (no RLHF/instruct tuning)
|
| 254 |
},
|
| 255 |
+
"hunyuan_v3": {
|
| 256 |
+
"name": "Tencent Hunyuan V3 / Hy-MT2 (MoE)",
|
| 257 |
+
"vocab_size": 120832,
|
| 258 |
+
"model_type_patterns": ["hy_v3"],
|
| 259 |
+
# Hy-MT2 series: 1.8B dense, 7B dense, 30B-A3B MoE (128E/8A).
|
| 260 |
+
# 48 layers, hidden=2048 for MoE tier. QK norm. HYV3ForCausalLM.
|
| 261 |
+
},
|
| 262 |
"mellum": {
|
| 263 |
"name": "JetBrains Mellum (code-specialized MoE)",
|
| 264 |
"vocab_size": 98304,
|