trohrbaugh committed on
Commit
39d11b5
·
verified ·
1 Parent(s): 5da97e4

Add hunyuan_v3 (hy_v3) to KNOWN_BASES

Browse files
Files changed (1) hide show
  1. scan.py +7 -0
scan.py CHANGED
@@ -252,6 +252,13 @@ KNOWN_BASES = {
252
  # vocab ≈ Gemma tokenizer + 128 extra tokens; trained on AMD MI300x
253
  # 74B-Preview is pre-RL reasoning base (no RLHF/instruct tuning)
254
  },
 
 
 
 
 
 
 
255
  "mellum": {
256
  "name": "JetBrains Mellum (code-specialized MoE)",
257
  "vocab_size": 98304,
 
252
  # vocab ≈ Gemma tokenizer + 128 extra tokens; trained on AMD MI300x
253
  # 74B-Preview is pre-RL reasoning base (no RLHF/instruct tuning)
254
  },
255
+ "hunyuan_v3": {
256
+ "name": "Tencent Hunyuan V3 / Hy-MT2 (MoE)",
257
+ "vocab_size": 120832,
258
+ "model_type_patterns": ["hy_v3"],
259
+ # Hy-MT2 series: 1.8B dense, 7B dense, 30B-A3B MoE (128E/8A).
260
+ # 48 layers, hidden=2048 for MoE tier. QK norm. HYV3ForCausalLM.
261
+ },
262
  "mellum": {
263
  "name": "JetBrains Mellum (code-specialized MoE)",
264
  "vocab_size": 98304,