ibrahimkettaneh committed on
Commit
4a40f9b
·
verified ·
1 Parent(s): 852f95f

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +345 -0
config.json ADDED
@@ -0,0 +1,345 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Step3p7ForConditionalGeneration"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_step3p7.Step3p7Config",
7
+ "AutoProcessor": "processing_step3.Step3VLProcessor",
8
+ "AutoModelForCausalLM": "modeling_step3p7.Step3p7ForConditionalGeneration"
9
+ },
10
+ "model_type": "step3p7",
11
+ "im_end_token": "<im_end>",
12
+ "im_patch_token": "<im_patch>",
13
+ "im_start_token": "<im_start>",
14
+ "image_token_len": 169,
15
+ "patch_token_len": 81,
16
+ "image_token_id": 128001,
17
+ "understand_projector_stride": 2,
18
+ "use_im_start_end": true,
19
+ "vision_select_layer": -1,
20
+ "projector_bias": false,
21
+ "vision_config": {
22
+ "model_type": "perception_encoder",
23
+ "image_size": 728,
24
+ "patch_size": 14,
25
+ "width": 1536,
26
+ "layers": 47,
27
+ "heads": 16,
28
+ "pool_type": "none",
29
+ "output_dim": null,
30
+ "use_cls_token": false,
31
+ "ls_init_value": 0.1,
32
+ "use_ln_post": false,
33
+ "hidden_act": "quick_gelu"
34
+ },
35
+ "text_config": {
36
+ "architectures": [
37
+ "Step3p5ForCausalLM"
38
+ ],
39
+ "rope_scaling": {
40
+ "rope_type": "llama3",
41
+ "factor": 2.0,
42
+ "original_max_position_embeddings": 131072,
43
+ "low_freq_factor": 1.0,
44
+ "high_freq_factor": 32.0
45
+ },
46
+ "yarn_only_types": [
47
+ "full_attention"
48
+ ],
49
+ "model_type": "step3p5",
50
+ "hidden_size": 4096,
51
+ "intermediate_size": 11264,
52
+ "num_hidden_layers": 45,
53
+ "max_seq_len": 262144,
54
+ "max_position_embeddings": 262144,
55
+ "vocab_size": 128896,
56
+ "torch_dtype": "bfloat16",
57
+ "use_qk_norm": false,
58
+ "moe_layers_enum": "3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44",
59
+ "use_mfa": false,
60
+ "num_attention_heads": 64,
61
+ "num_attention_groups": 8,
62
+ "head_dim": 128,
63
+ "use_moe": true,
64
+ "moe_num_experts": 288,
65
+ "moe_top_k": 8,
66
+ "moe_intermediate_size": 1280,
67
+ "share_expert_dim": 1280,
68
+ "moe_layer_offset": 0,
69
+ "moe_every_n_layer": 1,
70
+ "norm_expert_weight": true,
71
+ "moe_router_activation": "sigmoid",
72
+ "moe_router_scaling_factor": 3.0,
73
+ "att_impl_type": "GQA",
74
+ "num_nextn_predict_layers": 3,
75
+ "rope_theta": [
76
+ 5000000.0,
77
+ 10000.0,
78
+ 10000.0,
79
+ 10000.0,
80
+ 5000000.0,
81
+ 10000.0,
82
+ 10000.0,
83
+ 10000.0,
84
+ 5000000.0,
85
+ 10000.0,
86
+ 10000.0,
87
+ 10000.0,
88
+ 5000000.0,
89
+ 10000.0,
90
+ 10000.0,
91
+ 10000.0,
92
+ 5000000.0,
93
+ 10000.0,
94
+ 10000.0,
95
+ 10000.0,
96
+ 5000000.0,
97
+ 10000.0,
98
+ 10000.0,
99
+ 10000.0,
100
+ 5000000.0,
101
+ 10000.0,
102
+ 10000.0,
103
+ 10000.0,
104
+ 5000000.0,
105
+ 10000.0,
106
+ 10000.0,
107
+ 10000.0,
108
+ 5000000.0,
109
+ 10000.0,
110
+ 10000.0,
111
+ 10000.0,
112
+ 5000000.0,
113
+ 10000.0,
114
+ 10000.0,
115
+ 10000.0,
116
+ 5000000.0,
117
+ 10000.0,
118
+ 10000.0,
119
+ 10000.0,
120
+ 5000000.0,
121
+ 10000.0,
122
+ 10000.0,
123
+ 10000.0
124
+ ],
125
+ "use_head_wise_attn_gate": true,
126
+ "sliding_window": 512,
127
+ "use_moe_router_bias": true,
128
+ "need_fp32_gate": true,
129
+ "sink": false,
130
+ "layer_types": [
131
+ "full_attention",
132
+ "sliding_attention",
133
+ "sliding_attention",
134
+ "sliding_attention",
135
+ "full_attention",
136
+ "sliding_attention",
137
+ "sliding_attention",
138
+ "sliding_attention",
139
+ "full_attention",
140
+ "sliding_attention",
141
+ "sliding_attention",
142
+ "sliding_attention",
143
+ "full_attention",
144
+ "sliding_attention",
145
+ "sliding_attention",
146
+ "sliding_attention",
147
+ "full_attention",
148
+ "sliding_attention",
149
+ "sliding_attention",
150
+ "sliding_attention",
151
+ "full_attention",
152
+ "sliding_attention",
153
+ "sliding_attention",
154
+ "sliding_attention",
155
+ "full_attention",
156
+ "sliding_attention",
157
+ "sliding_attention",
158
+ "sliding_attention",
159
+ "full_attention",
160
+ "sliding_attention",
161
+ "sliding_attention",
162
+ "sliding_attention",
163
+ "full_attention",
164
+ "sliding_attention",
165
+ "sliding_attention",
166
+ "sliding_attention",
167
+ "full_attention",
168
+ "sliding_attention",
169
+ "sliding_attention",
170
+ "sliding_attention",
171
+ "full_attention",
172
+ "sliding_attention",
173
+ "sliding_attention",
174
+ "sliding_attention",
175
+ "full_attention",
176
+ "sliding_attention",
177
+ "sliding_attention",
178
+ "sliding_attention"
179
+ ],
180
+ "use_rope_layers": [],
181
+ "partial_rotary_factors": [
182
+ 0.5,
183
+ 1.0,
184
+ 1.0,
185
+ 1.0,
186
+ 0.5,
187
+ 1.0,
188
+ 1.0,
189
+ 1.0,
190
+ 0.5,
191
+ 1.0,
192
+ 1.0,
193
+ 1.0,
194
+ 0.5,
195
+ 1.0,
196
+ 1.0,
197
+ 1.0,
198
+ 0.5,
199
+ 1.0,
200
+ 1.0,
201
+ 1.0,
202
+ 0.5,
203
+ 1.0,
204
+ 1.0,
205
+ 1.0,
206
+ 0.5,
207
+ 1.0,
208
+ 1.0,
209
+ 1.0,
210
+ 0.5,
211
+ 1.0,
212
+ 1.0,
213
+ 1.0,
214
+ 0.5,
215
+ 1.0,
216
+ 1.0,
217
+ 1.0,
218
+ 0.5,
219
+ 1.0,
220
+ 1.0,
221
+ 1.0,
222
+ 0.5,
223
+ 1.0,
224
+ 1.0,
225
+ 1.0,
226
+ 0.5,
227
+ 1.0,
228
+ 1.0,
229
+ 1.0
230
+ ],
231
+ "eos_token_id": [
232
+ 1,
233
+ 2,
234
+ 128007
235
+ ],
236
+ "bos_token_id": 0,
237
+ "attention_other_setting": {
238
+ "attention_type": "sliding_attention",
239
+ "num_attention_heads": 96,
240
+ "num_attention_groups": 8,
241
+ "head_dim": 128,
242
+ "true_head_dim": 128
243
+ },
244
+ "swiglu_limits": [
245
+ 0.0,
246
+ 0.0,
247
+ 0.0,
248
+ 0.0,
249
+ 0.0,
250
+ 0.0,
251
+ 0.0,
252
+ 0.0,
253
+ 0.0,
254
+ 0.0,
255
+ 0.0,
256
+ 0.0,
257
+ 0.0,
258
+ 0.0,
259
+ 0.0,
260
+ 0.0,
261
+ 0.0,
262
+ 0.0,
263
+ 0.0,
264
+ 0.0,
265
+ 0.0,
266
+ 0.0,
267
+ 0.0,
268
+ 0.0,
269
+ 0.0,
270
+ 0.0,
271
+ 0.0,
272
+ 0.0,
273
+ 0.0,
274
+ 0.0,
275
+ 0.0,
276
+ 0.0,
277
+ 0.0,
278
+ 0.0,
279
+ 0.0,
280
+ 0.0,
281
+ 0.0,
282
+ 0.0,
283
+ 0.0,
284
+ 0.0,
285
+ 0.0,
286
+ 0.0,
287
+ 0.0,
288
+ 7.0,
288
+ 7.0,
290
+ 0.0,
291
+ 0.0,
292
+ 0.0
293
+ ],
294
+ "swiglu_limits_shared": [
295
+ 0.0,
296
+ 0.0,
297
+ 0.0,
298
+ 0.0,
299
+ 0.0,
300
+ 0.0,
301
+ 0.0,
302
+ 0.0,
303
+ 0.0,
304
+ 0.0,
305
+ 0.0,
306
+ 0.0,
307
+ 0.0,
308
+ 0.0,
309
+ 0.0,
310
+ 0.0,
311
+ 0.0,
312
+ 0.0,
313
+ 0.0,
314
+ 0.0,
315
+ 0.0,
316
+ 0.0,
317
+ 0.0,
318
+ 0.0,
319
+ 0.0,
320
+ 0.0,
321
+ 0.0,
322
+ 0.0,
323
+ 0.0,
324
+ 0.0,
325
+ 0.0,
326
+ 0.0,
327
+ 0.0,
328
+ 0.0,
329
+ 0.0,
330
+ 0.0,
331
+ 0.0,
332
+ 0.0,
333
+ 0.0,
334
+ 0.0,
335
+ 0.0,
336
+ 0.0,
337
+ 0.0,
338
+ 16.0,
338
+ 16.0,
340
+ 0.0,
341
+ 0.0,
342
+ 0.0
343
+ ]
344
+ }
345
+ }