rimashussain committed on
Commit
0778e02
·
verified ·
1 Parent(s): f194141

Upload processor

Browse files
Files changed (4) hide show
  1. chat_template.jinja +37 -334
  2. processor_config.json +17 -64
  3. tokenizer.json +2 -2
  4. tokenizer_config.json +12 -83
chat_template.jinja CHANGED
@@ -1,344 +1,47 @@
1
- {%- macro format_parameters(properties, required) -%}
2
- {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
3
- {%- set ns = namespace(found_first=false) -%}
4
- {%- for key, value in properties | dictsort -%}
5
- {%- set add_comma = false -%}
6
- {%- if key not in standard_keys -%}
7
- {%- if ns.found_first %},{% endif -%}
8
- {%- set ns.found_first = true -%}
9
- {{ key }}:{
10
- {%- if value['description'] -%}
11
- description:<|"|>{{ value['description'] }}<|"|>
12
- {%- set add_comma = true -%}
13
- {%- endif -%}
14
- {%- if value['type'] | upper == 'STRING' -%}
15
- {%- if value['enum'] -%}
16
- {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
17
- enum:{{ format_argument(value['enum']) }}
18
- {%- endif -%}
19
- {%- elif value['type'] | upper == 'ARRAY' -%}
20
- {%- if value['items'] is mapping and value['items'] -%}
21
- {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
22
- items:{
23
- {%- set ns_items = namespace(found_first=false) -%}
24
- {%- for item_key, item_value in value['items'] | dictsort -%}
25
- {%- if item_value is not none -%}
26
- {%- if ns_items.found_first %},{% endif -%}
27
- {%- set ns_items.found_first = true -%}
28
- {%- if item_key == 'properties' -%}
29
- properties:{
30
- {%- if item_value is mapping -%}
31
- {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
32
- {%- endif -%}
33
- }
34
- {%- elif item_key == 'required' -%}
35
- required:[
36
- {%- for req_item in item_value -%}
37
- <|"|>{{- req_item -}}<|"|>
38
- {%- if not loop.last %},{% endif -%}
39
- {%- endfor -%}
40
- ]
41
- {%- elif item_key == 'type' -%}
42
- {%- if item_value is string -%}
43
- type:{{ format_argument(item_value | upper) }}
44
- {%- else -%}
45
- type:{{ format_argument(item_value | map('upper') | list) }}
46
- {%- endif -%}
47
- {%- else -%}
48
- {{ item_key }}:{{ format_argument(item_value) }}
49
- {%- endif -%}
50
- {%- endif -%}
51
- {%- endfor -%}
52
- }
53
- {%- endif -%}
54
- {%- endif -%}
55
- {%- if value['nullable'] %}
56
- {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
57
- nullable:true
58
- {%- endif -%}
59
- {%- if value['type'] | upper == 'OBJECT' -%}
60
- {%- if value['properties'] is defined and value['properties'] is mapping -%}
61
- {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
62
- properties:{
63
- {{- format_parameters(value['properties'], value['required'] | default([])) -}}
64
- }
65
- {%- elif value is mapping -%}
66
- {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
67
- properties:{
68
- {{- format_parameters(value, value['required'] | default([])) -}}
69
- }
70
- {%- endif -%}
71
- {%- if value['required'] -%}
72
- {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
73
- required:[
74
- {%- for item in value['required'] | default([]) -%}
75
- <|"|>{{- item -}}<|"|>
76
- {%- if not loop.last %},{% endif -%}
77
- {%- endfor -%}
78
- ]
79
- {%- endif -%}
80
- {%- endif -%}
81
- {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
82
- type:<|"|>{{ value['type'] | upper }}<|"|>}
83
- {%- endif -%}
84
- {%- endfor -%}
85
- {%- endmacro -%}
86
- {%- macro format_function_declaration(tool_data) -%}
87
- declaration:{{- tool_data['function']['name'] -}}{description:<|"|>{{- tool_data['function']['description'] -}}<|"|>
88
- {%- set params = tool_data['function']['parameters'] -%}
89
- {%- if params -%}
90
- ,parameters:{
91
- {%- if params['properties'] -%}
92
- properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
93
- {%- endif -%}
94
- {%- if params['required'] -%}
95
- required:[
96
- {%- for item in params['required'] -%}
97
- <|"|>{{- item -}}<|"|>
98
- {{- ',' if not loop.last -}}
99
- {%- endfor -%}
100
- ],
101
- {%- endif -%}
102
- {%- if params['type'] -%}
103
- type:<|"|>{{- params['type'] | upper -}}<|"|>}
104
- {%- endif -%}
105
- {%- endif -%}
106
- {%- if 'response' in tool_data['function'] -%}
107
- {%- set response_declaration = tool_data['function']['response'] -%}
108
- ,response:{
109
- {%- if response_declaration['description'] -%}
110
- description:<|"|>{{- response_declaration['description'] -}}<|"|>,
111
- {%- endif -%}
112
- {%- if response_declaration['type'] | upper == 'OBJECT' -%}
113
- type:<|"|>{{- response_declaration['type'] | upper -}}<|"|>}
114
- {%- endif -%}
115
- {%- endif -%}
116
- }
117
- {%- endmacro -%}
118
- {%- macro format_argument(argument, escape_keys=True) -%}
119
- {%- if argument is string -%}
120
- {{- '<|"|>' + argument + '<|"|>' -}}
121
- {%- elif argument is boolean -%}
122
- {{- 'true' if argument else 'false' -}}
123
- {%- elif argument is mapping -%}
124
- {{- '{' -}}
125
- {%- set ns = namespace(found_first=false) -%}
126
- {%- for key, value in argument | dictsort -%}
127
- {%- if ns.found_first %},{% endif -%}
128
- {%- set ns.found_first = true -%}
129
- {%- if escape_keys -%}
130
- {{- '<|"|>' + key + '<|"|>' -}}
131
- {%- else -%}
132
- {{- key -}}
133
- {%- endif -%}
134
- :{{- format_argument(value, escape_keys=escape_keys) -}}
135
- {%- endfor -%}
136
- {{- '}' -}}
137
- {%- elif argument is sequence -%}
138
- {{- '[' -}}
139
- {%- for item in argument -%}
140
- {{- format_argument(item, escape_keys=escape_keys) -}}
141
- {%- if not loop.last %},{% endif -%}
142
- {%- endfor -%}
143
- {{- ']' -}}
144
- {%- else -%}
145
- {{- argument -}}
146
- {%- endif -%}
147
- {%- endmacro -%}
148
- {%- macro strip_thinking(text) -%}
149
- {%- set ns = namespace(result='') -%}
150
- {%- for part in text.split('<channel|>') -%}
151
- {%- if '<|channel>' in part -%}
152
- {%- set ns.result = ns.result + part.split('<|channel>')[0] -%}
153
- {%- else -%}
154
- {%- set ns.result = ns.result + part -%}
155
- {%- endif -%}
156
- {%- endfor -%}
157
- {{- ns.result | trim -}}
158
- {%- endmacro -%}
159
 
160
- {%- macro format_tool_response_block(tool_name, response) -%}
161
- {{- '<|tool_response>' -}}
162
- {%- if response is mapping -%}
163
- {{- 'response:' + tool_name + '{' -}}
164
- {%- for key, value in response | dictsort -%}
165
- {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
166
- {%- if not loop.last %},{% endif -%}
167
- {%- endfor -%}
168
- {{- '}' -}}
169
  {%- else -%}
170
- {{- 'response:' + tool_name + '{value:' + format_argument(response, escape_keys=False) + '}' -}}
171
- {%- endif -%}
172
- {{- '<tool_response|>' -}}
173
- {%- endmacro -%}
174
-
175
- {%- set ns = namespace(prev_message_type=None) -%}
176
- {%- set loop_messages = messages -%}
177
- {{- bos_token -}}
178
- {#- Handle System/Tool Definitions Block -#}
179
- {%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
180
- {{- '<|turn>system\n' -}}
181
-
182
- {#- Inject Thinking token at the very top of the FIRST system turn -#}
183
- {%- if enable_thinking is defined and enable_thinking -%}
184
- {{- '<|think|>\n' -}}
185
- {%- set ns.prev_message_type = 'think' -%}
186
- {%- endif -%}
187
-
188
- {%- if messages[0]['role'] in ['system', 'developer'] -%}
189
- {{- messages[0]['content'] | trim -}}
190
- {%- set loop_messages = messages[1:] -%}
191
- {%- endif -%}
192
 
193
- {%- if tools -%}
194
- {%- for tool in tools %}
195
- {{- '<|tool>' -}}
196
- {{- format_function_declaration(tool) | trim -}}
197
- {{- '<tool|>' -}}
198
- {%- endfor %}
199
- {%- set ns.prev_message_type = 'tool' -%}
200
  {%- endif -%}
201
-
202
- {{- '<turn|>\n' -}}
203
- {%- endif %}
204
-
205
- {#- Pre-scan: find last user message index for reasoning guard -#}
206
- {%- set ns_turn = namespace(last_user_idx=-1) -%}
207
- {%- for i in range(loop_messages | length) -%}
208
- {%- if loop_messages[i]['role'] == 'user' -%}
209
- {%- set ns_turn.last_user_idx = i -%}
210
- {%- endif -%}
211
- {%- endfor -%}
212
-
213
- {#- Loop through messages -#}
214
  {%- for message in loop_messages -%}
215
- {%- if message['role'] != 'tool' -%}
216
- {%- set ns.prev_message_type = None -%}
217
- {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
218
- {#- Detect continuation: suppress duplicate <|turn>model when previous non-tool message was also assistant -#}
219
- {%- set prev_nt = namespace(role=None, found=false) -%}
220
- {%- if loop.index0 > 0 -%}
221
- {%- for j in range(loop.index0 - 1, -1, -1) -%}
222
- {%- if not prev_nt.found -%}
223
- {%- if loop_messages[j]['role'] != 'tool' -%}
224
- {%- set prev_nt.role = loop_messages[j]['role'] -%}
225
- {%- set prev_nt.found = true -%}
226
- {%- endif -%}
227
- {%- endif -%}
228
- {%- endfor -%}
229
  {%- endif -%}
230
- {%- set continue_same_model_turn = (role == 'model' and prev_nt.role == 'assistant') -%}
231
- {%- if not continue_same_model_turn -%}
232
- {{- '<|turn>' + role + '\n' }}
233
- {%- endif -%}
234
-
235
- {#- Render reasoning/reasoning_content as thinking channel -#}
236
- {%- set thinking_text = message.get('reasoning') or message.get('reasoning_content') -%}
237
- {%- if thinking_text and loop.index0 > ns_turn.last_user_idx and message.get('tool_calls') -%}
238
- {{- '<|channel>thought\n' + thinking_text + '\n<channel|>' -}}
239
- {%- endif -%}
240
-
241
- {%- if message['tool_calls'] -%}
242
- {%- for tool_call in message['tool_calls'] -%}
243
- {%- set function = tool_call['function'] -%}
244
- {{- '<|tool_call>call:' + function['name'] + '{' -}}
245
- {%- if function['arguments'] is mapping -%}
246
- {%- set ns_args = namespace(found_first=false) -%}
247
- {%- for key, value in function['arguments'] | dictsort -%}
248
- {%- if ns_args.found_first %},{% endif -%}
249
- {%- set ns_args.found_first = true -%}
250
- {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
251
- {%- endfor -%}
252
- {%- elif function['arguments'] is string -%}
253
- {{- function['arguments'] -}}
254
- {%- endif -%}
255
- {{- '}<tool_call|>' -}}
256
- {%- endfor -%}
257
- {%- set ns.prev_message_type = 'tool_call' -%}
258
- {%- endif -%}
259
-
260
- {%- set ns_tr_out = namespace(flag=false) -%}
261
- {%- if message.get('tool_responses') -%}
262
- {#- Legacy: tool_responses embedded on the assistant message (Google/Gemma native) -#}
263
- {%- for tool_response in message['tool_responses'] -%}
264
- {{- format_tool_response_block(tool_response['name'] | default('unknown'), tool_response['response']) -}}
265
- {%- set ns_tr_out.flag = true -%}
266
- {%- set ns.prev_message_type = 'tool_response' -%}
267
- {%- endfor -%}
268
- {%- elif message.get('tool_calls') -%}
269
- {#- OpenAI Chat Completions: forward-scan consecutive role:tool messages -#}
270
- {%- set ns_tool_scan = namespace(stopped=false) -%}
271
- {%- for k in range(loop.index0 + 1, loop_messages | length) -%}
272
- {%- if ns_tool_scan.stopped -%}
273
- {%- elif loop_messages[k]['role'] != 'tool' -%}
274
- {%- set ns_tool_scan.stopped = true -%}
275
- {%- else -%}
276
- {%- set follow = loop_messages[k] -%}
277
- {#- Resolve tool_call_id to function name -#}
278
- {%- set ns_tname = namespace(name=follow.get('name') | default('unknown')) -%}
279
- {%- for tc in message['tool_calls'] -%}
280
- {%- if tc.get('id') == follow.get('tool_call_id') -%}
281
- {%- set ns_tname.name = tc['function']['name'] -%}
282
- {%- endif -%}
283
- {%- endfor -%}
284
- {#- Handle content as string or content-parts array -#}
285
- {%- set tool_body = follow.get('content') -%}
286
- {%- if tool_body is string -%}
287
- {{- format_tool_response_block(ns_tname.name, tool_body) -}}
288
- {%- elif tool_body is sequence and tool_body is not string -%}
289
- {%- set ns_txt = namespace(s='') -%}
290
- {%- for part in tool_body -%}
291
- {%- if part.get('type') == 'text' -%}
292
- {%- set ns_txt.s = ns_txt.s + (part.get('text') | default('')) -%}
293
- {%- endif -%}
294
- {%- endfor -%}
295
- {{- format_tool_response_block(ns_tname.name, ns_txt.s) -}}
296
- {%- else -%}
297
- {{- format_tool_response_block(ns_tname.name, tool_body) -}}
298
- {%- endif -%}
299
- {%- set ns_tr_out.flag = true -%}
300
- {%- set ns.prev_message_type = 'tool_response' -%}
301
- {%- endif -%}
302
- {%- endfor -%}
303
- {%- endif -%}
304
-
305
- {%- if message['content'] is string -%}
306
- {%- if role == 'model' -%}
307
- {{- strip_thinking(message['content']) -}}
308
- {%- else -%}
309
- {{- message['content'] | trim -}}
310
- {%- endif -%}
311
- {%- elif message['content'] is sequence -%}
312
- {%- for item in message['content'] -%}
313
- {%- if item['type'] == 'text' -%}
314
- {%- if role == 'model' -%}
315
- {{- strip_thinking(item['text']) -}}
316
- {%- else -%}
317
- {{- item['text'] | trim -}}
318
- {%- endif -%}
319
- {%- elif item['type'] == 'image' -%}
320
- {{- '<|image|>' -}}
321
- {%- set ns.prev_message_type = 'image' -%}
322
- {%- elif item['type'] == 'audio' -%}
323
- {{- '<|audio|>' -}}
324
- {%- set ns.prev_message_type = 'audio' -%}
325
- {%- elif item['type'] == 'video' -%}
326
- {{- '<|video|>' -}}
327
- {%- set ns.prev_message_type = 'video' -%}
328
- {%- endif -%}
329
- {%- endfor -%}
330
  {%- endif -%}
331
-
332
- {%- if ns.prev_message_type == 'tool_call' and not ns_tr_out.flag -%}
333
- {{- '<|tool_response>' -}}
334
- {%- elif not (ns_tr_out.flag and not message.get('content')) -%}
335
- {{- '<turn|>\n' -}}
336
- {%- endif -%}
337
  {%- endif -%}
 
 
338
  {%- endfor -%}
339
-
340
  {%- if add_generation_prompt -%}
341
- {%- if ns.prev_message_type != 'tool_response' and ns.prev_message_type != 'tool_call' -%}
342
- {{- '<|turn>model\n' -}}
343
- {%- endif -%}
344
- {%- endif -%}
 
1
+ {{ bos_token }}
2
+ {%- if messages[0]['role'] == 'system' -%}
3
+ {%- if messages[0]['content'] is string -%}
4
+ {%- set first_user_prefix = messages[0]['content'] + '
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
+ ' -%}
 
 
 
 
 
 
 
 
7
  {%- else -%}
8
+ {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
+ ' -%}
 
 
 
 
 
 
11
  {%- endif -%}
12
+ {%- set loop_messages = messages[1:] -%}
13
+ {%- else -%}
14
+ {%- set first_user_prefix = "" -%}
15
+ {%- set loop_messages = messages -%}
16
+ {%- endif -%}
 
 
 
 
 
 
 
 
17
  {%- for message in loop_messages -%}
18
+ {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
19
+ {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
 
 
 
 
 
 
 
 
 
 
 
 
20
  {%- endif -%}
21
+ {%- if (message['role'] == 'assistant') -%}
22
+ {%- set role = "model" -%}
23
+ {%- else -%}
24
+ {%- set role = message['role'] -%}
25
+ {%- endif -%}
26
+ {{ '<start_of_turn>' + role + '
27
+ ' + (first_user_prefix if loop.first else "") }}
28
+ {%- if message['content'] is string -%}
29
+ {{ message['content'] | trim }}
30
+ {%- elif message['content'] is iterable -%}
31
+ {%- for item in message['content'] -%}
32
+ {%- if item['type'] == 'image' -%}
33
+ {{ '<start_of_image>' }}
34
+ {%- elif item['type'] == 'text' -%}
35
+ {{ item['text'] | trim }}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  {%- endif -%}
37
+ {%- endfor -%}
38
+ {%- else -%}
39
+ {{ raise_exception("Invalid content type") }}
 
 
 
40
  {%- endif -%}
41
+ {{ '<end_of_turn>
42
+ ' }}
43
  {%- endfor -%}
 
44
  {%- if add_generation_prompt -%}
45
+ {{'<start_of_turn>model
46
+ '}}
47
+ {%- endif -%}
 
processor_config.json CHANGED
@@ -1,75 +1,28 @@
1
  {
2
- "audio_ms_per_token": 40,
3
- "audio_seq_length": 750,
4
- "feature_extractor": {
5
- "dither": 0.0,
6
- "feature_extractor_type": "Gemma4AudioFeatureExtractor",
7
- "feature_size": 128,
8
- "fft_length": 512,
9
- "fft_overdrive": false,
10
- "frame_length": 320,
11
- "hop_length": 160,
12
- "input_scale_factor": 1.0,
13
- "max_frequency": 8000.0,
14
- "mel_floor": 0.001,
15
- "min_frequency": 0.0,
16
- "padding_side": "left",
17
- "padding_value": 0.0,
18
- "per_bin_mean": null,
19
- "per_bin_stddev": null,
20
- "preemphasis": 0.0,
21
- "preemphasis_htk_flavor": true,
22
- "return_attention_mask": true,
23
- "sampling_rate": 16000
24
- },
25
  "image_processor": {
26
- "do_convert_rgb": true,
27
- "do_normalize": false,
28
- "do_rescale": true,
29
- "do_resize": true,
30
- "image_mean": [
31
- 0.0,
32
- 0.0,
33
- 0.0
34
- ],
35
- "image_processor_type": "Gemma4ImageProcessor",
36
- "image_seq_length": 280,
37
- "image_std": [
38
- 1.0,
39
- 1.0,
40
- 1.0
41
- ],
42
- "max_soft_tokens": 280,
43
- "patch_size": 16,
44
- "pooling_kernel_size": 3,
45
- "resample": 3,
46
- "rescale_factor": 0.00392156862745098
47
- },
48
- "image_seq_length": 280,
49
- "processor_class": "Gemma4Processor",
50
- "video_processor": {
51
- "do_convert_rgb": true,
52
  "do_normalize": true,
53
  "do_rescale": true,
54
  "do_resize": true,
55
- "do_sample_frames": true,
56
  "image_mean": [
57
- 0.0,
58
- 0.0,
59
- 0.0
60
  ],
 
 
61
  "image_std": [
62
- 1.0,
63
- 1.0,
64
- 1.0
65
  ],
66
- "max_soft_tokens": 70,
67
- "num_frames": 32,
68
- "patch_size": 16,
69
- "pooling_kernel_size": 3,
70
- "resample": 3,
71
  "rescale_factor": 0.00392156862745098,
72
- "return_metadata": false,
73
- "video_processor_type": "Gemma4VideoProcessor"
74
- }
 
 
 
 
75
  }
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "image_processor": {
3
+ "do_convert_rgb": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  "do_normalize": true,
5
  "do_rescale": true,
6
  "do_resize": true,
 
7
  "image_mean": [
8
+ 0.5,
9
+ 0.5,
10
+ 0.5
11
  ],
12
+ "image_processor_type": "Gemma3ImageProcessor",
13
+ "image_seq_length": 256,
14
  "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
  ],
19
+ "resample": 2,
 
 
 
 
20
  "rescale_factor": 0.00392156862745098,
21
+ "size": {
22
+ "height": 896,
23
+ "width": 896
24
+ }
25
+ },
26
+ "image_seq_length": 256,
27
+ "processor_class": "Gemma3Processor"
28
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc8d3a0ce36466ccc1278bf987df5f71db1719b9ca6b4118264f45cb627bfe0f
3
- size 32169626
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4708757955e49e5b23494815a523ffa5bdd0a7b67c09d16a093f6151245ec5b
3
+ size 33384665
tokenizer_config.json CHANGED
@@ -1,95 +1,24 @@
1
  {
2
- "audio_token": "<|audio|>",
3
  "backend": "tokenizers",
4
- "boa_token": "<|audio>",
5
- "boi_token": "<|image>",
6
  "bos_token": "<bos>",
7
- "eoa_token": "<audio|>",
8
- "eoc_token": "<channel|>",
9
- "eoi_token": "<image|>",
10
  "eos_token": "<eos>",
11
- "eot_token": "<turn|>",
12
- "escape_token": "<|\"|>",
13
- "etc_token": "<tool_call|>",
14
- "etd_token": "<tool|>",
15
- "etr_token": "<tool_response|>",
16
- "extra_special_tokens": [
17
- "<|video|>"
18
- ],
19
- "image_token": "<|image|>",
20
  "is_local": false,
21
  "mask_token": "<mask>",
22
  "model_max_length": 1000000000000000019884624838656,
23
  "model_specific_special_tokens": {
24
- "audio_token": "<|audio|>",
25
- "boa_token": "<|audio>",
26
- "boi_token": "<|image>",
27
- "eoa_token": "<audio|>",
28
- "eoc_token": "<channel|>",
29
- "eoi_token": "<image|>",
30
- "eot_token": "<turn|>",
31
- "escape_token": "<|\"|>",
32
- "etc_token": "<tool_call|>",
33
- "etd_token": "<tool|>",
34
- "etr_token": "<tool_response|>",
35
- "image_token": "<|image|>",
36
- "soc_token": "<|channel>",
37
- "sot_token": "<|turn>",
38
- "stc_token": "<|tool_call>",
39
- "std_token": "<|tool>",
40
- "str_token": "<|tool_response>",
41
- "think_token": "<|think|>"
42
  },
43
  "pad_token": "<pad>",
44
- "padding_side": "left",
45
- "processor_class": "Gemma4Processor",
46
- "response_schema": {
47
- "properties": {
48
- "content": {
49
- "type": "string"
50
- },
51
- "role": {
52
- "const": "assistant"
53
- },
54
- "thinking": {
55
- "type": "string"
56
- },
57
- "tool_calls": {
58
- "items": {
59
- "properties": {
60
- "function": {
61
- "properties": {
62
- "arguments": {
63
- "additionalProperties": {},
64
- "type": "object",
65
- "x-parser": "gemma4-tool-call"
66
- },
67
- "name": {
68
- "type": "string"
69
- }
70
- },
71
- "type": "object",
72
- "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})"
73
- },
74
- "type": {
75
- "const": "function"
76
- }
77
- },
78
- "type": "object"
79
- },
80
- "type": "array",
81
- "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>"
82
- }
83
- },
84
- "type": "object",
85
- "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?P<content>(?:(?!\\<turn\\|\\>)(?!\\<\\|tool_response\\>).)+)?(?:\\<turn\\|\\>|\\<\\|tool_response\\>)?"
86
- },
87
- "soc_token": "<|channel>",
88
- "sot_token": "<|turn>",
89
- "stc_token": "<|tool_call>",
90
- "std_token": "<|tool>",
91
- "str_token": "<|tool_response>",
92
- "think_token": "<|think|>",
93
  "tokenizer_class": "GemmaTokenizer",
94
- "unk_token": "<unk>"
 
95
  }
 
1
  {
 
2
  "backend": "tokenizers",
3
+ "boi_token": "<start_of_image>",
 
4
  "bos_token": "<bos>",
5
+ "clean_up_tokenization_spaces": false,
6
+ "eoi_token": "<end_of_image>",
 
7
  "eos_token": "<eos>",
8
+ "image_token": "<image_soft_token>",
 
 
 
 
 
 
 
 
9
  "is_local": false,
10
  "mask_token": "<mask>",
11
  "model_max_length": 1000000000000000019884624838656,
12
  "model_specific_special_tokens": {
13
+ "boi_token": "<start_of_image>",
14
+ "eoi_token": "<end_of_image>",
15
+ "image_token": "<image_soft_token>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  },
17
  "pad_token": "<pad>",
18
+ "processor_class": "Gemma3Processor",
19
+ "sp_model_kwargs": null,
20
+ "spaces_between_special_tokens": false,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  "tokenizer_class": "GemmaTokenizer",
22
+ "unk_token": "<unk>",
23
+ "use_default_system_prompt": false
24
  }