kj1 committed on
Commit
6ce4d4c
·
verified ·
1 Parent(s): d70da9b

Upload 14 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ quantization_config.json filter=lfs diff=lfs merge=lfs -text
Gemma4-NoThink.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "instruct": {
3
+ "input_sequence": "<|turn>user\n",
4
+ "output_sequence": "<|turn>model\n",
5
+ "last_output_sequence": "<|turn>model\n<|channel>thought\n<channel|>",
6
+ "system_sequence": "<|turn>system\n",
7
+ "stop_sequence": "",
8
+ "wrap": false,
9
+ "macro": true,
10
+ "activation_regex": "",
11
+ "first_output_sequence": "",
12
+ "skip_examples": true,
13
+ "output_suffix": "<turn|>\n",
14
+ "input_suffix": "<turn|>\n",
15
+ "system_suffix": "<turn|>\n",
16
+ "user_alignment_message": "",
17
+ "system_same_as_user": false,
18
+ "last_system_sequence": "",
19
+ "first_input_sequence": "",
20
+ "last_input_sequence": "",
21
+ "names_behavior": "none",
22
+ "sequences_as_stop_strings": true,
23
+ "story_string_prefix": "",
24
+ "story_string_suffix": "",
25
+ "name": "Gemma4-NoThink"
26
+ },
27
+ "context": {
28
+ "story_string": "<|turn>system\n{{#if anchorBefore}}{{anchorBefore}}\n{{/if}}{{#if system}}{{system}}\n{{/if}}{{#if wiBefore}}{{wiBefore}}\n{{/if}}{{#if description}}{{description}}\n{{/if}}{{#if personality}}{{char}}'s personality: {{personality}}\n{{/if}}{{#if scenario}}Scenario: {{scenario}}\n{{/if}}{{#if wiAfter}}{{wiAfter}}\n{{/if}}{{#if persona}}{{persona}}\n{{/if}}{{#if anchorAfter}}{{anchorAfter}}\n{{/if}}{{trim}}<turn|>\n",
29
+ "example_separator": "",
30
+ "chat_start": "",
31
+ "use_stop_strings": false,
32
+ "names_as_stop_strings": true,
33
+ "story_string_position": 0,
34
+ "story_string_depth": 1,
35
+ "story_string_role": 0,
36
+ "always_force_name2": false,
37
+ "trim_sentences": false,
38
+ "single_line": false,
39
+ "name": "Gemma4-NoThink"
40
+ },
41
+ "reasoning": {
42
+ "prefix": "<|channel>thought\n",
43
+ "suffix": "<channel|>",
44
+ "separator": "\n\n",
45
+ "name": "Gemma 4"
46
+ }
47
+ }
Gemma4-Think.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "instruct": {
3
+ "input_sequence": "<|turn>user\n",
4
+ "output_sequence": "<|turn>model\n",
5
+ "last_output_sequence": "<|turn>model\n",
6
+ "system_sequence": "<|turn>system\n",
7
+ "stop_sequence": "",
8
+ "wrap": false,
9
+ "macro": true,
10
+ "activation_regex": "",
11
+ "first_output_sequence": "",
12
+ "skip_examples": true,
13
+ "output_suffix": "<turn|>\n",
14
+ "input_suffix": "<turn|>\n",
15
+ "system_suffix": "<turn|>\n",
16
+ "user_alignment_message": "",
17
+ "system_same_as_user": false,
18
+ "last_system_sequence": "",
19
+ "first_input_sequence": "",
20
+ "last_input_sequence": "",
21
+ "names_behavior": "none",
22
+ "sequences_as_stop_strings": true,
23
+ "story_string_prefix": "",
24
+ "story_string_suffix": "",
25
+ "name": "Gemma4-Think"
26
+ },
27
+ "context": {
28
+ "story_string": "<|turn>system\n<|think|>\n{{#if anchorBefore}}{{anchorBefore}}\n{{/if}}{{#if system}}{{system}}\n{{/if}}{{#if wiBefore}}{{wiBefore}}\n{{/if}}{{#if description}}{{description}}\n{{/if}}{{#if personality}}{{char}}'s personality: {{personality}}\n{{/if}}{{#if scenario}}Scenario: {{scenario}}\n{{/if}}{{#if wiAfter}}{{wiAfter}}\n{{/if}}{{#if persona}}{{persona}}\n{{/if}}{{#if anchorAfter}}{{anchorAfter}}\n{{/if}}{{trim}}<turn|>\n",
29
+ "example_separator": "",
30
+ "chat_start": "",
31
+ "use_stop_strings": false,
32
+ "names_as_stop_strings": true,
33
+ "story_string_position": 0,
34
+ "story_string_depth": 1,
35
+ "story_string_role": 0,
36
+ "always_force_name2": false,
37
+ "trim_sentences": false,
38
+ "single_line": false,
39
+ "name": "Gemma4-Think"
40
+ },
41
+ "reasoning": {
42
+ "prefix": "<|channel>thought\n",
43
+ "suffix": "<channel|>",
44
+ "separator": "\n\n",
45
+ "name": "Gemma 4"
46
+ }
47
+ }
README.md CHANGED
@@ -1,16 +1,693 @@
1
  ---
2
  license: apache-2.0
 
 
 
 
 
3
  base_model:
4
- - zerofata/G4-MeroMero-26B-A4B
5
- tags:
6
- - exl3
7
- - exllamav3
8
- - roleplay
9
- - rp
10
- - gemma4
11
- - gemma
12
- - quantized
13
  ---
14
- Requires ExLlamaV3 0.0.29 and above to launch. 0.0.32+ uses less VRAM.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- Planned 3bpw, 4bpw and 6bpw with 8 head bits.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
+ datasets:
4
+ - zerofata/Instruct-Anime
5
+ - zerofata/Gemini-3.1-Pro-SmallWiki
6
+ - zerofata/Gemini-3.1-Pro-GLM5-Characters
7
+ - zerofata/Roleplay-Anime-Characters
8
  base_model:
9
+ - google/gemma-4-26B-A4B-it
 
 
 
 
 
 
 
 
10
  ---
11
+ <style>
12
+ .gs {
13
+ --bg: #0d0a10;
14
+ --surface: #14101a;
15
+ --edge: #2a1f38;
16
+ --rule: #382850;
17
+ --text: #b8a0cc;
18
+ --dim: #7a6090;
19
+ --bright: #f0e6ff;
20
+ --azure: #c060ff;
21
+ --crimson: #ff4da6;
22
+ --az-glow: rgba(192,96,255,0.10);
23
+ --cr-glow: rgba(255,77,166,0.06);
24
+ --mono: 'JetBrains Mono', monospace;
25
+ --sans: 'Inter', sans-serif;
26
 
27
+ font-family: var(--sans);
28
+ color: var(--text);
29
+ max-width: 900px;
30
+ margin: 0 auto;
31
+ padding: 0 0 60px;
32
+ line-height: 1.7;
33
+ font-size: 1rem;
34
+ background:
35
+ radial-gradient(ellipse at 50% 0%, rgba(192,96,255,0.04) 0%, transparent 50%),
36
+ radial-gradient(ellipse at 50% 100%, rgba(255,77,166,0.02) 0%, transparent 50%),
37
+ var(--bg);
38
+ }
39
+
40
+ /* ── Profile Card ── */
41
+ .gs-profile {
42
+ border-bottom: none;
43
+ position: relative;
44
+ background: var(--surface);
45
+ margin-bottom: 0;
46
+ }
47
+ .gs-profile-art {
48
+ position: relative;
49
+ }
50
+ .gs-profile-art img {
51
+ display: block;
52
+ width: 100%;
53
+ height: 380px;
54
+ object-fit: cover;
55
+ margin-top: 0px;
56
+ }
57
+ .gs-ident {
58
+ position: absolute;
59
+ bottom: 0;
60
+ left: 0;
61
+ right: 0;
62
+ padding: 120px 44px 28px;
63
+ background: linear-gradient(
64
+ to top,
65
+ var(--bg) 0%,
66
+ rgba(13,10,16,0.92) 30%,
67
+ rgba(13,10,16,0.4) 60%,
68
+ transparent 100%
69
+ );
70
+ }
71
+ .gs-profile-info {
72
+ padding: 20px 44px 36px;
73
+ display: flex;
74
+ flex-direction: column;
75
+ gap: 20px;
76
+ }
77
+ .gs-profile-label {
78
+ display: flex;
79
+ align-items: baseline;
80
+ gap: 10px;
81
+ font-family: var(--mono);
82
+ letter-spacing: 0.14em;
83
+ text-transform: uppercase;
84
+ }
85
+ .gs-profile-label .gs-snum {
86
+ font-size: 0.62rem;
87
+ font-weight: 700;
88
+ color: var(--crimson);
89
+ opacity: 1;
90
+ position: static;
91
+ transform: none;
92
+ }
93
+ .gs-profile-label .gs-stitle {
94
+ font-size: 0.62rem;
95
+ color: var(--dim);
96
+ font-weight: 700;
97
+ letter-spacing: 0.14em;
98
+ }
99
+ .gs-profile-label .gs-stitle::before {
100
+ content: none;
101
+ }
102
+ .gs-name {
103
+ font-family: var(--sans);
104
+ font-size: 3.2rem;
105
+ font-weight: 900;
106
+ color: var(--bright);
107
+ letter-spacing: 0.06em;
108
+ line-height: 1;
109
+ margin: 0 0 10px;
110
+ text-shadow: 0 1px 2px rgba(0,0,0,0.6);
111
+ overflow-wrap: break-word;
112
+ }
113
+ .gs-base {
114
+ font-family: var(--mono);
115
+ font-size: 0.68rem;
116
+ color: var(--crimson);
117
+ letter-spacing: 0.14em;
118
+ text-transform: uppercase;
119
+ display: block;
120
+ }
121
+ .gs-profile-bio p {
122
+ margin: 0 0 14px;
123
+ font-size: 0.95rem;
124
+ }
125
+ .gs-profile-bio p:last-child { margin-bottom: 0; }
126
+
127
+ /* ── Sections ── */
128
+ .gs-section {
129
+ padding: 0;
130
+ }
131
+ .gs-shead {
132
+ position: relative;
133
+ display: flex;
134
+ align-items: center;
135
+ gap: 14px;
136
+ padding: 16px 44px 14px;
137
+ margin-bottom: 28px;
138
+ border-top: 2px solid;
139
+ border-image: linear-gradient(90deg, var(--crimson), var(--azure)) 1;
140
+ }
141
+ .gs-snum {
142
+ font-family: var(--mono);
143
+ font-size: 2.2rem;
144
+ font-weight: 900;
145
+ color: var(--crimson);
146
+ letter-spacing: 0.06em;
147
+ opacity: 0.12;
148
+ position: absolute;
149
+ right: 44px;
150
+ top: 50%;
151
+ transform: translateY(-50%);
152
+ line-height: 1;
153
+ }
154
+ .gs-stitle {
155
+ font-size: 1.05rem;
156
+ font-weight: 700;
157
+ letter-spacing: 0.1em;
158
+ text-transform: uppercase;
159
+ color: var(--bright);
160
+ }
161
+ .gs-stitle::before {
162
+ content: '\2726';
163
+ color: var(--crimson);
164
+ font-size: 0.8em;
165
+ margin-right: 8px;
166
+ }
167
+ .gs-sbody {
168
+ padding: 0 44px 44px;
169
+ }
170
+ .gs-sbody p {
171
+ margin: 0 0 14px;
172
+ font-size: 0.95rem;
173
+ }
174
+ .gs-sbody p:last-child { margin-bottom: 0; }
175
+
176
+ /* ── Data panels ── */
177
+ .gs-stack {
178
+ display: grid;
179
+ grid-template-columns: 1fr 1fr;
180
+ gap: 16px;
181
+ }
182
+ .gs-stack .gs-panel:nth-child(3) {
183
+ grid-column: 1 / -1;
184
+ }
185
+ .gs-panel {
186
+ border: 1px solid var(--edge);
187
+ border-left: 3px solid var(--crimson);
188
+ position: relative;
189
+ background: var(--surface);
190
+ box-shadow: 0 0 20px rgba(192,96,255,0.03);
191
+ }
192
+ .gs-panel::before {
193
+ content: '';
194
+ position: absolute;
195
+ top: -1px;
196
+ right: -1px;
197
+ width: 10px;
198
+ height: 10px;
199
+ border-top: 1px solid var(--crimson);
200
+ border-right: 1px solid var(--crimson);
201
+ opacity: 0.5;
202
+ }
203
+ .gs-panel::after {
204
+ content: '';
205
+ position: absolute;
206
+ bottom: -1px;
207
+ right: -1px;
208
+ width: 10px;
209
+ height: 10px;
210
+ border-bottom: 1px solid var(--azure);
211
+ border-right: 1px solid var(--azure);
212
+ opacity: 0.4;
213
+ }
214
+ .gs-panel-head {
215
+ font-family: var(--mono);
216
+ font-size: 0.68rem;
217
+ font-weight: 700;
218
+ letter-spacing: 0.14em;
219
+ text-transform: uppercase;
220
+ color: var(--dim);
221
+ padding: 10px 16px;
222
+ border-bottom: 1px solid var(--edge);
223
+ }
224
+ .gs-panel-head::after {
225
+ content: ' \2726';
226
+ color: var(--crimson);
227
+ opacity: 0.5;
228
+ }
229
+ .gs-row {
230
+ display: grid;
231
+ grid-template-columns: 10ch 1fr;
232
+ align-items: baseline;
233
+ column-gap: 4px;
234
+ padding: 9px 16px;
235
+ border-bottom: 1px solid var(--edge);
236
+ font-size: 0.9rem;
237
+ }
238
+ .gs-row:last-child { border-bottom: none; }
239
+ .gs-key {
240
+ font-family: var(--mono);
241
+ font-size: 0.9rem;
242
+ color: var(--dim);
243
+ }
244
+ .gs-key::after {
245
+ content: ':';
246
+ }
247
+ .gs-val {
248
+ color: var(--bright);
249
+ font-size: 0.9rem;
250
+ }
251
+ .gs-row .gs-val:only-child {
252
+ grid-column: 1 / -1;
253
+ }
254
+
255
+ /* ── Quantizations (compact) ── */
256
+ .gs-section--compact .gs-shead {
257
+ border-top: 1px solid var(--edge);
258
+ border-image-source: none;
259
+ padding: 12px 44px 10px;
260
+ margin-bottom: 18px;
261
+ }
262
+ .gs-section--compact .gs-snum {
263
+ opacity: 0.08;
264
+ }
265
+ .gs-section--compact .gs-stitle::before {
266
+ content: '\2726';
267
+ }
268
+ .gs-section--compact .gs-sbody {
269
+ padding: 0 44px 32px;
270
+ }
271
+ .gs-qrow {
272
+ display: flex;
273
+ gap: 12px;
274
+ flex-wrap: wrap;
275
+ justify-content: center;
276
+ }
277
+ .gs-qpanel {
278
+ background: var(--surface);
279
+ border: 1px solid var(--edge);
280
+ border-left: 3px solid var(--crimson);
281
+ display: flex;
282
+ align-items: center;
283
+ gap: 16px;
284
+ padding: 12px 24px;
285
+ border-radius: 4px;
286
+ position: relative;
287
+ box-shadow: 0 0 20px rgba(192,96,255,0.03);
288
+ }
289
+ .gs-qpanel::before {
290
+ content: '';
291
+ position: absolute;
292
+ top: -1px;
293
+ right: -1px;
294
+ width: 10px;
295
+ height: 10px;
296
+ border-top: 1px solid var(--crimson);
297
+ border-right: 1px solid var(--crimson);
298
+ opacity: 0.5;
299
+ }
300
+ .gs-qpanel::after {
301
+ content: '';
302
+ position: absolute;
303
+ bottom: -1px;
304
+ right: -1px;
305
+ width: 10px;
306
+ height: 10px;
307
+ border-bottom: 1px solid var(--azure);
308
+ border-right: 1px solid var(--azure);
309
+ opacity: 0.4;
310
+ }
311
+ .gs-qtype {
312
+ font-family: var(--mono);
313
+ font-size: 0.58rem;
314
+ font-weight: 700;
315
+ letter-spacing: 0.18em;
316
+ text-transform: uppercase;
317
+ color: var(--crimson);
318
+ flex-shrink: 0;
319
+ }
320
+ .gs-qsep {
321
+ width: 1px;
322
+ height: 16px;
323
+ background: var(--rule);
324
+ flex-shrink: 0;
325
+ }
326
+ .gs-qpanel a {
327
+ color: var(--bright);
328
+ text-decoration: none;
329
+ font-size: 0.9rem;
330
+ border-bottom: 1px solid var(--rule);
331
+ }
332
+ .gs-qpanel a:hover { color: var(--crimson); border-bottom-color: var(--crimson); }
333
+
334
+ /* ── Journal (Creation Process) ── */
335
+ .gs-section--journal .gs-sbody {
336
+ margin: 0 44px;
337
+ padding: 24px 32px 32px;
338
+ background: var(--surface);
339
+ border: 1px solid var(--edge);
340
+ border-left: 4px solid var(--azure);
341
+ position: relative;
342
+ margin-bottom: 0;
343
+ }
344
+ .gs-section--journal .gs-sbody::before {
345
+ content: '';
346
+ position: absolute;
347
+ top: -1px;
348
+ right: -1px;
349
+ width: 12px;
350
+ height: 12px;
351
+ border-top: 1px solid var(--azure);
352
+ border-right: 1px solid var(--azure);
353
+ opacity: 0.4;
354
+ }
355
+ .gs-section--journal .gs-sbody::after {
356
+ content: '';
357
+ position: absolute;
358
+ bottom: -1px;
359
+ left: -1px;
360
+ width: 12px;
361
+ height: 12px;
362
+ border-bottom: 1px solid var(--crimson);
363
+ border-left: 1px solid var(--crimson);
364
+ opacity: 0.3;
365
+ }
366
+ .gs-section--journal .gs-sbody p:first-child {
367
+ font-style: italic;
368
+ color: var(--bright);
369
+ }
370
+
371
+ /* ── Links ── */
372
+ .gs a {
373
+ color: var(--bright);
374
+ text-decoration: none;
375
+ border-bottom: 1px solid var(--rule);
376
+ }
377
+ .gs a:hover { color: var(--crimson); border-bottom-color: var(--crimson); }
378
+
379
+ /* ── Dropdown ── */
380
+ .gs details {
381
+ border: 1px solid var(--edge);
382
+ border-left: 3px solid var(--crimson);
383
+ margin-top: 24px;
384
+ position: relative;
385
+ background: var(--surface);
386
+ box-shadow: 0 0 20px rgba(192,96,255,0.03);
387
+ }
388
+ .gs details::before {
389
+ content: '';
390
+ position: absolute;
391
+ top: -1px;
392
+ right: -1px;
393
+ width: 10px;
394
+ height: 10px;
395
+ border-top: 1px solid var(--crimson);
396
+ border-right: 1px solid var(--crimson);
397
+ opacity: 0.5;
398
+ }
399
+ .gs details::after {
400
+ content: '';
401
+ position: absolute;
402
+ bottom: -1px;
403
+ right: -1px;
404
+ width: 10px;
405
+ height: 10px;
406
+ border-bottom: 1px solid var(--azure);
407
+ border-right: 1px solid var(--azure);
408
+ opacity: 0.4;
409
+ }
410
+ .gs summary {
411
+ list-style: none;
412
+ padding: 11px 16px;
413
+ cursor: pointer;
414
+ font-family: var(--mono);
415
+ font-size: 0.72rem;
416
+ font-weight: 700;
417
+ letter-spacing: 0.12em;
418
+ text-transform: uppercase;
419
+ color: var(--dim);
420
+ user-select: none;
421
+ display: flex;
422
+ align-items: center;
423
+ gap: 10px;
424
+ }
425
+ .gs summary::-webkit-details-marker { display: none; }
426
+ .gs summary::before {
427
+ content: '+';
428
+ color: var(--crimson);
429
+ font-size: 1rem;
430
+ line-height: 1;
431
+ flex-shrink: 0;
432
+ }
433
+ .gs details[open] summary::before { content: '−'; }
434
+ .gs summary:hover { color: var(--bright); }
435
+ .gs-detail-body {
436
+ padding: 22px 18px;
437
+ border-top: 1px solid var(--edge);
438
+ }
439
+ .gs-detail-body p { margin: 0 0 16px; font-size: 0.9rem; }
440
+ .gs-cfg-title {
441
+ font-family: var(--mono);
442
+ font-size: 0.72rem;
443
+ font-weight: 700;
444
+ letter-spacing: 0.1em;
445
+ text-transform: uppercase;
446
+ color: var(--dim);
447
+ margin: 0 0 8px;
448
+ }
449
+
450
+ /* ── Code ── */
451
+ .gs pre {
452
+ background: #080510;
453
+ border: 1px solid var(--edge);
454
+ border-left: 2px solid var(--azure);
455
+ padding: 16px 18px;
456
+ overflow-x: auto;
457
+ font-family: var(--mono);
458
+ font-size: 0.76rem;
459
+ line-height: 1.6;
460
+ color: var(--text);
461
+ margin: 0 0 22px;
462
+ }
463
+ .gs pre:last-child { margin-bottom: 0; }
464
+ .gs pre code { background: none; color: inherit; padding: 0; }
465
+ .gs code {
466
+ font-family: var(--mono);
467
+ font-size: 0.875em;
468
+ color: var(--crimson);
469
+ background: var(--az-glow);
470
+ padding: 2px 5px;
471
+ }
472
+ </style>
473
+ <html lang="en">
474
+ <head>
475
+ <meta charset="UTF-8">
476
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
477
+ <title>Stardom</title>
478
+ <link rel="preconnect" href="https://fonts.googleapis.com">
479
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
480
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700;900&family=JetBrains+Mono:wght@400;700&display=swap" rel="stylesheet">
481
+ </head>
482
+ <body>
483
+ <div class="gs">
484
+
485
+ <div class="gs-profile">
486
+ <div class="gs-profile-art">
487
+ <img src="https://cdn-uploads.huggingface.co/production/uploads/65b19c6c638328850e12d38c/xBv_weuMs5x3i4WFDRksn.png" alt="image">
488
+ <div class="gs-ident">
489
+ <h1 class="gs-name">Mero Mero</h1>
490
+ <span class="gs-base">Gemma4 26B A4B</span>
491
+ </div>
492
+ </div>
493
+ </div>
494
+
495
+ <div class="gs-section">
496
+ <div class="gs-shead">
497
+ <span class="gs-snum">01</span>
498
+ <span class="gs-stitle">Overview</span>
499
+ </div>
500
+ <div class="gs-sbody">
501
+ <p>God, this model was difficult to work with.</p>
502
+ <p>Google cooked, there wasn't a lot to improve but there was a lot to break.</p>
503
+ <p>This model is a finetune that was merged back into the original instruct. It feels a lot like the original instruct. However, its reasoning is more structured and uses fewer tokens during RP, and the model generally has a slightly less verbose / flowery writing style.</p>
504
+ <p>I think the main weakness of this model is that swipe variety hasn't improved. Logic and repetition are, I think, roughly on par with the original.</p>
505
+ <p>Supports both thinking and non-thinking modes.</p>
506
+ </div>
507
+ </div>
508
+
509
+
510
+ <div class="gs-section">
511
+ <div class="gs-shead">
512
+ <span class="gs-snum">02</span>
513
+ <span class="gs-stitle">SillyTavern Settings</span>
514
+ </div>
515
+ <div class="gs-sbody">
516
+ <div class="gs-stack">
517
+ <div class="gs-panel">
518
+ <div class="gs-panel-head">Suggested Roleplay Format</div>
519
+ <div class="gs-row"><span class="gs-key">Actions</span><span class="gs-val">In plaintext</span></div>
520
+ <div class="gs-row"><span class="gs-key">Dialogue</span><span class="gs-val">"In quotes"</span></div>
521
+ <div class="gs-row"><span class="gs-key">Thoughts</span><span class="gs-val">*In asterisks*</span></div>
522
+ </div>
523
+ <div class="gs-panel">
524
+ <div class="gs-panel-head">Recommended Samplers</div>
525
+ <div class="gs-row"><span class="gs-key">Temp</span><span class="gs-val">0.8 - 1.0</span></div>
526
+ <div class="gs-row"><span class="gs-key">MinP</span><span class="gs-val">0.05</span></div>
527
+ <div class="gs-row"><span class="gs-val"></span></div>
528
+ </div>
529
+ <div class="gs-panel">
530
+ <div class="gs-panel-head">Instruct</div>
531
+ <div class="gs-row"><span class="gs-val"><a href="https://huggingface.co/zerofata/G4-MeroMero-26B-A4B/raw/main/Gemma4-Think.json">Gemma 4 - Think</a></span></div>
532
+ <div class="gs-row"><span class="gs-val"><a href="https://huggingface.co/zerofata/G4-MeroMero-26B-A4B/raw/main/Gemma4-NoThink.json">Gemma 4 - NoThink</a></span></div>
533
+ </div>
534
+ </div>
535
+ </div>
536
+ </div>
537
+
538
+
539
+ <div class="gs-section gs-section--compact">
540
+ <div class="gs-shead">
541
+ <span class="gs-snum">03</span>
542
+ <span class="gs-stitle">Quantizations</span>
543
+ </div>
544
+ <div class="gs-sbody">
545
+ <div class="gs-qrow">
546
+ <div class="gs-qpanel">
547
+ <span class="gs-qtype">GGUF</span>
548
+ <div class="gs-qsep"></div>
549
+ <a href="https://huggingface.co/zerofata/G4-MeroMero-26B-A4B-GGUF">iMatrix</a>
550
+ </div>
551
+ </div>
552
+ </div>
553
+ </div>
554
+
555
+
556
+ <div class="gs-section gs-section--journal">
557
+ <div class="gs-shead">
558
+ <span class="gs-snum">04</span>
559
+ <span class="gs-stitle">Creation Process</span>
560
+ </div>
561
+ <div class="gs-sbody">
562
+ <p>Creation Process: SFT > Merge</p>
563
+ <p>SFT on approx 35 million tokens.</p>
564
+ <p>Despite using 35 million tokens, this dataset is fairly modest in size. The trainable portion is somewhere in the rough ballpark of 15 million tokens. The extra tokens come from a new multi-turn RP dataset on which I train the last turn only.</p>
565
+ <p>Feels like Google left the instruct model at the razor's edge of overfitting. Finetune it at all and it feels like it'll rapidly lose intelligence, despite taking the writing style nicely. Hard to tell if you're overfitting or underfitting.</p>
566
+ <p>My solution was to blast the model with my data anyway to ensure it picked up the new reasoning format and writing style, and then merge that back into the instruct to heal the logic damage. There's still room for a better merge that keeps more of the writing style, potentially using the base model to undo some of the overfitting.</p>
567
+ <p>Trained using Axolotl.</p>
568
+ <details>
569
+ <summary>Mergekit Config</summary>
570
+ <div class="gs-detail-body">
571
+ <pre><code>models:
572
+ &#45; model: google/gemma&#45;4&#45;26B&#45;A4B&#45;it
573
+ parameters:
574
+ weight: 0.5
575
+ &#45; model: ApocalypseParty/G4&#45;26B&#45;SFT&#45;6
576
+ parameters:
577
+ weight: 0.5
578
+ merge_method: linear
579
+ dtype: bfloat16</code></pre>
580
+ </div>
581
+ </details>
582
+ <details>
583
+ <summary>Axolotl Config</summary>
584
+ <div class="gs-detail-body">
585
+ <pre><code>&#35; Gemma 4 26B&#45;A4B MoE QLoRA with ScatterMoE kernels
586
+ &#35;
587
+ &#35; Validated: 50 steps on FineTome&#45;100k, loss 8.8 &#45;> 1.8, single RTX 5090 (32GB)
588
+ &#35; torch_compile=true: 21 GiB peak VRAM, ~230 tok/s, 336s total
589
+ &#35;
590
+ &#35; Key notes:
591
+ &#35; &#45; Max sequence length on 32GB GPU: 2048 (micro_batch_size=1, SDP attention).
592
+ &#35; 4096 seq_len OOMs due to head_dim=512 math SDP materializing full score matrix.
593
+ &#35; Use 48GB+ GPUs for longer sequences or multi&#45;GPU with FSDP.
594
+ &#32;
595
+ base_model: google/gemma&#45;4&#45;26B&#45;A4B&#45;it
596
+ &#32;
597
+ plugins:
598
+ &#45; axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
599
+ &#45; axolotl.integrations.kernels.KernelsPlugin
600
+ &#45; axolotl.integrations.liger.LigerPlugin
601
+ use_kernels: true
602
+ use_scattermoe: true
603
+ cut_cross_entropy: true
604
+ experts_implementation: scattermoe
605
+ liger_layer_norm: true
606
+ liger_rope: true
607
+ liger_rms_norm: true
608
+ liger_glu_activation: true
609
+ liger_rms_norm_gated: true
610
+ strict: false
611
+ &#32;
612
+ datasets:
613
+ &#45; path: ./data/gemma_4_sft_5_masked_20260415_082234.jsonl
614
+ val_set_size: 0.02
615
+ output_dir: ./G4&#45;26B&#45;SFT&#45;6
616
+ &#32;
617
+ sequence_len: 10756
618
+ pad_to_sequence_len: true
619
+ sample_packing: true
620
+ &#32;
621
+ load_in_4bit: false
622
+ &#35;quantize_moe_experts: true
623
+ adapter: lora
624
+ lora_r: 128
625
+ lora_alpha: 128
626
+ peft_use_rslora: true
627
+ lora_dropout: 0.0
628
+ freeze_mm_modules: true
629
+ &#32;
630
+ &#35; Restrict LoRA to text backbone only (skip vision/audio encoders)
631
+ &#35; using regex to match only the text decoder attention projections.
632
+ lora_target_modules: 'model.language_model.layers.[\d]+.(_checkpoint_wrapped_module.)?(mlp|self_attn).(up|down|gate|q|k|v|o)_proj'
633
+ &#32;
634
+ &#35; MoE expert LoRA (3D Parameter tensors, not nn.Linear)
635
+ lora_target_parameters:
636
+ &#45; experts.gate_up_proj
637
+ &#45; experts.down_proj
638
+ &#32;
639
+ lora_mlp_kernel: false
640
+ lora_qkv_kernel: false
641
+ lora_o_kernel: false
642
+ &#32;
643
+ &#35;bnb_config_kwargs:
644
+ &#35; bnb_4bit_use_double_quant: true
645
+ &#32;
646
+ wandb_project: G4&#45;26B&#45;SFT
647
+ wandb_name: G4&#45;26B&#45;SFT&#45;6
648
+ &#32;
649
+ gradient_accumulation_steps: 2
650
+ micro_batch_size: 2
651
+ num_epochs: 2
652
+ optimizer: adamw_torch_fused
653
+ lr_scheduler: constant_with_warmup
654
+ learning_rate: 1e&#45;5
655
+ max_grad_norm: 1.0
656
+ &#32;
657
+ bf16: auto
658
+ tf32: true
659
+ &#32;
660
+ &#35;gradient_checkpointing: true
661
+ &#35;activation_offloading: true
662
+ logging_steps: 1
663
+ &#32;
664
+ &#35; FA2 not supported
665
+ sdp_attention: true
666
+ &#35;flex_attention: true
667
+ &#35;torch_compile: true
668
+ flash_attention: false
669
+ &#32;
670
+ warmup_ratio: 0.1
671
+ evals_per_epoch: 4
672
+ saves_per_epoch: 4
673
+ weight_decay: 0.01
674
+ special_tokens:
675
+ &#32;
676
+ fsdp_config:
677
+ fsdp_version: 2
678
+ offload_params: false
679
+ cpu_ram_efficient_loading: false
680
+ auto_wrap_policy: TRANSFORMER_BASED_WRAP
681
+ transformer_layer_cls_to_wrap: Gemma4TextDecoderLayer
682
+ state_dict_type: FULL_STATE_DICT
683
+ sharding_strategy: FULL_SHARD
684
+ reshard_after_forward: true
685
+ activation_checkpointing: true</code></pre>
686
+ </div>
687
+ </details>
688
+ </div>
689
+ </div>
690
+
691
+ </div>
692
+ </body>
693
+ </html>
chat_template.jinja ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- macro format_parameters(properties, required) -%}
2
+ {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
3
+ {%- set ns = namespace(found_first=false) -%}
4
+ {%- for key, value in properties | dictsort -%}
5
+ {%- set add_comma = false -%}
6
+ {%- if key not in standard_keys -%}
7
+ {%- if ns.found_first %},{% endif -%}
8
+ {%- set ns.found_first = true -%}
9
+ {{ key }}:{
10
+ {%- if value['description'] -%}
11
+ description:<|"|>{{ value['description'] }}<|"|>
12
+ {%- set add_comma = true -%}
13
+ {%- endif -%}
14
+ {%- if value['type'] | upper == 'STRING' -%}
15
+ {%- if value['enum'] -%}
16
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
17
+ enum:{{ format_argument(value['enum']) }}
18
+ {%- endif -%}
19
+ {%- elif value['type'] | upper == 'ARRAY' -%}
20
+ {%- if value['items'] is mapping and value['items'] -%}
21
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
22
+ items:{
23
+ {%- set ns_items = namespace(found_first=false) -%}
24
+ {%- for item_key, item_value in value['items'] | dictsort -%}
25
+ {%- if item_value is not none -%}
26
+ {%- if ns_items.found_first %},{% endif -%}
27
+ {%- set ns_items.found_first = true -%}
28
+ {%- if item_key == 'properties' -%}
29
+ properties:{
30
+ {%- if item_value is mapping -%}
31
+ {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
32
+ {%- endif -%}
33
+ }
34
+ {%- elif item_key == 'required' -%}
35
+ required:[
36
+ {%- for req_item in item_value -%}
37
+ <|"|>{{- req_item -}}<|"|>
38
+ {%- if not loop.last %},{% endif -%}
39
+ {%- endfor -%}
40
+ ]
41
+ {%- elif item_key == 'type' -%}
42
+ {%- if item_value is string -%}
43
+ type:{{ format_argument(item_value | upper) }}
44
+ {%- else -%}
45
+ type:{{ format_argument(item_value | map('upper') | list) }}
46
+ {%- endif -%}
47
+ {%- else -%}
48
+ {{ item_key }}:{{ format_argument(item_value) }}
49
+ {%- endif -%}
50
+ {%- endif -%}
51
+ {%- endfor -%}
52
+ }
53
+ {%- endif -%}
54
+ {%- endif -%}
55
+ {%- if value['nullable'] %}
56
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
57
+ nullable:true
58
+ {%- endif -%}
59
+ {%- if value['type'] | upper == 'OBJECT' -%}
60
+ {%- if value['properties'] is defined and value['properties'] is mapping -%}
61
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
62
+ properties:{
63
+ {{- format_parameters(value['properties'], value['required'] | default([])) -}}
64
+ }
65
+ {%- elif value is mapping -%}
66
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
67
+ properties:{
68
+ {{- format_parameters(value, value['required'] | default([])) -}}
69
+ }
70
+ {%- endif -%}
71
+ {%- if value['required'] -%}
72
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
73
+ required:[
74
+ {%- for item in value['required'] | default([]) -%}
75
+ <|"|>{{- item -}}<|"|>
76
+ {%- if not loop.last %},{% endif -%}
77
+ {%- endfor -%}
78
+ ]
79
+ {%- endif -%}
80
+ {%- endif -%}
81
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
82
+ type:<|"|>{{ value['type'] | upper }}<|"|>}
83
+ {%- endif -%}
84
+ {%- endfor -%}
85
+ {%- endmacro -%}
86
{%- macro format_function_declaration(tool_data) -%}
    {#-
        Render one tool definition as
          declaration:NAME{description:..,parameters:{..},response:{..}}
        tool_data is expected to carry a 'function' mapping with 'name',
        'description', and optionally 'parameters' and 'response'.

        The closing brace of parameters:{ and of response:{ is emitted
        unconditionally, and commas are emitted only between fields that are
        actually present. For schemas that carry every field the output is
        the same as before; schemas that omit 'type' (or declare a
        non-OBJECT response) no longer produce unbalanced braces or a
        dangling comma.
    -#}
    declaration:{{- tool_data['function']['name'] -}}{description:<|"|>{{- tool_data['function']['description'] -}}<|"|>
    {%- set params = tool_data['function']['parameters'] -%}
    {%- if params -%}
        ,parameters:{
        {#- fields.sep is '' before the first emitted field and ',' afterwards -#}
        {%- set fields = namespace(sep='') -%}
        {%- if params['properties'] -%}
            properties:{ {{- format_parameters(params['properties'], params['required']) -}} }
            {%- set fields.sep = ',' -%}
        {%- endif -%}
        {%- if params['required'] -%}
            {{- fields.sep -}}required:[
            {%- for item in params['required'] -%}
                <|"|>{{- item -}}<|"|>
                {{- ',' if not loop.last -}}
            {%- endfor -%}
            ]
            {%- set fields.sep = ',' -%}
        {%- endif -%}
        {%- if params['type'] -%}
            {{- fields.sep -}}type:<|"|>{{- params['type'] | upper -}}<|"|>
        {%- endif -%}
        }
    {%- endif -%}
    {%- if 'response' in tool_data['function'] -%}
        {%- set response_declaration = tool_data['function']['response'] -%}
        ,response:{
        {%- set response_fields = namespace(sep='') -%}
        {%- if response_declaration['description'] -%}
            description:<|"|>{{- response_declaration['description'] -}}<|"|>
            {%- set response_fields.sep = ',' -%}
        {%- endif -%}
        {#- Only an OBJECT response type is rendered; other types are omitted -#}
        {%- if response_declaration['type'] | upper == 'OBJECT' -%}
            {{- response_fields.sep -}}type:<|"|>{{- response_declaration['type'] | upper -}}<|"|>
        {%- endif -%}
        }
    {%- endif -%}
    }
{%- endmacro -%}
118
{%- macro format_argument(argument, escape_keys=True) -%}
    {#-
        Serialise a single argument value, recursing into containers.
          string   -> wrapped in the <|"|> quote token
          boolean  -> true / false
          mapping  -> {k:v,...} with keys sorted by dictsort; keys are wrapped
                      in the quote token only when escape_keys is true
          sequence -> [v,...]
          other    -> rendered as-is
    -#}
    {%- if argument is string -%}
        {{- '<|"|>' + argument + '<|"|>' -}}
    {%- elif argument is boolean -%}
        {%- if argument -%}true{%- else -%}false{%- endif -%}
    {%- elif argument is mapping -%}
        {
        {%- for field, field_value in argument | dictsort -%}
            {%- if not loop.first %},{% endif -%}
            {%- if escape_keys -%}
                {{- '<|"|>' + field + '<|"|>' -}}
            {%- else -%}
                {{- field -}}
            {%- endif -%}
            :{{- format_argument(field_value, escape_keys=escape_keys) -}}
        {%- endfor -%}
        }
    {%- elif argument is sequence -%}
        [
        {%- for element in argument -%}
            {%- if not loop.first %},{% endif -%}
            {{- format_argument(element, escape_keys=escape_keys) -}}
        {%- endfor -%}
        ]
    {%- else -%}
        {{- argument -}}
    {%- endif -%}
{%- endmacro -%}
148
{%- macro strip_thinking(text) -%}
    {#-
        Remove every <|channel>...<channel|> span from text and return the
        remainder, trimmed. The text is cut at each closing marker; within
        each piece only what precedes the opening marker is kept.
    -#}
    {%- set kept = namespace(text='') -%}
    {%- for segment in text.split('<channel|>') -%}
        {#- split(...)[0] is the whole segment when it has no opening marker -#}
        {%- set kept.text = kept.text + segment.split('<|channel>')[0] -%}
    {%- endfor -%}
    {{- kept.text | trim -}}
{%- endmacro -%}
159
+
160
{%- macro format_tool_response_block(tool_name, response) -%}
    {#-
        Emit one tool result wrapped in the tool-response tokens:
          <|tool_response>response:NAME{...}<tool_response|>
        A mapping response is rendered as key:value pairs sorted by key with
        unescaped keys; any other value is rendered under the single key
        'value'. Values are serialised by format_argument.
    -#}
    {{- '<|tool_response>' -}}
    {{- 'response:' + tool_name + '{' -}}
    {%- if response is mapping -%}
        {%- for field, field_value in response | dictsort -%}
            {%- if not loop.first %},{% endif -%}
            {{- field -}}:{{- format_argument(field_value, escape_keys=False) -}}
        {%- endfor -%}
    {%- else -%}
        {{- 'value:' + format_argument(response, escape_keys=False) -}}
    {%- endif -%}
    {{- '}<tool_response|>' -}}
{%- endmacro -%}
174
+
175
{#-
    Template state:
      ns.prev_message_type  kind of the most recently rendered element; read
                            later when deciding how to close a turn and
                            whether to open a generation prompt
      loop_messages         messages still to be rendered by the main loop
                            (the leading system/developer message is removed
                            once it has been rendered here)
-#}
{%- set ns = namespace(prev_message_type=None) -%}
{%- set loop_messages = messages -%}
{{- bos_token -}}
{#- Handle System/Tool Definitions Block -#}
{%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
    {{- '<|turn>system\n' -}}

    {#- Inject Thinking token at the very top of the FIRST system turn -#}
    {%- if enable_thinking is defined and enable_thinking -%}
        {{- '<|think|>\n' -}}
        {%- set ns.prev_message_type = 'think' -%}
    {%- endif -%}

    {%- if messages[0]['role'] in ['system', 'developer'] -%}
        {%- set system_content = messages[0]['content'] -%}
        {%- if system_content is string -%}
            {{- system_content | trim -}}
        {%- elif system_content is sequence and system_content is not mapping -%}
            {#- Content-parts array: render the text parts, as the message loop
                does for non-model roles, instead of printing the list repr -#}
            {%- for item in system_content -%}
                {%- if item['type'] == 'text' -%}
                    {{- item['text'] | trim -}}
                {%- endif -%}
            {%- endfor -%}
        {%- else -%}
            {{- system_content | trim -}}
        {%- endif -%}
        {%- set loop_messages = messages[1:] -%}
    {%- endif -%}

    {#- Tool declarations follow the system text inside the same turn -#}
    {%- if tools -%}
        {%- for tool in tools %}
            {{- '<|tool>' -}}
            {{- format_function_declaration(tool) | trim -}}
            {{- '<tool|>' -}}
        {%- endfor %}
        {%- set ns.prev_message_type = 'tool' -%}
    {%- endif -%}

    {{- '<turn|>\n' -}}
{%- endif %}
204
+
205
{#- Pre-scan: record the index of the last user message in loop_messages
    (-1 when there is none); the message loop uses it as the reasoning guard -#}
{%- set ns_turn = namespace(last_user_idx=-1) -%}
{%- for candidate in loop_messages -%}
    {%- if candidate['role'] == 'user' -%}
        {%- set ns_turn.last_user_idx = loop.index0 -%}
    {%- endif -%}
{%- endfor -%}
212
+
213
{#- Loop through messages -#}
{#-
    Messages with role 'tool' are not rendered on their own; they are consumed
    by the forward scan of the assistant message that issued the tool calls.

    ns_open.flag is true while the most recently rendered turn has NOT been
    closed with the end-of-turn token (it ended in tool calls or tool
    responses). Only then does a following assistant message continue that
    turn; after a closed turn a new turn header is always emitted, so
    back-to-back assistant messages are never rendered outside a turn.
-#}
{%- set ns_open = namespace(flag=false) -%}
{%- for message in loop_messages -%}
    {%- if message['role'] != 'tool' -%}
        {%- set ns.prev_message_type = None -%}
        {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
        {#- Detect continuation: suppress the turn header only when the previous turn is still open -#}
        {%- set continue_same_model_turn = (role == 'model' and ns_open.flag) -%}
        {%- if not continue_same_model_turn -%}
            {{- '<|turn>' + role + '\n' }}
        {%- endif -%}

        {#- Render reasoning/reasoning_content as thinking channel; only for
            tool-calling messages that come after the last user message -#}
        {%- set thinking_text = message.get('reasoning') or message.get('reasoning_content') -%}
        {%- if thinking_text and loop.index0 > ns_turn.last_user_idx and message.get('tool_calls') -%}
            {{- '<|channel>thought\n' + thinking_text + '\n<channel|>' -}}
        {%- endif -%}

        {%- if message['tool_calls'] -%}
            {%- for tool_call in message['tool_calls'] -%}
                {%- set function = tool_call['function'] -%}
                {{- '<|tool_call>call:' + function['name'] + '{' -}}
                {%- if function['arguments'] is mapping -%}
                    {%- set ns_args = namespace(found_first=false) -%}
                    {%- for key, value in function['arguments'] | dictsort -%}
                        {%- if ns_args.found_first %},{% endif -%}
                        {%- set ns_args.found_first = true -%}
                        {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
                    {%- endfor -%}
                {%- elif function['arguments'] is string -%}
                    {#- Pre-serialised arguments are passed through verbatim -#}
                    {{- function['arguments'] -}}
                {%- endif -%}
                {{- '}<tool_call|>' -}}
            {%- endfor -%}
            {%- set ns.prev_message_type = 'tool_call' -%}
        {%- endif -%}

        {#- ns_tr_out.flag becomes true once at least one tool response was rendered for this message -#}
        {%- set ns_tr_out = namespace(flag=false) -%}
        {%- if message.get('tool_responses') -%}
            {#- Legacy: tool_responses embedded on the assistant message (Google/Gemma native) -#}
            {%- for tool_response in message['tool_responses'] -%}
                {{- format_tool_response_block(tool_response['name'] | default('unknown'), tool_response['response']) -}}
                {%- set ns_tr_out.flag = true -%}
                {%- set ns.prev_message_type = 'tool_response' -%}
            {%- endfor -%}
        {%- elif message.get('tool_calls') -%}
            {#- OpenAI Chat Completions: forward-scan consecutive role:tool messages -#}
            {%- set ns_tool_scan = namespace(stopped=false) -%}
            {%- for k in range(loop.index0 + 1, loop_messages | length) -%}
                {%- if ns_tool_scan.stopped -%}
                {%- elif loop_messages[k]['role'] != 'tool' -%}
                    {%- set ns_tool_scan.stopped = true -%}
                {%- else -%}
                    {%- set follow = loop_messages[k] -%}
                    {#- Resolve tool_call_id to function name. .get() yields None (not
                        undefined) for a missing key, so default() needs its boolean
                        flag for the 'unknown' fallback to take effect -#}
                    {%- set ns_tname = namespace(name=follow.get('name') | default('unknown', true)) -%}
                    {%- for tc in message['tool_calls'] -%}
                        {%- if tc.get('id') == follow.get('tool_call_id') -%}
                            {%- set ns_tname.name = tc['function']['name'] -%}
                        {%- endif -%}
                    {%- endfor -%}
                    {#- Handle content as string or content-parts array; a mapping also
                        passes the sequence test, so it is excluded here and reaches the
                        generic branch, where it is rendered field by field -#}
                    {%- set tool_body = follow.get('content') -%}
                    {%- if tool_body is string -%}
                        {{- format_tool_response_block(ns_tname.name, tool_body) -}}
                    {%- elif tool_body is sequence and tool_body is not string and tool_body is not mapping -%}
                        {%- set ns_txt = namespace(s='') -%}
                        {%- for part in tool_body -%}
                            {%- if part.get('type') == 'text' -%}
                                {%- set ns_txt.s = ns_txt.s + (part.get('text') | default('', true)) -%}
                            {%- endif -%}
                        {%- endfor -%}
                        {{- format_tool_response_block(ns_tname.name, ns_txt.s) -}}
                    {%- else -%}
                        {{- format_tool_response_block(ns_tname.name, tool_body) -}}
                    {%- endif -%}
                    {%- set ns_tr_out.flag = true -%}
                    {%- set ns.prev_message_type = 'tool_response' -%}
                {%- endif -%}
            {%- endfor -%}
        {%- endif -%}

        {#- Message content: model text has thinking spans stripped, other roles are trimmed -#}
        {%- if message['content'] is string -%}
            {%- if role == 'model' -%}
                {{- strip_thinking(message['content']) -}}
            {%- else -%}
                {{- message['content'] | trim -}}
            {%- endif -%}
        {%- elif message['content'] is sequence -%}
            {%- for item in message['content'] -%}
                {%- if item['type'] == 'text' -%}
                    {%- if role == 'model' -%}
                        {{- strip_thinking(item['text']) -}}
                    {%- else -%}
                        {{- item['text'] | trim -}}
                    {%- endif -%}
                {%- elif item['type'] == 'image' -%}
                    {{- '<|image|>' -}}
                    {%- set ns.prev_message_type = 'image' -%}
                {%- elif item['type'] == 'audio' -%}
                    {{- '<|audio|>' -}}
                    {%- set ns.prev_message_type = 'audio' -%}
                {%- elif item['type'] == 'video' -%}
                    {{- '<|video|>' -}}
                    {%- set ns.prev_message_type = 'video' -%}
                {%- endif -%}
            {%- endfor -%}
        {%- endif -%}

        {#- Close the turn, or leave it open, and remember which for the next message -#}
        {%- if ns.prev_message_type == 'tool_call' and not ns_tr_out.flag -%}
            {#- Tool calls without any rendered response: open a response slot -#}
            {{- '<|tool_response>' -}}
            {%- set ns_open.flag = true -%}
        {%- elif not (ns_tr_out.flag and not message.get('content')) -%}
            {{- '<turn|>\n' -}}
            {%- set ns_open.flag = false -%}
        {%- else -%}
            {#- Tool responses were rendered and there is no content: the turn stays open -#}
            {%- set ns_open.flag = true -%}
        {%- endif -%}
    {%- endif -%}
{%- endfor -%}
339
+
340
{#- Generation prompt: open a model turn unless the rendered conversation ended
    in a tool call or tool response. When thinking is not enabled, an empty
    thought channel is emitted right after the turn header. -#}
{%- if add_generation_prompt and ns.prev_message_type not in ['tool_response', 'tool_call'] -%}
    {{- '<|turn>model\n' -}}
    {%- if enable_thinking is not defined or not enable_thinking -%}
        {{- '<|channel>thought\n<channel|>' -}}
    {%- endif -%}
{%- endif -%}
config.json ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Gemma4ForConditionalGeneration"
4
+ ],
5
+ "audio_config": null,
6
+ "audio_token_id": 258881,
7
+ "boa_token_id": 256000,
8
+ "boi_token_id": 255999,
9
+ "dtype": "bfloat16",
10
+ "eoa_token_id": 258883,
11
+ "eoa_token_index": 258883,
12
+ "eoi_token_id": 258882,
13
+ "eos_token_id": [
14
+ 1,
15
+ 106
16
+ ],
17
+ "image_token_id": 258880,
18
+ "initializer_range": 0.02,
19
+ "model_type": "gemma4",
20
+ "text_config": {
21
+ "attention_bias": false,
22
+ "attention_dropout": 0.0,
23
+ "attention_k_eq_v": true,
24
+ "bos_token_id": 2,
25
+ "dtype": "bfloat16",
26
+ "enable_moe_block": true,
27
+ "eos_token_id": 1,
28
+ "final_logit_softcapping": 30.0,
29
+ "global_head_dim": 512,
30
+ "head_dim": 256,
31
+ "hidden_activation": "gelu_pytorch_tanh",
32
+ "hidden_size": 2816,
33
+ "hidden_size_per_layer_input": 0,
34
+ "initializer_range": 0.02,
35
+ "intermediate_size": 2112,
36
+ "layer_types": [
37
+ "sliding_attention",
38
+ "sliding_attention",
39
+ "sliding_attention",
40
+ "sliding_attention",
41
+ "sliding_attention",
42
+ "full_attention",
43
+ "sliding_attention",
44
+ "sliding_attention",
45
+ "sliding_attention",
46
+ "sliding_attention",
47
+ "sliding_attention",
48
+ "full_attention",
49
+ "sliding_attention",
50
+ "sliding_attention",
51
+ "sliding_attention",
52
+ "sliding_attention",
53
+ "sliding_attention",
54
+ "full_attention",
55
+ "sliding_attention",
56
+ "sliding_attention",
57
+ "sliding_attention",
58
+ "sliding_attention",
59
+ "sliding_attention",
60
+ "full_attention",
61
+ "sliding_attention",
62
+ "sliding_attention",
63
+ "sliding_attention",
64
+ "sliding_attention",
65
+ "sliding_attention",
66
+ "full_attention"
67
+ ],
68
+ "max_position_embeddings": 262144,
69
+ "model_type": "gemma4_text",
70
+ "moe_intermediate_size": 704,
71
+ "num_attention_heads": 16,
72
+ "num_experts": 128,
73
+ "num_global_key_value_heads": 2,
74
+ "num_hidden_layers": 30,
75
+ "num_key_value_heads": 8,
76
+ "num_kv_shared_layers": 0,
77
+ "pad_token_id": 0,
78
+ "rms_norm_eps": 1e-06,
79
+ "rope_parameters": {
80
+ "full_attention": {
81
+ "partial_rotary_factor": 0.25,
82
+ "rope_theta": 1000000.0,
83
+ "rope_type": "proportional"
84
+ },
85
+ "sliding_attention": {
86
+ "rope_theta": 10000.0,
87
+ "rope_type": "default"
88
+ }
89
+ },
90
+ "sliding_window": 1024,
91
+ "tie_word_embeddings": true,
92
+ "top_k_experts": 8,
93
+ "use_bidirectional_attention": "vision",
94
+ "use_cache": true,
95
+ "use_double_wide_mlp": false,
96
+ "vocab_size": 262144,
97
+ "vocab_size_per_layer_input": 262144
98
+ },
99
+ "tie_word_embeddings": true,
100
+ "transformers_version": "5.5.4",
101
+ "video_token_id": 258884,
102
+ "vision_config": {
103
+ "_name_or_path": "",
104
+ "architectures": null,
105
+ "attention_bias": false,
106
+ "attention_dropout": 0.0,
107
+ "chunk_size_feed_forward": 0,
108
+ "default_output_length": 280,
109
+ "dtype": "bfloat16",
110
+ "global_head_dim": 72,
111
+ "head_dim": 72,
112
+ "hidden_activation": "gelu_pytorch_tanh",
113
+ "hidden_size": 1152,
114
+ "id2label": {
115
+ "0": "LABEL_0",
116
+ "1": "LABEL_1"
117
+ },
118
+ "initializer_range": 0.02,
119
+ "intermediate_size": 4304,
120
+ "is_encoder_decoder": false,
121
+ "label2id": {
122
+ "LABEL_0": 0,
123
+ "LABEL_1": 1
124
+ },
125
+ "max_position_embeddings": 131072,
126
+ "model_type": "gemma4_vision",
127
+ "num_attention_heads": 16,
128
+ "num_hidden_layers": 27,
129
+ "num_key_value_heads": 16,
130
+ "output_attentions": false,
131
+ "output_hidden_states": false,
132
+ "patch_size": 16,
133
+ "pooling_kernel_size": 3,
134
+ "position_embedding_size": 10240,
135
+ "problem_type": null,
136
+ "return_dict": true,
137
+ "rms_norm_eps": 1e-06,
138
+ "rope_parameters": {
139
+ "rope_theta": 100.0,
140
+ "rope_type": "default"
141
+ },
142
+ "standardize": true,
143
+ "use_clipped_linears": false
144
+ },
145
+ "vision_soft_tokens_per_image": 280,
146
+ "quantization_config": {
147
+ "quant_method": "exl3",
148
+ "version": "0.0.32",
149
+ "bits": 3.1,
150
+ "head_bits": 8,
151
+ "calibration": {
152
+ "rows": 250,
153
+ "cols": 2048
154
+ },
155
+ "out_scales": "always",
156
+ "codebook": "mcg"
157
+ }
158
+ }
generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 2,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 1,
6
+ 106,
7
+ 50
8
+ ],
9
+ "pad_token_id": 0,
10
+ "temperature": 1.0,
11
+ "top_k": 64,
12
+ "top_p": 0.95,
13
+ "transformers_version": "5.5.0.dev0"
14
+ }
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6edeab0e725c65980351b97c5ebbde54dd75f9b9475cf033d1e200cc1961e07d
3
+ size 8424353198
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36deccfd4494601d6583d72ee3defa877839a83a66d6040192231389e53d41e1
3
+ size 5362857059
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
processor_config.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "audio_ms_per_token": 40,
3
+ "audio_seq_length": 750,
4
+ "feature_extractor": {
5
+ "dither": 0.0,
6
+ "feature_extractor_type": "Gemma4AudioFeatureExtractor",
7
+ "feature_size": 128,
8
+ "fft_length": 512,
9
+ "fft_overdrive": false,
10
+ "frame_length": 320,
11
+ "hop_length": 160,
12
+ "input_scale_factor": 1.0,
13
+ "max_frequency": 8000.0,
14
+ "mel_floor": 0.001,
15
+ "min_frequency": 0.0,
16
+ "padding_side": "right",
17
+ "padding_value": 0.0,
18
+ "per_bin_mean": null,
19
+ "per_bin_stddev": null,
20
+ "preemphasis": 0.0,
21
+ "preemphasis_htk_flavor": true,
22
+ "return_attention_mask": true,
23
+ "sampling_rate": 16000
24
+ },
25
+ "image_processor": {
26
+ "do_convert_rgb": true,
27
+ "do_normalize": false,
28
+ "do_rescale": true,
29
+ "do_resize": true,
30
+ "image_mean": [
31
+ 0.0,
32
+ 0.0,
33
+ 0.0
34
+ ],
35
+ "image_processor_type": "Gemma4ImageProcessor",
36
+ "image_seq_length": 280,
37
+ "image_std": [
38
+ 1.0,
39
+ 1.0,
40
+ 1.0
41
+ ],
42
+ "max_soft_tokens": 280,
43
+ "patch_size": 16,
44
+ "pooling_kernel_size": 3,
45
+ "resample": 3,
46
+ "rescale_factor": 0.00392156862745098
47
+ },
48
+ "image_seq_length": 280,
49
+ "processor_class": "Gemma4Processor",
50
+ "video_processor": {
51
+ "do_convert_rgb": true,
52
+ "do_normalize": true,
53
+ "do_rescale": true,
54
+ "do_resize": true,
55
+ "do_sample_frames": true,
56
+ "image_mean": [
57
+ 0.0,
58
+ 0.0,
59
+ 0.0
60
+ ],
61
+ "image_std": [
62
+ 1.0,
63
+ 1.0,
64
+ 1.0
65
+ ],
66
+ "max_soft_tokens": 70,
67
+ "num_frames": 32,
68
+ "patch_size": 16,
69
+ "pooling_kernel_size": 3,
70
+ "resample": 3,
71
+ "rescale_factor": 0.00392156862745098,
72
+ "return_metadata": false,
73
+ "video_processor_type": "Gemma4VideoProcessor"
74
+ }
75
+ }
quantization_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9908c9371642d21371228cdb434f31522f70e624f984e7e4b942d14aac72b882
3
+ size 15500628
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc8d3a0ce36466ccc1278bf987df5f71db1719b9ca6b4118264f45cb627bfe0f
3
+ size 32169626
tokenizer_config.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "audio_token": "<|audio|>",
3
+ "backend": "tokenizers",
4
+ "boa_token": "<|audio>",
5
+ "boi_token": "<|image>",
6
+ "bos_token": "<bos>",
7
+ "eoa_token": "<audio|>",
8
+ "eoc_token": "<channel|>",
9
+ "eoi_token": "<image|>",
10
+ "eos_token": "<eos>",
11
+ "eot_token": "<turn|>",
12
+ "escape_token": "<|\"|>",
13
+ "etc_token": "<tool_call|>",
14
+ "etd_token": "<tool|>",
15
+ "etr_token": "<tool_response|>",
16
+ "extra_special_tokens": [
17
+ "<|video|>"
18
+ ],
19
+ "image_token": "<|image|>",
20
+ "mask_token": "<mask>",
21
+ "model_max_length": 1000000000000000019884624838656,
22
+ "pad_token": "<pad>",
23
+ "padding_side": "left",
24
+ "processor_class": "Gemma4Processor",
25
+ "response_schema": {
26
+ "type": "object",
27
+ "properties": {
28
+ "role": {
29
+ "const": "assistant"
30
+ },
31
+ "thinking": {
32
+ "type": "string"
33
+ },
34
+ "content": {
35
+ "type": "string"
36
+ },
37
+ "tool_calls": {
38
+ "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>",
39
+ "type": "array",
40
+ "items": {
41
+ "type": "object",
42
+ "properties": {
43
+ "type": {
44
+ "const": "function"
45
+ },
46
+ "function": {
47
+ "type": "object",
48
+ "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})",
49
+ "properties": {
50
+ "name": {
51
+ "type": "string"
52
+ },
53
+ "arguments": {
54
+ "type": "object",
55
+ "x-parser": "gemma4-tool-call",
56
+ "additionalProperties": {}
57
+ }
58
+ }
59
+ }
60
+ }
61
+ }
62
+ }
63
+ },
64
+ "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?P<content>(?:(?!\\<turn\\|\\>)(?!\\<\\|tool_response\\>).)+)?(?:\\<turn\\|\\>|\\<\\|tool_response\\>)?"
65
+ },
66
+ "soc_token": "<|channel>",
67
+ "sot_token": "<|turn>",
68
+ "stc_token": "<|tool_call>",
69
+ "std_token": "<|tool>",
70
+ "str_token": "<|tool_response>",
71
+ "think_token": "<|think|>",
72
+ "tokenizer_class": "GemmaTokenizer",
73
+ "unk_token": "<unk>"
74
+ }