Upload 14 files

Browse files

Files changed (14) hide show

.gitattributes +2 -0
Gemma4-NoThink.json +47 -0
Gemma4-Think.json +47 -0
README.md +688 -11
chat_template.jinja +347 -0
config.json +158 -0
generation_config.json +14 -0
model-00001-of-00002.safetensors +3 -0
model-00002-of-00002.safetensors +3 -0
model.safetensors.index.json +0 -0
processor_config.json +75 -0
quantization_config.json +3 -0
tokenizer.json +3 -0
tokenizer_config.json +74 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
+quantization_config.json filter=lfs diff=lfs merge=lfs -text

Gemma4-NoThink.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+    "instruct": {
+        "input_sequence": "<|turn>user\n",
+        "output_sequence": "<|turn>model\n",
+        "last_output_sequence": "<|turn>model\n<|channel>thought\n<channel|>",
+        "system_sequence": "<|turn>system\n",
+        "stop_sequence": "",
+        "wrap": false,
+        "macro": true,
+        "activation_regex": "",
+        "first_output_sequence": "",
+        "skip_examples": true,
+        "output_suffix": "<turn|>\n",
+        "input_suffix": "<turn|>\n",
+        "system_suffix": "<turn|>\n",
+        "user_alignment_message": "",
+        "system_same_as_user": false,
+        "last_system_sequence": "",
+        "first_input_sequence": "",
+        "last_input_sequence": "",
+        "names_behavior": "none",
+        "sequences_as_stop_strings": true,
+        "story_string_prefix": "",
+        "story_string_suffix": "",
+        "name": "Gemma4-NoThink"
+    },
+    "context": {
+        "story_string": "<|turn>system\n{{#if anchorBefore}}{{anchorBefore}}\n{{/if}}{{#if system}}{{system}}\n{{/if}}{{#if wiBefore}}{{wiBefore}}\n{{/if}}{{#if description}}{{description}}\n{{/if}}{{#if personality}}{{char}}'s personality: {{personality}}\n{{/if}}{{#if scenario}}Scenario: {{scenario}}\n{{/if}}{{#if wiAfter}}{{wiAfter}}\n{{/if}}{{#if persona}}{{persona}}\n{{/if}}{{#if anchorAfter}}{{anchorAfter}}\n{{/if}}{{trim}}<turn|>\n",
+        "example_separator": "",
+        "chat_start": "",
+        "use_stop_strings": false,
+        "names_as_stop_strings": true,
+        "story_string_position": 0,
+        "story_string_depth": 1,
+        "story_string_role": 0,
+        "always_force_name2": false,
+        "trim_sentences": false,
+        "single_line": false,
+        "name": "Gemma4-NoThink"
+    },
+    "reasoning": {
+        "prefix": "<|channel>thought\n",
+        "suffix": "<channel|>",
+        "separator": "\n\n",
+        "name": "Gemma 4"
+    }
+}

Gemma4-Think.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+    "instruct": {
+        "input_sequence": "<|turn>user\n",
+        "output_sequence": "<|turn>model\n",
+        "last_output_sequence": "<|turn>model\n",
+        "system_sequence": "<|turn>system\n",
+        "stop_sequence": "",
+        "wrap": false,
+        "macro": true,
+        "activation_regex": "",
+        "first_output_sequence": "",
+        "skip_examples": true,
+        "output_suffix": "<turn|>\n",
+        "input_suffix": "<turn|>\n",
+        "system_suffix": "<turn|>\n",
+        "user_alignment_message": "",
+        "system_same_as_user": false,
+        "last_system_sequence": "",
+        "first_input_sequence": "",
+        "last_input_sequence": "",
+        "names_behavior": "none",
+        "sequences_as_stop_strings": true,
+        "story_string_prefix": "",
+        "story_string_suffix": "",
+        "name": "Gemma4-Think"
+    },
+    "context": {
+        "story_string": "<|turn>system\n<|think|>\n{{#if anchorBefore}}{{anchorBefore}}\n{{/if}}{{#if system}}{{system}}\n{{/if}}{{#if wiBefore}}{{wiBefore}}\n{{/if}}{{#if description}}{{description}}\n{{/if}}{{#if personality}}{{char}}'s personality: {{personality}}\n{{/if}}{{#if scenario}}Scenario: {{scenario}}\n{{/if}}{{#if wiAfter}}{{wiAfter}}\n{{/if}}{{#if persona}}{{persona}}\n{{/if}}{{#if anchorAfter}}{{anchorAfter}}\n{{/if}}{{trim}}<turn|>\n",
+        "example_separator": "",
+        "chat_start": "",
+        "use_stop_strings": false,
+        "names_as_stop_strings": true,
+        "story_string_position": 0,
+        "story_string_depth": 1,
+        "story_string_role": 0,
+        "always_force_name2": false,
+        "trim_sentences": false,
+        "single_line": false,
+        "name": "Gemma4-Think"
+    },
+    "reasoning": {
+        "prefix": "<|channel>thought\n",
+        "suffix": "<channel|>",
+        "separator": "\n\n",
+        "name": "Gemma 4"
+    }
+}

README.md CHANGED Viewed

@@ -1,16 +1,693 @@
 ---
 license: apache-2.0
 base_model:
-- zerofata/G4-MeroMero-26B-A4B
-tags:
-- exl3
-- exllamav3
-- roleplay
-- rp
-- gemma4
-- gemma
-- quantized
 ---
-Requires ExLlamaV3 0.0.29 and above to launch. 0.0.32+ uses less VRAM.
-Planned 3bpw, 4bpw and 6bpw with 8 head bits.

 ---
 license: apache-2.0
+datasets:
+- zerofata/Instruct-Anime
+- zerofata/Gemini-3.1-Pro-SmallWiki
+- zerofata/Gemini-3.1-Pro-GLM5-Characters
+- zerofata/Roleplay-Anime-Characters
 base_model:
+- google/gemma-4-26B-A4B-it
 ---
+<style>
+.gs {
+  --bg:      #0d0a10;
+  --surface: #14101a;
+  --edge:    #2a1f38;
+  --rule:    #382850;
+  --text:    #b8a0cc;
+  --dim:     #7a6090;
+  --bright:  #f0e6ff;
+  --azure:   #c060ff;
+  --crimson: #ff4da6;
+  --az-glow: rgba(192,96,255,0.10);
+  --cr-glow: rgba(255,77,166,0.06);
+  --mono:    'JetBrains Mono', monospace;
+  --sans:    'Inter', sans-serif;
+  font-family: var(--sans);
+  color: var(--text);
+  max-width: 900px;
+  margin: 0 auto;
+  padding: 0 0 60px;
+  line-height: 1.7;
+  font-size: 1rem;
+  background:
+    radial-gradient(ellipse at 50% 0%, rgba(192,96,255,0.04) 0%, transparent 50%),
+    radial-gradient(ellipse at 50% 100%, rgba(255,77,166,0.02) 0%, transparent 50%),
+    var(--bg);
+}
+/* ── Profile Card ── */
+.gs-profile {
+  border-bottom: none;
+  position: relative;
+  background: var(--surface);
+  margin-bottom: 0;
+}
+.gs-profile-art {
+  position: relative;
+}
+.gs-profile-art img {
+  display: block;
+  width: 100%;
+  height: 380px;
+  object-fit: cover;
+  margin-top: 0px;
+}
+.gs-ident {
+  position: absolute;
+  bottom: 0;
+  left: 0;
+  right: 0;
+  padding: 120px 44px 28px;
+  background: linear-gradient(
+    to top,
+    var(--bg) 0%,
+    rgba(13,10,16,0.92) 30%,
+    rgba(13,10,16,0.4) 60%,
+    transparent 100%
+  );
+}
+.gs-profile-info {
+  padding: 20px 44px 36px;
+  display: flex;
+  flex-direction: column;
+  gap: 20px;
+}
+.gs-profile-label {
+  display: flex;
+  align-items: baseline;
+  gap: 10px;
+  font-family: var(--mono);
+  letter-spacing: 0.14em;
+  text-transform: uppercase;
+}
+.gs-profile-label .gs-snum {
+  font-size: 0.62rem;
+  font-weight: 700;
+  color: var(--crimson);
+  opacity: 1;
+  position: static;
+  transform: none;
+}
+.gs-profile-label .gs-stitle {
+  font-size: 0.62rem;
+  color: var(--dim);
+  font-weight: 700;
+  letter-spacing: 0.14em;
+}
+.gs-profile-label .gs-stitle::before {
+  content: none;
+}
+.gs-name {
+  font-family: var(--sans);
+  font-size: 3.2rem;
+  font-weight: 900;
+  color: var(--bright);
+  letter-spacing: 0.06em;
+  line-height: 1;
+  margin: 0 0 10px;
+  text-shadow: 0 1px 2px rgba(0,0,0,0.6);
+  overflow-wrap: break-word;
+}
+.gs-base {
+  font-family: var(--mono);
+  font-size: 0.68rem;
+  color: var(--crimson);
+  letter-spacing: 0.14em;
+  text-transform: uppercase;
+  display: block;
+}
+.gs-profile-bio p {
+  margin: 0 0 14px;
+  font-size: 0.95rem;
+}
+.gs-profile-bio p:last-child { margin-bottom: 0; }
+/* ── Sections ── */
+.gs-section {
+  padding: 0;
+}
+.gs-shead {
+  position: relative;
+  display: flex;
+  align-items: center;
+  gap: 14px;
+  padding: 16px 44px 14px;
+  margin-bottom: 28px;
+  border-top: 2px solid;
+  border-image: linear-gradient(90deg, var(--crimson), var(--azure)) 1;
+}
+.gs-snum {
+  font-family: var(--mono);
+  font-size: 2.2rem;
+  font-weight: 900;
+  color: var(--crimson);
+  letter-spacing: 0.06em;
+  opacity: 0.12;
+  position: absolute;
+  right: 44px;
+  top: 50%;
+  transform: translateY(-50%);
+  line-height: 1;
+}
+.gs-stitle {
+  font-size: 1.05rem;
+  font-weight: 700;
+  letter-spacing: 0.1em;
+  text-transform: uppercase;
+  color: var(--bright);
+}
+.gs-stitle::before {
+  content: '\2726';
+  color: var(--crimson);
+  font-size: 0.8em;
+  margin-right: 8px;
+}
+.gs-sbody {
+  padding: 0 44px 44px;
+}
+.gs-sbody p {
+  margin: 0 0 14px;
+  font-size: 0.95rem;
+}
+.gs-sbody p:last-child { margin-bottom: 0; }
+/* ── Data panels ── */
+.gs-stack {
+  display: grid;
+  grid-template-columns: 1fr 1fr;
+  gap: 16px;
+}
+.gs-stack .gs-panel:nth-child(3) {
+  grid-column: 1 / -1;
+}
+.gs-panel {
+  border: 1px solid var(--edge);
+  border-left: 3px solid var(--crimson);
+  position: relative;
+  background: var(--surface);
+  box-shadow: 0 0 20px rgba(192,96,255,0.03);
+}
+.gs-panel::before {
+  content: '';
+  position: absolute;
+  top: -1px;
+  right: -1px;
+  width: 10px;
+  height: 10px;
+  border-top: 1px solid var(--crimson);
+  border-right: 1px solid var(--crimson);
+  opacity: 0.5;
+}
+.gs-panel::after {
+  content: '';
+  position: absolute;
+  bottom: -1px;
+  right: -1px;
+  width: 10px;
+  height: 10px;
+  border-bottom: 1px solid var(--azure);
+  border-right: 1px solid var(--azure);
+  opacity: 0.4;
+}
+.gs-panel-head {
+  font-family: var(--mono);
+  font-size: 0.68rem;
+  font-weight: 700;
+  letter-spacing: 0.14em;
+  text-transform: uppercase;
+  color: var(--dim);
+  padding: 10px 16px;
+  border-bottom: 1px solid var(--edge);
+}
+.gs-panel-head::after {
+  content: ' \2726';
+  color: var(--crimson);
+  opacity: 0.5;
+}
+.gs-row {
+  display: grid;
+  grid-template-columns: 10ch 1fr;
+  align-items: baseline;
+  column-gap: 4px;
+  padding: 9px 16px;
+  border-bottom: 1px solid var(--edge);
+  font-size: 0.9rem;
+}
+.gs-row:last-child { border-bottom: none; }
+.gs-key {
+  font-family: var(--mono);
+  font-size: 0.9rem;
+  color: var(--dim);
+}
+.gs-key::after {
+  content: ':';
+}
+.gs-val {
+  color: var(--bright);
+  font-size: 0.9rem;
+}
+.gs-row .gs-val:only-child {
+  grid-column: 1 / -1;
+}
+/* ── Quantizations (compact) ── */
+.gs-section--compact .gs-shead {
+  border-top: 1px solid var(--edge);
+  border-image-source: none;
+  padding: 12px 44px 10px;
+  margin-bottom: 18px;
+}
+.gs-section--compact .gs-snum {
+  opacity: 0.08;
+}
+.gs-section--compact .gs-stitle::before {
+  content: '\2726';
+}
+.gs-section--compact .gs-sbody {
+  padding: 0 44px 32px;
+}
+.gs-qrow {
+  display: flex;
+  gap: 12px;
+  flex-wrap: wrap;
+  justify-content: center;
+}
+.gs-qpanel {
+  background: var(--surface);
+  border: 1px solid var(--edge);
+  border-left: 3px solid var(--crimson);
+  display: flex;
+  align-items: center;
+  gap: 16px;
+  padding: 12px 24px;
+  border-radius: 4px;
+  position: relative;
+  box-shadow: 0 0 20px rgba(192,96,255,0.03);
+}
+.gs-qpanel::before {
+  content: '';
+  position: absolute;
+  top: -1px;
+  right: -1px;
+  width: 10px;
+  height: 10px;
+  border-top: 1px solid var(--crimson);
+  border-right: 1px solid var(--crimson);
+  opacity: 0.5;
+}
+.gs-qpanel::after {
+  content: '';
+  position: absolute;
+  bottom: -1px;
+  right: -1px;
+  width: 10px;
+  height: 10px;
+  border-bottom: 1px solid var(--azure);
+  border-right: 1px solid var(--azure);
+  opacity: 0.4;
+}
+.gs-qtype {
+  font-family: var(--mono);
+  font-size: 0.58rem;
+  font-weight: 700;
+  letter-spacing: 0.18em;
+  text-transform: uppercase;
+  color: var(--crimson);
+  flex-shrink: 0;
+}
+.gs-qsep {
+  width: 1px;
+  height: 16px;
+  background: var(--rule);
+  flex-shrink: 0;
+}
+.gs-qpanel a {
+  color: var(--bright);
+  text-decoration: none;
+  font-size: 0.9rem;
+  border-bottom: 1px solid var(--rule);
+}
+.gs-qpanel a:hover { color: var(--crimson); border-bottom-color: var(--crimson); }
+/* ── Journal (Creation Process) ── */
+.gs-section--journal .gs-sbody {
+  margin: 0 44px;
+  padding: 24px 32px 32px;
+  background: var(--surface);
+  border: 1px solid var(--edge);
+  border-left: 4px solid var(--azure);
+  position: relative;
+  margin-bottom: 0;
+}
+.gs-section--journal .gs-sbody::before {
+  content: '';
+  position: absolute;
+  top: -1px;
+  right: -1px;
+  width: 12px;
+  height: 12px;
+  border-top: 1px solid var(--azure);
+  border-right: 1px solid var(--azure);
+  opacity: 0.4;
+}
+.gs-section--journal .gs-sbody::after {
+  content: '';
+  position: absolute;
+  bottom: -1px;
+  left: -1px;
+  width: 12px;
+  height: 12px;
+  border-bottom: 1px solid var(--crimson);
+  border-left: 1px solid var(--crimson);
+  opacity: 0.3;
+}
+.gs-section--journal .gs-sbody p:first-child {
+  font-style: italic;
+  color: var(--bright);
+}
+/* ── Links ── */
+.gs a {
+  color: var(--bright);
+  text-decoration: none;
+  border-bottom: 1px solid var(--rule);
+}
+.gs a:hover { color: var(--crimson); border-bottom-color: var(--crimson); }
+/* ── Dropdown ── */
+.gs details {
+  border: 1px solid var(--edge);
+  border-left: 3px solid var(--crimson);
+  margin-top: 24px;
+  position: relative;
+  background: var(--surface);
+  box-shadow: 0 0 20px rgba(192,96,255,0.03);
+}
+.gs details::before {
+  content: '';
+  position: absolute;
+  top: -1px;
+  right: -1px;
+  width: 10px;
+  height: 10px;
+  border-top: 1px solid var(--crimson);
+  border-right: 1px solid var(--crimson);
+  opacity: 0.5;
+}
+.gs details::after {
+  content: '';
+  position: absolute;
+  bottom: -1px;
+  right: -1px;
+  width: 10px;
+  height: 10px;
+  border-bottom: 1px solid var(--azure);
+  border-right: 1px solid var(--azure);
+  opacity: 0.4;
+}
+.gs summary {
+  list-style: none;
+  padding: 11px 16px;
+  cursor: pointer;
+  font-family: var(--mono);
+  font-size: 0.72rem;
+  font-weight: 700;
+  letter-spacing: 0.12em;
+  text-transform: uppercase;
+  color: var(--dim);
+  user-select: none;
+  display: flex;
+  align-items: center;
+  gap: 10px;
+}
+.gs summary::-webkit-details-marker { display: none; }
+.gs summary::before {
+  content: '+';
+  color: var(--crimson);
+  font-size: 1rem;
+  line-height: 1;
+  flex-shrink: 0;
+}
+.gs details[open] summary::before { content: '−'; }
+.gs summary:hover { color: var(--bright); }
+.gs-detail-body {
+  padding: 22px 18px;
+  border-top: 1px solid var(--edge);
+}
+.gs-detail-body p { margin: 0 0 16px; font-size: 0.9rem; }
+.gs-cfg-title {
+  font-family: var(--mono);
+  font-size: 0.72rem;
+  font-weight: 700;
+  letter-spacing: 0.1em;
+  text-transform: uppercase;
+  color: var(--dim);
+  margin: 0 0 8px;
+}
+/* ── Code ── */
+.gs pre {
+  background: #080510;
+  border: 1px solid var(--edge);
+  border-left: 2px solid var(--azure);
+  padding: 16px 18px;
+  overflow-x: auto;
+  font-family: var(--mono);
+  font-size: 0.76rem;
+  line-height: 1.6;
+  color: var(--text);
+  margin: 0 0 22px;
+}
+.gs pre:last-child { margin-bottom: 0; }
+.gs pre code { background: none; color: inherit; padding: 0; }
+.gs code {
+  font-family: var(--mono);
+  font-size: 0.875em;
+  color: var(--crimson);
+  background: var(--az-glow);
+  padding: 2px 5px;
+}
+</style>
+<html lang="en">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<title>Stardom</title>
+<link rel="preconnect" href="https://fonts.googleapis.com">
+<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700;900&family=JetBrains+Mono:wght@400;700&display=swap" rel="stylesheet">
+</head>
+<body>
+<div class="gs">
+<div class="gs-profile">
+<div class="gs-profile-art">
+<img src="https://cdn-uploads.huggingface.co/production/uploads/65b19c6c638328850e12d38c/xBv_weuMs5x3i4WFDRksn.png" alt="image">
+<div class="gs-ident">
+<h1 class="gs-name">Mero Mero</h1>
+<span class="gs-base">Gemma4 26B A4B</span>
+</div>
+</div>
+</div>
+<div class="gs-section">
+<div class="gs-shead">
+<span class="gs-snum">01</span>
+<span class="gs-stitle">Overview</span>
+</div>
+<div class="gs-sbody">
+<p>God, this model was difficult to work with.</p>
+<p>Google cooked, there wasn't a lot to improve but there was a lot to break.</p>
+<p>This model is a finetune that was merged back into the original instruct. It feels a lot like the original instruct. However, reasoning is more structured, using fewer tokens during RP, and this model generally has a slightly less verbose / flowery writing style.</p>
+<p>The main weakness of this model, I think, is that swipe variety hasn't improved. Logic and repetition are, I think, roughly on par with the original.</p>
+<p>Supports both thinking and non-thinking modes.</p>
+</div>
+</div>
+<div class="gs-section">
+<div class="gs-shead">
+<span class="gs-snum">02</span>
+<span class="gs-stitle">SillyTavern Settings</span>
+</div>
+<div class="gs-sbody">
+<div class="gs-stack">
+<div class="gs-panel">
+<div class="gs-panel-head">Suggested Roleplay Format</div>
+<div class="gs-row"><span class="gs-key">Actions</span><span class="gs-val">In plaintext</span></div>
+<div class="gs-row"><span class="gs-key">Dialogue</span><span class="gs-val">"In quotes"</span></div>
+<div class="gs-row"><span class="gs-key">Thoughts</span><span class="gs-val">*In asterisks*</span></div>
+</div>
+<div class="gs-panel">
+<div class="gs-panel-head">Recommended Samplers</div>
+<div class="gs-row"><span class="gs-key">Temp</span><span class="gs-val">0.8 - 1.0</span></div>
+<div class="gs-row"><span class="gs-key">MinP</span><span class="gs-val">0.05</span></div>
+<div class="gs-row"><span class="gs-val"></span></div>
+</div>
+<div class="gs-panel">
+<div class="gs-panel-head">Instruct</div>
+<div class="gs-row"><span class="gs-val"><a href="https://huggingface.co/zerofata/G4-MeroMero-26B-A4B/raw/main/Gemma4-Think.json">Gemma 4 - Think</a></span></div>
+<div class="gs-row"><span class="gs-val"><a href="https://huggingface.co/zerofata/G4-MeroMero-26B-A4B/raw/main/Gemma4-NoThink.json">Gemma 4 - NoThink</a></span></div>
+</div>
+</div>
+</div>
+</div>
+<div class="gs-section gs-section--compact">
+<div class="gs-shead">
+<span class="gs-snum">03</span>
+<span class="gs-stitle">Quantizations</span>
+</div>
+<div class="gs-sbody">
+<div class="gs-qrow">
+<div class="gs-qpanel">
+<span class="gs-qtype">GGUF</span>
+<div class="gs-qsep"></div>
+<a href="https://huggingface.co/zerofata/G4-MeroMero-26B-A4B-GGUF">iMatrix</a>
+</div>
+</div>
+</div>
+</div>
+<div class="gs-section gs-section--journal">
+<div class="gs-shead">
+<span class="gs-snum">04</span>
+<span class="gs-stitle">Creation Process</span>
+</div>
+<div class="gs-sbody">
+<p>Creation Process: SFT > Merge</p>
+<p>SFT on approx 35 million tokens.</p>
+<p>Despite using 35 million tokens, this dataset is fairly modest in size. The trainable portion is somewhere in the rough ballpark of 15 million tokens. The extra tokens come from a new multi-turn RP dataset on which I train the last turn only.</p>
+<p>Feels like Google left the instruct model at the razor's edge of overfitting. Finetune it at all and it feels like it'll rapidly lose intelligence, despite taking the writing style nicely. Hard to tell if you're overfitting or underfitting.</p>
+<p>My solution was to blast the model with my data anyway to ensure it picked up the new reasoning format and writing style and then merge that back into the instruct to heal the logic damage. There's still room for a better merge that keeps more of the writing style and potentially using the base model to undo some of the overfitting.</p>
+<p>Trained using Axolotl.</p>
+<details>
+<summary>Mergekit Config</summary>
+<div class="gs-detail-body">
+<pre><code>models:
+  &#45; model: google/gemma&#45;4&#45;26B&#45;A4B&#45;it
+    parameters:
+      weight: 0.5
+  &#45; model: ApocalypseParty/G4&#45;26B&#45;SFT&#45;6
+    parameters:
+      weight: 0.5
+merge_method: linear
+dtype: bfloat16</code></pre>
+</div>
+</details>
+<details>
+<summary>Axolotl Config</summary>
+<div class="gs-detail-body">
+<pre><code>&#35; Gemma 4 26B&#45;A4B MoE QLoRA with ScatterMoE kernels
+&#35;
+&#35; Validated: 50 steps on FineTome&#45;100k, loss 8.8 &#45;> 1.8, single RTX 5090 (32GB)
+&#35; torch_compile=true: 21 GiB peak VRAM, ~230 tok/s, 336s total
+&#35;
+&#35; Key notes:
+&#35; &#45; Max sequence length on 32GB GPU: 2048 (micro_batch_size=1, SDP attention).
+&#35;   4096 seq_len OOMs due to head_dim=512 math SDP materializing full score matrix.
+&#35;   Use 48GB+ GPUs for longer sequences or multi&#45;GPU with FSDP.
+&#32;
+base_model: google/gemma&#45;4&#45;26B&#45;A4B&#45;it
+&#32;
+plugins:
+  &#45; axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
+  &#45; axolotl.integrations.kernels.KernelsPlugin
+  &#45; axolotl.integrations.liger.LigerPlugin
+use_kernels: true
+use_scattermoe: true
+cut_cross_entropy: true
+experts_implementation: scattermoe
+liger_layer_norm: true
+liger_rope: true
+liger_rms_norm: true
+liger_glu_activation: true
+liger_rms_norm_gated: true
+strict: false
+&#32;
+datasets:
+  &#45; path: ./data/gemma_4_sft_5_masked_20260415_082234.jsonl
+val_set_size: 0.02
+output_dir: ./G4&#45;26B&#45;SFT&#45;6
+&#32;
+sequence_len: 10756
+pad_to_sequence_len: true
+sample_packing: true
+&#32;
+load_in_4bit: false
+&#35;quantize_moe_experts: true
+adapter: lora
+lora_r: 128
+lora_alpha: 128
+peft_use_rslora: true
+lora_dropout: 0.0
+freeze_mm_modules: true
+&#32;
+&#35; Restrict LoRA to text backbone only (skip vision/audio encoders)
+&#35; using regex to match only the text decoder attention projections.
+lora_target_modules: 'model.language_model.layers.[\d]+.(_checkpoint_wrapped_module.)?(mlp|self_attn).(up|down|gate|q|k|v|o)_proj'
+&#32;
+&#35; MoE expert LoRA (3D Parameter tensors, not nn.Linear)
+lora_target_parameters:
+  &#45; experts.gate_up_proj
+  &#45; experts.down_proj
+&#32;
+lora_mlp_kernel: false
+lora_qkv_kernel: false
+lora_o_kernel: false
+&#32;
+&#35;bnb_config_kwargs:
+&#35;  bnb_4bit_use_double_quant: true
+&#32;
+wandb_project: G4&#45;26B&#45;SFT
+wandb_name: G4&#45;26B&#45;SFT&#45;6
+&#32;
+gradient_accumulation_steps: 2
+micro_batch_size: 2
+num_epochs: 2
+optimizer: adamw_torch_fused
+lr_scheduler: constant_with_warmup
+learning_rate: 1e&#45;5
+max_grad_norm: 1.0
+&#32;
+bf16: auto
+tf32: true
+&#32;
+&#35;gradient_checkpointing: true
+&#35;activation_offloading: true
+logging_steps: 1
+&#32;
+&#35; FA2 not supported
+sdp_attention: true
+&#35;flex_attention: true
+&#35;torch_compile: true
+flash_attention: false
+&#32;
+warmup_ratio: 0.1
+evals_per_epoch: 4
+saves_per_epoch: 4
+weight_decay: 0.01
+special_tokens:
+&#32;
+fsdp_config:
+  fsdp_version: 2
+  offload_params: false
+  cpu_ram_efficient_loading: false
+  auto_wrap_policy: TRANSFORMER_BASED_WRAP
+  transformer_layer_cls_to_wrap: Gemma4TextDecoderLayer
+  state_dict_type: FULL_STATE_DICT
+  sharding_strategy: FULL_SHARD
+  reshard_after_forward: true
+  activation_checkpointing: true</code></pre>
+</div>
+</details>
+</div>
+</div>
+</div>
+</body>
+</html>

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,347 @@

+{#- format_parameters(properties, required): renders every entry of `properties`
+    (iterated in key order via dictsort) as a comma-separated `key:` object.
+    Keys that appear in `standard_keys` are skipped. Within one entry the
+    optional fields are written in this order: description, enum (STRING) or
+    items (ARRAY), nullable, properties and required (OBJECT); the upper-cased
+    type is always written last. The `required` argument is accepted but is not
+    read anywhere inside this macro. -#}
+{%- macro format_parameters(properties, required) -%}
+    {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
+    {%- set ns = namespace(found_first=false) -%}
+    {%- for key, value in properties | dictsort -%}
+        {#- add_comma records whether a field was already written for this entry,
+            so the next field knows to emit a separating comma first. -#}
+        {%- set add_comma = false -%}
+        {%- if key not in standard_keys -%}
+            {%- if ns.found_first %},{% endif -%}
+            {%- set ns.found_first = true -%}
+            {{ key }}:{
+            {%- if value['description'] -%}
+                description:<|"|>{{ value['description'] }}<|"|>
+                {%- set add_comma = true -%}
+            {%- endif -%}
+            {%- if value['type'] | upper == 'STRING' -%}
+                {%- if value['enum'] -%}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    enum:{{ format_argument(value['enum']) }}
+                {%- endif -%}
+            {%- elif value['type'] | upper == 'ARRAY' -%}
+                {%- if value['items'] is mapping and value['items'] -%}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    items:{
+                    {%- set ns_items = namespace(found_first=false) -%}
+                    {%- for item_key, item_value in value['items'] | dictsort -%}
+                        {%- if item_value is not none -%}
+                            {%- if ns_items.found_first %},{% endif -%}
+                            {%- set ns_items.found_first = true -%}
+                            {%- if item_key == 'properties' -%}
+                                properties:{
+                                {%- if item_value is mapping -%}
+                                    {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
+                                {%- endif -%}
+                                }
+                            {%- elif item_key == 'required' -%}
+                                required:[
+                                {%- for req_item in item_value -%}
+                                    <|"|>{{- req_item -}}<|"|>
+                                    {%- if not loop.last %},{% endif -%}
+                                {%- endfor -%}
+                                ]
+                            {%- elif item_key == 'type' -%}
+                                {%- if item_value is string -%}
+                                    type:{{ format_argument(item_value | upper) }}
+                                {%- else -%}
+                                    type:{{ format_argument(item_value | map('upper') | list) }}
+                                {%- endif -%}
+                            {%- else -%}
+                                {{ item_key }}:{{ format_argument(item_value) }}
+                            {%- endif -%}
+                        {%- endif -%}
+                    {%- endfor -%}
+                    }
+                {%- endif -%}
+            {%- endif -%}
+            {%- if value['nullable'] %}
+                {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                nullable:true
+            {%- endif -%}
+            {%- if value['type'] | upper == 'OBJECT' -%}
+                {%- if value['properties'] is defined and value['properties'] is mapping -%}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    properties:{
+                    {{- format_parameters(value['properties'], value['required'] | default([])) -}}
+                    }
+                {%- elif value is mapping -%}
+                    {#- Fallback when there is no `properties` mapping: the entry itself is
+                        passed to the recursive call. -#}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    properties:{
+                    {{- format_parameters(value, value['required'] | default([])) -}}
+                    }
+                {%- endif -%}
+                {%- if value['required'] -%}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    required:[
+                    {%- for item in value['required'] | default([]) -%}
+                        <|"|>{{- item -}}<|"|>
+                        {%- if not loop.last %},{% endif -%}
+                    {%- endfor -%}
+                    ]
+                {%- endif -%}
+            {%- endif -%}
+            {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+            type:<|"|>{{ value['type'] | upper }}<|"|>}
+        {%- endif -%}
+    {%- endfor -%}
+{%- endmacro -%}
+{#- format_function_declaration(tool_data): renders one tool definition as a
+    `declaration:NAME` object holding the description and, when present, the
+    parameters and response sub-objects. Reads tool_data['function'] (name,
+    description, parameters, response). The closing brace of the parameters
+    and response objects is written unconditionally so the braces stay
+    balanced when a schema omits `type` or the response type is not OBJECT;
+    the output for schemas that do carry the type is unchanged. -#}
+{%- macro format_function_declaration(tool_data) -%}
+    declaration:{{- tool_data['function']['name'] -}}{description:<|"|>{{- tool_data['function']['description'] -}}<|"|>
+    {%- set params = tool_data['function']['parameters'] -%}
+    {%- if params -%}
+        ,parameters:{
+        {%- if params['properties'] -%}
+            properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
+        {%- endif -%}
+        {%- if params['required'] -%}
+            required:[
+            {%- for item in params['required'] -%}
+                <|"|>{{- item -}}<|"|>
+                {{- ',' if not loop.last -}}
+            {%- endfor -%}
+            ],
+        {%- endif -%}
+        {%- if params['type'] -%}
+            type:<|"|>{{- params['type'] | upper -}}<|"|>
+        {%- endif -%}
+        {#- Always close the parameters object, even when no type was written. -#}
+        }
+    {%- endif -%}
+    {%- if 'response' in tool_data['function'] -%}
+        {%- set response_declaration = tool_data['function']['response'] -%}
+        ,response:{
+        {%- if response_declaration['description'] -%}
+            description:<|"|>{{- response_declaration['description'] -}}<|"|>,
+        {%- endif -%}
+        {%- if response_declaration['type'] | upper == 'OBJECT' -%}
+            type:<|"|>{{- response_declaration['type'] | upper -}}<|"|>
+        {%- endif -%}
+        {#- Always close the response object, even when the type is not OBJECT. -#}
+        }
+    {%- endif -%}
+    }
+{%- endmacro -%}
+{#- format_argument(argument, escape_keys=True): serialises one value.
+    Strings are wrapped in the <|"|> delimiter, booleans become true/false,
+    mappings become a brace-enclosed key:value list in key order (keys are
+    wrapped in the delimiter only when escape_keys is set), sequences become a
+    bracketed comma-separated list, and anything else is written as-is.
+    Nested values are handled by recursion with the same escape_keys. -#}
+{%- macro format_argument(argument, escape_keys=True) -%}
+    {#- Branch order matters: the string and mapping checks run before the
+        sequence check, so those values never reach the list branch. -#}
+    {%- if argument is string -%}
+        {{- '<|"|>' + argument + '<|"|>' -}}
+    {%- elif argument is boolean -%}
+        {{- 'true' if argument else 'false' -}}
+    {%- elif argument is mapping -%}
+        {{- '{' -}}
+        {%- set ns = namespace(found_first=false) -%}
+        {%- for key, value in argument | dictsort -%}
+            {%- if ns.found_first %},{% endif -%}
+            {%- set ns.found_first = true -%}
+            {%- if escape_keys -%}
+                {{- '<|"|>' + key + '<|"|>' -}}
+            {%- else -%}
+                {{- key -}}
+            {%- endif -%}
+            :{{- format_argument(value, escape_keys=escape_keys) -}}
+        {%- endfor -%}
+        {{- '}' -}}
+    {%- elif argument is sequence -%}
+        {{- '[' -}}
+        {%- for item in argument -%}
+            {{- format_argument(item, escape_keys=escape_keys) -}}
+            {%- if not loop.last %},{% endif -%}
+        {%- endfor -%}
+        {{- ']' -}}
+    {%- else -%}
+        {{- argument -}}
+    {%- endif -%}
+{%- endmacro -%}
+{#- strip_thinking(text): returns `text` without its thinking-channel spans,
+    trimmed of surrounding whitespace. The text is split on the closing marker
+    <channel|>; in each piece everything from the opening marker <|channel>
+    onwards is dropped, and the remaining pieces are concatenated. A piece
+    with an opening marker but no closing marker is cut at the opening marker
+    as well. -#}
+{%- macro strip_thinking(text) -%}
+    {%- set ns = namespace(result='') -%}
+    {%- for part in text.split('<channel|>') -%}
+        {%- if '<|channel>' in part -%}
+            {%- set ns.result = ns.result + part.split('<|channel>')[0] -%}
+        {%- else -%}
+            {%- set ns.result = ns.result + part -%}
+        {%- endif -%}
+    {%- endfor -%}
+    {{- ns.result | trim -}}
+{%- endmacro -%}
+{#- format_tool_response_block(tool_name, response): wraps one tool result in
+    the <|tool_response> ... <tool_response|> markers as a `response:NAME`
+    object. A mapping response is written as key:value pairs in key order;
+    any other response is written under the single key `value`. Values go
+    through format_argument with escape_keys=False. -#}
+{%- macro format_tool_response_block(tool_name, response) -%}
+    {{- '<|tool_response>' -}}
+    {%- if response is mapping -%}
+        {{- 'response:' + tool_name + '{' -}}
+        {%- for key, value in response | dictsort -%}
+            {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+            {%- if not loop.last %},{% endif -%}
+        {%- endfor -%}
+        {{- '}' -}}
+    {%- else -%}
+        {{- 'response:' + tool_name + '{value:' + format_argument(response, escape_keys=False) + '}' -}}
+    {%- endif -%}
+    {{- '<tool_response|>' -}}
+{%- endmacro -%}
+{#- Template-wide state: ns.prev_message_type records the kind of the last element rendered; it is read again when the generation prompt is decided. -#}
+{%- set ns = namespace(prev_message_type=None) -%}
+{%- set loop_messages = messages -%}
+{{- bos_token -}}
+{#- Handle System/Tool Definitions Block -#}
+{#- A system turn is opened when thinking is enabled, when tools are supplied, or when the first message has role system or developer. -#}
+{%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
+    {{- '<|turn>system\n' -}}
+    {#- Inject Thinking token at the very top of the FIRST system turn -#}
+    {%- if enable_thinking is defined and enable_thinking -%}
+        {{- '<|think|>\n' -}}
+        {%- set ns.prev_message_type = 'think' -%}
+    {%- endif -%}
+    {%- if messages[0]['role'] in ['system', 'developer'] -%}
+        {{- messages[0]['content'] | trim -}}
+        {#- The first message is consumed here, so the message loop iterates over the remaining ones only. -#}
+        {%- set loop_messages = messages[1:] -%}
+    {%- endif -%}
+    {#- Each tool is written as '<|tool>' + trimmed function declaration + '<tool|>'. -#}
+    {%- if tools -%}
+        {%- for tool in tools %}
+            {{- '<|tool>' -}}
+            {{- format_function_declaration(tool) | trim -}}
+            {{- '<tool|>' -}}
+        {%- endfor %}
+        {%- set ns.prev_message_type = 'tool' -%}
+    {%- endif -%}
+    {{- '<turn|>\n' -}}
+{%- endif %}
+{#- Pre-scan: remember the position of the final user message in loop_messages (stays -1 when there is none); the reasoning guard in the message loop compares against it. -#}
+{%- set ns_turn = namespace(last_user_idx=-1) -%}
+{%- for candidate in loop_messages -%}
+    {%- if candidate['role'] == 'user' -%}
+        {%- set ns_turn.last_user_idx = loop.index0 -%}
+    {%- endif -%}
+{%- endfor -%}
+{#- Loop through messages. Messages with role 'tool' are skipped here; they are rendered by the forward scan in the tool_calls branch of the assistant message that precedes them. -#}
+{%- for message in loop_messages -%}
+    {%- if message['role'] != 'tool' -%}
+    {%- set ns.prev_message_type = None -%}
+    {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
+    {#- Detect continuation: suppress duplicate <|turn>model when previous non-tool message was also assistant -#}
+    {%- set prev_nt = namespace(role=None, found=false) -%}
+    {%- if loop.index0 > 0 -%}
+        {#- Walk backwards and latch onto the nearest non-tool message; the found flag stands in for a break. -#}
+        {%- for j in range(loop.index0 - 1, -1, -1) -%}
+            {%- if not prev_nt.found -%}
+                {%- if loop_messages[j]['role'] != 'tool' -%}
+                    {%- set prev_nt.role = loop_messages[j]['role'] -%}
+                    {%- set prev_nt.found = true -%}
+                {%- endif -%}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- endif -%}
+    {%- set continue_same_model_turn = (role == 'model' and prev_nt.role == 'assistant') -%}
+    {%- if not continue_same_model_turn -%}
+        {{- '<|turn>' + role + '\n' }}
+    {%- endif -%}
+    {#- Render reasoning/reasoning_content as thinking channel -#}
+    {#- Reasoning is only replayed for tool-calling messages that come after the last user message. -#}
+    {%- set thinking_text = message.get('reasoning') or message.get('reasoning_content') -%}
+    {%- if thinking_text and loop.index0 > ns_turn.last_user_idx and message.get('tool_calls') -%}
+        {{- '<|channel>thought\n' + thinking_text + '\n<channel|>' -}}
+    {%- endif -%}
+            {%- if message['tool_calls'] -%}
+                {%- for tool_call in message['tool_calls'] -%}
+                    {%- set function = tool_call['function'] -%}
+                    {{- '<|tool_call>call:' + function['name'] + '{' -}}
+                    {%- if function['arguments'] is mapping -%}
+                        {%- set ns_args = namespace(found_first=false) -%}
+                        {%- for key, value in function['arguments'] | dictsort -%}
+                            {%- if ns_args.found_first %},{% endif -%}
+                            {%- set ns_args.found_first = true -%}
+                            {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+                        {%- endfor -%}
+                    {%- elif function['arguments'] is string -%}
+                        {{- function['arguments'] -}}
+                    {%- endif -%}
+                    {{- '}<tool_call|>' -}}
+                {%- endfor -%}
+                {%- set ns.prev_message_type = 'tool_call' -%}
+            {%- endif -%}
+            {#- ns_tr_out.flag becomes true once at least one tool response has been rendered for this message. -#}
+            {%- set ns_tr_out = namespace(flag=false) -%}
+            {%- if message.get('tool_responses') -%}
+                {#- Legacy: tool_responses embedded on the assistant message (Google/Gemma native) -#}
+                {%- for tool_response in message['tool_responses'] -%}
+                    {{- format_tool_response_block(tool_response['name'] | default('unknown'), tool_response['response']) -}}
+                    {%- set ns_tr_out.flag = true -%}
+                    {%- set ns.prev_message_type = 'tool_response' -%}
+                {%- endfor -%}
+            {%- elif message.get('tool_calls') -%}
+                {#- OpenAI Chat Completions: forward-scan consecutive role:tool messages -#}
+                {%- set ns_tool_scan = namespace(stopped=false) -%}
+                {%- for k in range(loop.index0 + 1, loop_messages | length) -%}
+                    {%- if ns_tool_scan.stopped -%}
+                    {%- elif loop_messages[k]['role'] != 'tool' -%}
+                        {%- set ns_tool_scan.stopped = true -%}
+                    {%- else -%}
+                        {%- set follow = loop_messages[k] -%}
+                        {#- Resolve tool_call_id to function name -#}
+                        {#- Use 'or' rather than the default filter: .get() returns None, not undefined, for a missing key, and default() would pass None through to the string concatenation in format_tool_response_block. -#}
+                        {%- set ns_tname = namespace(name=follow.get('name') or 'unknown') -%}
+                        {%- for tc in message['tool_calls'] -%}
+                            {%- if tc.get('id') == follow.get('tool_call_id') -%}
+                                {%- set ns_tname.name = tc['function']['name'] -%}
+                            {%- endif -%}
+                        {%- endfor -%}
+                        {#- Handle content as string or content-parts array -#}
+                        {%- set tool_body = follow.get('content') -%}
+                        {%- if tool_body is string -%}
+                            {{- format_tool_response_block(ns_tname.name, tool_body) -}}
+                        {%- elif tool_body is sequence and tool_body is not string -%}
+                            {%- set ns_txt = namespace(s='') -%}
+                            {%- for part in tool_body -%}
+                                {%- if part.get('type') == 'text' -%}
+                                    {#- A text part without a 'text' value contributes an empty string instead of None. -#}
+                                    {%- set ns_txt.s = ns_txt.s + (part.get('text') or '') -%}
+                                {%- endif -%}
+                            {%- endfor -%}
+                            {{- format_tool_response_block(ns_tname.name, ns_txt.s) -}}
+                        {%- else -%}
+                            {{- format_tool_response_block(ns_tname.name, tool_body) -}}
+                        {%- endif -%}
+                        {%- set ns_tr_out.flag = true -%}
+                        {%- set ns.prev_message_type = 'tool_response' -%}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- endif -%}
+            {#- Message content: model text goes through strip_thinking, other roles are trimmed; media parts become placeholder tokens. -#}
+            {%- if message['content'] is string -%}
+                {%- if role == 'model' -%}
+                    {{- strip_thinking(message['content']) -}}
+                {%- else -%}
+                    {{- message['content'] | trim -}}
+                {%- endif -%}
+            {%- elif message['content'] is sequence -%}
+                {%- for item in message['content'] -%}
+                    {%- if item['type'] == 'text' -%}
+                        {%- if role == 'model' -%}
+                            {{- strip_thinking(item['text']) -}}
+                        {%- else -%}
+                            {{- item['text'] | trim -}}
+                        {%- endif -%}
+                    {%- elif item['type'] == 'image' -%}
+                        {{- '<|image|>' -}}
+                        {%- set ns.prev_message_type = 'image' -%}
+                    {%- elif item['type'] == 'audio' -%}
+                        {{- '<|audio|>' -}}
+                        {%- set ns.prev_message_type = 'audio' -%}
+                    {%- elif item['type'] == 'video' -%}
+                        {{- '<|video|>' -}}
+                        {%- set ns.prev_message_type = 'video' -%}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- endif -%}
+        {#- Turn ending: tool calls with no rendered response leave an open '<|tool_response>'; rendered responses with no content emit nothing; every other case closes the turn. -#}
+        {%- if ns.prev_message_type == 'tool_call' and not ns_tr_out.flag -%}
+            {{- '<|tool_response>' -}}
+        {%- elif not (ns_tr_out.flag and not message.get('content')) -%}
+            {{- '<turn|>\n' -}}
+        {%- endif -%}
+    {%- endif -%}
+{%- endfor -%}
+{#- Generation prompt: open a model turn unless the last rendered element was a tool call or tool response; with thinking disabled, pre-fill an empty thought channel. -#}
+{%- if add_generation_prompt and not (ns.prev_message_type in ['tool_response', 'tool_call']) -%}
+    {{- '<|turn>model\n' -}}
+    {%- if not (enable_thinking | default(false)) -%}
+        {{- '<|channel>thought\n<channel|>' -}}
+    {%- endif -%}
+{%- endif -%}

config.json ADDED Viewed

	@@ -0,0 +1,158 @@

+{
+    "architectures": [
+        "Gemma4ForConditionalGeneration"
+    ],
+    "audio_config": null,
+    "audio_token_id": 258881,
+    "boa_token_id": 256000,
+    "boi_token_id": 255999,
+    "dtype": "bfloat16",
+    "eoa_token_id": 258883,
+    "eoa_token_index": 258883,
+    "eoi_token_id": 258882,
+    "eos_token_id": [
+        1,
+        106
+    ],
+    "image_token_id": 258880,
+    "initializer_range": 0.02,
+    "model_type": "gemma4",
+    "text_config": {
+        "attention_bias": false,
+        "attention_dropout": 0.0,
+        "attention_k_eq_v": true,
+        "bos_token_id": 2,
+        "dtype": "bfloat16",
+        "enable_moe_block": true,
+        "eos_token_id": 1,
+        "final_logit_softcapping": 30.0,
+        "global_head_dim": 512,
+        "head_dim": 256,
+        "hidden_activation": "gelu_pytorch_tanh",
+        "hidden_size": 2816,
+        "hidden_size_per_layer_input": 0,
+        "initializer_range": 0.02,
+        "intermediate_size": 2112,
+        "layer_types": [
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "full_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "full_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "full_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "full_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "sliding_attention",
+            "full_attention"
+        ],
+        "max_position_embeddings": 262144,
+        "model_type": "gemma4_text",
+        "moe_intermediate_size": 704,
+        "num_attention_heads": 16,
+        "num_experts": 128,
+        "num_global_key_value_heads": 2,
+        "num_hidden_layers": 30,
+        "num_key_value_heads": 8,
+        "num_kv_shared_layers": 0,
+        "pad_token_id": 0,
+        "rms_norm_eps": 1e-06,
+        "rope_parameters": {
+            "full_attention": {
+                "partial_rotary_factor": 0.25,
+                "rope_theta": 1000000.0,
+                "rope_type": "proportional"
+            },
+            "sliding_attention": {
+                "rope_theta": 10000.0,
+                "rope_type": "default"
+            }
+        },
+        "sliding_window": 1024,
+        "tie_word_embeddings": true,
+        "top_k_experts": 8,
+        "use_bidirectional_attention": "vision",
+        "use_cache": true,
+        "use_double_wide_mlp": false,
+        "vocab_size": 262144,
+        "vocab_size_per_layer_input": 262144
+    },
+    "tie_word_embeddings": true,
+    "transformers_version": "5.5.4",
+    "video_token_id": 258884,
+    "vision_config": {
+        "_name_or_path": "",
+        "architectures": null,
+        "attention_bias": false,
+        "attention_dropout": 0.0,
+        "chunk_size_feed_forward": 0,
+        "default_output_length": 280,
+        "dtype": "bfloat16",
+        "global_head_dim": 72,
+        "head_dim": 72,
+        "hidden_activation": "gelu_pytorch_tanh",
+        "hidden_size": 1152,
+        "id2label": {
+            "0": "LABEL_0",
+            "1": "LABEL_1"
+        },
+        "initializer_range": 0.02,
+        "intermediate_size": 4304,
+        "is_encoder_decoder": false,
+        "label2id": {
+            "LABEL_0": 0,
+            "LABEL_1": 1
+        },
+        "max_position_embeddings": 131072,
+        "model_type": "gemma4_vision",
+        "num_attention_heads": 16,
+        "num_hidden_layers": 27,
+        "num_key_value_heads": 16,
+        "output_attentions": false,
+        "output_hidden_states": false,
+        "patch_size": 16,
+        "pooling_kernel_size": 3,
+        "position_embedding_size": 10240,
+        "problem_type": null,
+        "return_dict": true,
+        "rms_norm_eps": 1e-06,
+        "rope_parameters": {
+            "rope_theta": 100.0,
+            "rope_type": "default"
+        },
+        "standardize": true,
+        "use_clipped_linears": false
+    },
+    "vision_soft_tokens_per_image": 280,
+    "quantization_config": {
+        "quant_method": "exl3",
+        "version": "0.0.32",
+        "bits": 3.1,
+        "head_bits": 8,
+        "calibration": {
+            "rows": 250,
+            "cols": 2048
+        },
+        "out_scales": "always",
+        "codebook": "mcg"
+    }
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "bos_token_id": 2,
+  "do_sample": true,
+  "eos_token_id": [
+    1,
+    106,
+    50
+  ],
+  "pad_token_id": 0,
+  "temperature": 1.0,
+  "top_k": 64,
+  "top_p": 0.95,
+  "transformers_version": "5.5.0.dev0"
+}

model-00001-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6edeab0e725c65980351b97c5ebbde54dd75f9b9475cf033d1e200cc1961e07d
+size 8424353198

model-00002-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:36deccfd4494601d6583d72ee3defa877839a83a66d6040192231389e53d41e1
+size 5362857059

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

processor_config.json ADDED Viewed

	@@ -0,0 +1,75 @@

+{
+  "audio_ms_per_token": 40,
+  "audio_seq_length": 750,
+  "feature_extractor": {
+    "dither": 0.0,
+    "feature_extractor_type": "Gemma4AudioFeatureExtractor",
+    "feature_size": 128,
+    "fft_length": 512,
+    "fft_overdrive": false,
+    "frame_length": 320,
+    "hop_length": 160,
+    "input_scale_factor": 1.0,
+    "max_frequency": 8000.0,
+    "mel_floor": 0.001,
+    "min_frequency": 0.0,
+    "padding_side": "right",
+    "padding_value": 0.0,
+    "per_bin_mean": null,
+    "per_bin_stddev": null,
+    "preemphasis": 0.0,
+    "preemphasis_htk_flavor": true,
+    "return_attention_mask": true,
+    "sampling_rate": 16000
+  },
+  "image_processor": {
+    "do_convert_rgb": true,
+    "do_normalize": false,
+    "do_rescale": true,
+    "do_resize": true,
+    "image_mean": [
+      0.0,
+      0.0,
+      0.0
+    ],
+    "image_processor_type": "Gemma4ImageProcessor",
+    "image_seq_length": 280,
+    "image_std": [
+      1.0,
+      1.0,
+      1.0
+    ],
+    "max_soft_tokens": 280,
+    "patch_size": 16,
+    "pooling_kernel_size": 3,
+    "resample": 3,
+    "rescale_factor": 0.00392156862745098
+  },
+  "image_seq_length": 280,
+  "processor_class": "Gemma4Processor",
+  "video_processor": {
+    "do_convert_rgb": true,
+    "do_normalize": true,
+    "do_rescale": true,
+    "do_resize": true,
+    "do_sample_frames": true,
+    "image_mean": [
+      0.0,
+      0.0,
+      0.0
+    ],
+    "image_std": [
+      1.0,
+      1.0,
+      1.0
+    ],
+    "max_soft_tokens": 70,
+    "num_frames": 32,
+    "patch_size": 16,
+    "pooling_kernel_size": 3,
+    "resample": 3,
+    "rescale_factor": 0.00392156862745098,
+    "return_metadata": false,
+    "video_processor_type": "Gemma4VideoProcessor"
+  }
+}

quantization_config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9908c9371642d21371228cdb434f31522f70e624f984e7e4b942d14aac72b882
+size 15500628

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cc8d3a0ce36466ccc1278bf987df5f71db1719b9ca6b4118264f45cb627bfe0f
+size 32169626

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,74 @@

+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "processor_class": "Gemma4Processor",
+  "response_schema": {
+    "type": "object",
+    "properties": {
+      "role": {
+        "const": "assistant"
+      },
+      "thinking": {
+        "type": "string"
+      },
+      "content": {
+        "type": "string"
+      },
+      "tool_calls": {
+        "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>",
+        "type": "array",
+        "items": {
+          "type": "object",
+          "properties": {
+            "type": {
+              "const": "function"
+            },
+            "function": {
+              "type": "object",
+              "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})",
+              "properties": {
+                "name": {
+                  "type": "string"
+                },
+                "arguments": {
+                  "type": "object",
+                  "x-parser": "gemma4-tool-call",
+                  "additionalProperties": {}
+                }
+              }
+            }
+          }
+        }
+      }
+    },
+    "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?P<content>(?:(?!\\<turn\\|\\>)(?!\\<\\|tool_response\\>).)+)?(?:\\<turn\\|\\>|\\<\\|tool_response\\>)?"
+  },
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>"
+}