Upload folder using huggingface_hub

Browse files

Files changed (9) hide show

README.md +3 -24
chat_template.jinja +1 -29
config.json +1 -2
generation_config.json +10 -4
model-00001-of-00002.safetensors +1 -1
model-00002-of-00002.safetensors +2 -2
model.safetensors.index.json +2 -2
special_tokens_map.json +7 -1
tokenizer_config.json +2 -2

README.md CHANGED Viewed

@@ -10,38 +10,17 @@ tags:
 # Qwen3-4B-MedMCQA-RL
-Qwen3-4B fine-tuned with RL on MedMCQA for medical multiple choice QA
 ## Model Details
 - **Base Model**: [Qwen/Qwen3-4B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507)
-- **Training Method**: Reinforcement Learning (GRPO)
 - **Framework**: [verifiers](https://github.com/willieneis/verifiers) + [prime-rl](https://github.com/PRIME-RL/PRIME-RL)
-## Training Data
-This model was trained on **MedMCQA**, a large-scale multiple choice question dataset
-covering various medical topics from AIIMS/NEET PG entrance exams.
 ## Usage
-```python
-from transformers import AutoModelForCausalLM, AutoTokenizer
-model = AutoModelForCausalLM.from_pretrained("nsk7153/Qwen3-4B-MedMCQA-RL")
-tokenizer = AutoTokenizer.from_pretrained("nsk7153/Qwen3-4B-MedMCQA-RL")
-# Example usage
-messages = [
-    {"role": "user", "content": "Your medical question here"}
-]
-text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-inputs = tokenizer(text, return_tensors="pt")
-outputs = model.generate(**inputs, max_new_tokens=512)
-print(tokenizer.decode(outputs[0], skip_special_tokens=True))
-```
 ## License

 # Qwen3-4B-MedMCQA-RL
+Qwen3-4B fine-tuned with RL on MedMCQA for medical multiple choice QA. The LoRA adapter weights are merged into the base model weights, so the checkpoint loads as a standalone model.
 ## Model Details
 - **Base Model**: [Qwen/Qwen3-4B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507)
+- **Training Method**: Reinforcement Learning (GRPO) with LoRA
 - **Framework**: [verifiers](https://github.com/willieneis/verifiers) + [prime-rl](https://github.com/PRIME-RL/PRIME-RL)
 ## Usage
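+Load the model and tokenizer with `transformers` via `AutoModelForCausalLM.from_pretrained("nsk7153/Qwen3-4B-MedMCQA-RL")` and `AutoTokenizer.from_pretrained("nsk7153/Qwen3-4B-MedMCQA-RL")`, then format prompts with `tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)`.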
 ## License

chat_template.jinja CHANGED Viewed

@@ -14,14 +14,6 @@
         {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
     {%- endif %}
 {%- endif %}
-{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
-{%- for message in messages[::-1] %}
-    {%- set index = (messages|length - 1) - loop.index0 %}
-    {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
-        {%- set ns.multi_step_tool = false %}
-        {%- set ns.last_query_index = index %}
-    {%- endif %}
-{%- endfor %}
 {%- for message in messages %}
     {%- if message.content is string %}
         {%- set content = message.content %}
@@ -31,24 +23,7 @@
     {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
         {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
     {%- elif message.role == "assistant" %}
-        {%- set reasoning_content = '' %}
-        {%- if message.reasoning_content is string %}
-            {%- set reasoning_content = message.reasoning_content %}
-        {%- else %}
-            {%- if '</think>' in content %}
-                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
-                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
-            {%- endif %}
-        {%- endif %}
-        {%- if loop.index0 > ns.last_query_index %}
-            {%- if loop.last or (not loop.last and reasoning_content) %}
-                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
-            {%- else %}
-                {{- '<|im_start|>' + message.role + '\n' + content }}
-            {%- endif %}
-        {%- else %}
-            {{- '<|im_start|>' + message.role + '\n' + content }}
-        {%- endif %}
         {%- if message.tool_calls %}
             {%- for tool_call in message.tool_calls %}
                 {%- if (loop.first and content) or (not loop.first) %}
@@ -83,7 +58,4 @@
 {%- endfor %}
 {%- if add_generation_prompt %}
     {{- '<|im_start|>assistant\n' }}
-    {%- if enable_thinking is defined and enable_thinking is false %}
-        {{- '<think>\n\n</think>\n\n' }}
-    {%- endif %}
 {%- endif %}

         {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
     {%- endif %}
 {%- endif %}
 {%- for message in messages %}
     {%- if message.content is string %}
         {%- set content = message.content %}
     {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
         {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
     {%- elif message.role == "assistant" %}
+        {{- '<|im_start|>' + message.role + '\n' + content }}
         {%- if message.tool_calls %}
             {%- for tool_call in message.tool_calls %}
                 {%- if (loop.first and content) or (not loop.first) %}
 {%- endfor %}
 {%- if add_generation_prompt %}
     {{- '<|im_start|>assistant\n' }}
 {%- endif %}

config.json CHANGED Viewed

@@ -62,8 +62,7 @@
   "sliding_window": null,
   "tie_word_embeddings": true,
   "transformers_version": "4.57.6",
-  "use_cache": false,
-  "use_grouped_mm": true,
   "use_sliding_window": false,
   "vocab_size": 151936
 }

   "sliding_window": null,
   "tie_word_embeddings": true,
   "transformers_version": "4.57.6",
+  "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151936
 }

generation_config.json CHANGED Viewed

@@ -1,7 +1,13 @@
 {
-  "_from_model_config": true,
   "bos_token_id": 151643,
-  "eos_token_id": 151645,
-  "transformers_version": "4.57.6",
-  "use_cache": false
 }

 {
   "bos_token_id": 151643,
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "pad_token_id": 151643,
+  "temperature": 0.7,
+  "top_k": 20,
+  "top_p": 0.8,
+  "transformers_version": "4.57.6"
 }

model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:949954f3302e7ab4caf2de4013c9ebad1fdc2f38fe2e72c129e71d0cc246abc1
 size 4967215360

 version https://git-lfs.github.com/spec/v1
+oid sha256:6a293ad1eb7582a9952ba718715ef0279ff9fb1fafdd0323be5a39b6b41681da
 size 4967215360

model-00002-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:855cbe6db908112bd93332148ae36a2a66d7c7b3f2049161436dfd19ab45a634
-size 3855679144

 version https://git-lfs.github.com/spec/v1
+oid sha256:b27542d14ea021be97b7fbd191c41b5984be13071958bd73c8f1089bdf1ff523
+size 3077766632

model.safetensors.index.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "metadata": {
-    "total_size": 8822848512
   },
   "weight_map": {
-    "lm_head.weight": "model-00002-of-00002.safetensors",
     "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
     "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
     "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",

 {
   "metadata": {
+    "total_parameters": 4022468096,
+    "total_size": 8044936192
   },
   "weight_map": {
     "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
     "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
     "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",

special_tokens_map.json CHANGED Viewed

@@ -21,5 +21,11 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "<|im_end|>"
 }

     "rstrip": false,
     "single_word": false
   },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }

tokenizer_config.json CHANGED Viewed

@@ -231,8 +231,8 @@
   "eos_token": "<|im_end|>",
   "errors": "replace",
   "extra_special_tokens": {},
-  "model_max_length": 131072,
-  "pad_token": "<|im_end|>",
   "split_special_tokens": false,
   "tokenizer_class": "Qwen2Tokenizer",
   "unk_token": null

   "eos_token": "<|im_end|>",
   "errors": "replace",
   "extra_special_tokens": {},
+  "model_max_length": 1010000,
+  "pad_token": "<|endoftext|>",
   "split_special_tokens": false,
   "tokenizer_class": "Qwen2Tokenizer",
   "unk_token": null