Upload folder using huggingface_hub
Browse files
- README.md +3 -24
- chat_template.jinja +1 -29
- config.json +1 -2
- generation_config.json +10 -4
- model-00001-of-00002.safetensors +1 -1
- model-00002-of-00002.safetensors +2 -2
- model.safetensors.index.json +2 -2
- special_tokens_map.json +7 -1
- tokenizer_config.json +2 -2
README.md
CHANGED
|
@@ -10,38 +10,17 @@ tags:
|
|
| 10 |
|
| 11 |
# Qwen3-4B-MedMCQA-RL
|
| 12 |
|
| 13 |
-
Qwen3-4B fine-tuned with RL on MedMCQA for medical multiple choice QA
|
| 14 |
|
| 15 |
## Model Details
|
| 16 |
|
| 17 |
- **Base Model**: [Qwen/Qwen3-4B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507)
|
| 18 |
-
- **Training Method**: Reinforcement Learning (GRPO)
|
| 19 |
- **Framework**: [verifiers](https://github.com/willieneis/verifiers) + [prime-rl](https://github.com/PRIME-RL/PRIME-RL)
|
| 20 |
|
| 21 |
-
## Training Data
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
This model was trained on **MedMCQA**, a large-scale multiple choice question dataset
|
| 25 |
-
covering various medical topics from AIIMS/NEET PG entrance exams.
|
| 26 |
-
|
| 27 |
-
|
| 28 |
## Usage
|
| 29 |
|
| 30 |
-
```python
|
| 31 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 32 |
-
|
| 33 |
-
model = AutoModelForCausalLM.from_pretrained("nsk7153/Qwen3-4B-MedMCQA-RL")
|
| 34 |
-
tokenizer = AutoTokenizer.from_pretrained("nsk7153/Qwen3-4B-MedMCQA-RL")
|
| 35 |
-
|
| 36 |
-
# Example usage
|
| 37 |
-
messages = [
|
| 38 |
-
{"role": "user", "content": "Your medical question here"}
|
| 39 |
-
]
|
| 40 |
-
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
| 41 |
-
inputs = tokenizer(text, return_tensors="pt")
|
| 42 |
-
outputs = model.generate(**inputs, max_new_tokens=512)
|
| 43 |
-
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
| 44 |
-
```
|
| 45 |
|
| 46 |
## License
|
| 47 |
|
|
|
|
| 10 |
|
| 11 |
# Qwen3-4B-MedMCQA-RL
|
| 12 |
|
| 13 |
+
Qwen3-4B fine-tuned with RL on MedMCQA for medical multiple choice QA. LoRA weights properly merged.
|
| 14 |
|
| 15 |
## Model Details
|
| 16 |
|
| 17 |
- **Base Model**: [Qwen/Qwen3-4B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507)
|
| 18 |
+
- **Training Method**: Reinforcement Learning (GRPO) with LoRA
|
| 19 |
- **Framework**: [verifiers](https://github.com/willieneis/verifiers) + [prime-rl](https://github.com/PRIME-RL/PRIME-RL)
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
## Usage
|
| 22 |
|
| 23 |
+
Please ask your administrator.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
## License
|
| 26 |
|
chat_template.jinja
CHANGED
|
@@ -14,14 +14,6 @@
|
|
| 14 |
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
| 15 |
{%- endif %}
|
| 16 |
{%- endif %}
|
| 17 |
-
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
| 18 |
-
{%- for message in messages[::-1] %}
|
| 19 |
-
{%- set index = (messages|length - 1) - loop.index0 %}
|
| 20 |
-
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
| 21 |
-
{%- set ns.multi_step_tool = false %}
|
| 22 |
-
{%- set ns.last_query_index = index %}
|
| 23 |
-
{%- endif %}
|
| 24 |
-
{%- endfor %}
|
| 25 |
{%- for message in messages %}
|
| 26 |
{%- if message.content is string %}
|
| 27 |
{%- set content = message.content %}
|
|
@@ -31,24 +23,7 @@
|
|
| 31 |
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
| 32 |
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
| 33 |
{%- elif message.role == "assistant" %}
|
| 34 |
-
{%- set reasoning_content = '' %}
|
| 35 |
-
{%- if message.reasoning_content is string %}
|
| 36 |
-
{%- set reasoning_content = message.reasoning_content %}
|
| 37 |
-
{%- else %}
|
| 38 |
-
{%- if '</think>' in content %}
|
| 39 |
-
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
| 40 |
-
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
| 41 |
-
{%- endif %}
|
| 42 |
-
{%- endif %}
|
| 43 |
-
{%- if loop.index0 > ns.last_query_index %}
|
| 44 |
-
{%- if loop.last or (not loop.last and reasoning_content) %}
|
| 45 |
-
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
| 46 |
-
{%- else %}
|
| 47 |
-
{{- '<|im_start|>' + message.role + '\n' + content }}
|
| 48 |
-
{%- endif %}
|
| 49 |
-
{%- else %}
|
| 50 |
-
{{- '<|im_start|>' + message.role + '\n' + content }}
|
| 51 |
-
{%- endif %}
|
| 52 |
{%- if message.tool_calls %}
|
| 53 |
{%- for tool_call in message.tool_calls %}
|
| 54 |
{%- if (loop.first and content) or (not loop.first) %}
|
|
@@ -83,7 +58,4 @@
|
|
| 83 |
{%- endfor %}
|
| 84 |
{%- if add_generation_prompt %}
|
| 85 |
{{- '<|im_start|>assistant\n' }}
|
| 86 |
-
{%- if enable_thinking is defined and enable_thinking is false %}
|
| 87 |
-
{{- '<think>\n\n</think>\n\n' }}
|
| 88 |
-
{%- endif %}
|
| 89 |
{%- endif %}
|
|
|
|
| 14 |
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
| 15 |
{%- endif %}
|
| 16 |
{%- endif %}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
{%- for message in messages %}
|
| 18 |
{%- if message.content is string %}
|
| 19 |
{%- set content = message.content %}
|
|
|
|
| 23 |
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
| 24 |
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
| 25 |
{%- elif message.role == "assistant" %}
|
| 26 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
{%- if message.tool_calls %}
|
| 28 |
{%- for tool_call in message.tool_calls %}
|
| 29 |
{%- if (loop.first and content) or (not loop.first) %}
|
|
|
|
| 58 |
{%- endfor %}
|
| 59 |
{%- if add_generation_prompt %}
|
| 60 |
{{- '<|im_start|>assistant\n' }}
|
|
|
|
|
|
|
|
|
|
| 61 |
{%- endif %}
|
config.json
CHANGED
|
@@ -62,8 +62,7 @@
|
|
| 62 |
"sliding_window": null,
|
| 63 |
"tie_word_embeddings": true,
|
| 64 |
"transformers_version": "4.57.6",
|
| 65 |
-
"use_cache": false,
|
| 66 |
-
"use_grouped_mm": true,
|
| 67 |
"use_sliding_window": false,
|
| 68 |
"vocab_size": 151936
|
| 69 |
}
|
|
|
|
| 62 |
"sliding_window": null,
|
| 63 |
"tie_word_embeddings": true,
|
| 64 |
"transformers_version": "4.57.6",
|
| 65 |
+
"use_cache": true,
|
|
|
|
| 66 |
"use_sliding_window": false,
|
| 67 |
"vocab_size": 151936
|
| 68 |
}
|
generation_config.json
CHANGED
|
@@ -1,7 +1,13 @@
|
|
| 1 |
{
|
| 2 |
-
"_from_model_config": true,
|
| 3 |
"bos_token_id": 151643,
|
| 4 |
-
"
|
| 5 |
-
"
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
}
|
|
|
|
| 1 |
{
|
|
|
|
| 2 |
"bos_token_id": 151643,
|
| 3 |
+
"do_sample": true,
|
| 4 |
+
"eos_token_id": [
|
| 5 |
+
151645,
|
| 6 |
+
151643
|
| 7 |
+
],
|
| 8 |
+
"pad_token_id": 151643,
|
| 9 |
+
"temperature": 0.7,
|
| 10 |
+
"top_k": 20,
|
| 11 |
+
"top_p": 0.8,
|
| 12 |
+
"transformers_version": "4.57.6"
|
| 13 |
}
|
model-00001-of-00002.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4967215360
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a293ad1eb7582a9952ba718715ef0279ff9fb1fafdd0323be5a39b6b41681da
|
| 3 |
size 4967215360
|
model-00002-of-00002.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b27542d14ea021be97b7fbd191c41b5984be13071958bd73c8f1089bdf1ff523
|
| 3 |
+
size 3077766632
|
model.safetensors.index.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
-
"
|
|
|
|
| 4 |
},
|
| 5 |
"weight_map": {
|
| 6 |
-
"lm_head.weight": "model-00002-of-00002.safetensors",
|
| 7 |
"model.embed_tokens.weight": "model-00001-of-00002.safetensors",
|
| 8 |
"model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 9 |
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
|
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
+
"total_parameters": 4022468096,
|
| 4 |
+
"total_size": 8044936192
|
| 5 |
},
|
| 6 |
"weight_map": {
|
|
|
|
| 7 |
"model.embed_tokens.weight": "model-00001-of-00002.safetensors",
|
| 8 |
"model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
| 9 |
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
special_tokens_map.json
CHANGED
|
@@ -21,5 +21,11 @@
|
|
| 21 |
"rstrip": false,
|
| 22 |
"single_word": false
|
| 23 |
},
|
| 24 |
-
"pad_token":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
}
|
|
|
|
| 21 |
"rstrip": false,
|
| 22 |
"single_word": false
|
| 23 |
},
|
| 24 |
+
"pad_token": {
|
| 25 |
+
"content": "<|endoftext|>",
|
| 26 |
+
"lstrip": false,
|
| 27 |
+
"normalized": false,
|
| 28 |
+
"rstrip": false,
|
| 29 |
+
"single_word": false
|
| 30 |
+
}
|
| 31 |
}
|
tokenizer_config.json
CHANGED
|
@@ -231,8 +231,8 @@
|
|
| 231 |
"eos_token": "<|im_end|>",
|
| 232 |
"errors": "replace",
|
| 233 |
"extra_special_tokens": {},
|
| 234 |
-
"model_max_length":
|
| 235 |
-
"pad_token": "<|
|
| 236 |
"split_special_tokens": false,
|
| 237 |
"tokenizer_class": "Qwen2Tokenizer",
|
| 238 |
"unk_token": null
|
|
|
|
| 231 |
"eos_token": "<|im_end|>",
|
| 232 |
"errors": "replace",
|
| 233 |
"extra_special_tokens": {},
|
| 234 |
+
"model_max_length": 1010000,
|
| 235 |
+
"pad_token": "<|endoftext|>",
|
| 236 |
"split_special_tokens": false,
|
| 237 |
"tokenizer_class": "Qwen2Tokenizer",
|
| 238 |
"unk_token": null
|