nsk7153 committed on
Commit
98a18ed
·
verified ·
1 Parent(s): dce48f1

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -10,38 +10,17 @@ tags:
10
 
11
  # Qwen3-4B-MedMCQA-RL
12
 
13
- Qwen3-4B fine-tuned with RL on MedMCQA for medical multiple choice QA
14
 
15
  ## Model Details
16
 
17
  - **Base Model**: [Qwen/Qwen3-4B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507)
18
- - **Training Method**: Reinforcement Learning (GRPO)
19
  - **Framework**: [verifiers](https://github.com/willieneis/verifiers) + [prime-rl](https://github.com/PRIME-RL/PRIME-RL)
20
 
21
- ## Training Data
22
-
23
-
24
- This model was trained on **MedMCQA**, a large-scale multiple choice question dataset
25
- covering various medical topics from AIIMS/NEET PG entrance exams.
26
-
27
-
28
  ## Usage
29
 
30
- ```python
31
- from transformers import AutoModelForCausalLM, AutoTokenizer
32
-
33
- model = AutoModelForCausalLM.from_pretrained("nsk7153/Qwen3-4B-MedMCQA-RL")
34
- tokenizer = AutoTokenizer.from_pretrained("nsk7153/Qwen3-4B-MedMCQA-RL")
35
-
36
- # Example usage
37
- messages = [
38
- {"role": "user", "content": "Your medical question here"}
39
- ]
40
- text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
41
- inputs = tokenizer(text, return_tensors="pt")
42
- outputs = model.generate(**inputs, max_new_tokens=512)
43
- print(tokenizer.decode(outputs[0], skip_special_tokens=True))
44
- ```
45
 
46
  ## License
47
 
 
10
 
11
  # Qwen3-4B-MedMCQA-RL
12
 
13
+ Qwen3-4B fine-tuned with RL on MedMCQA for medical multiple choice QA. LoRA weights properly merged.
14
 
15
  ## Model Details
16
 
17
  - **Base Model**: [Qwen/Qwen3-4B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507)
18
+ - **Training Method**: Reinforcement Learning (GRPO) with LoRA
19
  - **Framework**: [verifiers](https://github.com/willieneis/verifiers) + [prime-rl](https://github.com/PRIME-RL/PRIME-RL)
20
 
 
 
 
 
 
 
 
21
  ## Usage
22
 
23
+ Please ask your administrator.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  ## License
26
 
chat_template.jinja CHANGED
@@ -14,14 +14,6 @@
14
  {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
  {%- endif %}
16
  {%- endif %}
17
- {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
- {%- for message in messages[::-1] %}
19
- {%- set index = (messages|length - 1) - loop.index0 %}
20
- {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
- {%- set ns.multi_step_tool = false %}
22
- {%- set ns.last_query_index = index %}
23
- {%- endif %}
24
- {%- endfor %}
25
  {%- for message in messages %}
26
  {%- if message.content is string %}
27
  {%- set content = message.content %}
@@ -31,24 +23,7 @@
31
  {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
32
  {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
33
  {%- elif message.role == "assistant" %}
34
- {%- set reasoning_content = '' %}
35
- {%- if message.reasoning_content is string %}
36
- {%- set reasoning_content = message.reasoning_content %}
37
- {%- else %}
38
- {%- if '</think>' in content %}
39
- {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
40
- {%- set content = content.split('</think>')[-1].lstrip('\n') %}
41
- {%- endif %}
42
- {%- endif %}
43
- {%- if loop.index0 > ns.last_query_index %}
44
- {%- if loop.last or (not loop.last and reasoning_content) %}
45
- {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
46
- {%- else %}
47
- {{- '<|im_start|>' + message.role + '\n' + content }}
48
- {%- endif %}
49
- {%- else %}
50
- {{- '<|im_start|>' + message.role + '\n' + content }}
51
- {%- endif %}
52
  {%- if message.tool_calls %}
53
  {%- for tool_call in message.tool_calls %}
54
  {%- if (loop.first and content) or (not loop.first) %}
@@ -83,7 +58,4 @@
83
  {%- endfor %}
84
  {%- if add_generation_prompt %}
85
  {{- '<|im_start|>assistant\n' }}
86
- {%- if enable_thinking is defined and enable_thinking is false %}
87
- {{- '<think>\n\n</think>\n\n' }}
88
- {%- endif %}
89
  {%- endif %}
 
14
  {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
  {%- endif %}
16
  {%- endif %}
 
 
 
 
 
 
 
 
17
  {%- for message in messages %}
18
  {%- if message.content is string %}
19
  {%- set content = message.content %}
 
23
  {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
24
  {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
25
  {%- elif message.role == "assistant" %}
26
+ {{- '<|im_start|>' + message.role + '\n' + content }}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  {%- if message.tool_calls %}
28
  {%- for tool_call in message.tool_calls %}
29
  {%- if (loop.first and content) or (not loop.first) %}
 
58
  {%- endfor %}
59
  {%- if add_generation_prompt %}
60
  {{- '<|im_start|>assistant\n' }}
 
 
 
61
  {%- endif %}
config.json CHANGED
@@ -62,8 +62,7 @@
62
  "sliding_window": null,
63
  "tie_word_embeddings": true,
64
  "transformers_version": "4.57.6",
65
- "use_cache": false,
66
- "use_grouped_mm": true,
67
  "use_sliding_window": false,
68
  "vocab_size": 151936
69
  }
 
62
  "sliding_window": null,
63
  "tie_word_embeddings": true,
64
  "transformers_version": "4.57.6",
65
+ "use_cache": true,
 
66
  "use_sliding_window": false,
67
  "vocab_size": 151936
68
  }
generation_config.json CHANGED
@@ -1,7 +1,13 @@
1
  {
2
- "_from_model_config": true,
3
  "bos_token_id": 151643,
4
- "eos_token_id": 151645,
5
- "transformers_version": "4.57.6",
6
- "use_cache": false
 
 
 
 
 
 
 
7
  }
 
1
  {
 
2
  "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "temperature": 0.7,
10
+ "top_k": 20,
11
+ "top_p": 0.8,
12
+ "transformers_version": "4.57.6"
13
  }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:949954f3302e7ab4caf2de4013c9ebad1fdc2f38fe2e72c129e71d0cc246abc1
3
  size 4967215360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a293ad1eb7582a9952ba718715ef0279ff9fb1fafdd0323be5a39b6b41681da
3
  size 4967215360
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:855cbe6db908112bd93332148ae36a2a66d7c7b3f2049161436dfd19ab45a634
3
- size 3855679144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b27542d14ea021be97b7fbd191c41b5984be13071958bd73c8f1089bdf1ff523
3
+ size 3077766632
model.safetensors.index.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "metadata": {
3
- "total_size": 8822848512
 
4
  },
5
  "weight_map": {
6
- "lm_head.weight": "model-00002-of-00002.safetensors",
7
  "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
8
  "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
9
  "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_parameters": 4022468096,
4
+ "total_size": 8044936192
5
  },
6
  "weight_map": {
 
7
  "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
8
  "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
9
  "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
special_tokens_map.json CHANGED
@@ -21,5 +21,11 @@
21
  "rstrip": false,
22
  "single_word": false
23
  },
24
- "pad_token": "<|im_end|>"
 
 
 
 
 
 
25
  }
 
21
  "rstrip": false,
22
  "single_word": false
23
  },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
  }
tokenizer_config.json CHANGED
@@ -231,8 +231,8 @@
231
  "eos_token": "<|im_end|>",
232
  "errors": "replace",
233
  "extra_special_tokens": {},
234
- "model_max_length": 131072,
235
- "pad_token": "<|im_end|>",
236
  "split_special_tokens": false,
237
  "tokenizer_class": "Qwen2Tokenizer",
238
  "unk_token": null
 
231
  "eos_token": "<|im_end|>",
232
  "errors": "replace",
233
  "extra_special_tokens": {},
234
+ "model_max_length": 1010000,
235
+ "pad_token": "<|endoftext|>",
236
  "split_special_tokens": false,
237
  "tokenizer_class": "Qwen2Tokenizer",
238
  "unk_token": null