Leeyuyu committed on
Commit
fa34124
·
verified ·
1 Parent(s): 0d26a6b

Model save

Browse files
Files changed (4) hide show
  1. README.md +2 -5
  2. all_results.json +4 -4
  3. train_results.json +4 -4
  4. trainer_state.json +82 -82
README.md CHANGED
@@ -1,11 +1,8 @@
1
  ---
2
- datasets: Leeyuyu/fundo_600
3
  library_name: transformers
4
  model_name: Qwen2.5-SFT2-GRPO-fundo-nothink
5
  tags:
6
  - generated_from_trainer
7
- - R1-V
8
- - balanced-filtered-0-2-100pct-others-20pct
9
  - trl
10
  - sft
11
  licence: license
@@ -13,7 +10,7 @@ licence: license
13
 
14
  # Model Card for Qwen2.5-SFT2-GRPO-fundo-nothink
15
 
16
- This model is a fine-tuned version of [None](https://huggingface.co/None) on the [Leeyuyu/fundo_600](https://huggingface.co/datasets/Leeyuyu/fundo_600) dataset.
17
  It has been trained using [TRL](https://github.com/huggingface/trl).
18
 
19
  ## Quick start
@@ -29,7 +26,7 @@ print(output["generated_text"])
29
 
30
  ## Training procedure
31
 
32
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/brightlight720720_lee/huggingface/runs/tc404qff)
33
 
34
 
35
  This model was trained with SFT.
 
1
  ---
 
2
  library_name: transformers
3
  model_name: Qwen2.5-SFT2-GRPO-fundo-nothink
4
  tags:
5
  - generated_from_trainer
 
 
6
  - trl
7
  - sft
8
  licence: license
 
10
 
11
  # Model Card for Qwen2.5-SFT2-GRPO-fundo-nothink
12
 
13
+ This model is a fine-tuned version of [None](https://huggingface.co/None).
14
  It has been trained using [TRL](https://github.com/huggingface/trl).
15
 
16
  ## Quick start
 
26
 
27
  ## Training procedure
28
 
29
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/brightlight720720_lee/huggingface/runs/womcol2v)
30
 
31
 
32
  This model was trained with SFT.
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
- "total_flos": 6.191745166684979e+16,
4
- "train_loss": 2.359536478152642,
5
- "train_runtime": 593.7041,
6
  "train_samples": 1661,
7
- "train_samples_per_second": 2.798,
8
  "train_steps_per_second": 0.044
9
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "total_flos": 6.212373250362573e+16,
4
+ "train_loss": 1.2203706781594799,
5
+ "train_runtime": 584.4858,
6
  "train_samples": 1661,
7
+ "train_samples_per_second": 2.842,
8
  "train_steps_per_second": 0.044
9
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
- "total_flos": 6.191745166684979e+16,
4
- "train_loss": 2.359536478152642,
5
- "train_runtime": 593.7041,
6
  "train_samples": 1661,
7
- "train_samples_per_second": 2.798,
8
  "train_steps_per_second": 0.044
9
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "total_flos": 6.212373250362573e+16,
4
+ "train_loss": 1.2203706781594799,
5
+ "train_runtime": 584.4858,
6
  "train_samples": 1661,
7
+ "train_samples_per_second": 2.842,
8
  "train_steps_per_second": 0.044
9
  }
trainer_state.json CHANGED
@@ -10,193 +10,193 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.038461538461538464,
13
- "grad_norm": 779.4563598632812,
14
- "learning_rate": 1.5e-06,
15
- "loss": 6.8089,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.07692307692307693,
20
- "grad_norm": 779.59716796875,
21
- "learning_rate": 3e-06,
22
- "loss": 6.8084,
23
  "step": 2
24
  },
25
  {
26
  "epoch": 0.11538461538461539,
27
- "grad_norm": 764.6455688476562,
28
- "learning_rate": 2.9871672920607156e-06,
29
- "loss": 6.6899,
30
  "step": 3
31
  },
32
  {
33
  "epoch": 0.15384615384615385,
34
- "grad_norm": 647.0328369140625,
35
- "learning_rate": 2.9488887394336023e-06,
36
- "loss": 5.3962,
37
  "step": 4
38
  },
39
  {
40
  "epoch": 0.19230769230769232,
41
- "grad_norm": 602.569091796875,
42
- "learning_rate": 2.88581929876693e-06,
43
- "loss": 5.056,
44
  "step": 5
45
  },
46
  {
47
  "epoch": 0.23076923076923078,
48
- "grad_norm": 269.05499267578125,
49
- "learning_rate": 2.7990381056766585e-06,
50
- "loss": 2.8926,
51
  "step": 6
52
  },
53
  {
54
  "epoch": 0.2692307692307692,
55
- "grad_norm": 197.8215789794922,
56
- "learning_rate": 2.690030010436853e-06,
57
- "loss": 2.6136,
58
  "step": 7
59
  },
60
  {
61
  "epoch": 0.3076923076923077,
62
- "grad_norm": 17.3836727142334,
63
- "learning_rate": 2.5606601717798212e-06,
64
- "loss": 1.9375,
65
  "step": 8
66
  },
67
  {
68
  "epoch": 0.34615384615384615,
69
- "grad_norm": 16.178003311157227,
70
- "learning_rate": 2.4131421435130812e-06,
71
- "loss": 1.8646,
72
  "step": 9
73
  },
74
  {
75
  "epoch": 0.38461538461538464,
76
- "grad_norm": 14.688905715942383,
77
- "learning_rate": 2.25e-06,
78
- "loss": 1.803,
79
  "step": 10
80
  },
81
  {
82
  "epoch": 0.4230769230769231,
83
- "grad_norm": 13.383400917053223,
84
- "learning_rate": 2.074025148547635e-06,
85
- "loss": 1.7388,
86
  "step": 11
87
  },
88
  {
89
  "epoch": 0.46153846153846156,
90
- "grad_norm": 12.454666137695312,
91
- "learning_rate": 1.888228567653781e-06,
92
- "loss": 1.6809,
93
  "step": 12
94
  },
95
  {
96
  "epoch": 0.5,
97
- "grad_norm": 11.727381706237793,
98
- "learning_rate": 1.6957892883300778e-06,
99
- "loss": 1.6406,
100
  "step": 13
101
  },
102
  {
103
  "epoch": 0.5384615384615384,
104
- "grad_norm": 11.150338172912598,
105
- "learning_rate": 1.5e-06,
106
- "loss": 1.5987,
107
  "step": 14
108
  },
109
  {
110
  "epoch": 0.5769230769230769,
111
- "grad_norm": 9.359735488891602,
112
- "learning_rate": 1.304210711669923e-06,
113
- "loss": 1.3709,
114
  "step": 15
115
  },
116
  {
117
  "epoch": 0.6153846153846154,
118
- "grad_norm": 8.161463737487793,
119
- "learning_rate": 1.1117714323462188e-06,
120
- "loss": 1.2417,
121
  "step": 16
122
  },
123
  {
124
  "epoch": 0.6538461538461539,
125
- "grad_norm": 6.2837347984313965,
126
- "learning_rate": 9.259748514523654e-07,
127
- "loss": 1.1567,
128
  "step": 17
129
  },
130
  {
131
  "epoch": 0.6923076923076923,
132
- "grad_norm": 6.563652992248535,
133
- "learning_rate": 7.500000000000003e-07,
134
- "loss": 1.1046,
135
  "step": 18
136
  },
137
  {
138
  "epoch": 0.7307692307692307,
139
- "grad_norm": 7.914792060852051,
140
- "learning_rate": 5.868578564869191e-07,
141
- "loss": 1.0653,
142
  "step": 19
143
  },
144
  {
145
  "epoch": 0.7692307692307693,
146
- "grad_norm": 6.446779251098633,
147
- "learning_rate": 4.3933982822017883e-07,
148
- "loss": 1.0332,
149
  "step": 20
150
  },
151
  {
152
  "epoch": 0.8076923076923077,
153
- "grad_norm": 6.5320000648498535,
154
- "learning_rate": 3.0996998956314745e-07,
155
- "loss": 1.007,
156
  "step": 21
157
  },
158
  {
159
  "epoch": 0.8461538461538461,
160
- "grad_norm": 5.943300247192383,
161
- "learning_rate": 2.0096189432334195e-07,
162
- "loss": 0.9884,
163
  "step": 22
164
  },
165
  {
166
  "epoch": 0.8846153846153846,
167
- "grad_norm": 5.847461700439453,
168
- "learning_rate": 1.141807012330699e-07,
169
- "loss": 0.9728,
170
  "step": 23
171
  },
172
  {
173
  "epoch": 0.9230769230769231,
174
- "grad_norm": 5.818417549133301,
175
- "learning_rate": 5.11112605663977e-08,
176
- "loss": 0.9635,
177
  "step": 24
178
  },
179
  {
180
  "epoch": 0.9615384615384616,
181
- "grad_norm": 5.898157119750977,
182
- "learning_rate": 1.2832707939284426e-08,
183
- "loss": 0.9585,
184
  "step": 25
185
  },
186
  {
187
  "epoch": 1.0,
188
- "grad_norm": 6.020485877990723,
189
  "learning_rate": 0.0,
190
- "loss": 0.9556,
191
  "step": 26
192
  },
193
  {
194
  "epoch": 1.0,
195
  "step": 26,
196
- "total_flos": 6.191745166684979e+16,
197
- "train_loss": 2.359536478152642,
198
- "train_runtime": 593.7041,
199
- "train_samples_per_second": 2.798,
200
  "train_steps_per_second": 0.044
201
  }
202
  ],
@@ -217,7 +217,7 @@
217
  "attributes": {}
218
  }
219
  },
220
- "total_flos": 6.191745166684979e+16,
221
  "train_batch_size": 4,
222
  "trial_name": null,
223
  "trial_params": null
 
10
  "log_history": [
11
  {
12
  "epoch": 0.038461538461538464,
13
+ "grad_norm": 775.9435424804688,
14
+ "learning_rate": 3.3333333333333333e-06,
15
+ "loss": 6.788,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.07692307692307693,
20
+ "grad_norm": 776.0359497070312,
21
+ "learning_rate": 6.666666666666667e-06,
22
+ "loss": 6.7875,
23
  "step": 2
24
  },
25
  {
26
  "epoch": 0.11538461538461539,
27
+ "grad_norm": 741.0389404296875,
28
+ "learning_rate": 1e-05,
29
+ "loss": 6.3773,
30
  "step": 3
31
  },
32
  {
33
  "epoch": 0.15384615384615385,
34
+ "grad_norm": 272.2483825683594,
35
+ "learning_rate": 9.953429730181653e-06,
36
+ "loss": 2.9091,
37
  "step": 4
38
  },
39
  {
40
  "epoch": 0.19230769230769232,
41
+ "grad_norm": 17.248239517211914,
42
+ "learning_rate": 9.814586436738998e-06,
43
+ "loss": 1.9227,
44
  "step": 5
45
  },
46
  {
47
  "epoch": 0.23076923076923078,
48
+ "grad_norm": 13.99523639678955,
49
+ "learning_rate": 9.586056507527266e-06,
50
+ "loss": 1.7623,
51
  "step": 6
52
  },
53
  {
54
  "epoch": 0.2692307692307692,
55
+ "grad_norm": 7.265631675720215,
56
+ "learning_rate": 9.272097022732444e-06,
57
+ "loss": 1.3012,
58
  "step": 7
59
  },
60
  {
61
  "epoch": 0.3076923076923077,
62
+ "grad_norm": 6.176580429077148,
63
+ "learning_rate": 8.8785564535221e-06,
64
+ "loss": 1.1133,
65
  "step": 8
66
  },
67
  {
68
  "epoch": 0.34615384615384615,
69
+ "grad_norm": 13.723352432250977,
70
+ "learning_rate": 8.412765716093273e-06,
71
+ "loss": 0.9558,
72
  "step": 9
73
  },
74
  {
75
  "epoch": 0.38461538461538464,
76
+ "grad_norm": 7.0516743659973145,
77
+ "learning_rate": 7.883401610574338e-06,
78
+ "loss": 0.6683,
79
  "step": 10
80
  },
81
  {
82
  "epoch": 0.4230769230769231,
83
+ "grad_norm": 4.867112636566162,
84
+ "learning_rate": 7.300325188655762e-06,
85
+ "loss": 0.3948,
86
  "step": 11
87
  },
88
  {
89
  "epoch": 0.46153846153846156,
90
+ "grad_norm": 4.434175491333008,
91
+ "learning_rate": 6.674398060854931e-06,
92
+ "loss": 0.2383,
93
  "step": 12
94
  },
95
  {
96
  "epoch": 0.5,
97
+ "grad_norm": 6.5230712890625,
98
+ "learning_rate": 6.0172800652631706e-06,
99
+ "loss": 0.12,
100
  "step": 13
101
  },
102
  {
103
  "epoch": 0.5384615384615384,
104
+ "grad_norm": 1.5070806741714478,
105
+ "learning_rate": 5.341212066823356e-06,
106
+ "loss": 0.0766,
107
  "step": 14
108
  },
109
  {
110
  "epoch": 0.5769230769230769,
111
+ "grad_norm": 0.9546927809715271,
112
+ "learning_rate": 4.6587879331766465e-06,
113
+ "loss": 0.0501,
114
  "step": 15
115
  },
116
  {
117
  "epoch": 0.6153846153846154,
118
+ "grad_norm": 1.0180060863494873,
119
+ "learning_rate": 3.982719934736832e-06,
120
+ "loss": 0.0352,
121
  "step": 16
122
  },
123
  {
124
  "epoch": 0.6538461538461539,
125
+ "grad_norm": 0.9739271998405457,
126
+ "learning_rate": 3.3256019391450696e-06,
127
+ "loss": 0.0303,
128
  "step": 17
129
  },
130
  {
131
  "epoch": 0.6923076923076923,
132
+ "grad_norm": 0.6057882905006409,
133
+ "learning_rate": 2.6996748113442397e-06,
134
+ "loss": 0.0273,
135
  "step": 18
136
  },
137
  {
138
  "epoch": 0.7307692307692307,
139
+ "grad_norm": 1.0546596050262451,
140
+ "learning_rate": 2.1165983894256647e-06,
141
+ "loss": 0.0248,
142
  "step": 19
143
  },
144
  {
145
  "epoch": 0.7692307692307693,
146
+ "grad_norm": 1.1213421821594238,
147
+ "learning_rate": 1.5872342839067305e-06,
148
+ "loss": 0.0232,
149
  "step": 20
150
  },
151
  {
152
  "epoch": 0.8076923076923077,
153
+ "grad_norm": 0.8204516172409058,
154
+ "learning_rate": 1.1214435464779006e-06,
155
+ "loss": 0.0218,
156
  "step": 21
157
  },
158
  {
159
  "epoch": 0.8461538461538461,
160
+ "grad_norm": 0.41814181208610535,
161
+ "learning_rate": 7.279029772675572e-07,
162
+ "loss": 0.0209,
163
  "step": 22
164
  },
165
  {
166
  "epoch": 0.8846153846153846,
167
+ "grad_norm": 0.59604811668396,
168
+ "learning_rate": 4.139434924727359e-07,
169
+ "loss": 0.0207,
170
  "step": 23
171
  },
172
  {
173
  "epoch": 0.9230769230769231,
174
+ "grad_norm": 0.43155673146247864,
175
+ "learning_rate": 1.8541356326100436e-07,
176
+ "loss": 0.0202,
177
  "step": 24
178
  },
179
  {
180
  "epoch": 0.9615384615384616,
181
+ "grad_norm": 0.3965984284877777,
182
+ "learning_rate": 4.657026981834623e-08,
183
+ "loss": 0.0198,
184
  "step": 25
185
  },
186
  {
187
  "epoch": 1.0,
188
+ "grad_norm": 0.37188079953193665,
189
  "learning_rate": 0.0,
190
+ "loss": 0.0199,
191
  "step": 26
192
  },
193
  {
194
  "epoch": 1.0,
195
  "step": 26,
196
+ "total_flos": 6.212373250362573e+16,
197
+ "train_loss": 1.2203706781594799,
198
+ "train_runtime": 584.4858,
199
+ "train_samples_per_second": 2.842,
200
  "train_steps_per_second": 0.044
201
  }
202
  ],
 
217
  "attributes": {}
218
  }
219
  },
220
+ "total_flos": 6.212373250362573e+16,
221
  "train_batch_size": 4,
222
  "trial_name": null,
223
  "trial_params": null