aleegis committed on
Commit
482f343
·
verified ·
1 Parent(s): 1ff19f9

Training in progress, step 8, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0f423296eed412a43a6481cc745dcdec555ea53d6a28376d4645a1ddd079536
3
  size 114106856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cc1bf0775452c66e630907c0e30a2edcc8c2a5d2c9f91001642fe44b130a208
3
  size 114106856
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66d20f7a58ec447cb248229527094c1e040e4abf7a30a7ba96e36ced2acf6a1f
3
  size 228544802
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e2cc1980a0a5851ee298663f56069e8c2ba79eb97a2420ff5b5e271fb0e040a
3
  size 228544802
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4521c23099d58f17393188663381aebb51989cdce90e8f78a53101aa7f6762a
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f58a3bdcb3b3e0e2a18613287482ebe97a8b7d43a03e373aecb6a3083d36f67a
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d02c348e1816565b2b6442fb1f80408b839c89e2b875b0885f460fddc30d428
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:878a311f455e5117765ebeb146d16abff0a6a13e00edc4f985ce3286b8b98242
3
  size 15920
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10b39575c844adc9b7f3b21dc61653deec5e4385e356ed923c053dd3a0af28ef
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:655f7670674eff250d65759fdd0dd7eb51e5705f842e7551d1b905e68127ee52
3
  size 15920
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f471cb69d8974507a8527c48ece168be306785671e9d2af2ae0093ad41d8c082
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f82d2f4765d9425a2cb42fb7f6a8ae64af504a87ac91ca4331ebf249fd3839c
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:deeae4cbb5fbf846924eaccf1a5361e183e579bf055c633f1be19df2176453f3
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69e3dc77712dffcf9da4464c25cdb4bb065d19be4a2fe275a6094e5e4b14cf84
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe01295183e1d4fc411b2626b446c42dc2ab5b7d50aa5625d94b209333edbc96
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ab104e8f1584a8b309e132c94b4e0a79cd51a31f9242b65a990050d441b09f1
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0201222966c8920f6720ff14467f45bab83af1fa44b5ac0c24f2dc10a07b7078
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abaebd55038ac3025a94ee8437398077c9b2ba8c28ec24464510da614d10bbf3
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:005a56237a167c99429d1870b7a8b9b818fd0bf3ab564f87a3c5bbdd83dfaa82
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7542770e76e460f3b4410b628150a5d74d2365cb698455d841f6190d3c07e8f1
3
  size 15920
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abf9c45e6f0130e9e72ce7a69d6f7581163c5f801eab4ae35fb91d5270488ffe
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:186ecb89c57722d40d2724a31a3f7415875659b15c27a352155664ea67992ca3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.01444043321299639,
6
  "eval_steps": 500,
7
- "global_step": 4,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -108,11 +108,111 @@
108
  "rewards/reward_short_completions/mean": -1054.09375,
109
  "rewards/reward_short_completions/std": 286.53179931640625,
110
  "step": 4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  }
112
  ],
113
  "logging_steps": 1,
114
  "max_steps": 20,
115
- "num_input_tokens_seen": 34300,
116
  "num_train_epochs": 1,
117
  "save_steps": 4,
118
  "stateful_callbacks": {
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.02888086642599278,
6
  "eval_steps": 500,
7
+ "global_step": 8,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
108
  "rewards/reward_short_completions/mean": -1054.09375,
109
  "rewards/reward_short_completions/std": 286.53179931640625,
110
  "step": 4
111
+ },
112
+ {
113
+ "clip_ratio/high_max": 0.0,
114
+ "clip_ratio/high_mean": 0.0,
115
+ "clip_ratio/low_mean": 0.0,
116
+ "clip_ratio/low_min": 0.0,
117
+ "clip_ratio/region_mean": 0.0,
118
+ "completions/clipped_ratio": 1.0,
119
+ "completions/max_length": 256.0,
120
+ "completions/max_terminated_length": 0.0,
121
+ "completions/mean_length": 256.0,
122
+ "completions/mean_terminated_length": 0.0,
123
+ "completions/min_length": 256.0,
124
+ "completions/min_terminated_length": 0.0,
125
+ "epoch": 0.018050541516245487,
126
+ "grad_norm": 0.17358577251434326,
127
+ "kl": 0.0008631395467091352,
128
+ "learning_rate": 9.698463103929542e-05,
129
+ "loss": 0.0,
130
+ "num_tokens": 43368.0,
131
+ "reward": -3253.4951171875,
132
+ "reward_std": 581.868408203125,
133
+ "rewards/reward_short_completions/mean": -1086.59375,
134
+ "rewards/reward_short_completions/std": 254.26222229003906,
135
+ "step": 5
136
+ },
137
+ {
138
+ "clip_ratio/high_max": 0.0,
139
+ "clip_ratio/high_mean": 0.0,
140
+ "clip_ratio/low_mean": 0.0,
141
+ "clip_ratio/low_min": 0.0,
142
+ "clip_ratio/region_mean": 0.0,
143
+ "completions/clipped_ratio": 0.8125,
144
+ "completions/max_length": 256.0,
145
+ "completions/max_terminated_length": 252.0,
146
+ "completions/mean_length": 234.09375,
147
+ "completions/mean_terminated_length": 139.1666717529297,
148
+ "completions/min_length": 2.0,
149
+ "completions/min_terminated_length": 2.0,
150
+ "epoch": 0.021660649819494584,
151
+ "grad_norm": 0.17494061589241028,
152
+ "kl": 0.0012313149636611342,
153
+ "learning_rate": 9.330127018922194e-05,
154
+ "loss": 0.1467,
155
+ "num_tokens": 51739.0,
156
+ "reward": -3192.11376953125,
157
+ "reward_std": 729.4127197265625,
158
+ "rewards/reward_short_completions/mean": -1066.09375,
159
+ "rewards/reward_short_completions/std": 342.6435546875,
160
+ "step": 6
161
+ },
162
+ {
163
+ "clip_ratio/high_max": 0.0,
164
+ "clip_ratio/high_mean": 0.0,
165
+ "clip_ratio/low_mean": 0.0,
166
+ "clip_ratio/low_min": 0.0,
167
+ "clip_ratio/region_mean": 0.0,
168
+ "completions/clipped_ratio": 0.84375,
169
+ "completions/max_length": 256.0,
170
+ "completions/max_terminated_length": 198.0,
171
+ "completions/mean_length": 230.5,
172
+ "completions/mean_terminated_length": 92.80000305175781,
173
+ "completions/min_length": 26.0,
174
+ "completions/min_terminated_length": 26.0,
175
+ "epoch": 0.02527075812274368,
176
+ "grad_norm": 0.19088327884674072,
177
+ "kl": 0.0013786845956929028,
178
+ "learning_rate": 8.83022221559489e-05,
179
+ "loss": 0.1449,
180
+ "num_tokens": 60243.0,
181
+ "reward": -2963.8046875,
182
+ "reward_std": 874.9676513671875,
183
+ "rewards/reward_short_completions/mean": -989.84375,
184
+ "rewards/reward_short_completions/std": 346.3221435546875,
185
+ "step": 7
186
+ },
187
+ {
188
+ "clip_ratio/high_max": 0.0,
189
+ "clip_ratio/high_mean": 0.0,
190
+ "clip_ratio/low_mean": 0.0,
191
+ "clip_ratio/low_min": 0.0,
192
+ "clip_ratio/region_mean": 0.0,
193
+ "completions/clipped_ratio": 0.84375,
194
+ "completions/max_length": 256.0,
195
+ "completions/max_terminated_length": 242.0,
196
+ "completions/mean_length": 234.3125,
197
+ "completions/mean_terminated_length": 117.20000457763672,
198
+ "completions/min_length": 2.0,
199
+ "completions/min_terminated_length": 2.0,
200
+ "epoch": 0.02888086642599278,
201
+ "grad_norm": 0.19052405655384064,
202
+ "kl": 0.0019435517024248838,
203
+ "learning_rate": 8.213938048432697e-05,
204
+ "loss": 0.1222,
205
+ "num_tokens": 68921.0,
206
+ "reward": -3151.4111328125,
207
+ "reward_std": 903.66650390625,
208
+ "rewards/reward_short_completions/mean": -1052.5,
209
+ "rewards/reward_short_completions/std": 332.7859802246094,
210
+ "step": 8
211
  }
212
  ],
213
  "logging_steps": 1,
214
  "max_steps": 20,
215
+ "num_input_tokens_seen": 68921,
216
  "num_train_epochs": 1,
217
  "save_steps": 4,
218
  "stateful_callbacks": {