RobertoSonic commited on
Commit
5d8c4c8
·
verified ·
1 Parent(s): 7b44739

End of training

Browse files
README.md CHANGED
@@ -18,8 +18,8 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [microsoft/swinv2-base-patch4-window8-256](https://huggingface.co/microsoft/swinv2-base-patch4-window8-256) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 1.0038
22
- - Accuracy: 0.6923
23
 
24
  ## Model description
25
 
 
18
 
19
  This model is a fine-tuned version of [microsoft/swinv2-base-patch4-window8-256](https://huggingface.co/microsoft/swinv2-base-patch4-window8-256) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.8423
22
+ - Accuracy: 0.75
23
 
24
  ## Model description
25
 
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 41.869565217391305,
3
+ "eval_accuracy": 0.75,
4
+ "eval_loss": 0.842289388179779,
5
+ "eval_runtime": 1.5112,
6
+ "eval_samples_per_second": 34.41,
7
+ "eval_steps_per_second": 1.323,
8
+ "total_flos": 2.8899664857724355e+18,
9
+ "train_loss": 2.0296274548485167,
10
+ "train_runtime": 2115.3253,
11
+ "train_samples_per_second": 14.474,
12
+ "train_steps_per_second": 0.099
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 41.869565217391305,
3
+ "eval_accuracy": 0.75,
4
+ "eval_loss": 0.842289388179779,
5
+ "eval_runtime": 1.5112,
6
+ "eval_samples_per_second": 34.41,
7
+ "eval_steps_per_second": 1.323
8
+ }
runs/Jan21_00-03-14_feec14bb91f4/events.out.tfevents.1737419950.feec14bb91f4.14041.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f82675f2094806a6af5de6fea4cc6c4052d716668a34962ddffbb287d6f3c58
3
+ size 411
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 41.869565217391305,
3
+ "total_flos": 2.8899664857724355e+18,
4
+ "train_loss": 2.0296274548485167,
5
+ "train_runtime": 2115.3253,
6
+ "train_samples_per_second": 14.474,
7
+ "train_steps_per_second": 0.099
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,567 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.75,
3
+ "best_model_checkpoint": "swinv2-base-patch4-window8-256-dmae-humeda-DAV15/checkpoint-80",
4
+ "epoch": 41.869565217391305,
5
+ "eval_steps": 500,
6
+ "global_step": 210,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.8695652173913043,
13
+ "eval_accuracy": 0.3076923076923077,
14
+ "eval_loss": 1.5972468852996826,
15
+ "eval_runtime": 1.8054,
16
+ "eval_samples_per_second": 28.802,
17
+ "eval_steps_per_second": 1.108,
18
+ "step": 5
19
+ },
20
+ {
21
+ "epoch": 1.8695652173913042,
22
+ "grad_norm": 10.893352508544922,
23
+ "learning_rate": 9.523809523809525e-06,
24
+ "loss": 6.7562,
25
+ "step": 10
26
+ },
27
+ {
28
+ "epoch": 1.8695652173913042,
29
+ "eval_accuracy": 0.3076923076923077,
30
+ "eval_loss": 1.5356961488723755,
31
+ "eval_runtime": 1.4107,
32
+ "eval_samples_per_second": 36.861,
33
+ "eval_steps_per_second": 1.418,
34
+ "step": 10
35
+ },
36
+ {
37
+ "epoch": 2.869565217391304,
38
+ "eval_accuracy": 0.40384615384615385,
39
+ "eval_loss": 1.4953821897506714,
40
+ "eval_runtime": 1.4409,
41
+ "eval_samples_per_second": 36.088,
42
+ "eval_steps_per_second": 1.388,
43
+ "step": 15
44
+ },
45
+ {
46
+ "epoch": 3.869565217391304,
47
+ "grad_norm": 9.43877124786377,
48
+ "learning_rate": 1.904761904761905e-05,
49
+ "loss": 6.2842,
50
+ "step": 20
51
+ },
52
+ {
53
+ "epoch": 3.869565217391304,
54
+ "eval_accuracy": 0.34615384615384615,
55
+ "eval_loss": 1.461243987083435,
56
+ "eval_runtime": 1.6776,
57
+ "eval_samples_per_second": 30.997,
58
+ "eval_steps_per_second": 1.192,
59
+ "step": 20
60
+ },
61
+ {
62
+ "epoch": 4.869565217391305,
63
+ "eval_accuracy": 0.3269230769230769,
64
+ "eval_loss": 1.387509822845459,
65
+ "eval_runtime": 1.4405,
66
+ "eval_samples_per_second": 36.1,
67
+ "eval_steps_per_second": 1.388,
68
+ "step": 25
69
+ },
70
+ {
71
+ "epoch": 5.869565217391305,
72
+ "grad_norm": 13.647711753845215,
73
+ "learning_rate": 1.9888308262251286e-05,
74
+ "loss": 4.9858,
75
+ "step": 30
76
+ },
77
+ {
78
+ "epoch": 5.869565217391305,
79
+ "eval_accuracy": 0.34615384615384615,
80
+ "eval_loss": 1.3369739055633545,
81
+ "eval_runtime": 1.4598,
82
+ "eval_samples_per_second": 35.622,
83
+ "eval_steps_per_second": 1.37,
84
+ "step": 30
85
+ },
86
+ {
87
+ "epoch": 6.869565217391305,
88
+ "eval_accuracy": 0.4423076923076923,
89
+ "eval_loss": 1.2739052772521973,
90
+ "eval_runtime": 1.8216,
91
+ "eval_samples_per_second": 28.546,
92
+ "eval_steps_per_second": 1.098,
93
+ "step": 35
94
+ },
95
+ {
96
+ "epoch": 7.869565217391305,
97
+ "grad_norm": 18.52330207824707,
98
+ "learning_rate": 1.9505415489478293e-05,
99
+ "loss": 3.5596,
100
+ "step": 40
101
+ },
102
+ {
103
+ "epoch": 7.869565217391305,
104
+ "eval_accuracy": 0.4807692307692308,
105
+ "eval_loss": 1.1773827075958252,
106
+ "eval_runtime": 1.4754,
107
+ "eval_samples_per_second": 35.244,
108
+ "eval_steps_per_second": 1.356,
109
+ "step": 40
110
+ },
111
+ {
112
+ "epoch": 8.869565217391305,
113
+ "eval_accuracy": 0.4807692307692308,
114
+ "eval_loss": 1.121440052986145,
115
+ "eval_runtime": 1.4488,
116
+ "eval_samples_per_second": 35.893,
117
+ "eval_steps_per_second": 1.38,
118
+ "step": 45
119
+ },
120
+ {
121
+ "epoch": 9.869565217391305,
122
+ "grad_norm": 18.99748420715332,
123
+ "learning_rate": 1.8860495104301346e-05,
124
+ "loss": 2.6814,
125
+ "step": 50
126
+ },
127
+ {
128
+ "epoch": 9.869565217391305,
129
+ "eval_accuracy": 0.5192307692307693,
130
+ "eval_loss": 1.0998505353927612,
131
+ "eval_runtime": 1.5002,
132
+ "eval_samples_per_second": 34.663,
133
+ "eval_steps_per_second": 1.333,
134
+ "step": 50
135
+ },
136
+ {
137
+ "epoch": 10.869565217391305,
138
+ "eval_accuracy": 0.46153846153846156,
139
+ "eval_loss": 1.1773051023483276,
140
+ "eval_runtime": 1.8237,
141
+ "eval_samples_per_second": 28.513,
142
+ "eval_steps_per_second": 1.097,
143
+ "step": 55
144
+ },
145
+ {
146
+ "epoch": 11.869565217391305,
147
+ "grad_norm": 22.57859992980957,
148
+ "learning_rate": 1.7971325072229227e-05,
149
+ "loss": 2.3236,
150
+ "step": 60
151
+ },
152
+ {
153
+ "epoch": 11.869565217391305,
154
+ "eval_accuracy": 0.5192307692307693,
155
+ "eval_loss": 0.9873736500740051,
156
+ "eval_runtime": 1.8064,
157
+ "eval_samples_per_second": 28.786,
158
+ "eval_steps_per_second": 1.107,
159
+ "step": 60
160
+ },
161
+ {
162
+ "epoch": 12.869565217391305,
163
+ "eval_accuracy": 0.5,
164
+ "eval_loss": 1.1123789548873901,
165
+ "eval_runtime": 1.4402,
166
+ "eval_samples_per_second": 36.106,
167
+ "eval_steps_per_second": 1.389,
168
+ "step": 65
169
+ },
170
+ {
171
+ "epoch": 13.869565217391305,
172
+ "grad_norm": 28.97609519958496,
173
+ "learning_rate": 1.686241637868734e-05,
174
+ "loss": 1.8037,
175
+ "step": 70
176
+ },
177
+ {
178
+ "epoch": 13.869565217391305,
179
+ "eval_accuracy": 0.6538461538461539,
180
+ "eval_loss": 0.8936060667037964,
181
+ "eval_runtime": 1.4709,
182
+ "eval_samples_per_second": 35.353,
183
+ "eval_steps_per_second": 1.36,
184
+ "step": 70
185
+ },
186
+ {
187
+ "epoch": 14.869565217391305,
188
+ "eval_accuracy": 0.4423076923076923,
189
+ "eval_loss": 1.2063868045806885,
190
+ "eval_runtime": 1.855,
191
+ "eval_samples_per_second": 28.032,
192
+ "eval_steps_per_second": 1.078,
193
+ "step": 75
194
+ },
195
+ {
196
+ "epoch": 15.869565217391305,
197
+ "grad_norm": 27.648298263549805,
198
+ "learning_rate": 1.5564337355766412e-05,
199
+ "loss": 1.6474,
200
+ "step": 80
201
+ },
202
+ {
203
+ "epoch": 15.869565217391305,
204
+ "eval_accuracy": 0.75,
205
+ "eval_loss": 0.842289388179779,
206
+ "eval_runtime": 1.7992,
207
+ "eval_samples_per_second": 28.901,
208
+ "eval_steps_per_second": 1.112,
209
+ "step": 80
210
+ },
211
+ {
212
+ "epoch": 16.869565217391305,
213
+ "eval_accuracy": 0.6346153846153846,
214
+ "eval_loss": 1.0133720636367798,
215
+ "eval_runtime": 1.4225,
216
+ "eval_samples_per_second": 36.554,
217
+ "eval_steps_per_second": 1.406,
218
+ "step": 85
219
+ },
220
+ {
221
+ "epoch": 17.869565217391305,
222
+ "grad_norm": 20.62474250793457,
223
+ "learning_rate": 1.4112871031306118e-05,
224
+ "loss": 1.5505,
225
+ "step": 90
226
+ },
227
+ {
228
+ "epoch": 17.869565217391305,
229
+ "eval_accuracy": 0.6923076923076923,
230
+ "eval_loss": 0.8965399861335754,
231
+ "eval_runtime": 1.4632,
232
+ "eval_samples_per_second": 35.54,
233
+ "eval_steps_per_second": 1.367,
234
+ "step": 90
235
+ },
236
+ {
237
+ "epoch": 18.869565217391305,
238
+ "eval_accuracy": 0.6538461538461539,
239
+ "eval_loss": 0.9214943647384644,
240
+ "eval_runtime": 1.4703,
241
+ "eval_samples_per_second": 35.368,
242
+ "eval_steps_per_second": 1.36,
243
+ "step": 95
244
+ },
245
+ {
246
+ "epoch": 19.869565217391305,
247
+ "grad_norm": 31.890165328979492,
248
+ "learning_rate": 1.2548028728946548e-05,
249
+ "loss": 1.2697,
250
+ "step": 100
251
+ },
252
+ {
253
+ "epoch": 19.869565217391305,
254
+ "eval_accuracy": 0.6153846153846154,
255
+ "eval_loss": 1.0155353546142578,
256
+ "eval_runtime": 1.4323,
257
+ "eval_samples_per_second": 36.306,
258
+ "eval_steps_per_second": 1.396,
259
+ "step": 100
260
+ },
261
+ {
262
+ "epoch": 20.869565217391305,
263
+ "eval_accuracy": 0.7115384615384616,
264
+ "eval_loss": 0.8500422835350037,
265
+ "eval_runtime": 1.4418,
266
+ "eval_samples_per_second": 36.065,
267
+ "eval_steps_per_second": 1.387,
268
+ "step": 105
269
+ },
270
+ {
271
+ "epoch": 21.869565217391305,
272
+ "grad_norm": 16.111915588378906,
273
+ "learning_rate": 1.0912947110386484e-05,
274
+ "loss": 1.1783,
275
+ "step": 110
276
+ },
277
+ {
278
+ "epoch": 21.869565217391305,
279
+ "eval_accuracy": 0.6538461538461539,
280
+ "eval_loss": 0.9573343992233276,
281
+ "eval_runtime": 1.4883,
282
+ "eval_samples_per_second": 34.938,
283
+ "eval_steps_per_second": 1.344,
284
+ "step": 110
285
+ },
286
+ {
287
+ "epoch": 22.869565217391305,
288
+ "eval_accuracy": 0.6923076923076923,
289
+ "eval_loss": 0.8914616107940674,
290
+ "eval_runtime": 1.7921,
291
+ "eval_samples_per_second": 29.016,
292
+ "eval_steps_per_second": 1.116,
293
+ "step": 115
294
+ },
295
+ {
296
+ "epoch": 23.869565217391305,
297
+ "grad_norm": 16.878934860229492,
298
+ "learning_rate": 9.252699064135759e-06,
299
+ "loss": 1.0235,
300
+ "step": 120
301
+ },
302
+ {
303
+ "epoch": 23.869565217391305,
304
+ "eval_accuracy": 0.6538461538461539,
305
+ "eval_loss": 0.9831422567367554,
306
+ "eval_runtime": 1.4719,
307
+ "eval_samples_per_second": 35.328,
308
+ "eval_steps_per_second": 1.359,
309
+ "step": 120
310
+ },
311
+ {
312
+ "epoch": 24.869565217391305,
313
+ "eval_accuracy": 0.6538461538461539,
314
+ "eval_loss": 0.9464498162269592,
315
+ "eval_runtime": 1.4685,
316
+ "eval_samples_per_second": 35.409,
317
+ "eval_steps_per_second": 1.362,
318
+ "step": 125
319
+ },
320
+ {
321
+ "epoch": 25.869565217391305,
322
+ "grad_norm": 22.73305892944336,
323
+ "learning_rate": 7.613051219968624e-06,
324
+ "loss": 0.9706,
325
+ "step": 130
326
+ },
327
+ {
328
+ "epoch": 25.869565217391305,
329
+ "eval_accuracy": 0.6923076923076923,
330
+ "eval_loss": 0.9413173198699951,
331
+ "eval_runtime": 1.7224,
332
+ "eval_samples_per_second": 30.191,
333
+ "eval_steps_per_second": 1.161,
334
+ "step": 130
335
+ },
336
+ {
337
+ "epoch": 26.869565217391305,
338
+ "eval_accuracy": 0.6346153846153846,
339
+ "eval_loss": 1.0248907804489136,
340
+ "eval_runtime": 1.7923,
341
+ "eval_samples_per_second": 29.013,
342
+ "eval_steps_per_second": 1.116,
343
+ "step": 135
344
+ },
345
+ {
346
+ "epoch": 27.869565217391305,
347
+ "grad_norm": 21.775854110717773,
348
+ "learning_rate": 6.039202339608432e-06,
349
+ "loss": 0.9409,
350
+ "step": 140
351
+ },
352
+ {
353
+ "epoch": 27.869565217391305,
354
+ "eval_accuracy": 0.6538461538461539,
355
+ "eval_loss": 0.9753580093383789,
356
+ "eval_runtime": 1.8553,
357
+ "eval_samples_per_second": 28.028,
358
+ "eval_steps_per_second": 1.078,
359
+ "step": 140
360
+ },
361
+ {
362
+ "epoch": 28.869565217391305,
363
+ "eval_accuracy": 0.7115384615384616,
364
+ "eval_loss": 0.9530434012413025,
365
+ "eval_runtime": 1.7906,
366
+ "eval_samples_per_second": 29.04,
367
+ "eval_steps_per_second": 1.117,
368
+ "step": 145
369
+ },
370
+ {
371
+ "epoch": 29.869565217391305,
372
+ "grad_norm": 16.502880096435547,
373
+ "learning_rate": 4.5745373613424075e-06,
374
+ "loss": 0.9447,
375
+ "step": 150
376
+ },
377
+ {
378
+ "epoch": 29.869565217391305,
379
+ "eval_accuracy": 0.6538461538461539,
380
+ "eval_loss": 1.02655827999115,
381
+ "eval_runtime": 1.4711,
382
+ "eval_samples_per_second": 35.348,
383
+ "eval_steps_per_second": 1.36,
384
+ "step": 150
385
+ },
386
+ {
387
+ "epoch": 30.869565217391305,
388
+ "eval_accuracy": 0.6538461538461539,
389
+ "eval_loss": 1.0819494724273682,
390
+ "eval_runtime": 1.4452,
391
+ "eval_samples_per_second": 35.981,
392
+ "eval_steps_per_second": 1.384,
393
+ "step": 155
394
+ },
395
+ {
396
+ "epoch": 31.869565217391305,
397
+ "grad_norm": 14.975761413574219,
398
+ "learning_rate": 3.2594314447468457e-06,
399
+ "loss": 0.8352,
400
+ "step": 160
401
+ },
402
+ {
403
+ "epoch": 31.869565217391305,
404
+ "eval_accuracy": 0.6923076923076923,
405
+ "eval_loss": 0.9921611547470093,
406
+ "eval_runtime": 1.6676,
407
+ "eval_samples_per_second": 31.182,
408
+ "eval_steps_per_second": 1.199,
409
+ "step": 160
410
+ },
411
+ {
412
+ "epoch": 32.869565217391305,
413
+ "eval_accuracy": 0.6923076923076923,
414
+ "eval_loss": 0.9754781126976013,
415
+ "eval_runtime": 1.4838,
416
+ "eval_samples_per_second": 35.046,
417
+ "eval_steps_per_second": 1.348,
418
+ "step": 165
419
+ },
420
+ {
421
+ "epoch": 33.869565217391305,
422
+ "grad_norm": 16.957916259765625,
423
+ "learning_rate": 2.130136983393112e-06,
424
+ "loss": 0.8055,
425
+ "step": 170
426
+ },
427
+ {
428
+ "epoch": 33.869565217391305,
429
+ "eval_accuracy": 0.7115384615384616,
430
+ "eval_loss": 0.9768009185791016,
431
+ "eval_runtime": 1.4548,
432
+ "eval_samples_per_second": 35.745,
433
+ "eval_steps_per_second": 1.375,
434
+ "step": 170
435
+ },
436
+ {
437
+ "epoch": 34.869565217391305,
438
+ "eval_accuracy": 0.6923076923076923,
439
+ "eval_loss": 0.9950000047683716,
440
+ "eval_runtime": 1.6477,
441
+ "eval_samples_per_second": 31.56,
442
+ "eval_steps_per_second": 1.214,
443
+ "step": 175
444
+ },
445
+ {
446
+ "epoch": 35.869565217391305,
447
+ "grad_norm": 13.693328857421875,
448
+ "learning_rate": 1.2177842662977136e-06,
449
+ "loss": 0.7481,
450
+ "step": 180
451
+ },
452
+ {
453
+ "epoch": 35.869565217391305,
454
+ "eval_accuracy": 0.6923076923076923,
455
+ "eval_loss": 1.0135220289230347,
456
+ "eval_runtime": 1.4654,
457
+ "eval_samples_per_second": 35.486,
458
+ "eval_steps_per_second": 1.365,
459
+ "step": 180
460
+ },
461
+ {
462
+ "epoch": 36.869565217391305,
463
+ "eval_accuracy": 0.6923076923076923,
464
+ "eval_loss": 1.0168299674987793,
465
+ "eval_runtime": 1.4666,
466
+ "eval_samples_per_second": 35.456,
467
+ "eval_steps_per_second": 1.364,
468
+ "step": 185
469
+ },
470
+ {
471
+ "epoch": 37.869565217391305,
472
+ "grad_norm": 16.68096923828125,
473
+ "learning_rate": 5.475233360227516e-07,
474
+ "loss": 0.7483,
475
+ "step": 190
476
+ },
477
+ {
478
+ "epoch": 37.869565217391305,
479
+ "eval_accuracy": 0.6923076923076923,
480
+ "eval_loss": 1.009111762046814,
481
+ "eval_runtime": 1.4779,
482
+ "eval_samples_per_second": 35.184,
483
+ "eval_steps_per_second": 1.353,
484
+ "step": 190
485
+ },
486
+ {
487
+ "epoch": 38.869565217391305,
488
+ "eval_accuracy": 0.6923076923076923,
489
+ "eval_loss": 1.005540370941162,
490
+ "eval_runtime": 1.5862,
491
+ "eval_samples_per_second": 32.783,
492
+ "eval_steps_per_second": 1.261,
493
+ "step": 195
494
+ },
495
+ {
496
+ "epoch": 39.869565217391305,
497
+ "grad_norm": 27.558324813842773,
498
+ "learning_rate": 1.3783069908621772e-07,
499
+ "loss": 0.8145,
500
+ "step": 200
501
+ },
502
+ {
503
+ "epoch": 39.869565217391305,
504
+ "eval_accuracy": 0.6923076923076923,
505
+ "eval_loss": 1.0039584636688232,
506
+ "eval_runtime": 1.47,
507
+ "eval_samples_per_second": 35.373,
508
+ "eval_steps_per_second": 1.361,
509
+ "step": 200
510
+ },
511
+ {
512
+ "epoch": 40.869565217391305,
513
+ "eval_accuracy": 0.6923076923076923,
514
+ "eval_loss": 1.0038609504699707,
515
+ "eval_runtime": 1.466,
516
+ "eval_samples_per_second": 35.471,
517
+ "eval_steps_per_second": 1.364,
518
+ "step": 205
519
+ },
520
+ {
521
+ "epoch": 41.869565217391305,
522
+ "grad_norm": 17.93744468688965,
523
+ "learning_rate": 0.0,
524
+ "loss": 0.7501,
525
+ "step": 210
526
+ },
527
+ {
528
+ "epoch": 41.869565217391305,
529
+ "eval_accuracy": 0.6923076923076923,
530
+ "eval_loss": 1.0038418769836426,
531
+ "eval_runtime": 1.8649,
532
+ "eval_samples_per_second": 27.884,
533
+ "eval_steps_per_second": 1.072,
534
+ "step": 210
535
+ },
536
+ {
537
+ "epoch": 41.869565217391305,
538
+ "step": 210,
539
+ "total_flos": 2.8899664857724355e+18,
540
+ "train_loss": 2.0296274548485167,
541
+ "train_runtime": 2115.3253,
542
+ "train_samples_per_second": 14.474,
543
+ "train_steps_per_second": 0.099
544
+ }
545
+ ],
546
+ "logging_steps": 10,
547
+ "max_steps": 210,
548
+ "num_input_tokens_seen": 0,
549
+ "num_train_epochs": 42,
550
+ "save_steps": 500,
551
+ "stateful_callbacks": {
552
+ "TrainerControl": {
553
+ "args": {
554
+ "should_epoch_stop": false,
555
+ "should_evaluate": false,
556
+ "should_log": false,
557
+ "should_save": true,
558
+ "should_training_stop": true
559
+ },
560
+ "attributes": {}
561
+ }
562
+ },
563
+ "total_flos": 2.8899664857724355e+18,
564
+ "train_batch_size": 32,
565
+ "trial_name": null,
566
+ "trial_params": null
567
+ }