Training in progress, step 600, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +851 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aae4c3f15c0fb5993a83d9cae4d7cfc19a115777a716c45a175141b4fae1d9b7
 size 66126768

 version https://git-lfs.github.com/spec/v1
+oid sha256:1fdd0334d51ecc9ab8aa18db51be282267c4701fa1d13c3de14eb7534664f5eb
 size 66126768

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b760352e797799b50250e1c394b15599b19c312ac613701c2c814a669a6623f6
 size 34141829

 version https://git-lfs.github.com/spec/v1
+oid sha256:14eb5407563d74dce1f96073fa6908dfe69c57e3d6179c798664936d210508c4
 size 34141829

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4801416a03beb0f63c300670679d3fb6cca48da8259362e9be9e6ffae0c5ffd0
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:9445552595536daf5bd8731be4eabb308bd26e76a3f4f0c20c4aa55fcf9ea202
 size 14645

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d48ee9d9909680ca611f0a95c8cefcadb338dd2851b722337f41dd0606fbe3b
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:cb7fde5111803012042c93a73aa191336bb6e10b3ad44f6bd1d94fc7008a22b6
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61c66fe4dec6fa55b8735a953c99ab52596618788488f1afa2195e9f78483189
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:1785dec699279cf735b471c940e3c7215708e10021bc4f35a643cbd79b28aacf
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 10.21505376344086,
   "eval_steps": 120,
-  "global_step": 480,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3400,6 +3400,854 @@
       "eval_samples_per_second": 0.461,
       "eval_steps_per_second": 0.461,
       "step": 480
     }
   ],
   "logging_steps": 1,
@@ -3419,7 +4267,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.154612346230088e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 12.774193548387096,
   "eval_steps": 120,
+  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 0.461,
       "eval_steps_per_second": 0.461,
       "step": 480
+    },
+    {
+      "epoch": 10.236559139784946,
+      "grad_norm": 0.6188676953315735,
+      "learning_rate": 0.00012533756949960288,
+      "loss": 0.382239431142807,
+      "step": 481
+    },
+    {
+      "epoch": 10.258064516129032,
+      "grad_norm": 0.6917077302932739,
+      "learning_rate": 0.00012517871326449563,
+      "loss": 0.36738157272338867,
+      "step": 482
+    },
+    {
+      "epoch": 10.279569892473118,
+      "grad_norm": 0.710863471031189,
+      "learning_rate": 0.0001250198570293884,
+      "loss": 0.3310585916042328,
+      "step": 483
+    },
+    {
+      "epoch": 10.301075268817204,
+      "grad_norm": 0.74619460105896,
+      "learning_rate": 0.00012486100079428118,
+      "loss": 0.5340608358383179,
+      "step": 484
+    },
+    {
+      "epoch": 10.32258064516129,
+      "grad_norm": 0.8272573351860046,
+      "learning_rate": 0.00012470214455917396,
+      "loss": 0.39920294284820557,
+      "step": 485
+    },
+    {
+      "epoch": 10.344086021505376,
+      "grad_norm": 0.8804677724838257,
+      "learning_rate": 0.00012454328832406673,
+      "loss": 0.2775310277938843,
+      "step": 486
+    },
+    {
+      "epoch": 10.365591397849462,
+      "grad_norm": 0.7785136103630066,
+      "learning_rate": 0.00012438443208895948,
+      "loss": 0.32668304443359375,
+      "step": 487
+    },
+    {
+      "epoch": 10.387096774193548,
+      "grad_norm": 0.7213490009307861,
+      "learning_rate": 0.00012422557585385226,
+      "loss": 0.3376646041870117,
+      "step": 488
+    },
+    {
+      "epoch": 10.408602150537634,
+      "grad_norm": 0.7177138328552246,
+      "learning_rate": 0.00012406671961874503,
+      "loss": 0.42496711015701294,
+      "step": 489
+    },
+    {
+      "epoch": 10.43010752688172,
+      "grad_norm": 0.7309224009513855,
+      "learning_rate": 0.00012390786338363784,
+      "loss": 0.3748842179775238,
+      "step": 490
+    },
+    {
+      "epoch": 10.451612903225806,
+      "grad_norm": 0.805568277835846,
+      "learning_rate": 0.00012374900714853058,
+      "loss": 0.39560365676879883,
+      "step": 491
+    },
+    {
+      "epoch": 10.473118279569892,
+      "grad_norm": 0.6789179444313049,
+      "learning_rate": 0.00012359015091342336,
+      "loss": 0.3961871862411499,
+      "step": 492
+    },
+    {
+      "epoch": 10.494623655913978,
+      "grad_norm": 0.6529718041419983,
+      "learning_rate": 0.00012343129467831614,
+      "loss": 0.40176600217819214,
+      "step": 493
+    },
+    {
+      "epoch": 10.516129032258064,
+      "grad_norm": 0.6573322415351868,
+      "learning_rate": 0.0001232724384432089,
+      "loss": 0.36849701404571533,
+      "step": 494
+    },
+    {
+      "epoch": 10.53763440860215,
+      "grad_norm": 0.7451475262641907,
+      "learning_rate": 0.0001231135822081017,
+      "loss": 0.39875566959381104,
+      "step": 495
+    },
+    {
+      "epoch": 10.559139784946236,
+      "grad_norm": 0.6852983236312866,
+      "learning_rate": 0.00012295472597299444,
+      "loss": 0.39747345447540283,
+      "step": 496
+    },
+    {
+      "epoch": 10.580645161290322,
+      "grad_norm": 0.7666296362876892,
+      "learning_rate": 0.0001227958697378872,
+      "loss": 0.38431617617607117,
+      "step": 497
+    },
+    {
+      "epoch": 10.602150537634408,
+      "grad_norm": 0.7944223284721375,
+      "learning_rate": 0.00012263701350278,
+      "loss": 0.3355713486671448,
+      "step": 498
+    },
+    {
+      "epoch": 10.623655913978494,
+      "grad_norm": 0.7261567711830139,
+      "learning_rate": 0.00012247815726767276,
+      "loss": 0.42981234192848206,
+      "step": 499
+    },
+    {
+      "epoch": 10.64516129032258,
+      "grad_norm": 0.7246410250663757,
+      "learning_rate": 0.0001223193010325655,
+      "loss": 0.41270819306373596,
+      "step": 500
+    },
+    {
+      "epoch": 10.666666666666666,
+      "grad_norm": 0.7120943665504456,
+      "learning_rate": 0.0001221604447974583,
+      "loss": 0.3995019197463989,
+      "step": 501
+    },
+    {
+      "epoch": 10.688172043010752,
+      "grad_norm": 0.7080013155937195,
+      "learning_rate": 0.00012200158856235109,
+      "loss": 0.3284502625465393,
+      "step": 502
+    },
+    {
+      "epoch": 10.709677419354838,
+      "grad_norm": 0.7086816430091858,
+      "learning_rate": 0.00012184273232724385,
+      "loss": 0.41737645864486694,
+      "step": 503
+    },
+    {
+      "epoch": 10.731182795698924,
+      "grad_norm": 0.6332603693008423,
+      "learning_rate": 0.00012168387609213663,
+      "loss": 0.31723546981811523,
+      "step": 504
+    },
+    {
+      "epoch": 10.75268817204301,
+      "grad_norm": 0.6930424571037292,
+      "learning_rate": 0.0001215250198570294,
+      "loss": 0.36203786730766296,
+      "step": 505
+    },
+    {
+      "epoch": 10.774193548387096,
+      "grad_norm": 0.758611798286438,
+      "learning_rate": 0.00012136616362192217,
+      "loss": 0.38453805446624756,
+      "step": 506
+    },
+    {
+      "epoch": 10.795698924731182,
+      "grad_norm": 0.7258947491645813,
+      "learning_rate": 0.00012120730738681494,
+      "loss": 0.3250986635684967,
+      "step": 507
+    },
+    {
+      "epoch": 10.817204301075268,
+      "grad_norm": 0.8399383425712585,
+      "learning_rate": 0.0001210484511517077,
+      "loss": 0.36206358671188354,
+      "step": 508
+    },
+    {
+      "epoch": 10.838709677419354,
+      "grad_norm": 0.7296494841575623,
+      "learning_rate": 0.00012088959491660048,
+      "loss": 0.2987769544124603,
+      "step": 509
+    },
+    {
+      "epoch": 10.86021505376344,
+      "grad_norm": 0.724338710308075,
+      "learning_rate": 0.00012073073868149326,
+      "loss": 0.39433813095092773,
+      "step": 510
+    },
+    {
+      "epoch": 10.881720430107526,
+      "grad_norm": 1.2827945947647095,
+      "learning_rate": 0.00012057188244638602,
+      "loss": 0.42414143681526184,
+      "step": 511
+    },
+    {
+      "epoch": 10.903225806451612,
+      "grad_norm": 0.6901166439056396,
+      "learning_rate": 0.0001204130262112788,
+      "loss": 0.3207138180732727,
+      "step": 512
+    },
+    {
+      "epoch": 10.924731182795698,
+      "grad_norm": 0.7400676012039185,
+      "learning_rate": 0.00012025416997617156,
+      "loss": 0.4030807316303253,
+      "step": 513
+    },
+    {
+      "epoch": 10.946236559139784,
+      "grad_norm": 0.7329428791999817,
+      "learning_rate": 0.00012009531374106436,
+      "loss": 0.4216151833534241,
+      "step": 514
+    },
+    {
+      "epoch": 10.967741935483872,
+      "grad_norm": 0.7346771359443665,
+      "learning_rate": 0.00011993645750595712,
+      "loss": 0.4015253782272339,
+      "step": 515
+    },
+    {
+      "epoch": 10.989247311827956,
+      "grad_norm": 0.7754795551300049,
+      "learning_rate": 0.0001197776012708499,
+      "loss": 0.3754139542579651,
+      "step": 516
+    },
+    {
+      "epoch": 11.0,
+      "grad_norm": 0.9674854278564453,
+      "learning_rate": 0.00011961874503574266,
+      "loss": 0.4562542736530304,
+      "step": 517
+    },
+    {
+      "epoch": 11.021505376344086,
+      "grad_norm": 0.6914543509483337,
+      "learning_rate": 0.00011945988880063544,
+      "loss": 0.3569665849208832,
+      "step": 518
+    },
+    {
+      "epoch": 11.043010752688172,
+      "grad_norm": 0.7606181502342224,
+      "learning_rate": 0.0001193010325655282,
+      "loss": 0.3248334228992462,
+      "step": 519
+    },
+    {
+      "epoch": 11.064516129032258,
+      "grad_norm": 0.764230489730835,
+      "learning_rate": 0.00011914217633042097,
+      "loss": 0.30292606353759766,
+      "step": 520
+    },
+    {
+      "epoch": 11.086021505376344,
+      "grad_norm": 0.8754153847694397,
+      "learning_rate": 0.00011898332009531375,
+      "loss": 0.32286104559898376,
+      "step": 521
+    },
+    {
+      "epoch": 11.10752688172043,
+      "grad_norm": 0.9021672606468201,
+      "learning_rate": 0.00011882446386020651,
+      "loss": 0.37493667006492615,
+      "step": 522
+    },
+    {
+      "epoch": 11.129032258064516,
+      "grad_norm": 0.7618964910507202,
+      "learning_rate": 0.00011866560762509929,
+      "loss": 0.2827898859977722,
+      "step": 523
+    },
+    {
+      "epoch": 11.150537634408602,
+      "grad_norm": 0.7415357828140259,
+      "learning_rate": 0.00011850675138999205,
+      "loss": 0.3536837697029114,
+      "step": 524
+    },
+    {
+      "epoch": 11.172043010752688,
+      "grad_norm": 0.7170859575271606,
+      "learning_rate": 0.00011834789515488482,
+      "loss": 0.3306007385253906,
+      "step": 525
+    },
+    {
+      "epoch": 11.193548387096774,
+      "grad_norm": 0.7188425660133362,
+      "learning_rate": 0.00011818903891977761,
+      "loss": 0.312721312046051,
+      "step": 526
+    },
+    {
+      "epoch": 11.21505376344086,
+      "grad_norm": 0.7460401058197021,
+      "learning_rate": 0.00011803018268467039,
+      "loss": 0.34166038036346436,
+      "step": 527
+    },
+    {
+      "epoch": 11.236559139784946,
+      "grad_norm": 0.7806810736656189,
+      "learning_rate": 0.00011787132644956315,
+      "loss": 0.28128740191459656,
+      "step": 528
+    },
+    {
+      "epoch": 11.258064516129032,
+      "grad_norm": 0.7604184746742249,
+      "learning_rate": 0.00011771247021445593,
+      "loss": 0.25918617844581604,
+      "step": 529
+    },
+    {
+      "epoch": 11.279569892473118,
+      "grad_norm": 0.9017161726951599,
+      "learning_rate": 0.0001175536139793487,
+      "loss": 0.3212778866291046,
+      "step": 530
+    },
+    {
+      "epoch": 11.301075268817204,
+      "grad_norm": 0.7170535326004028,
+      "learning_rate": 0.00011739475774424147,
+      "loss": 0.20179462432861328,
+      "step": 531
+    },
+    {
+      "epoch": 11.32258064516129,
+      "grad_norm": 0.7809444069862366,
+      "learning_rate": 0.00011723590150913424,
+      "loss": 0.2983512282371521,
+      "step": 532
+    },
+    {
+      "epoch": 11.344086021505376,
+      "grad_norm": 0.7415927052497864,
+      "learning_rate": 0.000117077045274027,
+      "loss": 0.2575104534626007,
+      "step": 533
+    },
+    {
+      "epoch": 11.365591397849462,
+      "grad_norm": 0.7183315753936768,
+      "learning_rate": 0.00011691818903891978,
+      "loss": 0.27599287033081055,
+      "step": 534
+    },
+    {
+      "epoch": 11.387096774193548,
+      "grad_norm": 1.682211995124817,
+      "learning_rate": 0.00011675933280381256,
+      "loss": 0.3844273090362549,
+      "step": 535
+    },
+    {
+      "epoch": 11.408602150537634,
+      "grad_norm": 0.707069993019104,
+      "learning_rate": 0.00011660047656870532,
+      "loss": 0.28967398405075073,
+      "step": 536
+    },
+    {
+      "epoch": 11.43010752688172,
+      "grad_norm": 0.7777815461158752,
+      "learning_rate": 0.0001164416203335981,
+      "loss": 0.36004626750946045,
+      "step": 537
+    },
+    {
+      "epoch": 11.451612903225806,
+      "grad_norm": 0.8384584188461304,
+      "learning_rate": 0.00011628276409849088,
+      "loss": 0.26809942722320557,
+      "step": 538
+    },
+    {
+      "epoch": 11.473118279569892,
+      "grad_norm": 0.8009209036827087,
+      "learning_rate": 0.00011612390786338366,
+      "loss": 0.31199246644973755,
+      "step": 539
+    },
+    {
+      "epoch": 11.494623655913978,
+      "grad_norm": 0.7819718718528748,
+      "learning_rate": 0.00011596505162827642,
+      "loss": 0.371052622795105,
+      "step": 540
+    },
+    {
+      "epoch": 11.516129032258064,
+      "grad_norm": 0.7900601029396057,
+      "learning_rate": 0.0001158061953931692,
+      "loss": 0.29752612113952637,
+      "step": 541
+    },
+    {
+      "epoch": 11.53763440860215,
+      "grad_norm": 0.7695387005805969,
+      "learning_rate": 0.00011564733915806196,
+      "loss": 0.2954059839248657,
+      "step": 542
+    },
+    {
+      "epoch": 11.559139784946236,
+      "grad_norm": 0.7167935371398926,
+      "learning_rate": 0.00011548848292295473,
+      "loss": 0.3413902223110199,
+      "step": 543
+    },
+    {
+      "epoch": 11.580645161290322,
+      "grad_norm": 0.9060370922088623,
+      "learning_rate": 0.0001153296266878475,
+      "loss": 0.3013741075992584,
+      "step": 544
+    },
+    {
+      "epoch": 11.602150537634408,
+      "grad_norm": 0.7279766798019409,
+      "learning_rate": 0.00011517077045274027,
+      "loss": 0.26471853256225586,
+      "step": 545
+    },
+    {
+      "epoch": 11.623655913978494,
+      "grad_norm": 0.7126449942588806,
+      "learning_rate": 0.00011501191421763305,
+      "loss": 0.3059503138065338,
+      "step": 546
+    },
+    {
+      "epoch": 11.64516129032258,
+      "grad_norm": 0.8045353293418884,
+      "learning_rate": 0.00011485305798252581,
+      "loss": 0.22076934576034546,
+      "step": 547
+    },
+    {
+      "epoch": 11.666666666666666,
+      "grad_norm": 0.8084357380867004,
+      "learning_rate": 0.00011469420174741859,
+      "loss": 0.3207700252532959,
+      "step": 548
+    },
+    {
+      "epoch": 11.688172043010752,
+      "grad_norm": 0.7768396139144897,
+      "learning_rate": 0.00011453534551231135,
+      "loss": 0.36251944303512573,
+      "step": 549
+    },
+    {
+      "epoch": 11.709677419354838,
+      "grad_norm": 0.8002042770385742,
+      "learning_rate": 0.00011437648927720415,
+      "loss": 0.3310307264328003,
+      "step": 550
+    },
+    {
+      "epoch": 11.731182795698924,
+      "grad_norm": 0.8118063807487488,
+      "learning_rate": 0.00011421763304209691,
+      "loss": 0.4163286089897156,
+      "step": 551
+    },
+    {
+      "epoch": 11.75268817204301,
+      "grad_norm": 0.8167386054992676,
+      "learning_rate": 0.00011405877680698969,
+      "loss": 0.3315795660018921,
+      "step": 552
+    },
+    {
+      "epoch": 11.774193548387096,
+      "grad_norm": 0.7709652185440063,
+      "learning_rate": 0.00011389992057188245,
+      "loss": 0.3589479327201843,
+      "step": 553
+    },
+    {
+      "epoch": 11.795698924731182,
+      "grad_norm": 0.7389516830444336,
+      "learning_rate": 0.00011374106433677523,
+      "loss": 0.3652232587337494,
+      "step": 554
+    },
+    {
+      "epoch": 11.817204301075268,
+      "grad_norm": 0.7422162294387817,
+      "learning_rate": 0.000113582208101668,
+      "loss": 0.3981531262397766,
+      "step": 555
+    },
+    {
+      "epoch": 11.838709677419354,
+      "grad_norm": 0.7577189803123474,
+      "learning_rate": 0.00011342335186656076,
+      "loss": 0.2992894649505615,
+      "step": 556
+    },
+    {
+      "epoch": 11.86021505376344,
+      "grad_norm": 0.7978619337081909,
+      "learning_rate": 0.00011326449563145354,
+      "loss": 0.30227142572402954,
+      "step": 557
+    },
+    {
+      "epoch": 11.881720430107526,
+      "grad_norm": 0.7575592398643494,
+      "learning_rate": 0.0001131056393963463,
+      "loss": 0.3341229557991028,
+      "step": 558
+    },
+    {
+      "epoch": 11.903225806451612,
+      "grad_norm": 0.6892173886299133,
+      "learning_rate": 0.00011294678316123908,
+      "loss": 0.36578071117401123,
+      "step": 559
+    },
+    {
+      "epoch": 11.924731182795698,
+      "grad_norm": 0.796484649181366,
+      "learning_rate": 0.00011278792692613185,
+      "loss": 0.35607850551605225,
+      "step": 560
+    },
+    {
+      "epoch": 11.946236559139784,
+      "grad_norm": 0.7589250206947327,
+      "learning_rate": 0.00011262907069102462,
+      "loss": 0.3695809841156006,
+      "step": 561
+    },
+    {
+      "epoch": 11.967741935483872,
+      "grad_norm": 0.8040224313735962,
+      "learning_rate": 0.0001124702144559174,
+      "loss": 0.3417878746986389,
+      "step": 562
+    },
+    {
+      "epoch": 11.989247311827956,
+      "grad_norm": 0.808239758014679,
+      "learning_rate": 0.00011231135822081018,
+      "loss": 0.2793850898742676,
+      "step": 563
+    },
+    {
+      "epoch": 12.0,
+      "grad_norm": 0.9436281323432922,
+      "learning_rate": 0.00011215250198570296,
+      "loss": 0.3655521273612976,
+      "step": 564
+    },
+    {
+      "epoch": 12.021505376344086,
+      "grad_norm": 0.7834585905075073,
+      "learning_rate": 0.00011199364575059572,
+      "loss": 0.30738168954849243,
+      "step": 565
+    },
+    {
+      "epoch": 12.043010752688172,
+      "grad_norm": 0.795142412185669,
+      "learning_rate": 0.0001118347895154885,
+      "loss": 0.2081073522567749,
+      "step": 566
+    },
+    {
+      "epoch": 12.064516129032258,
+      "grad_norm": 0.6989514827728271,
+      "learning_rate": 0.00011167593328038126,
+      "loss": 0.23969483375549316,
+      "step": 567
+    },
+    {
+      "epoch": 12.086021505376344,
+      "grad_norm": 0.9184768199920654,
+      "learning_rate": 0.00011151707704527403,
+      "loss": 0.23692801594734192,
+      "step": 568
+    },
+    {
+      "epoch": 12.10752688172043,
+      "grad_norm": 0.8651571273803711,
+      "learning_rate": 0.0001113582208101668,
+      "loss": 0.2645634412765503,
+      "step": 569
+    },
+    {
+      "epoch": 12.129032258064516,
+      "grad_norm": 1.062126874923706,
+      "learning_rate": 0.00011119936457505957,
+      "loss": 0.2957659065723419,
+      "step": 570
+    },
+    {
+      "epoch": 12.150537634408602,
+      "grad_norm": 0.7741546630859375,
+      "learning_rate": 0.00011104050833995235,
+      "loss": 0.22269803285598755,
+      "step": 571
+    },
+    {
+      "epoch": 12.172043010752688,
+      "grad_norm": 0.6743382215499878,
+      "learning_rate": 0.00011088165210484511,
+      "loss": 0.24519725143909454,
+      "step": 572
+    },
+    {
+      "epoch": 12.193548387096774,
+      "grad_norm": 0.7190341949462891,
+      "learning_rate": 0.00011072279586973789,
+      "loss": 0.21414649486541748,
+      "step": 573
+    },
+    {
+      "epoch": 12.21505376344086,
+      "grad_norm": 1.0120435953140259,
+      "learning_rate": 0.00011056393963463067,
+      "loss": 0.2711429297924042,
+      "step": 574
+    },
+    {
+      "epoch": 12.236559139784946,
+      "grad_norm": 0.7273455858230591,
+      "learning_rate": 0.00011040508339952345,
+      "loss": 0.26170241832733154,
+      "step": 575
+    },
+    {
+      "epoch": 12.258064516129032,
+      "grad_norm": 0.7552404999732971,
+      "learning_rate": 0.00011024622716441621,
+      "loss": 0.3006550669670105,
+      "step": 576
+    },
+    {
+      "epoch": 12.279569892473118,
+      "grad_norm": 0.9425163865089417,
+      "learning_rate": 0.00011008737092930899,
+      "loss": 0.1508064717054367,
+      "step": 577
+    },
+    {
+      "epoch": 12.301075268817204,
+      "grad_norm": 0.9021785855293274,
+      "learning_rate": 0.00010992851469420175,
+      "loss": 0.3232599198818207,
+      "step": 578
+    },
+    {
+      "epoch": 12.32258064516129,
+      "grad_norm": 0.7773718237876892,
+      "learning_rate": 0.00010976965845909453,
+      "loss": 0.19899800419807434,
+      "step": 579
+    },
+    {
+      "epoch": 12.344086021505376,
+      "grad_norm": 0.8534666895866394,
+      "learning_rate": 0.0001096108022239873,
+      "loss": 0.2873285412788391,
+      "step": 580
+    },
+    {
+      "epoch": 12.365591397849462,
+      "grad_norm": 0.9202722907066345,
+      "learning_rate": 0.00010945194598888006,
+      "loss": 0.3199685513973236,
+      "step": 581
+    },
+    {
+      "epoch": 12.387096774193548,
+      "grad_norm": 0.8233513832092285,
+      "learning_rate": 0.00010929308975377284,
+      "loss": 0.3001951575279236,
+      "step": 582
+    },
+    {
+      "epoch": 12.408602150537634,
+      "grad_norm": 0.8201817870140076,
+      "learning_rate": 0.0001091342335186656,
+      "loss": 0.23249655961990356,
+      "step": 583
+    },
+    {
+      "epoch": 12.43010752688172,
+      "grad_norm": 0.7741047739982605,
+      "learning_rate": 0.00010897537728355838,
+      "loss": 0.1689053624868393,
+      "step": 584
+    },
+    {
+      "epoch": 12.451612903225806,
+      "grad_norm": 0.8471128940582275,
+      "learning_rate": 0.00010881652104845115,
+      "loss": 0.2790966033935547,
+      "step": 585
+    },
+    {
+      "epoch": 12.473118279569892,
+      "grad_norm": 0.9554393291473389,
+      "learning_rate": 0.00010865766481334392,
+      "loss": 0.27663490176200867,
+      "step": 586
+    },
+    {
+      "epoch": 12.494623655913978,
+      "grad_norm": 0.7771924734115601,
+      "learning_rate": 0.0001084988085782367,
+      "loss": 0.29540884494781494,
+      "step": 587
+    },
+    {
+      "epoch": 12.516129032258064,
+      "grad_norm": 0.980816125869751,
+      "learning_rate": 0.00010833995234312948,
+      "loss": 0.3429156243801117,
+      "step": 588
+    },
+    {
+      "epoch": 12.53763440860215,
+      "grad_norm": 0.8538633584976196,
+      "learning_rate": 0.00010818109610802226,
+      "loss": 0.2047094702720642,
+      "step": 589
+    },
+    {
+      "epoch": 12.559139784946236,
+      "grad_norm": 0.8534285426139832,
+      "learning_rate": 0.00010802223987291502,
+      "loss": 0.3163786828517914,
+      "step": 590
+    },
+    {
+      "epoch": 12.580645161290322,
+      "grad_norm": 0.7636980414390564,
+      "learning_rate": 0.0001078633836378078,
+      "loss": 0.22881919145584106,
+      "step": 591
+    },
+    {
+      "epoch": 12.602150537634408,
+      "grad_norm": 0.808996856212616,
+      "learning_rate": 0.00010770452740270056,
+      "loss": 0.235946387052536,
+      "step": 592
+    },
+    {
+      "epoch": 12.623655913978494,
+      "grad_norm": 0.8072691559791565,
+      "learning_rate": 0.00010754567116759333,
+      "loss": 0.2396862506866455,
+      "step": 593
+    },
+    {
+      "epoch": 12.64516129032258,
+      "grad_norm": 0.829767644405365,
+      "learning_rate": 0.00010738681493248611,
+      "loss": 0.3337879776954651,
+      "step": 594
+    },
+    {
+      "epoch": 12.666666666666666,
+      "grad_norm": 0.8298752307891846,
+      "learning_rate": 0.00010722795869737887,
+      "loss": 0.3443082571029663,
+      "step": 595
+    },
+    {
+      "epoch": 12.688172043010752,
+      "grad_norm": 0.8143765330314636,
+      "learning_rate": 0.00010706910246227165,
+      "loss": 0.2124541848897934,
+      "step": 596
+    },
+    {
+      "epoch": 12.709677419354838,
+      "grad_norm": 0.8767410516738892,
+      "learning_rate": 0.00010691024622716441,
+      "loss": 0.33744725584983826,
+      "step": 597
+    },
+    {
+      "epoch": 12.731182795698924,
+      "grad_norm": 0.7700532674789429,
+      "learning_rate": 0.00010675138999205718,
+      "loss": 0.28435879945755005,
+      "step": 598
+    },
+    {
+      "epoch": 12.75268817204301,
+      "grad_norm": 0.8594262003898621,
+      "learning_rate": 0.00010659253375694997,
+      "loss": 0.21120105683803558,
+      "step": 599
+    },
+    {
+      "epoch": 12.774193548387096,
+      "grad_norm": 0.7381999492645264,
+      "learning_rate": 0.00010643367752184275,
+      "loss": 0.1675238013267517,
+      "step": 600
+    },
+    {
+      "epoch": 12.774193548387096,
+      "eval_loss": 0.9527648091316223,
+      "eval_runtime": 91.226,
+      "eval_samples_per_second": 0.46,
+      "eval_steps_per_second": 0.46,
+      "step": 600
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3.9436180253965824e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null