{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.768, "eval_steps": 500, "global_step": 60000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000128, "grad_norm": 19.5, "learning_rate": 5.4e-07, "loss": 2.622, "step": 10 }, { "epoch": 0.000256, "grad_norm": 4.34375, "learning_rate": 1.14e-06, "loss": 2.5931, "step": 20 }, { "epoch": 0.000384, "grad_norm": 2.453125, "learning_rate": 1.74e-06, "loss": 2.6418, "step": 30 }, { "epoch": 0.000512, "grad_norm": 1.9921875, "learning_rate": 2.34e-06, "loss": 2.6034, "step": 40 }, { "epoch": 0.00064, "grad_norm": 1.8828125, "learning_rate": 2.9400000000000002e-06, "loss": 2.5811, "step": 50 }, { "epoch": 0.000768, "grad_norm": 1.6796875, "learning_rate": 3.54e-06, "loss": 2.5775, "step": 60 }, { "epoch": 0.000896, "grad_norm": 2.546875, "learning_rate": 4.14e-06, "loss": 2.5761, "step": 70 }, { "epoch": 0.001024, "grad_norm": 1.5703125, "learning_rate": 4.74e-06, "loss": 2.5629, "step": 80 }, { "epoch": 0.001152, "grad_norm": 1.65625, "learning_rate": 5.34e-06, "loss": 2.5956, "step": 90 }, { "epoch": 0.00128, "grad_norm": 1.671875, "learning_rate": 5.940000000000001e-06, "loss": 2.5883, "step": 100 }, { "epoch": 0.001408, "grad_norm": 1.65625, "learning_rate": 6.54e-06, "loss": 2.5442, "step": 110 }, { "epoch": 0.001536, "grad_norm": 1.5, "learning_rate": 7.14e-06, "loss": 2.5613, "step": 120 }, { "epoch": 0.001664, "grad_norm": 1.5078125, "learning_rate": 7.74e-06, "loss": 2.5032, "step": 130 }, { "epoch": 0.001792, "grad_norm": 1.6484375, "learning_rate": 8.340000000000001e-06, "loss": 2.5645, "step": 140 }, { "epoch": 0.00192, "grad_norm": 1.5625, "learning_rate": 8.939999999999999e-06, "loss": 2.574, "step": 150 }, { "epoch": 0.002048, "grad_norm": 1.546875, "learning_rate": 9.54e-06, "loss": 2.5254, "step": 160 }, { "epoch": 0.002176, "grad_norm": 1.6484375, "learning_rate": 1.0140000000000001e-05, "loss": 2.5591, "step": 170 }, { "epoch": 0.002304, "grad_norm": 1.703125, "learning_rate": 1.074e-05, "loss": 2.5604, "step": 180 }, { "epoch": 0.002432, "grad_norm": 1.53125, "learning_rate": 1.134e-05, "loss": 2.534, "step": 190 }, { "epoch": 0.00256, "grad_norm": 2.09375, "learning_rate": 1.1940000000000001e-05, "loss": 2.5548, "step": 200 }, { "epoch": 0.002688, "grad_norm": 1.84375, "learning_rate": 1.254e-05, "loss": 2.5573, "step": 210 }, { "epoch": 0.002816, "grad_norm": 1.6171875, "learning_rate": 1.314e-05, "loss": 2.5411, "step": 220 }, { "epoch": 0.002944, "grad_norm": 1.53125, "learning_rate": 1.374e-05, "loss": 2.5189, "step": 230 }, { "epoch": 0.003072, "grad_norm": 1.578125, "learning_rate": 1.434e-05, "loss": 2.5032, "step": 240 }, { "epoch": 0.0032, "grad_norm": 1.5546875, "learning_rate": 1.4940000000000001e-05, "loss": 2.5813, "step": 250 }, { "epoch": 0.003328, "grad_norm": 2.21875, "learning_rate": 1.554e-05, "loss": 2.5175, "step": 260 }, { "epoch": 0.003456, "grad_norm": 1.5234375, "learning_rate": 1.614e-05, "loss": 2.5043, "step": 270 }, { "epoch": 0.003584, "grad_norm": 1.5546875, "learning_rate": 1.6740000000000002e-05, "loss": 2.5308, "step": 280 }, { "epoch": 0.003712, "grad_norm": 1.5703125, "learning_rate": 1.734e-05, "loss": 2.5185, "step": 290 }, { "epoch": 0.00384, "grad_norm": 1.515625, "learning_rate": 1.794e-05, "loss": 2.5314, "step": 300 }, { "epoch": 0.003968, "grad_norm": 1.65625, "learning_rate": 1.854e-05, "loss": 2.5238, "step": 310 }, { "epoch": 0.004096, "grad_norm": 1.5546875, "learning_rate": 1.914e-05, "loss": 2.5158, "step": 320 }, { "epoch": 0.004224, "grad_norm": 1.625, "learning_rate": 1.974e-05, "loss": 2.5158, "step": 330 }, { "epoch": 0.004352, "grad_norm": 1.6015625, "learning_rate": 2.0340000000000002e-05, "loss": 2.4951, "step": 340 }, { "epoch": 0.00448, "grad_norm": 1.828125, "learning_rate": 2.094e-05, "loss": 2.5374, "step": 350 }, { "epoch": 0.004608, "grad_norm": 1.6171875, "learning_rate": 2.154e-05, "loss": 2.4847, "step": 360 }, { "epoch": 0.004736, "grad_norm": 1.5234375, "learning_rate": 2.214e-05, "loss": 2.5122, "step": 370 }, { "epoch": 0.004864, "grad_norm": 1.9140625, "learning_rate": 2.274e-05, "loss": 2.5266, "step": 380 }, { "epoch": 0.004992, "grad_norm": 8.0625, "learning_rate": 2.334e-05, "loss": 2.5259, "step": 390 }, { "epoch": 0.00512, "grad_norm": 1.6171875, "learning_rate": 2.394e-05, "loss": 2.5097, "step": 400 }, { "epoch": 0.005248, "grad_norm": 1.5703125, "learning_rate": 2.454e-05, "loss": 2.4956, "step": 410 }, { "epoch": 0.005376, "grad_norm": 1.7421875, "learning_rate": 2.514e-05, "loss": 2.4944, "step": 420 }, { "epoch": 0.005504, "grad_norm": 1.6015625, "learning_rate": 2.574e-05, "loss": 2.5095, "step": 430 }, { "epoch": 0.005632, "grad_norm": 1.7109375, "learning_rate": 2.6340000000000002e-05, "loss": 2.4816, "step": 440 }, { "epoch": 0.00576, "grad_norm": 2.046875, "learning_rate": 2.6940000000000003e-05, "loss": 2.4895, "step": 450 }, { "epoch": 0.005888, "grad_norm": 2.65625, "learning_rate": 2.754e-05, "loss": 2.529, "step": 460 }, { "epoch": 0.006016, "grad_norm": 1.65625, "learning_rate": 2.8139999999999998e-05, "loss": 2.5201, "step": 470 }, { "epoch": 0.006144, "grad_norm": 1.6953125, "learning_rate": 2.874e-05, "loss": 2.4471, "step": 480 }, { "epoch": 0.006272, "grad_norm": 1.7421875, "learning_rate": 2.934e-05, "loss": 2.4974, "step": 490 }, { "epoch": 0.0064, "grad_norm": 1.578125, "learning_rate": 2.994e-05, "loss": 2.5033, "step": 500 }, { "epoch": 0.006528, "grad_norm": 1.5625, "learning_rate": 2.999999900495483e-05, "loss": 2.4897, "step": 510 }, { "epoch": 0.006656, "grad_norm": 1.625, "learning_rate": 2.999999556529268e-05, "loss": 2.4684, "step": 520 }, { "epoch": 0.006784, "grad_norm": 1.640625, "learning_rate": 2.9999989668729602e-05, "loss": 2.4913, "step": 530 }, { "epoch": 0.006912, "grad_norm": 1.6640625, "learning_rate": 2.9999981315266566e-05, "loss": 2.4952, "step": 540 }, { "epoch": 0.00704, "grad_norm": 1.5703125, "learning_rate": 2.9999970504904944e-05, "loss": 2.4987, "step": 550 }, { "epoch": 0.007168, "grad_norm": 1.6171875, "learning_rate": 2.99999572376465e-05, "loss": 2.4804, "step": 560 }, { "epoch": 0.007296, "grad_norm": 1.796875, "learning_rate": 2.999994151349341e-05, "loss": 2.4837, "step": 570 }, { "epoch": 0.007424, "grad_norm": 2.078125, "learning_rate": 2.9999923332448248e-05, "loss": 2.4747, "step": 580 }, { "epoch": 0.007552, "grad_norm": 1.8515625, "learning_rate": 2.9999902694513992e-05, "loss": 2.454, "step": 590 }, { "epoch": 0.00768, "grad_norm": 1.84375, "learning_rate": 2.999987959969402e-05, "loss": 2.5016, "step": 600 }, { "epoch": 0.007808, "grad_norm": 1.78125, "learning_rate": 2.9999854047992122e-05, "loss": 2.4745, "step": 610 }, { "epoch": 0.007936, "grad_norm": 1.5546875, "learning_rate": 2.999982603941248e-05, "loss": 2.4706, "step": 620 }, { "epoch": 0.008064, "grad_norm": 1.6484375, "learning_rate": 2.999979557395968e-05, "loss": 2.4682, "step": 630 }, { "epoch": 0.008192, "grad_norm": 1.65625, "learning_rate": 2.999976265163871e-05, "loss": 2.5044, "step": 640 }, { "epoch": 0.00832, "grad_norm": 1.6015625, "learning_rate": 2.9999727272454965e-05, "loss": 2.4864, "step": 650 }, { "epoch": 0.008448, "grad_norm": 1.6328125, "learning_rate": 2.9999689436414244e-05, "loss": 2.484, "step": 660 }, { "epoch": 0.008576, "grad_norm": 1.578125, "learning_rate": 2.9999649143522737e-05, "loss": 2.4556, "step": 670 }, { "epoch": 0.008704, "grad_norm": 1.640625, "learning_rate": 2.999960639378705e-05, "loss": 2.4678, "step": 680 }, { "epoch": 0.008832, "grad_norm": 1.6171875, "learning_rate": 2.999956118721418e-05, "loss": 2.4743, "step": 690 }, { "epoch": 0.00896, "grad_norm": 1.5703125, "learning_rate": 2.9999513523811536e-05, "loss": 2.4639, "step": 700 }, { "epoch": 0.009088, "grad_norm": 1.578125, "learning_rate": 2.999946340358692e-05, "loss": 2.4536, "step": 710 }, { "epoch": 0.009216, "grad_norm": 1.7265625, "learning_rate": 2.999941082654854e-05, "loss": 2.4584, "step": 720 }, { "epoch": 0.009344, "grad_norm": 1.6015625, "learning_rate": 2.999935579270502e-05, "loss": 2.4745, "step": 730 }, { "epoch": 0.009472, "grad_norm": 1.4453125, "learning_rate": 2.9999298302065363e-05, "loss": 2.4302, "step": 740 }, { "epoch": 0.0096, "grad_norm": 1.6484375, "learning_rate": 2.9999238354638986e-05, "loss": 2.4692, "step": 750 }, { "epoch": 0.009728, "grad_norm": 1.7734375, "learning_rate": 2.9999175950435715e-05, "loss": 2.4554, "step": 760 }, { "epoch": 0.009856, "grad_norm": 1.578125, "learning_rate": 2.9999111089465763e-05, "loss": 2.4524, "step": 770 }, { "epoch": 0.009984, "grad_norm": 1.578125, "learning_rate": 2.999904377173976e-05, "loss": 2.4571, "step": 780 }, { "epoch": 0.010112, "grad_norm": 1.5703125, "learning_rate": 2.999897399726873e-05, "loss": 2.4613, "step": 790 }, { "epoch": 0.01024, "grad_norm": 1.484375, "learning_rate": 2.9998901766064103e-05, "loss": 2.4764, "step": 800 }, { "epoch": 0.010368, "grad_norm": 1.6953125, "learning_rate": 2.9998827078137704e-05, "loss": 2.4741, "step": 810 }, { "epoch": 0.010496, "grad_norm": 1.4765625, "learning_rate": 2.9998749933501772e-05, "loss": 2.4661, "step": 820 }, { "epoch": 0.010624, "grad_norm": 2.625, "learning_rate": 2.9998670332168943e-05, "loss": 2.4225, "step": 830 }, { "epoch": 0.010752, "grad_norm": 1.609375, "learning_rate": 2.9998588274152254e-05, "loss": 2.425, "step": 840 }, { "epoch": 0.01088, "grad_norm": 1.5546875, "learning_rate": 2.9998503759465145e-05, "loss": 2.4438, "step": 850 }, { "epoch": 0.011008, "grad_norm": 2.234375, "learning_rate": 2.9998416788121465e-05, "loss": 2.4687, "step": 860 }, { "epoch": 0.011136, "grad_norm": 1.4921875, "learning_rate": 2.9998327360135445e-05, "loss": 2.4886, "step": 870 }, { "epoch": 0.011264, "grad_norm": 1.546875, "learning_rate": 2.9998235475521745e-05, "loss": 2.4534, "step": 880 }, { "epoch": 0.011392, "grad_norm": 1.734375, "learning_rate": 2.9998141134295412e-05, "loss": 2.474, "step": 890 }, { "epoch": 0.01152, "grad_norm": 1.5859375, "learning_rate": 2.9998044336471902e-05, "loss": 2.4674, "step": 900 }, { "epoch": 0.011648, "grad_norm": 3.359375, "learning_rate": 2.999794508206706e-05, "loss": 2.4704, "step": 910 }, { "epoch": 0.011776, "grad_norm": 1.515625, "learning_rate": 2.999784337109715e-05, "loss": 2.4566, "step": 920 }, { "epoch": 0.011904, "grad_norm": 1.6015625, "learning_rate": 2.9997739203578832e-05, "loss": 2.4661, "step": 930 }, { "epoch": 0.012032, "grad_norm": 1.640625, "learning_rate": 2.9997632579529165e-05, "loss": 2.4516, "step": 940 }, { "epoch": 0.01216, "grad_norm": 1.4375, "learning_rate": 2.9997523498965617e-05, "loss": 2.4518, "step": 950 }, { "epoch": 0.012288, "grad_norm": 1.640625, "learning_rate": 2.9997411961906057e-05, "loss": 2.448, "step": 960 }, { "epoch": 0.012416, "grad_norm": 1.484375, "learning_rate": 2.999729796836874e-05, "loss": 2.4449, "step": 970 }, { "epoch": 0.012544, "grad_norm": 2.375, "learning_rate": 2.9997181518372355e-05, "loss": 2.4607, "step": 980 }, { "epoch": 0.012672, "grad_norm": 1.6015625, "learning_rate": 2.9997062611935965e-05, "loss": 2.4843, "step": 990 }, { "epoch": 0.0128, "grad_norm": 1.703125, "learning_rate": 2.9996941249079047e-05, "loss": 2.4659, "step": 1000 }, { "epoch": 0.012928, "grad_norm": 1.4765625, "learning_rate": 2.9996817429821486e-05, "loss": 2.4549, "step": 1010 }, { "epoch": 0.013056, "grad_norm": 1.6328125, "learning_rate": 2.9996691154183553e-05, "loss": 2.4367, "step": 1020 }, { "epoch": 0.013184, "grad_norm": 1.640625, "learning_rate": 2.9996562422185937e-05, "loss": 2.4495, "step": 1030 }, { "epoch": 0.013312, "grad_norm": 1.6171875, "learning_rate": 2.9996431233849728e-05, "loss": 2.4515, "step": 1040 }, { "epoch": 0.01344, "grad_norm": 2.109375, "learning_rate": 2.9996297589196407e-05, "loss": 2.4497, "step": 1050 }, { "epoch": 0.013568, "grad_norm": 1.6484375, "learning_rate": 2.9996161488247862e-05, "loss": 2.4457, "step": 1060 }, { "epoch": 0.013696, "grad_norm": 1.578125, "learning_rate": 2.999602293102639e-05, "loss": 2.4164, "step": 1070 }, { "epoch": 0.013824, "grad_norm": 1.5546875, "learning_rate": 2.9995881917554688e-05, "loss": 2.4266, "step": 1080 }, { "epoch": 0.013952, "grad_norm": 1.8671875, "learning_rate": 2.9995738447855846e-05, "loss": 2.433, "step": 1090 }, { "epoch": 0.01408, "grad_norm": 1.5390625, "learning_rate": 2.999559252195337e-05, "loss": 2.4478, "step": 1100 }, { "epoch": 0.014208, "grad_norm": 1.546875, "learning_rate": 2.9995444139871155e-05, "loss": 2.4468, "step": 1110 }, { "epoch": 0.014336, "grad_norm": 1.671875, "learning_rate": 2.9995293301633516e-05, "loss": 2.3931, "step": 1120 }, { "epoch": 0.014464, "grad_norm": 1.5625, "learning_rate": 2.9995140007265147e-05, "loss": 2.443, "step": 1130 }, { "epoch": 0.014592, "grad_norm": 1.5078125, "learning_rate": 2.9994984256791163e-05, "loss": 2.437, "step": 1140 }, { "epoch": 0.01472, "grad_norm": 1.4765625, "learning_rate": 2.9994826050237074e-05, "loss": 2.4282, "step": 1150 }, { "epoch": 0.014848, "grad_norm": 1.609375, "learning_rate": 2.9994665387628792e-05, "loss": 2.4243, "step": 1160 }, { "epoch": 0.014976, "grad_norm": 1.5703125, "learning_rate": 2.9994502268992637e-05, "loss": 2.4568, "step": 1170 }, { "epoch": 0.015104, "grad_norm": 1.5625, "learning_rate": 2.999433669435532e-05, "loss": 2.4086, "step": 1180 }, { "epoch": 0.015232, "grad_norm": 1.53125, "learning_rate": 2.9994168663743966e-05, "loss": 2.3972, "step": 1190 }, { "epoch": 0.01536, "grad_norm": 1.5234375, "learning_rate": 2.9993998177186095e-05, "loss": 2.4495, "step": 1200 }, { "epoch": 0.015488, "grad_norm": 1.4765625, "learning_rate": 2.9993825234709634e-05, "loss": 2.4065, "step": 1210 }, { "epoch": 0.015616, "grad_norm": 1.6484375, "learning_rate": 2.9993649836342908e-05, "loss": 2.4621, "step": 1220 }, { "epoch": 0.015744, "grad_norm": 1.6953125, "learning_rate": 2.9993471982114645e-05, "loss": 2.4352, "step": 1230 }, { "epoch": 0.015872, "grad_norm": 1.640625, "learning_rate": 2.9993291672053977e-05, "loss": 2.4354, "step": 1240 }, { "epoch": 0.016, "grad_norm": 1.6328125, "learning_rate": 2.999310890619044e-05, "loss": 2.4311, "step": 1250 }, { "epoch": 0.016128, "grad_norm": 2.140625, "learning_rate": 2.9992923684553964e-05, "loss": 2.4432, "step": 1260 }, { "epoch": 0.016256, "grad_norm": 4.4375, "learning_rate": 2.9992736007174898e-05, "loss": 2.424, "step": 1270 }, { "epoch": 0.016384, "grad_norm": 1.46875, "learning_rate": 2.999254587408397e-05, "loss": 2.4771, "step": 1280 }, { "epoch": 0.016512, "grad_norm": 1.75, "learning_rate": 2.9992353285312334e-05, "loss": 2.452, "step": 1290 }, { "epoch": 0.01664, "grad_norm": 1.9296875, "learning_rate": 2.999215824089152e-05, "loss": 2.4288, "step": 1300 }, { "epoch": 0.016768, "grad_norm": 1.625, "learning_rate": 2.999196074085349e-05, "loss": 2.4227, "step": 1310 }, { "epoch": 0.016896, "grad_norm": 1.546875, "learning_rate": 2.9991760785230584e-05, "loss": 2.4281, "step": 1320 }, { "epoch": 0.017024, "grad_norm": 1.5234375, "learning_rate": 2.999155837405556e-05, "loss": 2.3974, "step": 1330 }, { "epoch": 0.017152, "grad_norm": 1.4921875, "learning_rate": 2.9991353507361565e-05, "loss": 2.433, "step": 1340 }, { "epoch": 0.01728, "grad_norm": 1.6484375, "learning_rate": 2.999114618518216e-05, "loss": 2.4441, "step": 1350 }, { "epoch": 0.017408, "grad_norm": 1.6484375, "learning_rate": 2.9990936407551295e-05, "loss": 2.4084, "step": 1360 }, { "epoch": 0.017536, "grad_norm": 1.5859375, "learning_rate": 2.9990724174503342e-05, "loss": 2.4129, "step": 1370 }, { "epoch": 0.017664, "grad_norm": 1.6640625, "learning_rate": 2.999050948607306e-05, "loss": 2.4036, "step": 1380 }, { "epoch": 0.017792, "grad_norm": 1.578125, "learning_rate": 2.9990292342295604e-05, "loss": 2.428, "step": 1390 }, { "epoch": 0.01792, "grad_norm": 1.4921875, "learning_rate": 2.999007274320655e-05, "loss": 2.4188, "step": 1400 }, { "epoch": 0.018048, "grad_norm": 1.640625, "learning_rate": 2.9989850688841866e-05, "loss": 2.4478, "step": 1410 }, { "epoch": 0.018176, "grad_norm": 1.6328125, "learning_rate": 2.998962617923792e-05, "loss": 2.4178, "step": 1420 }, { "epoch": 0.018304, "grad_norm": 1.65625, "learning_rate": 2.9989399214431488e-05, "loss": 2.4082, "step": 1430 }, { "epoch": 0.018432, "grad_norm": 1.4921875, "learning_rate": 2.9989169794459743e-05, "loss": 2.4473, "step": 1440 }, { "epoch": 0.01856, "grad_norm": 1.53125, "learning_rate": 2.9988937919360265e-05, "loss": 2.3995, "step": 1450 }, { "epoch": 0.018688, "grad_norm": 1.578125, "learning_rate": 2.9988703589171034e-05, "loss": 2.4096, "step": 1460 }, { "epoch": 0.018816, "grad_norm": 1.5390625, "learning_rate": 2.9988466803930425e-05, "loss": 2.4197, "step": 1470 }, { "epoch": 0.018944, "grad_norm": 1.5390625, "learning_rate": 2.998822756367723e-05, "loss": 2.4179, "step": 1480 }, { "epoch": 0.019072, "grad_norm": 1.671875, "learning_rate": 2.9987985868450633e-05, "loss": 2.4279, "step": 1490 }, { "epoch": 0.0192, "grad_norm": 1.5703125, "learning_rate": 2.9987741718290224e-05, "loss": 2.4046, "step": 1500 }, { "epoch": 0.019328, "grad_norm": 1.65625, "learning_rate": 2.9987495113235986e-05, "loss": 2.4443, "step": 1510 }, { "epoch": 0.019456, "grad_norm": 1.5546875, "learning_rate": 2.9987246053328316e-05, "loss": 2.4262, "step": 1520 }, { "epoch": 0.019584, "grad_norm": 1.4609375, "learning_rate": 2.9986994538608007e-05, "loss": 2.3777, "step": 1530 }, { "epoch": 0.019712, "grad_norm": 1.515625, "learning_rate": 2.9986740569116255e-05, "loss": 2.4268, "step": 1540 }, { "epoch": 0.01984, "grad_norm": 2.90625, "learning_rate": 2.9986484144894663e-05, "loss": 2.4159, "step": 1550 }, { "epoch": 0.019968, "grad_norm": 1.59375, "learning_rate": 2.9986225265985228e-05, "loss": 2.4393, "step": 1560 }, { "epoch": 0.020096, "grad_norm": 1.5703125, "learning_rate": 2.998596393243035e-05, "loss": 2.4301, "step": 1570 }, { "epoch": 0.020224, "grad_norm": 1.625, "learning_rate": 2.998570014427284e-05, "loss": 2.4248, "step": 1580 }, { "epoch": 0.020352, "grad_norm": 1.5390625, "learning_rate": 2.9985433901555905e-05, "loss": 2.4318, "step": 1590 }, { "epoch": 0.02048, "grad_norm": 1.5546875, "learning_rate": 2.9985165204323144e-05, "loss": 2.4201, "step": 1600 }, { "epoch": 0.020608, "grad_norm": 1.8515625, "learning_rate": 2.9984894052618576e-05, "loss": 2.4169, "step": 1610 }, { "epoch": 0.020736, "grad_norm": 1.578125, "learning_rate": 2.9984620446486617e-05, "loss": 2.4001, "step": 1620 }, { "epoch": 0.020864, "grad_norm": 1.5, "learning_rate": 2.998434438597207e-05, "loss": 2.4323, "step": 1630 }, { "epoch": 0.020992, "grad_norm": 1.4921875, "learning_rate": 2.9984065871120163e-05, "loss": 2.4113, "step": 1640 }, { "epoch": 0.02112, "grad_norm": 1.5625, "learning_rate": 2.998378490197651e-05, "loss": 2.4029, "step": 1650 }, { "epoch": 0.021248, "grad_norm": 1.4296875, "learning_rate": 2.9983501478587135e-05, "loss": 2.4318, "step": 1660 }, { "epoch": 0.021376, "grad_norm": 1.5078125, "learning_rate": 2.9983215600998457e-05, "loss": 2.4336, "step": 1670 }, { "epoch": 0.021504, "grad_norm": 1.484375, "learning_rate": 2.99829272692573e-05, "loss": 2.4094, "step": 1680 }, { "epoch": 0.021632, "grad_norm": 2.84375, "learning_rate": 2.99826364834109e-05, "loss": 2.3937, "step": 1690 }, { "epoch": 0.02176, "grad_norm": 1.53125, "learning_rate": 2.9982343243506875e-05, "loss": 2.4345, "step": 1700 }, { "epoch": 0.021888, "grad_norm": 1.5703125, "learning_rate": 2.998204754959326e-05, "loss": 2.4493, "step": 1710 }, { "epoch": 0.022016, "grad_norm": 1.515625, "learning_rate": 2.9981749401718483e-05, "loss": 2.354, "step": 1720 }, { "epoch": 0.022144, "grad_norm": 1.5859375, "learning_rate": 2.998144879993139e-05, "loss": 2.4214, "step": 1730 }, { "epoch": 0.022272, "grad_norm": 1.6875, "learning_rate": 2.9981145744281206e-05, "loss": 2.4281, "step": 1740 }, { "epoch": 0.0224, "grad_norm": 1.546875, "learning_rate": 2.998084023481758e-05, "loss": 2.4009, "step": 1750 }, { "epoch": 0.022528, "grad_norm": 1.5078125, "learning_rate": 2.9980532271590546e-05, "loss": 2.4116, "step": 1760 }, { "epoch": 0.022656, "grad_norm": 1.578125, "learning_rate": 2.9980221854650547e-05, "loss": 2.4175, "step": 1770 }, { "epoch": 0.022784, "grad_norm": 1.421875, "learning_rate": 2.9979908984048423e-05, "loss": 2.3775, "step": 1780 }, { "epoch": 0.022912, "grad_norm": 1.6484375, "learning_rate": 2.9979593659835428e-05, "loss": 2.4142, "step": 1790 }, { "epoch": 0.02304, "grad_norm": 1.5, "learning_rate": 2.9979275882063212e-05, "loss": 2.3714, "step": 1800 }, { "epoch": 0.023168, "grad_norm": 1.5703125, "learning_rate": 2.997895565078381e-05, "loss": 2.4189, "step": 1810 }, { "epoch": 0.023296, "grad_norm": 1.5234375, "learning_rate": 2.997863296604969e-05, "loss": 2.4187, "step": 1820 }, { "epoch": 0.023424, "grad_norm": 8.3125, "learning_rate": 2.9978307827913696e-05, "loss": 2.3947, "step": 1830 }, { "epoch": 0.023552, "grad_norm": 1.5546875, "learning_rate": 2.997798023642909e-05, "loss": 2.4072, "step": 1840 }, { "epoch": 0.02368, "grad_norm": 1.5625, "learning_rate": 2.9977650191649525e-05, "loss": 2.4073, "step": 1850 }, { "epoch": 0.023808, "grad_norm": 1.4765625, "learning_rate": 2.9977317693629058e-05, "loss": 2.3947, "step": 1860 }, { "epoch": 0.023936, "grad_norm": 1.5703125, "learning_rate": 2.9976982742422154e-05, "loss": 2.3982, "step": 1870 }, { "epoch": 0.024064, "grad_norm": 1.625, "learning_rate": 2.9976645338083674e-05, "loss": 2.4247, "step": 1880 }, { "epoch": 0.024192, "grad_norm": 1.5625, "learning_rate": 2.9976305480668887e-05, "loss": 2.3951, "step": 1890 }, { "epoch": 0.02432, "grad_norm": 1.4609375, "learning_rate": 2.9975963170233454e-05, "loss": 2.4403, "step": 1900 }, { "epoch": 0.024448, "grad_norm": 1.453125, "learning_rate": 2.9975618406833446e-05, "loss": 2.4019, "step": 1910 }, { "epoch": 0.024576, "grad_norm": 1.7734375, "learning_rate": 2.997527119052533e-05, "loss": 2.4151, "step": 1920 }, { "epoch": 0.024704, "grad_norm": 1.5546875, "learning_rate": 2.9974921521365985e-05, "loss": 2.4039, "step": 1930 }, { "epoch": 0.024832, "grad_norm": 1.5546875, "learning_rate": 2.997456939941268e-05, "loss": 2.3841, "step": 1940 }, { "epoch": 0.02496, "grad_norm": 1.5546875, "learning_rate": 2.9974214824723082e-05, "loss": 2.3953, "step": 1950 }, { "epoch": 0.025088, "grad_norm": 1.4453125, "learning_rate": 2.9973857797355278e-05, "loss": 2.4086, "step": 1960 }, { "epoch": 0.025216, "grad_norm": 1.5625, "learning_rate": 2.9973498317367746e-05, "loss": 2.4485, "step": 1970 }, { "epoch": 0.025344, "grad_norm": 1.484375, "learning_rate": 2.9973136384819365e-05, "loss": 2.4041, "step": 1980 }, { "epoch": 0.025472, "grad_norm": 1.6796875, "learning_rate": 2.997277199976942e-05, "loss": 2.4112, "step": 1990 }, { "epoch": 0.0256, "grad_norm": 1.421875, "learning_rate": 2.9972405162277586e-05, "loss": 2.4139, "step": 2000 }, { "epoch": 0.025728, "grad_norm": 1.5078125, "learning_rate": 2.997203587240396e-05, "loss": 2.3867, "step": 2010 }, { "epoch": 0.025856, "grad_norm": 1.4453125, "learning_rate": 2.9971664130209016e-05, "loss": 2.3723, "step": 2020 }, { "epoch": 0.025984, "grad_norm": 1.53125, "learning_rate": 2.997128993575366e-05, "loss": 2.3773, "step": 2030 }, { "epoch": 0.026112, "grad_norm": 1.578125, "learning_rate": 2.9970913289099172e-05, "loss": 2.3919, "step": 2040 }, { "epoch": 0.02624, "grad_norm": 1.5078125, "learning_rate": 2.997053419030724e-05, "loss": 2.4063, "step": 2050 }, { "epoch": 0.026368, "grad_norm": 1.546875, "learning_rate": 2.9970152639439966e-05, "loss": 2.394, "step": 2060 }, { "epoch": 0.026496, "grad_norm": 1.46875, "learning_rate": 2.9969768636559844e-05, "loss": 2.4052, "step": 2070 }, { "epoch": 0.026624, "grad_norm": 1.5546875, "learning_rate": 2.996938218172977e-05, "loss": 2.3793, "step": 2080 }, { "epoch": 0.026752, "grad_norm": 1.453125, "learning_rate": 2.9968993275013043e-05, "loss": 2.4099, "step": 2090 }, { "epoch": 0.02688, "grad_norm": 1.484375, "learning_rate": 2.996860191647336e-05, "loss": 2.3864, "step": 2100 }, { "epoch": 0.027008, "grad_norm": 1.6640625, "learning_rate": 2.9968208106174832e-05, "loss": 2.3937, "step": 2110 }, { "epoch": 0.027136, "grad_norm": 1.53125, "learning_rate": 2.996781184418195e-05, "loss": 2.3876, "step": 2120 }, { "epoch": 0.027264, "grad_norm": 1.46875, "learning_rate": 2.996741313055963e-05, "loss": 2.3802, "step": 2130 }, { "epoch": 0.027392, "grad_norm": 1.6015625, "learning_rate": 2.9967011965373177e-05, "loss": 2.3921, "step": 2140 }, { "epoch": 0.02752, "grad_norm": 1.578125, "learning_rate": 2.9966608348688296e-05, "loss": 2.4252, "step": 2150 }, { "epoch": 0.027648, "grad_norm": 1.6953125, "learning_rate": 2.9966202280571095e-05, "loss": 2.3739, "step": 2160 }, { "epoch": 0.027776, "grad_norm": 1.5546875, "learning_rate": 2.996579376108809e-05, "loss": 2.4046, "step": 2170 }, { "epoch": 0.027904, "grad_norm": 1.7421875, "learning_rate": 2.9965382790306192e-05, "loss": 2.4003, "step": 2180 }, { "epoch": 0.028032, "grad_norm": 1.53125, "learning_rate": 2.996496936829271e-05, "loss": 2.4092, "step": 2190 }, { "epoch": 0.02816, "grad_norm": 1.53125, "learning_rate": 2.996455349511537e-05, "loss": 2.3686, "step": 2200 }, { "epoch": 0.028288, "grad_norm": 1.5078125, "learning_rate": 2.9964135170842286e-05, "loss": 2.4018, "step": 2210 }, { "epoch": 0.028416, "grad_norm": 1.5078125, "learning_rate": 2.9963714395541968e-05, "loss": 2.3902, "step": 2220 }, { "epoch": 0.028544, "grad_norm": 1.5625, "learning_rate": 2.996329116928335e-05, "loss": 2.3988, "step": 2230 }, { "epoch": 0.028672, "grad_norm": 1.484375, "learning_rate": 2.996286549213574e-05, "loss": 2.3785, "step": 2240 }, { "epoch": 0.0288, "grad_norm": 1.484375, "learning_rate": 2.9962437364168874e-05, "loss": 2.3899, "step": 2250 }, { "epoch": 0.028928, "grad_norm": 1.5703125, "learning_rate": 2.9962006785452864e-05, "loss": 2.4002, "step": 2260 }, { "epoch": 0.029056, "grad_norm": 1.46875, "learning_rate": 2.996157375605825e-05, "loss": 2.405, "step": 2270 }, { "epoch": 0.029184, "grad_norm": 1.6640625, "learning_rate": 2.9961138276055946e-05, "loss": 2.4058, "step": 2280 }, { "epoch": 0.029312, "grad_norm": 1.5546875, "learning_rate": 2.996070034551729e-05, "loss": 2.368, "step": 2290 }, { "epoch": 0.02944, "grad_norm": 1.4921875, "learning_rate": 2.9960259964514008e-05, "loss": 2.3732, "step": 2300 }, { "epoch": 0.029568, "grad_norm": 1.546875, "learning_rate": 2.995981713311823e-05, "loss": 2.3938, "step": 2310 }, { "epoch": 0.029696, "grad_norm": 1.4375, "learning_rate": 2.9959371851402496e-05, "loss": 2.3867, "step": 2320 }, { "epoch": 0.029824, "grad_norm": 1.6796875, "learning_rate": 2.995892411943973e-05, "loss": 2.4204, "step": 2330 }, { "epoch": 0.029952, "grad_norm": 1.4296875, "learning_rate": 2.995847393730328e-05, "loss": 2.4061, "step": 2340 }, { "epoch": 0.03008, "grad_norm": 1.40625, "learning_rate": 2.995802130506687e-05, "loss": 2.3924, "step": 2350 }, { "epoch": 0.030208, "grad_norm": 1.4921875, "learning_rate": 2.9957566222804644e-05, "loss": 2.4038, "step": 2360 }, { "epoch": 0.030336, "grad_norm": 1.6015625, "learning_rate": 2.9957108690591146e-05, "loss": 2.3907, "step": 2370 }, { "epoch": 0.030464, "grad_norm": 2.21875, "learning_rate": 2.995664870850131e-05, "loss": 2.3873, "step": 2380 }, { "epoch": 0.030592, "grad_norm": 1.59375, "learning_rate": 2.9956186276610475e-05, "loss": 2.3628, "step": 2390 }, { "epoch": 0.03072, "grad_norm": 1.4375, "learning_rate": 2.9955721394994397e-05, "loss": 2.3846, "step": 2400 }, { "epoch": 0.030848, "grad_norm": 1.8203125, "learning_rate": 2.995525406372921e-05, "loss": 2.401, "step": 2410 }, { "epoch": 0.030976, "grad_norm": 1.4765625, "learning_rate": 2.9954784282891463e-05, "loss": 2.4025, "step": 2420 }, { "epoch": 0.031104, "grad_norm": 1.5390625, "learning_rate": 2.9954312052558105e-05, "loss": 2.4205, "step": 2430 }, { "epoch": 0.031232, "grad_norm": 1.3671875, "learning_rate": 2.995383737280648e-05, "loss": 2.4019, "step": 2440 }, { "epoch": 0.03136, "grad_norm": 1.5, "learning_rate": 2.995336024371434e-05, "loss": 2.3833, "step": 2450 }, { "epoch": 0.031488, "grad_norm": 1.453125, "learning_rate": 2.9952880665359838e-05, "loss": 2.4137, "step": 2460 }, { "epoch": 0.031616, "grad_norm": 1.6953125, "learning_rate": 2.995239863782152e-05, "loss": 2.3906, "step": 2470 }, { "epoch": 0.031744, "grad_norm": 1.5625, "learning_rate": 2.9951914161178344e-05, "loss": 2.3868, "step": 2480 }, { "epoch": 0.031872, "grad_norm": 1.578125, "learning_rate": 2.9951427235509664e-05, "loss": 2.3524, "step": 2490 }, { "epoch": 0.032, "grad_norm": 1.78125, "learning_rate": 2.995093786089523e-05, "loss": 2.3986, "step": 2500 }, { "epoch": 0.032128, "grad_norm": 1.53125, "learning_rate": 2.9950446037415203e-05, "loss": 2.3411, "step": 2510 }, { "epoch": 0.032256, "grad_norm": 1.5078125, "learning_rate": 2.994995176515014e-05, "loss": 2.358, "step": 2520 }, { "epoch": 0.032384, "grad_norm": 1.578125, "learning_rate": 2.9949455044180997e-05, "loss": 2.37, "step": 2530 }, { "epoch": 0.032512, "grad_norm": 1.4140625, "learning_rate": 2.9948955874589136e-05, "loss": 2.3527, "step": 2540 }, { "epoch": 0.03264, "grad_norm": 1.453125, "learning_rate": 2.994845425645632e-05, "loss": 2.3901, "step": 2550 }, { "epoch": 0.032768, "grad_norm": 1.453125, "learning_rate": 2.9947950189864704e-05, "loss": 2.4083, "step": 2560 }, { "epoch": 0.032896, "grad_norm": 1.421875, "learning_rate": 2.9947443674896853e-05, "loss": 2.3702, "step": 2570 }, { "epoch": 0.033024, "grad_norm": 1.4921875, "learning_rate": 2.9946934711635736e-05, "loss": 2.3631, "step": 2580 }, { "epoch": 0.033152, "grad_norm": 1.484375, "learning_rate": 2.9946423300164714e-05, "loss": 2.3832, "step": 2590 }, { "epoch": 0.03328, "grad_norm": 1.3984375, "learning_rate": 2.9945909440567554e-05, "loss": 2.4063, "step": 2600 }, { "epoch": 0.033408, "grad_norm": 1.5859375, "learning_rate": 2.9945393132928418e-05, "loss": 2.3896, "step": 2610 }, { "epoch": 0.033536, "grad_norm": 1.5859375, "learning_rate": 2.994487437733188e-05, "loss": 2.3759, "step": 2620 }, { "epoch": 0.033664, "grad_norm": 1.359375, "learning_rate": 2.994435317386291e-05, "loss": 2.3923, "step": 2630 }, { "epoch": 0.033792, "grad_norm": 1.4765625, "learning_rate": 2.9943829522606873e-05, "loss": 2.4344, "step": 2640 }, { "epoch": 0.03392, "grad_norm": 1.5390625, "learning_rate": 2.994330342364954e-05, "loss": 2.3942, "step": 2650 }, { "epoch": 0.034048, "grad_norm": 1.484375, "learning_rate": 2.994277487707708e-05, "loss": 2.4031, "step": 2660 }, { "epoch": 0.034176, "grad_norm": 1.4921875, "learning_rate": 2.9942243882976076e-05, "loss": 2.3746, "step": 2670 }, { "epoch": 0.034304, "grad_norm": 1.46875, "learning_rate": 2.994171044143349e-05, "loss": 2.3633, "step": 2680 }, { "epoch": 0.034432, "grad_norm": 1.5234375, "learning_rate": 2.9941174552536698e-05, "loss": 2.3902, "step": 2690 }, { "epoch": 0.03456, "grad_norm": 1.484375, "learning_rate": 2.994063621637348e-05, "loss": 2.3739, "step": 2700 }, { "epoch": 0.034688, "grad_norm": 1.46875, "learning_rate": 2.9940095433032012e-05, "loss": 2.3849, "step": 2710 }, { "epoch": 0.034816, "grad_norm": 1.515625, "learning_rate": 2.9939552202600864e-05, "loss": 2.3892, "step": 2720 }, { "epoch": 0.034944, "grad_norm": 1.5234375, "learning_rate": 2.9939006525169022e-05, "loss": 2.4305, "step": 2730 }, { "epoch": 0.035072, "grad_norm": 1.4609375, "learning_rate": 2.993845840082586e-05, "loss": 2.3724, "step": 2740 }, { "epoch": 0.0352, "grad_norm": 1.5390625, "learning_rate": 2.9937907829661158e-05, "loss": 2.3636, "step": 2750 }, { "epoch": 0.035328, "grad_norm": 1.515625, "learning_rate": 2.993735481176509e-05, "loss": 2.4329, "step": 2760 }, { "epoch": 0.035456, "grad_norm": 1.6796875, "learning_rate": 2.993679934722825e-05, "loss": 2.3822, "step": 2770 }, { "epoch": 0.035584, "grad_norm": 1.5390625, "learning_rate": 2.9936241436141607e-05, "loss": 2.3775, "step": 2780 }, { "epoch": 0.035712, "grad_norm": 1.484375, "learning_rate": 2.993568107859655e-05, "loss": 2.3708, "step": 2790 }, { "epoch": 0.03584, "grad_norm": 1.5234375, "learning_rate": 2.9935118274684853e-05, "loss": 2.4141, "step": 2800 }, { "epoch": 0.035968, "grad_norm": 1.484375, "learning_rate": 2.9934553024498715e-05, "loss": 2.3596, "step": 2810 }, { "epoch": 0.036096, "grad_norm": 1.484375, "learning_rate": 2.993398532813071e-05, "loss": 2.3444, "step": 2820 }, { "epoch": 0.036224, "grad_norm": 1.53125, "learning_rate": 2.9933415185673818e-05, "loss": 2.3749, "step": 2830 }, { "epoch": 0.036352, "grad_norm": 1.53125, "learning_rate": 2.9932842597221437e-05, "loss": 2.3755, "step": 2840 }, { "epoch": 0.03648, "grad_norm": 1.5078125, "learning_rate": 2.9932267562867344e-05, "loss": 2.3722, "step": 2850 }, { "epoch": 0.036608, "grad_norm": 1.53125, "learning_rate": 2.9931690082705733e-05, "loss": 2.3766, "step": 2860 }, { "epoch": 0.036736, "grad_norm": 1.578125, "learning_rate": 2.9931110156831185e-05, "loss": 2.3532, "step": 2870 }, { "epoch": 0.036864, "grad_norm": 1.4453125, "learning_rate": 2.9930527785338687e-05, "loss": 2.3916, "step": 2880 }, { "epoch": 0.036992, "grad_norm": 1.59375, "learning_rate": 2.992994296832364e-05, "loss": 2.3863, "step": 2890 }, { "epoch": 0.03712, "grad_norm": 1.546875, "learning_rate": 2.9929355705881817e-05, "loss": 2.3658, "step": 2900 }, { "epoch": 0.037248, "grad_norm": 1.484375, "learning_rate": 2.992876599810942e-05, "loss": 2.3834, "step": 2910 }, { "epoch": 0.037376, "grad_norm": 1.5, "learning_rate": 2.9928173845103036e-05, "loss": 2.3663, "step": 2920 }, { "epoch": 0.037504, "grad_norm": 1.515625, "learning_rate": 2.9927579246959646e-05, "loss": 2.3773, "step": 2930 }, { "epoch": 0.037632, "grad_norm": 1.59375, "learning_rate": 2.9926982203776656e-05, "loss": 2.373, "step": 2940 }, { "epoch": 0.03776, "grad_norm": 1.5, "learning_rate": 2.992638271565185e-05, "loss": 2.3835, "step": 2950 }, { "epoch": 0.037888, "grad_norm": 1.515625, "learning_rate": 2.9925780782683423e-05, "loss": 2.373, "step": 2960 }, { "epoch": 0.038016, "grad_norm": 1.4375, "learning_rate": 2.9925176404969963e-05, "loss": 2.4052, "step": 2970 }, { "epoch": 0.038144, "grad_norm": 1.546875, "learning_rate": 2.992456958261047e-05, "loss": 2.3383, "step": 2980 }, { "epoch": 0.038272, "grad_norm": 1.484375, "learning_rate": 2.9923960315704332e-05, "loss": 2.3601, "step": 2990 }, { "epoch": 0.0384, "grad_norm": 8.375, "learning_rate": 2.9923348604351343e-05, "loss": 2.3852, "step": 3000 }, { "epoch": 0.038528, "grad_norm": 1.5234375, "learning_rate": 2.9922734448651702e-05, "loss": 2.3775, "step": 3010 }, { "epoch": 0.038656, "grad_norm": 1.6796875, "learning_rate": 2.9922117848706e-05, "loss": 2.3869, "step": 3020 }, { "epoch": 0.038784, "grad_norm": 1.4609375, "learning_rate": 2.992149880461523e-05, "loss": 2.349, "step": 3030 }, { "epoch": 0.038912, "grad_norm": 1.4375, "learning_rate": 2.9920877316480793e-05, "loss": 2.3789, "step": 3040 }, { "epoch": 0.03904, "grad_norm": 1.5078125, "learning_rate": 2.992025338440448e-05, "loss": 2.391, "step": 3050 }, { "epoch": 0.039168, "grad_norm": 1.5625, "learning_rate": 2.991962700848849e-05, "loss": 2.3788, "step": 3060 }, { "epoch": 0.039296, "grad_norm": 1.7890625, "learning_rate": 2.9918998188835422e-05, "loss": 2.3933, "step": 3070 }, { "epoch": 0.039424, "grad_norm": 1.625, "learning_rate": 2.9918366925548265e-05, "loss": 2.3396, "step": 3080 }, { "epoch": 0.039552, "grad_norm": 1.765625, "learning_rate": 2.991773321873042e-05, "loss": 2.4012, "step": 3090 }, { "epoch": 0.03968, "grad_norm": 1.578125, "learning_rate": 2.991709706848568e-05, "loss": 2.3897, "step": 3100 }, { "epoch": 0.039808, "grad_norm": 1.4375, "learning_rate": 2.991645847491825e-05, "loss": 2.3759, "step": 3110 }, { "epoch": 0.039936, "grad_norm": 1.5546875, "learning_rate": 2.991581743813272e-05, "loss": 2.3716, "step": 3120 }, { "epoch": 0.040064, "grad_norm": 1.515625, "learning_rate": 2.9915173958234093e-05, "loss": 2.3693, "step": 3130 }, { "epoch": 0.040192, "grad_norm": 1.6015625, "learning_rate": 2.9914528035327765e-05, "loss": 2.3728, "step": 3140 }, { "epoch": 0.04032, "grad_norm": 1.421875, "learning_rate": 2.9913879669519532e-05, "loss": 2.373, "step": 3150 }, { "epoch": 0.040448, "grad_norm": 1.5546875, "learning_rate": 2.9913228860915597e-05, "loss": 2.3603, "step": 3160 }, { "epoch": 0.040576, "grad_norm": 1.4921875, "learning_rate": 2.991257560962255e-05, "loss": 2.3448, "step": 3170 }, { "epoch": 0.040704, "grad_norm": 1.453125, "learning_rate": 2.9911919915747393e-05, "loss": 2.3519, "step": 3180 }, { "epoch": 0.040832, "grad_norm": 1.5, "learning_rate": 2.991126177939753e-05, "loss": 2.3708, "step": 3190 }, { "epoch": 0.04096, "grad_norm": 1.46875, "learning_rate": 2.991060120068075e-05, "loss": 2.3377, "step": 3200 }, { "epoch": 0.041088, "grad_norm": 1.5234375, "learning_rate": 2.990993817970526e-05, "loss": 2.3715, "step": 3210 }, { "epoch": 0.041216, "grad_norm": 1.578125, "learning_rate": 2.9909272716579653e-05, "loss": 2.3562, "step": 3220 }, { "epoch": 0.041344, "grad_norm": 1.546875, "learning_rate": 2.990860481141293e-05, "loss": 2.3725, "step": 3230 }, { "epoch": 0.041472, "grad_norm": 1.46875, "learning_rate": 2.9907934464314488e-05, "loss": 2.3744, "step": 3240 }, { "epoch": 0.0416, "grad_norm": 1.5546875, "learning_rate": 2.990726167539413e-05, "loss": 2.3593, "step": 3250 }, { "epoch": 0.041728, "grad_norm": 1.6328125, "learning_rate": 2.9906586444762043e-05, "loss": 2.4037, "step": 3260 }, { "epoch": 0.041856, "grad_norm": 1.5234375, "learning_rate": 2.990590877252884e-05, "loss": 2.3815, "step": 3270 }, { "epoch": 0.041984, "grad_norm": 1.4921875, "learning_rate": 2.990522865880551e-05, "loss": 2.3795, "step": 3280 }, { "epoch": 0.042112, "grad_norm": 1.578125, "learning_rate": 2.990454610370345e-05, "loss": 2.4086, "step": 3290 }, { "epoch": 0.04224, "grad_norm": 1.4609375, "learning_rate": 2.9903861107334467e-05, "loss": 2.3797, "step": 3300 }, { "epoch": 0.042368, "grad_norm": 5.1875, "learning_rate": 2.990317366981075e-05, "loss": 2.373, "step": 3310 }, { "epoch": 0.042496, "grad_norm": 1.5, "learning_rate": 2.99024837912449e-05, "loss": 2.355, "step": 3320 }, { "epoch": 0.042624, "grad_norm": 1.5, "learning_rate": 2.9901791471749915e-05, "loss": 2.3685, "step": 3330 }, { "epoch": 0.042752, "grad_norm": 3.046875, "learning_rate": 2.990109671143919e-05, "loss": 2.3856, "step": 3340 }, { "epoch": 0.04288, "grad_norm": 1.359375, "learning_rate": 2.9900399510426526e-05, "loss": 2.3564, "step": 3350 }, { "epoch": 0.043008, "grad_norm": 1.421875, "learning_rate": 2.989969986882612e-05, "loss": 2.3725, "step": 3360 }, { "epoch": 0.043136, "grad_norm": 1.4921875, "learning_rate": 2.9898997786752568e-05, "loss": 2.3655, "step": 3370 }, { "epoch": 0.043264, "grad_norm": 1.4765625, "learning_rate": 2.989829326432086e-05, "loss": 2.3587, "step": 3380 }, { "epoch": 0.043392, "grad_norm": 1.46875, "learning_rate": 2.9897586301646405e-05, "loss": 2.381, "step": 3390 }, { "epoch": 0.04352, "grad_norm": 2.078125, "learning_rate": 2.9896876898844983e-05, "loss": 2.3829, "step": 3400 }, { "epoch": 0.043648, "grad_norm": 1.375, "learning_rate": 2.9896165056032803e-05, "loss": 2.346, "step": 3410 }, { "epoch": 0.043776, "grad_norm": 1.703125, "learning_rate": 2.9895450773326456e-05, "loss": 2.3549, "step": 3420 }, { "epoch": 0.043904, "grad_norm": 1.8671875, "learning_rate": 2.9894734050842932e-05, "loss": 2.3605, "step": 3430 }, { "epoch": 0.044032, "grad_norm": 1.53125, "learning_rate": 2.9894014888699633e-05, "loss": 2.3797, "step": 3440 }, { "epoch": 0.04416, "grad_norm": 1.5, "learning_rate": 2.9893293287014348e-05, "loss": 2.3837, "step": 3450 }, { "epoch": 0.044288, "grad_norm": 1.6015625, "learning_rate": 2.989256924590527e-05, "loss": 2.4013, "step": 3460 }, { "epoch": 0.044416, "grad_norm": 1.4921875, "learning_rate": 2.9891842765490992e-05, "loss": 2.3614, "step": 3470 }, { "epoch": 0.044544, "grad_norm": 1.546875, "learning_rate": 2.9891113845890515e-05, "loss": 2.3838, "step": 3480 }, { "epoch": 0.044672, "grad_norm": 1.4921875, "learning_rate": 2.9890382487223223e-05, "loss": 2.3717, "step": 3490 }, { "epoch": 0.0448, "grad_norm": 1.5859375, "learning_rate": 2.988964868960891e-05, "loss": 2.3755, "step": 3500 }, { "epoch": 0.044928, "grad_norm": 1.5234375, "learning_rate": 2.9888912453167767e-05, "loss": 2.3926, "step": 3510 }, { "epoch": 0.045056, "grad_norm": 1.375, "learning_rate": 2.9888173778020388e-05, "loss": 2.3495, "step": 3520 }, { "epoch": 0.045184, "grad_norm": 1.4375, "learning_rate": 2.9887432664287753e-05, "loss": 2.3931, "step": 3530 }, { "epoch": 0.045312, "grad_norm": 1.5390625, "learning_rate": 2.9886689112091264e-05, "loss": 2.3888, "step": 3540 }, { "epoch": 0.04544, "grad_norm": 1.4453125, "learning_rate": 2.98859431215527e-05, "loss": 2.3591, "step": 3550 }, { "epoch": 0.045568, "grad_norm": 1.46875, "learning_rate": 2.988519469279426e-05, "loss": 2.3733, "step": 3560 }, { "epoch": 0.045696, "grad_norm": 1.4453125, "learning_rate": 2.9884443825938522e-05, "loss": 2.3646, "step": 3570 }, { "epoch": 0.045824, "grad_norm": 1.5, "learning_rate": 2.9883690521108475e-05, "loss": 2.3575, "step": 3580 }, { "epoch": 0.045952, "grad_norm": 1.4921875, "learning_rate": 2.988293477842751e-05, "loss": 2.3417, "step": 3590 }, { "epoch": 0.04608, "grad_norm": 1.3984375, "learning_rate": 2.9882176598019407e-05, "loss": 2.3725, "step": 3600 }, { "epoch": 0.046208, "grad_norm": 1.5078125, "learning_rate": 2.988141598000836e-05, "loss": 2.3853, "step": 3610 }, { "epoch": 0.046336, "grad_norm": 1.6015625, "learning_rate": 2.988065292451894e-05, "loss": 2.395, "step": 3620 }, { "epoch": 0.046464, "grad_norm": 1.4765625, "learning_rate": 2.987988743167614e-05, "loss": 2.3746, "step": 3630 }, { "epoch": 0.046592, "grad_norm": 1.390625, "learning_rate": 2.987911950160534e-05, "loss": 2.3793, "step": 3640 }, { "epoch": 0.04672, "grad_norm": 1.4296875, "learning_rate": 2.9878349134432327e-05, "loss": 2.365, "step": 3650 }, { "epoch": 0.046848, "grad_norm": 1.5390625, "learning_rate": 2.987757633028327e-05, "loss": 2.3776, "step": 3660 }, { "epoch": 0.046976, "grad_norm": 1.4140625, "learning_rate": 2.987680108928476e-05, "loss": 2.3729, "step": 3670 }, { "epoch": 0.047104, "grad_norm": 1.3828125, "learning_rate": 2.987602341156377e-05, "loss": 2.3451, "step": 3680 }, { "epoch": 0.047232, "grad_norm": 1.6328125, "learning_rate": 2.9875243297247686e-05, "loss": 2.3752, "step": 3690 }, { "epoch": 0.04736, "grad_norm": 1.46875, "learning_rate": 2.987446074646428e-05, "loss": 2.3479, "step": 3700 }, { "epoch": 0.047488, "grad_norm": 1.59375, "learning_rate": 2.9873675759341727e-05, "loss": 2.3829, "step": 3710 }, { "epoch": 0.047616, "grad_norm": 1.671875, "learning_rate": 2.987288833600861e-05, "loss": 2.3653, "step": 3720 }, { "epoch": 0.047744, "grad_norm": 1.5546875, "learning_rate": 2.9872098476593895e-05, "loss": 2.37, "step": 3730 }, { "epoch": 0.047872, "grad_norm": 1.4453125, "learning_rate": 2.987130618122696e-05, "loss": 2.3677, "step": 3740 }, { "epoch": 0.048, "grad_norm": 1.59375, "learning_rate": 2.9870511450037575e-05, "loss": 2.3569, "step": 3750 }, { "epoch": 0.048128, "grad_norm": 1.90625, "learning_rate": 2.9869714283155923e-05, "loss": 2.3478, "step": 3760 }, { "epoch": 0.048256, "grad_norm": 1.34375, "learning_rate": 2.986891468071256e-05, "loss": 2.35, "step": 3770 }, { "epoch": 0.048384, "grad_norm": 1.40625, "learning_rate": 2.9868112642838463e-05, "loss": 2.349, "step": 3780 }, { "epoch": 0.048512, "grad_norm": 1.3984375, "learning_rate": 2.9867308169665002e-05, "loss": 2.3622, "step": 3790 }, { "epoch": 0.04864, "grad_norm": 1.453125, "learning_rate": 2.9866501261323937e-05, "loss": 2.3643, "step": 3800 }, { "epoch": 0.048768, "grad_norm": 1.5625, "learning_rate": 2.986569191794744e-05, "loss": 2.3236, "step": 3810 }, { "epoch": 0.048896, "grad_norm": 1.5, "learning_rate": 2.986488013966808e-05, "loss": 2.3365, "step": 3820 }, { "epoch": 0.049024, "grad_norm": 1.4921875, "learning_rate": 2.9864065926618813e-05, "loss": 2.3648, "step": 3830 }, { "epoch": 0.049152, "grad_norm": 1.4375, "learning_rate": 2.9863249278933004e-05, "loss": 2.3319, "step": 3840 }, { "epoch": 0.04928, "grad_norm": 1.5078125, "learning_rate": 2.9862430196744418e-05, "loss": 2.3602, "step": 3850 }, { "epoch": 0.049408, "grad_norm": 1.578125, "learning_rate": 2.986160868018721e-05, "loss": 2.3684, "step": 3860 }, { "epoch": 0.049536, "grad_norm": 1.515625, "learning_rate": 2.9860784729395948e-05, "loss": 2.356, "step": 3870 }, { "epoch": 0.049664, "grad_norm": 1.640625, "learning_rate": 2.9859958344505576e-05, "loss": 2.3637, "step": 3880 }, { "epoch": 0.049792, "grad_norm": 1.4453125, "learning_rate": 2.9859129525651462e-05, "loss": 2.383, "step": 3890 }, { "epoch": 0.04992, "grad_norm": 1.40625, "learning_rate": 2.9858298272969357e-05, "loss": 2.3805, "step": 3900 }, { "epoch": 0.050048, "grad_norm": 1.53125, "learning_rate": 2.985746458659541e-05, "loss": 2.3715, "step": 3910 }, { "epoch": 0.050176, "grad_norm": 1.8515625, "learning_rate": 2.9856628466666183e-05, "loss": 2.3797, "step": 3920 }, { "epoch": 0.050304, "grad_norm": 1.4921875, "learning_rate": 2.9855789913318625e-05, "loss": 2.3664, "step": 3930 }, { "epoch": 0.050432, "grad_norm": 1.4765625, "learning_rate": 2.985494892669008e-05, "loss": 2.4066, "step": 3940 }, { "epoch": 0.05056, "grad_norm": 1.484375, "learning_rate": 2.98541055069183e-05, "loss": 2.3877, "step": 3950 }, { "epoch": 0.050688, "grad_norm": 1.3984375, "learning_rate": 2.9853259654141426e-05, "loss": 2.3381, "step": 3960 }, { "epoch": 0.050816, "grad_norm": 1.5234375, "learning_rate": 2.985241136849801e-05, "loss": 2.367, "step": 3970 }, { "epoch": 0.050944, "grad_norm": 1.4296875, "learning_rate": 2.9851560650126994e-05, "loss": 2.3637, "step": 3980 }, { "epoch": 0.051072, "grad_norm": 1.515625, "learning_rate": 2.9850707499167715e-05, "loss": 2.3798, "step": 3990 }, { "epoch": 0.0512, "grad_norm": 1.5234375, "learning_rate": 2.9849851915759923e-05, "loss": 2.3503, "step": 4000 }, { "epoch": 0.051328, "grad_norm": 1.34375, "learning_rate": 2.984899390004375e-05, "loss": 2.3194, "step": 4010 }, { "epoch": 0.051456, "grad_norm": 1.4296875, "learning_rate": 2.9848133452159736e-05, "loss": 2.3706, "step": 4020 }, { "epoch": 0.051584, "grad_norm": 1.625, "learning_rate": 2.9847270572248818e-05, "loss": 2.3702, "step": 4030 }, { "epoch": 0.051712, "grad_norm": 1.6328125, "learning_rate": 2.9846405260452324e-05, "loss": 2.3716, "step": 4040 }, { "epoch": 0.05184, "grad_norm": 1.5234375, "learning_rate": 2.9845537516911992e-05, "loss": 2.3584, "step": 4050 }, { "epoch": 0.051968, "grad_norm": 1.546875, "learning_rate": 2.984466734176995e-05, "loss": 2.3694, "step": 4060 }, { "epoch": 0.052096, "grad_norm": 1.4765625, "learning_rate": 2.984379473516873e-05, "loss": 2.3624, "step": 4070 }, { "epoch": 0.052224, "grad_norm": 1.4140625, "learning_rate": 2.9842919697251254e-05, "loss": 2.3474, "step": 4080 }, { "epoch": 0.052352, "grad_norm": 1.5234375, "learning_rate": 2.9842042228160852e-05, "loss": 2.3551, "step": 4090 }, { "epoch": 0.05248, "grad_norm": 1.515625, "learning_rate": 2.984116232804125e-05, "loss": 2.3683, "step": 4100 }, { "epoch": 0.052608, "grad_norm": 2.296875, "learning_rate": 2.9840279997036562e-05, "loss": 2.3393, "step": 4110 }, { "epoch": 0.052736, "grad_norm": 1.4765625, "learning_rate": 2.9839395235291313e-05, "loss": 2.3649, "step": 4120 }, { "epoch": 0.052864, "grad_norm": 1.3828125, "learning_rate": 2.983850804295042e-05, "loss": 2.3704, "step": 4130 }, { "epoch": 0.052992, "grad_norm": 1.4140625, "learning_rate": 2.98376184201592e-05, "loss": 2.338, "step": 4140 }, { "epoch": 0.05312, "grad_norm": 1.453125, "learning_rate": 2.983672636706337e-05, "loss": 2.3535, "step": 4150 }, { "epoch": 0.053248, "grad_norm": 1.46875, "learning_rate": 2.9835831883809035e-05, "loss": 2.3863, "step": 4160 }, { "epoch": 0.053376, "grad_norm": 1.7890625, "learning_rate": 2.9834934970542715e-05, "loss": 2.3741, "step": 4170 }, { "epoch": 0.053504, "grad_norm": 1.4375, "learning_rate": 2.9834035627411312e-05, "loss": 2.3677, "step": 4180 }, { "epoch": 0.053632, "grad_norm": 1.4921875, "learning_rate": 2.9833133854562133e-05, "loss": 2.3697, "step": 4190 }, { "epoch": 0.05376, "grad_norm": 1.984375, "learning_rate": 2.9832229652142885e-05, "loss": 2.3594, "step": 4200 }, { "epoch": 0.053888, "grad_norm": 1.4765625, "learning_rate": 2.9831323020301665e-05, "loss": 2.3391, "step": 4210 }, { "epoch": 0.054016, "grad_norm": 1.546875, "learning_rate": 2.983041395918698e-05, "loss": 2.3582, "step": 4220 }, { "epoch": 0.054144, "grad_norm": 1.78125, "learning_rate": 2.9829502468947728e-05, "loss": 2.3612, "step": 4230 }, { "epoch": 0.054272, "grad_norm": 1.625, "learning_rate": 2.9828588549733204e-05, "loss": 2.3545, "step": 4240 }, { "epoch": 0.0544, "grad_norm": 3.109375, "learning_rate": 2.9827672201693096e-05, "loss": 2.3547, "step": 4250 }, { "epoch": 0.054528, "grad_norm": 1.4296875, "learning_rate": 2.9826753424977502e-05, "loss": 2.3258, "step": 4260 }, { "epoch": 0.054656, "grad_norm": 1.421875, "learning_rate": 2.982583221973691e-05, "loss": 2.3326, "step": 4270 }, { "epoch": 0.054784, "grad_norm": 1.5234375, "learning_rate": 2.982490858612221e-05, "loss": 2.3392, "step": 4280 }, { "epoch": 0.054912, "grad_norm": 1.671875, "learning_rate": 2.9823982524284682e-05, "loss": 2.3365, "step": 4290 }, { "epoch": 0.05504, "grad_norm": 1.609375, "learning_rate": 2.9823054034376012e-05, "loss": 2.3322, "step": 4300 }, { "epoch": 0.055168, "grad_norm": 1.4375, "learning_rate": 2.982212311654828e-05, "loss": 2.3695, "step": 4310 }, { "epoch": 0.055296, "grad_norm": 1.46875, "learning_rate": 2.9821189770953964e-05, "loss": 2.3497, "step": 4320 }, { "epoch": 0.055424, "grad_norm": 1.421875, "learning_rate": 2.9820253997745944e-05, "loss": 2.3637, "step": 4330 }, { "epoch": 0.055552, "grad_norm": 1.5234375, "learning_rate": 2.9819315797077484e-05, "loss": 2.2958, "step": 4340 }, { "epoch": 0.05568, "grad_norm": 1.7734375, "learning_rate": 2.981837516910226e-05, "loss": 2.3559, "step": 4350 }, { "epoch": 0.055808, "grad_norm": 1.53125, "learning_rate": 2.9817432113974345e-05, "loss": 2.3476, "step": 4360 }, { "epoch": 0.055936, "grad_norm": 1.578125, "learning_rate": 2.9816486631848205e-05, "loss": 2.348, "step": 4370 }, { "epoch": 0.056064, "grad_norm": 1.484375, "learning_rate": 2.9815538722878693e-05, "loss": 2.3522, "step": 4380 }, { "epoch": 0.056192, "grad_norm": 1.5, "learning_rate": 2.9814588387221086e-05, "loss": 2.373, "step": 4390 }, { "epoch": 0.05632, "grad_norm": 1.5625, "learning_rate": 2.981363562503103e-05, "loss": 2.3484, "step": 4400 }, { "epoch": 0.056448, "grad_norm": 1.59375, "learning_rate": 2.9812680436464584e-05, "loss": 2.3279, "step": 4410 }, { "epoch": 0.056576, "grad_norm": 1.53125, "learning_rate": 2.9811722821678207e-05, "loss": 2.3865, "step": 4420 }, { "epoch": 0.056704, "grad_norm": 1.4765625, "learning_rate": 2.9810762780828742e-05, "loss": 2.375, "step": 4430 }, { "epoch": 0.056832, "grad_norm": 1.46875, "learning_rate": 2.9809800314073448e-05, "loss": 2.3523, "step": 4440 }, { "epoch": 0.05696, "grad_norm": 1.453125, "learning_rate": 2.980883542156996e-05, "loss": 2.3508, "step": 4450 }, { "epoch": 0.057088, "grad_norm": 1.5, "learning_rate": 2.9807868103476325e-05, "loss": 2.3416, "step": 4460 }, { "epoch": 0.057216, "grad_norm": 1.4609375, "learning_rate": 2.9806898359950986e-05, "loss": 2.3476, "step": 4470 }, { "epoch": 0.057344, "grad_norm": 1.4765625, "learning_rate": 2.980592619115278e-05, "loss": 2.3671, "step": 4480 }, { "epoch": 0.057472, "grad_norm": 1.453125, "learning_rate": 2.9804951597240938e-05, "loss": 2.3658, "step": 4490 }, { "epoch": 0.0576, "grad_norm": 1.5625, "learning_rate": 2.9803974578375096e-05, "loss": 2.3835, "step": 4500 }, { "epoch": 0.057728, "grad_norm": 1.578125, "learning_rate": 2.980299513471528e-05, "loss": 2.3649, "step": 4510 }, { "epoch": 0.057856, "grad_norm": 1.5234375, "learning_rate": 2.980201326642192e-05, "loss": 2.3377, "step": 4520 }, { "epoch": 0.057984, "grad_norm": 3.015625, "learning_rate": 2.9801028973655836e-05, "loss": 2.3412, "step": 4530 }, { "epoch": 0.058112, "grad_norm": 1.3515625, "learning_rate": 2.9800042256578254e-05, "loss": 2.3684, "step": 4540 }, { "epoch": 0.05824, "grad_norm": 1.3828125, "learning_rate": 2.979905311535079e-05, "loss": 2.3294, "step": 4550 }, { "epoch": 0.058368, "grad_norm": 1.5390625, "learning_rate": 2.979806155013545e-05, "loss": 2.3725, "step": 4560 }, { "epoch": 0.058496, "grad_norm": 1.921875, "learning_rate": 2.9797067561094654e-05, "loss": 2.3359, "step": 4570 }, { "epoch": 0.058624, "grad_norm": 1.6171875, "learning_rate": 2.9796071148391216e-05, "loss": 2.3347, "step": 4580 }, { "epoch": 0.058752, "grad_norm": 1.5546875, "learning_rate": 2.9795072312188333e-05, "loss": 2.3816, "step": 4590 }, { "epoch": 0.05888, "grad_norm": 1.578125, "learning_rate": 2.979407105264961e-05, "loss": 2.3625, "step": 4600 }, { "epoch": 0.059008, "grad_norm": 4.9375, "learning_rate": 2.979306736993905e-05, "loss": 2.3376, "step": 4610 }, { "epoch": 0.059136, "grad_norm": 1.4453125, "learning_rate": 2.9792061264221044e-05, "loss": 2.3199, "step": 4620 }, { "epoch": 0.059264, "grad_norm": 1.515625, "learning_rate": 2.979105273566039e-05, "loss": 2.3012, "step": 4630 }, { "epoch": 0.059392, "grad_norm": 1.4296875, "learning_rate": 2.9790041784422274e-05, "loss": 2.342, "step": 4640 }, { "epoch": 0.05952, "grad_norm": 1.4921875, "learning_rate": 2.978902841067229e-05, "loss": 2.3578, "step": 4650 }, { "epoch": 0.059648, "grad_norm": 1.390625, "learning_rate": 2.9788012614576413e-05, "loss": 2.3352, "step": 4660 }, { "epoch": 0.059776, "grad_norm": 1.5078125, "learning_rate": 2.9786994396301036e-05, "loss": 2.3484, "step": 4670 }, { "epoch": 0.059904, "grad_norm": 1.46875, "learning_rate": 2.9785973756012927e-05, "loss": 2.3637, "step": 4680 }, { "epoch": 0.060032, "grad_norm": 1.6015625, "learning_rate": 2.9784950693879257e-05, "loss": 2.3702, "step": 4690 }, { "epoch": 0.06016, "grad_norm": 1.4609375, "learning_rate": 2.978392521006761e-05, "loss": 2.3442, "step": 4700 }, { "epoch": 0.060288, "grad_norm": 1.6328125, "learning_rate": 2.9782897304745945e-05, "loss": 2.3538, "step": 4710 }, { "epoch": 0.060416, "grad_norm": 1.5078125, "learning_rate": 2.9781866978082623e-05, "loss": 2.333, "step": 4720 }, { "epoch": 0.060544, "grad_norm": 1.53125, "learning_rate": 2.9780834230246412e-05, "loss": 2.3364, "step": 4730 }, { "epoch": 0.060672, "grad_norm": 1.5, "learning_rate": 2.977979906140646e-05, "loss": 2.3551, "step": 4740 }, { "epoch": 0.0608, "grad_norm": 1.34375, "learning_rate": 2.9778761471732334e-05, "loss": 2.3121, "step": 4750 }, { "epoch": 0.060928, "grad_norm": 1.5703125, "learning_rate": 2.977772146139398e-05, "loss": 2.3483, "step": 4760 }, { "epoch": 0.061056, "grad_norm": 1.671875, "learning_rate": 2.9776679030561733e-05, "loss": 2.3511, "step": 4770 }, { "epoch": 0.061184, "grad_norm": 1.5234375, "learning_rate": 2.9775634179406352e-05, "loss": 2.3657, "step": 4780 }, { "epoch": 0.061312, "grad_norm": 1.53125, "learning_rate": 2.9774586908098967e-05, "loss": 2.3497, "step": 4790 }, { "epoch": 0.06144, "grad_norm": 1.4765625, "learning_rate": 2.977353721681112e-05, "loss": 2.3584, "step": 4800 }, { "epoch": 0.061568, "grad_norm": 1.4296875, "learning_rate": 2.9772485105714742e-05, "loss": 2.3573, "step": 4810 }, { "epoch": 0.061696, "grad_norm": 1.5390625, "learning_rate": 2.977143057498216e-05, "loss": 2.3498, "step": 4820 }, { "epoch": 0.061824, "grad_norm": 1.6875, "learning_rate": 2.9770373624786096e-05, "loss": 2.3374, "step": 4830 }, { "epoch": 0.061952, "grad_norm": 1.65625, "learning_rate": 2.9769314255299682e-05, "loss": 2.3501, "step": 4840 }, { "epoch": 0.06208, "grad_norm": 1.5390625, "learning_rate": 2.9768252466696424e-05, "loss": 2.3624, "step": 4850 }, { "epoch": 0.062208, "grad_norm": 1.4375, "learning_rate": 2.9767188259150244e-05, "loss": 2.36, "step": 4860 }, { "epoch": 0.062336, "grad_norm": 1.4375, "learning_rate": 2.976612163283545e-05, "loss": 2.321, "step": 4870 }, { "epoch": 0.062464, "grad_norm": 4.5625, "learning_rate": 2.976505258792675e-05, "loss": 2.335, "step": 4880 }, { "epoch": 0.062592, "grad_norm": 1.5234375, "learning_rate": 2.9763981124599244e-05, "loss": 2.3638, "step": 4890 }, { "epoch": 0.06272, "grad_norm": 1.3359375, "learning_rate": 2.976290724302843e-05, "loss": 2.3313, "step": 4900 }, { "epoch": 0.062848, "grad_norm": 1.484375, "learning_rate": 2.97618309433902e-05, "loss": 2.3698, "step": 4910 }, { "epoch": 0.062976, "grad_norm": 1.609375, "learning_rate": 2.9760752225860858e-05, "loss": 2.3556, "step": 4920 }, { "epoch": 0.063104, "grad_norm": 1.4453125, "learning_rate": 2.975967109061708e-05, "loss": 2.3459, "step": 4930 }, { "epoch": 0.063232, "grad_norm": 1.53125, "learning_rate": 2.9758587537835944e-05, "loss": 2.358, "step": 4940 }, { "epoch": 0.06336, "grad_norm": 1.4296875, "learning_rate": 2.975750156769494e-05, "loss": 2.3652, "step": 4950 }, { "epoch": 0.063488, "grad_norm": 1.640625, "learning_rate": 2.9756413180371937e-05, "loss": 2.3523, "step": 4960 }, { "epoch": 0.063616, "grad_norm": 1.5234375, "learning_rate": 2.9755322376045206e-05, "loss": 2.3219, "step": 4970 }, { "epoch": 0.063744, "grad_norm": 1.4140625, "learning_rate": 2.975422915489342e-05, "loss": 2.3261, "step": 4980 }, { "epoch": 0.063872, "grad_norm": 1.4140625, "learning_rate": 2.9753133517095632e-05, "loss": 2.3292, "step": 4990 }, { "epoch": 0.064, "grad_norm": 25.0, "learning_rate": 2.9752035462831303e-05, "loss": 2.3228, "step": 5000 }, { "epoch": 0.064128, "grad_norm": 1.4765625, "learning_rate": 2.975093499228029e-05, "loss": 2.2965, "step": 5010 }, { "epoch": 0.064256, "grad_norm": 1.453125, "learning_rate": 2.9749832105622842e-05, "loss": 2.3477, "step": 5020 }, { "epoch": 0.064384, "grad_norm": 1.7265625, "learning_rate": 2.9748726803039604e-05, "loss": 2.3495, "step": 5030 }, { "epoch": 0.064512, "grad_norm": 1.4765625, "learning_rate": 2.9747619084711618e-05, "loss": 2.3487, "step": 5040 }, { "epoch": 0.06464, "grad_norm": 4.0625, "learning_rate": 2.9746508950820317e-05, "loss": 2.3384, "step": 5050 }, { "epoch": 0.064768, "grad_norm": 1.5078125, "learning_rate": 2.974539640154754e-05, "loss": 2.3554, "step": 5060 }, { "epoch": 0.064896, "grad_norm": 1.640625, "learning_rate": 2.974428143707551e-05, "loss": 2.3452, "step": 5070 }, { "epoch": 0.065024, "grad_norm": 1.6875, "learning_rate": 2.974316405758685e-05, "loss": 2.3723, "step": 5080 }, { "epoch": 0.065152, "grad_norm": 1.4140625, "learning_rate": 2.974204426326459e-05, "loss": 2.3629, "step": 5090 }, { "epoch": 0.06528, "grad_norm": 1.4375, "learning_rate": 2.9740922054292134e-05, "loss": 2.3283, "step": 5100 }, { "epoch": 0.065408, "grad_norm": 1.484375, "learning_rate": 2.973979743085329e-05, "loss": 2.3335, "step": 5110 }, { "epoch": 0.065536, "grad_norm": 1.5, "learning_rate": 2.973867039313228e-05, "loss": 2.3573, "step": 5120 }, { "epoch": 0.065664, "grad_norm": 1.765625, "learning_rate": 2.973754094131369e-05, "loss": 2.2899, "step": 5130 }, { "epoch": 0.065792, "grad_norm": 1.3984375, "learning_rate": 2.9736409075582525e-05, "loss": 2.309, "step": 5140 }, { "epoch": 0.06592, "grad_norm": 1.6015625, "learning_rate": 2.9735274796124172e-05, "loss": 2.3354, "step": 5150 }, { "epoch": 0.066048, "grad_norm": 1.5234375, "learning_rate": 2.973413810312442e-05, "loss": 2.3406, "step": 5160 }, { "epoch": 0.066176, "grad_norm": 1.546875, "learning_rate": 2.9732998996769452e-05, "loss": 2.3654, "step": 5170 }, { "epoch": 0.066304, "grad_norm": 1.5703125, "learning_rate": 2.973185747724585e-05, "loss": 2.3373, "step": 5180 }, { "epoch": 0.066432, "grad_norm": 1.8515625, "learning_rate": 2.9730713544740582e-05, "loss": 2.3468, "step": 5190 }, { "epoch": 0.06656, "grad_norm": 3.8125, "learning_rate": 2.972956719944102e-05, "loss": 2.3493, "step": 5200 }, { "epoch": 0.066688, "grad_norm": 1.4765625, "learning_rate": 2.9728418441534922e-05, "loss": 2.3504, "step": 5210 }, { "epoch": 0.066816, "grad_norm": 1.5234375, "learning_rate": 2.9727267271210455e-05, "loss": 2.3245, "step": 5220 }, { "epoch": 0.066944, "grad_norm": 1.5234375, "learning_rate": 2.9726113688656172e-05, "loss": 2.3438, "step": 5230 }, { "epoch": 0.067072, "grad_norm": 1.390625, "learning_rate": 2.972495769406102e-05, "loss": 2.33, "step": 5240 }, { "epoch": 0.0672, "grad_norm": 1.4921875, "learning_rate": 2.972379928761434e-05, "loss": 2.338, "step": 5250 }, { "epoch": 0.067328, "grad_norm": 1.40625, "learning_rate": 2.9722638469505876e-05, "loss": 2.369, "step": 5260 }, { "epoch": 0.067456, "grad_norm": 1.59375, "learning_rate": 2.9721475239925763e-05, "loss": 2.3662, "step": 5270 }, { "epoch": 0.067584, "grad_norm": 1.4765625, "learning_rate": 2.9720309599064528e-05, "loss": 2.354, "step": 5280 }, { "epoch": 0.067712, "grad_norm": 1.5, "learning_rate": 2.9719141547113096e-05, "loss": 2.3496, "step": 5290 }, { "epoch": 0.06784, "grad_norm": 1.5390625, "learning_rate": 2.971797108426278e-05, "loss": 2.3524, "step": 5300 }, { "epoch": 0.067968, "grad_norm": 1.640625, "learning_rate": 2.9716798210705308e-05, "loss": 2.3226, "step": 5310 }, { "epoch": 0.068096, "grad_norm": 1.4921875, "learning_rate": 2.971562292663278e-05, "loss": 2.3471, "step": 5320 }, { "epoch": 0.068224, "grad_norm": 1.4140625, "learning_rate": 2.97144452322377e-05, "loss": 2.3497, "step": 5330 }, { "epoch": 0.068352, "grad_norm": 1.4921875, "learning_rate": 2.971326512771297e-05, "loss": 2.3428, "step": 5340 }, { "epoch": 0.06848, "grad_norm": 1.4921875, "learning_rate": 2.9712082613251878e-05, "loss": 2.3378, "step": 5350 }, { "epoch": 0.068608, "grad_norm": 1.53125, "learning_rate": 2.9710897689048115e-05, "loss": 2.3475, "step": 5360 }, { "epoch": 0.068736, "grad_norm": 1.5, "learning_rate": 2.9709710355295764e-05, "loss": 2.3329, "step": 5370 }, { "epoch": 0.068864, "grad_norm": 1.546875, "learning_rate": 2.9708520612189305e-05, "loss": 2.3554, "step": 5380 }, { "epoch": 0.068992, "grad_norm": 1.4765625, "learning_rate": 2.9707328459923606e-05, "loss": 2.3492, "step": 5390 }, { "epoch": 0.06912, "grad_norm": 1.5546875, "learning_rate": 2.9706133898693934e-05, "loss": 2.3778, "step": 5400 }, { "epoch": 0.069248, "grad_norm": 1.484375, "learning_rate": 2.9704936928695955e-05, "loss": 2.2892, "step": 5410 }, { "epoch": 0.069376, "grad_norm": 1.4296875, "learning_rate": 2.970373755012572e-05, "loss": 2.3489, "step": 5420 }, { "epoch": 0.069504, "grad_norm": 1.4765625, "learning_rate": 2.9702535763179682e-05, "loss": 2.3256, "step": 5430 }, { "epoch": 0.069632, "grad_norm": 1.390625, "learning_rate": 2.9701331568054682e-05, "loss": 2.333, "step": 5440 }, { "epoch": 0.06976, "grad_norm": 1.5390625, "learning_rate": 2.9700124964947963e-05, "loss": 2.3472, "step": 5450 }, { "epoch": 0.069888, "grad_norm": 1.578125, "learning_rate": 2.969891595405716e-05, "loss": 2.3959, "step": 5460 }, { "epoch": 0.070016, "grad_norm": 1.359375, "learning_rate": 2.9697704535580298e-05, "loss": 2.3225, "step": 5470 }, { "epoch": 0.070144, "grad_norm": 1.4296875, "learning_rate": 2.9696490709715796e-05, "loss": 2.3302, "step": 5480 }, { "epoch": 0.070272, "grad_norm": 1.4765625, "learning_rate": 2.969527447666248e-05, "loss": 2.3575, "step": 5490 }, { "epoch": 0.0704, "grad_norm": 1.5546875, "learning_rate": 2.9694055836619552e-05, "loss": 2.321, "step": 5500 }, { "epoch": 0.070528, "grad_norm": 1.4921875, "learning_rate": 2.9692834789786626e-05, "loss": 2.337, "step": 5510 }, { "epoch": 0.070656, "grad_norm": 1.5859375, "learning_rate": 2.9691611336363694e-05, "loss": 2.3643, "step": 5520 }, { "epoch": 0.070784, "grad_norm": 1.59375, "learning_rate": 2.9690385476551146e-05, "loss": 2.3323, "step": 5530 }, { "epoch": 0.070912, "grad_norm": 1.4609375, "learning_rate": 2.9689157210549784e-05, "loss": 2.3941, "step": 5540 }, { "epoch": 0.07104, "grad_norm": 1.53125, "learning_rate": 2.9687926538560775e-05, "loss": 2.3403, "step": 5550 }, { "epoch": 0.071168, "grad_norm": 1.4375, "learning_rate": 2.968669346078571e-05, "loss": 2.3535, "step": 5560 }, { "epoch": 0.071296, "grad_norm": 1.546875, "learning_rate": 2.968545797742655e-05, "loss": 2.34, "step": 5570 }, { "epoch": 0.071424, "grad_norm": 1.5390625, "learning_rate": 2.9684220088685655e-05, "loss": 2.3464, "step": 5580 }, { "epoch": 0.071552, "grad_norm": 1.4921875, "learning_rate": 2.968297979476579e-05, "loss": 2.3498, "step": 5590 }, { "epoch": 0.07168, "grad_norm": 1.5546875, "learning_rate": 2.96817370958701e-05, "loss": 2.349, "step": 5600 }, { "epoch": 0.071808, "grad_norm": 1.3984375, "learning_rate": 2.9680491992202147e-05, "loss": 2.3709, "step": 5610 }, { "epoch": 0.071936, "grad_norm": 1.515625, "learning_rate": 2.9679244483965856e-05, "loss": 2.344, "step": 5620 }, { "epoch": 0.072064, "grad_norm": 1.5546875, "learning_rate": 2.967799457136556e-05, "loss": 2.3479, "step": 5630 }, { "epoch": 0.072192, "grad_norm": 1.421875, "learning_rate": 2.9676742254605996e-05, "loss": 2.3367, "step": 5640 }, { "epoch": 0.07232, "grad_norm": 1.546875, "learning_rate": 2.967548753389228e-05, "loss": 2.3612, "step": 5650 }, { "epoch": 0.072448, "grad_norm": 2.171875, "learning_rate": 2.9674230409429923e-05, "loss": 2.3214, "step": 5660 }, { "epoch": 0.072576, "grad_norm": 1.3515625, "learning_rate": 2.967297088142484e-05, "loss": 2.3282, "step": 5670 }, { "epoch": 0.072704, "grad_norm": 1.4921875, "learning_rate": 2.967170895008333e-05, "loss": 2.311, "step": 5680 }, { "epoch": 0.072832, "grad_norm": 1.5078125, "learning_rate": 2.967044461561209e-05, "loss": 2.3328, "step": 5690 }, { "epoch": 0.07296, "grad_norm": 1.671875, "learning_rate": 2.9669177878218214e-05, "loss": 2.3322, "step": 5700 }, { "epoch": 0.073088, "grad_norm": 1.59375, "learning_rate": 2.966790873810918e-05, "loss": 2.3467, "step": 5710 }, { "epoch": 0.073216, "grad_norm": 1.4765625, "learning_rate": 2.9666637195492866e-05, "loss": 2.3387, "step": 5720 }, { "epoch": 0.073344, "grad_norm": 1.3984375, "learning_rate": 2.9665363250577544e-05, "loss": 2.3384, "step": 5730 }, { "epoch": 0.073472, "grad_norm": 1.7578125, "learning_rate": 2.9664086903571876e-05, "loss": 2.3465, "step": 5740 }, { "epoch": 0.0736, "grad_norm": 1.7265625, "learning_rate": 2.966280815468492e-05, "loss": 2.3585, "step": 5750 }, { "epoch": 0.073728, "grad_norm": 1.5859375, "learning_rate": 2.9661527004126126e-05, "loss": 2.3498, "step": 5760 }, { "epoch": 0.073856, "grad_norm": 1.5859375, "learning_rate": 2.966024345210534e-05, "loss": 2.3417, "step": 5770 }, { "epoch": 0.073984, "grad_norm": 1.4375, "learning_rate": 2.9658957498832798e-05, "loss": 2.346, "step": 5780 }, { "epoch": 0.074112, "grad_norm": 1.5, "learning_rate": 2.9657669144519126e-05, "loss": 2.33, "step": 5790 }, { "epoch": 0.07424, "grad_norm": 1.4609375, "learning_rate": 2.9656378389375357e-05, "loss": 2.3608, "step": 5800 }, { "epoch": 0.074368, "grad_norm": 1.5703125, "learning_rate": 2.9655085233612902e-05, "loss": 2.3468, "step": 5810 }, { "epoch": 0.074496, "grad_norm": 1.4609375, "learning_rate": 2.9653789677443575e-05, "loss": 2.3435, "step": 5820 }, { "epoch": 0.074624, "grad_norm": 4.1875, "learning_rate": 2.9652491721079578e-05, "loss": 2.3402, "step": 5830 }, { "epoch": 0.074752, "grad_norm": 1.546875, "learning_rate": 2.9651191364733506e-05, "loss": 2.3447, "step": 5840 }, { "epoch": 0.07488, "grad_norm": 1.4921875, "learning_rate": 2.964988860861835e-05, "loss": 2.3476, "step": 5850 }, { "epoch": 0.075008, "grad_norm": 1.484375, "learning_rate": 2.9648583452947497e-05, "loss": 2.3472, "step": 5860 }, { "epoch": 0.075136, "grad_norm": 1.5546875, "learning_rate": 2.964727589793472e-05, "loss": 2.3325, "step": 5870 }, { "epoch": 0.075264, "grad_norm": 2.53125, "learning_rate": 2.9645965943794177e-05, "loss": 2.3389, "step": 5880 }, { "epoch": 0.075392, "grad_norm": 1.65625, "learning_rate": 2.9644653590740448e-05, "loss": 2.3304, "step": 5890 }, { "epoch": 0.07552, "grad_norm": 1.4921875, "learning_rate": 2.964333883898848e-05, "loss": 2.3195, "step": 5900 }, { "epoch": 0.075648, "grad_norm": 1.5390625, "learning_rate": 2.964202168875362e-05, "loss": 2.2979, "step": 5910 }, { "epoch": 0.075776, "grad_norm": 1.4765625, "learning_rate": 2.9640702140251615e-05, "loss": 2.3334, "step": 5920 }, { "epoch": 0.075904, "grad_norm": 1.46875, "learning_rate": 2.9639380193698584e-05, "loss": 2.3578, "step": 5930 }, { "epoch": 0.076032, "grad_norm": 1.53125, "learning_rate": 2.9638055849311063e-05, "loss": 2.367, "step": 5940 }, { "epoch": 0.07616, "grad_norm": 1.46875, "learning_rate": 2.9636729107305976e-05, "loss": 2.3051, "step": 5950 }, { "epoch": 0.076288, "grad_norm": 1.546875, "learning_rate": 2.963539996790062e-05, "loss": 2.3588, "step": 5960 }, { "epoch": 0.076416, "grad_norm": 1.5234375, "learning_rate": 2.9634068431312715e-05, "loss": 2.3353, "step": 5970 }, { "epoch": 0.076544, "grad_norm": 1.65625, "learning_rate": 2.9632734497760346e-05, "loss": 2.3557, "step": 5980 }, { "epoch": 0.076672, "grad_norm": 1.53125, "learning_rate": 2.963139816746201e-05, "loss": 2.3092, "step": 5990 }, { "epoch": 0.0768, "grad_norm": 1.6484375, "learning_rate": 2.963005944063659e-05, "loss": 2.3366, "step": 6000 }, { "epoch": 0.076928, "grad_norm": 1.5234375, "learning_rate": 2.9628718317503347e-05, "loss": 2.3056, "step": 6010 }, { "epoch": 0.077056, "grad_norm": 1.5703125, "learning_rate": 2.9627374798281965e-05, "loss": 2.3221, "step": 6020 }, { "epoch": 0.077184, "grad_norm": 1.453125, "learning_rate": 2.96260288831925e-05, "loss": 2.3296, "step": 6030 }, { "epoch": 0.077312, "grad_norm": 1.5234375, "learning_rate": 2.9624680572455393e-05, "loss": 2.3755, "step": 6040 }, { "epoch": 0.07744, "grad_norm": 1.5390625, "learning_rate": 2.9623329866291506e-05, "loss": 2.3081, "step": 6050 }, { "epoch": 0.077568, "grad_norm": 1.5390625, "learning_rate": 2.9621976764922058e-05, "loss": 2.3301, "step": 6060 }, { "epoch": 0.077696, "grad_norm": 1.4921875, "learning_rate": 2.9620621268568685e-05, "loss": 2.322, "step": 6070 }, { "epoch": 0.077824, "grad_norm": 1.4765625, "learning_rate": 2.9619263377453418e-05, "loss": 2.3586, "step": 6080 }, { "epoch": 0.077952, "grad_norm": 1.4921875, "learning_rate": 2.961790309179866e-05, "loss": 2.3305, "step": 6090 }, { "epoch": 0.07808, "grad_norm": 1.46875, "learning_rate": 2.961654041182722e-05, "loss": 2.3193, "step": 6100 }, { "epoch": 0.078208, "grad_norm": 1.5625, "learning_rate": 2.9615175337762288e-05, "loss": 2.3004, "step": 6110 }, { "epoch": 0.078336, "grad_norm": 1.4609375, "learning_rate": 2.9613807869827468e-05, "loss": 2.3784, "step": 6120 }, { "epoch": 0.078464, "grad_norm": 1.5390625, "learning_rate": 2.9612438008246735e-05, "loss": 2.3365, "step": 6130 }, { "epoch": 0.078592, "grad_norm": 1.5546875, "learning_rate": 2.9611065753244465e-05, "loss": 2.3392, "step": 6140 }, { "epoch": 0.07872, "grad_norm": 1.484375, "learning_rate": 2.9609691105045422e-05, "loss": 2.3301, "step": 6150 }, { "epoch": 0.078848, "grad_norm": 1.421875, "learning_rate": 2.9608314063874765e-05, "loss": 2.3166, "step": 6160 }, { "epoch": 0.078976, "grad_norm": 1.5078125, "learning_rate": 2.9606934629958046e-05, "loss": 2.3328, "step": 6170 }, { "epoch": 0.079104, "grad_norm": 1.625, "learning_rate": 2.960555280352121e-05, "loss": 2.3379, "step": 6180 }, { "epoch": 0.079232, "grad_norm": 1.46875, "learning_rate": 2.9604168584790584e-05, "loss": 2.2985, "step": 6190 }, { "epoch": 0.07936, "grad_norm": 1.6640625, "learning_rate": 2.9602781973992895e-05, "loss": 2.3025, "step": 6200 }, { "epoch": 0.079488, "grad_norm": 1.546875, "learning_rate": 2.960139297135527e-05, "loss": 2.3588, "step": 6210 }, { "epoch": 0.079616, "grad_norm": 1.4296875, "learning_rate": 2.9600001577105208e-05, "loss": 2.348, "step": 6220 }, { "epoch": 0.079744, "grad_norm": 1.5078125, "learning_rate": 2.959860779147062e-05, "loss": 2.326, "step": 6230 }, { "epoch": 0.079872, "grad_norm": 1.4296875, "learning_rate": 2.9597211614679784e-05, "loss": 2.3287, "step": 6240 }, { "epoch": 0.08, "grad_norm": 1.59375, "learning_rate": 2.9595813046961395e-05, "loss": 2.3652, "step": 6250 }, { "epoch": 0.080128, "grad_norm": 1.5546875, "learning_rate": 2.9594412088544534e-05, "loss": 2.2953, "step": 6260 }, { "epoch": 0.080256, "grad_norm": 1.4921875, "learning_rate": 2.9593008739658654e-05, "loss": 2.3387, "step": 6270 }, { "epoch": 0.080384, "grad_norm": 1.4375, "learning_rate": 2.9591603000533632e-05, "loss": 2.3353, "step": 6280 }, { "epoch": 0.080512, "grad_norm": 1.5546875, "learning_rate": 2.9590194871399704e-05, "loss": 2.3181, "step": 6290 }, { "epoch": 0.08064, "grad_norm": 1.46875, "learning_rate": 2.958878435248752e-05, "loss": 2.3311, "step": 6300 }, { "epoch": 0.080768, "grad_norm": 5.46875, "learning_rate": 2.9587371444028113e-05, "loss": 2.3346, "step": 6310 }, { "epoch": 0.080896, "grad_norm": 1.578125, "learning_rate": 2.9585956146252907e-05, "loss": 2.3366, "step": 6320 }, { "epoch": 0.081024, "grad_norm": 1.3828125, "learning_rate": 2.9584538459393715e-05, "loss": 2.3278, "step": 6330 }, { "epoch": 0.081152, "grad_norm": 1.5390625, "learning_rate": 2.958311838368275e-05, "loss": 2.3864, "step": 6340 }, { "epoch": 0.08128, "grad_norm": 3.0, "learning_rate": 2.958169591935261e-05, "loss": 2.3066, "step": 6350 }, { "epoch": 0.081408, "grad_norm": 1.5078125, "learning_rate": 2.9580271066636288e-05, "loss": 2.3271, "step": 6360 }, { "epoch": 0.081536, "grad_norm": 1.546875, "learning_rate": 2.957884382576716e-05, "loss": 2.3324, "step": 6370 }, { "epoch": 0.081664, "grad_norm": 1.5625, "learning_rate": 2.9577414196979e-05, "loss": 2.3487, "step": 6380 }, { "epoch": 0.081792, "grad_norm": 1.3671875, "learning_rate": 2.9575982180505974e-05, "loss": 2.3656, "step": 6390 }, { "epoch": 0.08192, "grad_norm": 1.5, "learning_rate": 2.9574547776582633e-05, "loss": 2.3293, "step": 6400 }, { "epoch": 0.082048, "grad_norm": 1.5703125, "learning_rate": 2.957311098544393e-05, "loss": 2.349, "step": 6410 }, { "epoch": 0.082176, "grad_norm": 1.515625, "learning_rate": 2.95716718073252e-05, "loss": 2.3436, "step": 6420 }, { "epoch": 0.082304, "grad_norm": 1.515625, "learning_rate": 2.9570230242462157e-05, "loss": 2.3431, "step": 6430 }, { "epoch": 0.082432, "grad_norm": 1.53125, "learning_rate": 2.9568786291090942e-05, "loss": 2.3327, "step": 6440 }, { "epoch": 0.08256, "grad_norm": 1.4375, "learning_rate": 2.9567339953448052e-05, "loss": 2.3465, "step": 6450 }, { "epoch": 0.082688, "grad_norm": 1.46875, "learning_rate": 2.956589122977039e-05, "loss": 2.3235, "step": 6460 }, { "epoch": 0.082816, "grad_norm": 1.4296875, "learning_rate": 2.9564440120295244e-05, "loss": 2.3299, "step": 6470 }, { "epoch": 0.082944, "grad_norm": 1.4375, "learning_rate": 2.95629866252603e-05, "loss": 2.3385, "step": 6480 }, { "epoch": 0.083072, "grad_norm": 1.4453125, "learning_rate": 2.9561530744903634e-05, "loss": 2.3376, "step": 6490 }, { "epoch": 0.0832, "grad_norm": 1.46875, "learning_rate": 2.9560072479463706e-05, "loss": 2.352, "step": 6500 }, { "epoch": 0.083328, "grad_norm": 1.515625, "learning_rate": 2.955861182917937e-05, "loss": 2.3658, "step": 6510 }, { "epoch": 0.083456, "grad_norm": 1.6875, "learning_rate": 2.955714879428987e-05, "loss": 2.339, "step": 6520 }, { "epoch": 0.083584, "grad_norm": 1.5625, "learning_rate": 2.9555683375034845e-05, "loss": 2.3004, "step": 6530 }, { "epoch": 0.083712, "grad_norm": 1.453125, "learning_rate": 2.9554215571654317e-05, "loss": 2.3449, "step": 6540 }, { "epoch": 0.08384, "grad_norm": 4.09375, "learning_rate": 2.95527453843887e-05, "loss": 2.3497, "step": 6550 }, { "epoch": 0.083968, "grad_norm": 1.59375, "learning_rate": 2.9551272813478814e-05, "loss": 2.3524, "step": 6560 }, { "epoch": 0.084096, "grad_norm": 1.5234375, "learning_rate": 2.9549797859165845e-05, "loss": 2.3438, "step": 6570 }, { "epoch": 0.084224, "grad_norm": 1.5546875, "learning_rate": 2.9548320521691385e-05, "loss": 2.3594, "step": 6580 }, { "epoch": 0.084352, "grad_norm": 1.4921875, "learning_rate": 2.954684080129741e-05, "loss": 2.351, "step": 6590 }, { "epoch": 0.08448, "grad_norm": 1.4921875, "learning_rate": 2.9545358698226286e-05, "loss": 2.3686, "step": 6600 }, { "epoch": 0.084608, "grad_norm": 1.5234375, "learning_rate": 2.9543874212720777e-05, "loss": 2.324, "step": 6610 }, { "epoch": 0.084736, "grad_norm": 1.375, "learning_rate": 2.9542387345024032e-05, "loss": 2.2977, "step": 6620 }, { "epoch": 0.084864, "grad_norm": 1.671875, "learning_rate": 2.954089809537959e-05, "loss": 2.2708, "step": 6630 }, { "epoch": 0.084992, "grad_norm": 1.75, "learning_rate": 2.9539406464031378e-05, "loss": 2.3692, "step": 6640 }, { "epoch": 0.08512, "grad_norm": 1.609375, "learning_rate": 2.9537912451223718e-05, "loss": 2.3311, "step": 6650 }, { "epoch": 0.085248, "grad_norm": 1.453125, "learning_rate": 2.9536416057201315e-05, "loss": 2.3172, "step": 6660 }, { "epoch": 0.085376, "grad_norm": 1.4453125, "learning_rate": 2.9534917282209274e-05, "loss": 2.3239, "step": 6670 }, { "epoch": 0.085504, "grad_norm": 1.4453125, "learning_rate": 2.953341612649308e-05, "loss": 2.3184, "step": 6680 }, { "epoch": 0.085632, "grad_norm": 1.578125, "learning_rate": 2.9531912590298617e-05, "loss": 2.3866, "step": 6690 }, { "epoch": 0.08576, "grad_norm": 1.546875, "learning_rate": 2.9530406673872143e-05, "loss": 2.3532, "step": 6700 }, { "epoch": 0.085888, "grad_norm": 1.46875, "learning_rate": 2.9528898377460337e-05, "loss": 2.358, "step": 6710 }, { "epoch": 0.086016, "grad_norm": 1.453125, "learning_rate": 2.9527387701310228e-05, "loss": 2.338, "step": 6720 }, { "epoch": 0.086144, "grad_norm": 1.5, "learning_rate": 2.9525874645669272e-05, "loss": 2.308, "step": 6730 }, { "epoch": 0.086272, "grad_norm": 1.625, "learning_rate": 2.9524359210785283e-05, "loss": 2.3372, "step": 6740 }, { "epoch": 0.0864, "grad_norm": 1.390625, "learning_rate": 2.9522841396906486e-05, "loss": 2.3295, "step": 6750 }, { "epoch": 0.086528, "grad_norm": 1.4921875, "learning_rate": 2.9521321204281493e-05, "loss": 2.3433, "step": 6760 }, { "epoch": 0.086656, "grad_norm": 1.53125, "learning_rate": 2.9519798633159296e-05, "loss": 2.3464, "step": 6770 }, { "epoch": 0.086784, "grad_norm": 1.421875, "learning_rate": 2.951827368378928e-05, "loss": 2.3591, "step": 6780 }, { "epoch": 0.086912, "grad_norm": 2.203125, "learning_rate": 2.9516746356421226e-05, "loss": 2.3385, "step": 6790 }, { "epoch": 0.08704, "grad_norm": 1.46875, "learning_rate": 2.9515216651305302e-05, "loss": 2.3406, "step": 6800 }, { "epoch": 0.087168, "grad_norm": 1.4921875, "learning_rate": 2.951368456869206e-05, "loss": 2.3487, "step": 6810 }, { "epoch": 0.087296, "grad_norm": 1.4296875, "learning_rate": 2.9512150108832446e-05, "loss": 2.3078, "step": 6820 }, { "epoch": 0.087424, "grad_norm": 1.53125, "learning_rate": 2.9510613271977794e-05, "loss": 2.2994, "step": 6830 }, { "epoch": 0.087552, "grad_norm": 1.4609375, "learning_rate": 2.950907405837983e-05, "loss": 2.3273, "step": 6840 }, { "epoch": 0.08768, "grad_norm": 1.515625, "learning_rate": 2.9507532468290668e-05, "loss": 2.3229, "step": 6850 }, { "epoch": 0.087808, "grad_norm": 1.4765625, "learning_rate": 2.9505988501962803e-05, "loss": 2.3107, "step": 6860 }, { "epoch": 0.087936, "grad_norm": 1.5, "learning_rate": 2.9504442159649132e-05, "loss": 2.3614, "step": 6870 }, { "epoch": 0.088064, "grad_norm": 1.609375, "learning_rate": 2.9502893441602937e-05, "loss": 2.3168, "step": 6880 }, { "epoch": 0.088192, "grad_norm": 1.6640625, "learning_rate": 2.9501342348077883e-05, "loss": 2.3019, "step": 6890 }, { "epoch": 0.08832, "grad_norm": 1.5625, "learning_rate": 2.9499788879328036e-05, "loss": 2.3469, "step": 6900 }, { "epoch": 0.088448, "grad_norm": 1.6796875, "learning_rate": 2.9498233035607837e-05, "loss": 2.3487, "step": 6910 }, { "epoch": 0.088576, "grad_norm": 1.3359375, "learning_rate": 2.9496674817172126e-05, "loss": 2.3341, "step": 6920 }, { "epoch": 0.088704, "grad_norm": 1.515625, "learning_rate": 2.949511422427613e-05, "loss": 2.3224, "step": 6930 }, { "epoch": 0.088832, "grad_norm": 1.6953125, "learning_rate": 2.9493551257175463e-05, "loss": 2.3029, "step": 6940 }, { "epoch": 0.08896, "grad_norm": 1.453125, "learning_rate": 2.949198591612613e-05, "loss": 2.3459, "step": 6950 }, { "epoch": 0.089088, "grad_norm": 1.390625, "learning_rate": 2.949041820138452e-05, "loss": 2.334, "step": 6960 }, { "epoch": 0.089216, "grad_norm": 1.5234375, "learning_rate": 2.9488848113207413e-05, "loss": 2.3406, "step": 6970 }, { "epoch": 0.089344, "grad_norm": 1.3828125, "learning_rate": 2.948727565185199e-05, "loss": 2.2984, "step": 6980 }, { "epoch": 0.089472, "grad_norm": 1.5625, "learning_rate": 2.9485700817575796e-05, "loss": 2.3212, "step": 6990 }, { "epoch": 0.0896, "grad_norm": 1.375, "learning_rate": 2.948412361063679e-05, "loss": 2.3113, "step": 7000 }, { "epoch": 0.089728, "grad_norm": 1.4140625, "learning_rate": 2.9482544031293298e-05, "loss": 2.3123, "step": 7010 }, { "epoch": 0.089856, "grad_norm": 1.484375, "learning_rate": 2.9480962079804053e-05, "loss": 2.3478, "step": 7020 }, { "epoch": 0.089984, "grad_norm": 1.484375, "learning_rate": 2.947937775642817e-05, "loss": 2.3211, "step": 7030 }, { "epoch": 0.090112, "grad_norm": 1.5390625, "learning_rate": 2.947779106142514e-05, "loss": 2.3148, "step": 7040 }, { "epoch": 0.09024, "grad_norm": 1.5078125, "learning_rate": 2.9476201995054862e-05, "loss": 2.3336, "step": 7050 }, { "epoch": 0.090368, "grad_norm": 1.5078125, "learning_rate": 2.947461055757761e-05, "loss": 2.3376, "step": 7060 }, { "epoch": 0.090496, "grad_norm": 1.4609375, "learning_rate": 2.9473016749254053e-05, "loss": 2.3149, "step": 7070 }, { "epoch": 0.090624, "grad_norm": 1.3984375, "learning_rate": 2.947142057034525e-05, "loss": 2.2982, "step": 7080 }, { "epoch": 0.090752, "grad_norm": 1.4140625, "learning_rate": 2.9469822021112636e-05, "loss": 2.3247, "step": 7090 }, { "epoch": 0.09088, "grad_norm": 1.5390625, "learning_rate": 2.9468221101818047e-05, "loss": 2.3002, "step": 7100 }, { "epoch": 0.091008, "grad_norm": 1.53125, "learning_rate": 2.9466617812723705e-05, "loss": 2.3261, "step": 7110 }, { "epoch": 0.091136, "grad_norm": 1.46875, "learning_rate": 2.9465012154092223e-05, "loss": 2.3439, "step": 7120 }, { "epoch": 0.091264, "grad_norm": 1.5625, "learning_rate": 2.946340412618659e-05, "loss": 2.3472, "step": 7130 }, { "epoch": 0.091392, "grad_norm": 1.4296875, "learning_rate": 2.9461793729270188e-05, "loss": 2.3159, "step": 7140 }, { "epoch": 0.09152, "grad_norm": 1.796875, "learning_rate": 2.9460180963606793e-05, "loss": 2.3358, "step": 7150 }, { "epoch": 0.091648, "grad_norm": 1.5, "learning_rate": 2.9458565829460565e-05, "loss": 2.377, "step": 7160 }, { "epoch": 0.091776, "grad_norm": 17.625, "learning_rate": 2.9456948327096057e-05, "loss": 2.3124, "step": 7170 }, { "epoch": 0.091904, "grad_norm": 1.546875, "learning_rate": 2.9455328456778203e-05, "loss": 2.3349, "step": 7180 }, { "epoch": 0.092032, "grad_norm": 1.53125, "learning_rate": 2.9453706218772322e-05, "loss": 2.3179, "step": 7190 }, { "epoch": 0.09216, "grad_norm": 1.484375, "learning_rate": 2.945208161334413e-05, "loss": 2.3207, "step": 7200 }, { "epoch": 0.092288, "grad_norm": 1.578125, "learning_rate": 2.9450454640759726e-05, "loss": 2.3582, "step": 7210 }, { "epoch": 0.092416, "grad_norm": 1.4296875, "learning_rate": 2.94488253012856e-05, "loss": 2.3317, "step": 7220 }, { "epoch": 0.092544, "grad_norm": 1.5703125, "learning_rate": 2.944719359518862e-05, "loss": 2.3158, "step": 7230 }, { "epoch": 0.092672, "grad_norm": 1.53125, "learning_rate": 2.944555952273606e-05, "loss": 2.3561, "step": 7240 }, { "epoch": 0.0928, "grad_norm": 1.5859375, "learning_rate": 2.944392308419556e-05, "loss": 2.3126, "step": 7250 }, { "epoch": 0.092928, "grad_norm": 5.3125, "learning_rate": 2.944228427983516e-05, "loss": 2.3311, "step": 7260 }, { "epoch": 0.093056, "grad_norm": 2.015625, "learning_rate": 2.9440643109923296e-05, "loss": 2.3215, "step": 7270 }, { "epoch": 0.093184, "grad_norm": 1.40625, "learning_rate": 2.9438999574728766e-05, "loss": 2.3411, "step": 7280 }, { "epoch": 0.093312, "grad_norm": 1.640625, "learning_rate": 2.9437353674520774e-05, "loss": 2.3258, "step": 7290 }, { "epoch": 0.09344, "grad_norm": 1.5078125, "learning_rate": 2.9435705409568914e-05, "loss": 2.3239, "step": 7300 }, { "epoch": 0.093568, "grad_norm": 1.5546875, "learning_rate": 2.943405478014316e-05, "loss": 2.3499, "step": 7310 }, { "epoch": 0.093696, "grad_norm": 1.578125, "learning_rate": 2.9432401786513862e-05, "loss": 2.3196, "step": 7320 }, { "epoch": 0.093824, "grad_norm": 1.4296875, "learning_rate": 2.9430746428951784e-05, "loss": 2.3304, "step": 7330 }, { "epoch": 0.093952, "grad_norm": 1.8046875, "learning_rate": 2.942908870772806e-05, "loss": 2.3391, "step": 7340 }, { "epoch": 0.09408, "grad_norm": 1.5625, "learning_rate": 2.942742862311421e-05, "loss": 2.3372, "step": 7350 }, { "epoch": 0.094208, "grad_norm": 1.4921875, "learning_rate": 2.942576617538214e-05, "loss": 2.3494, "step": 7360 }, { "epoch": 0.094336, "grad_norm": 1.46875, "learning_rate": 2.942410136480417e-05, "loss": 2.306, "step": 7370 }, { "epoch": 0.094464, "grad_norm": 1.4453125, "learning_rate": 2.9422434191652955e-05, "loss": 2.3084, "step": 7380 }, { "epoch": 0.094592, "grad_norm": 1.5078125, "learning_rate": 2.9420764656201587e-05, "loss": 2.3219, "step": 7390 }, { "epoch": 0.09472, "grad_norm": 1.6640625, "learning_rate": 2.941909275872352e-05, "loss": 2.3264, "step": 7400 }, { "epoch": 0.094848, "grad_norm": 1.4609375, "learning_rate": 2.9417418499492598e-05, "loss": 2.3218, "step": 7410 }, { "epoch": 0.094976, "grad_norm": 1.5546875, "learning_rate": 2.9415741878783058e-05, "loss": 2.3502, "step": 7420 }, { "epoch": 0.095104, "grad_norm": 1.46875, "learning_rate": 2.9414062896869513e-05, "loss": 2.3117, "step": 7430 }, { "epoch": 0.095232, "grad_norm": 1.5703125, "learning_rate": 2.9412381554026973e-05, "loss": 2.3081, "step": 7440 }, { "epoch": 0.09536, "grad_norm": 2.25, "learning_rate": 2.9410697850530832e-05, "loss": 2.3174, "step": 7450 }, { "epoch": 0.095488, "grad_norm": 1.5078125, "learning_rate": 2.9409011786656868e-05, "loss": 2.2923, "step": 7460 }, { "epoch": 0.095616, "grad_norm": 1.484375, "learning_rate": 2.9407323362681248e-05, "loss": 2.2996, "step": 7470 }, { "epoch": 0.095744, "grad_norm": 1.6328125, "learning_rate": 2.9405632578880523e-05, "loss": 2.3303, "step": 7480 }, { "epoch": 0.095872, "grad_norm": 1.625, "learning_rate": 2.9403939435531634e-05, "loss": 2.312, "step": 7490 }, { "epoch": 0.096, "grad_norm": 1.6328125, "learning_rate": 2.9402243932911906e-05, "loss": 2.3133, "step": 7500 }, { "epoch": 0.096128, "grad_norm": 1.6015625, "learning_rate": 2.9400546071299053e-05, "loss": 2.3242, "step": 7510 }, { "epoch": 0.096256, "grad_norm": 1.5703125, "learning_rate": 2.9398845850971173e-05, "loss": 2.3234, "step": 7520 }, { "epoch": 0.096384, "grad_norm": 1.53125, "learning_rate": 2.9397143272206744e-05, "loss": 2.3119, "step": 7530 }, { "epoch": 0.096512, "grad_norm": 1.390625, "learning_rate": 2.939543833528465e-05, "loss": 2.3411, "step": 7540 }, { "epoch": 0.09664, "grad_norm": 1.5234375, "learning_rate": 2.939373104048414e-05, "loss": 2.3196, "step": 7550 }, { "epoch": 0.096768, "grad_norm": 1.4765625, "learning_rate": 2.9392021388084862e-05, "loss": 2.342, "step": 7560 }, { "epoch": 0.096896, "grad_norm": 1.4296875, "learning_rate": 2.939030937836684e-05, "loss": 2.3296, "step": 7570 }, { "epoch": 0.097024, "grad_norm": 1.421875, "learning_rate": 2.93885950116105e-05, "loss": 2.3244, "step": 7580 }, { "epoch": 0.097152, "grad_norm": 1.46875, "learning_rate": 2.9386878288096625e-05, "loss": 2.3705, "step": 7590 }, { "epoch": 0.09728, "grad_norm": 1.5, "learning_rate": 2.9385159208106423e-05, "loss": 2.3193, "step": 7600 }, { "epoch": 0.097408, "grad_norm": 1.578125, "learning_rate": 2.9383437771921462e-05, "loss": 2.3514, "step": 7610 }, { "epoch": 0.097536, "grad_norm": 1.6875, "learning_rate": 2.9381713979823697e-05, "loss": 2.2867, "step": 7620 }, { "epoch": 0.097664, "grad_norm": 1.5390625, "learning_rate": 2.9379987832095478e-05, "loss": 2.3268, "step": 7630 }, { "epoch": 0.097792, "grad_norm": 1.5078125, "learning_rate": 2.9378259329019534e-05, "loss": 2.342, "step": 7640 }, { "epoch": 0.09792, "grad_norm": 2.671875, "learning_rate": 2.937652847087899e-05, "loss": 2.3358, "step": 7650 }, { "epoch": 0.098048, "grad_norm": 1.5859375, "learning_rate": 2.937479525795734e-05, "loss": 2.3162, "step": 7660 }, { "epoch": 0.098176, "grad_norm": 1.484375, "learning_rate": 2.9373059690538477e-05, "loss": 2.3245, "step": 7670 }, { "epoch": 0.098304, "grad_norm": 1.5625, "learning_rate": 2.9371321768906676e-05, "loss": 2.3274, "step": 7680 }, { "epoch": 0.098432, "grad_norm": 1.5, "learning_rate": 2.9369581493346597e-05, "loss": 2.331, "step": 7690 }, { "epoch": 0.09856, "grad_norm": 1.53125, "learning_rate": 2.936783886414328e-05, "loss": 2.3333, "step": 7700 }, { "epoch": 0.098688, "grad_norm": 1.5, "learning_rate": 2.9366093881582165e-05, "loss": 2.342, "step": 7710 }, { "epoch": 0.098816, "grad_norm": 1.515625, "learning_rate": 2.9364346545949067e-05, "loss": 2.3099, "step": 7720 }, { "epoch": 0.098944, "grad_norm": 1.40625, "learning_rate": 2.9362596857530184e-05, "loss": 2.3152, "step": 7730 }, { "epoch": 0.099072, "grad_norm": 1.5390625, "learning_rate": 2.9360844816612103e-05, "loss": 2.3267, "step": 7740 }, { "epoch": 0.0992, "grad_norm": 1.5625, "learning_rate": 2.9359090423481807e-05, "loss": 2.3461, "step": 7750 }, { "epoch": 0.099328, "grad_norm": 1.4453125, "learning_rate": 2.9357333678426643e-05, "loss": 2.3156, "step": 7760 }, { "epoch": 0.099456, "grad_norm": 1.4609375, "learning_rate": 2.9355574581734354e-05, "loss": 2.3512, "step": 7770 }, { "epoch": 0.099584, "grad_norm": 1.4921875, "learning_rate": 2.9353813133693074e-05, "loss": 2.3148, "step": 7780 }, { "epoch": 0.099712, "grad_norm": 1.40625, "learning_rate": 2.9352049334591316e-05, "loss": 2.349, "step": 7790 }, { "epoch": 0.09984, "grad_norm": 1.421875, "learning_rate": 2.9350283184717973e-05, "loss": 2.3216, "step": 7800 }, { "epoch": 0.099968, "grad_norm": 1.625, "learning_rate": 2.9348514684362344e-05, "loss": 2.2967, "step": 7810 }, { "epoch": 0.100096, "grad_norm": 1.546875, "learning_rate": 2.9346743833814076e-05, "loss": 2.3346, "step": 7820 }, { "epoch": 0.100224, "grad_norm": 1.3515625, "learning_rate": 2.9344970633363244e-05, "loss": 2.3165, "step": 7830 }, { "epoch": 0.100352, "grad_norm": 1.53125, "learning_rate": 2.9343195083300266e-05, "loss": 2.3113, "step": 7840 }, { "epoch": 0.10048, "grad_norm": 1.5, "learning_rate": 2.9341417183915986e-05, "loss": 2.3194, "step": 7850 }, { "epoch": 0.100608, "grad_norm": 1.5390625, "learning_rate": 2.9339636935501596e-05, "loss": 2.3236, "step": 7860 }, { "epoch": 0.100736, "grad_norm": 1.40625, "learning_rate": 2.93378543383487e-05, "loss": 2.3006, "step": 7870 }, { "epoch": 0.100864, "grad_norm": 1.5625, "learning_rate": 2.9336069392749266e-05, "loss": 2.3232, "step": 7880 }, { "epoch": 0.100992, "grad_norm": 1.3515625, "learning_rate": 2.9334282098995663e-05, "loss": 2.2925, "step": 7890 }, { "epoch": 0.10112, "grad_norm": 1.5625, "learning_rate": 2.9332492457380636e-05, "loss": 2.3319, "step": 7900 }, { "epoch": 0.101248, "grad_norm": 1.5703125, "learning_rate": 2.933070046819732e-05, "loss": 2.3396, "step": 7910 }, { "epoch": 0.101376, "grad_norm": 1.59375, "learning_rate": 2.9328906131739223e-05, "loss": 2.339, "step": 7920 }, { "epoch": 0.101504, "grad_norm": 1.5, "learning_rate": 2.9327109448300255e-05, "loss": 2.2948, "step": 7930 }, { "epoch": 0.101632, "grad_norm": 1.5859375, "learning_rate": 2.9325310418174693e-05, "loss": 2.3127, "step": 7940 }, { "epoch": 0.10176, "grad_norm": 1.5859375, "learning_rate": 2.9323509041657215e-05, "loss": 2.3612, "step": 7950 }, { "epoch": 0.101888, "grad_norm": 1.5625, "learning_rate": 2.9321705319042865e-05, "loss": 2.318, "step": 7960 }, { "epoch": 0.102016, "grad_norm": 1.6015625, "learning_rate": 2.9319899250627085e-05, "loss": 2.3037, "step": 7970 }, { "epoch": 0.102144, "grad_norm": 1.515625, "learning_rate": 2.93180908367057e-05, "loss": 2.3505, "step": 7980 }, { "epoch": 0.102272, "grad_norm": 1.8046875, "learning_rate": 2.9316280077574914e-05, "loss": 2.3182, "step": 7990 }, { "epoch": 0.1024, "grad_norm": 1.8125, "learning_rate": 2.931446697353132e-05, "loss": 2.3345, "step": 8000 }, { "epoch": 0.102528, "grad_norm": 1.6796875, "learning_rate": 2.9312651524871885e-05, "loss": 2.3438, "step": 8010 }, { "epoch": 0.102656, "grad_norm": 1.484375, "learning_rate": 2.9310833731893974e-05, "loss": 2.3043, "step": 8020 }, { "epoch": 0.102784, "grad_norm": 1.5625, "learning_rate": 2.930901359489533e-05, "loss": 2.3159, "step": 8030 }, { "epoch": 0.102912, "grad_norm": 1.453125, "learning_rate": 2.9307191114174076e-05, "loss": 2.3897, "step": 8040 }, { "epoch": 0.10304, "grad_norm": 1.3984375, "learning_rate": 2.9305366290028726e-05, "loss": 2.3296, "step": 8050 }, { "epoch": 0.103168, "grad_norm": 1.5703125, "learning_rate": 2.9303539122758166e-05, "loss": 2.3033, "step": 8060 }, { "epoch": 0.103296, "grad_norm": 1.4453125, "learning_rate": 2.9301709612661688e-05, "loss": 2.3192, "step": 8070 }, { "epoch": 0.103424, "grad_norm": 1.6328125, "learning_rate": 2.9299877760038944e-05, "loss": 2.3243, "step": 8080 }, { "epoch": 0.103552, "grad_norm": 1.4921875, "learning_rate": 2.9298043565189982e-05, "loss": 2.3554, "step": 8090 }, { "epoch": 0.10368, "grad_norm": 1.5, "learning_rate": 2.929620702841523e-05, "loss": 2.32, "step": 8100 }, { "epoch": 0.103808, "grad_norm": 1.421875, "learning_rate": 2.92943681500155e-05, "loss": 2.3149, "step": 8110 }, { "epoch": 0.103936, "grad_norm": 1.4609375, "learning_rate": 2.9292526930291992e-05, "loss": 2.3283, "step": 8120 }, { "epoch": 0.104064, "grad_norm": 1.4375, "learning_rate": 2.929068336954628e-05, "loss": 2.3149, "step": 8130 }, { "epoch": 0.104192, "grad_norm": 1.4296875, "learning_rate": 2.9288837468080328e-05, "loss": 2.3153, "step": 8140 }, { "epoch": 0.10432, "grad_norm": 1.453125, "learning_rate": 2.928698922619649e-05, "loss": 2.3171, "step": 8150 }, { "epoch": 0.104448, "grad_norm": 1.5390625, "learning_rate": 2.9285138644197494e-05, "loss": 2.3013, "step": 8160 }, { "epoch": 0.104576, "grad_norm": 1.5546875, "learning_rate": 2.9283285722386442e-05, "loss": 2.3141, "step": 8170 }, { "epoch": 0.104704, "grad_norm": 1.4765625, "learning_rate": 2.9281430461066846e-05, "loss": 2.3316, "step": 8180 }, { "epoch": 0.104832, "grad_norm": 1.546875, "learning_rate": 2.9279572860542574e-05, "loss": 2.3341, "step": 8190 }, { "epoch": 0.10496, "grad_norm": 1.375, "learning_rate": 2.9277712921117893e-05, "loss": 2.3229, "step": 8200 }, { "epoch": 0.105088, "grad_norm": 1.5703125, "learning_rate": 2.9275850643097448e-05, "loss": 2.2836, "step": 8210 }, { "epoch": 0.105216, "grad_norm": 1.6171875, "learning_rate": 2.927398602678627e-05, "loss": 2.3171, "step": 8220 }, { "epoch": 0.105344, "grad_norm": 1.4609375, "learning_rate": 2.927211907248977e-05, "loss": 2.3003, "step": 8230 }, { "epoch": 0.105472, "grad_norm": 1.6171875, "learning_rate": 2.9270249780513743e-05, "loss": 2.3302, "step": 8240 }, { "epoch": 0.1056, "grad_norm": 1.4296875, "learning_rate": 2.9268378151164362e-05, "loss": 2.3424, "step": 8250 }, { "epoch": 0.105728, "grad_norm": 1.59375, "learning_rate": 2.92665041847482e-05, "loss": 2.36, "step": 8260 }, { "epoch": 0.105856, "grad_norm": 1.515625, "learning_rate": 2.9264627881572188e-05, "loss": 2.2957, "step": 8270 }, { "epoch": 0.105984, "grad_norm": 1.5390625, "learning_rate": 2.9262749241943656e-05, "loss": 2.3262, "step": 8280 }, { "epoch": 0.106112, "grad_norm": 1.46875, "learning_rate": 2.9260868266170314e-05, "loss": 2.3058, "step": 8290 }, { "epoch": 0.10624, "grad_norm": 1.6328125, "learning_rate": 2.925898495456025e-05, "loss": 2.2944, "step": 8300 }, { "epoch": 0.106368, "grad_norm": 1.484375, "learning_rate": 2.9257099307421947e-05, "loss": 2.2663, "step": 8310 }, { "epoch": 0.106496, "grad_norm": 1.5390625, "learning_rate": 2.925521132506425e-05, "loss": 2.357, "step": 8320 }, { "epoch": 0.106624, "grad_norm": 1.5078125, "learning_rate": 2.9253321007796407e-05, "loss": 2.3289, "step": 8330 }, { "epoch": 0.106752, "grad_norm": 1.5, "learning_rate": 2.9251428355928034e-05, "loss": 2.3406, "step": 8340 }, { "epoch": 0.10688, "grad_norm": 1.453125, "learning_rate": 2.9249533369769142e-05, "loss": 2.3171, "step": 8350 }, { "epoch": 0.107008, "grad_norm": 1.453125, "learning_rate": 2.924763604963011e-05, "loss": 2.3298, "step": 8360 }, { "epoch": 0.107136, "grad_norm": 1.4140625, "learning_rate": 2.9245736395821703e-05, "loss": 2.352, "step": 8370 }, { "epoch": 0.107264, "grad_norm": 1.53125, "learning_rate": 2.9243834408655087e-05, "loss": 2.3096, "step": 8380 }, { "epoch": 0.107392, "grad_norm": 1.53125, "learning_rate": 2.924193008844178e-05, "loss": 2.3176, "step": 8390 }, { "epoch": 0.10752, "grad_norm": 1.7578125, "learning_rate": 2.9240023435493707e-05, "loss": 2.2963, "step": 8400 }, { "epoch": 0.107648, "grad_norm": 1.515625, "learning_rate": 2.923811445012316e-05, "loss": 2.3566, "step": 8410 }, { "epoch": 0.107776, "grad_norm": 1.4921875, "learning_rate": 2.923620313264282e-05, "loss": 2.3277, "step": 8420 }, { "epoch": 0.107904, "grad_norm": 1.7109375, "learning_rate": 2.9234289483365747e-05, "loss": 2.3307, "step": 8430 }, { "epoch": 0.108032, "grad_norm": 1.6015625, "learning_rate": 2.9232373502605388e-05, "loss": 2.289, "step": 8440 }, { "epoch": 0.10816, "grad_norm": 1.40625, "learning_rate": 2.923045519067556e-05, "loss": 2.3316, "step": 8450 }, { "epoch": 0.108288, "grad_norm": 1.5859375, "learning_rate": 2.9228534547890478e-05, "loss": 2.3488, "step": 8460 }, { "epoch": 0.108416, "grad_norm": 1.4453125, "learning_rate": 2.922661157456473e-05, "loss": 2.3147, "step": 8470 }, { "epoch": 0.108544, "grad_norm": 1.6015625, "learning_rate": 2.9224686271013284e-05, "loss": 2.3756, "step": 8480 }, { "epoch": 0.108672, "grad_norm": 1.6015625, "learning_rate": 2.9222758637551494e-05, "loss": 2.3232, "step": 8490 }, { "epoch": 0.1088, "grad_norm": 2.125, "learning_rate": 2.9220828674495083e-05, "loss": 2.3046, "step": 8500 }, { "epoch": 0.108928, "grad_norm": 1.484375, "learning_rate": 2.9218896382160186e-05, "loss": 2.3066, "step": 8510 }, { "epoch": 0.109056, "grad_norm": 1.4453125, "learning_rate": 2.9216961760863284e-05, "loss": 2.3477, "step": 8520 }, { "epoch": 0.109184, "grad_norm": 1.7109375, "learning_rate": 2.9215024810921262e-05, "loss": 2.2939, "step": 8530 }, { "epoch": 0.109312, "grad_norm": 1.5390625, "learning_rate": 2.9213085532651375e-05, "loss": 2.3055, "step": 8540 }, { "epoch": 0.10944, "grad_norm": 1.875, "learning_rate": 2.921114392637127e-05, "loss": 2.3266, "step": 8550 }, { "epoch": 0.109568, "grad_norm": 1.5078125, "learning_rate": 2.920919999239897e-05, "loss": 2.3055, "step": 8560 }, { "epoch": 0.109696, "grad_norm": 1.453125, "learning_rate": 2.9207253731052868e-05, "loss": 2.3276, "step": 8570 }, { "epoch": 0.109824, "grad_norm": 1.5390625, "learning_rate": 2.9205305142651766e-05, "loss": 2.3268, "step": 8580 }, { "epoch": 0.109952, "grad_norm": 1.5390625, "learning_rate": 2.920335422751481e-05, "loss": 2.3253, "step": 8590 }, { "epoch": 0.11008, "grad_norm": 5.1875, "learning_rate": 2.9201400985961565e-05, "loss": 2.296, "step": 8600 }, { "epoch": 0.110208, "grad_norm": 1.5, "learning_rate": 2.919944541831195e-05, "loss": 2.314, "step": 8610 }, { "epoch": 0.110336, "grad_norm": 1.609375, "learning_rate": 2.919748752488627e-05, "loss": 2.3267, "step": 8620 }, { "epoch": 0.110464, "grad_norm": 1.5859375, "learning_rate": 2.9195527306005227e-05, "loss": 2.3519, "step": 8630 }, { "epoch": 0.110592, "grad_norm": 1.40625, "learning_rate": 2.9193564761989883e-05, "loss": 2.3234, "step": 8640 }, { "epoch": 0.11072, "grad_norm": 1.6640625, "learning_rate": 2.9191599893161695e-05, "loss": 2.327, "step": 8650 }, { "epoch": 0.110848, "grad_norm": 4.25, "learning_rate": 2.9189632699842487e-05, "loss": 2.3533, "step": 8660 }, { "epoch": 0.110976, "grad_norm": 1.5, "learning_rate": 2.9187663182354485e-05, "loss": 2.3215, "step": 8670 }, { "epoch": 0.111104, "grad_norm": 1.40625, "learning_rate": 2.9185691341020275e-05, "loss": 2.311, "step": 8680 }, { "epoch": 0.111232, "grad_norm": 1.71875, "learning_rate": 2.9183717176162836e-05, "loss": 2.3541, "step": 8690 }, { "epoch": 0.11136, "grad_norm": 1.734375, "learning_rate": 2.9181740688105512e-05, "loss": 2.3214, "step": 8700 }, { "epoch": 0.111488, "grad_norm": 1.7109375, "learning_rate": 2.9179761877172055e-05, "loss": 2.3039, "step": 8710 }, { "epoch": 0.111616, "grad_norm": 1.53125, "learning_rate": 2.917778074368657e-05, "loss": 2.3048, "step": 8720 }, { "epoch": 0.111744, "grad_norm": 1.4921875, "learning_rate": 2.917579728797356e-05, "loss": 2.2839, "step": 8730 }, { "epoch": 0.111872, "grad_norm": 1.4765625, "learning_rate": 2.9173811510357898e-05, "loss": 2.3182, "step": 8740 }, { "epoch": 0.112, "grad_norm": 1.53125, "learning_rate": 2.917182341116484e-05, "loss": 2.3272, "step": 8750 }, { "epoch": 0.112128, "grad_norm": 1.515625, "learning_rate": 2.916983299072003e-05, "loss": 2.3418, "step": 8760 }, { "epoch": 0.112256, "grad_norm": 1.703125, "learning_rate": 2.9167840249349483e-05, "loss": 2.3248, "step": 8770 }, { "epoch": 0.112384, "grad_norm": 1.796875, "learning_rate": 2.916584518737959e-05, "loss": 2.3002, "step": 8780 }, { "epoch": 0.112512, "grad_norm": 1.5078125, "learning_rate": 2.916384780513714e-05, "loss": 2.3376, "step": 8790 }, { "epoch": 0.11264, "grad_norm": 1.4921875, "learning_rate": 2.9161848102949286e-05, "loss": 2.3266, "step": 8800 }, { "epoch": 0.112768, "grad_norm": 1.8828125, "learning_rate": 2.9159846081143565e-05, "loss": 2.3143, "step": 8810 }, { "epoch": 0.112896, "grad_norm": 1.59375, "learning_rate": 2.9157841740047898e-05, "loss": 2.3374, "step": 8820 }, { "epoch": 0.113024, "grad_norm": 1.5078125, "learning_rate": 2.9155835079990578e-05, "loss": 2.3035, "step": 8830 }, { "epoch": 0.113152, "grad_norm": 1.6015625, "learning_rate": 2.9153826101300292e-05, "loss": 2.3369, "step": 8840 }, { "epoch": 0.11328, "grad_norm": 1.4609375, "learning_rate": 2.9151814804306087e-05, "loss": 2.3339, "step": 8850 }, { "epoch": 0.113408, "grad_norm": 1.546875, "learning_rate": 2.9149801189337406e-05, "loss": 2.3495, "step": 8860 }, { "epoch": 0.113536, "grad_norm": 1.4453125, "learning_rate": 2.9147785256724066e-05, "loss": 2.3104, "step": 8870 }, { "epoch": 0.113664, "grad_norm": 1.5703125, "learning_rate": 2.9145767006796258e-05, "loss": 2.3196, "step": 8880 }, { "epoch": 0.113792, "grad_norm": 1.59375, "learning_rate": 2.9143746439884568e-05, "loss": 2.2947, "step": 8890 }, { "epoch": 0.11392, "grad_norm": 1.546875, "learning_rate": 2.9141723556319942e-05, "loss": 2.3411, "step": 8900 }, { "epoch": 0.114048, "grad_norm": 1.3984375, "learning_rate": 2.9139698356433723e-05, "loss": 2.3143, "step": 8910 }, { "epoch": 0.114176, "grad_norm": 1.5234375, "learning_rate": 2.913767084055762e-05, "loss": 2.334, "step": 8920 }, { "epoch": 0.114304, "grad_norm": 1.4296875, "learning_rate": 2.9135641009023728e-05, "loss": 2.3328, "step": 8930 }, { "epoch": 0.114432, "grad_norm": 1.609375, "learning_rate": 2.913360886216452e-05, "loss": 2.3463, "step": 8940 }, { "epoch": 0.11456, "grad_norm": 1.65625, "learning_rate": 2.913157440031285e-05, "loss": 2.3215, "step": 8950 }, { "epoch": 0.114688, "grad_norm": 1.53125, "learning_rate": 2.9129537623801948e-05, "loss": 2.3289, "step": 8960 }, { "epoch": 0.114816, "grad_norm": 1.6796875, "learning_rate": 2.9127498532965422e-05, "loss": 2.3299, "step": 8970 }, { "epoch": 0.114944, "grad_norm": 1.65625, "learning_rate": 2.9125457128137267e-05, "loss": 2.3222, "step": 8980 }, { "epoch": 0.115072, "grad_norm": 1.375, "learning_rate": 2.9123413409651847e-05, "loss": 2.2933, "step": 8990 }, { "epoch": 0.1152, "grad_norm": 1.453125, "learning_rate": 2.9121367377843917e-05, "loss": 2.3312, "step": 9000 }, { "epoch": 0.115328, "grad_norm": 1.640625, "learning_rate": 2.9119319033048596e-05, "loss": 2.3149, "step": 9010 }, { "epoch": 0.115456, "grad_norm": 1.4609375, "learning_rate": 2.9117268375601394e-05, "loss": 2.3304, "step": 9020 }, { "epoch": 0.115584, "grad_norm": 1.4453125, "learning_rate": 2.9115215405838194e-05, "loss": 2.3382, "step": 9030 }, { "epoch": 0.115712, "grad_norm": 1.53125, "learning_rate": 2.9113160124095255e-05, "loss": 2.3119, "step": 9040 }, { "epoch": 0.11584, "grad_norm": 1.90625, "learning_rate": 2.9111102530709224e-05, "loss": 2.2918, "step": 9050 }, { "epoch": 0.115968, "grad_norm": 1.4765625, "learning_rate": 2.910904262601712e-05, "loss": 2.2691, "step": 9060 }, { "epoch": 0.116096, "grad_norm": 1.5, "learning_rate": 2.910698041035634e-05, "loss": 2.3089, "step": 9070 }, { "epoch": 0.116224, "grad_norm": 1.6171875, "learning_rate": 2.9104915884064663e-05, "loss": 2.3187, "step": 9080 }, { "epoch": 0.116352, "grad_norm": 1.609375, "learning_rate": 2.9102849047480245e-05, "loss": 2.3656, "step": 9090 }, { "epoch": 0.11648, "grad_norm": 1.6171875, "learning_rate": 2.910077990094162e-05, "loss": 2.3046, "step": 9100 }, { "epoch": 0.116608, "grad_norm": 1.4453125, "learning_rate": 2.9098708444787702e-05, "loss": 2.3197, "step": 9110 }, { "epoch": 0.116736, "grad_norm": 1.5234375, "learning_rate": 2.9096634679357776e-05, "loss": 2.2907, "step": 9120 }, { "epoch": 0.116864, "grad_norm": 1.6171875, "learning_rate": 2.9094558604991517e-05, "loss": 2.3332, "step": 9130 }, { "epoch": 0.116992, "grad_norm": 1.5625, "learning_rate": 2.9092480222028974e-05, "loss": 2.3356, "step": 9140 }, { "epoch": 0.11712, "grad_norm": 1.5390625, "learning_rate": 2.9090399530810566e-05, "loss": 2.3134, "step": 9150 }, { "epoch": 0.117248, "grad_norm": 1.6015625, "learning_rate": 2.90883165316771e-05, "loss": 2.3427, "step": 9160 }, { "epoch": 0.117376, "grad_norm": 1.5859375, "learning_rate": 2.908623122496976e-05, "loss": 2.3161, "step": 9170 }, { "epoch": 0.117504, "grad_norm": 1.5625, "learning_rate": 2.9084143611030103e-05, "loss": 2.3115, "step": 9180 }, { "epoch": 0.117632, "grad_norm": 1.484375, "learning_rate": 2.9082053690200066e-05, "loss": 2.3231, "step": 9190 }, { "epoch": 0.11776, "grad_norm": 1.609375, "learning_rate": 2.9079961462821963e-05, "loss": 2.3269, "step": 9200 }, { "epoch": 0.117888, "grad_norm": 1.8203125, "learning_rate": 2.9077866929238493e-05, "loss": 2.3295, "step": 9210 }, { "epoch": 0.118016, "grad_norm": 1.53125, "learning_rate": 2.9075770089792718e-05, "loss": 2.316, "step": 9220 }, { "epoch": 0.118144, "grad_norm": 1.640625, "learning_rate": 2.9073670944828092e-05, "loss": 2.3335, "step": 9230 }, { "epoch": 0.118272, "grad_norm": 1.5546875, "learning_rate": 2.9071569494688444e-05, "loss": 2.2961, "step": 9240 }, { "epoch": 0.1184, "grad_norm": 1.8046875, "learning_rate": 2.9069465739717977e-05, "loss": 2.3087, "step": 9250 }, { "epoch": 0.118528, "grad_norm": 1.625, "learning_rate": 2.9067359680261264e-05, "loss": 2.3358, "step": 9260 }, { "epoch": 0.118656, "grad_norm": 1.4921875, "learning_rate": 2.906525131666327e-05, "loss": 2.3124, "step": 9270 }, { "epoch": 0.118784, "grad_norm": 1.5, "learning_rate": 2.9063140649269334e-05, "loss": 2.3017, "step": 9280 }, { "epoch": 0.118912, "grad_norm": 1.6171875, "learning_rate": 2.9061027678425163e-05, "loss": 2.3172, "step": 9290 }, { "epoch": 0.11904, "grad_norm": 1.5, "learning_rate": 2.9058912404476857e-05, "loss": 2.2974, "step": 9300 }, { "epoch": 0.119168, "grad_norm": 1.4921875, "learning_rate": 2.9056794827770872e-05, "loss": 2.3004, "step": 9310 }, { "epoch": 0.119296, "grad_norm": 1.5546875, "learning_rate": 2.905467494865406e-05, "loss": 2.3229, "step": 9320 }, { "epoch": 0.119424, "grad_norm": 6.625, "learning_rate": 2.905255276747364e-05, "loss": 2.3165, "step": 9330 }, { "epoch": 0.119552, "grad_norm": 1.6015625, "learning_rate": 2.9050428284577214e-05, "loss": 2.3103, "step": 9340 }, { "epoch": 0.11968, "grad_norm": 1.5, "learning_rate": 2.904830150031276e-05, "loss": 2.308, "step": 9350 }, { "epoch": 0.119808, "grad_norm": 1.90625, "learning_rate": 2.9046172415028634e-05, "loss": 2.2822, "step": 9360 }, { "epoch": 0.119936, "grad_norm": 1.5390625, "learning_rate": 2.904404102907356e-05, "loss": 2.3528, "step": 9370 }, { "epoch": 0.120064, "grad_norm": 1.453125, "learning_rate": 2.904190734279664e-05, "loss": 2.321, "step": 9380 }, { "epoch": 0.120192, "grad_norm": 1.578125, "learning_rate": 2.903977135654737e-05, "loss": 2.3092, "step": 9390 }, { "epoch": 0.12032, "grad_norm": 1.6171875, "learning_rate": 2.90376330706756e-05, "loss": 2.344, "step": 9400 }, { "epoch": 0.120448, "grad_norm": 1.4921875, "learning_rate": 2.9035492485531576e-05, "loss": 2.326, "step": 9410 }, { "epoch": 0.120576, "grad_norm": 1.4921875, "learning_rate": 2.903334960146591e-05, "loss": 2.3176, "step": 9420 }, { "epoch": 0.120704, "grad_norm": 1.6015625, "learning_rate": 2.9031204418829588e-05, "loss": 2.3117, "step": 9430 }, { "epoch": 0.120832, "grad_norm": 1.4453125, "learning_rate": 2.9029056937973978e-05, "loss": 2.3436, "step": 9440 }, { "epoch": 0.12096, "grad_norm": 1.6328125, "learning_rate": 2.9026907159250828e-05, "loss": 2.3073, "step": 9450 }, { "epoch": 0.121088, "grad_norm": 1.3828125, "learning_rate": 2.902475508301225e-05, "loss": 2.3218, "step": 9460 }, { "epoch": 0.121216, "grad_norm": 1.59375, "learning_rate": 2.9022600709610748e-05, "loss": 2.3244, "step": 9470 }, { "epoch": 0.121344, "grad_norm": 1.4765625, "learning_rate": 2.9020444039399186e-05, "loss": 2.3166, "step": 9480 }, { "epoch": 0.121472, "grad_norm": 9.8125, "learning_rate": 2.901828507273082e-05, "loss": 2.308, "step": 9490 }, { "epoch": 0.1216, "grad_norm": 1.4140625, "learning_rate": 2.9016123809959275e-05, "loss": 2.3333, "step": 9500 }, { "epoch": 0.121728, "grad_norm": 1.4609375, "learning_rate": 2.901396025143854e-05, "loss": 2.3386, "step": 9510 }, { "epoch": 0.121856, "grad_norm": 1.5390625, "learning_rate": 2.9011794397523007e-05, "loss": 2.3119, "step": 9520 }, { "epoch": 0.121984, "grad_norm": 2.734375, "learning_rate": 2.9009626248567414e-05, "loss": 2.3254, "step": 9530 }, { "epoch": 0.122112, "grad_norm": 1.5078125, "learning_rate": 2.9007455804926902e-05, "loss": 2.312, "step": 9540 }, { "epoch": 0.12224, "grad_norm": 1.5234375, "learning_rate": 2.900528306695697e-05, "loss": 2.3037, "step": 9550 }, { "epoch": 0.122368, "grad_norm": 1.5546875, "learning_rate": 2.9003108035013498e-05, "loss": 2.3268, "step": 9560 }, { "epoch": 0.122496, "grad_norm": 1.4375, "learning_rate": 2.9000930709452744e-05, "loss": 2.3237, "step": 9570 }, { "epoch": 0.122624, "grad_norm": 1.5234375, "learning_rate": 2.8998751090631336e-05, "loss": 2.3395, "step": 9580 }, { "epoch": 0.122752, "grad_norm": 1.9375, "learning_rate": 2.8996569178906286e-05, "loss": 2.306, "step": 9590 }, { "epoch": 0.12288, "grad_norm": 1.5546875, "learning_rate": 2.8994384974634975e-05, "loss": 2.2851, "step": 9600 }, { "epoch": 0.123008, "grad_norm": 1.6171875, "learning_rate": 2.8992198478175162e-05, "loss": 2.3393, "step": 9610 }, { "epoch": 0.123136, "grad_norm": 1.578125, "learning_rate": 2.8990009689884974e-05, "loss": 2.299, "step": 9620 }, { "epoch": 0.123264, "grad_norm": 1.6171875, "learning_rate": 2.8987818610122927e-05, "loss": 2.2975, "step": 9630 }, { "epoch": 0.123392, "grad_norm": 1.765625, "learning_rate": 2.898562523924791e-05, "loss": 2.3491, "step": 9640 }, { "epoch": 0.12352, "grad_norm": 1.421875, "learning_rate": 2.898342957761917e-05, "loss": 2.317, "step": 9650 }, { "epoch": 0.123648, "grad_norm": 1.4609375, "learning_rate": 2.8981231625596354e-05, "loss": 2.3692, "step": 9660 }, { "epoch": 0.123776, "grad_norm": 1.6484375, "learning_rate": 2.8979031383539462e-05, "loss": 2.3293, "step": 9670 }, { "epoch": 0.123904, "grad_norm": 1.421875, "learning_rate": 2.897682885180889e-05, "loss": 2.3296, "step": 9680 }, { "epoch": 0.124032, "grad_norm": 1.4609375, "learning_rate": 2.897462403076539e-05, "loss": 2.3392, "step": 9690 }, { "epoch": 0.12416, "grad_norm": 1.5390625, "learning_rate": 2.8972416920770097e-05, "loss": 2.3021, "step": 9700 }, { "epoch": 0.124288, "grad_norm": 1.46875, "learning_rate": 2.897020752218453e-05, "loss": 2.3526, "step": 9710 }, { "epoch": 0.124416, "grad_norm": 1.5625, "learning_rate": 2.8967995835370565e-05, "loss": 2.3163, "step": 9720 }, { "epoch": 0.124544, "grad_norm": 1.4921875, "learning_rate": 2.8965781860690464e-05, "loss": 2.2941, "step": 9730 }, { "epoch": 0.124672, "grad_norm": 1.40625, "learning_rate": 2.8963565598506863e-05, "loss": 2.324, "step": 9740 }, { "epoch": 0.1248, "grad_norm": 1.46875, "learning_rate": 2.8961347049182767e-05, "loss": 2.3163, "step": 9750 }, { "epoch": 0.124928, "grad_norm": 1.4765625, "learning_rate": 2.895912621308157e-05, "loss": 2.3561, "step": 9760 }, { "epoch": 0.125056, "grad_norm": 1.6328125, "learning_rate": 2.8956903090567018e-05, "loss": 2.2871, "step": 9770 }, { "epoch": 0.125184, "grad_norm": 1.5078125, "learning_rate": 2.8954677682003254e-05, "loss": 2.3022, "step": 9780 }, { "epoch": 0.125312, "grad_norm": 1.6171875, "learning_rate": 2.895244998775478e-05, "loss": 2.3412, "step": 9790 }, { "epoch": 0.12544, "grad_norm": 1.4765625, "learning_rate": 2.895022000818648e-05, "loss": 2.3202, "step": 9800 }, { "epoch": 0.125568, "grad_norm": 1.6015625, "learning_rate": 2.8947987743663603e-05, "loss": 2.3294, "step": 9810 }, { "epoch": 0.125696, "grad_norm": 1.5625, "learning_rate": 2.8945753194551787e-05, "loss": 2.3018, "step": 9820 }, { "epoch": 0.125824, "grad_norm": 1.6171875, "learning_rate": 2.894351636121704e-05, "loss": 2.329, "step": 9830 }, { "epoch": 0.125952, "grad_norm": 1.4609375, "learning_rate": 2.8941277244025727e-05, "loss": 2.3278, "step": 9840 }, { "epoch": 0.12608, "grad_norm": 1.4609375, "learning_rate": 2.8939035843344618e-05, "loss": 2.304, "step": 9850 }, { "epoch": 0.126208, "grad_norm": 1.4609375, "learning_rate": 2.8936792159540823e-05, "loss": 2.3265, "step": 9860 }, { "epoch": 0.126336, "grad_norm": 2.546875, "learning_rate": 2.8934546192981856e-05, "loss": 2.317, "step": 9870 }, { "epoch": 0.126464, "grad_norm": 1.4921875, "learning_rate": 2.8932297944035583e-05, "loss": 2.2978, "step": 9880 }, { "epoch": 0.126592, "grad_norm": 1.484375, "learning_rate": 2.893004741307025e-05, "loss": 2.3104, "step": 9890 }, { "epoch": 0.12672, "grad_norm": 1.515625, "learning_rate": 2.8927794600454495e-05, "loss": 2.3151, "step": 9900 }, { "epoch": 0.126848, "grad_norm": 1.9375, "learning_rate": 2.8925539506557296e-05, "loss": 2.2776, "step": 9910 }, { "epoch": 0.126976, "grad_norm": 1.3984375, "learning_rate": 2.892328213174803e-05, "loss": 2.2988, "step": 9920 }, { "epoch": 0.127104, "grad_norm": 1.578125, "learning_rate": 2.8921022476396442e-05, "loss": 2.2958, "step": 9930 }, { "epoch": 0.127232, "grad_norm": 1.5390625, "learning_rate": 2.891876054087265e-05, "loss": 2.2947, "step": 9940 }, { "epoch": 0.12736, "grad_norm": 1.5859375, "learning_rate": 2.8916496325547138e-05, "loss": 2.3331, "step": 9950 }, { "epoch": 0.127488, "grad_norm": 1.5078125, "learning_rate": 2.8914229830790773e-05, "loss": 2.3172, "step": 9960 }, { "epoch": 0.127616, "grad_norm": 1.65625, "learning_rate": 2.8911961056974794e-05, "loss": 2.3285, "step": 9970 }, { "epoch": 0.127744, "grad_norm": 1.625, "learning_rate": 2.8909690004470807e-05, "loss": 2.3, "step": 9980 }, { "epoch": 0.127872, "grad_norm": 1.4765625, "learning_rate": 2.8907416673650796e-05, "loss": 2.3236, "step": 9990 }, { "epoch": 0.128, "grad_norm": 1.4609375, "learning_rate": 2.8905141064887123e-05, "loss": 2.3295, "step": 10000 }, { "epoch": 0.128128, "grad_norm": 1.484375, "learning_rate": 2.8902863178552512e-05, "loss": 2.2943, "step": 10010 }, { "epoch": 0.128256, "grad_norm": 1.578125, "learning_rate": 2.890058301502007e-05, "loss": 2.3163, "step": 10020 }, { "epoch": 0.128384, "grad_norm": 1.7890625, "learning_rate": 2.8898300574663266e-05, "loss": 2.3281, "step": 10030 }, { "epoch": 0.128512, "grad_norm": 1.5859375, "learning_rate": 2.889601585785596e-05, "loss": 2.3053, "step": 10040 }, { "epoch": 0.12864, "grad_norm": 1.40625, "learning_rate": 2.8893728864972364e-05, "loss": 2.3278, "step": 10050 }, { "epoch": 0.128768, "grad_norm": 1.546875, "learning_rate": 2.8891439596387075e-05, "loss": 2.3067, "step": 10060 }, { "epoch": 0.128896, "grad_norm": 1.515625, "learning_rate": 2.888914805247506e-05, "loss": 2.2752, "step": 10070 }, { "epoch": 0.129024, "grad_norm": 1.4921875, "learning_rate": 2.888685423361166e-05, "loss": 2.3148, "step": 10080 }, { "epoch": 0.129152, "grad_norm": 1.640625, "learning_rate": 2.888455814017259e-05, "loss": 2.3471, "step": 10090 }, { "epoch": 0.12928, "grad_norm": 1.640625, "learning_rate": 2.8882259772533927e-05, "loss": 2.2857, "step": 10100 }, { "epoch": 0.129408, "grad_norm": 1.640625, "learning_rate": 2.8879959131072137e-05, "loss": 2.306, "step": 10110 }, { "epoch": 0.129536, "grad_norm": 1.375, "learning_rate": 2.8877656216164047e-05, "loss": 2.3224, "step": 10120 }, { "epoch": 0.129664, "grad_norm": 1.7109375, "learning_rate": 2.887535102818686e-05, "loss": 2.3262, "step": 10130 }, { "epoch": 0.129792, "grad_norm": 1.6875, "learning_rate": 2.8873043567518147e-05, "loss": 2.3281, "step": 10140 }, { "epoch": 0.12992, "grad_norm": 1.78125, "learning_rate": 2.887073383453586e-05, "loss": 2.3012, "step": 10150 }, { "epoch": 0.130048, "grad_norm": 1.5859375, "learning_rate": 2.8868421829618313e-05, "loss": 2.2828, "step": 10160 }, { "epoch": 0.130176, "grad_norm": 1.4375, "learning_rate": 2.8866107553144204e-05, "loss": 2.324, "step": 10170 }, { "epoch": 0.130304, "grad_norm": 1.546875, "learning_rate": 2.8863791005492594e-05, "loss": 2.3366, "step": 10180 }, { "epoch": 0.130432, "grad_norm": 1.453125, "learning_rate": 2.8861472187042908e-05, "loss": 2.3552, "step": 10190 }, { "epoch": 0.13056, "grad_norm": 1.5546875, "learning_rate": 2.885915109817497e-05, "loss": 2.3038, "step": 10200 }, { "epoch": 0.130688, "grad_norm": 1.4453125, "learning_rate": 2.885682773926895e-05, "loss": 2.336, "step": 10210 }, { "epoch": 0.130816, "grad_norm": 1.5234375, "learning_rate": 2.8854502110705395e-05, "loss": 2.3169, "step": 10220 }, { "epoch": 0.130944, "grad_norm": 1.5078125, "learning_rate": 2.8852174212865235e-05, "loss": 2.3213, "step": 10230 }, { "epoch": 0.131072, "grad_norm": 1.71875, "learning_rate": 2.8849844046129763e-05, "loss": 2.3068, "step": 10240 }, { "epoch": 0.1312, "grad_norm": 1.5234375, "learning_rate": 2.8847511610880644e-05, "loss": 2.3116, "step": 10250 }, { "epoch": 0.131328, "grad_norm": 1.4453125, "learning_rate": 2.8845176907499916e-05, "loss": 2.2968, "step": 10260 }, { "epoch": 0.131456, "grad_norm": 1.546875, "learning_rate": 2.8842839936369988e-05, "loss": 2.2805, "step": 10270 }, { "epoch": 0.131584, "grad_norm": 1.6015625, "learning_rate": 2.884050069787364e-05, "loss": 2.2913, "step": 10280 }, { "epoch": 0.131712, "grad_norm": 1.53125, "learning_rate": 2.8838159192394022e-05, "loss": 2.3502, "step": 10290 }, { "epoch": 0.13184, "grad_norm": 1.5546875, "learning_rate": 2.8835815420314666e-05, "loss": 2.3312, "step": 10300 }, { "epoch": 0.131968, "grad_norm": 1.5, "learning_rate": 2.8833469382019455e-05, "loss": 2.3218, "step": 10310 }, { "epoch": 0.132096, "grad_norm": 1.4375, "learning_rate": 2.8831121077892662e-05, "loss": 2.3037, "step": 10320 }, { "epoch": 0.132224, "grad_norm": 1.53125, "learning_rate": 2.8828770508318917e-05, "loss": 2.3105, "step": 10330 }, { "epoch": 0.132352, "grad_norm": 1.546875, "learning_rate": 2.882641767368324e-05, "loss": 2.327, "step": 10340 }, { "epoch": 0.13248, "grad_norm": 1.546875, "learning_rate": 2.8824062574371e-05, "loss": 2.3212, "step": 10350 }, { "epoch": 0.132608, "grad_norm": 1.5546875, "learning_rate": 2.8821705210767946e-05, "loss": 2.2966, "step": 10360 }, { "epoch": 0.132736, "grad_norm": 1.546875, "learning_rate": 2.8819345583260206e-05, "loss": 2.3124, "step": 10370 }, { "epoch": 0.132864, "grad_norm": 1.578125, "learning_rate": 2.8816983692234263e-05, "loss": 2.2964, "step": 10380 }, { "epoch": 0.132992, "grad_norm": 1.4921875, "learning_rate": 2.8814619538076984e-05, "loss": 2.2898, "step": 10390 }, { "epoch": 0.13312, "grad_norm": 1.5078125, "learning_rate": 2.8812253121175605e-05, "loss": 2.3284, "step": 10400 }, { "epoch": 0.133248, "grad_norm": 1.578125, "learning_rate": 2.8809884441917725e-05, "loss": 2.2821, "step": 10410 }, { "epoch": 0.133376, "grad_norm": 1.5078125, "learning_rate": 2.880751350069132e-05, "loss": 2.3088, "step": 10420 }, { "epoch": 0.133504, "grad_norm": 1.5078125, "learning_rate": 2.8805140297884732e-05, "loss": 2.3133, "step": 10430 }, { "epoch": 0.133632, "grad_norm": 1.3828125, "learning_rate": 2.880276483388668e-05, "loss": 2.3077, "step": 10440 }, { "epoch": 0.13376, "grad_norm": 1.4921875, "learning_rate": 2.8800387109086247e-05, "loss": 2.3407, "step": 10450 }, { "epoch": 0.133888, "grad_norm": 1.671875, "learning_rate": 2.8798007123872887e-05, "loss": 2.3117, "step": 10460 }, { "epoch": 0.134016, "grad_norm": 1.5546875, "learning_rate": 2.8795624878636432e-05, "loss": 2.3375, "step": 10470 }, { "epoch": 0.134144, "grad_norm": 1.578125, "learning_rate": 2.879324037376707e-05, "loss": 2.3407, "step": 10480 }, { "epoch": 0.134272, "grad_norm": 1.421875, "learning_rate": 2.8790853609655373e-05, "loss": 2.3018, "step": 10490 }, { "epoch": 0.1344, "grad_norm": 1.5, "learning_rate": 2.8788464586692277e-05, "loss": 2.2714, "step": 10500 }, { "epoch": 0.134528, "grad_norm": 1.453125, "learning_rate": 2.878607330526909e-05, "loss": 2.3347, "step": 10510 }, { "epoch": 0.134656, "grad_norm": 1.4765625, "learning_rate": 2.8783679765777483e-05, "loss": 2.3312, "step": 10520 }, { "epoch": 0.134784, "grad_norm": 1.375, "learning_rate": 2.8781283968609502e-05, "loss": 2.3171, "step": 10530 }, { "epoch": 0.134912, "grad_norm": 1.484375, "learning_rate": 2.877888591415757e-05, "loss": 2.3323, "step": 10540 }, { "epoch": 0.13504, "grad_norm": 1.5234375, "learning_rate": 2.8776485602814463e-05, "loss": 2.3213, "step": 10550 }, { "epoch": 0.135168, "grad_norm": 1.65625, "learning_rate": 2.8774083034973343e-05, "loss": 2.2957, "step": 10560 }, { "epoch": 0.135296, "grad_norm": 1.484375, "learning_rate": 2.8771678211027733e-05, "loss": 2.326, "step": 10570 }, { "epoch": 0.135424, "grad_norm": 1.59375, "learning_rate": 2.876927113137153e-05, "loss": 2.2777, "step": 10580 }, { "epoch": 0.135552, "grad_norm": 1.546875, "learning_rate": 2.8766861796398992e-05, "loss": 2.3168, "step": 10590 }, { "epoch": 0.13568, "grad_norm": 1.453125, "learning_rate": 2.8764450206504758e-05, "loss": 2.3084, "step": 10600 }, { "epoch": 0.135808, "grad_norm": 1.453125, "learning_rate": 2.876203636208383e-05, "loss": 2.3504, "step": 10610 }, { "epoch": 0.135936, "grad_norm": 1.4921875, "learning_rate": 2.875962026353157e-05, "loss": 2.3268, "step": 10620 }, { "epoch": 0.136064, "grad_norm": 1.5859375, "learning_rate": 2.8757201911243736e-05, "loss": 2.3216, "step": 10630 }, { "epoch": 0.136192, "grad_norm": 2.6875, "learning_rate": 2.8754781305616422e-05, "loss": 2.3234, "step": 10640 }, { "epoch": 0.13632, "grad_norm": 2.203125, "learning_rate": 2.875235844704612e-05, "loss": 2.293, "step": 10650 }, { "epoch": 0.136448, "grad_norm": 1.5703125, "learning_rate": 2.8749933335929673e-05, "loss": 2.2833, "step": 10660 }, { "epoch": 0.136576, "grad_norm": 1.46875, "learning_rate": 2.8747505972664297e-05, "loss": 2.3214, "step": 10670 }, { "epoch": 0.136704, "grad_norm": 1.515625, "learning_rate": 2.874507635764758e-05, "loss": 2.3282, "step": 10680 }, { "epoch": 0.136832, "grad_norm": 1.59375, "learning_rate": 2.8742644491277475e-05, "loss": 2.3083, "step": 10690 }, { "epoch": 0.13696, "grad_norm": 1.65625, "learning_rate": 2.874021037395231e-05, "loss": 2.3222, "step": 10700 }, { "epoch": 0.137088, "grad_norm": 1.5390625, "learning_rate": 2.8737774006070777e-05, "loss": 2.3455, "step": 10710 }, { "epoch": 0.137216, "grad_norm": 1.578125, "learning_rate": 2.8735335388031927e-05, "loss": 2.3155, "step": 10720 }, { "epoch": 0.137344, "grad_norm": 1.484375, "learning_rate": 2.8732894520235206e-05, "loss": 2.3445, "step": 10730 }, { "epoch": 0.137472, "grad_norm": 1.453125, "learning_rate": 2.8730451403080396e-05, "loss": 2.3239, "step": 10740 }, { "epoch": 0.1376, "grad_norm": 1.484375, "learning_rate": 2.872800603696768e-05, "loss": 2.3236, "step": 10750 }, { "epoch": 0.137728, "grad_norm": 1.7109375, "learning_rate": 2.8725558422297576e-05, "loss": 2.2854, "step": 10760 }, { "epoch": 0.137856, "grad_norm": 1.5234375, "learning_rate": 2.8723108559471e-05, "loss": 2.3069, "step": 10770 }, { "epoch": 0.137984, "grad_norm": 1.5625, "learning_rate": 2.8720656448889217e-05, "loss": 2.3226, "step": 10780 }, { "epoch": 0.138112, "grad_norm": 1.5234375, "learning_rate": 2.871820209095387e-05, "loss": 2.2969, "step": 10790 }, { "epoch": 0.13824, "grad_norm": 1.4921875, "learning_rate": 2.8715745486066967e-05, "loss": 2.2986, "step": 10800 }, { "epoch": 0.138368, "grad_norm": 1.5625, "learning_rate": 2.871328663463088e-05, "loss": 2.3299, "step": 10810 }, { "epoch": 0.138496, "grad_norm": 1.4296875, "learning_rate": 2.8710825537048353e-05, "loss": 2.3011, "step": 10820 }, { "epoch": 0.138624, "grad_norm": 6.09375, "learning_rate": 2.8708362193722504e-05, "loss": 2.2952, "step": 10830 }, { "epoch": 0.138752, "grad_norm": 1.5703125, "learning_rate": 2.87058966050568e-05, "loss": 2.3197, "step": 10840 }, { "epoch": 0.13888, "grad_norm": 1.4921875, "learning_rate": 2.8703428771455105e-05, "loss": 2.3196, "step": 10850 }, { "epoch": 0.139008, "grad_norm": 1.5859375, "learning_rate": 2.870095869332162e-05, "loss": 2.3081, "step": 10860 }, { "epoch": 0.139136, "grad_norm": 3.671875, "learning_rate": 2.8698486371060938e-05, "loss": 2.3088, "step": 10870 }, { "epoch": 0.139264, "grad_norm": 1.71875, "learning_rate": 2.8696011805078e-05, "loss": 2.3067, "step": 10880 }, { "epoch": 0.139392, "grad_norm": 1.4296875, "learning_rate": 2.869353499577813e-05, "loss": 2.3255, "step": 10890 }, { "epoch": 0.13952, "grad_norm": 1.4375, "learning_rate": 2.8691055943567003e-05, "loss": 2.3136, "step": 10900 }, { "epoch": 0.139648, "grad_norm": 2.3125, "learning_rate": 2.868857464885069e-05, "loss": 2.2892, "step": 10910 }, { "epoch": 0.139776, "grad_norm": 2.765625, "learning_rate": 2.8686091112035596e-05, "loss": 2.3048, "step": 10920 }, { "epoch": 0.139904, "grad_norm": 1.5234375, "learning_rate": 2.8683605333528508e-05, "loss": 2.3095, "step": 10930 }, { "epoch": 0.140032, "grad_norm": 1.59375, "learning_rate": 2.8681117313736588e-05, "loss": 2.3027, "step": 10940 }, { "epoch": 0.14016, "grad_norm": 1.5703125, "learning_rate": 2.8678627053067353e-05, "loss": 2.3026, "step": 10950 }, { "epoch": 0.140288, "grad_norm": 1.5, "learning_rate": 2.8676134551928692e-05, "loss": 2.2958, "step": 10960 }, { "epoch": 0.140416, "grad_norm": 1.453125, "learning_rate": 2.8673639810728864e-05, "loss": 2.3043, "step": 10970 }, { "epoch": 0.140544, "grad_norm": 1.5625, "learning_rate": 2.867114282987648e-05, "loss": 2.3342, "step": 10980 }, { "epoch": 0.140672, "grad_norm": 1.484375, "learning_rate": 2.8668643609780546e-05, "loss": 2.3007, "step": 10990 }, { "epoch": 0.1408, "grad_norm": 1.5078125, "learning_rate": 2.8666142150850403e-05, "loss": 2.2858, "step": 11000 }, { "epoch": 0.140928, "grad_norm": 2.1875, "learning_rate": 2.866363845349578e-05, "loss": 2.3487, "step": 11010 }, { "epoch": 0.141056, "grad_norm": 1.421875, "learning_rate": 2.8661132518126764e-05, "loss": 2.3048, "step": 11020 }, { "epoch": 0.141184, "grad_norm": 1.765625, "learning_rate": 2.8658624345153813e-05, "loss": 2.3243, "step": 11030 }, { "epoch": 0.141312, "grad_norm": 1.671875, "learning_rate": 2.865611393498775e-05, "loss": 2.2937, "step": 11040 }, { "epoch": 0.14144, "grad_norm": 1.5625, "learning_rate": 2.865360128803976e-05, "loss": 2.2897, "step": 11050 }, { "epoch": 0.141568, "grad_norm": 1.8125, "learning_rate": 2.86510864047214e-05, "loss": 2.3012, "step": 11060 }, { "epoch": 0.141696, "grad_norm": 1.546875, "learning_rate": 2.8648569285444595e-05, "loss": 2.2914, "step": 11070 }, { "epoch": 0.141824, "grad_norm": 1.390625, "learning_rate": 2.864604993062163e-05, "loss": 2.3224, "step": 11080 }, { "epoch": 0.141952, "grad_norm": 1.46875, "learning_rate": 2.8643528340665157e-05, "loss": 2.2831, "step": 11090 }, { "epoch": 0.14208, "grad_norm": 1.5390625, "learning_rate": 2.8641004515988192e-05, "loss": 2.3227, "step": 11100 }, { "epoch": 0.142208, "grad_norm": 1.5703125, "learning_rate": 2.8638478457004128e-05, "loss": 2.3295, "step": 11110 }, { "epoch": 0.142336, "grad_norm": 1.5078125, "learning_rate": 2.8635950164126716e-05, "loss": 2.3137, "step": 11120 }, { "epoch": 0.142464, "grad_norm": 1.6015625, "learning_rate": 2.8633419637770074e-05, "loss": 2.3016, "step": 11130 }, { "epoch": 0.142592, "grad_norm": 1.5390625, "learning_rate": 2.8630886878348686e-05, "loss": 2.2843, "step": 11140 }, { "epoch": 0.14272, "grad_norm": 2.203125, "learning_rate": 2.8628351886277394e-05, "loss": 2.2889, "step": 11150 }, { "epoch": 0.142848, "grad_norm": 1.4609375, "learning_rate": 2.862581466197142e-05, "loss": 2.3073, "step": 11160 }, { "epoch": 0.142976, "grad_norm": 1.5859375, "learning_rate": 2.8623275205846346e-05, "loss": 2.3226, "step": 11170 }, { "epoch": 0.143104, "grad_norm": 1.6015625, "learning_rate": 2.8620733518318114e-05, "loss": 2.3625, "step": 11180 }, { "epoch": 0.143232, "grad_norm": 1.5703125, "learning_rate": 2.8618189599803037e-05, "loss": 2.3405, "step": 11190 }, { "epoch": 0.14336, "grad_norm": 1.546875, "learning_rate": 2.8615643450717793e-05, "loss": 2.3316, "step": 11200 }, { "epoch": 0.143488, "grad_norm": 1.6015625, "learning_rate": 2.861309507147943e-05, "loss": 2.3224, "step": 11210 }, { "epoch": 0.143616, "grad_norm": 1.40625, "learning_rate": 2.8610544462505343e-05, "loss": 2.3225, "step": 11220 }, { "epoch": 0.143744, "grad_norm": 1.4375, "learning_rate": 2.8607991624213314e-05, "loss": 2.3281, "step": 11230 }, { "epoch": 0.143872, "grad_norm": 1.484375, "learning_rate": 2.860543655702148e-05, "loss": 2.2953, "step": 11240 }, { "epoch": 0.144, "grad_norm": 1.6171875, "learning_rate": 2.8602879261348343e-05, "loss": 2.3109, "step": 11250 }, { "epoch": 0.144128, "grad_norm": 1.5859375, "learning_rate": 2.8600319737612767e-05, "loss": 2.3211, "step": 11260 }, { "epoch": 0.144256, "grad_norm": 1.8125, "learning_rate": 2.8597757986233994e-05, "loss": 2.2829, "step": 11270 }, { "epoch": 0.144384, "grad_norm": 1.4765625, "learning_rate": 2.859519400763162e-05, "loss": 2.3021, "step": 11280 }, { "epoch": 0.144512, "grad_norm": 1.4609375, "learning_rate": 2.85926278022256e-05, "loss": 2.2891, "step": 11290 }, { "epoch": 0.14464, "grad_norm": 1.640625, "learning_rate": 2.8590059370436277e-05, "loss": 2.3417, "step": 11300 }, { "epoch": 0.144768, "grad_norm": 1.4375, "learning_rate": 2.8587488712684324e-05, "loss": 2.3459, "step": 11310 }, { "epoch": 0.144896, "grad_norm": 1.4921875, "learning_rate": 2.8584915829390815e-05, "loss": 2.2761, "step": 11320 }, { "epoch": 0.145024, "grad_norm": 1.46875, "learning_rate": 2.858234072097716e-05, "loss": 2.2769, "step": 11330 }, { "epoch": 0.145152, "grad_norm": 1.5078125, "learning_rate": 2.8579763387865147e-05, "loss": 2.3194, "step": 11340 }, { "epoch": 0.14528, "grad_norm": 1.5078125, "learning_rate": 2.857718383047693e-05, "loss": 2.3019, "step": 11350 }, { "epoch": 0.145408, "grad_norm": 4.125, "learning_rate": 2.8574602049235025e-05, "loss": 2.3034, "step": 11360 }, { "epoch": 0.145536, "grad_norm": 1.515625, "learning_rate": 2.8572018044562305e-05, "loss": 2.3454, "step": 11370 }, { "epoch": 0.145664, "grad_norm": 1.5, "learning_rate": 2.8569431816882016e-05, "loss": 2.3373, "step": 11380 }, { "epoch": 0.145792, "grad_norm": 1.515625, "learning_rate": 2.8566843366617767e-05, "loss": 2.3024, "step": 11390 }, { "epoch": 0.14592, "grad_norm": 1.5234375, "learning_rate": 2.856425269419352e-05, "loss": 2.3129, "step": 11400 }, { "epoch": 0.146048, "grad_norm": 1.5, "learning_rate": 2.8561659800033626e-05, "loss": 2.301, "step": 11410 }, { "epoch": 0.146176, "grad_norm": 1.3515625, "learning_rate": 2.8559064684562772e-05, "loss": 2.3259, "step": 11420 }, { "epoch": 0.146304, "grad_norm": 1.6015625, "learning_rate": 2.8556467348206022e-05, "loss": 2.3245, "step": 11430 }, { "epoch": 0.146432, "grad_norm": 1.4609375, "learning_rate": 2.8553867791388806e-05, "loss": 2.294, "step": 11440 }, { "epoch": 0.14656, "grad_norm": 1.671875, "learning_rate": 2.855126601453691e-05, "loss": 2.3403, "step": 11450 }, { "epoch": 0.146688, "grad_norm": 1.734375, "learning_rate": 2.8548662018076496e-05, "loss": 2.3359, "step": 11460 }, { "epoch": 0.146816, "grad_norm": 1.6015625, "learning_rate": 2.8546055802434075e-05, "loss": 2.3188, "step": 11470 }, { "epoch": 0.146944, "grad_norm": 1.8203125, "learning_rate": 2.854344736803653e-05, "loss": 2.3603, "step": 11480 }, { "epoch": 0.147072, "grad_norm": 1.6796875, "learning_rate": 2.8540836715311106e-05, "loss": 2.2972, "step": 11490 }, { "epoch": 0.1472, "grad_norm": 1.59375, "learning_rate": 2.8538223844685406e-05, "loss": 2.3155, "step": 11500 }, { "epoch": 0.147328, "grad_norm": 1.578125, "learning_rate": 2.8535608756587406e-05, "loss": 2.3456, "step": 11510 }, { "epoch": 0.147456, "grad_norm": 1.4921875, "learning_rate": 2.8532991451445442e-05, "loss": 2.341, "step": 11520 }, { "epoch": 0.147584, "grad_norm": 1.4765625, "learning_rate": 2.8530371929688206e-05, "loss": 2.3448, "step": 11530 }, { "epoch": 0.147712, "grad_norm": 1.40625, "learning_rate": 2.852775019174476e-05, "loss": 2.3087, "step": 11540 }, { "epoch": 0.14784, "grad_norm": 1.5, "learning_rate": 2.8525126238044536e-05, "loss": 2.3131, "step": 11550 }, { "epoch": 0.147968, "grad_norm": 1.46875, "learning_rate": 2.8522500069017305e-05, "loss": 2.3351, "step": 11560 }, { "epoch": 0.148096, "grad_norm": 1.390625, "learning_rate": 2.851987168509323e-05, "loss": 2.3407, "step": 11570 }, { "epoch": 0.148224, "grad_norm": 1.4453125, "learning_rate": 2.8517241086702815e-05, "loss": 2.2976, "step": 11580 }, { "epoch": 0.148352, "grad_norm": 1.6171875, "learning_rate": 2.8514608274276937e-05, "loss": 2.2937, "step": 11590 }, { "epoch": 0.14848, "grad_norm": 1.546875, "learning_rate": 2.8511973248246834e-05, "loss": 2.3382, "step": 11600 }, { "epoch": 0.148608, "grad_norm": 1.4296875, "learning_rate": 2.8509336009044106e-05, "loss": 2.2753, "step": 11610 }, { "epoch": 0.148736, "grad_norm": 1.5234375, "learning_rate": 2.8506696557100713e-05, "loss": 2.3194, "step": 11620 }, { "epoch": 0.148864, "grad_norm": 4.34375, "learning_rate": 2.8504054892848984e-05, "loss": 2.2967, "step": 11630 }, { "epoch": 0.148992, "grad_norm": 1.5546875, "learning_rate": 2.8501411016721604e-05, "loss": 2.2979, "step": 11640 }, { "epoch": 0.14912, "grad_norm": 1.46875, "learning_rate": 2.8498764929151626e-05, "loss": 2.3476, "step": 11650 }, { "epoch": 0.149248, "grad_norm": 1.421875, "learning_rate": 2.849611663057245e-05, "loss": 2.335, "step": 11660 }, { "epoch": 0.149376, "grad_norm": 1.65625, "learning_rate": 2.8493466121417868e-05, "loss": 2.3373, "step": 11670 }, { "epoch": 0.149504, "grad_norm": 1.453125, "learning_rate": 2.8490813402122004e-05, "loss": 2.3004, "step": 11680 }, { "epoch": 0.149632, "grad_norm": 1.6484375, "learning_rate": 2.8488158473119354e-05, "loss": 2.3138, "step": 11690 }, { "epoch": 0.14976, "grad_norm": 1.484375, "learning_rate": 2.8485501334844785e-05, "loss": 2.3045, "step": 11700 }, { "epoch": 0.149888, "grad_norm": 1.484375, "learning_rate": 2.848284198773352e-05, "loss": 2.3254, "step": 11710 }, { "epoch": 0.150016, "grad_norm": 1.4765625, "learning_rate": 2.848018043222114e-05, "loss": 2.3207, "step": 11720 }, { "epoch": 0.150144, "grad_norm": 2.5625, "learning_rate": 2.8477516668743584e-05, "loss": 2.3503, "step": 11730 }, { "epoch": 0.150272, "grad_norm": 1.5234375, "learning_rate": 2.8474850697737164e-05, "loss": 2.2833, "step": 11740 }, { "epoch": 0.1504, "grad_norm": 1.4609375, "learning_rate": 2.847218251963855e-05, "loss": 2.2889, "step": 11750 }, { "epoch": 0.150528, "grad_norm": 1.4296875, "learning_rate": 2.8469512134884778e-05, "loss": 2.2905, "step": 11760 }, { "epoch": 0.150656, "grad_norm": 1.5390625, "learning_rate": 2.8466839543913225e-05, "loss": 2.3426, "step": 11770 }, { "epoch": 0.150784, "grad_norm": 1.59375, "learning_rate": 2.846416474716165e-05, "loss": 2.3094, "step": 11780 }, { "epoch": 0.150912, "grad_norm": 1.59375, "learning_rate": 2.846148774506817e-05, "loss": 2.3162, "step": 11790 }, { "epoch": 0.15104, "grad_norm": 1.5859375, "learning_rate": 2.8458808538071267e-05, "loss": 2.3114, "step": 11800 }, { "epoch": 0.151168, "grad_norm": 1.46875, "learning_rate": 2.845612712660976e-05, "loss": 2.3245, "step": 11810 }, { "epoch": 0.151296, "grad_norm": 1.5546875, "learning_rate": 2.8453443511122857e-05, "loss": 2.2959, "step": 11820 }, { "epoch": 0.151424, "grad_norm": 2.15625, "learning_rate": 2.845075769205012e-05, "loss": 2.2985, "step": 11830 }, { "epoch": 0.151552, "grad_norm": 2.4375, "learning_rate": 2.8448069669831464e-05, "loss": 2.2962, "step": 11840 }, { "epoch": 0.15168, "grad_norm": 1.5390625, "learning_rate": 2.8445379444907165e-05, "loss": 2.3069, "step": 11850 }, { "epoch": 0.151808, "grad_norm": 1.7265625, "learning_rate": 2.8442687017717875e-05, "loss": 2.2756, "step": 11860 }, { "epoch": 0.151936, "grad_norm": 1.59375, "learning_rate": 2.843999238870459e-05, "loss": 2.3041, "step": 11870 }, { "epoch": 0.152064, "grad_norm": 1.4921875, "learning_rate": 2.8437295558308668e-05, "loss": 2.3126, "step": 11880 }, { "epoch": 0.152192, "grad_norm": 1.46875, "learning_rate": 2.8434596526971837e-05, "loss": 2.2963, "step": 11890 }, { "epoch": 0.15232, "grad_norm": 1.6171875, "learning_rate": 2.843189529513618e-05, "loss": 2.2953, "step": 11900 }, { "epoch": 0.152448, "grad_norm": 1.5625, "learning_rate": 2.8429191863244146e-05, "loss": 2.3261, "step": 11910 }, { "epoch": 0.152576, "grad_norm": 1.5, "learning_rate": 2.842648623173853e-05, "loss": 2.3231, "step": 11920 }, { "epoch": 0.152704, "grad_norm": 1.4453125, "learning_rate": 2.84237784010625e-05, "loss": 2.3447, "step": 11930 }, { "epoch": 0.152832, "grad_norm": 1.484375, "learning_rate": 2.842106837165959e-05, "loss": 2.2914, "step": 11940 }, { "epoch": 0.15296, "grad_norm": 2.21875, "learning_rate": 2.841835614397367e-05, "loss": 2.3029, "step": 11950 }, { "epoch": 0.153088, "grad_norm": 1.4453125, "learning_rate": 2.8415641718448994e-05, "loss": 2.3285, "step": 11960 }, { "epoch": 0.153216, "grad_norm": 1.5, "learning_rate": 2.841292509553017e-05, "loss": 2.2922, "step": 11970 }, { "epoch": 0.153344, "grad_norm": 1.7734375, "learning_rate": 2.8410206275662154e-05, "loss": 2.3016, "step": 11980 }, { "epoch": 0.153472, "grad_norm": 1.53125, "learning_rate": 2.8407485259290278e-05, "loss": 2.3165, "step": 11990 }, { "epoch": 0.1536, "grad_norm": 1.484375, "learning_rate": 2.8404762046860216e-05, "loss": 2.3533, "step": 12000 }, { "epoch": 0.153728, "grad_norm": 1.5, "learning_rate": 2.840203663881803e-05, "loss": 2.2976, "step": 12010 }, { "epoch": 0.153856, "grad_norm": 1.546875, "learning_rate": 2.839930903561011e-05, "loss": 2.303, "step": 12020 }, { "epoch": 0.153984, "grad_norm": 1.5234375, "learning_rate": 2.839657923768323e-05, "loss": 2.3135, "step": 12030 }, { "epoch": 0.154112, "grad_norm": 1.453125, "learning_rate": 2.83938472454845e-05, "loss": 2.3183, "step": 12040 }, { "epoch": 0.15424, "grad_norm": 1.625, "learning_rate": 2.839111305946141e-05, "loss": 2.3274, "step": 12050 }, { "epoch": 0.154368, "grad_norm": 1.4921875, "learning_rate": 2.8388376680061806e-05, "loss": 2.3084, "step": 12060 }, { "epoch": 0.154496, "grad_norm": 1.5390625, "learning_rate": 2.838563810773388e-05, "loss": 2.2834, "step": 12070 }, { "epoch": 0.154624, "grad_norm": 1.4453125, "learning_rate": 2.83828973429262e-05, "loss": 2.3497, "step": 12080 }, { "epoch": 0.154752, "grad_norm": 1.546875, "learning_rate": 2.8380154386087674e-05, "loss": 2.2973, "step": 12090 }, { "epoch": 0.15488, "grad_norm": 1.5078125, "learning_rate": 2.8377409237667594e-05, "loss": 2.3117, "step": 12100 }, { "epoch": 0.155008, "grad_norm": 1.53125, "learning_rate": 2.837466189811559e-05, "loss": 2.3281, "step": 12110 }, { "epoch": 0.155136, "grad_norm": 1.5, "learning_rate": 2.8371912367881657e-05, "loss": 2.2909, "step": 12120 }, { "epoch": 0.155264, "grad_norm": 1.65625, "learning_rate": 2.8369160647416155e-05, "loss": 2.321, "step": 12130 }, { "epoch": 0.155392, "grad_norm": 1.53125, "learning_rate": 2.83664067371698e-05, "loss": 2.2997, "step": 12140 }, { "epoch": 0.15552, "grad_norm": 1.546875, "learning_rate": 2.836365063759365e-05, "loss": 2.3057, "step": 12150 }, { "epoch": 0.155648, "grad_norm": 1.75, "learning_rate": 2.8360892349139154e-05, "loss": 2.302, "step": 12160 }, { "epoch": 0.155776, "grad_norm": 1.9375, "learning_rate": 2.835813187225809e-05, "loss": 2.3218, "step": 12170 }, { "epoch": 0.155904, "grad_norm": 1.6875, "learning_rate": 2.8355369207402605e-05, "loss": 2.3092, "step": 12180 }, { "epoch": 0.156032, "grad_norm": 1.5390625, "learning_rate": 2.8352604355025215e-05, "loss": 2.298, "step": 12190 }, { "epoch": 0.15616, "grad_norm": 1.5703125, "learning_rate": 2.834983731557878e-05, "loss": 2.3352, "step": 12200 }, { "epoch": 0.156288, "grad_norm": 1.5, "learning_rate": 2.834706808951652e-05, "loss": 2.3355, "step": 12210 }, { "epoch": 0.156416, "grad_norm": 1.53125, "learning_rate": 2.8344296677292015e-05, "loss": 2.3223, "step": 12220 }, { "epoch": 0.156544, "grad_norm": 1.6875, "learning_rate": 2.8341523079359214e-05, "loss": 2.3392, "step": 12230 }, { "epoch": 0.156672, "grad_norm": 1.5234375, "learning_rate": 2.8338747296172404e-05, "loss": 2.3003, "step": 12240 }, { "epoch": 0.1568, "grad_norm": 1.3828125, "learning_rate": 2.8335969328186246e-05, "loss": 2.3013, "step": 12250 }, { "epoch": 0.156928, "grad_norm": 1.5390625, "learning_rate": 2.8333189175855747e-05, "loss": 2.3198, "step": 12260 }, { "epoch": 0.157056, "grad_norm": 1.46875, "learning_rate": 2.8330406839636285e-05, "loss": 2.3004, "step": 12270 }, { "epoch": 0.157184, "grad_norm": 1.4765625, "learning_rate": 2.8327622319983582e-05, "loss": 2.3192, "step": 12280 }, { "epoch": 0.157312, "grad_norm": 1.6171875, "learning_rate": 2.832483561735373e-05, "loss": 2.2876, "step": 12290 }, { "epoch": 0.15744, "grad_norm": 1.5546875, "learning_rate": 2.832204673220317e-05, "loss": 2.2956, "step": 12300 }, { "epoch": 0.157568, "grad_norm": 1.4453125, "learning_rate": 2.83192556649887e-05, "loss": 2.3016, "step": 12310 }, { "epoch": 0.157696, "grad_norm": 1.46875, "learning_rate": 2.8316462416167482e-05, "loss": 2.2924, "step": 12320 }, { "epoch": 0.157824, "grad_norm": 1.515625, "learning_rate": 2.8313666986197028e-05, "loss": 2.3081, "step": 12330 }, { "epoch": 0.157952, "grad_norm": 1.796875, "learning_rate": 2.831086937553522e-05, "loss": 2.2693, "step": 12340 }, { "epoch": 0.15808, "grad_norm": 1.4609375, "learning_rate": 2.8308069584640276e-05, "loss": 2.2996, "step": 12350 }, { "epoch": 0.158208, "grad_norm": 1.640625, "learning_rate": 2.8305267613970796e-05, "loss": 2.3166, "step": 12360 }, { "epoch": 0.158336, "grad_norm": 1.46875, "learning_rate": 2.8302463463985717e-05, "loss": 2.3166, "step": 12370 }, { "epoch": 0.158464, "grad_norm": 1.484375, "learning_rate": 2.8299657135144342e-05, "loss": 2.303, "step": 12380 }, { "epoch": 0.158592, "grad_norm": 1.453125, "learning_rate": 2.829684862790632e-05, "loss": 2.3144, "step": 12390 }, { "epoch": 0.15872, "grad_norm": 1.5078125, "learning_rate": 2.8294037942731687e-05, "loss": 2.3021, "step": 12400 }, { "epoch": 0.158848, "grad_norm": 1.53125, "learning_rate": 2.8291225080080795e-05, "loss": 2.2956, "step": 12410 }, { "epoch": 0.158976, "grad_norm": 1.9921875, "learning_rate": 2.8288410040414384e-05, "loss": 2.3525, "step": 12420 }, { "epoch": 0.159104, "grad_norm": 1.5859375, "learning_rate": 2.828559282419353e-05, "loss": 2.301, "step": 12430 }, { "epoch": 0.159232, "grad_norm": 1.4921875, "learning_rate": 2.8282773431879686e-05, "loss": 2.3046, "step": 12440 }, { "epoch": 0.15936, "grad_norm": 1.5390625, "learning_rate": 2.8279951863934642e-05, "loss": 2.3093, "step": 12450 }, { "epoch": 0.159488, "grad_norm": 1.4609375, "learning_rate": 2.8277128120820553e-05, "loss": 2.3075, "step": 12460 }, { "epoch": 0.159616, "grad_norm": 1.9375, "learning_rate": 2.8274302202999933e-05, "loss": 2.3062, "step": 12470 }, { "epoch": 0.159744, "grad_norm": 1.53125, "learning_rate": 2.827147411093564e-05, "loss": 2.3266, "step": 12480 }, { "epoch": 0.159872, "grad_norm": 1.5390625, "learning_rate": 2.8268643845090914e-05, "loss": 2.3056, "step": 12490 }, { "epoch": 0.16, "grad_norm": 1.7734375, "learning_rate": 2.8265811405929315e-05, "loss": 2.3239, "step": 12500 }, { "epoch": 0.160128, "grad_norm": 1.5078125, "learning_rate": 2.8262976793914787e-05, "loss": 2.2847, "step": 12510 }, { "epoch": 0.160256, "grad_norm": 1.6171875, "learning_rate": 2.8260140009511624e-05, "loss": 2.3037, "step": 12520 }, { "epoch": 0.160384, "grad_norm": 1.3984375, "learning_rate": 2.8257301053184468e-05, "loss": 2.306, "step": 12530 }, { "epoch": 0.160512, "grad_norm": 1.4765625, "learning_rate": 2.8254459925398318e-05, "loss": 2.3076, "step": 12540 }, { "epoch": 0.16064, "grad_norm": 1.578125, "learning_rate": 2.8251616626618538e-05, "loss": 2.3331, "step": 12550 }, { "epoch": 0.160768, "grad_norm": 1.3984375, "learning_rate": 2.8248771157310844e-05, "loss": 2.3251, "step": 12560 }, { "epoch": 0.160896, "grad_norm": 1.515625, "learning_rate": 2.82459235179413e-05, "loss": 2.3279, "step": 12570 }, { "epoch": 0.161024, "grad_norm": 1.421875, "learning_rate": 2.824307370897633e-05, "loss": 2.3175, "step": 12580 }, { "epoch": 0.161152, "grad_norm": 1.4453125, "learning_rate": 2.8240221730882723e-05, "loss": 2.3134, "step": 12590 }, { "epoch": 0.16128, "grad_norm": 1.53125, "learning_rate": 2.8237367584127596e-05, "loss": 2.3166, "step": 12600 }, { "epoch": 0.161408, "grad_norm": 1.5390625, "learning_rate": 2.823451126917846e-05, "loss": 2.3239, "step": 12610 }, { "epoch": 0.161536, "grad_norm": 1.6328125, "learning_rate": 2.8231652786503148e-05, "loss": 2.2895, "step": 12620 }, { "epoch": 0.161664, "grad_norm": 1.5390625, "learning_rate": 2.8228792136569864e-05, "loss": 2.2851, "step": 12630 }, { "epoch": 0.161792, "grad_norm": 1.5625, "learning_rate": 2.8225929319847165e-05, "loss": 2.3067, "step": 12640 }, { "epoch": 0.16192, "grad_norm": 2.015625, "learning_rate": 2.822306433680396e-05, "loss": 2.3234, "step": 12650 }, { "epoch": 0.162048, "grad_norm": 1.546875, "learning_rate": 2.8220197187909516e-05, "loss": 2.2779, "step": 12660 }, { "epoch": 0.162176, "grad_norm": 2.46875, "learning_rate": 2.821732787363345e-05, "loss": 2.3265, "step": 12670 }, { "epoch": 0.162304, "grad_norm": 1.4375, "learning_rate": 2.821445639444574e-05, "loss": 2.3034, "step": 12680 }, { "epoch": 0.162432, "grad_norm": 1.5703125, "learning_rate": 2.8211582750816713e-05, "loss": 2.3199, "step": 12690 }, { "epoch": 0.16256, "grad_norm": 1.5078125, "learning_rate": 2.8208706943217058e-05, "loss": 2.3179, "step": 12700 }, { "epoch": 0.162688, "grad_norm": 1.7265625, "learning_rate": 2.820582897211781e-05, "loss": 2.3155, "step": 12710 }, { "epoch": 0.162816, "grad_norm": 1.7265625, "learning_rate": 2.820294883799036e-05, "loss": 2.3205, "step": 12720 }, { "epoch": 0.162944, "grad_norm": 1.7421875, "learning_rate": 2.8200066541306456e-05, "loss": 2.317, "step": 12730 }, { "epoch": 0.163072, "grad_norm": 1.890625, "learning_rate": 2.81971820825382e-05, "loss": 2.3145, "step": 12740 }, { "epoch": 0.1632, "grad_norm": 1.5546875, "learning_rate": 2.8194295462158054e-05, "loss": 2.3151, "step": 12750 }, { "epoch": 0.163328, "grad_norm": 1.5234375, "learning_rate": 2.8191406680638812e-05, "loss": 2.3116, "step": 12760 }, { "epoch": 0.163456, "grad_norm": 1.484375, "learning_rate": 2.818851573845365e-05, "loss": 2.3165, "step": 12770 }, { "epoch": 0.163584, "grad_norm": 1.4765625, "learning_rate": 2.8185622636076085e-05, "loss": 2.3045, "step": 12780 }, { "epoch": 0.163712, "grad_norm": 1.5234375, "learning_rate": 2.8182727373979982e-05, "loss": 2.3081, "step": 12790 }, { "epoch": 0.16384, "grad_norm": 1.609375, "learning_rate": 2.817982995263957e-05, "loss": 2.2927, "step": 12800 }, { "epoch": 0.163968, "grad_norm": 1.4375, "learning_rate": 2.8176930372529427e-05, "loss": 2.3098, "step": 12810 }, { "epoch": 0.164096, "grad_norm": 1.546875, "learning_rate": 2.817402863412448e-05, "loss": 2.2954, "step": 12820 }, { "epoch": 0.164224, "grad_norm": 1.5546875, "learning_rate": 2.8171124737900024e-05, "loss": 2.3249, "step": 12830 }, { "epoch": 0.164352, "grad_norm": 1.4765625, "learning_rate": 2.8168218684331693e-05, "loss": 2.2723, "step": 12840 }, { "epoch": 0.16448, "grad_norm": 1.59375, "learning_rate": 2.8165310473895483e-05, "loss": 2.297, "step": 12850 }, { "epoch": 0.164608, "grad_norm": 1.4375, "learning_rate": 2.8162400107067733e-05, "loss": 2.3203, "step": 12860 }, { "epoch": 0.164736, "grad_norm": 1.4375, "learning_rate": 2.8159487584325148e-05, "loss": 2.3125, "step": 12870 }, { "epoch": 0.164864, "grad_norm": 1.4765625, "learning_rate": 2.815657290614478e-05, "loss": 2.3304, "step": 12880 }, { "epoch": 0.164992, "grad_norm": 1.4921875, "learning_rate": 2.815365607300403e-05, "loss": 2.2997, "step": 12890 }, { "epoch": 0.16512, "grad_norm": 1.5546875, "learning_rate": 2.8150737085380656e-05, "loss": 2.2865, "step": 12900 }, { "epoch": 0.165248, "grad_norm": 1.5078125, "learning_rate": 2.8147815943752776e-05, "loss": 2.3104, "step": 12910 }, { "epoch": 0.165376, "grad_norm": 1.46875, "learning_rate": 2.814489264859885e-05, "loss": 2.2854, "step": 12920 }, { "epoch": 0.165504, "grad_norm": 1.4921875, "learning_rate": 2.814196720039769e-05, "loss": 2.3102, "step": 12930 }, { "epoch": 0.165632, "grad_norm": 1.59375, "learning_rate": 2.8139039599628473e-05, "loss": 2.3297, "step": 12940 }, { "epoch": 0.16576, "grad_norm": 1.4609375, "learning_rate": 2.8136109846770717e-05, "loss": 2.2899, "step": 12950 }, { "epoch": 0.165888, "grad_norm": 1.515625, "learning_rate": 2.813317794230429e-05, "loss": 2.346, "step": 12960 }, { "epoch": 0.166016, "grad_norm": 1.6484375, "learning_rate": 2.8130243886709436e-05, "loss": 2.3104, "step": 12970 }, { "epoch": 0.166144, "grad_norm": 1.71875, "learning_rate": 2.8127307680466713e-05, "loss": 2.3065, "step": 12980 }, { "epoch": 0.166272, "grad_norm": 1.484375, "learning_rate": 2.812436932405707e-05, "loss": 2.2981, "step": 12990 }, { "epoch": 0.1664, "grad_norm": 1.7265625, "learning_rate": 2.8121428817961775e-05, "loss": 2.287, "step": 13000 }, { "epoch": 0.166528, "grad_norm": 2.1875, "learning_rate": 2.8118486162662476e-05, "loss": 2.3106, "step": 13010 }, { "epoch": 0.166656, "grad_norm": 1.5546875, "learning_rate": 2.8115541358641157e-05, "loss": 2.3045, "step": 13020 }, { "epoch": 0.166784, "grad_norm": 1.4453125, "learning_rate": 2.811259440638016e-05, "loss": 2.3141, "step": 13030 }, { "epoch": 0.166912, "grad_norm": 1.546875, "learning_rate": 2.8109645306362167e-05, "loss": 2.3237, "step": 13040 }, { "epoch": 0.16704, "grad_norm": 1.4296875, "learning_rate": 2.8106694059070228e-05, "loss": 2.3105, "step": 13050 }, { "epoch": 0.167168, "grad_norm": 1.453125, "learning_rate": 2.810374066498774e-05, "loss": 2.306, "step": 13060 }, { "epoch": 0.167296, "grad_norm": 1.765625, "learning_rate": 2.8100785124598448e-05, "loss": 2.2869, "step": 13070 }, { "epoch": 0.167424, "grad_norm": 2.46875, "learning_rate": 2.809782743838645e-05, "loss": 2.3294, "step": 13080 }, { "epoch": 0.167552, "grad_norm": 1.5390625, "learning_rate": 2.8094867606836193e-05, "loss": 2.2967, "step": 13090 }, { "epoch": 0.16768, "grad_norm": 1.3984375, "learning_rate": 2.809190563043248e-05, "loss": 2.3274, "step": 13100 }, { "epoch": 0.167808, "grad_norm": 1.546875, "learning_rate": 2.808894150966046e-05, "loss": 2.3237, "step": 13110 }, { "epoch": 0.167936, "grad_norm": 1.515625, "learning_rate": 2.8085975245005645e-05, "loss": 2.3198, "step": 13120 }, { "epoch": 0.168064, "grad_norm": 1.4296875, "learning_rate": 2.808300683695388e-05, "loss": 2.3144, "step": 13130 }, { "epoch": 0.168192, "grad_norm": 1.546875, "learning_rate": 2.8080036285991378e-05, "loss": 2.3268, "step": 13140 }, { "epoch": 0.16832, "grad_norm": 1.5859375, "learning_rate": 2.8077063592604692e-05, "loss": 2.3244, "step": 13150 }, { "epoch": 0.168448, "grad_norm": 1.4375, "learning_rate": 2.8074088757280733e-05, "loss": 2.3347, "step": 13160 }, { "epoch": 0.168576, "grad_norm": 1.53125, "learning_rate": 2.8071111780506752e-05, "loss": 2.3046, "step": 13170 }, { "epoch": 0.168704, "grad_norm": 1.5625, "learning_rate": 2.806813266277037e-05, "loss": 2.3148, "step": 13180 }, { "epoch": 0.168832, "grad_norm": 1.5625, "learning_rate": 2.8065151404559537e-05, "loss": 2.3157, "step": 13190 }, { "epoch": 0.16896, "grad_norm": 1.421875, "learning_rate": 2.8062168006362562e-05, "loss": 2.3042, "step": 13200 }, { "epoch": 0.169088, "grad_norm": 1.515625, "learning_rate": 2.8059182468668115e-05, "loss": 2.2872, "step": 13210 }, { "epoch": 0.169216, "grad_norm": 1.5546875, "learning_rate": 2.8056194791965205e-05, "loss": 2.2927, "step": 13220 }, { "epoch": 0.169344, "grad_norm": 11.75, "learning_rate": 2.8053204976743194e-05, "loss": 2.3091, "step": 13230 }, { "epoch": 0.169472, "grad_norm": 2.46875, "learning_rate": 2.8050213023491788e-05, "loss": 2.3329, "step": 13240 }, { "epoch": 0.1696, "grad_norm": 1.578125, "learning_rate": 2.8047218932701057e-05, "loss": 2.3071, "step": 13250 }, { "epoch": 0.169728, "grad_norm": 1.484375, "learning_rate": 2.8044222704861413e-05, "loss": 2.3093, "step": 13260 }, { "epoch": 0.169856, "grad_norm": 1.6640625, "learning_rate": 2.804122434046361e-05, "loss": 2.2723, "step": 13270 }, { "epoch": 0.169984, "grad_norm": 1.484375, "learning_rate": 2.8038223839998767e-05, "loss": 2.2921, "step": 13280 }, { "epoch": 0.170112, "grad_norm": 1.515625, "learning_rate": 2.8035221203958347e-05, "loss": 2.3059, "step": 13290 }, { "epoch": 0.17024, "grad_norm": 1.4765625, "learning_rate": 2.803221643283416e-05, "loss": 2.2973, "step": 13300 }, { "epoch": 0.170368, "grad_norm": 1.4140625, "learning_rate": 2.8029209527118375e-05, "loss": 2.2966, "step": 13310 }, { "epoch": 0.170496, "grad_norm": 1.875, "learning_rate": 2.8026200487303485e-05, "loss": 2.2817, "step": 13320 }, { "epoch": 0.170624, "grad_norm": 1.578125, "learning_rate": 2.802318931388237e-05, "loss": 2.3197, "step": 13330 }, { "epoch": 0.170752, "grad_norm": 1.46875, "learning_rate": 2.8020176007348232e-05, "loss": 2.2888, "step": 13340 }, { "epoch": 0.17088, "grad_norm": 1.4921875, "learning_rate": 2.801716056819463e-05, "loss": 2.3138, "step": 13350 }, { "epoch": 0.171008, "grad_norm": 1.5078125, "learning_rate": 2.8014142996915476e-05, "loss": 2.2908, "step": 13360 }, { "epoch": 0.171136, "grad_norm": 1.4921875, "learning_rate": 2.801112329400503e-05, "loss": 2.2919, "step": 13370 }, { "epoch": 0.171264, "grad_norm": 1.53125, "learning_rate": 2.8008101459957894e-05, "loss": 2.3412, "step": 13380 }, { "epoch": 0.171392, "grad_norm": 1.640625, "learning_rate": 2.8005077495269032e-05, "loss": 2.3323, "step": 13390 }, { "epoch": 0.17152, "grad_norm": 1.5859375, "learning_rate": 2.8002051400433745e-05, "loss": 2.3199, "step": 13400 }, { "epoch": 0.171648, "grad_norm": 1.53125, "learning_rate": 2.7999023175947683e-05, "loss": 2.308, "step": 13410 }, { "epoch": 0.171776, "grad_norm": 1.734375, "learning_rate": 2.7995992822306862e-05, "loss": 2.2784, "step": 13420 }, { "epoch": 0.171904, "grad_norm": 1.453125, "learning_rate": 2.799296034000762e-05, "loss": 2.2937, "step": 13430 }, { "epoch": 0.172032, "grad_norm": 1.7421875, "learning_rate": 2.7989925729546665e-05, "loss": 2.3168, "step": 13440 }, { "epoch": 0.17216, "grad_norm": 1.7265625, "learning_rate": 2.7986888991421048e-05, "loss": 2.3212, "step": 13450 }, { "epoch": 0.172288, "grad_norm": 1.578125, "learning_rate": 2.798385012612816e-05, "loss": 2.3221, "step": 13460 }, { "epoch": 0.172416, "grad_norm": 1.875, "learning_rate": 2.7980809134165753e-05, "loss": 2.3201, "step": 13470 }, { "epoch": 0.172544, "grad_norm": 1.484375, "learning_rate": 2.797776601603192e-05, "loss": 2.314, "step": 13480 }, { "epoch": 0.172672, "grad_norm": 1.4296875, "learning_rate": 2.79747207722251e-05, "loss": 2.3129, "step": 13490 }, { "epoch": 0.1728, "grad_norm": 1.53125, "learning_rate": 2.7971673403244097e-05, "loss": 2.2909, "step": 13500 }, { "epoch": 0.172928, "grad_norm": 1.53125, "learning_rate": 2.7968623909588033e-05, "loss": 2.3097, "step": 13510 }, { "epoch": 0.173056, "grad_norm": 1.46875, "learning_rate": 2.7965572291756405e-05, "loss": 2.3091, "step": 13520 }, { "epoch": 0.173184, "grad_norm": 1.609375, "learning_rate": 2.7962518550249046e-05, "loss": 2.3304, "step": 13530 }, { "epoch": 0.173312, "grad_norm": 1.4296875, "learning_rate": 2.7959462685566138e-05, "loss": 2.2827, "step": 13540 }, { "epoch": 0.17344, "grad_norm": 1.4453125, "learning_rate": 2.795640469820821e-05, "loss": 2.3117, "step": 13550 }, { "epoch": 0.173568, "grad_norm": 1.4453125, "learning_rate": 2.7953344588676147e-05, "loss": 2.2635, "step": 13560 }, { "epoch": 0.173696, "grad_norm": 1.6640625, "learning_rate": 2.7950282357471166e-05, "loss": 2.3323, "step": 13570 }, { "epoch": 0.173824, "grad_norm": 1.59375, "learning_rate": 2.794721800509485e-05, "loss": 2.3054, "step": 13580 }, { "epoch": 0.173952, "grad_norm": 1.7109375, "learning_rate": 2.7944151532049108e-05, "loss": 2.3056, "step": 13590 }, { "epoch": 0.17408, "grad_norm": 1.4921875, "learning_rate": 2.794108293883622e-05, "loss": 2.314, "step": 13600 }, { "epoch": 0.174208, "grad_norm": 1.3828125, "learning_rate": 2.7938012225958794e-05, "loss": 2.3023, "step": 13610 }, { "epoch": 0.174336, "grad_norm": 1.5, "learning_rate": 2.7934939393919793e-05, "loss": 2.3106, "step": 13620 }, { "epoch": 0.174464, "grad_norm": 1.5234375, "learning_rate": 2.7931864443222533e-05, "loss": 2.3226, "step": 13630 }, { "epoch": 0.174592, "grad_norm": 1.5, "learning_rate": 2.7928787374370667e-05, "loss": 2.3009, "step": 13640 }, { "epoch": 0.17472, "grad_norm": 1.4921875, "learning_rate": 2.7925708187868192e-05, "loss": 2.3235, "step": 13650 }, { "epoch": 0.174848, "grad_norm": 1.4765625, "learning_rate": 2.7922626884219463e-05, "loss": 2.3396, "step": 13660 }, { "epoch": 0.174976, "grad_norm": 1.5, "learning_rate": 2.7919543463929185e-05, "loss": 2.2962, "step": 13670 }, { "epoch": 0.175104, "grad_norm": 1.4453125, "learning_rate": 2.7916457927502394e-05, "loss": 2.2723, "step": 13680 }, { "epoch": 0.175232, "grad_norm": 1.46875, "learning_rate": 2.7913370275444483e-05, "loss": 2.3285, "step": 13690 }, { "epoch": 0.17536, "grad_norm": 1.4609375, "learning_rate": 2.791028050826119e-05, "loss": 2.2675, "step": 13700 }, { "epoch": 0.175488, "grad_norm": 1.4609375, "learning_rate": 2.7907188626458598e-05, "loss": 2.308, "step": 13710 }, { "epoch": 0.175616, "grad_norm": 1.5703125, "learning_rate": 2.7904094630543133e-05, "loss": 2.3182, "step": 13720 }, { "epoch": 0.175744, "grad_norm": 1.515625, "learning_rate": 2.7900998521021574e-05, "loss": 2.2839, "step": 13730 }, { "epoch": 0.175872, "grad_norm": 1.53125, "learning_rate": 2.7897900298401047e-05, "loss": 2.3008, "step": 13740 }, { "epoch": 0.176, "grad_norm": 1.640625, "learning_rate": 2.7894799963189018e-05, "loss": 2.3013, "step": 13750 }, { "epoch": 0.176128, "grad_norm": 1.59375, "learning_rate": 2.78916975158933e-05, "loss": 2.2979, "step": 13760 }, { "epoch": 0.176256, "grad_norm": 1.5390625, "learning_rate": 2.788859295702205e-05, "loss": 2.3093, "step": 13770 }, { "epoch": 0.176384, "grad_norm": 1.640625, "learning_rate": 2.7885486287083783e-05, "loss": 2.2886, "step": 13780 }, { "epoch": 0.176512, "grad_norm": 1.65625, "learning_rate": 2.7882377506587353e-05, "loss": 2.2928, "step": 13790 }, { "epoch": 0.17664, "grad_norm": 1.5859375, "learning_rate": 2.7879266616041946e-05, "loss": 2.2984, "step": 13800 }, { "epoch": 0.176768, "grad_norm": 1.359375, "learning_rate": 2.7876153615957108e-05, "loss": 2.3009, "step": 13810 }, { "epoch": 0.176896, "grad_norm": 1.4453125, "learning_rate": 2.7873038506842736e-05, "loss": 2.3112, "step": 13820 }, { "epoch": 0.177024, "grad_norm": 1.5625, "learning_rate": 2.786992128920906e-05, "loss": 2.3012, "step": 13830 }, { "epoch": 0.177152, "grad_norm": 1.625, "learning_rate": 2.7866801963566658e-05, "loss": 2.3159, "step": 13840 }, { "epoch": 0.17728, "grad_norm": 1.4453125, "learning_rate": 2.7863680530426462e-05, "loss": 2.2945, "step": 13850 }, { "epoch": 0.177408, "grad_norm": 1.4453125, "learning_rate": 2.7860556990299733e-05, "loss": 2.3095, "step": 13860 }, { "epoch": 0.177536, "grad_norm": 1.5, "learning_rate": 2.785743134369809e-05, "loss": 2.2983, "step": 13870 }, { "epoch": 0.177664, "grad_norm": 1.6171875, "learning_rate": 2.7854303591133493e-05, "loss": 2.313, "step": 13880 }, { "epoch": 0.177792, "grad_norm": 1.5546875, "learning_rate": 2.785117373311825e-05, "loss": 2.3052, "step": 13890 }, { "epoch": 0.17792, "grad_norm": 1.5, "learning_rate": 2.7848041770165013e-05, "loss": 2.3029, "step": 13900 }, { "epoch": 0.178048, "grad_norm": 1.6875, "learning_rate": 2.7844907702786768e-05, "loss": 2.3311, "step": 13910 }, { "epoch": 0.178176, "grad_norm": 1.6640625, "learning_rate": 2.7841771531496863e-05, "loss": 2.3293, "step": 13920 }, { "epoch": 0.178304, "grad_norm": 1.5703125, "learning_rate": 2.7838633256808977e-05, "loss": 2.2913, "step": 13930 }, { "epoch": 0.178432, "grad_norm": 1.5078125, "learning_rate": 2.7835492879237143e-05, "loss": 2.3082, "step": 13940 }, { "epoch": 0.17856, "grad_norm": 1.6171875, "learning_rate": 2.7832350399295734e-05, "loss": 2.3357, "step": 13950 }, { "epoch": 0.178688, "grad_norm": 1.3984375, "learning_rate": 2.7829205817499465e-05, "loss": 2.2882, "step": 13960 }, { "epoch": 0.178816, "grad_norm": 1.5, "learning_rate": 2.78260591343634e-05, "loss": 2.2818, "step": 13970 }, { "epoch": 0.178944, "grad_norm": 1.640625, "learning_rate": 2.7822910350402943e-05, "loss": 2.2942, "step": 13980 }, { "epoch": 0.179072, "grad_norm": 1.5078125, "learning_rate": 2.781975946613385e-05, "loss": 2.2718, "step": 13990 }, { "epoch": 0.1792, "grad_norm": 1.53125, "learning_rate": 2.7816606482072204e-05, "loss": 2.2943, "step": 14000 }, { "epoch": 0.179328, "grad_norm": 1.8984375, "learning_rate": 2.7813451398734454e-05, "loss": 2.3042, "step": 14010 }, { "epoch": 0.179456, "grad_norm": 1.46875, "learning_rate": 2.7810294216637374e-05, "loss": 2.3279, "step": 14020 }, { "epoch": 0.179584, "grad_norm": 1.59375, "learning_rate": 2.7807134936298094e-05, "loss": 2.2694, "step": 14030 }, { "epoch": 0.179712, "grad_norm": 1.4921875, "learning_rate": 2.7803973558234085e-05, "loss": 2.3188, "step": 14040 }, { "epoch": 0.17984, "grad_norm": 1.484375, "learning_rate": 2.7800810082963158e-05, "loss": 2.3053, "step": 14050 }, { "epoch": 0.179968, "grad_norm": 1.546875, "learning_rate": 2.7797644511003464e-05, "loss": 2.2893, "step": 14060 }, { "epoch": 0.180096, "grad_norm": 1.703125, "learning_rate": 2.7794476842873515e-05, "loss": 2.3011, "step": 14070 }, { "epoch": 0.180224, "grad_norm": 1.453125, "learning_rate": 2.7791307079092144e-05, "loss": 2.3127, "step": 14080 }, { "epoch": 0.180352, "grad_norm": 2.09375, "learning_rate": 2.7788135220178542e-05, "loss": 2.3109, "step": 14090 }, { "epoch": 0.18048, "grad_norm": 2.0625, "learning_rate": 2.7784961266652236e-05, "loss": 2.3298, "step": 14100 }, { "epoch": 0.180608, "grad_norm": 1.5234375, "learning_rate": 2.77817852190331e-05, "loss": 2.2849, "step": 14110 }, { "epoch": 0.180736, "grad_norm": 1.5234375, "learning_rate": 2.7778607077841357e-05, "loss": 2.282, "step": 14120 }, { "epoch": 0.180864, "grad_norm": 1.5234375, "learning_rate": 2.777542684359755e-05, "loss": 2.3167, "step": 14130 }, { "epoch": 0.180992, "grad_norm": 1.5, "learning_rate": 2.7772244516822593e-05, "loss": 2.309, "step": 14140 }, { "epoch": 0.18112, "grad_norm": 1.4140625, "learning_rate": 2.7769060098037727e-05, "loss": 2.3003, "step": 14150 }, { "epoch": 0.181248, "grad_norm": 1.4453125, "learning_rate": 2.7765873587764538e-05, "loss": 2.2793, "step": 14160 }, { "epoch": 0.181376, "grad_norm": 1.9375, "learning_rate": 2.7762684986524956e-05, "loss": 2.312, "step": 14170 }, { "epoch": 0.181504, "grad_norm": 1.578125, "learning_rate": 2.7759494294841257e-05, "loss": 2.3233, "step": 14180 }, { "epoch": 0.181632, "grad_norm": 1.6171875, "learning_rate": 2.7756301513236046e-05, "loss": 2.3068, "step": 14190 }, { "epoch": 0.18176, "grad_norm": 1.5, "learning_rate": 2.7753106642232287e-05, "loss": 2.2791, "step": 14200 }, { "epoch": 0.181888, "grad_norm": 1.46875, "learning_rate": 2.7749909682353277e-05, "loss": 2.3249, "step": 14210 }, { "epoch": 0.182016, "grad_norm": 1.4921875, "learning_rate": 2.7746710634122655e-05, "loss": 2.31, "step": 14220 }, { "epoch": 0.182144, "grad_norm": 1.5234375, "learning_rate": 2.774350949806441e-05, "loss": 2.3094, "step": 14230 }, { "epoch": 0.182272, "grad_norm": 1.890625, "learning_rate": 2.7740306274702864e-05, "loss": 2.3174, "step": 14240 }, { "epoch": 0.1824, "grad_norm": 1.5078125, "learning_rate": 2.773710096456268e-05, "loss": 2.3069, "step": 14250 }, { "epoch": 0.182528, "grad_norm": 1.6015625, "learning_rate": 2.773389356816887e-05, "loss": 2.3072, "step": 14260 }, { "epoch": 0.182656, "grad_norm": 1.6484375, "learning_rate": 2.7730684086046785e-05, "loss": 2.266, "step": 14270 }, { "epoch": 0.182784, "grad_norm": 1.5234375, "learning_rate": 2.772747251872212e-05, "loss": 2.3069, "step": 14280 }, { "epoch": 0.182912, "grad_norm": 1.3984375, "learning_rate": 2.7724258866720903e-05, "loss": 2.3352, "step": 14290 }, { "epoch": 0.18304, "grad_norm": 1.453125, "learning_rate": 2.7721043130569514e-05, "loss": 2.2942, "step": 14300 }, { "epoch": 0.183168, "grad_norm": 1.484375, "learning_rate": 2.7717825310794663e-05, "loss": 2.3118, "step": 14310 }, { "epoch": 0.183296, "grad_norm": 1.953125, "learning_rate": 2.7714605407923415e-05, "loss": 2.311, "step": 14320 }, { "epoch": 0.183424, "grad_norm": 1.6328125, "learning_rate": 2.7711383422483162e-05, "loss": 2.3023, "step": 14330 }, { "epoch": 0.183552, "grad_norm": 1.5, "learning_rate": 2.7708159355001654e-05, "loss": 2.327, "step": 14340 }, { "epoch": 0.18368, "grad_norm": 1.5546875, "learning_rate": 2.7704933206006964e-05, "loss": 2.3176, "step": 14350 }, { "epoch": 0.183808, "grad_norm": 1.453125, "learning_rate": 2.770170497602751e-05, "loss": 2.3043, "step": 14360 }, { "epoch": 0.183936, "grad_norm": 1.5625, "learning_rate": 2.7698474665592067e-05, "loss": 2.2977, "step": 14370 }, { "epoch": 0.184064, "grad_norm": 2.234375, "learning_rate": 2.7695242275229733e-05, "loss": 2.2771, "step": 14380 }, { "epoch": 0.184192, "grad_norm": 1.4765625, "learning_rate": 2.769200780546995e-05, "loss": 2.2971, "step": 14390 }, { "epoch": 0.18432, "grad_norm": 1.4765625, "learning_rate": 2.7688771256842506e-05, "loss": 2.3026, "step": 14400 }, { "epoch": 0.184448, "grad_norm": 1.4921875, "learning_rate": 2.7685532629877524e-05, "loss": 2.2806, "step": 14410 }, { "epoch": 0.184576, "grad_norm": 1.4453125, "learning_rate": 2.768229192510547e-05, "loss": 2.318, "step": 14420 }, { "epoch": 0.184704, "grad_norm": 1.59375, "learning_rate": 2.7679049143057154e-05, "loss": 2.3004, "step": 14430 }, { "epoch": 0.184832, "grad_norm": 1.5859375, "learning_rate": 2.767580428426372e-05, "loss": 2.3069, "step": 14440 }, { "epoch": 0.18496, "grad_norm": 1.6484375, "learning_rate": 2.767255734925665e-05, "loss": 2.3105, "step": 14450 }, { "epoch": 0.185088, "grad_norm": 1.4453125, "learning_rate": 2.7669308338567778e-05, "loss": 2.2998, "step": 14460 }, { "epoch": 0.185216, "grad_norm": 1.515625, "learning_rate": 2.766605725272927e-05, "loss": 2.3044, "step": 14470 }, { "epoch": 0.185344, "grad_norm": 1.5390625, "learning_rate": 2.766280409227363e-05, "loss": 2.3449, "step": 14480 }, { "epoch": 0.185472, "grad_norm": 1.546875, "learning_rate": 2.76595488577337e-05, "loss": 2.3321, "step": 14490 }, { "epoch": 0.1856, "grad_norm": 1.5390625, "learning_rate": 2.7656291549642678e-05, "loss": 2.3134, "step": 14500 }, { "epoch": 0.185728, "grad_norm": 1.453125, "learning_rate": 2.7653032168534076e-05, "loss": 2.2968, "step": 14510 }, { "epoch": 0.185856, "grad_norm": 2.484375, "learning_rate": 2.764977071494177e-05, "loss": 2.3391, "step": 14520 }, { "epoch": 0.185984, "grad_norm": 2.09375, "learning_rate": 2.7646507189399958e-05, "loss": 2.3063, "step": 14530 }, { "epoch": 0.186112, "grad_norm": 1.7265625, "learning_rate": 2.7643241592443184e-05, "loss": 2.2987, "step": 14540 }, { "epoch": 0.18624, "grad_norm": 1.4609375, "learning_rate": 2.7639973924606345e-05, "loss": 2.2967, "step": 14550 }, { "epoch": 0.186368, "grad_norm": 1.578125, "learning_rate": 2.763670418642464e-05, "loss": 2.3215, "step": 14560 }, { "epoch": 0.186496, "grad_norm": 1.5546875, "learning_rate": 2.763343237843365e-05, "loss": 2.3146, "step": 14570 }, { "epoch": 0.186624, "grad_norm": 1.453125, "learning_rate": 2.7630158501169266e-05, "loss": 2.2642, "step": 14580 }, { "epoch": 0.186752, "grad_norm": 1.75, "learning_rate": 2.762688255516773e-05, "loss": 2.3188, "step": 14590 }, { "epoch": 0.18688, "grad_norm": 1.4375, "learning_rate": 2.7623604540965624e-05, "loss": 2.2851, "step": 14600 }, { "epoch": 0.187008, "grad_norm": 1.46875, "learning_rate": 2.7620324459099856e-05, "loss": 2.2968, "step": 14610 }, { "epoch": 0.187136, "grad_norm": 1.6015625, "learning_rate": 2.7617042310107695e-05, "loss": 2.3129, "step": 14620 }, { "epoch": 0.187264, "grad_norm": 1.390625, "learning_rate": 2.7613758094526724e-05, "loss": 2.2989, "step": 14630 }, { "epoch": 0.187392, "grad_norm": 1.5234375, "learning_rate": 2.7610471812894884e-05, "loss": 2.2819, "step": 14640 }, { "epoch": 0.18752, "grad_norm": 1.421875, "learning_rate": 2.7607183465750437e-05, "loss": 2.2943, "step": 14650 }, { "epoch": 0.187648, "grad_norm": 1.5390625, "learning_rate": 2.7603893053632005e-05, "loss": 2.2887, "step": 14660 }, { "epoch": 0.187776, "grad_norm": 1.4296875, "learning_rate": 2.7600600577078527e-05, "loss": 2.2746, "step": 14670 }, { "epoch": 0.187904, "grad_norm": 1.640625, "learning_rate": 2.759730603662929e-05, "loss": 2.2697, "step": 14680 }, { "epoch": 0.188032, "grad_norm": 1.53125, "learning_rate": 2.759400943282392e-05, "loss": 2.2932, "step": 14690 }, { "epoch": 0.18816, "grad_norm": 1.6640625, "learning_rate": 2.7590710766202383e-05, "loss": 2.2795, "step": 14700 }, { "epoch": 0.188288, "grad_norm": 1.4609375, "learning_rate": 2.7587410037304974e-05, "loss": 2.3062, "step": 14710 }, { "epoch": 0.188416, "grad_norm": 1.609375, "learning_rate": 2.758410724667233e-05, "loss": 2.2809, "step": 14720 }, { "epoch": 0.188544, "grad_norm": 1.453125, "learning_rate": 2.758080239484543e-05, "loss": 2.3282, "step": 14730 }, { "epoch": 0.188672, "grad_norm": 1.9296875, "learning_rate": 2.7577495482365576e-05, "loss": 2.3168, "step": 14740 }, { "epoch": 0.1888, "grad_norm": 1.4921875, "learning_rate": 2.757418650977444e-05, "loss": 2.3148, "step": 14750 }, { "epoch": 0.188928, "grad_norm": 1.546875, "learning_rate": 2.7570875477613993e-05, "loss": 2.2936, "step": 14760 }, { "epoch": 0.189056, "grad_norm": 1.359375, "learning_rate": 2.7567562386426564e-05, "loss": 2.3207, "step": 14770 }, { "epoch": 0.189184, "grad_norm": 1.796875, "learning_rate": 2.7564247236754814e-05, "loss": 2.314, "step": 14780 }, { "epoch": 0.189312, "grad_norm": 1.6171875, "learning_rate": 2.756093002914175e-05, "loss": 2.3262, "step": 14790 }, { "epoch": 0.18944, "grad_norm": 1.5078125, "learning_rate": 2.75576107641307e-05, "loss": 2.3152, "step": 14800 }, { "epoch": 0.189568, "grad_norm": 1.453125, "learning_rate": 2.7554289442265346e-05, "loss": 2.2939, "step": 14810 }, { "epoch": 0.189696, "grad_norm": 1.59375, "learning_rate": 2.7550966064089692e-05, "loss": 2.3095, "step": 14820 }, { "epoch": 0.189824, "grad_norm": 1.6328125, "learning_rate": 2.754764063014809e-05, "loss": 2.2958, "step": 14830 }, { "epoch": 0.189952, "grad_norm": 1.578125, "learning_rate": 2.754431314098522e-05, "loss": 2.3097, "step": 14840 }, { "epoch": 0.19008, "grad_norm": 1.5859375, "learning_rate": 2.754098359714611e-05, "loss": 2.3113, "step": 14850 }, { "epoch": 0.190208, "grad_norm": 1.46875, "learning_rate": 2.7537651999176116e-05, "loss": 2.3091, "step": 14860 }, { "epoch": 0.190336, "grad_norm": 1.46875, "learning_rate": 2.753431834762092e-05, "loss": 2.3261, "step": 14870 }, { "epoch": 0.190464, "grad_norm": 1.4453125, "learning_rate": 2.7530982643026568e-05, "loss": 2.2888, "step": 14880 }, { "epoch": 0.190592, "grad_norm": 1.6015625, "learning_rate": 2.7527644885939414e-05, "loss": 2.3117, "step": 14890 }, { "epoch": 0.19072, "grad_norm": 1.5625, "learning_rate": 2.752430507690617e-05, "loss": 2.2688, "step": 14900 }, { "epoch": 0.190848, "grad_norm": 1.4609375, "learning_rate": 2.7520963216473867e-05, "loss": 2.2787, "step": 14910 }, { "epoch": 0.190976, "grad_norm": 1.4296875, "learning_rate": 2.7517619305189892e-05, "loss": 2.2854, "step": 14920 }, { "epoch": 0.191104, "grad_norm": 1.5625, "learning_rate": 2.7514273343601945e-05, "loss": 2.3347, "step": 14930 }, { "epoch": 0.191232, "grad_norm": 1.4765625, "learning_rate": 2.7510925332258072e-05, "loss": 2.3334, "step": 14940 }, { "epoch": 0.19136, "grad_norm": 1.625, "learning_rate": 2.7507575271706664e-05, "loss": 2.2967, "step": 14950 }, { "epoch": 0.191488, "grad_norm": 1.5546875, "learning_rate": 2.750422316249643e-05, "loss": 2.2706, "step": 14960 }, { "epoch": 0.191616, "grad_norm": 1.8515625, "learning_rate": 2.7500869005176426e-05, "loss": 2.3179, "step": 14970 }, { "epoch": 0.191744, "grad_norm": 1.59375, "learning_rate": 2.7497512800296046e-05, "loss": 2.3162, "step": 14980 }, { "epoch": 0.191872, "grad_norm": 1.5390625, "learning_rate": 2.749415454840501e-05, "loss": 2.2895, "step": 14990 }, { "epoch": 0.192, "grad_norm": 1.515625, "learning_rate": 2.7490794250053377e-05, "loss": 2.3121, "step": 15000 }, { "epoch": 0.192128, "grad_norm": 1.3984375, "learning_rate": 2.7487431905791543e-05, "loss": 2.2949, "step": 15010 }, { "epoch": 0.192256, "grad_norm": 1.46875, "learning_rate": 2.748406751617024e-05, "loss": 2.2771, "step": 15020 }, { "epoch": 0.192384, "grad_norm": 1.4296875, "learning_rate": 2.7480701081740528e-05, "loss": 2.3025, "step": 15030 }, { "epoch": 0.192512, "grad_norm": 1.53125, "learning_rate": 2.747733260305381e-05, "loss": 2.2875, "step": 15040 }, { "epoch": 0.19264, "grad_norm": 1.5, "learning_rate": 2.747396208066182e-05, "loss": 2.3094, "step": 15050 }, { "epoch": 0.192768, "grad_norm": 1.5234375, "learning_rate": 2.747058951511663e-05, "loss": 2.2984, "step": 15060 }, { "epoch": 0.192896, "grad_norm": 1.5625, "learning_rate": 2.746721490697064e-05, "loss": 2.3076, "step": 15070 }, { "epoch": 0.193024, "grad_norm": 1.4921875, "learning_rate": 2.746383825677659e-05, "loss": 2.2929, "step": 15080 }, { "epoch": 0.193152, "grad_norm": 1.5859375, "learning_rate": 2.746045956508756e-05, "loss": 2.3116, "step": 15090 }, { "epoch": 0.19328, "grad_norm": 1.5234375, "learning_rate": 2.7457078832456945e-05, "loss": 2.2881, "step": 15100 }, { "epoch": 0.193408, "grad_norm": 1.65625, "learning_rate": 2.7453696059438494e-05, "loss": 2.2717, "step": 15110 }, { "epoch": 0.193536, "grad_norm": 1.5234375, "learning_rate": 2.745031124658629e-05, "loss": 2.2711, "step": 15120 }, { "epoch": 0.193664, "grad_norm": 1.53125, "learning_rate": 2.744692439445473e-05, "loss": 2.307, "step": 15130 }, { "epoch": 0.193792, "grad_norm": 1.546875, "learning_rate": 2.744353550359856e-05, "loss": 2.2911, "step": 15140 }, { "epoch": 0.19392, "grad_norm": 1.515625, "learning_rate": 2.744014457457287e-05, "loss": 2.2806, "step": 15150 }, { "epoch": 0.194048, "grad_norm": 1.796875, "learning_rate": 2.7436751607933053e-05, "loss": 2.3002, "step": 15160 }, { "epoch": 0.194176, "grad_norm": 1.5234375, "learning_rate": 2.7433356604234877e-05, "loss": 2.3163, "step": 15170 }, { "epoch": 0.194304, "grad_norm": 1.59375, "learning_rate": 2.7429959564034402e-05, "loss": 2.3126, "step": 15180 }, { "epoch": 0.194432, "grad_norm": 1.515625, "learning_rate": 2.7426560487888054e-05, "loss": 2.3166, "step": 15190 }, { "epoch": 0.19456, "grad_norm": 1.71875, "learning_rate": 2.742315937635257e-05, "loss": 2.3275, "step": 15200 }, { "epoch": 0.194688, "grad_norm": 1.515625, "learning_rate": 2.7419756229985038e-05, "loss": 2.2891, "step": 15210 }, { "epoch": 0.194816, "grad_norm": 1.578125, "learning_rate": 2.741635104934286e-05, "loss": 2.3005, "step": 15220 }, { "epoch": 0.194944, "grad_norm": 1.5390625, "learning_rate": 2.7412943834983796e-05, "loss": 2.3084, "step": 15230 }, { "epoch": 0.195072, "grad_norm": 1.6640625, "learning_rate": 2.7409534587465917e-05, "loss": 2.3167, "step": 15240 }, { "epoch": 0.1952, "grad_norm": 1.859375, "learning_rate": 2.7406123307347634e-05, "loss": 2.2811, "step": 15250 }, { "epoch": 0.195328, "grad_norm": 1.5859375, "learning_rate": 2.74027099951877e-05, "loss": 2.322, "step": 15260 }, { "epoch": 0.195456, "grad_norm": 1.84375, "learning_rate": 2.7399294651545184e-05, "loss": 2.2848, "step": 15270 }, { "epoch": 0.195584, "grad_norm": 3.984375, "learning_rate": 2.7395877276979506e-05, "loss": 2.2899, "step": 15280 }, { "epoch": 0.195712, "grad_norm": 1.6015625, "learning_rate": 2.7392457872050402e-05, "loss": 2.3199, "step": 15290 }, { "epoch": 0.19584, "grad_norm": 1.46875, "learning_rate": 2.738903643731795e-05, "loss": 2.3336, "step": 15300 }, { "epoch": 0.195968, "grad_norm": 2.0, "learning_rate": 2.7385612973342558e-05, "loss": 2.2957, "step": 15310 }, { "epoch": 0.196096, "grad_norm": 1.5, "learning_rate": 2.738218748068497e-05, "loss": 2.294, "step": 15320 }, { "epoch": 0.196224, "grad_norm": 1.515625, "learning_rate": 2.7378759959906265e-05, "loss": 2.3258, "step": 15330 }, { "epoch": 0.196352, "grad_norm": 1.6015625, "learning_rate": 2.7375330411567835e-05, "loss": 2.3367, "step": 15340 }, { "epoch": 0.19648, "grad_norm": 1.5703125, "learning_rate": 2.7371898836231423e-05, "loss": 2.3071, "step": 15350 }, { "epoch": 0.196608, "grad_norm": 1.5, "learning_rate": 2.73684652344591e-05, "loss": 2.2886, "step": 15360 }, { "epoch": 0.196736, "grad_norm": 1.546875, "learning_rate": 2.7365029606813264e-05, "loss": 2.3201, "step": 15370 }, { "epoch": 0.196864, "grad_norm": 1.5390625, "learning_rate": 2.7361591953856658e-05, "loss": 2.2866, "step": 15380 }, { "epoch": 0.196992, "grad_norm": 1.5703125, "learning_rate": 2.735815227615233e-05, "loss": 2.274, "step": 15390 }, { "epoch": 0.19712, "grad_norm": 1.6875, "learning_rate": 2.7354710574263698e-05, "loss": 2.2968, "step": 15400 }, { "epoch": 0.197248, "grad_norm": 1.625, "learning_rate": 2.735126684875447e-05, "loss": 2.2982, "step": 15410 }, { "epoch": 0.197376, "grad_norm": 1.640625, "learning_rate": 2.734782110018872e-05, "loss": 2.3104, "step": 15420 }, { "epoch": 0.197504, "grad_norm": 1.5, "learning_rate": 2.734437332913083e-05, "loss": 2.2814, "step": 15430 }, { "epoch": 0.197632, "grad_norm": 1.5078125, "learning_rate": 2.734092353614553e-05, "loss": 2.2985, "step": 15440 }, { "epoch": 0.19776, "grad_norm": 1.5234375, "learning_rate": 2.7337471721797863e-05, "loss": 2.3121, "step": 15450 }, { "epoch": 0.197888, "grad_norm": 1.5, "learning_rate": 2.7334017886653228e-05, "loss": 2.28, "step": 15460 }, { "epoch": 0.198016, "grad_norm": 1.5234375, "learning_rate": 2.7330562031277327e-05, "loss": 2.3258, "step": 15470 }, { "epoch": 0.198144, "grad_norm": 1.5390625, "learning_rate": 2.7327104156236212e-05, "loss": 2.3155, "step": 15480 }, { "epoch": 0.198272, "grad_norm": 1.59375, "learning_rate": 2.732364426209626e-05, "loss": 2.2964, "step": 15490 }, { "epoch": 0.1984, "grad_norm": 1.5390625, "learning_rate": 2.7320182349424185e-05, "loss": 2.2892, "step": 15500 }, { "epoch": 0.198528, "grad_norm": 1.5078125, "learning_rate": 2.7316718418787012e-05, "loss": 2.2921, "step": 15510 }, { "epoch": 0.198656, "grad_norm": 1.578125, "learning_rate": 2.731325247075212e-05, "loss": 2.3056, "step": 15520 }, { "epoch": 0.198784, "grad_norm": 1.6015625, "learning_rate": 2.730978450588721e-05, "loss": 2.2545, "step": 15530 }, { "epoch": 0.198912, "grad_norm": 1.6875, "learning_rate": 2.7306314524760304e-05, "loss": 2.3066, "step": 15540 }, { "epoch": 0.19904, "grad_norm": 1.6875, "learning_rate": 2.7302842527939766e-05, "loss": 2.3192, "step": 15550 }, { "epoch": 0.199168, "grad_norm": 1.515625, "learning_rate": 2.729936851599429e-05, "loss": 2.3188, "step": 15560 }, { "epoch": 0.199296, "grad_norm": 1.5625, "learning_rate": 2.7295892489492895e-05, "loss": 2.3062, "step": 15570 }, { "epoch": 0.199424, "grad_norm": 4.71875, "learning_rate": 2.7292414449004923e-05, "loss": 2.3347, "step": 15580 }, { "epoch": 0.199552, "grad_norm": 1.5, "learning_rate": 2.728893439510007e-05, "loss": 2.3034, "step": 15590 }, { "epoch": 0.19968, "grad_norm": 1.671875, "learning_rate": 2.7285452328348324e-05, "loss": 2.3311, "step": 15600 }, { "epoch": 0.199808, "grad_norm": 1.390625, "learning_rate": 2.7281968249320048e-05, "loss": 2.2836, "step": 15610 }, { "epoch": 0.199936, "grad_norm": 1.4375, "learning_rate": 2.7278482158585897e-05, "loss": 2.3148, "step": 15620 }, { "epoch": 0.200064, "grad_norm": 1.5390625, "learning_rate": 2.727499405671687e-05, "loss": 2.3119, "step": 15630 }, { "epoch": 0.200192, "grad_norm": 2.484375, "learning_rate": 2.7271503944284303e-05, "loss": 2.3174, "step": 15640 }, { "epoch": 0.20032, "grad_norm": 1.484375, "learning_rate": 2.7268011821859854e-05, "loss": 2.298, "step": 15650 }, { "epoch": 0.200448, "grad_norm": 1.40625, "learning_rate": 2.7264517690015498e-05, "loss": 2.282, "step": 15660 }, { "epoch": 0.200576, "grad_norm": 1.4921875, "learning_rate": 2.7261021549323563e-05, "loss": 2.3203, "step": 15670 }, { "epoch": 0.200704, "grad_norm": 1.4609375, "learning_rate": 2.725752340035668e-05, "loss": 2.3206, "step": 15680 }, { "epoch": 0.200832, "grad_norm": 1.625, "learning_rate": 2.7254023243687838e-05, "loss": 2.2919, "step": 15690 }, { "epoch": 0.20096, "grad_norm": 1.4296875, "learning_rate": 2.7250521079890335e-05, "loss": 2.2806, "step": 15700 }, { "epoch": 0.201088, "grad_norm": 1.46875, "learning_rate": 2.7247016909537798e-05, "loss": 2.3041, "step": 15710 }, { "epoch": 0.201216, "grad_norm": 1.5234375, "learning_rate": 2.724351073320419e-05, "loss": 2.3187, "step": 15720 }, { "epoch": 0.201344, "grad_norm": 1.5625, "learning_rate": 2.72400025514638e-05, "loss": 2.3141, "step": 15730 }, { "epoch": 0.201472, "grad_norm": 1.609375, "learning_rate": 2.7236492364891247e-05, "loss": 2.3017, "step": 15740 }, { "epoch": 0.2016, "grad_norm": 1.78125, "learning_rate": 2.7232980174061476e-05, "loss": 2.3203, "step": 15750 }, { "epoch": 0.201728, "grad_norm": 1.4375, "learning_rate": 2.7229465979549753e-05, "loss": 2.303, "step": 15760 }, { "epoch": 0.201856, "grad_norm": 1.515625, "learning_rate": 2.722594978193169e-05, "loss": 2.2727, "step": 15770 }, { "epoch": 0.201984, "grad_norm": 1.4765625, "learning_rate": 2.722243158178321e-05, "loss": 2.2991, "step": 15780 }, { "epoch": 0.202112, "grad_norm": 1.6484375, "learning_rate": 2.721891137968058e-05, "loss": 2.3089, "step": 15790 }, { "epoch": 0.20224, "grad_norm": 1.6015625, "learning_rate": 2.721538917620038e-05, "loss": 2.3138, "step": 15800 }, { "epoch": 0.202368, "grad_norm": 1.765625, "learning_rate": 2.721186497191952e-05, "loss": 2.2993, "step": 15810 }, { "epoch": 0.202496, "grad_norm": 1.625, "learning_rate": 2.7208338767415247e-05, "loss": 2.2945, "step": 15820 }, { "epoch": 0.202624, "grad_norm": 1.7109375, "learning_rate": 2.7204810563265134e-05, "loss": 2.2925, "step": 15830 }, { "epoch": 0.202752, "grad_norm": 1.6171875, "learning_rate": 2.7201280360047067e-05, "loss": 2.2985, "step": 15840 }, { "epoch": 0.20288, "grad_norm": 1.5546875, "learning_rate": 2.719774815833928e-05, "loss": 2.3039, "step": 15850 }, { "epoch": 0.203008, "grad_norm": 1.4609375, "learning_rate": 2.7194213958720316e-05, "loss": 2.3089, "step": 15860 }, { "epoch": 0.203136, "grad_norm": 2.171875, "learning_rate": 2.7190677761769067e-05, "loss": 2.3263, "step": 15870 }, { "epoch": 0.203264, "grad_norm": 1.6015625, "learning_rate": 2.7187139568064722e-05, "loss": 2.2715, "step": 15880 }, { "epoch": 0.203392, "grad_norm": 1.46875, "learning_rate": 2.7183599378186823e-05, "loss": 2.3094, "step": 15890 }, { "epoch": 0.20352, "grad_norm": 1.515625, "learning_rate": 2.7180057192715232e-05, "loss": 2.3253, "step": 15900 }, { "epoch": 0.203648, "grad_norm": 1.5859375, "learning_rate": 2.7176513012230128e-05, "loss": 2.312, "step": 15910 }, { "epoch": 0.203776, "grad_norm": 1.5625, "learning_rate": 2.7172966837312034e-05, "loss": 2.2971, "step": 15920 }, { "epoch": 0.203904, "grad_norm": 1.6640625, "learning_rate": 2.716941866854178e-05, "loss": 2.2988, "step": 15930 }, { "epoch": 0.204032, "grad_norm": 1.609375, "learning_rate": 2.7165868506500545e-05, "loss": 2.3052, "step": 15940 }, { "epoch": 0.20416, "grad_norm": 2.046875, "learning_rate": 2.7162316351769815e-05, "loss": 2.3062, "step": 15950 }, { "epoch": 0.204288, "grad_norm": 1.5546875, "learning_rate": 2.7158762204931403e-05, "loss": 2.2962, "step": 15960 }, { "epoch": 0.204416, "grad_norm": 1.6875, "learning_rate": 2.7155206066567466e-05, "loss": 2.3175, "step": 15970 }, { "epoch": 0.204544, "grad_norm": 1.5859375, "learning_rate": 2.7151647937260474e-05, "loss": 2.2976, "step": 15980 }, { "epoch": 0.204672, "grad_norm": 1.546875, "learning_rate": 2.714808781759322e-05, "loss": 2.3231, "step": 15990 }, { "epoch": 0.2048, "grad_norm": 1.46875, "learning_rate": 2.7144525708148837e-05, "loss": 2.3144, "step": 16000 }, { "epoch": 0.204928, "grad_norm": 1.5, "learning_rate": 2.7140961609510763e-05, "loss": 2.2784, "step": 16010 }, { "epoch": 0.205056, "grad_norm": 2.109375, "learning_rate": 2.7137395522262784e-05, "loss": 2.2745, "step": 16020 }, { "epoch": 0.205184, "grad_norm": 1.4765625, "learning_rate": 2.7133827446988998e-05, "loss": 2.3095, "step": 16030 }, { "epoch": 0.205312, "grad_norm": 1.6796875, "learning_rate": 2.7130257384273833e-05, "loss": 2.3218, "step": 16040 }, { "epoch": 0.20544, "grad_norm": 1.6484375, "learning_rate": 2.712668533470204e-05, "loss": 2.3036, "step": 16050 }, { "epoch": 0.205568, "grad_norm": 1.4375, "learning_rate": 2.71231112988587e-05, "loss": 2.2688, "step": 16060 }, { "epoch": 0.205696, "grad_norm": 1.4765625, "learning_rate": 2.7119535277329218e-05, "loss": 2.2841, "step": 16070 }, { "epoch": 0.205824, "grad_norm": 1.484375, "learning_rate": 2.711595727069932e-05, "loss": 2.2846, "step": 16080 }, { "epoch": 0.205952, "grad_norm": 1.5859375, "learning_rate": 2.7112377279555064e-05, "loss": 2.3055, "step": 16090 }, { "epoch": 0.20608, "grad_norm": 2.5625, "learning_rate": 2.7108795304482815e-05, "loss": 2.2759, "step": 16100 }, { "epoch": 0.206208, "grad_norm": 1.4375, "learning_rate": 2.71052113460693e-05, "loss": 2.3141, "step": 16110 }, { "epoch": 0.206336, "grad_norm": 1.4140625, "learning_rate": 2.7101625404901525e-05, "loss": 2.2913, "step": 16120 }, { "epoch": 0.206464, "grad_norm": 1.53125, "learning_rate": 2.7098037481566862e-05, "loss": 2.2784, "step": 16130 }, { "epoch": 0.206592, "grad_norm": 1.5703125, "learning_rate": 2.709444757665298e-05, "loss": 2.302, "step": 16140 }, { "epoch": 0.20672, "grad_norm": 1.59375, "learning_rate": 2.709085569074788e-05, "loss": 2.3004, "step": 16150 }, { "epoch": 0.206848, "grad_norm": 1.6171875, "learning_rate": 2.7087261824439896e-05, "loss": 2.2891, "step": 16160 }, { "epoch": 0.206976, "grad_norm": 1.609375, "learning_rate": 2.7083665978317674e-05, "loss": 2.2839, "step": 16170 }, { "epoch": 0.207104, "grad_norm": 1.46875, "learning_rate": 2.7080068152970197e-05, "loss": 2.2857, "step": 16180 }, { "epoch": 0.207232, "grad_norm": 1.546875, "learning_rate": 2.7076468348986756e-05, "loss": 2.2732, "step": 16190 }, { "epoch": 0.20736, "grad_norm": 1.5, "learning_rate": 2.7072866566956983e-05, "loss": 2.2838, "step": 16200 }, { "epoch": 0.207488, "grad_norm": 1.4921875, "learning_rate": 2.706926280747082e-05, "loss": 2.3032, "step": 16210 }, { "epoch": 0.207616, "grad_norm": 1.46875, "learning_rate": 2.7065657071118546e-05, "loss": 2.269, "step": 16220 }, { "epoch": 0.207744, "grad_norm": 1.71875, "learning_rate": 2.7062049358490754e-05, "loss": 2.2946, "step": 16230 }, { "epoch": 0.207872, "grad_norm": 2.484375, "learning_rate": 2.705843967017836e-05, "loss": 2.2978, "step": 16240 }, { "epoch": 0.208, "grad_norm": 1.65625, "learning_rate": 2.705482800677261e-05, "loss": 2.3057, "step": 16250 }, { "epoch": 0.208128, "grad_norm": 1.5, "learning_rate": 2.7051214368865072e-05, "loss": 2.2989, "step": 16260 }, { "epoch": 0.208256, "grad_norm": 1.9609375, "learning_rate": 2.7047598757047636e-05, "loss": 2.2784, "step": 16270 }, { "epoch": 0.208384, "grad_norm": 1.71875, "learning_rate": 2.7043981171912518e-05, "loss": 2.2832, "step": 16280 }, { "epoch": 0.208512, "grad_norm": 1.65625, "learning_rate": 2.704036161405225e-05, "loss": 2.2733, "step": 16290 }, { "epoch": 0.20864, "grad_norm": 1.4140625, "learning_rate": 2.703674008405969e-05, "loss": 2.3243, "step": 16300 }, { "epoch": 0.208768, "grad_norm": 1.53125, "learning_rate": 2.7033116582528025e-05, "loss": 2.3038, "step": 16310 }, { "epoch": 0.208896, "grad_norm": 1.53125, "learning_rate": 2.7029491110050764e-05, "loss": 2.2731, "step": 16320 }, { "epoch": 0.209024, "grad_norm": 1.546875, "learning_rate": 2.7025863667221727e-05, "loss": 2.2832, "step": 16330 }, { "epoch": 0.209152, "grad_norm": 6.8125, "learning_rate": 2.7022234254635075e-05, "loss": 2.3043, "step": 16340 }, { "epoch": 0.20928, "grad_norm": 1.625, "learning_rate": 2.7018602872885275e-05, "loss": 2.295, "step": 16350 }, { "epoch": 0.209408, "grad_norm": 1.4765625, "learning_rate": 2.7014969522567124e-05, "loss": 2.297, "step": 16360 }, { "epoch": 0.209536, "grad_norm": 8.125, "learning_rate": 2.7011334204275747e-05, "loss": 2.2897, "step": 16370 }, { "epoch": 0.209664, "grad_norm": 1.875, "learning_rate": 2.700769691860658e-05, "loss": 2.2976, "step": 16380 }, { "epoch": 0.209792, "grad_norm": 1.65625, "learning_rate": 2.700405766615538e-05, "loss": 2.3094, "step": 16390 }, { "epoch": 0.20992, "grad_norm": 1.625, "learning_rate": 2.7000416447518247e-05, "loss": 2.3139, "step": 16400 }, { "epoch": 0.210048, "grad_norm": 1.484375, "learning_rate": 2.6996773263291584e-05, "loss": 2.2662, "step": 16410 }, { "epoch": 0.210176, "grad_norm": 1.453125, "learning_rate": 2.6993128114072116e-05, "loss": 2.3009, "step": 16420 }, { "epoch": 0.210304, "grad_norm": 1.46875, "learning_rate": 2.6989481000456897e-05, "loss": 2.3162, "step": 16430 }, { "epoch": 0.210432, "grad_norm": 1.46875, "learning_rate": 2.69858319230433e-05, "loss": 2.3126, "step": 16440 }, { "epoch": 0.21056, "grad_norm": 1.46875, "learning_rate": 2.6982180882429017e-05, "loss": 2.327, "step": 16450 }, { "epoch": 0.210688, "grad_norm": 1.4921875, "learning_rate": 2.697852787921207e-05, "loss": 2.284, "step": 16460 }, { "epoch": 0.210816, "grad_norm": 1.5234375, "learning_rate": 2.69748729139908e-05, "loss": 2.2785, "step": 16470 }, { "epoch": 0.210944, "grad_norm": 1.6171875, "learning_rate": 2.6971215987363855e-05, "loss": 2.2655, "step": 16480 }, { "epoch": 0.211072, "grad_norm": 2.15625, "learning_rate": 2.696755709993022e-05, "loss": 2.2925, "step": 16490 }, { "epoch": 0.2112, "grad_norm": 1.5703125, "learning_rate": 2.6963896252289207e-05, "loss": 2.3021, "step": 16500 }, { "epoch": 0.211328, "grad_norm": 1.515625, "learning_rate": 2.696023344504042e-05, "loss": 2.2887, "step": 16510 }, { "epoch": 0.211456, "grad_norm": 1.546875, "learning_rate": 2.6956568678783823e-05, "loss": 2.3156, "step": 16520 }, { "epoch": 0.211584, "grad_norm": 1.5078125, "learning_rate": 2.6952901954119664e-05, "loss": 2.3399, "step": 16530 }, { "epoch": 0.211712, "grad_norm": 1.578125, "learning_rate": 2.694923327164854e-05, "loss": 2.3183, "step": 16540 }, { "epoch": 0.21184, "grad_norm": 1.6328125, "learning_rate": 2.694556263197135e-05, "loss": 2.3, "step": 16550 }, { "epoch": 0.211968, "grad_norm": 1.4765625, "learning_rate": 2.6941890035689326e-05, "loss": 2.3155, "step": 16560 }, { "epoch": 0.212096, "grad_norm": 1.3984375, "learning_rate": 2.6938215483404007e-05, "loss": 2.3241, "step": 16570 }, { "epoch": 0.212224, "grad_norm": 1.640625, "learning_rate": 2.6934538975717267e-05, "loss": 2.2988, "step": 16580 }, { "epoch": 0.212352, "grad_norm": 1.609375, "learning_rate": 2.6930860513231297e-05, "loss": 2.2883, "step": 16590 }, { "epoch": 0.21248, "grad_norm": 1.5390625, "learning_rate": 2.69271800965486e-05, "loss": 2.2967, "step": 16600 }, { "epoch": 0.212608, "grad_norm": 1.671875, "learning_rate": 2.6923497726272005e-05, "loss": 2.3144, "step": 16610 }, { "epoch": 0.212736, "grad_norm": 1.6171875, "learning_rate": 2.6919813403004658e-05, "loss": 2.3196, "step": 16620 }, { "epoch": 0.212864, "grad_norm": 1.578125, "learning_rate": 2.691612712735003e-05, "loss": 2.2667, "step": 16630 }, { "epoch": 0.212992, "grad_norm": 1.921875, "learning_rate": 2.6912438899911907e-05, "loss": 2.3216, "step": 16640 }, { "epoch": 0.21312, "grad_norm": 1.453125, "learning_rate": 2.69087487212944e-05, "loss": 2.3356, "step": 16650 }, { "epoch": 0.213248, "grad_norm": 1.5, "learning_rate": 2.6905056592101933e-05, "loss": 2.3062, "step": 16660 }, { "epoch": 0.213376, "grad_norm": 1.5703125, "learning_rate": 2.690136251293925e-05, "loss": 2.2807, "step": 16670 }, { "epoch": 0.213504, "grad_norm": 1.53125, "learning_rate": 2.6897666484411422e-05, "loss": 2.3161, "step": 16680 }, { "epoch": 0.213632, "grad_norm": 1.59375, "learning_rate": 2.689396850712383e-05, "loss": 2.3131, "step": 16690 }, { "epoch": 0.21376, "grad_norm": 1.546875, "learning_rate": 2.6890268581682185e-05, "loss": 2.2808, "step": 16700 }, { "epoch": 0.213888, "grad_norm": 1.453125, "learning_rate": 2.6886566708692502e-05, "loss": 2.3149, "step": 16710 }, { "epoch": 0.214016, "grad_norm": 1.6171875, "learning_rate": 2.6882862888761127e-05, "loss": 2.2879, "step": 16720 }, { "epoch": 0.214144, "grad_norm": 1.6015625, "learning_rate": 2.6879157122494728e-05, "loss": 2.314, "step": 16730 }, { "epoch": 0.214272, "grad_norm": 1.546875, "learning_rate": 2.6875449410500272e-05, "loss": 2.3446, "step": 16740 }, { "epoch": 0.2144, "grad_norm": 1.5703125, "learning_rate": 2.6871739753385068e-05, "loss": 2.3122, "step": 16750 }, { "epoch": 0.214528, "grad_norm": 1.6875, "learning_rate": 2.686802815175673e-05, "loss": 2.2679, "step": 16760 }, { "epoch": 0.214656, "grad_norm": 1.8125, "learning_rate": 2.68643146062232e-05, "loss": 2.3134, "step": 16770 }, { "epoch": 0.214784, "grad_norm": 1.421875, "learning_rate": 2.686059911739272e-05, "loss": 2.3023, "step": 16780 }, { "epoch": 0.214912, "grad_norm": 1.4375, "learning_rate": 2.6856881685873875e-05, "loss": 2.3072, "step": 16790 }, { "epoch": 0.21504, "grad_norm": 1.7578125, "learning_rate": 2.6853162312275547e-05, "loss": 2.2893, "step": 16800 }, { "epoch": 0.215168, "grad_norm": 1.546875, "learning_rate": 2.684944099720695e-05, "loss": 2.2913, "step": 16810 }, { "epoch": 0.215296, "grad_norm": 1.4765625, "learning_rate": 2.6845717741277607e-05, "loss": 2.2865, "step": 16820 }, { "epoch": 0.215424, "grad_norm": 1.7265625, "learning_rate": 2.6841992545097373e-05, "loss": 2.3008, "step": 16830 }, { "epoch": 0.215552, "grad_norm": 1.4765625, "learning_rate": 2.6838265409276397e-05, "loss": 2.2811, "step": 16840 }, { "epoch": 0.21568, "grad_norm": 1.390625, "learning_rate": 2.683453633442517e-05, "loss": 2.3202, "step": 16850 }, { "epoch": 0.215808, "grad_norm": 1.46875, "learning_rate": 2.6830805321154478e-05, "loss": 2.3125, "step": 16860 }, { "epoch": 0.215936, "grad_norm": 1.703125, "learning_rate": 2.6827072370075453e-05, "loss": 2.3087, "step": 16870 }, { "epoch": 0.216064, "grad_norm": 1.6953125, "learning_rate": 2.6823337481799515e-05, "loss": 2.3192, "step": 16880 }, { "epoch": 0.216192, "grad_norm": 1.5234375, "learning_rate": 2.6819600656938418e-05, "loss": 2.3348, "step": 16890 }, { "epoch": 0.21632, "grad_norm": 1.4375, "learning_rate": 2.681586189610423e-05, "loss": 2.3191, "step": 16900 }, { "epoch": 0.216448, "grad_norm": 1.6484375, "learning_rate": 2.681212119990934e-05, "loss": 2.3268, "step": 16910 }, { "epoch": 0.216576, "grad_norm": 1.4453125, "learning_rate": 2.680837856896644e-05, "loss": 2.3104, "step": 16920 }, { "epoch": 0.216704, "grad_norm": 1.5703125, "learning_rate": 2.6804634003888556e-05, "loss": 2.278, "step": 16930 }, { "epoch": 0.216832, "grad_norm": 1.5234375, "learning_rate": 2.680088750528902e-05, "loss": 2.2676, "step": 16940 }, { "epoch": 0.21696, "grad_norm": 1.625, "learning_rate": 2.6797139073781484e-05, "loss": 2.3104, "step": 16950 }, { "epoch": 0.217088, "grad_norm": 1.5546875, "learning_rate": 2.6793388709979913e-05, "loss": 2.2934, "step": 16960 }, { "epoch": 0.217216, "grad_norm": 1.546875, "learning_rate": 2.6789636414498602e-05, "loss": 2.2714, "step": 16970 }, { "epoch": 0.217344, "grad_norm": 1.515625, "learning_rate": 2.6785882187952143e-05, "loss": 2.3161, "step": 16980 }, { "epoch": 0.217472, "grad_norm": 1.578125, "learning_rate": 2.6782126030955456e-05, "loss": 2.291, "step": 16990 }, { "epoch": 0.2176, "grad_norm": 1.9375, "learning_rate": 2.6778367944123778e-05, "loss": 2.2863, "step": 17000 }, { "epoch": 0.217728, "grad_norm": 1.6484375, "learning_rate": 2.6774607928072657e-05, "loss": 2.2935, "step": 17010 }, { "epoch": 0.217856, "grad_norm": 1.5625, "learning_rate": 2.6770845983417958e-05, "loss": 2.303, "step": 17020 }, { "epoch": 0.217984, "grad_norm": 1.5, "learning_rate": 2.6767082110775864e-05, "loss": 2.2753, "step": 17030 }, { "epoch": 0.218112, "grad_norm": 1.46875, "learning_rate": 2.6763316310762872e-05, "loss": 2.3034, "step": 17040 }, { "epoch": 0.21824, "grad_norm": 1.9609375, "learning_rate": 2.6759548583995795e-05, "loss": 2.2993, "step": 17050 }, { "epoch": 0.218368, "grad_norm": 1.625, "learning_rate": 2.6755778931091767e-05, "loss": 2.2742, "step": 17060 }, { "epoch": 0.218496, "grad_norm": 1.8671875, "learning_rate": 2.675200735266822e-05, "loss": 2.2724, "step": 17070 }, { "epoch": 0.218624, "grad_norm": 1.5546875, "learning_rate": 2.6748233849342927e-05, "loss": 2.3237, "step": 17080 }, { "epoch": 0.218752, "grad_norm": 1.453125, "learning_rate": 2.674445842173396e-05, "loss": 2.2961, "step": 17090 }, { "epoch": 0.21888, "grad_norm": 3.765625, "learning_rate": 2.67406810704597e-05, "loss": 2.3011, "step": 17100 }, { "epoch": 0.219008, "grad_norm": 1.609375, "learning_rate": 2.673690179613887e-05, "loss": 2.3047, "step": 17110 }, { "epoch": 0.219136, "grad_norm": 1.6484375, "learning_rate": 2.6733120599390476e-05, "loss": 2.3325, "step": 17120 }, { "epoch": 0.219264, "grad_norm": 1.6796875, "learning_rate": 2.6729337480833854e-05, "loss": 2.3173, "step": 17130 }, { "epoch": 0.219392, "grad_norm": 1.4453125, "learning_rate": 2.6725552441088656e-05, "loss": 2.3036, "step": 17140 }, { "epoch": 0.21952, "grad_norm": 1.5390625, "learning_rate": 2.672176548077485e-05, "loss": 2.2982, "step": 17150 }, { "epoch": 0.219648, "grad_norm": 1.8359375, "learning_rate": 2.6717976600512714e-05, "loss": 2.2739, "step": 17160 }, { "epoch": 0.219776, "grad_norm": 1.375, "learning_rate": 2.6714185800922843e-05, "loss": 2.2993, "step": 17170 }, { "epoch": 0.219904, "grad_norm": 1.578125, "learning_rate": 2.6710393082626138e-05, "loss": 2.3067, "step": 17180 }, { "epoch": 0.220032, "grad_norm": 1.59375, "learning_rate": 2.6706598446243832e-05, "loss": 2.2629, "step": 17190 }, { "epoch": 0.22016, "grad_norm": 1.765625, "learning_rate": 2.6702801892397454e-05, "loss": 2.2901, "step": 17200 }, { "epoch": 0.220288, "grad_norm": 1.6015625, "learning_rate": 2.6699003421708856e-05, "loss": 2.3243, "step": 17210 }, { "epoch": 0.220416, "grad_norm": 1.5390625, "learning_rate": 2.6695203034800203e-05, "loss": 2.333, "step": 17220 }, { "epoch": 0.220544, "grad_norm": 1.6328125, "learning_rate": 2.6691400732293973e-05, "loss": 2.3036, "step": 17230 }, { "epoch": 0.220672, "grad_norm": 1.578125, "learning_rate": 2.6687596514812962e-05, "loss": 2.2953, "step": 17240 }, { "epoch": 0.2208, "grad_norm": 1.5, "learning_rate": 2.668379038298027e-05, "loss": 2.2907, "step": 17250 }, { "epoch": 0.220928, "grad_norm": 1.4609375, "learning_rate": 2.667998233741932e-05, "loss": 2.3003, "step": 17260 }, { "epoch": 0.221056, "grad_norm": 1.4921875, "learning_rate": 2.6676172378753846e-05, "loss": 2.2967, "step": 17270 }, { "epoch": 0.221184, "grad_norm": 1.5078125, "learning_rate": 2.667236050760789e-05, "loss": 2.3085, "step": 17280 }, { "epoch": 0.221312, "grad_norm": 1.5859375, "learning_rate": 2.6668546724605817e-05, "loss": 2.3074, "step": 17290 }, { "epoch": 0.22144, "grad_norm": 1.5546875, "learning_rate": 2.666473103037229e-05, "loss": 2.2876, "step": 17300 }, { "epoch": 0.221568, "grad_norm": 1.78125, "learning_rate": 2.6660913425532307e-05, "loss": 2.2795, "step": 17310 }, { "epoch": 0.221696, "grad_norm": 1.671875, "learning_rate": 2.6657093910711156e-05, "loss": 2.2987, "step": 17320 }, { "epoch": 0.221824, "grad_norm": 1.453125, "learning_rate": 2.665327248653446e-05, "loss": 2.2977, "step": 17330 }, { "epoch": 0.221952, "grad_norm": 1.5234375, "learning_rate": 2.6649449153628134e-05, "loss": 2.3482, "step": 17340 }, { "epoch": 0.22208, "grad_norm": 1.5390625, "learning_rate": 2.6645623912618414e-05, "loss": 2.3002, "step": 17350 }, { "epoch": 0.222208, "grad_norm": 1.546875, "learning_rate": 2.6641796764131856e-05, "loss": 2.2877, "step": 17360 }, { "epoch": 0.222336, "grad_norm": 1.6953125, "learning_rate": 2.663796770879532e-05, "loss": 2.3196, "step": 17370 }, { "epoch": 0.222464, "grad_norm": 2.03125, "learning_rate": 2.6634136747235978e-05, "loss": 2.2865, "step": 17380 }, { "epoch": 0.222592, "grad_norm": 1.59375, "learning_rate": 2.6630303880081313e-05, "loss": 2.2909, "step": 17390 }, { "epoch": 0.22272, "grad_norm": 1.578125, "learning_rate": 2.662646910795913e-05, "loss": 2.2851, "step": 17400 }, { "epoch": 0.222848, "grad_norm": 1.53125, "learning_rate": 2.6622632431497542e-05, "loss": 2.2951, "step": 17410 }, { "epoch": 0.222976, "grad_norm": 1.7109375, "learning_rate": 2.6618793851324962e-05, "loss": 2.2823, "step": 17420 }, { "epoch": 0.223104, "grad_norm": 1.6796875, "learning_rate": 2.661495336807013e-05, "loss": 2.3312, "step": 17430 }, { "epoch": 0.223232, "grad_norm": 1.515625, "learning_rate": 2.661111098236209e-05, "loss": 2.2934, "step": 17440 }, { "epoch": 0.22336, "grad_norm": 1.421875, "learning_rate": 2.6607266694830202e-05, "loss": 2.3197, "step": 17450 }, { "epoch": 0.223488, "grad_norm": 1.4765625, "learning_rate": 2.6603420506104132e-05, "loss": 2.2942, "step": 17460 }, { "epoch": 0.223616, "grad_norm": 1.6953125, "learning_rate": 2.6599572416813863e-05, "loss": 2.3092, "step": 17470 }, { "epoch": 0.223744, "grad_norm": 1.6796875, "learning_rate": 2.6595722427589686e-05, "loss": 2.2863, "step": 17480 }, { "epoch": 0.223872, "grad_norm": 1.6171875, "learning_rate": 2.6591870539062206e-05, "loss": 2.2814, "step": 17490 }, { "epoch": 0.224, "grad_norm": 1.453125, "learning_rate": 2.6588016751862335e-05, "loss": 2.2721, "step": 17500 }, { "epoch": 0.224128, "grad_norm": 1.625, "learning_rate": 2.6584161066621295e-05, "loss": 2.32, "step": 17510 }, { "epoch": 0.224256, "grad_norm": 2.0, "learning_rate": 2.6580303483970626e-05, "loss": 2.2594, "step": 17520 }, { "epoch": 0.224384, "grad_norm": 1.5546875, "learning_rate": 2.6576444004542176e-05, "loss": 2.2979, "step": 17530 }, { "epoch": 0.224512, "grad_norm": 1.4921875, "learning_rate": 2.6572582628968097e-05, "loss": 2.2976, "step": 17540 }, { "epoch": 0.22464, "grad_norm": 1.65625, "learning_rate": 2.6568719357880864e-05, "loss": 2.2675, "step": 17550 }, { "epoch": 0.224768, "grad_norm": 1.7578125, "learning_rate": 2.6564854191913245e-05, "loss": 2.2828, "step": 17560 }, { "epoch": 0.224896, "grad_norm": 1.6171875, "learning_rate": 2.6560987131698343e-05, "loss": 2.3374, "step": 17570 }, { "epoch": 0.225024, "grad_norm": 1.53125, "learning_rate": 2.6557118177869542e-05, "loss": 2.3013, "step": 17580 }, { "epoch": 0.225152, "grad_norm": 1.4921875, "learning_rate": 2.6553247331060565e-05, "loss": 2.3078, "step": 17590 }, { "epoch": 0.22528, "grad_norm": 1.515625, "learning_rate": 2.6549374591905423e-05, "loss": 2.309, "step": 17600 }, { "epoch": 0.225408, "grad_norm": 1.5859375, "learning_rate": 2.6545499961038448e-05, "loss": 2.2963, "step": 17610 }, { "epoch": 0.225536, "grad_norm": 1.5078125, "learning_rate": 2.6541623439094277e-05, "loss": 2.2806, "step": 17620 }, { "epoch": 0.225664, "grad_norm": 1.4453125, "learning_rate": 2.6537745026707862e-05, "loss": 2.2899, "step": 17630 }, { "epoch": 0.225792, "grad_norm": 1.5078125, "learning_rate": 2.653386472451446e-05, "loss": 2.2815, "step": 17640 }, { "epoch": 0.22592, "grad_norm": 1.8125, "learning_rate": 2.652998253314964e-05, "loss": 2.2824, "step": 17650 }, { "epoch": 0.226048, "grad_norm": 1.4921875, "learning_rate": 2.652609845324928e-05, "loss": 2.2931, "step": 17660 }, { "epoch": 0.226176, "grad_norm": 1.625, "learning_rate": 2.6522212485449562e-05, "loss": 2.2363, "step": 17670 }, { "epoch": 0.226304, "grad_norm": 1.546875, "learning_rate": 2.651832463038699e-05, "loss": 2.2856, "step": 17680 }, { "epoch": 0.226432, "grad_norm": 1.4765625, "learning_rate": 2.6514434888698357e-05, "loss": 2.2735, "step": 17690 }, { "epoch": 0.22656, "grad_norm": 2.0, "learning_rate": 2.651054326102079e-05, "loss": 2.3003, "step": 17700 }, { "epoch": 0.226688, "grad_norm": 1.515625, "learning_rate": 2.6506649747991708e-05, "loss": 2.2741, "step": 17710 }, { "epoch": 0.226816, "grad_norm": 1.5390625, "learning_rate": 2.650275435024884e-05, "loss": 2.2873, "step": 17720 }, { "epoch": 0.226944, "grad_norm": 1.4765625, "learning_rate": 2.6498857068430227e-05, "loss": 2.3129, "step": 17730 }, { "epoch": 0.227072, "grad_norm": 1.5625, "learning_rate": 2.6494957903174222e-05, "loss": 2.3199, "step": 17740 }, { "epoch": 0.2272, "grad_norm": 1.71875, "learning_rate": 2.649105685511948e-05, "loss": 2.2899, "step": 17750 }, { "epoch": 0.227328, "grad_norm": 1.5546875, "learning_rate": 2.6487153924904966e-05, "loss": 2.3099, "step": 17760 }, { "epoch": 0.227456, "grad_norm": 1.5703125, "learning_rate": 2.6483249113169953e-05, "loss": 2.2608, "step": 17770 }, { "epoch": 0.227584, "grad_norm": 1.734375, "learning_rate": 2.6479342420554034e-05, "loss": 2.3103, "step": 17780 }, { "epoch": 0.227712, "grad_norm": 6.59375, "learning_rate": 2.6475433847697086e-05, "loss": 2.3399, "step": 17790 }, { "epoch": 0.22784, "grad_norm": 1.53125, "learning_rate": 2.6471523395239317e-05, "loss": 2.292, "step": 17800 }, { "epoch": 0.227968, "grad_norm": 1.6640625, "learning_rate": 2.6467611063821226e-05, "loss": 2.271, "step": 17810 }, { "epoch": 0.228096, "grad_norm": 1.578125, "learning_rate": 2.6463696854083634e-05, "loss": 2.286, "step": 17820 }, { "epoch": 0.228224, "grad_norm": 1.46875, "learning_rate": 2.6459780766667663e-05, "loss": 2.2872, "step": 17830 }, { "epoch": 0.228352, "grad_norm": 2.625, "learning_rate": 2.6455862802214738e-05, "loss": 2.3054, "step": 17840 }, { "epoch": 0.22848, "grad_norm": 5.09375, "learning_rate": 2.6451942961366594e-05, "loss": 2.3137, "step": 17850 }, { "epoch": 0.228608, "grad_norm": 1.4765625, "learning_rate": 2.6448021244765284e-05, "loss": 2.3073, "step": 17860 }, { "epoch": 0.228736, "grad_norm": 1.71875, "learning_rate": 2.644409765305315e-05, "loss": 2.27, "step": 17870 }, { "epoch": 0.228864, "grad_norm": 1.6640625, "learning_rate": 2.6440172186872858e-05, "loss": 2.3474, "step": 17880 }, { "epoch": 0.228992, "grad_norm": 1.5546875, "learning_rate": 2.643624484686737e-05, "loss": 2.2924, "step": 17890 }, { "epoch": 0.22912, "grad_norm": 1.5703125, "learning_rate": 2.6432315633679953e-05, "loss": 2.3231, "step": 17900 }, { "epoch": 0.229248, "grad_norm": 1.5078125, "learning_rate": 2.64283845479542e-05, "loss": 2.2739, "step": 17910 }, { "epoch": 0.229376, "grad_norm": 1.8359375, "learning_rate": 2.6424451590333985e-05, "loss": 2.3156, "step": 17920 }, { "epoch": 0.229504, "grad_norm": 1.734375, "learning_rate": 2.642051676146351e-05, "loss": 2.2998, "step": 17930 }, { "epoch": 0.229632, "grad_norm": 1.703125, "learning_rate": 2.6416580061987263e-05, "loss": 2.2403, "step": 17940 }, { "epoch": 0.22976, "grad_norm": 1.5703125, "learning_rate": 2.641264149255006e-05, "loss": 2.3084, "step": 17950 }, { "epoch": 0.229888, "grad_norm": 1.546875, "learning_rate": 2.6408701053797003e-05, "loss": 2.272, "step": 17960 }, { "epoch": 0.230016, "grad_norm": 1.5625, "learning_rate": 2.6404758746373524e-05, "loss": 2.2946, "step": 17970 }, { "epoch": 0.230144, "grad_norm": 1.484375, "learning_rate": 2.6400814570925334e-05, "loss": 2.2897, "step": 17980 }, { "epoch": 0.230272, "grad_norm": 1.578125, "learning_rate": 2.6396868528098473e-05, "loss": 2.2881, "step": 17990 }, { "epoch": 0.2304, "grad_norm": 1.484375, "learning_rate": 2.6392920618539268e-05, "loss": 2.2898, "step": 18000 }, { "epoch": 0.230528, "grad_norm": 1.5546875, "learning_rate": 2.6388970842894363e-05, "loss": 2.314, "step": 18010 }, { "epoch": 0.230656, "grad_norm": 1.5859375, "learning_rate": 2.6385019201810707e-05, "loss": 2.2825, "step": 18020 }, { "epoch": 0.230784, "grad_norm": 1.4453125, "learning_rate": 2.6381065695935556e-05, "loss": 2.3123, "step": 18030 }, { "epoch": 0.230912, "grad_norm": 1.6796875, "learning_rate": 2.637711032591646e-05, "loss": 2.252, "step": 18040 }, { "epoch": 0.23104, "grad_norm": 1.4921875, "learning_rate": 2.637315309240129e-05, "loss": 2.3117, "step": 18050 }, { "epoch": 0.231168, "grad_norm": 1.6015625, "learning_rate": 2.6369193996038215e-05, "loss": 2.2896, "step": 18060 }, { "epoch": 0.231296, "grad_norm": 1.484375, "learning_rate": 2.6365233037475705e-05, "loss": 2.3124, "step": 18070 }, { "epoch": 0.231424, "grad_norm": 1.8125, "learning_rate": 2.636127021736254e-05, "loss": 2.2838, "step": 18080 }, { "epoch": 0.231552, "grad_norm": 1.46875, "learning_rate": 2.6357305536347808e-05, "loss": 2.2744, "step": 18090 }, { "epoch": 0.23168, "grad_norm": 1.5703125, "learning_rate": 2.6353338995080894e-05, "loss": 2.2853, "step": 18100 }, { "epoch": 0.231808, "grad_norm": 1.4375, "learning_rate": 2.634937059421149e-05, "loss": 2.3006, "step": 18110 }, { "epoch": 0.231936, "grad_norm": 1.546875, "learning_rate": 2.6345400334389597e-05, "loss": 2.3018, "step": 18120 }, { "epoch": 0.232064, "grad_norm": 1.53125, "learning_rate": 2.6341428216265514e-05, "loss": 2.2681, "step": 18130 }, { "epoch": 0.232192, "grad_norm": 1.6484375, "learning_rate": 2.6337454240489853e-05, "loss": 2.2898, "step": 18140 }, { "epoch": 0.23232, "grad_norm": 1.5078125, "learning_rate": 2.6333478407713523e-05, "loss": 2.3143, "step": 18150 }, { "epoch": 0.232448, "grad_norm": 1.609375, "learning_rate": 2.632950071858774e-05, "loss": 2.3006, "step": 18160 }, { "epoch": 0.232576, "grad_norm": 1.7421875, "learning_rate": 2.6325521173764016e-05, "loss": 2.3029, "step": 18170 }, { "epoch": 0.232704, "grad_norm": 1.59375, "learning_rate": 2.632153977389419e-05, "loss": 2.2628, "step": 18180 }, { "epoch": 0.232832, "grad_norm": 1.578125, "learning_rate": 2.6317556519630373e-05, "loss": 2.2919, "step": 18190 }, { "epoch": 0.23296, "grad_norm": 1.53125, "learning_rate": 2.6313571411625004e-05, "loss": 2.3076, "step": 18200 }, { "epoch": 0.233088, "grad_norm": 1.5625, "learning_rate": 2.6309584450530817e-05, "loss": 2.2982, "step": 18210 }, { "epoch": 0.233216, "grad_norm": 1.5234375, "learning_rate": 2.630559563700085e-05, "loss": 2.2778, "step": 18220 }, { "epoch": 0.233344, "grad_norm": 1.578125, "learning_rate": 2.6301604971688445e-05, "loss": 2.3142, "step": 18230 }, { "epoch": 0.233472, "grad_norm": 1.53125, "learning_rate": 2.629761245524724e-05, "loss": 2.295, "step": 18240 }, { "epoch": 0.2336, "grad_norm": 1.6875, "learning_rate": 2.6293618088331192e-05, "loss": 2.3017, "step": 18250 }, { "epoch": 0.233728, "grad_norm": 2.75, "learning_rate": 2.6289621871594552e-05, "loss": 2.3059, "step": 18260 }, { "epoch": 0.233856, "grad_norm": 1.5, "learning_rate": 2.6285623805691872e-05, "loss": 2.2825, "step": 18270 }, { "epoch": 0.233984, "grad_norm": 1.5390625, "learning_rate": 2.6281623891278003e-05, "loss": 2.3066, "step": 18280 }, { "epoch": 0.234112, "grad_norm": 1.7109375, "learning_rate": 2.6277622129008113e-05, "loss": 2.3146, "step": 18290 }, { "epoch": 0.23424, "grad_norm": 1.578125, "learning_rate": 2.6273618519537658e-05, "loss": 2.3262, "step": 18300 }, { "epoch": 0.234368, "grad_norm": 1.5390625, "learning_rate": 2.6269613063522408e-05, "loss": 2.3156, "step": 18310 }, { "epoch": 0.234496, "grad_norm": 1.578125, "learning_rate": 2.6265605761618428e-05, "loss": 2.2887, "step": 18320 }, { "epoch": 0.234624, "grad_norm": 1.625, "learning_rate": 2.626159661448209e-05, "loss": 2.2791, "step": 18330 }, { "epoch": 0.234752, "grad_norm": 1.5078125, "learning_rate": 2.6257585622770064e-05, "loss": 2.3, "step": 18340 }, { "epoch": 0.23488, "grad_norm": 1.5078125, "learning_rate": 2.6253572787139324e-05, "loss": 2.3026, "step": 18350 }, { "epoch": 0.235008, "grad_norm": 1.609375, "learning_rate": 2.6249558108247143e-05, "loss": 2.2986, "step": 18360 }, { "epoch": 0.235136, "grad_norm": 1.5546875, "learning_rate": 2.6245541586751107e-05, "loss": 2.2946, "step": 18370 }, { "epoch": 0.235264, "grad_norm": 1.7734375, "learning_rate": 2.6241523223309095e-05, "loss": 2.2957, "step": 18380 }, { "epoch": 0.235392, "grad_norm": 1.453125, "learning_rate": 2.6237503018579277e-05, "loss": 2.3024, "step": 18390 }, { "epoch": 0.23552, "grad_norm": 1.4453125, "learning_rate": 2.6233480973220152e-05, "loss": 2.2934, "step": 18400 }, { "epoch": 0.235648, "grad_norm": 1.7109375, "learning_rate": 2.6229457087890493e-05, "loss": 2.2739, "step": 18410 }, { "epoch": 0.235776, "grad_norm": 1.390625, "learning_rate": 2.622543136324939e-05, "loss": 2.3145, "step": 18420 }, { "epoch": 0.235904, "grad_norm": 1.7734375, "learning_rate": 2.6221403799956233e-05, "loss": 2.2922, "step": 18430 }, { "epoch": 0.236032, "grad_norm": 1.6484375, "learning_rate": 2.6217374398670703e-05, "loss": 2.2926, "step": 18440 }, { "epoch": 0.23616, "grad_norm": 1.5390625, "learning_rate": 2.6213343160052797e-05, "loss": 2.313, "step": 18450 }, { "epoch": 0.236288, "grad_norm": 1.5, "learning_rate": 2.6209310084762805e-05, "loss": 2.2969, "step": 18460 }, { "epoch": 0.236416, "grad_norm": 1.4921875, "learning_rate": 2.6205275173461315e-05, "loss": 2.2874, "step": 18470 }, { "epoch": 0.236544, "grad_norm": 1.53125, "learning_rate": 2.6201238426809222e-05, "loss": 2.3005, "step": 18480 }, { "epoch": 0.236672, "grad_norm": 1.4375, "learning_rate": 2.619719984546771e-05, "loss": 2.3014, "step": 18490 }, { "epoch": 0.2368, "grad_norm": 1.5703125, "learning_rate": 2.6193159430098282e-05, "loss": 2.2905, "step": 18500 }, { "epoch": 0.236928, "grad_norm": 1.5390625, "learning_rate": 2.6189117181362733e-05, "loss": 2.2866, "step": 18510 }, { "epoch": 0.237056, "grad_norm": 1.640625, "learning_rate": 2.618507309992315e-05, "loss": 2.2878, "step": 18520 }, { "epoch": 0.237184, "grad_norm": 1.546875, "learning_rate": 2.6181027186441926e-05, "loss": 2.3089, "step": 18530 }, { "epoch": 0.237312, "grad_norm": 1.5546875, "learning_rate": 2.617697944158176e-05, "loss": 2.2863, "step": 18540 }, { "epoch": 0.23744, "grad_norm": 1.59375, "learning_rate": 2.6172929866005646e-05, "loss": 2.286, "step": 18550 }, { "epoch": 0.237568, "grad_norm": 2.171875, "learning_rate": 2.6168878460376877e-05, "loss": 2.2895, "step": 18560 }, { "epoch": 0.237696, "grad_norm": 1.546875, "learning_rate": 2.6164825225359044e-05, "loss": 2.3155, "step": 18570 }, { "epoch": 0.237824, "grad_norm": 2.15625, "learning_rate": 2.6160770161616044e-05, "loss": 2.2974, "step": 18580 }, { "epoch": 0.237952, "grad_norm": 1.53125, "learning_rate": 2.6156713269812064e-05, "loss": 2.3003, "step": 18590 }, { "epoch": 0.23808, "grad_norm": 1.7734375, "learning_rate": 2.615265455061161e-05, "loss": 2.2805, "step": 18600 }, { "epoch": 0.238208, "grad_norm": 1.5078125, "learning_rate": 2.6148594004679456e-05, "loss": 2.2756, "step": 18610 }, { "epoch": 0.238336, "grad_norm": 1.515625, "learning_rate": 2.6144531632680704e-05, "loss": 2.3032, "step": 18620 }, { "epoch": 0.238464, "grad_norm": 1.453125, "learning_rate": 2.614046743528074e-05, "loss": 2.2791, "step": 18630 }, { "epoch": 0.238592, "grad_norm": 1.53125, "learning_rate": 2.613640141314525e-05, "loss": 2.2963, "step": 18640 }, { "epoch": 0.23872, "grad_norm": 1.4609375, "learning_rate": 2.6132333566940233e-05, "loss": 2.2824, "step": 18650 }, { "epoch": 0.238848, "grad_norm": 1.6328125, "learning_rate": 2.612826389733196e-05, "loss": 2.2869, "step": 18660 }, { "epoch": 0.238976, "grad_norm": 1.6484375, "learning_rate": 2.612419240498703e-05, "loss": 2.3071, "step": 18670 }, { "epoch": 0.239104, "grad_norm": 1.5, "learning_rate": 2.6120119090572318e-05, "loss": 2.3067, "step": 18680 }, { "epoch": 0.239232, "grad_norm": 1.4921875, "learning_rate": 2.6116043954755012e-05, "loss": 2.2932, "step": 18690 }, { "epoch": 0.23936, "grad_norm": 1.53125, "learning_rate": 2.611196699820259e-05, "loss": 2.2946, "step": 18700 }, { "epoch": 0.239488, "grad_norm": 1.5859375, "learning_rate": 2.6107888221582828e-05, "loss": 2.3101, "step": 18710 }, { "epoch": 0.239616, "grad_norm": 1.6640625, "learning_rate": 2.6103807625563804e-05, "loss": 2.2887, "step": 18720 }, { "epoch": 0.239744, "grad_norm": 1.4765625, "learning_rate": 2.60997252108139e-05, "loss": 2.31, "step": 18730 }, { "epoch": 0.239872, "grad_norm": 1.546875, "learning_rate": 2.6095640978001775e-05, "loss": 2.292, "step": 18740 }, { "epoch": 0.24, "grad_norm": 1.5546875, "learning_rate": 2.6091554927796414e-05, "loss": 2.2752, "step": 18750 }, { "epoch": 0.240128, "grad_norm": 1.4765625, "learning_rate": 2.6087467060867076e-05, "loss": 2.3137, "step": 18760 }, { "epoch": 0.240256, "grad_norm": 1.59375, "learning_rate": 2.608337737788333e-05, "loss": 2.278, "step": 18770 }, { "epoch": 0.240384, "grad_norm": 1.6328125, "learning_rate": 2.607928587951504e-05, "loss": 2.3012, "step": 18780 }, { "epoch": 0.240512, "grad_norm": 1.546875, "learning_rate": 2.6075192566432367e-05, "loss": 2.3362, "step": 18790 }, { "epoch": 0.24064, "grad_norm": 1.53125, "learning_rate": 2.6071097439305764e-05, "loss": 2.2741, "step": 18800 }, { "epoch": 0.240768, "grad_norm": 1.5078125, "learning_rate": 2.6067000498805986e-05, "loss": 2.2923, "step": 18810 }, { "epoch": 0.240896, "grad_norm": 1.5078125, "learning_rate": 2.6062901745604096e-05, "loss": 2.2807, "step": 18820 }, { "epoch": 0.241024, "grad_norm": 1.4609375, "learning_rate": 2.605880118037143e-05, "loss": 2.3112, "step": 18830 }, { "epoch": 0.241152, "grad_norm": 1.578125, "learning_rate": 2.6054698803779638e-05, "loss": 2.2831, "step": 18840 }, { "epoch": 0.24128, "grad_norm": 3.34375, "learning_rate": 2.6050594616500667e-05, "loss": 2.3184, "step": 18850 }, { "epoch": 0.241408, "grad_norm": 1.5703125, "learning_rate": 2.6046488619206752e-05, "loss": 2.278, "step": 18860 }, { "epoch": 0.241536, "grad_norm": 1.6015625, "learning_rate": 2.6042380812570424e-05, "loss": 2.2898, "step": 18870 }, { "epoch": 0.241664, "grad_norm": 1.546875, "learning_rate": 2.6038271197264522e-05, "loss": 2.2969, "step": 18880 }, { "epoch": 0.241792, "grad_norm": 1.6796875, "learning_rate": 2.6034159773962173e-05, "loss": 2.2874, "step": 18890 }, { "epoch": 0.24192, "grad_norm": 1.5234375, "learning_rate": 2.60300465433368e-05, "loss": 2.2942, "step": 18900 }, { "epoch": 0.242048, "grad_norm": 1.453125, "learning_rate": 2.602593150606212e-05, "loss": 2.3005, "step": 18910 }, { "epoch": 0.242176, "grad_norm": 1.5703125, "learning_rate": 2.6021814662812155e-05, "loss": 2.3084, "step": 18920 }, { "epoch": 0.242304, "grad_norm": 1.6171875, "learning_rate": 2.601769601426121e-05, "loss": 2.3145, "step": 18930 }, { "epoch": 0.242432, "grad_norm": 1.6953125, "learning_rate": 2.60135755610839e-05, "loss": 2.3263, "step": 18940 }, { "epoch": 0.24256, "grad_norm": 1.484375, "learning_rate": 2.6009453303955124e-05, "loss": 2.3042, "step": 18950 }, { "epoch": 0.242688, "grad_norm": 1.453125, "learning_rate": 2.600532924355008e-05, "loss": 2.288, "step": 18960 }, { "epoch": 0.242816, "grad_norm": 1.5625, "learning_rate": 2.6001203380544265e-05, "loss": 2.304, "step": 18970 }, { "epoch": 0.242944, "grad_norm": 1.7265625, "learning_rate": 2.599707571561347e-05, "loss": 2.2839, "step": 18980 }, { "epoch": 0.243072, "grad_norm": 1.53125, "learning_rate": 2.599294624943377e-05, "loss": 2.2713, "step": 18990 }, { "epoch": 0.2432, "grad_norm": 1.65625, "learning_rate": 2.598881498268155e-05, "loss": 2.3225, "step": 19000 }, { "epoch": 0.243328, "grad_norm": 1.5859375, "learning_rate": 2.598468191603349e-05, "loss": 2.3127, "step": 19010 }, { "epoch": 0.243456, "grad_norm": 1.9609375, "learning_rate": 2.5980547050166555e-05, "loss": 2.3298, "step": 19020 }, { "epoch": 0.243584, "grad_norm": 1.5390625, "learning_rate": 2.5976410385758006e-05, "loss": 2.3021, "step": 19030 }, { "epoch": 0.243712, "grad_norm": 1.53125, "learning_rate": 2.59722719234854e-05, "loss": 2.2823, "step": 19040 }, { "epoch": 0.24384, "grad_norm": 1.625, "learning_rate": 2.59681316640266e-05, "loss": 2.3174, "step": 19050 }, { "epoch": 0.243968, "grad_norm": 1.46875, "learning_rate": 2.5963989608059742e-05, "loss": 2.2908, "step": 19060 }, { "epoch": 0.244096, "grad_norm": 1.6328125, "learning_rate": 2.5959845756263275e-05, "loss": 2.2837, "step": 19070 }, { "epoch": 0.244224, "grad_norm": 1.6015625, "learning_rate": 2.5955700109315934e-05, "loss": 2.304, "step": 19080 }, { "epoch": 0.244352, "grad_norm": 1.5, "learning_rate": 2.5951552667896747e-05, "loss": 2.2924, "step": 19090 }, { "epoch": 0.24448, "grad_norm": 1.4140625, "learning_rate": 2.594740343268504e-05, "loss": 2.285, "step": 19100 }, { "epoch": 0.244608, "grad_norm": 1.4453125, "learning_rate": 2.5943252404360425e-05, "loss": 2.2938, "step": 19110 }, { "epoch": 0.244736, "grad_norm": 5.78125, "learning_rate": 2.5939099583602815e-05, "loss": 2.2885, "step": 19120 }, { "epoch": 0.244864, "grad_norm": 1.5390625, "learning_rate": 2.5934944971092417e-05, "loss": 2.3014, "step": 19130 }, { "epoch": 0.244992, "grad_norm": 1.5546875, "learning_rate": 2.5930788567509733e-05, "loss": 2.3003, "step": 19140 }, { "epoch": 0.24512, "grad_norm": 1.578125, "learning_rate": 2.5926630373535552e-05, "loss": 2.3122, "step": 19150 }, { "epoch": 0.245248, "grad_norm": 1.5078125, "learning_rate": 2.5922470389850952e-05, "loss": 2.2884, "step": 19160 }, { "epoch": 0.245376, "grad_norm": 1.484375, "learning_rate": 2.5918308617137317e-05, "loss": 2.2837, "step": 19170 }, { "epoch": 0.245504, "grad_norm": 1.5390625, "learning_rate": 2.591414505607632e-05, "loss": 2.2858, "step": 19180 }, { "epoch": 0.245632, "grad_norm": 1.4921875, "learning_rate": 2.5909979707349916e-05, "loss": 2.2937, "step": 19190 }, { "epoch": 0.24576, "grad_norm": 1.765625, "learning_rate": 2.590581257164038e-05, "loss": 2.2853, "step": 19200 }, { "epoch": 0.245888, "grad_norm": 1.7265625, "learning_rate": 2.590164364963024e-05, "loss": 2.3113, "step": 19210 }, { "epoch": 0.246016, "grad_norm": 1.6171875, "learning_rate": 2.5897472942002353e-05, "loss": 2.2822, "step": 19220 }, { "epoch": 0.246144, "grad_norm": 1.484375, "learning_rate": 2.5893300449439847e-05, "loss": 2.305, "step": 19230 }, { "epoch": 0.246272, "grad_norm": 1.6171875, "learning_rate": 2.5889126172626152e-05, "loss": 2.3247, "step": 19240 }, { "epoch": 0.2464, "grad_norm": 5.21875, "learning_rate": 2.588495011224498e-05, "loss": 2.297, "step": 19250 }, { "epoch": 0.246528, "grad_norm": 1.53125, "learning_rate": 2.5880772268980352e-05, "loss": 2.2834, "step": 19260 }, { "epoch": 0.246656, "grad_norm": 1.625, "learning_rate": 2.587659264351657e-05, "loss": 2.2951, "step": 19270 }, { "epoch": 0.246784, "grad_norm": 1.5703125, "learning_rate": 2.5872411236538226e-05, "loss": 2.3057, "step": 19280 }, { "epoch": 0.246912, "grad_norm": 1.5703125, "learning_rate": 2.5868228048730203e-05, "loss": 2.3069, "step": 19290 }, { "epoch": 0.24704, "grad_norm": 1.40625, "learning_rate": 2.5864043080777686e-05, "loss": 2.2838, "step": 19300 }, { "epoch": 0.247168, "grad_norm": 1.4296875, "learning_rate": 2.585985633336614e-05, "loss": 2.2905, "step": 19310 }, { "epoch": 0.247296, "grad_norm": 1.5390625, "learning_rate": 2.5855667807181338e-05, "loss": 2.2639, "step": 19320 }, { "epoch": 0.247424, "grad_norm": 1.5078125, "learning_rate": 2.585147750290932e-05, "loss": 2.2662, "step": 19330 }, { "epoch": 0.247552, "grad_norm": 1.7265625, "learning_rate": 2.5847285421236442e-05, "loss": 2.2984, "step": 19340 }, { "epoch": 0.24768, "grad_norm": 1.5546875, "learning_rate": 2.584309156284933e-05, "loss": 2.2893, "step": 19350 }, { "epoch": 0.247808, "grad_norm": 1.4375, "learning_rate": 2.583889592843491e-05, "loss": 2.3035, "step": 19360 }, { "epoch": 0.247936, "grad_norm": 1.640625, "learning_rate": 2.5834698518680405e-05, "loss": 2.3061, "step": 19370 }, { "epoch": 0.248064, "grad_norm": 1.5703125, "learning_rate": 2.5830499334273323e-05, "loss": 2.2974, "step": 19380 }, { "epoch": 0.248192, "grad_norm": 1.7890625, "learning_rate": 2.582629837590146e-05, "loss": 2.282, "step": 19390 }, { "epoch": 0.24832, "grad_norm": 1.4765625, "learning_rate": 2.582209564425291e-05, "loss": 2.3031, "step": 19400 }, { "epoch": 0.248448, "grad_norm": 1.671875, "learning_rate": 2.5817891140016048e-05, "loss": 2.2849, "step": 19410 }, { "epoch": 0.248576, "grad_norm": 1.53125, "learning_rate": 2.5813684863879544e-05, "loss": 2.2921, "step": 19420 }, { "epoch": 0.248704, "grad_norm": 1.46875, "learning_rate": 2.5809476816532363e-05, "loss": 2.257, "step": 19430 }, { "epoch": 0.248832, "grad_norm": 1.453125, "learning_rate": 2.580526699866375e-05, "loss": 2.3085, "step": 19440 }, { "epoch": 0.24896, "grad_norm": 1.5234375, "learning_rate": 2.5801055410963252e-05, "loss": 2.2794, "step": 19450 }, { "epoch": 0.249088, "grad_norm": 1.6640625, "learning_rate": 2.5796842054120694e-05, "loss": 2.3098, "step": 19460 }, { "epoch": 0.249216, "grad_norm": 1.609375, "learning_rate": 2.5792626928826198e-05, "loss": 2.2693, "step": 19470 }, { "epoch": 0.249344, "grad_norm": 1.5703125, "learning_rate": 2.5788410035770177e-05, "loss": 2.3104, "step": 19480 }, { "epoch": 0.249472, "grad_norm": 1.515625, "learning_rate": 2.578419137564332e-05, "loss": 2.2556, "step": 19490 }, { "epoch": 0.2496, "grad_norm": 1.5, "learning_rate": 2.577997094913663e-05, "loss": 2.2797, "step": 19500 }, { "epoch": 0.249728, "grad_norm": 1.515625, "learning_rate": 2.5775748756941374e-05, "loss": 2.2517, "step": 19510 }, { "epoch": 0.249856, "grad_norm": 1.4921875, "learning_rate": 2.5771524799749126e-05, "loss": 2.2948, "step": 19520 }, { "epoch": 0.249984, "grad_norm": 1.4765625, "learning_rate": 2.5767299078251743e-05, "loss": 2.2665, "step": 19530 }, { "epoch": 0.250112, "grad_norm": 1.515625, "learning_rate": 2.5763071593141362e-05, "loss": 2.3058, "step": 19540 }, { "epoch": 0.25024, "grad_norm": 1.7109375, "learning_rate": 2.575884234511043e-05, "loss": 2.3171, "step": 19550 }, { "epoch": 0.250368, "grad_norm": 1.53125, "learning_rate": 2.5754611334851663e-05, "loss": 2.2809, "step": 19560 }, { "epoch": 0.250496, "grad_norm": 1.71875, "learning_rate": 2.575037856305807e-05, "loss": 2.2925, "step": 19570 }, { "epoch": 0.250624, "grad_norm": 1.703125, "learning_rate": 2.574614403042295e-05, "loss": 2.2786, "step": 19580 }, { "epoch": 0.250752, "grad_norm": 1.8515625, "learning_rate": 2.5741907737639903e-05, "loss": 2.3099, "step": 19590 }, { "epoch": 0.25088, "grad_norm": 1.578125, "learning_rate": 2.57376696854028e-05, "loss": 2.2893, "step": 19600 }, { "epoch": 0.251008, "grad_norm": 1.546875, "learning_rate": 2.57334298744058e-05, "loss": 2.286, "step": 19610 }, { "epoch": 0.251136, "grad_norm": 1.5859375, "learning_rate": 2.5729188305343365e-05, "loss": 2.279, "step": 19620 }, { "epoch": 0.251264, "grad_norm": 3.546875, "learning_rate": 2.5724944978910226e-05, "loss": 2.2982, "step": 19630 }, { "epoch": 0.251392, "grad_norm": 1.59375, "learning_rate": 2.5720699895801428e-05, "loss": 2.2504, "step": 19640 }, { "epoch": 0.25152, "grad_norm": 1.578125, "learning_rate": 2.5716453056712273e-05, "loss": 2.2953, "step": 19650 }, { "epoch": 0.251648, "grad_norm": 1.5859375, "learning_rate": 2.5712204462338374e-05, "loss": 2.3086, "step": 19660 }, { "epoch": 0.251776, "grad_norm": 1.6328125, "learning_rate": 2.5707954113375617e-05, "loss": 2.2953, "step": 19670 }, { "epoch": 0.251904, "grad_norm": 1.578125, "learning_rate": 2.570370201052018e-05, "loss": 2.3041, "step": 19680 }, { "epoch": 0.252032, "grad_norm": 1.515625, "learning_rate": 2.5699448154468545e-05, "loss": 2.2661, "step": 19690 }, { "epoch": 0.25216, "grad_norm": 1.5, "learning_rate": 2.5695192545917446e-05, "loss": 2.2962, "step": 19700 }, { "epoch": 0.252288, "grad_norm": 1.765625, "learning_rate": 2.569093518556393e-05, "loss": 2.2555, "step": 19710 }, { "epoch": 0.252416, "grad_norm": 1.4765625, "learning_rate": 2.5686676074105334e-05, "loss": 2.3258, "step": 19720 }, { "epoch": 0.252544, "grad_norm": 1.609375, "learning_rate": 2.568241521223926e-05, "loss": 2.2709, "step": 19730 }, { "epoch": 0.252672, "grad_norm": 1.6171875, "learning_rate": 2.567815260066362e-05, "loss": 2.3142, "step": 19740 }, { "epoch": 0.2528, "grad_norm": 1.484375, "learning_rate": 2.567388824007659e-05, "loss": 2.3043, "step": 19750 }, { "epoch": 0.252928, "grad_norm": 1.3515625, "learning_rate": 2.5669622131176654e-05, "loss": 2.3017, "step": 19760 }, { "epoch": 0.253056, "grad_norm": 1.5703125, "learning_rate": 2.566535427466257e-05, "loss": 2.2725, "step": 19770 }, { "epoch": 0.253184, "grad_norm": 1.5390625, "learning_rate": 2.5661084671233384e-05, "loss": 2.2873, "step": 19780 }, { "epoch": 0.253312, "grad_norm": 1.4765625, "learning_rate": 2.565681332158843e-05, "loss": 2.2909, "step": 19790 }, { "epoch": 0.25344, "grad_norm": 1.5390625, "learning_rate": 2.5652540226427324e-05, "loss": 2.3185, "step": 19800 }, { "epoch": 0.253568, "grad_norm": 1.609375, "learning_rate": 2.5648265386449977e-05, "loss": 2.2746, "step": 19810 }, { "epoch": 0.253696, "grad_norm": 1.5234375, "learning_rate": 2.564398880235657e-05, "loss": 2.3057, "step": 19820 }, { "epoch": 0.253824, "grad_norm": 1.609375, "learning_rate": 2.563971047484759e-05, "loss": 2.2991, "step": 19830 }, { "epoch": 0.253952, "grad_norm": 1.53125, "learning_rate": 2.5635430404623797e-05, "loss": 2.2795, "step": 19840 }, { "epoch": 0.25408, "grad_norm": 1.609375, "learning_rate": 2.563114859238623e-05, "loss": 2.2806, "step": 19850 }, { "epoch": 0.254208, "grad_norm": 1.515625, "learning_rate": 2.562686503883623e-05, "loss": 2.2992, "step": 19860 }, { "epoch": 0.254336, "grad_norm": 1.96875, "learning_rate": 2.5622579744675416e-05, "loss": 2.2928, "step": 19870 }, { "epoch": 0.254464, "grad_norm": 1.625, "learning_rate": 2.5618292710605684e-05, "loss": 2.2654, "step": 19880 }, { "epoch": 0.254592, "grad_norm": 1.53125, "learning_rate": 2.561400393732923e-05, "loss": 2.2998, "step": 19890 }, { "epoch": 0.25472, "grad_norm": 1.625, "learning_rate": 2.5609713425548518e-05, "loss": 2.3121, "step": 19900 }, { "epoch": 0.254848, "grad_norm": 1.546875, "learning_rate": 2.5605421175966315e-05, "loss": 2.2746, "step": 19910 }, { "epoch": 0.254976, "grad_norm": 1.5078125, "learning_rate": 2.5601127189285655e-05, "loss": 2.2985, "step": 19920 }, { "epoch": 0.255104, "grad_norm": 1.5703125, "learning_rate": 2.5596831466209872e-05, "loss": 2.2814, "step": 19930 }, { "epoch": 0.255232, "grad_norm": 1.4453125, "learning_rate": 2.5592534007442573e-05, "loss": 2.3083, "step": 19940 }, { "epoch": 0.25536, "grad_norm": 1.5859375, "learning_rate": 2.5588234813687655e-05, "loss": 2.2902, "step": 19950 }, { "epoch": 0.255488, "grad_norm": 1.5546875, "learning_rate": 2.5583933885649292e-05, "loss": 2.2965, "step": 19960 }, { "epoch": 0.255616, "grad_norm": 1.5546875, "learning_rate": 2.5579631224031957e-05, "loss": 2.3062, "step": 19970 }, { "epoch": 0.255744, "grad_norm": 1.4140625, "learning_rate": 2.55753268295404e-05, "loss": 2.2813, "step": 19980 }, { "epoch": 0.255872, "grad_norm": 1.53125, "learning_rate": 2.557102070287964e-05, "loss": 2.297, "step": 19990 }, { "epoch": 0.256, "grad_norm": 1.53125, "learning_rate": 2.5566712844755e-05, "loss": 2.2766, "step": 20000 }, { "epoch": 0.256128, "grad_norm": 1.5625, "learning_rate": 2.5562403255872078e-05, "loss": 2.2948, "step": 20010 }, { "epoch": 0.256256, "grad_norm": 1.5625, "learning_rate": 2.5558091936936757e-05, "loss": 2.3118, "step": 20020 }, { "epoch": 0.256384, "grad_norm": 1.5859375, "learning_rate": 2.55537788886552e-05, "loss": 2.2982, "step": 20030 }, { "epoch": 0.256512, "grad_norm": 1.765625, "learning_rate": 2.5549464111733862e-05, "loss": 2.2796, "step": 20040 }, { "epoch": 0.25664, "grad_norm": 1.640625, "learning_rate": 2.5545147606879465e-05, "loss": 2.3106, "step": 20050 }, { "epoch": 0.256768, "grad_norm": 1.6328125, "learning_rate": 2.554082937479904e-05, "loss": 2.3, "step": 20060 }, { "epoch": 0.256896, "grad_norm": 1.5703125, "learning_rate": 2.5536509416199864e-05, "loss": 2.2933, "step": 20070 }, { "epoch": 0.257024, "grad_norm": 1.40625, "learning_rate": 2.5532187731789534e-05, "loss": 2.2875, "step": 20080 }, { "epoch": 0.257152, "grad_norm": 1.5859375, "learning_rate": 2.5527864322275912e-05, "loss": 2.3064, "step": 20090 }, { "epoch": 0.25728, "grad_norm": 1.375, "learning_rate": 2.5523539188367135e-05, "loss": 2.3126, "step": 20100 }, { "epoch": 0.257408, "grad_norm": 1.6171875, "learning_rate": 2.551921233077164e-05, "loss": 2.3252, "step": 20110 }, { "epoch": 0.257536, "grad_norm": 1.4765625, "learning_rate": 2.5514883750198137e-05, "loss": 2.309, "step": 20120 }, { "epoch": 0.257664, "grad_norm": 1.625, "learning_rate": 2.551055344735562e-05, "loss": 2.2838, "step": 20130 }, { "epoch": 0.257792, "grad_norm": 1.5, "learning_rate": 2.550622142295336e-05, "loss": 2.2834, "step": 20140 }, { "epoch": 0.25792, "grad_norm": 1.5703125, "learning_rate": 2.5501887677700915e-05, "loss": 2.2622, "step": 20150 }, { "epoch": 0.258048, "grad_norm": 1.4140625, "learning_rate": 2.5497552212308124e-05, "loss": 2.2699, "step": 20160 }, { "epoch": 0.258176, "grad_norm": 1.6328125, "learning_rate": 2.5493215027485107e-05, "loss": 2.2807, "step": 20170 }, { "epoch": 0.258304, "grad_norm": 1.5, "learning_rate": 2.5488876123942272e-05, "loss": 2.2745, "step": 20180 }, { "epoch": 0.258432, "grad_norm": 1.6171875, "learning_rate": 2.54845355023903e-05, "loss": 2.2708, "step": 20190 }, { "epoch": 0.25856, "grad_norm": 1.6171875, "learning_rate": 2.5480193163540156e-05, "loss": 2.2956, "step": 20200 }, { "epoch": 0.258688, "grad_norm": 1.5078125, "learning_rate": 2.5475849108103083e-05, "loss": 2.2912, "step": 20210 }, { "epoch": 0.258816, "grad_norm": 1.4609375, "learning_rate": 2.5471503336790613e-05, "loss": 2.2897, "step": 20220 }, { "epoch": 0.258944, "grad_norm": 1.546875, "learning_rate": 2.546715585031456e-05, "loss": 2.2829, "step": 20230 }, { "epoch": 0.259072, "grad_norm": 1.65625, "learning_rate": 2.5462806649387e-05, "loss": 2.3073, "step": 20240 }, { "epoch": 0.2592, "grad_norm": 1.546875, "learning_rate": 2.545845573472031e-05, "loss": 2.3149, "step": 20250 }, { "epoch": 0.259328, "grad_norm": 1.609375, "learning_rate": 2.5454103107027152e-05, "loss": 2.3154, "step": 20260 }, { "epoch": 0.259456, "grad_norm": 1.6171875, "learning_rate": 2.5449748767020442e-05, "loss": 2.3147, "step": 20270 }, { "epoch": 0.259584, "grad_norm": 1.515625, "learning_rate": 2.54453927154134e-05, "loss": 2.262, "step": 20280 }, { "epoch": 0.259712, "grad_norm": 1.546875, "learning_rate": 2.5441034952919515e-05, "loss": 2.3054, "step": 20290 }, { "epoch": 0.25984, "grad_norm": 1.671875, "learning_rate": 2.543667548025256e-05, "loss": 2.3469, "step": 20300 }, { "epoch": 0.259968, "grad_norm": 1.5625, "learning_rate": 2.5432314298126597e-05, "loss": 2.2743, "step": 20310 }, { "epoch": 0.260096, "grad_norm": 1.4296875, "learning_rate": 2.5427951407255952e-05, "loss": 2.2974, "step": 20320 }, { "epoch": 0.260224, "grad_norm": 3.203125, "learning_rate": 2.5423586808355238e-05, "loss": 2.2989, "step": 20330 }, { "epoch": 0.260352, "grad_norm": 1.484375, "learning_rate": 2.5419220502139346e-05, "loss": 2.2746, "step": 20340 }, { "epoch": 0.26048, "grad_norm": 1.734375, "learning_rate": 2.541485248932345e-05, "loss": 2.3158, "step": 20350 }, { "epoch": 0.260608, "grad_norm": 2.4375, "learning_rate": 2.5410482770623004e-05, "loss": 2.292, "step": 20360 }, { "epoch": 0.260736, "grad_norm": 1.6953125, "learning_rate": 2.5406111346753737e-05, "loss": 2.2904, "step": 20370 }, { "epoch": 0.260864, "grad_norm": 1.515625, "learning_rate": 2.5401738218431666e-05, "loss": 2.2972, "step": 20380 }, { "epoch": 0.260992, "grad_norm": 1.5859375, "learning_rate": 2.5397363386373066e-05, "loss": 2.2797, "step": 20390 }, { "epoch": 0.26112, "grad_norm": 1.6796875, "learning_rate": 2.5392986851294516e-05, "loss": 2.3011, "step": 20400 }, { "epoch": 0.261248, "grad_norm": 1.6328125, "learning_rate": 2.538860861391287e-05, "loss": 2.3275, "step": 20410 }, { "epoch": 0.261376, "grad_norm": 1.515625, "learning_rate": 2.5384228674945238e-05, "loss": 2.2928, "step": 20420 }, { "epoch": 0.261504, "grad_norm": 5.65625, "learning_rate": 2.5379847035109037e-05, "loss": 2.2919, "step": 20430 }, { "epoch": 0.261632, "grad_norm": 12.0, "learning_rate": 2.537546369512195e-05, "loss": 2.2643, "step": 20440 }, { "epoch": 0.26176, "grad_norm": 1.4921875, "learning_rate": 2.5371078655701935e-05, "loss": 2.3242, "step": 20450 }, { "epoch": 0.261888, "grad_norm": 1.4609375, "learning_rate": 2.536669191756724e-05, "loss": 2.297, "step": 20460 }, { "epoch": 0.262016, "grad_norm": 1.5390625, "learning_rate": 2.5362303481436377e-05, "loss": 2.2974, "step": 20470 }, { "epoch": 0.262144, "grad_norm": 1.7578125, "learning_rate": 2.5357913348028144e-05, "loss": 2.2865, "step": 20480 }, { "epoch": 0.262272, "grad_norm": 1.671875, "learning_rate": 2.5353521518061615e-05, "loss": 2.296, "step": 20490 }, { "epoch": 0.2624, "grad_norm": 6.34375, "learning_rate": 2.5349127992256148e-05, "loss": 2.2959, "step": 20500 }, { "epoch": 0.262528, "grad_norm": 1.5703125, "learning_rate": 2.5344732771331372e-05, "loss": 2.2624, "step": 20510 }, { "epoch": 0.262656, "grad_norm": 1.4765625, "learning_rate": 2.5340335856007195e-05, "loss": 2.2995, "step": 20520 }, { "epoch": 0.262784, "grad_norm": 1.46875, "learning_rate": 2.5335937247003803e-05, "loss": 2.2774, "step": 20530 }, { "epoch": 0.262912, "grad_norm": 1.640625, "learning_rate": 2.5331536945041654e-05, "loss": 2.3072, "step": 20540 }, { "epoch": 0.26304, "grad_norm": 1.65625, "learning_rate": 2.5327134950841497e-05, "loss": 2.3023, "step": 20550 }, { "epoch": 0.263168, "grad_norm": 1.671875, "learning_rate": 2.5322731265124344e-05, "loss": 2.2858, "step": 20560 }, { "epoch": 0.263296, "grad_norm": 1.4921875, "learning_rate": 2.5318325888611494e-05, "loss": 2.2806, "step": 20570 }, { "epoch": 0.263424, "grad_norm": 1.578125, "learning_rate": 2.531391882202451e-05, "loss": 2.3218, "step": 20580 }, { "epoch": 0.263552, "grad_norm": 1.484375, "learning_rate": 2.5309510066085253e-05, "loss": 2.3107, "step": 20590 }, { "epoch": 0.26368, "grad_norm": 1.6328125, "learning_rate": 2.5305099621515843e-05, "loss": 2.3062, "step": 20600 }, { "epoch": 0.263808, "grad_norm": 1.6171875, "learning_rate": 2.5300687489038685e-05, "loss": 2.3018, "step": 20610 }, { "epoch": 0.263936, "grad_norm": 1.5546875, "learning_rate": 2.5296273669376443e-05, "loss": 2.3031, "step": 20620 }, { "epoch": 0.264064, "grad_norm": 1.4921875, "learning_rate": 2.5291858163252088e-05, "loss": 2.2777, "step": 20630 }, { "epoch": 0.264192, "grad_norm": 1.6640625, "learning_rate": 2.5287440971388847e-05, "loss": 2.2854, "step": 20640 }, { "epoch": 0.26432, "grad_norm": 1.5859375, "learning_rate": 2.5283022094510224e-05, "loss": 2.2993, "step": 20650 }, { "epoch": 0.264448, "grad_norm": 1.359375, "learning_rate": 2.5278601533340006e-05, "loss": 2.2923, "step": 20660 }, { "epoch": 0.264576, "grad_norm": 1.5703125, "learning_rate": 2.5274179288602247e-05, "loss": 2.3082, "step": 20670 }, { "epoch": 0.264704, "grad_norm": 1.5703125, "learning_rate": 2.526975536102129e-05, "loss": 2.2963, "step": 20680 }, { "epoch": 0.264832, "grad_norm": 1.578125, "learning_rate": 2.526532975132173e-05, "loss": 2.3185, "step": 20690 }, { "epoch": 0.26496, "grad_norm": 1.6640625, "learning_rate": 2.5260902460228467e-05, "loss": 2.2777, "step": 20700 }, { "epoch": 0.265088, "grad_norm": 1.71875, "learning_rate": 2.525647348846666e-05, "loss": 2.2673, "step": 20710 }, { "epoch": 0.265216, "grad_norm": 1.8515625, "learning_rate": 2.5252042836761737e-05, "loss": 2.2669, "step": 20720 }, { "epoch": 0.265344, "grad_norm": 1.546875, "learning_rate": 2.524761050583942e-05, "loss": 2.307, "step": 20730 }, { "epoch": 0.265472, "grad_norm": 1.6328125, "learning_rate": 2.5243176496425692e-05, "loss": 2.3144, "step": 20740 }, { "epoch": 0.2656, "grad_norm": 1.6328125, "learning_rate": 2.5238740809246816e-05, "loss": 2.3066, "step": 20750 }, { "epoch": 0.265728, "grad_norm": 1.609375, "learning_rate": 2.5234303445029323e-05, "loss": 2.3393, "step": 20760 }, { "epoch": 0.265856, "grad_norm": 1.671875, "learning_rate": 2.5229864404500027e-05, "loss": 2.2948, "step": 20770 }, { "epoch": 0.265984, "grad_norm": 1.53125, "learning_rate": 2.5225423688386018e-05, "loss": 2.2956, "step": 20780 }, { "epoch": 0.266112, "grad_norm": 1.53125, "learning_rate": 2.5220981297414653e-05, "loss": 2.3213, "step": 20790 }, { "epoch": 0.26624, "grad_norm": 1.734375, "learning_rate": 2.5216537232313568e-05, "loss": 2.3161, "step": 20800 }, { "epoch": 0.266368, "grad_norm": 1.515625, "learning_rate": 2.521209149381066e-05, "loss": 2.2744, "step": 20810 }, { "epoch": 0.266496, "grad_norm": 2.484375, "learning_rate": 2.5207644082634135e-05, "loss": 2.3013, "step": 20820 }, { "epoch": 0.266624, "grad_norm": 1.921875, "learning_rate": 2.5203194999512433e-05, "loss": 2.3095, "step": 20830 }, { "epoch": 0.266752, "grad_norm": 1.546875, "learning_rate": 2.5198744245174286e-05, "loss": 2.2975, "step": 20840 }, { "epoch": 0.26688, "grad_norm": 1.5625, "learning_rate": 2.51942918203487e-05, "loss": 2.2812, "step": 20850 }, { "epoch": 0.267008, "grad_norm": 1.4296875, "learning_rate": 2.5189837725764956e-05, "loss": 2.3074, "step": 20860 }, { "epoch": 0.267136, "grad_norm": 2.421875, "learning_rate": 2.5185381962152605e-05, "loss": 2.2806, "step": 20870 }, { "epoch": 0.267264, "grad_norm": 1.421875, "learning_rate": 2.518092453024147e-05, "loss": 2.3191, "step": 20880 }, { "epoch": 0.267392, "grad_norm": 13.5625, "learning_rate": 2.5176465430761644e-05, "loss": 2.3153, "step": 20890 }, { "epoch": 0.26752, "grad_norm": 1.5546875, "learning_rate": 2.5172004664443503e-05, "loss": 2.2606, "step": 20900 }, { "epoch": 0.267648, "grad_norm": 1.5625, "learning_rate": 2.51675422320177e-05, "loss": 2.3078, "step": 20910 }, { "epoch": 0.267776, "grad_norm": 4.90625, "learning_rate": 2.5163078134215138e-05, "loss": 2.2736, "step": 20920 }, { "epoch": 0.267904, "grad_norm": 1.609375, "learning_rate": 2.5158612371767016e-05, "loss": 2.3073, "step": 20930 }, { "epoch": 0.268032, "grad_norm": 1.6328125, "learning_rate": 2.5154144945404786e-05, "loss": 2.3318, "step": 20940 }, { "epoch": 0.26816, "grad_norm": 1.734375, "learning_rate": 2.51496758558602e-05, "loss": 2.2684, "step": 20950 }, { "epoch": 0.268288, "grad_norm": 1.6484375, "learning_rate": 2.514520510386525e-05, "loss": 2.2967, "step": 20960 }, { "epoch": 0.268416, "grad_norm": 1.4765625, "learning_rate": 2.5140732690152226e-05, "loss": 2.3105, "step": 20970 }, { "epoch": 0.268544, "grad_norm": 1.640625, "learning_rate": 2.5136258615453672e-05, "loss": 2.332, "step": 20980 }, { "epoch": 0.268672, "grad_norm": 1.4921875, "learning_rate": 2.5131782880502424e-05, "loss": 2.2957, "step": 20990 }, { "epoch": 0.2688, "grad_norm": 1.78125, "learning_rate": 2.5127305486031564e-05, "loss": 2.2853, "step": 21000 }, { "epoch": 0.268928, "grad_norm": 2.78125, "learning_rate": 2.5122826432774466e-05, "loss": 2.2959, "step": 21010 }, { "epoch": 0.269056, "grad_norm": 1.609375, "learning_rate": 2.5118345721464768e-05, "loss": 2.3107, "step": 21020 }, { "epoch": 0.269184, "grad_norm": 1.6875, "learning_rate": 2.5113863352836388e-05, "loss": 2.2856, "step": 21030 }, { "epoch": 0.269312, "grad_norm": 1.6171875, "learning_rate": 2.5109379327623497e-05, "loss": 2.2826, "step": 21040 }, { "epoch": 0.26944, "grad_norm": 1.5390625, "learning_rate": 2.5104893646560562e-05, "loss": 2.2998, "step": 21050 }, { "epoch": 0.269568, "grad_norm": 1.5234375, "learning_rate": 2.51004063103823e-05, "loss": 2.2885, "step": 21060 }, { "epoch": 0.269696, "grad_norm": 1.4765625, "learning_rate": 2.5095917319823708e-05, "loss": 2.2419, "step": 21070 }, { "epoch": 0.269824, "grad_norm": 2.046875, "learning_rate": 2.509142667562005e-05, "loss": 2.3005, "step": 21080 }, { "epoch": 0.269952, "grad_norm": 1.5234375, "learning_rate": 2.5086934378506876e-05, "loss": 2.2431, "step": 21090 }, { "epoch": 0.27008, "grad_norm": 1.4921875, "learning_rate": 2.5082440429219982e-05, "loss": 2.3134, "step": 21100 }, { "epoch": 0.270208, "grad_norm": 1.609375, "learning_rate": 2.507794482849545e-05, "loss": 2.2994, "step": 21110 }, { "epoch": 0.270336, "grad_norm": 2.03125, "learning_rate": 2.5073447577069637e-05, "loss": 2.2657, "step": 21120 }, { "epoch": 0.270464, "grad_norm": 2.0, "learning_rate": 2.506894867567916e-05, "loss": 2.307, "step": 21130 }, { "epoch": 0.270592, "grad_norm": 1.5703125, "learning_rate": 2.50644481250609e-05, "loss": 2.3153, "step": 21140 }, { "epoch": 0.27072, "grad_norm": 1.75, "learning_rate": 2.505994592595203e-05, "loss": 2.3003, "step": 21150 }, { "epoch": 0.270848, "grad_norm": 1.4609375, "learning_rate": 2.5055442079089976e-05, "loss": 2.2882, "step": 21160 }, { "epoch": 0.270976, "grad_norm": 1.671875, "learning_rate": 2.5050936585212437e-05, "loss": 2.2721, "step": 21170 }, { "epoch": 0.271104, "grad_norm": 1.515625, "learning_rate": 2.504642944505739e-05, "loss": 2.2985, "step": 21180 }, { "epoch": 0.271232, "grad_norm": 1.640625, "learning_rate": 2.504192065936307e-05, "loss": 2.3098, "step": 21190 }, { "epoch": 0.27136, "grad_norm": 1.4765625, "learning_rate": 2.5037410228867983e-05, "loss": 2.3458, "step": 21200 }, { "epoch": 0.271488, "grad_norm": 1.5390625, "learning_rate": 2.5032898154310914e-05, "loss": 2.2865, "step": 21210 }, { "epoch": 0.271616, "grad_norm": 1.703125, "learning_rate": 2.5028384436430913e-05, "loss": 2.3044, "step": 21220 }, { "epoch": 0.271744, "grad_norm": 1.96875, "learning_rate": 2.5023869075967286e-05, "loss": 2.3161, "step": 21230 }, { "epoch": 0.271872, "grad_norm": 1.7265625, "learning_rate": 2.501935207365963e-05, "loss": 2.2885, "step": 21240 }, { "epoch": 0.272, "grad_norm": 1.5546875, "learning_rate": 2.50148334302478e-05, "loss": 2.307, "step": 21250 }, { "epoch": 0.272128, "grad_norm": 1.546875, "learning_rate": 2.5010313146471914e-05, "loss": 2.278, "step": 21260 }, { "epoch": 0.272256, "grad_norm": 1.4609375, "learning_rate": 2.500579122307237e-05, "loss": 2.2818, "step": 21270 }, { "epoch": 0.272384, "grad_norm": 1.421875, "learning_rate": 2.5001267660789823e-05, "loss": 2.272, "step": 21280 }, { "epoch": 0.272512, "grad_norm": 1.53125, "learning_rate": 2.4996742460365215e-05, "loss": 2.2844, "step": 21290 }, { "epoch": 0.27264, "grad_norm": 1.6171875, "learning_rate": 2.499221562253973e-05, "loss": 2.306, "step": 21300 }, { "epoch": 0.272768, "grad_norm": 1.6484375, "learning_rate": 2.4987687148054846e-05, "loss": 2.3007, "step": 21310 }, { "epoch": 0.272896, "grad_norm": 5.40625, "learning_rate": 2.498315703765229e-05, "loss": 2.3104, "step": 21320 }, { "epoch": 0.273024, "grad_norm": 1.6875, "learning_rate": 2.4978625292074067e-05, "loss": 2.262, "step": 21330 }, { "epoch": 0.273152, "grad_norm": 1.4921875, "learning_rate": 2.497409191206245e-05, "loss": 2.283, "step": 21340 }, { "epoch": 0.27328, "grad_norm": 1.75, "learning_rate": 2.4969556898359968e-05, "loss": 2.3208, "step": 21350 }, { "epoch": 0.273408, "grad_norm": 1.59375, "learning_rate": 2.4965020251709437e-05, "loss": 2.2497, "step": 21360 }, { "epoch": 0.273536, "grad_norm": 1.546875, "learning_rate": 2.4960481972853922e-05, "loss": 2.3104, "step": 21370 }, { "epoch": 0.273664, "grad_norm": 1.6015625, "learning_rate": 2.495594206253677e-05, "loss": 2.2992, "step": 21380 }, { "epoch": 0.273792, "grad_norm": 2.390625, "learning_rate": 2.4951400521501583e-05, "loss": 2.2934, "step": 21390 }, { "epoch": 0.27392, "grad_norm": 1.4921875, "learning_rate": 2.4946857350492244e-05, "loss": 2.2927, "step": 21400 }, { "epoch": 0.274048, "grad_norm": 1.53125, "learning_rate": 2.4942312550252883e-05, "loss": 2.2739, "step": 21410 }, { "epoch": 0.274176, "grad_norm": 1.5234375, "learning_rate": 2.4937766121527917e-05, "loss": 2.3118, "step": 21420 }, { "epoch": 0.274304, "grad_norm": 1.5078125, "learning_rate": 2.4933218065062013e-05, "loss": 2.2667, "step": 21430 }, { "epoch": 0.274432, "grad_norm": 1.5625, "learning_rate": 2.4928668381600123e-05, "loss": 2.2841, "step": 21440 }, { "epoch": 0.27456, "grad_norm": 2.296875, "learning_rate": 2.4924117071887446e-05, "loss": 2.2637, "step": 21450 }, { "epoch": 0.274688, "grad_norm": 1.53125, "learning_rate": 2.4919564136669467e-05, "loss": 2.2935, "step": 21460 }, { "epoch": 0.274816, "grad_norm": 1.5390625, "learning_rate": 2.4915009576691922e-05, "loss": 2.3046, "step": 21470 }, { "epoch": 0.274944, "grad_norm": 1.5078125, "learning_rate": 2.4910453392700808e-05, "loss": 2.2548, "step": 21480 }, { "epoch": 0.275072, "grad_norm": 2.203125, "learning_rate": 2.490589558544241e-05, "loss": 2.2972, "step": 21490 }, { "epoch": 0.2752, "grad_norm": 1.625, "learning_rate": 2.4901336155663266e-05, "loss": 2.2759, "step": 21500 }, { "epoch": 0.275328, "grad_norm": 1.6328125, "learning_rate": 2.4896775104110185e-05, "loss": 2.2984, "step": 21510 }, { "epoch": 0.275456, "grad_norm": 1.59375, "learning_rate": 2.4892212431530223e-05, "loss": 2.3055, "step": 21520 }, { "epoch": 0.275584, "grad_norm": 1.4609375, "learning_rate": 2.4887648138670725e-05, "loss": 2.2861, "step": 21530 }, { "epoch": 0.275712, "grad_norm": 1.7734375, "learning_rate": 2.4883082226279295e-05, "loss": 2.2775, "step": 21540 }, { "epoch": 0.27584, "grad_norm": 1.4609375, "learning_rate": 2.4878514695103796e-05, "loss": 2.3075, "step": 21550 }, { "epoch": 0.275968, "grad_norm": 1.6171875, "learning_rate": 2.4873945545892357e-05, "loss": 2.3144, "step": 21560 }, { "epoch": 0.276096, "grad_norm": 1.734375, "learning_rate": 2.4869374779393377e-05, "loss": 2.311, "step": 21570 }, { "epoch": 0.276224, "grad_norm": 1.671875, "learning_rate": 2.486480239635552e-05, "loss": 2.3164, "step": 21580 }, { "epoch": 0.276352, "grad_norm": 1.484375, "learning_rate": 2.4860228397527703e-05, "loss": 2.2815, "step": 21590 }, { "epoch": 0.27648, "grad_norm": 1.546875, "learning_rate": 2.485565278365913e-05, "loss": 2.2858, "step": 21600 }, { "epoch": 0.276608, "grad_norm": 2.109375, "learning_rate": 2.4851075555499252e-05, "loss": 2.2953, "step": 21610 }, { "epoch": 0.276736, "grad_norm": 1.734375, "learning_rate": 2.484649671379778e-05, "loss": 2.283, "step": 21620 }, { "epoch": 0.276864, "grad_norm": 2.359375, "learning_rate": 2.4841916259304714e-05, "loss": 2.2698, "step": 21630 }, { "epoch": 0.276992, "grad_norm": 1.53125, "learning_rate": 2.4837334192770292e-05, "loss": 2.2817, "step": 21640 }, { "epoch": 0.27712, "grad_norm": 1.5625, "learning_rate": 2.4832750514945027e-05, "loss": 2.2687, "step": 21650 }, { "epoch": 0.277248, "grad_norm": 1.59375, "learning_rate": 2.4828165226579702e-05, "loss": 2.2909, "step": 21660 }, { "epoch": 0.277376, "grad_norm": 1.6171875, "learning_rate": 2.4823578328425347e-05, "loss": 2.2957, "step": 21670 }, { "epoch": 0.277504, "grad_norm": 1.6484375, "learning_rate": 2.4818989821233276e-05, "loss": 2.2747, "step": 21680 }, { "epoch": 0.277632, "grad_norm": 1.5625, "learning_rate": 2.4814399705755046e-05, "loss": 2.2884, "step": 21690 }, { "epoch": 0.27776, "grad_norm": 1.6953125, "learning_rate": 2.4809807982742493e-05, "loss": 2.2837, "step": 21700 }, { "epoch": 0.277888, "grad_norm": 1.5546875, "learning_rate": 2.4805214652947717e-05, "loss": 2.3011, "step": 21710 }, { "epoch": 0.278016, "grad_norm": 3.3125, "learning_rate": 2.4800619717123067e-05, "loss": 2.2831, "step": 21720 }, { "epoch": 0.278144, "grad_norm": 1.5625, "learning_rate": 2.4796023176021162e-05, "loss": 2.283, "step": 21730 }, { "epoch": 0.278272, "grad_norm": 1.5, "learning_rate": 2.4791425030394893e-05, "loss": 2.2723, "step": 21740 }, { "epoch": 0.2784, "grad_norm": 2.171875, "learning_rate": 2.4786825280997407e-05, "loss": 2.2646, "step": 21750 }, { "epoch": 0.278528, "grad_norm": 1.5703125, "learning_rate": 2.47822239285821e-05, "loss": 2.2615, "step": 21760 }, { "epoch": 0.278656, "grad_norm": 1.59375, "learning_rate": 2.477762097390266e-05, "loss": 2.2861, "step": 21770 }, { "epoch": 0.278784, "grad_norm": 1.4921875, "learning_rate": 2.4773016417713006e-05, "loss": 2.3107, "step": 21780 }, { "epoch": 0.278912, "grad_norm": 1.578125, "learning_rate": 2.4768410260767342e-05, "loss": 2.29, "step": 21790 }, { "epoch": 0.27904, "grad_norm": 1.4921875, "learning_rate": 2.4763802503820127e-05, "loss": 2.2767, "step": 21800 }, { "epoch": 0.279168, "grad_norm": 1.5, "learning_rate": 2.475919314762608e-05, "loss": 2.2813, "step": 21810 }, { "epoch": 0.279296, "grad_norm": 1.625, "learning_rate": 2.4754582192940176e-05, "loss": 2.3161, "step": 21820 }, { "epoch": 0.279424, "grad_norm": 1.5390625, "learning_rate": 2.4749969640517672e-05, "loss": 2.2879, "step": 21830 }, { "epoch": 0.279552, "grad_norm": 1.5078125, "learning_rate": 2.4745355491114066e-05, "loss": 2.2995, "step": 21840 }, { "epoch": 0.27968, "grad_norm": 2.4375, "learning_rate": 2.4740739745485127e-05, "loss": 2.3163, "step": 21850 }, { "epoch": 0.279808, "grad_norm": 1.4296875, "learning_rate": 2.4736122404386884e-05, "loss": 2.2973, "step": 21860 }, { "epoch": 0.279936, "grad_norm": 1.46875, "learning_rate": 2.4731503468575625e-05, "loss": 2.3138, "step": 21870 }, { "epoch": 0.280064, "grad_norm": 1.9140625, "learning_rate": 2.472688293880791e-05, "loss": 2.286, "step": 21880 }, { "epoch": 0.280192, "grad_norm": 1.5625, "learning_rate": 2.4722260815840538e-05, "loss": 2.2838, "step": 21890 }, { "epoch": 0.28032, "grad_norm": 1.609375, "learning_rate": 2.4717637100430592e-05, "loss": 2.3054, "step": 21900 }, { "epoch": 0.280448, "grad_norm": 1.484375, "learning_rate": 2.4713011793335397e-05, "loss": 2.3, "step": 21910 }, { "epoch": 0.280576, "grad_norm": 1.5859375, "learning_rate": 2.4708384895312567e-05, "loss": 2.3078, "step": 21920 }, { "epoch": 0.280704, "grad_norm": 10.9375, "learning_rate": 2.4703756407119937e-05, "loss": 2.2983, "step": 21930 }, { "epoch": 0.280832, "grad_norm": 1.5859375, "learning_rate": 2.4699126329515634e-05, "loss": 2.2795, "step": 21940 }, { "epoch": 0.28096, "grad_norm": 1.7109375, "learning_rate": 2.4694494663258032e-05, "loss": 2.3028, "step": 21950 }, { "epoch": 0.281088, "grad_norm": 1.453125, "learning_rate": 2.4689861409105765e-05, "loss": 2.2944, "step": 21960 }, { "epoch": 0.281216, "grad_norm": 1.8515625, "learning_rate": 2.468522656781773e-05, "loss": 2.2816, "step": 21970 }, { "epoch": 0.281344, "grad_norm": 1.546875, "learning_rate": 2.468059014015309e-05, "loss": 2.2926, "step": 21980 }, { "epoch": 0.281472, "grad_norm": 2.0625, "learning_rate": 2.4675952126871252e-05, "loss": 2.2766, "step": 21990 }, { "epoch": 0.2816, "grad_norm": 1.65625, "learning_rate": 2.4671312528731903e-05, "loss": 2.286, "step": 22000 }, { "epoch": 0.281728, "grad_norm": 1.5703125, "learning_rate": 2.4666671346494968e-05, "loss": 2.3259, "step": 22010 }, { "epoch": 0.281856, "grad_norm": 1.4453125, "learning_rate": 2.4662028580920652e-05, "loss": 2.3043, "step": 22020 }, { "epoch": 0.281984, "grad_norm": 1.5703125, "learning_rate": 2.46573842327694e-05, "loss": 2.2973, "step": 22030 }, { "epoch": 0.282112, "grad_norm": 1.5703125, "learning_rate": 2.465273830280193e-05, "loss": 2.2988, "step": 22040 }, { "epoch": 0.28224, "grad_norm": 1.6171875, "learning_rate": 2.4648090791779214e-05, "loss": 2.27, "step": 22050 }, { "epoch": 0.282368, "grad_norm": 1.625, "learning_rate": 2.464344170046249e-05, "loss": 2.2788, "step": 22060 }, { "epoch": 0.282496, "grad_norm": 1.84375, "learning_rate": 2.4638791029613245e-05, "loss": 2.2883, "step": 22070 }, { "epoch": 0.282624, "grad_norm": 1.671875, "learning_rate": 2.463413877999322e-05, "loss": 2.256, "step": 22080 }, { "epoch": 0.282752, "grad_norm": 1.546875, "learning_rate": 2.4629484952364433e-05, "loss": 2.2619, "step": 22090 }, { "epoch": 0.28288, "grad_norm": 1.5703125, "learning_rate": 2.462482954748915e-05, "loss": 2.2685, "step": 22100 }, { "epoch": 0.283008, "grad_norm": 1.4296875, "learning_rate": 2.4620172566129888e-05, "loss": 2.2972, "step": 22110 }, { "epoch": 0.283136, "grad_norm": 1.5234375, "learning_rate": 2.4615514009049443e-05, "loss": 2.2824, "step": 22120 }, { "epoch": 0.283264, "grad_norm": 1.6640625, "learning_rate": 2.4610853877010842e-05, "loss": 2.2827, "step": 22130 }, { "epoch": 0.283392, "grad_norm": 1.7109375, "learning_rate": 2.4606192170777393e-05, "loss": 2.2924, "step": 22140 }, { "epoch": 0.28352, "grad_norm": 1.5703125, "learning_rate": 2.460152889111265e-05, "loss": 2.3021, "step": 22150 }, { "epoch": 0.283648, "grad_norm": 1.5390625, "learning_rate": 2.4596864038780427e-05, "loss": 2.2788, "step": 22160 }, { "epoch": 0.283776, "grad_norm": 1.7890625, "learning_rate": 2.45921976145448e-05, "loss": 2.3002, "step": 22170 }, { "epoch": 0.283904, "grad_norm": 1.609375, "learning_rate": 2.458752961917009e-05, "loss": 2.3054, "step": 22180 }, { "epoch": 0.284032, "grad_norm": 1.546875, "learning_rate": 2.45828600534209e-05, "loss": 2.2889, "step": 22190 }, { "epoch": 0.28416, "grad_norm": 1.515625, "learning_rate": 2.4578188918062056e-05, "loss": 2.277, "step": 22200 }, { "epoch": 0.284288, "grad_norm": 1.578125, "learning_rate": 2.457351621385867e-05, "loss": 2.2869, "step": 22210 }, { "epoch": 0.284416, "grad_norm": 1.5234375, "learning_rate": 2.4568841941576098e-05, "loss": 2.2985, "step": 22220 }, { "epoch": 0.284544, "grad_norm": 1.4921875, "learning_rate": 2.4564166101979956e-05, "loss": 2.3129, "step": 22230 }, { "epoch": 0.284672, "grad_norm": 1.640625, "learning_rate": 2.455948869583611e-05, "loss": 2.3057, "step": 22240 }, { "epoch": 0.2848, "grad_norm": 1.5703125, "learning_rate": 2.45548097239107e-05, "loss": 2.2725, "step": 22250 }, { "epoch": 0.284928, "grad_norm": 1.5546875, "learning_rate": 2.4550129186970102e-05, "loss": 2.3048, "step": 22260 }, { "epoch": 0.285056, "grad_norm": 1.5625, "learning_rate": 2.454544708578096e-05, "loss": 2.2774, "step": 22270 }, { "epoch": 0.285184, "grad_norm": 1.5703125, "learning_rate": 2.454076342111017e-05, "loss": 2.3184, "step": 22280 }, { "epoch": 0.285312, "grad_norm": 2.171875, "learning_rate": 2.4536078193724885e-05, "loss": 2.32, "step": 22290 }, { "epoch": 0.28544, "grad_norm": 1.6328125, "learning_rate": 2.4531391404392522e-05, "loss": 2.286, "step": 22300 }, { "epoch": 0.285568, "grad_norm": 1.703125, "learning_rate": 2.4526703053880737e-05, "loss": 2.2971, "step": 22310 }, { "epoch": 0.285696, "grad_norm": 1.4296875, "learning_rate": 2.4522013142957453e-05, "loss": 2.2834, "step": 22320 }, { "epoch": 0.285824, "grad_norm": 1.515625, "learning_rate": 2.4517321672390848e-05, "loss": 2.2831, "step": 22330 }, { "epoch": 0.285952, "grad_norm": 1.5234375, "learning_rate": 2.4512628642949354e-05, "loss": 2.2998, "step": 22340 }, { "epoch": 0.28608, "grad_norm": 1.484375, "learning_rate": 2.4507934055401663e-05, "loss": 2.3021, "step": 22350 }, { "epoch": 0.286208, "grad_norm": 1.5234375, "learning_rate": 2.450323791051671e-05, "loss": 2.2754, "step": 22360 }, { "epoch": 0.286336, "grad_norm": 1.65625, "learning_rate": 2.44985402090637e-05, "loss": 2.304, "step": 22370 }, { "epoch": 0.286464, "grad_norm": 1.484375, "learning_rate": 2.449384095181208e-05, "loss": 2.3252, "step": 22380 }, { "epoch": 0.286592, "grad_norm": 1.6796875, "learning_rate": 2.4489140139531553e-05, "loss": 2.2958, "step": 22390 }, { "epoch": 0.28672, "grad_norm": 1.5, "learning_rate": 2.44844377729921e-05, "loss": 2.2549, "step": 22400 }, { "epoch": 0.286848, "grad_norm": 1.5234375, "learning_rate": 2.447973385296392e-05, "loss": 2.2947, "step": 22410 }, { "epoch": 0.286976, "grad_norm": 1.65625, "learning_rate": 2.4475028380217486e-05, "loss": 2.2953, "step": 22420 }, { "epoch": 0.287104, "grad_norm": 1.7578125, "learning_rate": 2.447032135552353e-05, "loss": 2.2919, "step": 22430 }, { "epoch": 0.287232, "grad_norm": 1.703125, "learning_rate": 2.446561277965303e-05, "loss": 2.3028, "step": 22440 }, { "epoch": 0.28736, "grad_norm": 1.6640625, "learning_rate": 2.4460902653377218e-05, "loss": 2.2556, "step": 22450 }, { "epoch": 0.287488, "grad_norm": 1.5078125, "learning_rate": 2.4456190977467582e-05, "loss": 2.2882, "step": 22460 }, { "epoch": 0.287616, "grad_norm": 1.4375, "learning_rate": 2.4451477752695867e-05, "loss": 2.2632, "step": 22470 }, { "epoch": 0.287744, "grad_norm": 1.609375, "learning_rate": 2.4446762979834063e-05, "loss": 2.2666, "step": 22480 }, { "epoch": 0.287872, "grad_norm": 1.6328125, "learning_rate": 2.444204665965442e-05, "loss": 2.2666, "step": 22490 }, { "epoch": 0.288, "grad_norm": 1.5546875, "learning_rate": 2.443732879292945e-05, "loss": 2.3007, "step": 22500 }, { "epoch": 0.288128, "grad_norm": 1.609375, "learning_rate": 2.4432609380431894e-05, "loss": 2.275, "step": 22510 }, { "epoch": 0.288256, "grad_norm": 1.6171875, "learning_rate": 2.4427888422934766e-05, "loss": 2.271, "step": 22520 }, { "epoch": 0.288384, "grad_norm": 1.5078125, "learning_rate": 2.4423165921211334e-05, "loss": 2.2901, "step": 22530 }, { "epoch": 0.288512, "grad_norm": 1.5078125, "learning_rate": 2.4418441876035103e-05, "loss": 2.2902, "step": 22540 }, { "epoch": 0.28864, "grad_norm": 1.40625, "learning_rate": 2.441371628817985e-05, "loss": 2.2513, "step": 22550 }, { "epoch": 0.288768, "grad_norm": 1.4921875, "learning_rate": 2.4408989158419586e-05, "loss": 2.2774, "step": 22560 }, { "epoch": 0.288896, "grad_norm": 1.5078125, "learning_rate": 2.4404260487528597e-05, "loss": 2.302, "step": 22570 }, { "epoch": 0.289024, "grad_norm": 1.4375, "learning_rate": 2.4399530276281393e-05, "loss": 2.3285, "step": 22580 }, { "epoch": 0.289152, "grad_norm": 1.4921875, "learning_rate": 2.439479852545276e-05, "loss": 2.3062, "step": 22590 }, { "epoch": 0.28928, "grad_norm": 1.5546875, "learning_rate": 2.4390065235817726e-05, "loss": 2.3074, "step": 22600 }, { "epoch": 0.289408, "grad_norm": 1.6015625, "learning_rate": 2.4385330408151578e-05, "loss": 2.305, "step": 22610 }, { "epoch": 0.289536, "grad_norm": 1.6015625, "learning_rate": 2.438059404322984e-05, "loss": 2.2926, "step": 22620 }, { "epoch": 0.289664, "grad_norm": 1.8828125, "learning_rate": 2.4375856141828306e-05, "loss": 2.2702, "step": 22630 }, { "epoch": 0.289792, "grad_norm": 1.4921875, "learning_rate": 2.437111670472301e-05, "loss": 2.2874, "step": 22640 }, { "epoch": 0.28992, "grad_norm": 1.5078125, "learning_rate": 2.4366375732690234e-05, "loss": 2.2945, "step": 22650 }, { "epoch": 0.290048, "grad_norm": 1.7265625, "learning_rate": 2.436163322650653e-05, "loss": 2.291, "step": 22660 }, { "epoch": 0.290176, "grad_norm": 1.4609375, "learning_rate": 2.4356889186948686e-05, "loss": 2.3308, "step": 22670 }, { "epoch": 0.290304, "grad_norm": 1.4609375, "learning_rate": 2.435214361479374e-05, "loss": 2.3025, "step": 22680 }, { "epoch": 0.290432, "grad_norm": 1.6640625, "learning_rate": 2.434739651081899e-05, "loss": 2.3088, "step": 22690 }, { "epoch": 0.29056, "grad_norm": 1.609375, "learning_rate": 2.434264787580198e-05, "loss": 2.3172, "step": 22700 }, { "epoch": 0.290688, "grad_norm": 1.578125, "learning_rate": 2.4337897710520497e-05, "loss": 2.3063, "step": 22710 }, { "epoch": 0.290816, "grad_norm": 1.578125, "learning_rate": 2.43331460157526e-05, "loss": 2.2634, "step": 22720 }, { "epoch": 0.290944, "grad_norm": 1.5859375, "learning_rate": 2.4328392792276574e-05, "loss": 2.2826, "step": 22730 }, { "epoch": 0.291072, "grad_norm": 1.625, "learning_rate": 2.4323638040870975e-05, "loss": 2.2795, "step": 22740 }, { "epoch": 0.2912, "grad_norm": 1.5390625, "learning_rate": 2.43188817623146e-05, "loss": 2.2729, "step": 22750 }, { "epoch": 0.291328, "grad_norm": 1.5078125, "learning_rate": 2.431412395738648e-05, "loss": 2.3113, "step": 22760 }, { "epoch": 0.291456, "grad_norm": 1.421875, "learning_rate": 2.4309364626865936e-05, "loss": 2.2958, "step": 22770 }, { "epoch": 0.291584, "grad_norm": 1.5, "learning_rate": 2.4304603771532505e-05, "loss": 2.3066, "step": 22780 }, { "epoch": 0.291712, "grad_norm": 1.6015625, "learning_rate": 2.4299841392165974e-05, "loss": 2.326, "step": 22790 }, { "epoch": 0.29184, "grad_norm": 1.515625, "learning_rate": 2.4295077489546403e-05, "loss": 2.2916, "step": 22800 }, { "epoch": 0.291968, "grad_norm": 1.59375, "learning_rate": 2.4290312064454087e-05, "loss": 2.2845, "step": 22810 }, { "epoch": 0.292096, "grad_norm": 1.8984375, "learning_rate": 2.4285545117669563e-05, "loss": 2.2904, "step": 22820 }, { "epoch": 0.292224, "grad_norm": 1.46875, "learning_rate": 2.428077664997363e-05, "loss": 2.3001, "step": 22830 }, { "epoch": 0.292352, "grad_norm": 1.46875, "learning_rate": 2.427600666214733e-05, "loss": 2.302, "step": 22840 }, { "epoch": 0.29248, "grad_norm": 1.59375, "learning_rate": 2.4271235154971967e-05, "loss": 2.2919, "step": 22850 }, { "epoch": 0.292608, "grad_norm": 1.7578125, "learning_rate": 2.4266462129229067e-05, "loss": 2.2523, "step": 22860 }, { "epoch": 0.292736, "grad_norm": 1.5546875, "learning_rate": 2.426168758570043e-05, "loss": 2.289, "step": 22870 }, { "epoch": 0.292864, "grad_norm": 1.65625, "learning_rate": 2.4256911525168086e-05, "loss": 2.2876, "step": 22880 }, { "epoch": 0.292992, "grad_norm": 1.609375, "learning_rate": 2.4252133948414336e-05, "loss": 2.302, "step": 22890 }, { "epoch": 0.29312, "grad_norm": 1.6328125, "learning_rate": 2.42473548562217e-05, "loss": 2.2876, "step": 22900 }, { "epoch": 0.293248, "grad_norm": 1.9140625, "learning_rate": 2.424257424937297e-05, "loss": 2.2677, "step": 22910 }, { "epoch": 0.293376, "grad_norm": 1.546875, "learning_rate": 2.4237792128651184e-05, "loss": 2.2746, "step": 22920 }, { "epoch": 0.293504, "grad_norm": 1.5, "learning_rate": 2.4233008494839614e-05, "loss": 2.2506, "step": 22930 }, { "epoch": 0.293632, "grad_norm": 1.609375, "learning_rate": 2.4228223348721786e-05, "loss": 2.2993, "step": 22940 }, { "epoch": 0.29376, "grad_norm": 1.6484375, "learning_rate": 2.4223436691081487e-05, "loss": 2.2628, "step": 22950 }, { "epoch": 0.293888, "grad_norm": 1.515625, "learning_rate": 2.4218648522702724e-05, "loss": 2.3249, "step": 22960 }, { "epoch": 0.294016, "grad_norm": 2.984375, "learning_rate": 2.421385884436978e-05, "loss": 2.2878, "step": 22970 }, { "epoch": 0.294144, "grad_norm": 1.7890625, "learning_rate": 2.4209067656867167e-05, "loss": 2.301, "step": 22980 }, { "epoch": 0.294272, "grad_norm": 1.5546875, "learning_rate": 2.4204274960979655e-05, "loss": 2.2978, "step": 22990 }, { "epoch": 0.2944, "grad_norm": 1.484375, "learning_rate": 2.419948075749225e-05, "loss": 2.2902, "step": 23000 }, { "epoch": 0.294528, "grad_norm": 2.28125, "learning_rate": 2.4194685047190214e-05, "loss": 2.3058, "step": 23010 }, { "epoch": 0.294656, "grad_norm": 1.4609375, "learning_rate": 2.4189887830859054e-05, "loss": 2.3138, "step": 23020 }, { "epoch": 0.294784, "grad_norm": 1.6171875, "learning_rate": 2.4185089109284526e-05, "loss": 2.3006, "step": 23030 }, { "epoch": 0.294912, "grad_norm": 1.59375, "learning_rate": 2.418028888325262e-05, "loss": 2.2897, "step": 23040 }, { "epoch": 0.29504, "grad_norm": 1.46875, "learning_rate": 2.4175487153549587e-05, "loss": 2.2971, "step": 23050 }, { "epoch": 0.295168, "grad_norm": 1.546875, "learning_rate": 2.417068392096192e-05, "loss": 2.2872, "step": 23060 }, { "epoch": 0.295296, "grad_norm": 1.6015625, "learning_rate": 2.4165879186276353e-05, "loss": 2.2861, "step": 23070 }, { "epoch": 0.295424, "grad_norm": 1.5234375, "learning_rate": 2.4161072950279875e-05, "loss": 2.301, "step": 23080 }, { "epoch": 0.295552, "grad_norm": 1.5546875, "learning_rate": 2.4156265213759713e-05, "loss": 2.2965, "step": 23090 }, { "epoch": 0.29568, "grad_norm": 1.5390625, "learning_rate": 2.415145597750334e-05, "loss": 2.2659, "step": 23100 }, { "epoch": 0.295808, "grad_norm": 1.5546875, "learning_rate": 2.4146645242298484e-05, "loss": 2.2823, "step": 23110 }, { "epoch": 0.295936, "grad_norm": 1.5234375, "learning_rate": 2.4141833008933104e-05, "loss": 2.2689, "step": 23120 }, { "epoch": 0.296064, "grad_norm": 1.65625, "learning_rate": 2.413701927819542e-05, "loss": 2.3021, "step": 23130 }, { "epoch": 0.296192, "grad_norm": 1.5234375, "learning_rate": 2.4132204050873886e-05, "loss": 2.2636, "step": 23140 }, { "epoch": 0.29632, "grad_norm": 1.78125, "learning_rate": 2.4127387327757203e-05, "loss": 2.2863, "step": 23150 }, { "epoch": 0.296448, "grad_norm": 1.5546875, "learning_rate": 2.4122569109634324e-05, "loss": 2.2982, "step": 23160 }, { "epoch": 0.296576, "grad_norm": 1.53125, "learning_rate": 2.4117749397294436e-05, "loss": 2.2886, "step": 23170 }, { "epoch": 0.296704, "grad_norm": 1.59375, "learning_rate": 2.4112928191526974e-05, "loss": 2.266, "step": 23180 }, { "epoch": 0.296832, "grad_norm": 1.5546875, "learning_rate": 2.4108105493121627e-05, "loss": 2.2715, "step": 23190 }, { "epoch": 0.29696, "grad_norm": 3.375, "learning_rate": 2.4103281302868324e-05, "loss": 2.3285, "step": 23200 }, { "epoch": 0.297088, "grad_norm": 2.03125, "learning_rate": 2.4098455621557222e-05, "loss": 2.2763, "step": 23210 }, { "epoch": 0.297216, "grad_norm": 1.6328125, "learning_rate": 2.4093628449978744e-05, "loss": 2.2767, "step": 23220 }, { "epoch": 0.297344, "grad_norm": 1.4609375, "learning_rate": 2.4088799788923556e-05, "loss": 2.321, "step": 23230 }, { "epoch": 0.297472, "grad_norm": 1.5625, "learning_rate": 2.4083969639182548e-05, "loss": 2.302, "step": 23240 }, { "epoch": 0.2976, "grad_norm": 1.3515625, "learning_rate": 2.4079138001546877e-05, "loss": 2.2906, "step": 23250 }, { "epoch": 0.297728, "grad_norm": 1.625, "learning_rate": 2.407430487680793e-05, "loss": 2.278, "step": 23260 }, { "epoch": 0.297856, "grad_norm": 1.7265625, "learning_rate": 2.406947026575734e-05, "loss": 2.3026, "step": 23270 }, { "epoch": 0.297984, "grad_norm": 1.546875, "learning_rate": 2.406463416918698e-05, "loss": 2.2808, "step": 23280 }, { "epoch": 0.298112, "grad_norm": 1.640625, "learning_rate": 2.4059796587888982e-05, "loss": 2.2602, "step": 23290 }, { "epoch": 0.29824, "grad_norm": 2.296875, "learning_rate": 2.405495752265571e-05, "loss": 2.2736, "step": 23300 }, { "epoch": 0.298368, "grad_norm": 1.53125, "learning_rate": 2.405011697427975e-05, "loss": 2.29, "step": 23310 }, { "epoch": 0.298496, "grad_norm": 1.6484375, "learning_rate": 2.4045274943553982e-05, "loss": 2.3032, "step": 23320 }, { "epoch": 0.298624, "grad_norm": 1.78125, "learning_rate": 2.404043143127148e-05, "loss": 2.3143, "step": 23330 }, { "epoch": 0.298752, "grad_norm": 3.40625, "learning_rate": 2.403558643822558e-05, "loss": 2.3273, "step": 23340 }, { "epoch": 0.29888, "grad_norm": 1.4296875, "learning_rate": 2.403073996520987e-05, "loss": 2.29, "step": 23350 }, { "epoch": 0.299008, "grad_norm": 1.578125, "learning_rate": 2.4025892013018162e-05, "loss": 2.284, "step": 23360 }, { "epoch": 0.299136, "grad_norm": 1.640625, "learning_rate": 2.402104258244452e-05, "loss": 2.3127, "step": 23370 }, { "epoch": 0.299264, "grad_norm": 1.484375, "learning_rate": 2.4016191674283252e-05, "loss": 2.3005, "step": 23380 }, { "epoch": 0.299392, "grad_norm": 1.46875, "learning_rate": 2.40113392893289e-05, "loss": 2.3185, "step": 23390 }, { "epoch": 0.29952, "grad_norm": 1.609375, "learning_rate": 2.4006485428376264e-05, "loss": 2.3044, "step": 23400 }, { "epoch": 0.299648, "grad_norm": 1.8125, "learning_rate": 2.4001630092220363e-05, "loss": 2.2769, "step": 23410 }, { "epoch": 0.299776, "grad_norm": 1.6015625, "learning_rate": 2.399677328165647e-05, "loss": 2.2977, "step": 23420 }, { "epoch": 0.299904, "grad_norm": 1.7109375, "learning_rate": 2.399191499748011e-05, "loss": 2.2656, "step": 23430 }, { "epoch": 0.300032, "grad_norm": 1.578125, "learning_rate": 2.3987055240487016e-05, "loss": 2.2815, "step": 23440 }, { "epoch": 0.30016, "grad_norm": 1.5859375, "learning_rate": 2.3982194011473203e-05, "loss": 2.3014, "step": 23450 }, { "epoch": 0.300288, "grad_norm": 1.421875, "learning_rate": 2.397733131123491e-05, "loss": 2.2892, "step": 23460 }, { "epoch": 0.300416, "grad_norm": 1.484375, "learning_rate": 2.3972467140568605e-05, "loss": 2.2821, "step": 23470 }, { "epoch": 0.300544, "grad_norm": 1.6875, "learning_rate": 2.3967601500271015e-05, "loss": 2.2931, "step": 23480 }, { "epoch": 0.300672, "grad_norm": 1.6953125, "learning_rate": 2.396273439113909e-05, "loss": 2.3322, "step": 23490 }, { "epoch": 0.3008, "grad_norm": 10.625, "learning_rate": 2.3957865813970043e-05, "loss": 2.2815, "step": 23500 }, { "epoch": 0.300928, "grad_norm": 1.5078125, "learning_rate": 2.3952995769561304e-05, "loss": 2.2919, "step": 23510 }, { "epoch": 0.301056, "grad_norm": 1.5546875, "learning_rate": 2.394812425871056e-05, "loss": 2.3286, "step": 23520 }, { "epoch": 0.301184, "grad_norm": 1.6015625, "learning_rate": 2.394325128221573e-05, "loss": 2.3071, "step": 23530 }, { "epoch": 0.301312, "grad_norm": 9.875, "learning_rate": 2.3938376840874986e-05, "loss": 2.2696, "step": 23540 }, { "epoch": 0.30144, "grad_norm": 5.78125, "learning_rate": 2.3933500935486715e-05, "loss": 2.2744, "step": 23550 }, { "epoch": 0.301568, "grad_norm": 1.515625, "learning_rate": 2.392862356684956e-05, "loss": 2.2969, "step": 23560 }, { "epoch": 0.301696, "grad_norm": 1.65625, "learning_rate": 2.3923744735762413e-05, "loss": 2.2693, "step": 23570 }, { "epoch": 0.301824, "grad_norm": 1.4453125, "learning_rate": 2.3918864443024385e-05, "loss": 2.2959, "step": 23580 }, { "epoch": 0.301952, "grad_norm": 1.5703125, "learning_rate": 2.3913982689434837e-05, "loss": 2.2856, "step": 23590 }, { "epoch": 0.30208, "grad_norm": 7.9375, "learning_rate": 2.390909947579337e-05, "loss": 2.285, "step": 23600 }, { "epoch": 0.302208, "grad_norm": 1.6171875, "learning_rate": 2.3904214802899826e-05, "loss": 2.2908, "step": 23610 }, { "epoch": 0.302336, "grad_norm": 1.609375, "learning_rate": 2.389932867155428e-05, "loss": 2.2999, "step": 23620 }, { "epoch": 0.302464, "grad_norm": 1.484375, "learning_rate": 2.389444108255704e-05, "loss": 2.3159, "step": 23630 }, { "epoch": 0.302592, "grad_norm": 1.640625, "learning_rate": 2.3889552036708674e-05, "loss": 2.292, "step": 23640 }, { "epoch": 0.30272, "grad_norm": 1.59375, "learning_rate": 2.3884661534809967e-05, "loss": 2.2615, "step": 23650 }, { "epoch": 0.302848, "grad_norm": 1.578125, "learning_rate": 2.3879769577661952e-05, "loss": 2.2642, "step": 23660 }, { "epoch": 0.302976, "grad_norm": 1.7265625, "learning_rate": 2.3874876166065907e-05, "loss": 2.2748, "step": 23670 }, { "epoch": 0.303104, "grad_norm": 1.5, "learning_rate": 2.386998130082333e-05, "loss": 2.3102, "step": 23680 }, { "epoch": 0.303232, "grad_norm": 1.40625, "learning_rate": 2.3865084982735973e-05, "loss": 2.2968, "step": 23690 }, { "epoch": 0.30336, "grad_norm": 1.5703125, "learning_rate": 2.3860187212605822e-05, "loss": 2.2781, "step": 23700 }, { "epoch": 0.303488, "grad_norm": 2.421875, "learning_rate": 2.38552879912351e-05, "loss": 2.2901, "step": 23710 }, { "epoch": 0.303616, "grad_norm": 1.6171875, "learning_rate": 2.3850387319426264e-05, "loss": 2.2804, "step": 23720 }, { "epoch": 0.303744, "grad_norm": 1.4609375, "learning_rate": 2.384548519798201e-05, "loss": 2.2743, "step": 23730 }, { "epoch": 0.303872, "grad_norm": 1.59375, "learning_rate": 2.3840581627705283e-05, "loss": 2.3061, "step": 23740 }, { "epoch": 0.304, "grad_norm": 1.5703125, "learning_rate": 2.3835676609399247e-05, "loss": 2.2669, "step": 23750 }, { "epoch": 0.304128, "grad_norm": 1.59375, "learning_rate": 2.3830770143867313e-05, "loss": 2.3107, "step": 23760 }, { "epoch": 0.304256, "grad_norm": 1.4765625, "learning_rate": 2.382586223191313e-05, "loss": 2.2944, "step": 23770 }, { "epoch": 0.304384, "grad_norm": 1.53125, "learning_rate": 2.3820952874340585e-05, "loss": 2.2828, "step": 23780 }, { "epoch": 0.304512, "grad_norm": 1.515625, "learning_rate": 2.381604207195379e-05, "loss": 2.2941, "step": 23790 }, { "epoch": 0.30464, "grad_norm": 1.6953125, "learning_rate": 2.3811129825557107e-05, "loss": 2.2927, "step": 23800 }, { "epoch": 0.304768, "grad_norm": 1.46875, "learning_rate": 2.3806216135955134e-05, "loss": 2.2714, "step": 23810 }, { "epoch": 0.304896, "grad_norm": 1.6015625, "learning_rate": 2.3801301003952694e-05, "loss": 2.2888, "step": 23820 }, { "epoch": 0.305024, "grad_norm": 1.515625, "learning_rate": 2.3796384430354853e-05, "loss": 2.3218, "step": 23830 }, { "epoch": 0.305152, "grad_norm": 1.4375, "learning_rate": 2.379146641596692e-05, "loss": 2.2633, "step": 23840 }, { "epoch": 0.30528, "grad_norm": 1.5859375, "learning_rate": 2.3786546961594427e-05, "loss": 2.3016, "step": 23850 }, { "epoch": 0.305408, "grad_norm": 1.96875, "learning_rate": 2.378162606804316e-05, "loss": 2.2919, "step": 23860 }, { "epoch": 0.305536, "grad_norm": 1.515625, "learning_rate": 2.3776703736119113e-05, "loss": 2.3083, "step": 23870 }, { "epoch": 0.305664, "grad_norm": 1.6015625, "learning_rate": 2.3771779966628545e-05, "loss": 2.2672, "step": 23880 }, { "epoch": 0.305792, "grad_norm": 1.6484375, "learning_rate": 2.3766854760377925e-05, "loss": 2.2919, "step": 23890 }, { "epoch": 0.30592, "grad_norm": 1.53125, "learning_rate": 2.3761928118173978e-05, "loss": 2.2852, "step": 23900 }, { "epoch": 0.306048, "grad_norm": 1.4921875, "learning_rate": 2.3757000040823654e-05, "loss": 2.2986, "step": 23910 }, { "epoch": 0.306176, "grad_norm": 1.640625, "learning_rate": 2.3752070529134144e-05, "loss": 2.2887, "step": 23920 }, { "epoch": 0.306304, "grad_norm": 1.6484375, "learning_rate": 2.3747139583912857e-05, "loss": 2.2995, "step": 23930 }, { "epoch": 0.306432, "grad_norm": 1.6640625, "learning_rate": 2.3742207205967466e-05, "loss": 2.2665, "step": 23940 }, { "epoch": 0.30656, "grad_norm": 1.6328125, "learning_rate": 2.373727339610585e-05, "loss": 2.276, "step": 23950 }, { "epoch": 0.306688, "grad_norm": 1.578125, "learning_rate": 2.3732338155136138e-05, "loss": 2.2704, "step": 23960 }, { "epoch": 0.306816, "grad_norm": 1.6015625, "learning_rate": 2.3727401483866692e-05, "loss": 2.2739, "step": 23970 }, { "epoch": 0.306944, "grad_norm": 1.484375, "learning_rate": 2.3722463383106105e-05, "loss": 2.2987, "step": 23980 }, { "epoch": 0.307072, "grad_norm": 1.5625, "learning_rate": 2.3717523853663203e-05, "loss": 2.283, "step": 23990 }, { "epoch": 0.3072, "grad_norm": 1.8203125, "learning_rate": 2.3712582896347058e-05, "loss": 2.3074, "step": 24000 }, { "epoch": 0.307328, "grad_norm": 1.5859375, "learning_rate": 2.3707640511966953e-05, "loss": 2.3019, "step": 24010 }, { "epoch": 0.307456, "grad_norm": 1.4765625, "learning_rate": 2.3702696701332426e-05, "loss": 2.315, "step": 24020 }, { "epoch": 0.307584, "grad_norm": 1.5859375, "learning_rate": 2.3697751465253237e-05, "loss": 2.2774, "step": 24030 }, { "epoch": 0.307712, "grad_norm": 1.7109375, "learning_rate": 2.3692804804539387e-05, "loss": 2.314, "step": 24040 }, { "epoch": 0.30784, "grad_norm": 1.5234375, "learning_rate": 2.3687856720001113e-05, "loss": 2.2813, "step": 24050 }, { "epoch": 0.307968, "grad_norm": 1.6640625, "learning_rate": 2.3682907212448862e-05, "loss": 2.2945, "step": 24060 }, { "epoch": 0.308096, "grad_norm": 1.6328125, "learning_rate": 2.3677956282693342e-05, "loss": 2.3068, "step": 24070 }, { "epoch": 0.308224, "grad_norm": 2.625, "learning_rate": 2.367300393154548e-05, "loss": 2.2868, "step": 24080 }, { "epoch": 0.308352, "grad_norm": 1.5234375, "learning_rate": 2.3668050159816446e-05, "loss": 2.2809, "step": 24090 }, { "epoch": 0.30848, "grad_norm": 1.515625, "learning_rate": 2.3663094968317624e-05, "loss": 2.2931, "step": 24100 }, { "epoch": 0.308608, "grad_norm": 1.6328125, "learning_rate": 2.3658138357860646e-05, "loss": 2.2816, "step": 24110 }, { "epoch": 0.308736, "grad_norm": 1.8203125, "learning_rate": 2.3653180329257374e-05, "loss": 2.2902, "step": 24120 }, { "epoch": 0.308864, "grad_norm": 1.5859375, "learning_rate": 2.36482208833199e-05, "loss": 2.2986, "step": 24130 }, { "epoch": 0.308992, "grad_norm": 1.59375, "learning_rate": 2.3643260020860543e-05, "loss": 2.2729, "step": 24140 }, { "epoch": 0.30912, "grad_norm": 1.46875, "learning_rate": 2.363829774269187e-05, "loss": 2.2985, "step": 24150 }, { "epoch": 0.309248, "grad_norm": 2.84375, "learning_rate": 2.3633334049626667e-05, "loss": 2.3139, "step": 24160 }, { "epoch": 0.309376, "grad_norm": 1.8515625, "learning_rate": 2.362836894247795e-05, "loss": 2.285, "step": 24170 }, { "epoch": 0.309504, "grad_norm": 1.8359375, "learning_rate": 2.362340242205897e-05, "loss": 2.2813, "step": 24180 }, { "epoch": 0.309632, "grad_norm": 1.46875, "learning_rate": 2.3618434489183215e-05, "loss": 2.3041, "step": 24190 }, { "epoch": 0.30976, "grad_norm": 1.5234375, "learning_rate": 2.3613465144664396e-05, "loss": 2.2884, "step": 24200 }, { "epoch": 0.309888, "grad_norm": 1.59375, "learning_rate": 2.3608494389316468e-05, "loss": 2.2808, "step": 24210 }, { "epoch": 0.310016, "grad_norm": 1.5078125, "learning_rate": 2.3603522223953596e-05, "loss": 2.3124, "step": 24220 }, { "epoch": 0.310144, "grad_norm": 1.6796875, "learning_rate": 2.35985486493902e-05, "loss": 2.2899, "step": 24230 }, { "epoch": 0.310272, "grad_norm": 1.5390625, "learning_rate": 2.3593573666440905e-05, "loss": 2.2962, "step": 24240 }, { "epoch": 0.3104, "grad_norm": 2.5625, "learning_rate": 2.3588597275920588e-05, "loss": 2.3073, "step": 24250 }, { "epoch": 0.310528, "grad_norm": 1.515625, "learning_rate": 2.3583619478644357e-05, "loss": 2.2809, "step": 24260 }, { "epoch": 0.310656, "grad_norm": 1.5078125, "learning_rate": 2.357864027542753e-05, "loss": 2.2837, "step": 24270 }, { "epoch": 0.310784, "grad_norm": 1.546875, "learning_rate": 2.3573659667085672e-05, "loss": 2.2841, "step": 24280 }, { "epoch": 0.310912, "grad_norm": 1.4765625, "learning_rate": 2.3568677654434576e-05, "loss": 2.2858, "step": 24290 }, { "epoch": 0.31104, "grad_norm": 1.59375, "learning_rate": 2.356369423829026e-05, "loss": 2.2724, "step": 24300 }, { "epoch": 0.311168, "grad_norm": 1.5703125, "learning_rate": 2.3558709419468977e-05, "loss": 2.2973, "step": 24310 }, { "epoch": 0.311296, "grad_norm": 1.6875, "learning_rate": 2.355372319878721e-05, "loss": 2.2985, "step": 24320 }, { "epoch": 0.311424, "grad_norm": 1.46875, "learning_rate": 2.354873557706166e-05, "loss": 2.2943, "step": 24330 }, { "epoch": 0.311552, "grad_norm": 1.609375, "learning_rate": 2.3543746555109278e-05, "loss": 2.2811, "step": 24340 }, { "epoch": 0.31168, "grad_norm": 1.578125, "learning_rate": 2.353875613374723e-05, "loss": 2.29, "step": 24350 }, { "epoch": 0.311808, "grad_norm": 1.4921875, "learning_rate": 2.353376431379291e-05, "loss": 2.2682, "step": 24360 }, { "epoch": 0.311936, "grad_norm": 1.5546875, "learning_rate": 2.352877109606395e-05, "loss": 2.3117, "step": 24370 }, { "epoch": 0.312064, "grad_norm": 1.578125, "learning_rate": 2.3523776481378197e-05, "loss": 2.2877, "step": 24380 }, { "epoch": 0.312192, "grad_norm": 1.5, "learning_rate": 2.3518780470553746e-05, "loss": 2.2935, "step": 24390 }, { "epoch": 0.31232, "grad_norm": 1.46875, "learning_rate": 2.3513783064408916e-05, "loss": 2.2691, "step": 24400 }, { "epoch": 0.312448, "grad_norm": 1.671875, "learning_rate": 2.3508784263762235e-05, "loss": 2.2732, "step": 24410 }, { "epoch": 0.312576, "grad_norm": 1.6953125, "learning_rate": 2.350378406943248e-05, "loss": 2.314, "step": 24420 }, { "epoch": 0.312704, "grad_norm": 1.4765625, "learning_rate": 2.3498782482238656e-05, "loss": 2.2824, "step": 24430 }, { "epoch": 0.312832, "grad_norm": 1.546875, "learning_rate": 2.349377950299998e-05, "loss": 2.2695, "step": 24440 }, { "epoch": 0.31296, "grad_norm": 1.4921875, "learning_rate": 2.348877513253591e-05, "loss": 2.3178, "step": 24450 }, { "epoch": 0.313088, "grad_norm": 1.6171875, "learning_rate": 2.3483769371666133e-05, "loss": 2.2861, "step": 24460 }, { "epoch": 0.313216, "grad_norm": 3.234375, "learning_rate": 2.3478762221210558e-05, "loss": 2.2948, "step": 24470 }, { "epoch": 0.313344, "grad_norm": 1.484375, "learning_rate": 2.3473753681989323e-05, "loss": 2.282, "step": 24480 }, { "epoch": 0.313472, "grad_norm": 1.375, "learning_rate": 2.346874375482279e-05, "loss": 2.2881, "step": 24490 }, { "epoch": 0.3136, "grad_norm": 1.53125, "learning_rate": 2.3463732440531564e-05, "loss": 2.2793, "step": 24500 }, { "epoch": 0.313728, "grad_norm": 1.578125, "learning_rate": 2.345871973993645e-05, "loss": 2.3047, "step": 24510 }, { "epoch": 0.313856, "grad_norm": 1.484375, "learning_rate": 2.3453705653858507e-05, "loss": 2.2875, "step": 24520 }, { "epoch": 0.313984, "grad_norm": 1.4921875, "learning_rate": 2.3448690183119007e-05, "loss": 2.2902, "step": 24530 }, { "epoch": 0.314112, "grad_norm": 1.4453125, "learning_rate": 2.344367332853945e-05, "loss": 2.3361, "step": 24540 }, { "epoch": 0.31424, "grad_norm": 1.6015625, "learning_rate": 2.3438655090941557e-05, "loss": 2.256, "step": 24550 }, { "epoch": 0.314368, "grad_norm": 1.4453125, "learning_rate": 2.3433635471147296e-05, "loss": 2.2457, "step": 24560 }, { "epoch": 0.314496, "grad_norm": 1.5390625, "learning_rate": 2.3428614469978836e-05, "loss": 2.305, "step": 24570 }, { "epoch": 0.314624, "grad_norm": 3.3125, "learning_rate": 2.3423592088258596e-05, "loss": 2.3053, "step": 24580 }, { "epoch": 0.314752, "grad_norm": 1.4921875, "learning_rate": 2.3418568326809193e-05, "loss": 2.2885, "step": 24590 }, { "epoch": 0.31488, "grad_norm": 1.4921875, "learning_rate": 2.34135431864535e-05, "loss": 2.2781, "step": 24600 }, { "epoch": 0.315008, "grad_norm": 1.5546875, "learning_rate": 2.34085166680146e-05, "loss": 2.271, "step": 24610 }, { "epoch": 0.315136, "grad_norm": 1.5703125, "learning_rate": 2.3403488772315796e-05, "loss": 2.3058, "step": 24620 }, { "epoch": 0.315264, "grad_norm": 1.6015625, "learning_rate": 2.3398459500180627e-05, "loss": 2.2933, "step": 24630 }, { "epoch": 0.315392, "grad_norm": 1.6171875, "learning_rate": 2.339342885243287e-05, "loss": 2.2853, "step": 24640 }, { "epoch": 0.31552, "grad_norm": 1.65625, "learning_rate": 2.3388396829896485e-05, "loss": 2.2983, "step": 24650 }, { "epoch": 0.315648, "grad_norm": 1.484375, "learning_rate": 2.3383363433395706e-05, "loss": 2.2749, "step": 24660 }, { "epoch": 0.315776, "grad_norm": 1.84375, "learning_rate": 2.3378328663754963e-05, "loss": 2.3043, "step": 24670 }, { "epoch": 0.315904, "grad_norm": 1.5078125, "learning_rate": 2.3373292521798916e-05, "loss": 2.3184, "step": 24680 }, { "epoch": 0.316032, "grad_norm": 1.5390625, "learning_rate": 2.336825500835245e-05, "loss": 2.2939, "step": 24690 }, { "epoch": 0.31616, "grad_norm": 1.53125, "learning_rate": 2.336321612424069e-05, "loss": 2.2787, "step": 24700 }, { "epoch": 0.316288, "grad_norm": 1.53125, "learning_rate": 2.335817587028896e-05, "loss": 2.3007, "step": 24710 }, { "epoch": 0.316416, "grad_norm": 1.4765625, "learning_rate": 2.335313424732282e-05, "loss": 2.3278, "step": 24720 }, { "epoch": 0.316544, "grad_norm": 1.4765625, "learning_rate": 2.3348091256168062e-05, "loss": 2.303, "step": 24730 }, { "epoch": 0.316672, "grad_norm": 1.7421875, "learning_rate": 2.334304689765069e-05, "loss": 2.3092, "step": 24740 }, { "epoch": 0.3168, "grad_norm": 1.5703125, "learning_rate": 2.333800117259694e-05, "loss": 2.2981, "step": 24750 }, { "epoch": 0.316928, "grad_norm": 1.46875, "learning_rate": 2.3332954081833265e-05, "loss": 2.2911, "step": 24760 }, { "epoch": 0.317056, "grad_norm": 1.5625, "learning_rate": 2.332790562618635e-05, "loss": 2.2754, "step": 24770 }, { "epoch": 0.317184, "grad_norm": 1.4765625, "learning_rate": 2.3322855806483095e-05, "loss": 2.2935, "step": 24780 }, { "epoch": 0.317312, "grad_norm": 1.3828125, "learning_rate": 2.3317804623550626e-05, "loss": 2.2663, "step": 24790 }, { "epoch": 0.31744, "grad_norm": 3.765625, "learning_rate": 2.3312752078216296e-05, "loss": 2.2652, "step": 24800 }, { "epoch": 0.317568, "grad_norm": 1.515625, "learning_rate": 2.3307698171307685e-05, "loss": 2.2613, "step": 24810 }, { "epoch": 0.317696, "grad_norm": 1.53125, "learning_rate": 2.330264290365258e-05, "loss": 2.2917, "step": 24820 }, { "epoch": 0.317824, "grad_norm": 1.5390625, "learning_rate": 2.3297586276079002e-05, "loss": 2.2791, "step": 24830 }, { "epoch": 0.317952, "grad_norm": 1.484375, "learning_rate": 2.32925282894152e-05, "loss": 2.2929, "step": 24840 }, { "epoch": 0.31808, "grad_norm": 1.609375, "learning_rate": 2.3287468944489634e-05, "loss": 2.2977, "step": 24850 }, { "epoch": 0.318208, "grad_norm": 1.5078125, "learning_rate": 2.328240824213099e-05, "loss": 2.2776, "step": 24860 }, { "epoch": 0.318336, "grad_norm": 1.53125, "learning_rate": 2.3277346183168182e-05, "loss": 2.3029, "step": 24870 }, { "epoch": 0.318464, "grad_norm": 2.171875, "learning_rate": 2.3272282768430345e-05, "loss": 2.2972, "step": 24880 }, { "epoch": 0.318592, "grad_norm": 1.4609375, "learning_rate": 2.3267217998746824e-05, "loss": 2.308, "step": 24890 }, { "epoch": 0.31872, "grad_norm": 1.609375, "learning_rate": 2.3262151874947194e-05, "loss": 2.3002, "step": 24900 }, { "epoch": 0.318848, "grad_norm": 1.5234375, "learning_rate": 2.325708439786127e-05, "loss": 2.316, "step": 24910 }, { "epoch": 0.318976, "grad_norm": 1.4921875, "learning_rate": 2.325201556831905e-05, "loss": 2.3035, "step": 24920 }, { "epoch": 0.319104, "grad_norm": 1.515625, "learning_rate": 2.3246945387150797e-05, "loss": 2.2895, "step": 24930 }, { "epoch": 0.319232, "grad_norm": 1.4765625, "learning_rate": 2.3241873855186952e-05, "loss": 2.2797, "step": 24940 }, { "epoch": 0.31936, "grad_norm": 1.7421875, "learning_rate": 2.323680097325821e-05, "loss": 2.2393, "step": 24950 }, { "epoch": 0.319488, "grad_norm": 1.5078125, "learning_rate": 2.3231726742195477e-05, "loss": 2.2878, "step": 24960 }, { "epoch": 0.319616, "grad_norm": 1.5703125, "learning_rate": 2.322665116282987e-05, "loss": 2.2905, "step": 24970 }, { "epoch": 0.319744, "grad_norm": 1.671875, "learning_rate": 2.3221574235992744e-05, "loss": 2.2737, "step": 24980 }, { "epoch": 0.319872, "grad_norm": 1.515625, "learning_rate": 2.3216495962515664e-05, "loss": 2.3227, "step": 24990 }, { "epoch": 0.32, "grad_norm": 2.40625, "learning_rate": 2.321141634323042e-05, "loss": 2.2641, "step": 25000 }, { "epoch": 0.320128, "grad_norm": 1.734375, "learning_rate": 2.3206335378969013e-05, "loss": 2.2684, "step": 25010 }, { "epoch": 0.320256, "grad_norm": 1.4453125, "learning_rate": 2.3201253070563685e-05, "loss": 2.3018, "step": 25020 }, { "epoch": 0.320384, "grad_norm": 1.640625, "learning_rate": 2.3196169418846868e-05, "loss": 2.3029, "step": 25030 }, { "epoch": 0.320512, "grad_norm": 1.5546875, "learning_rate": 2.3191084424651242e-05, "loss": 2.2779, "step": 25040 }, { "epoch": 0.32064, "grad_norm": 1.671875, "learning_rate": 2.3185998088809696e-05, "loss": 2.2904, "step": 25050 }, { "epoch": 0.320768, "grad_norm": 1.5234375, "learning_rate": 2.3180910412155333e-05, "loss": 2.2778, "step": 25060 }, { "epoch": 0.320896, "grad_norm": 1.6015625, "learning_rate": 2.317582139552148e-05, "loss": 2.2725, "step": 25070 }, { "epoch": 0.321024, "grad_norm": 1.6796875, "learning_rate": 2.3170731039741695e-05, "loss": 2.2792, "step": 25080 }, { "epoch": 0.321152, "grad_norm": 1.5078125, "learning_rate": 2.316563934564973e-05, "loss": 2.2756, "step": 25090 }, { "epoch": 0.32128, "grad_norm": 1.5546875, "learning_rate": 2.3160546314079585e-05, "loss": 2.3104, "step": 25100 }, { "epoch": 0.321408, "grad_norm": 1.5546875, "learning_rate": 2.315545194586546e-05, "loss": 2.2791, "step": 25110 }, { "epoch": 0.321536, "grad_norm": 1.8046875, "learning_rate": 2.3150356241841775e-05, "loss": 2.2865, "step": 25120 }, { "epoch": 0.321664, "grad_norm": 1.5078125, "learning_rate": 2.314525920284318e-05, "loss": 2.2697, "step": 25130 }, { "epoch": 0.321792, "grad_norm": 1.6171875, "learning_rate": 2.3140160829704523e-05, "loss": 2.2934, "step": 25140 }, { "epoch": 0.32192, "grad_norm": 1.84375, "learning_rate": 2.3135061123260903e-05, "loss": 2.2694, "step": 25150 }, { "epoch": 0.322048, "grad_norm": 1.4609375, "learning_rate": 2.3129960084347606e-05, "loss": 2.2782, "step": 25160 }, { "epoch": 0.322176, "grad_norm": 1.6484375, "learning_rate": 2.3124857713800155e-05, "loss": 2.3208, "step": 25170 }, { "epoch": 0.322304, "grad_norm": 1.515625, "learning_rate": 2.311975401245428e-05, "loss": 2.2812, "step": 25180 }, { "epoch": 0.322432, "grad_norm": 2.953125, "learning_rate": 2.3114648981145936e-05, "loss": 2.3047, "step": 25190 }, { "epoch": 0.32256, "grad_norm": 1.65625, "learning_rate": 2.3109542620711298e-05, "loss": 2.2694, "step": 25200 }, { "epoch": 0.322688, "grad_norm": 1.5390625, "learning_rate": 2.310443493198674e-05, "loss": 2.2607, "step": 25210 }, { "epoch": 0.322816, "grad_norm": 1.578125, "learning_rate": 2.3099325915808884e-05, "loss": 2.2913, "step": 25220 }, { "epoch": 0.322944, "grad_norm": 1.609375, "learning_rate": 2.3094215573014547e-05, "loss": 2.3062, "step": 25230 }, { "epoch": 0.323072, "grad_norm": 1.5625, "learning_rate": 2.308910390444077e-05, "loss": 2.2999, "step": 25240 }, { "epoch": 0.3232, "grad_norm": 1.8125, "learning_rate": 2.3083990910924802e-05, "loss": 2.295, "step": 25250 }, { "epoch": 0.323328, "grad_norm": 1.5859375, "learning_rate": 2.307887659330414e-05, "loss": 2.2761, "step": 25260 }, { "epoch": 0.323456, "grad_norm": 1.625, "learning_rate": 2.3073760952416456e-05, "loss": 2.2595, "step": 25270 }, { "epoch": 0.323584, "grad_norm": 1.4453125, "learning_rate": 2.306864398909966e-05, "loss": 2.2836, "step": 25280 }, { "epoch": 0.323712, "grad_norm": 1.5546875, "learning_rate": 2.306352570419189e-05, "loss": 2.3109, "step": 25290 }, { "epoch": 0.32384, "grad_norm": 1.53125, "learning_rate": 2.3058406098531476e-05, "loss": 2.2736, "step": 25300 }, { "epoch": 0.323968, "grad_norm": 1.5859375, "learning_rate": 2.3053285172956976e-05, "loss": 2.2693, "step": 25310 }, { "epoch": 0.324096, "grad_norm": 1.5078125, "learning_rate": 2.3048162928307165e-05, "loss": 2.2526, "step": 25320 }, { "epoch": 0.324224, "grad_norm": 1.546875, "learning_rate": 2.3043039365421038e-05, "loss": 2.2928, "step": 25330 }, { "epoch": 0.324352, "grad_norm": 1.4609375, "learning_rate": 2.30379144851378e-05, "loss": 2.2986, "step": 25340 }, { "epoch": 0.32448, "grad_norm": 1.4765625, "learning_rate": 2.3032788288296867e-05, "loss": 2.2695, "step": 25350 }, { "epoch": 0.324608, "grad_norm": 1.5234375, "learning_rate": 2.3027660775737883e-05, "loss": 2.2688, "step": 25360 }, { "epoch": 0.324736, "grad_norm": 1.4609375, "learning_rate": 2.3022531948300696e-05, "loss": 2.2923, "step": 25370 }, { "epoch": 0.324864, "grad_norm": 1.40625, "learning_rate": 2.3017401806825378e-05, "loss": 2.3059, "step": 25380 }, { "epoch": 0.324992, "grad_norm": 1.4296875, "learning_rate": 2.301227035215221e-05, "loss": 2.283, "step": 25390 }, { "epoch": 0.32512, "grad_norm": 2.0, "learning_rate": 2.3007137585121693e-05, "loss": 2.3041, "step": 25400 }, { "epoch": 0.325248, "grad_norm": 1.5, "learning_rate": 2.300200350657454e-05, "loss": 2.2994, "step": 25410 }, { "epoch": 0.325376, "grad_norm": 1.5703125, "learning_rate": 2.2996868117351677e-05, "loss": 2.2893, "step": 25420 }, { "epoch": 0.325504, "grad_norm": 1.8046875, "learning_rate": 2.299173141829425e-05, "loss": 2.2957, "step": 25430 }, { "epoch": 0.325632, "grad_norm": 1.6171875, "learning_rate": 2.298659341024361e-05, "loss": 2.2691, "step": 25440 }, { "epoch": 0.32576, "grad_norm": 1.46875, "learning_rate": 2.298145409404134e-05, "loss": 2.2779, "step": 25450 }, { "epoch": 0.325888, "grad_norm": 1.5078125, "learning_rate": 2.2976313470529216e-05, "loss": 2.2894, "step": 25460 }, { "epoch": 0.326016, "grad_norm": 1.5, "learning_rate": 2.2971171540549246e-05, "loss": 2.2523, "step": 25470 }, { "epoch": 0.326144, "grad_norm": 1.5, "learning_rate": 2.2966028304943645e-05, "loss": 2.2722, "step": 25480 }, { "epoch": 0.326272, "grad_norm": 1.5546875, "learning_rate": 2.2960883764554834e-05, "loss": 2.3149, "step": 25490 }, { "epoch": 0.3264, "grad_norm": 1.546875, "learning_rate": 2.2955737920225463e-05, "loss": 2.292, "step": 25500 }, { "epoch": 0.326528, "grad_norm": 1.5, "learning_rate": 2.2950590772798373e-05, "loss": 2.3023, "step": 25510 }, { "epoch": 0.326656, "grad_norm": 1.5390625, "learning_rate": 2.2945442323116653e-05, "loss": 2.2781, "step": 25520 }, { "epoch": 0.326784, "grad_norm": 1.6015625, "learning_rate": 2.2940292572023575e-05, "loss": 2.2668, "step": 25530 }, { "epoch": 0.326912, "grad_norm": 1.6171875, "learning_rate": 2.2935141520362628e-05, "loss": 2.2647, "step": 25540 }, { "epoch": 0.32704, "grad_norm": 1.546875, "learning_rate": 2.292998916897753e-05, "loss": 2.2683, "step": 25550 }, { "epoch": 0.327168, "grad_norm": 1.7265625, "learning_rate": 2.2924835518712203e-05, "loss": 2.2922, "step": 25560 }, { "epoch": 0.327296, "grad_norm": 1.9375, "learning_rate": 2.2919680570410776e-05, "loss": 2.2667, "step": 25570 }, { "epoch": 0.327424, "grad_norm": 1.6953125, "learning_rate": 2.2914524324917598e-05, "loss": 2.3089, "step": 25580 }, { "epoch": 0.327552, "grad_norm": 1.5234375, "learning_rate": 2.2909366783077225e-05, "loss": 2.2775, "step": 25590 }, { "epoch": 0.32768, "grad_norm": 1.53125, "learning_rate": 2.2904207945734436e-05, "loss": 2.277, "step": 25600 }, { "epoch": 0.327808, "grad_norm": 1.53125, "learning_rate": 2.2899047813734213e-05, "loss": 2.307, "step": 25610 }, { "epoch": 0.327936, "grad_norm": 1.890625, "learning_rate": 2.2893886387921743e-05, "loss": 2.2943, "step": 25620 }, { "epoch": 0.328064, "grad_norm": 1.515625, "learning_rate": 2.288872366914244e-05, "loss": 2.3134, "step": 25630 }, { "epoch": 0.328192, "grad_norm": 1.53125, "learning_rate": 2.2883559658241928e-05, "loss": 2.3059, "step": 25640 }, { "epoch": 0.32832, "grad_norm": 1.5625, "learning_rate": 2.287839435606603e-05, "loss": 2.3099, "step": 25650 }, { "epoch": 0.328448, "grad_norm": 1.7109375, "learning_rate": 2.287322776346079e-05, "loss": 2.2837, "step": 25660 }, { "epoch": 0.328576, "grad_norm": 1.6640625, "learning_rate": 2.286805988127247e-05, "loss": 2.2954, "step": 25670 }, { "epoch": 0.328704, "grad_norm": 1.6796875, "learning_rate": 2.2862890710347525e-05, "loss": 2.2918, "step": 25680 }, { "epoch": 0.328832, "grad_norm": 1.46875, "learning_rate": 2.2857720251532635e-05, "loss": 2.3069, "step": 25690 }, { "epoch": 0.32896, "grad_norm": 1.5078125, "learning_rate": 2.2852548505674688e-05, "loss": 2.2853, "step": 25700 }, { "epoch": 0.329088, "grad_norm": 1.5859375, "learning_rate": 2.2847375473620786e-05, "loss": 2.2692, "step": 25710 }, { "epoch": 0.329216, "grad_norm": 1.7890625, "learning_rate": 2.2842201156218228e-05, "loss": 2.295, "step": 25720 }, { "epoch": 0.329344, "grad_norm": 1.65625, "learning_rate": 2.2837025554314543e-05, "loss": 2.2858, "step": 25730 }, { "epoch": 0.329472, "grad_norm": 1.5078125, "learning_rate": 2.2831848668757454e-05, "loss": 2.255, "step": 25740 }, { "epoch": 0.3296, "grad_norm": 1.7109375, "learning_rate": 2.2826670500394902e-05, "loss": 2.3011, "step": 25750 }, { "epoch": 0.329728, "grad_norm": 1.7421875, "learning_rate": 2.282149105007504e-05, "loss": 2.2979, "step": 25760 }, { "epoch": 0.329856, "grad_norm": 1.671875, "learning_rate": 2.2816310318646227e-05, "loss": 2.2977, "step": 25770 }, { "epoch": 0.329984, "grad_norm": 1.546875, "learning_rate": 2.281112830695703e-05, "loss": 2.3054, "step": 25780 }, { "epoch": 0.330112, "grad_norm": 1.6171875, "learning_rate": 2.2805945015856234e-05, "loss": 2.3013, "step": 25790 }, { "epoch": 0.33024, "grad_norm": 1.71875, "learning_rate": 2.2800760446192824e-05, "loss": 2.2642, "step": 25800 }, { "epoch": 0.330368, "grad_norm": 1.5390625, "learning_rate": 2.2795574598816e-05, "loss": 2.2881, "step": 25810 }, { "epoch": 0.330496, "grad_norm": 6.53125, "learning_rate": 2.2790387474575168e-05, "loss": 2.3013, "step": 25820 }, { "epoch": 0.330624, "grad_norm": 1.515625, "learning_rate": 2.278519907431995e-05, "loss": 2.3121, "step": 25830 }, { "epoch": 0.330752, "grad_norm": 1.671875, "learning_rate": 2.278000939890016e-05, "loss": 2.3013, "step": 25840 }, { "epoch": 0.33088, "grad_norm": 1.765625, "learning_rate": 2.2774818449165848e-05, "loss": 2.2999, "step": 25850 }, { "epoch": 0.331008, "grad_norm": 1.6015625, "learning_rate": 2.2769626225967246e-05, "loss": 2.2903, "step": 25860 }, { "epoch": 0.331136, "grad_norm": 1.6640625, "learning_rate": 2.2764432730154814e-05, "loss": 2.2862, "step": 25870 }, { "epoch": 0.331264, "grad_norm": 1.453125, "learning_rate": 2.2759237962579207e-05, "loss": 2.2801, "step": 25880 }, { "epoch": 0.331392, "grad_norm": 1.4921875, "learning_rate": 2.2754041924091297e-05, "loss": 2.309, "step": 25890 }, { "epoch": 0.33152, "grad_norm": 5.875, "learning_rate": 2.2748844615542158e-05, "loss": 2.2667, "step": 25900 }, { "epoch": 0.331648, "grad_norm": 1.671875, "learning_rate": 2.2743646037783083e-05, "loss": 2.2906, "step": 25910 }, { "epoch": 0.331776, "grad_norm": 1.65625, "learning_rate": 2.273844619166555e-05, "loss": 2.292, "step": 25920 }, { "epoch": 0.331904, "grad_norm": 1.515625, "learning_rate": 2.2733245078041276e-05, "loss": 2.3019, "step": 25930 }, { "epoch": 0.332032, "grad_norm": 1.5546875, "learning_rate": 2.272804269776216e-05, "loss": 2.2925, "step": 25940 }, { "epoch": 0.33216, "grad_norm": 1.640625, "learning_rate": 2.2722839051680318e-05, "loss": 2.2789, "step": 25950 }, { "epoch": 0.332288, "grad_norm": 3.03125, "learning_rate": 2.2717634140648076e-05, "loss": 2.2932, "step": 25960 }, { "epoch": 0.332416, "grad_norm": 1.4921875, "learning_rate": 2.2712427965517962e-05, "loss": 2.294, "step": 25970 }, { "epoch": 0.332544, "grad_norm": 1.578125, "learning_rate": 2.2707220527142718e-05, "loss": 2.2945, "step": 25980 }, { "epoch": 0.332672, "grad_norm": 1.4921875, "learning_rate": 2.2702011826375285e-05, "loss": 2.3151, "step": 25990 }, { "epoch": 0.3328, "grad_norm": 1.453125, "learning_rate": 2.2696801864068808e-05, "loss": 2.2882, "step": 26000 }, { "epoch": 0.332928, "grad_norm": 1.6796875, "learning_rate": 2.2691590641076653e-05, "loss": 2.2591, "step": 26010 }, { "epoch": 0.333056, "grad_norm": 1.5859375, "learning_rate": 2.2686378158252385e-05, "loss": 2.2913, "step": 26020 }, { "epoch": 0.333184, "grad_norm": 1.4609375, "learning_rate": 2.268116441644977e-05, "loss": 2.2654, "step": 26030 }, { "epoch": 0.333312, "grad_norm": 1.640625, "learning_rate": 2.267594941652278e-05, "loss": 2.3144, "step": 26040 }, { "epoch": 0.33344, "grad_norm": 1.6796875, "learning_rate": 2.2670733159325613e-05, "loss": 2.2765, "step": 26050 }, { "epoch": 0.333568, "grad_norm": 1.734375, "learning_rate": 2.2665515645712646e-05, "loss": 2.2592, "step": 26060 }, { "epoch": 0.333696, "grad_norm": 1.6171875, "learning_rate": 2.2660296876538472e-05, "loss": 2.2654, "step": 26070 }, { "epoch": 0.333824, "grad_norm": 2.6875, "learning_rate": 2.2655076852657893e-05, "loss": 2.2823, "step": 26080 }, { "epoch": 0.333952, "grad_norm": 1.5546875, "learning_rate": 2.2649855574925925e-05, "loss": 2.3137, "step": 26090 }, { "epoch": 0.33408, "grad_norm": 1.5859375, "learning_rate": 2.2644633044197766e-05, "loss": 2.2747, "step": 26100 }, { "epoch": 0.334208, "grad_norm": 1.4453125, "learning_rate": 2.2639409261328838e-05, "loss": 2.2819, "step": 26110 }, { "epoch": 0.334336, "grad_norm": 1.953125, "learning_rate": 2.2634184227174765e-05, "loss": 2.2833, "step": 26120 }, { "epoch": 0.334464, "grad_norm": 1.546875, "learning_rate": 2.2628957942591368e-05, "loss": 2.2947, "step": 26130 }, { "epoch": 0.334592, "grad_norm": 1.5390625, "learning_rate": 2.2623730408434676e-05, "loss": 2.2872, "step": 26140 }, { "epoch": 0.33472, "grad_norm": 1.6171875, "learning_rate": 2.2618501625560933e-05, "loss": 2.3022, "step": 26150 }, { "epoch": 0.334848, "grad_norm": 1.4921875, "learning_rate": 2.2613271594826573e-05, "loss": 2.2674, "step": 26160 }, { "epoch": 0.334976, "grad_norm": 1.4921875, "learning_rate": 2.2608040317088245e-05, "loss": 2.2734, "step": 26170 }, { "epoch": 0.335104, "grad_norm": 1.75, "learning_rate": 2.2602807793202793e-05, "loss": 2.2769, "step": 26180 }, { "epoch": 0.335232, "grad_norm": 1.734375, "learning_rate": 2.2597574024027276e-05, "loss": 2.2933, "step": 26190 }, { "epoch": 0.33536, "grad_norm": 1.609375, "learning_rate": 2.2592339010418946e-05, "loss": 2.2601, "step": 26200 }, { "epoch": 0.335488, "grad_norm": 1.6640625, "learning_rate": 2.2587102753235268e-05, "loss": 2.2933, "step": 26210 }, { "epoch": 0.335616, "grad_norm": 1.484375, "learning_rate": 2.2581865253333906e-05, "loss": 2.2582, "step": 26220 }, { "epoch": 0.335744, "grad_norm": 1.5625, "learning_rate": 2.2576626511572722e-05, "loss": 2.2968, "step": 26230 }, { "epoch": 0.335872, "grad_norm": 1.671875, "learning_rate": 2.257138652880979e-05, "loss": 2.2828, "step": 26240 }, { "epoch": 0.336, "grad_norm": 1.4765625, "learning_rate": 2.2566145305903395e-05, "loss": 2.2709, "step": 26250 }, { "epoch": 0.336128, "grad_norm": 1.4921875, "learning_rate": 2.256090284371201e-05, "loss": 2.2636, "step": 26260 }, { "epoch": 0.336256, "grad_norm": 1.59375, "learning_rate": 2.2555659143094303e-05, "loss": 2.2702, "step": 26270 }, { "epoch": 0.336384, "grad_norm": 1.4921875, "learning_rate": 2.2550414204909174e-05, "loss": 2.2812, "step": 26280 }, { "epoch": 0.336512, "grad_norm": 1.546875, "learning_rate": 2.2545168030015705e-05, "loss": 2.2983, "step": 26290 }, { "epoch": 0.33664, "grad_norm": 1.5390625, "learning_rate": 2.2539920619273176e-05, "loss": 2.2815, "step": 26300 }, { "epoch": 0.336768, "grad_norm": 1.5625, "learning_rate": 2.253467197354109e-05, "loss": 2.3072, "step": 26310 }, { "epoch": 0.336896, "grad_norm": 1.6171875, "learning_rate": 2.2529422093679135e-05, "loss": 2.3006, "step": 26320 }, { "epoch": 0.337024, "grad_norm": 1.59375, "learning_rate": 2.252417098054721e-05, "loss": 2.2713, "step": 26330 }, { "epoch": 0.337152, "grad_norm": 1.640625, "learning_rate": 2.251891863500541e-05, "loss": 2.2495, "step": 26340 }, { "epoch": 0.33728, "grad_norm": 1.6171875, "learning_rate": 2.251366505791404e-05, "loss": 2.283, "step": 26350 }, { "epoch": 0.337408, "grad_norm": 1.546875, "learning_rate": 2.250841025013359e-05, "loss": 2.2813, "step": 26360 }, { "epoch": 0.337536, "grad_norm": 1.6484375, "learning_rate": 2.250315421252478e-05, "loss": 2.2678, "step": 26370 }, { "epoch": 0.337664, "grad_norm": 2.515625, "learning_rate": 2.24978969459485e-05, "loss": 2.2991, "step": 26380 }, { "epoch": 0.337792, "grad_norm": 1.546875, "learning_rate": 2.249263845126586e-05, "loss": 2.2986, "step": 26390 }, { "epoch": 0.33792, "grad_norm": 1.6484375, "learning_rate": 2.2487378729338174e-05, "loss": 2.2686, "step": 26400 }, { "epoch": 0.338048, "grad_norm": 1.4921875, "learning_rate": 2.248211778102694e-05, "loss": 2.313, "step": 26410 }, { "epoch": 0.338176, "grad_norm": 1.5859375, "learning_rate": 2.2476855607193866e-05, "loss": 2.2812, "step": 26420 }, { "epoch": 0.338304, "grad_norm": 1.5625, "learning_rate": 2.2471592208700873e-05, "loss": 2.317, "step": 26430 }, { "epoch": 0.338432, "grad_norm": 1.5390625, "learning_rate": 2.2466327586410063e-05, "loss": 2.3089, "step": 26440 }, { "epoch": 0.33856, "grad_norm": 1.515625, "learning_rate": 2.2461061741183743e-05, "loss": 2.3031, "step": 26450 }, { "epoch": 0.338688, "grad_norm": 1.96875, "learning_rate": 2.2455794673884437e-05, "loss": 2.3075, "step": 26460 }, { "epoch": 0.338816, "grad_norm": 1.5234375, "learning_rate": 2.245052638537484e-05, "loss": 2.2856, "step": 26470 }, { "epoch": 0.338944, "grad_norm": 1.6015625, "learning_rate": 2.244525687651788e-05, "loss": 2.29, "step": 26480 }, { "epoch": 0.339072, "grad_norm": 1.515625, "learning_rate": 2.243998614817665e-05, "loss": 2.2548, "step": 26490 }, { "epoch": 0.3392, "grad_norm": 1.4765625, "learning_rate": 2.2434714201214478e-05, "loss": 2.3199, "step": 26500 }, { "epoch": 0.339328, "grad_norm": 1.765625, "learning_rate": 2.2429441036494866e-05, "loss": 2.2643, "step": 26510 }, { "epoch": 0.339456, "grad_norm": 1.5859375, "learning_rate": 2.2424166654881516e-05, "loss": 2.2866, "step": 26520 }, { "epoch": 0.339584, "grad_norm": 1.4375, "learning_rate": 2.2418891057238355e-05, "loss": 2.2769, "step": 26530 }, { "epoch": 0.339712, "grad_norm": 1.4921875, "learning_rate": 2.2413614244429477e-05, "loss": 2.2841, "step": 26540 }, { "epoch": 0.33984, "grad_norm": 1.6015625, "learning_rate": 2.2408336217319193e-05, "loss": 2.26, "step": 26550 }, { "epoch": 0.339968, "grad_norm": 1.5703125, "learning_rate": 2.240305697677201e-05, "loss": 2.2879, "step": 26560 }, { "epoch": 0.340096, "grad_norm": 1.5546875, "learning_rate": 2.2397776523652636e-05, "loss": 2.2778, "step": 26570 }, { "epoch": 0.340224, "grad_norm": 1.7265625, "learning_rate": 2.239249485882597e-05, "loss": 2.2872, "step": 26580 }, { "epoch": 0.340352, "grad_norm": 1.5703125, "learning_rate": 2.2387211983157113e-05, "loss": 2.2803, "step": 26590 }, { "epoch": 0.34048, "grad_norm": 1.671875, "learning_rate": 2.2381927897511375e-05, "loss": 2.2858, "step": 26600 }, { "epoch": 0.340608, "grad_norm": 1.5390625, "learning_rate": 2.2376642602754243e-05, "loss": 2.3135, "step": 26610 }, { "epoch": 0.340736, "grad_norm": 1.609375, "learning_rate": 2.2371356099751415e-05, "loss": 2.2736, "step": 26620 }, { "epoch": 0.340864, "grad_norm": 1.703125, "learning_rate": 2.2366068389368793e-05, "loss": 2.2915, "step": 26630 }, { "epoch": 0.340992, "grad_norm": 1.515625, "learning_rate": 2.2360779472472468e-05, "loss": 2.3114, "step": 26640 }, { "epoch": 0.34112, "grad_norm": 1.515625, "learning_rate": 2.235548934992872e-05, "loss": 2.3117, "step": 26650 }, { "epoch": 0.341248, "grad_norm": 1.4921875, "learning_rate": 2.2350198022604053e-05, "loss": 2.2888, "step": 26660 }, { "epoch": 0.341376, "grad_norm": 1.5859375, "learning_rate": 2.2344905491365138e-05, "loss": 2.2952, "step": 26670 }, { "epoch": 0.341504, "grad_norm": 1.984375, "learning_rate": 2.233961175707886e-05, "loss": 2.2693, "step": 26680 }, { "epoch": 0.341632, "grad_norm": 1.640625, "learning_rate": 2.2334316820612293e-05, "loss": 2.2717, "step": 26690 }, { "epoch": 0.34176, "grad_norm": 1.4140625, "learning_rate": 2.2329020682832728e-05, "loss": 2.3149, "step": 26700 }, { "epoch": 0.341888, "grad_norm": 2.1875, "learning_rate": 2.2323723344607627e-05, "loss": 2.2915, "step": 26710 }, { "epoch": 0.342016, "grad_norm": 2.9375, "learning_rate": 2.231842480680466e-05, "loss": 2.2906, "step": 26720 }, { "epoch": 0.342144, "grad_norm": 1.5625, "learning_rate": 2.231312507029169e-05, "loss": 2.2793, "step": 26730 }, { "epoch": 0.342272, "grad_norm": 1.4296875, "learning_rate": 2.230782413593679e-05, "loss": 2.2692, "step": 26740 }, { "epoch": 0.3424, "grad_norm": 1.578125, "learning_rate": 2.2302522004608207e-05, "loss": 2.2991, "step": 26750 }, { "epoch": 0.342528, "grad_norm": 1.546875, "learning_rate": 2.2297218677174398e-05, "loss": 2.29, "step": 26760 }, { "epoch": 0.342656, "grad_norm": 1.53125, "learning_rate": 2.2291914154504018e-05, "loss": 2.258, "step": 26770 }, { "epoch": 0.342784, "grad_norm": 1.5390625, "learning_rate": 2.2286608437465906e-05, "loss": 2.2643, "step": 26780 }, { "epoch": 0.342912, "grad_norm": 2.65625, "learning_rate": 2.228130152692911e-05, "loss": 2.2679, "step": 26790 }, { "epoch": 0.34304, "grad_norm": 1.5, "learning_rate": 2.2275993423762864e-05, "loss": 2.2858, "step": 26800 }, { "epoch": 0.343168, "grad_norm": 1.5546875, "learning_rate": 2.22706841288366e-05, "loss": 2.3005, "step": 26810 }, { "epoch": 0.343296, "grad_norm": 3.328125, "learning_rate": 2.226537364301995e-05, "loss": 2.2822, "step": 26820 }, { "epoch": 0.343424, "grad_norm": 1.453125, "learning_rate": 2.2260061967182728e-05, "loss": 2.2998, "step": 26830 }, { "epoch": 0.343552, "grad_norm": 1.65625, "learning_rate": 2.2254749102194962e-05, "loss": 2.2788, "step": 26840 }, { "epoch": 0.34368, "grad_norm": 1.5546875, "learning_rate": 2.224943504892686e-05, "loss": 2.2614, "step": 26850 }, { "epoch": 0.343808, "grad_norm": 3.328125, "learning_rate": 2.2244119808248824e-05, "loss": 2.2607, "step": 26860 }, { "epoch": 0.343936, "grad_norm": 1.5546875, "learning_rate": 2.2238803381031462e-05, "loss": 2.2777, "step": 26870 }, { "epoch": 0.344064, "grad_norm": 1.625, "learning_rate": 2.223348576814557e-05, "loss": 2.2921, "step": 26880 }, { "epoch": 0.344192, "grad_norm": 1.6015625, "learning_rate": 2.2228166970462133e-05, "loss": 2.2599, "step": 26890 }, { "epoch": 0.34432, "grad_norm": 1.703125, "learning_rate": 2.2222846988852344e-05, "loss": 2.2754, "step": 26900 }, { "epoch": 0.344448, "grad_norm": 1.9453125, "learning_rate": 2.221752582418757e-05, "loss": 2.2757, "step": 26910 }, { "epoch": 0.344576, "grad_norm": 1.53125, "learning_rate": 2.221220347733939e-05, "loss": 2.2997, "step": 26920 }, { "epoch": 0.344704, "grad_norm": 1.5234375, "learning_rate": 2.220687994917957e-05, "loss": 2.2727, "step": 26930 }, { "epoch": 0.344832, "grad_norm": 1.6484375, "learning_rate": 2.2201555240580062e-05, "loss": 2.2856, "step": 26940 }, { "epoch": 0.34496, "grad_norm": 1.5859375, "learning_rate": 2.219622935241303e-05, "loss": 2.3151, "step": 26950 }, { "epoch": 0.345088, "grad_norm": 1.390625, "learning_rate": 2.2190902285550805e-05, "loss": 2.2591, "step": 26960 }, { "epoch": 0.345216, "grad_norm": 1.9296875, "learning_rate": 2.2185574040865943e-05, "loss": 2.2674, "step": 26970 }, { "epoch": 0.345344, "grad_norm": 1.5625, "learning_rate": 2.218024461923116e-05, "loss": 2.3004, "step": 26980 }, { "epoch": 0.345472, "grad_norm": 1.453125, "learning_rate": 2.217491402151939e-05, "loss": 2.2688, "step": 26990 }, { "epoch": 0.3456, "grad_norm": 1.5703125, "learning_rate": 2.2169582248603743e-05, "loss": 2.3184, "step": 27000 }, { "epoch": 0.345728, "grad_norm": 1.5625, "learning_rate": 2.216424930135753e-05, "loss": 2.2629, "step": 27010 }, { "epoch": 0.345856, "grad_norm": 1.78125, "learning_rate": 2.215891518065426e-05, "loss": 2.3192, "step": 27020 }, { "epoch": 0.345984, "grad_norm": 1.6328125, "learning_rate": 2.2153579887367616e-05, "loss": 2.2854, "step": 27030 }, { "epoch": 0.346112, "grad_norm": 1.546875, "learning_rate": 2.2148243422371492e-05, "loss": 2.2791, "step": 27040 }, { "epoch": 0.34624, "grad_norm": 1.515625, "learning_rate": 2.2142905786539966e-05, "loss": 2.3041, "step": 27050 }, { "epoch": 0.346368, "grad_norm": 1.5, "learning_rate": 2.2137566980747306e-05, "loss": 2.2631, "step": 27060 }, { "epoch": 0.346496, "grad_norm": 1.5703125, "learning_rate": 2.2132227005867968e-05, "loss": 2.2771, "step": 27070 }, { "epoch": 0.346624, "grad_norm": 1.5546875, "learning_rate": 2.2126885862776612e-05, "loss": 2.2726, "step": 27080 }, { "epoch": 0.346752, "grad_norm": 1.515625, "learning_rate": 2.212154355234808e-05, "loss": 2.3096, "step": 27090 }, { "epoch": 0.34688, "grad_norm": 1.71875, "learning_rate": 2.2116200075457405e-05, "loss": 2.2967, "step": 27100 }, { "epoch": 0.347008, "grad_norm": 1.59375, "learning_rate": 2.211085543297982e-05, "loss": 2.2583, "step": 27110 }, { "epoch": 0.347136, "grad_norm": 1.8203125, "learning_rate": 2.2105509625790735e-05, "loss": 2.2937, "step": 27120 }, { "epoch": 0.347264, "grad_norm": 1.6328125, "learning_rate": 2.2100162654765764e-05, "loss": 2.2741, "step": 27130 }, { "epoch": 0.347392, "grad_norm": 1.625, "learning_rate": 2.20948145207807e-05, "loss": 2.2952, "step": 27140 }, { "epoch": 0.34752, "grad_norm": 1.40625, "learning_rate": 2.2089465224711536e-05, "loss": 2.263, "step": 27150 }, { "epoch": 0.347648, "grad_norm": 1.6015625, "learning_rate": 2.2084114767434455e-05, "loss": 2.2709, "step": 27160 }, { "epoch": 0.347776, "grad_norm": 1.4921875, "learning_rate": 2.207876314982582e-05, "loss": 2.3049, "step": 27170 }, { "epoch": 0.347904, "grad_norm": 1.734375, "learning_rate": 2.207341037276219e-05, "loss": 2.2994, "step": 27180 }, { "epoch": 0.348032, "grad_norm": 1.6171875, "learning_rate": 2.2068056437120324e-05, "loss": 2.2826, "step": 27190 }, { "epoch": 0.34816, "grad_norm": 1.6875, "learning_rate": 2.2062701343777152e-05, "loss": 2.3002, "step": 27200 }, { "epoch": 0.348288, "grad_norm": 1.546875, "learning_rate": 2.205734509360981e-05, "loss": 2.3115, "step": 27210 }, { "epoch": 0.348416, "grad_norm": 1.8359375, "learning_rate": 2.2051987687495612e-05, "loss": 2.3097, "step": 27220 }, { "epoch": 0.348544, "grad_norm": 1.7421875, "learning_rate": 2.2046629126312066e-05, "loss": 2.2893, "step": 27230 }, { "epoch": 0.348672, "grad_norm": 1.6796875, "learning_rate": 2.2041269410936876e-05, "loss": 2.2827, "step": 27240 }, { "epoch": 0.3488, "grad_norm": 1.625, "learning_rate": 2.203590854224792e-05, "loss": 2.2794, "step": 27250 }, { "epoch": 0.348928, "grad_norm": 1.625, "learning_rate": 2.203054652112328e-05, "loss": 2.2865, "step": 27260 }, { "epoch": 0.349056, "grad_norm": 1.53125, "learning_rate": 2.2025183348441217e-05, "loss": 2.2961, "step": 27270 }, { "epoch": 0.349184, "grad_norm": 2.09375, "learning_rate": 2.2019819025080175e-05, "loss": 2.2741, "step": 27280 }, { "epoch": 0.349312, "grad_norm": 1.59375, "learning_rate": 2.201445355191881e-05, "loss": 2.3136, "step": 27290 }, { "epoch": 0.34944, "grad_norm": 1.7421875, "learning_rate": 2.2009086929835938e-05, "loss": 2.2639, "step": 27300 }, { "epoch": 0.349568, "grad_norm": 1.796875, "learning_rate": 2.200371915971059e-05, "loss": 2.3022, "step": 27310 }, { "epoch": 0.349696, "grad_norm": 1.515625, "learning_rate": 2.1998350242421957e-05, "loss": 2.2967, "step": 27320 }, { "epoch": 0.349824, "grad_norm": 1.4296875, "learning_rate": 2.1992980178849445e-05, "loss": 2.2881, "step": 27330 }, { "epoch": 0.349952, "grad_norm": 1.5859375, "learning_rate": 2.1987608969872628e-05, "loss": 2.2907, "step": 27340 }, { "epoch": 0.35008, "grad_norm": 1.546875, "learning_rate": 2.1982236616371276e-05, "loss": 2.2909, "step": 27350 }, { "epoch": 0.350208, "grad_norm": 1.546875, "learning_rate": 2.197686311922535e-05, "loss": 2.2788, "step": 27360 }, { "epoch": 0.350336, "grad_norm": 1.6953125, "learning_rate": 2.1971488479314988e-05, "loss": 2.2902, "step": 27370 }, { "epoch": 0.350464, "grad_norm": 1.59375, "learning_rate": 2.1966112697520518e-05, "loss": 2.2634, "step": 27380 }, { "epoch": 0.350592, "grad_norm": 1.46875, "learning_rate": 2.196073577472247e-05, "loss": 2.2427, "step": 27390 }, { "epoch": 0.35072, "grad_norm": 1.59375, "learning_rate": 2.1955357711801534e-05, "loss": 2.2567, "step": 27400 }, { "epoch": 0.350848, "grad_norm": 1.453125, "learning_rate": 2.1949978509638608e-05, "loss": 2.3022, "step": 27410 }, { "epoch": 0.350976, "grad_norm": 1.6015625, "learning_rate": 2.1944598169114774e-05, "loss": 2.3087, "step": 27420 }, { "epoch": 0.351104, "grad_norm": 1.609375, "learning_rate": 2.1939216691111294e-05, "loss": 2.3131, "step": 27430 }, { "epoch": 0.351232, "grad_norm": 1.4921875, "learning_rate": 2.1933834076509613e-05, "loss": 2.2801, "step": 27440 }, { "epoch": 0.35136, "grad_norm": 3.90625, "learning_rate": 2.1928450326191373e-05, "loss": 2.2748, "step": 27450 }, { "epoch": 0.351488, "grad_norm": 1.6640625, "learning_rate": 2.1923065441038398e-05, "loss": 2.2709, "step": 27460 }, { "epoch": 0.351616, "grad_norm": 8.0, "learning_rate": 2.19176794219327e-05, "loss": 2.2837, "step": 27470 }, { "epoch": 0.351744, "grad_norm": 1.5, "learning_rate": 2.1912292269756465e-05, "loss": 2.2926, "step": 27480 }, { "epoch": 0.351872, "grad_norm": 1.5625, "learning_rate": 2.1906903985392076e-05, "loss": 2.3006, "step": 27490 }, { "epoch": 0.352, "grad_norm": 1.5546875, "learning_rate": 2.19015145697221e-05, "loss": 2.3016, "step": 27500 }, { "epoch": 0.352128, "grad_norm": 1.546875, "learning_rate": 2.1896124023629293e-05, "loss": 2.2505, "step": 27510 }, { "epoch": 0.352256, "grad_norm": 1.6171875, "learning_rate": 2.189073234799658e-05, "loss": 2.3087, "step": 27520 }, { "epoch": 0.352384, "grad_norm": 1.6640625, "learning_rate": 2.1885339543707097e-05, "loss": 2.289, "step": 27530 }, { "epoch": 0.352512, "grad_norm": 1.8125, "learning_rate": 2.1879945611644135e-05, "loss": 2.2718, "step": 27540 }, { "epoch": 0.35264, "grad_norm": 1.515625, "learning_rate": 2.187455055269119e-05, "loss": 2.2911, "step": 27550 }, { "epoch": 0.352768, "grad_norm": 1.4453125, "learning_rate": 2.186915436773194e-05, "loss": 2.2771, "step": 27560 }, { "epoch": 0.352896, "grad_norm": 1.5625, "learning_rate": 2.186375705765024e-05, "loss": 2.2673, "step": 27570 }, { "epoch": 0.353024, "grad_norm": 1.796875, "learning_rate": 2.185835862333014e-05, "loss": 2.2784, "step": 27580 }, { "epoch": 0.353152, "grad_norm": 1.9453125, "learning_rate": 2.1852959065655865e-05, "loss": 2.2897, "step": 27590 }, { "epoch": 0.35328, "grad_norm": 1.453125, "learning_rate": 2.184755838551183e-05, "loss": 2.3272, "step": 27600 }, { "epoch": 0.353408, "grad_norm": 1.8984375, "learning_rate": 2.184215658378262e-05, "loss": 2.276, "step": 27610 }, { "epoch": 0.353536, "grad_norm": 1.59375, "learning_rate": 2.1836753661353027e-05, "loss": 2.2655, "step": 27620 }, { "epoch": 0.353664, "grad_norm": 1.5703125, "learning_rate": 2.183134961910801e-05, "loss": 2.2848, "step": 27630 }, { "epoch": 0.353792, "grad_norm": 2.03125, "learning_rate": 2.1825944457932717e-05, "loss": 2.2745, "step": 27640 }, { "epoch": 0.35392, "grad_norm": 7.28125, "learning_rate": 2.1820538178712476e-05, "loss": 2.2789, "step": 27650 }, { "epoch": 0.354048, "grad_norm": 1.6015625, "learning_rate": 2.18151307823328e-05, "loss": 2.2928, "step": 27660 }, { "epoch": 0.354176, "grad_norm": 1.484375, "learning_rate": 2.180972226967939e-05, "loss": 2.2859, "step": 27670 }, { "epoch": 0.354304, "grad_norm": 1.578125, "learning_rate": 2.1804312641638117e-05, "loss": 2.2697, "step": 27680 }, { "epoch": 0.354432, "grad_norm": 1.5, "learning_rate": 2.179890189909505e-05, "loss": 2.2886, "step": 27690 }, { "epoch": 0.35456, "grad_norm": 1.625, "learning_rate": 2.179349004293643e-05, "loss": 2.2709, "step": 27700 }, { "epoch": 0.354688, "grad_norm": 1.5078125, "learning_rate": 2.1788077074048677e-05, "loss": 2.2543, "step": 27710 }, { "epoch": 0.354816, "grad_norm": 1.703125, "learning_rate": 2.1782662993318404e-05, "loss": 2.2878, "step": 27720 }, { "epoch": 0.354944, "grad_norm": 1.5390625, "learning_rate": 2.1777247801632412e-05, "loss": 2.2797, "step": 27730 }, { "epoch": 0.355072, "grad_norm": 1.7265625, "learning_rate": 2.1771831499877665e-05, "loss": 2.3166, "step": 27740 }, { "epoch": 0.3552, "grad_norm": 1.5859375, "learning_rate": 2.1766414088941313e-05, "loss": 2.277, "step": 27750 }, { "epoch": 0.355328, "grad_norm": 1.6171875, "learning_rate": 2.17609955697107e-05, "loss": 2.2817, "step": 27760 }, { "epoch": 0.355456, "grad_norm": 1.7421875, "learning_rate": 2.1755575943073348e-05, "loss": 2.3206, "step": 27770 }, { "epoch": 0.355584, "grad_norm": 1.5546875, "learning_rate": 2.1750155209916944e-05, "loss": 2.2824, "step": 27780 }, { "epoch": 0.355712, "grad_norm": 1.6015625, "learning_rate": 2.1744733371129375e-05, "loss": 2.2786, "step": 27790 }, { "epoch": 0.35584, "grad_norm": 1.484375, "learning_rate": 2.1739310427598703e-05, "loss": 2.3004, "step": 27800 }, { "epoch": 0.355968, "grad_norm": 2.09375, "learning_rate": 2.173388638021317e-05, "loss": 2.2927, "step": 27810 }, { "epoch": 0.356096, "grad_norm": 1.578125, "learning_rate": 2.17284612298612e-05, "loss": 2.2659, "step": 27820 }, { "epoch": 0.356224, "grad_norm": 1.3984375, "learning_rate": 2.1723034977431398e-05, "loss": 2.2921, "step": 27830 }, { "epoch": 0.356352, "grad_norm": 1.5390625, "learning_rate": 2.1717607623812545e-05, "loss": 2.2534, "step": 27840 }, { "epoch": 0.35648, "grad_norm": 1.578125, "learning_rate": 2.1712179169893607e-05, "loss": 2.2903, "step": 27850 }, { "epoch": 0.356608, "grad_norm": 1.609375, "learning_rate": 2.1706749616563733e-05, "loss": 2.2651, "step": 27860 }, { "epoch": 0.356736, "grad_norm": 1.4453125, "learning_rate": 2.1701318964712242e-05, "loss": 2.3069, "step": 27870 }, { "epoch": 0.356864, "grad_norm": 1.6171875, "learning_rate": 2.1695887215228647e-05, "loss": 2.2648, "step": 27880 }, { "epoch": 0.356992, "grad_norm": 1.53125, "learning_rate": 2.1690454369002625e-05, "loss": 2.278, "step": 27890 }, { "epoch": 0.35712, "grad_norm": 1.5234375, "learning_rate": 2.168502042692404e-05, "loss": 2.2959, "step": 27900 }, { "epoch": 0.357248, "grad_norm": 1.6875, "learning_rate": 2.1679585389882947e-05, "loss": 2.2772, "step": 27910 }, { "epoch": 0.357376, "grad_norm": 1.71875, "learning_rate": 2.1674149258769562e-05, "loss": 2.2808, "step": 27920 }, { "epoch": 0.357504, "grad_norm": 1.5703125, "learning_rate": 2.1668712034474286e-05, "loss": 2.3271, "step": 27930 }, { "epoch": 0.357632, "grad_norm": 1.703125, "learning_rate": 2.1663273717887703e-05, "loss": 2.2908, "step": 27940 }, { "epoch": 0.35776, "grad_norm": 1.6171875, "learning_rate": 2.1657834309900574e-05, "loss": 2.278, "step": 27950 }, { "epoch": 0.357888, "grad_norm": 1.6640625, "learning_rate": 2.1652393811403838e-05, "loss": 2.3115, "step": 27960 }, { "epoch": 0.358016, "grad_norm": 29.75, "learning_rate": 2.164695222328861e-05, "loss": 2.2798, "step": 27970 }, { "epoch": 0.358144, "grad_norm": 1.5546875, "learning_rate": 2.1641509546446193e-05, "loss": 2.2755, "step": 27980 }, { "epoch": 0.358272, "grad_norm": 1.484375, "learning_rate": 2.1636065781768054e-05, "loss": 2.2898, "step": 27990 }, { "epoch": 0.3584, "grad_norm": 1.6484375, "learning_rate": 2.1630620930145854e-05, "loss": 2.2604, "step": 28000 }, { "epoch": 0.358528, "grad_norm": 1.921875, "learning_rate": 2.162517499247142e-05, "loss": 2.3032, "step": 28010 }, { "epoch": 0.358656, "grad_norm": 1.5234375, "learning_rate": 2.1619727969636758e-05, "loss": 2.2746, "step": 28020 }, { "epoch": 0.358784, "grad_norm": 1.625, "learning_rate": 2.161427986253406e-05, "loss": 2.2929, "step": 28030 }, { "epoch": 0.358912, "grad_norm": 1.625, "learning_rate": 2.1608830672055687e-05, "loss": 2.2901, "step": 28040 }, { "epoch": 0.35904, "grad_norm": 1.546875, "learning_rate": 2.1603380399094182e-05, "loss": 2.3165, "step": 28050 }, { "epoch": 0.359168, "grad_norm": 7.46875, "learning_rate": 2.159792904454226e-05, "loss": 2.2655, "step": 28060 }, { "epoch": 0.359296, "grad_norm": 1.6171875, "learning_rate": 2.1592476609292823e-05, "loss": 2.2715, "step": 28070 }, { "epoch": 0.359424, "grad_norm": 1.578125, "learning_rate": 2.1587023094238942e-05, "loss": 2.2609, "step": 28080 }, { "epoch": 0.359552, "grad_norm": 1.4296875, "learning_rate": 2.1581568500273863e-05, "loss": 2.2633, "step": 28090 }, { "epoch": 0.35968, "grad_norm": 1.4375, "learning_rate": 2.157611282829102e-05, "loss": 2.2408, "step": 28100 }, { "epoch": 0.359808, "grad_norm": 1.7734375, "learning_rate": 2.1570656079184014e-05, "loss": 2.3317, "step": 28110 }, { "epoch": 0.359936, "grad_norm": 1.6015625, "learning_rate": 2.1565198253846622e-05, "loss": 2.2901, "step": 28120 }, { "epoch": 0.360064, "grad_norm": 1.5859375, "learning_rate": 2.15597393531728e-05, "loss": 2.2891, "step": 28130 }, { "epoch": 0.360192, "grad_norm": 1.84375, "learning_rate": 2.155427937805668e-05, "loss": 2.3129, "step": 28140 }, { "epoch": 0.36032, "grad_norm": 1.59375, "learning_rate": 2.1548818329392582e-05, "loss": 2.2583, "step": 28150 }, { "epoch": 0.360448, "grad_norm": 1.5625, "learning_rate": 2.154335620807497e-05, "loss": 2.2701, "step": 28160 }, { "epoch": 0.360576, "grad_norm": 1.4375, "learning_rate": 2.153789301499851e-05, "loss": 2.2808, "step": 28170 }, { "epoch": 0.360704, "grad_norm": 1.6640625, "learning_rate": 2.153242875105805e-05, "loss": 2.2792, "step": 28180 }, { "epoch": 0.360832, "grad_norm": 1.6171875, "learning_rate": 2.1526963417148586e-05, "loss": 2.2924, "step": 28190 }, { "epoch": 0.36096, "grad_norm": 1.703125, "learning_rate": 2.152149701416531e-05, "loss": 2.2795, "step": 28200 }, { "epoch": 0.361088, "grad_norm": 1.5703125, "learning_rate": 2.1516029543003577e-05, "loss": 2.2584, "step": 28210 }, { "epoch": 0.361216, "grad_norm": 1.6484375, "learning_rate": 2.151056100455894e-05, "loss": 2.2734, "step": 28220 }, { "epoch": 0.361344, "grad_norm": 1.6171875, "learning_rate": 2.150509139972709e-05, "loss": 2.3061, "step": 28230 }, { "epoch": 0.361472, "grad_norm": 1.53125, "learning_rate": 2.1499620729403918e-05, "loss": 2.303, "step": 28240 }, { "epoch": 0.3616, "grad_norm": 1.4453125, "learning_rate": 2.149414899448549e-05, "loss": 2.2498, "step": 28250 }, { "epoch": 0.361728, "grad_norm": 1.546875, "learning_rate": 2.148867619586803e-05, "loss": 2.2862, "step": 28260 }, { "epoch": 0.361856, "grad_norm": 1.578125, "learning_rate": 2.148320233444796e-05, "loss": 2.3068, "step": 28270 }, { "epoch": 0.361984, "grad_norm": 1.59375, "learning_rate": 2.1477727411121854e-05, "loss": 2.2931, "step": 28280 }, { "epoch": 0.362112, "grad_norm": 1.5859375, "learning_rate": 2.147225142678647e-05, "loss": 2.2975, "step": 28290 }, { "epoch": 0.36224, "grad_norm": 1.6484375, "learning_rate": 2.1466774382338737e-05, "loss": 2.3061, "step": 28300 }, { "epoch": 0.362368, "grad_norm": 1.5234375, "learning_rate": 2.1461296278675757e-05, "loss": 2.2799, "step": 28310 }, { "epoch": 0.362496, "grad_norm": 1.84375, "learning_rate": 2.1455817116694816e-05, "loss": 2.2715, "step": 28320 }, { "epoch": 0.362624, "grad_norm": 1.875, "learning_rate": 2.145033689729336e-05, "loss": 2.3019, "step": 28330 }, { "epoch": 0.362752, "grad_norm": 1.5078125, "learning_rate": 2.144485562136901e-05, "loss": 2.3028, "step": 28340 }, { "epoch": 0.36288, "grad_norm": 1.609375, "learning_rate": 2.1439373289819563e-05, "loss": 2.3011, "step": 28350 }, { "epoch": 0.363008, "grad_norm": 1.5546875, "learning_rate": 2.1433889903542992e-05, "loss": 2.2928, "step": 28360 }, { "epoch": 0.363136, "grad_norm": 1.546875, "learning_rate": 2.1428405463437445e-05, "loss": 2.2794, "step": 28370 }, { "epoch": 0.363264, "grad_norm": 1.578125, "learning_rate": 2.142291997040123e-05, "loss": 2.2777, "step": 28380 }, { "epoch": 0.363392, "grad_norm": 1.515625, "learning_rate": 2.1417433425332834e-05, "loss": 2.2922, "step": 28390 }, { "epoch": 0.36352, "grad_norm": 1.5625, "learning_rate": 2.1411945829130917e-05, "loss": 2.2771, "step": 28400 }, { "epoch": 0.363648, "grad_norm": 1.5078125, "learning_rate": 2.1406457182694315e-05, "loss": 2.2749, "step": 28410 }, { "epoch": 0.363776, "grad_norm": 1.4375, "learning_rate": 2.1400967486922037e-05, "loss": 2.291, "step": 28420 }, { "epoch": 0.363904, "grad_norm": 1.703125, "learning_rate": 2.139547674271325e-05, "loss": 2.2957, "step": 28430 }, { "epoch": 0.364032, "grad_norm": 1.59375, "learning_rate": 2.1389984950967298e-05, "loss": 2.2851, "step": 28440 }, { "epoch": 0.36416, "grad_norm": 1.6484375, "learning_rate": 2.1384492112583716e-05, "loss": 2.289, "step": 28450 }, { "epoch": 0.364288, "grad_norm": 1.5390625, "learning_rate": 2.137899822846219e-05, "loss": 2.2577, "step": 28460 }, { "epoch": 0.364416, "grad_norm": 1.59375, "learning_rate": 2.137350329950257e-05, "loss": 2.2814, "step": 28470 }, { "epoch": 0.364544, "grad_norm": 1.53125, "learning_rate": 2.13680073266049e-05, "loss": 2.2911, "step": 28480 }, { "epoch": 0.364672, "grad_norm": 1.609375, "learning_rate": 2.1362510310669383e-05, "loss": 2.2724, "step": 28490 }, { "epoch": 0.3648, "grad_norm": 1.4921875, "learning_rate": 2.1357012252596397e-05, "loss": 2.2789, "step": 28500 }, { "epoch": 0.364928, "grad_norm": 1.4375, "learning_rate": 2.1351513153286478e-05, "loss": 2.2576, "step": 28510 }, { "epoch": 0.365056, "grad_norm": 1.453125, "learning_rate": 2.1346013013640352e-05, "loss": 2.2828, "step": 28520 }, { "epoch": 0.365184, "grad_norm": 1.546875, "learning_rate": 2.1340511834558907e-05, "loss": 2.301, "step": 28530 }, { "epoch": 0.365312, "grad_norm": 1.546875, "learning_rate": 2.1335009616943192e-05, "loss": 2.2855, "step": 28540 }, { "epoch": 0.36544, "grad_norm": 1.5390625, "learning_rate": 2.1329506361694435e-05, "loss": 2.3078, "step": 28550 }, { "epoch": 0.365568, "grad_norm": 1.5, "learning_rate": 2.132400206971404e-05, "loss": 2.2949, "step": 28560 }, { "epoch": 0.365696, "grad_norm": 1.546875, "learning_rate": 2.1318496741903567e-05, "loss": 2.2898, "step": 28570 }, { "epoch": 0.365824, "grad_norm": 1.46875, "learning_rate": 2.1312990379164758e-05, "loss": 2.3019, "step": 28580 }, { "epoch": 0.365952, "grad_norm": 1.578125, "learning_rate": 2.130748298239951e-05, "loss": 2.2935, "step": 28590 }, { "epoch": 0.36608, "grad_norm": 1.6015625, "learning_rate": 2.1301974552509918e-05, "loss": 2.2746, "step": 28600 }, { "epoch": 0.366208, "grad_norm": 1.4765625, "learning_rate": 2.129646509039821e-05, "loss": 2.2683, "step": 28610 }, { "epoch": 0.366336, "grad_norm": 1.6328125, "learning_rate": 2.1290954596966792e-05, "loss": 2.245, "step": 28620 }, { "epoch": 0.366464, "grad_norm": 1.6484375, "learning_rate": 2.128544307311827e-05, "loss": 2.2935, "step": 28630 }, { "epoch": 0.366592, "grad_norm": 1.6015625, "learning_rate": 2.127993051975538e-05, "loss": 2.2876, "step": 28640 }, { "epoch": 0.36672, "grad_norm": 1.6015625, "learning_rate": 2.127441693778105e-05, "loss": 2.3043, "step": 28650 }, { "epoch": 0.366848, "grad_norm": 1.6796875, "learning_rate": 2.1268902328098356e-05, "loss": 2.2879, "step": 28660 }, { "epoch": 0.366976, "grad_norm": 1.53125, "learning_rate": 2.1263386691610572e-05, "loss": 2.3142, "step": 28670 }, { "epoch": 0.367104, "grad_norm": 1.5, "learning_rate": 2.1257870029221112e-05, "loss": 2.3048, "step": 28680 }, { "epoch": 0.367232, "grad_norm": 1.59375, "learning_rate": 2.125235234183357e-05, "loss": 2.2893, "step": 28690 }, { "epoch": 0.36736, "grad_norm": 1.7265625, "learning_rate": 2.124683363035171e-05, "loss": 2.2654, "step": 28700 }, { "epoch": 0.367488, "grad_norm": 1.6328125, "learning_rate": 2.1241313895679454e-05, "loss": 2.2757, "step": 28710 }, { "epoch": 0.367616, "grad_norm": 1.609375, "learning_rate": 2.1235793138720912e-05, "loss": 2.2857, "step": 28720 }, { "epoch": 0.367744, "grad_norm": 2.046875, "learning_rate": 2.1230271360380335e-05, "loss": 2.2559, "step": 28730 }, { "epoch": 0.367872, "grad_norm": 1.5546875, "learning_rate": 2.1224748561562158e-05, "loss": 2.2651, "step": 28740 }, { "epoch": 0.368, "grad_norm": 1.8125, "learning_rate": 2.121922474317098e-05, "loss": 2.2655, "step": 28750 }, { "epoch": 0.368128, "grad_norm": 1.4609375, "learning_rate": 2.121369990611157e-05, "loss": 2.2395, "step": 28760 }, { "epoch": 0.368256, "grad_norm": 1.6171875, "learning_rate": 2.120817405128885e-05, "loss": 2.2857, "step": 28770 }, { "epoch": 0.368384, "grad_norm": 1.5234375, "learning_rate": 2.1202647179607927e-05, "loss": 2.2864, "step": 28780 }, { "epoch": 0.368512, "grad_norm": 1.625, "learning_rate": 2.119711929197407e-05, "loss": 2.3028, "step": 28790 }, { "epoch": 0.36864, "grad_norm": 1.4765625, "learning_rate": 2.11915903892927e-05, "loss": 2.3052, "step": 28800 }, { "epoch": 0.368768, "grad_norm": 1.75, "learning_rate": 2.1186060472469422e-05, "loss": 2.2915, "step": 28810 }, { "epoch": 0.368896, "grad_norm": 1.5859375, "learning_rate": 2.1180529542409993e-05, "loss": 2.2987, "step": 28820 }, { "epoch": 0.369024, "grad_norm": 1.5, "learning_rate": 2.117499760002035e-05, "loss": 2.2639, "step": 28830 }, { "epoch": 0.369152, "grad_norm": 1.578125, "learning_rate": 2.116946464620659e-05, "loss": 2.2697, "step": 28840 }, { "epoch": 0.36928, "grad_norm": 1.5859375, "learning_rate": 2.116393068187497e-05, "loss": 2.2889, "step": 28850 }, { "epoch": 0.369408, "grad_norm": 1.640625, "learning_rate": 2.115839570793192e-05, "loss": 2.2946, "step": 28860 }, { "epoch": 0.369536, "grad_norm": 1.6796875, "learning_rate": 2.1152859725284033e-05, "loss": 2.2533, "step": 28870 }, { "epoch": 0.369664, "grad_norm": 1.5546875, "learning_rate": 2.1147322734838063e-05, "loss": 2.2836, "step": 28880 }, { "epoch": 0.369792, "grad_norm": 1.6875, "learning_rate": 2.1141784737500933e-05, "loss": 2.2984, "step": 28890 }, { "epoch": 0.36992, "grad_norm": 1.6796875, "learning_rate": 2.1136245734179728e-05, "loss": 2.2564, "step": 28900 }, { "epoch": 0.370048, "grad_norm": 1.5859375, "learning_rate": 2.1130705725781717e-05, "loss": 2.2907, "step": 28910 }, { "epoch": 0.370176, "grad_norm": 2.09375, "learning_rate": 2.1125164713214295e-05, "loss": 2.2618, "step": 28920 }, { "epoch": 0.370304, "grad_norm": 1.546875, "learning_rate": 2.1119622697385055e-05, "loss": 2.2912, "step": 28930 }, { "epoch": 0.370432, "grad_norm": 1.5234375, "learning_rate": 2.1114079679201742e-05, "loss": 2.2808, "step": 28940 }, { "epoch": 0.37056, "grad_norm": 1.640625, "learning_rate": 2.110853565957227e-05, "loss": 2.2899, "step": 28950 }, { "epoch": 0.370688, "grad_norm": 1.609375, "learning_rate": 2.11029906394047e-05, "loss": 2.2577, "step": 28960 }, { "epoch": 0.370816, "grad_norm": 1.5703125, "learning_rate": 2.109744461960728e-05, "loss": 2.2706, "step": 28970 }, { "epoch": 0.370944, "grad_norm": 3.078125, "learning_rate": 2.1091897601088412e-05, "loss": 2.2861, "step": 28980 }, { "epoch": 0.371072, "grad_norm": 1.515625, "learning_rate": 2.1086349584756658e-05, "loss": 2.274, "step": 28990 }, { "epoch": 0.3712, "grad_norm": 1.5234375, "learning_rate": 2.108080057152075e-05, "loss": 2.2996, "step": 29000 }, { "epoch": 0.371328, "grad_norm": 1.609375, "learning_rate": 2.107525056228958e-05, "loss": 2.2911, "step": 29010 }, { "epoch": 0.371456, "grad_norm": 1.671875, "learning_rate": 2.1069699557972193e-05, "loss": 2.2956, "step": 29020 }, { "epoch": 0.371584, "grad_norm": 1.578125, "learning_rate": 2.1064147559477817e-05, "loss": 2.3216, "step": 29030 }, { "epoch": 0.371712, "grad_norm": 1.625, "learning_rate": 2.1058594567715836e-05, "loss": 2.2847, "step": 29040 }, { "epoch": 0.37184, "grad_norm": 1.4765625, "learning_rate": 2.1053040583595783e-05, "loss": 2.3023, "step": 29050 }, { "epoch": 0.371968, "grad_norm": 1.6640625, "learning_rate": 2.104748560802738e-05, "loss": 2.2866, "step": 29060 }, { "epoch": 0.372096, "grad_norm": 1.625, "learning_rate": 2.1041929641920472e-05, "loss": 2.3055, "step": 29070 }, { "epoch": 0.372224, "grad_norm": 2.015625, "learning_rate": 2.103637268618511e-05, "loss": 2.3046, "step": 29080 }, { "epoch": 0.372352, "grad_norm": 1.4765625, "learning_rate": 2.1030814741731475e-05, "loss": 2.3159, "step": 29090 }, { "epoch": 0.37248, "grad_norm": 1.578125, "learning_rate": 2.1025255809469937e-05, "loss": 2.2758, "step": 29100 }, { "epoch": 0.372608, "grad_norm": 1.6171875, "learning_rate": 2.1019695890310992e-05, "loss": 2.278, "step": 29110 }, { "epoch": 0.372736, "grad_norm": 1.6171875, "learning_rate": 2.1014134985165338e-05, "loss": 2.2805, "step": 29120 }, { "epoch": 0.372864, "grad_norm": 1.5703125, "learning_rate": 2.1008573094943798e-05, "loss": 2.2994, "step": 29130 }, { "epoch": 0.372992, "grad_norm": 1.5234375, "learning_rate": 2.1003010220557387e-05, "loss": 2.2821, "step": 29140 }, { "epoch": 0.37312, "grad_norm": 1.5234375, "learning_rate": 2.099744636291726e-05, "loss": 2.2656, "step": 29150 }, { "epoch": 0.373248, "grad_norm": 1.59375, "learning_rate": 2.0991881522934736e-05, "loss": 2.2781, "step": 29160 }, { "epoch": 0.373376, "grad_norm": 1.5390625, "learning_rate": 2.0986315701521313e-05, "loss": 2.263, "step": 29170 }, { "epoch": 0.373504, "grad_norm": 1.6640625, "learning_rate": 2.098074889958862e-05, "loss": 2.3042, "step": 29180 }, { "epoch": 0.373632, "grad_norm": 1.578125, "learning_rate": 2.0975181118048478e-05, "loss": 2.2856, "step": 29190 }, { "epoch": 0.37376, "grad_norm": 1.5859375, "learning_rate": 2.096961235781284e-05, "loss": 2.2954, "step": 29200 }, { "epoch": 0.373888, "grad_norm": 1.5234375, "learning_rate": 2.096404261979384e-05, "loss": 2.3075, "step": 29210 }, { "epoch": 0.374016, "grad_norm": 7.28125, "learning_rate": 2.0958471904903763e-05, "loss": 2.3115, "step": 29220 }, { "epoch": 0.374144, "grad_norm": 1.8671875, "learning_rate": 2.095290021405506e-05, "loss": 2.2851, "step": 29230 }, { "epoch": 0.374272, "grad_norm": 1.546875, "learning_rate": 2.0947327548160322e-05, "loss": 2.2828, "step": 29240 }, { "epoch": 0.3744, "grad_norm": 1.578125, "learning_rate": 2.0941753908132335e-05, "loss": 2.2794, "step": 29250 }, { "epoch": 0.374528, "grad_norm": 1.546875, "learning_rate": 2.093617929488401e-05, "loss": 2.298, "step": 29260 }, { "epoch": 0.374656, "grad_norm": 4.65625, "learning_rate": 2.0930603709328444e-05, "loss": 2.2821, "step": 29270 }, { "epoch": 0.374784, "grad_norm": 1.8359375, "learning_rate": 2.092502715237887e-05, "loss": 2.2989, "step": 29280 }, { "epoch": 0.374912, "grad_norm": 2.734375, "learning_rate": 2.09194496249487e-05, "loss": 2.2744, "step": 29290 }, { "epoch": 0.37504, "grad_norm": 1.4921875, "learning_rate": 2.091387112795149e-05, "loss": 2.2667, "step": 29300 }, { "epoch": 0.375168, "grad_norm": 1.53125, "learning_rate": 2.0908291662300963e-05, "loss": 2.3001, "step": 29310 }, { "epoch": 0.375296, "grad_norm": 1.65625, "learning_rate": 2.0902711228911005e-05, "loss": 2.2771, "step": 29320 }, { "epoch": 0.375424, "grad_norm": 1.4921875, "learning_rate": 2.089712982869565e-05, "loss": 2.2888, "step": 29330 }, { "epoch": 0.375552, "grad_norm": 1.5546875, "learning_rate": 2.0891547462569087e-05, "loss": 2.2982, "step": 29340 }, { "epoch": 0.37568, "grad_norm": 1.5078125, "learning_rate": 2.0885964131445677e-05, "loss": 2.3053, "step": 29350 }, { "epoch": 0.375808, "grad_norm": 1.7265625, "learning_rate": 2.0880379836239945e-05, "loss": 2.2543, "step": 29360 }, { "epoch": 0.375936, "grad_norm": 1.6640625, "learning_rate": 2.0874794577866547e-05, "loss": 2.2626, "step": 29370 }, { "epoch": 0.376064, "grad_norm": 1.6796875, "learning_rate": 2.086920835724031e-05, "loss": 2.3022, "step": 29380 }, { "epoch": 0.376192, "grad_norm": 1.6640625, "learning_rate": 2.0863621175276232e-05, "loss": 2.2557, "step": 29390 }, { "epoch": 0.37632, "grad_norm": 1.5625, "learning_rate": 2.085803303288945e-05, "loss": 2.2609, "step": 29400 }, { "epoch": 0.376448, "grad_norm": 1.5625, "learning_rate": 2.0852443930995266e-05, "loss": 2.2603, "step": 29410 }, { "epoch": 0.376576, "grad_norm": 1.5625, "learning_rate": 2.0846853870509136e-05, "loss": 2.281, "step": 29420 }, { "epoch": 0.376704, "grad_norm": 1.5625, "learning_rate": 2.084126285234668e-05, "loss": 2.3018, "step": 29430 }, { "epoch": 0.376832, "grad_norm": 1.4921875, "learning_rate": 2.083567087742367e-05, "loss": 2.2957, "step": 29440 }, { "epoch": 0.37696, "grad_norm": 1.6171875, "learning_rate": 2.0830077946656025e-05, "loss": 2.2769, "step": 29450 }, { "epoch": 0.377088, "grad_norm": 1.640625, "learning_rate": 2.082448406095984e-05, "loss": 2.3024, "step": 29460 }, { "epoch": 0.377216, "grad_norm": 1.625, "learning_rate": 2.0818889221251357e-05, "loss": 2.2764, "step": 29470 }, { "epoch": 0.377344, "grad_norm": 1.5859375, "learning_rate": 2.0813293428446975e-05, "loss": 2.293, "step": 29480 }, { "epoch": 0.377472, "grad_norm": 1.5625, "learning_rate": 2.080769668346324e-05, "loss": 2.2813, "step": 29490 }, { "epoch": 0.3776, "grad_norm": 1.5234375, "learning_rate": 2.080209898721687e-05, "loss": 2.2927, "step": 29500 }, { "epoch": 0.377728, "grad_norm": 1.625, "learning_rate": 2.0796500340624725e-05, "loss": 2.2558, "step": 29510 }, { "epoch": 0.377856, "grad_norm": 1.609375, "learning_rate": 2.079090074460383e-05, "loss": 2.2801, "step": 29520 }, { "epoch": 0.377984, "grad_norm": 1.6640625, "learning_rate": 2.0785300200071363e-05, "loss": 2.3003, "step": 29530 }, { "epoch": 0.378112, "grad_norm": 1.625, "learning_rate": 2.0779698707944653e-05, "loss": 2.2761, "step": 29540 }, { "epoch": 0.37824, "grad_norm": 1.578125, "learning_rate": 2.0774096269141193e-05, "loss": 2.2993, "step": 29550 }, { "epoch": 0.378368, "grad_norm": 1.4296875, "learning_rate": 2.0768492884578622e-05, "loss": 2.2827, "step": 29560 }, { "epoch": 0.378496, "grad_norm": 1.734375, "learning_rate": 2.0762888555174736e-05, "loss": 2.2804, "step": 29570 }, { "epoch": 0.378624, "grad_norm": 1.4140625, "learning_rate": 2.0757283281847488e-05, "loss": 2.2783, "step": 29580 }, { "epoch": 0.378752, "grad_norm": 1.6015625, "learning_rate": 2.0751677065514993e-05, "loss": 2.2969, "step": 29590 }, { "epoch": 0.37888, "grad_norm": 1.5, "learning_rate": 2.0746069907095504e-05, "loss": 2.3313, "step": 29600 }, { "epoch": 0.379008, "grad_norm": 1.6171875, "learning_rate": 2.0740461807507435e-05, "loss": 2.3112, "step": 29610 }, { "epoch": 0.379136, "grad_norm": 1.4609375, "learning_rate": 2.073485276766936e-05, "loss": 2.2794, "step": 29620 }, { "epoch": 0.379264, "grad_norm": 1.5390625, "learning_rate": 2.0729242788500006e-05, "loss": 2.2744, "step": 29630 }, { "epoch": 0.379392, "grad_norm": 1.5546875, "learning_rate": 2.0723631870918247e-05, "loss": 2.2353, "step": 29640 }, { "epoch": 0.37952, "grad_norm": 1.59375, "learning_rate": 2.0718020015843116e-05, "loss": 2.2946, "step": 29650 }, { "epoch": 0.379648, "grad_norm": 1.671875, "learning_rate": 2.0712407224193793e-05, "loss": 2.2688, "step": 29660 }, { "epoch": 0.379776, "grad_norm": 2.09375, "learning_rate": 2.070679349688963e-05, "loss": 2.2617, "step": 29670 }, { "epoch": 0.379904, "grad_norm": 1.7578125, "learning_rate": 2.07011788348501e-05, "loss": 2.2545, "step": 29680 }, { "epoch": 0.380032, "grad_norm": 2.03125, "learning_rate": 2.069556323899486e-05, "loss": 2.2934, "step": 29690 }, { "epoch": 0.38016, "grad_norm": 1.4453125, "learning_rate": 2.0689946710243705e-05, "loss": 2.2933, "step": 29700 }, { "epoch": 0.380288, "grad_norm": 1.6328125, "learning_rate": 2.0684329249516588e-05, "loss": 2.2714, "step": 29710 }, { "epoch": 0.380416, "grad_norm": 1.46875, "learning_rate": 2.067871085773361e-05, "loss": 2.2904, "step": 29720 }, { "epoch": 0.380544, "grad_norm": 1.484375, "learning_rate": 2.067309153581502e-05, "loss": 2.3135, "step": 29730 }, { "epoch": 0.380672, "grad_norm": 1.984375, "learning_rate": 2.0667471284681237e-05, "loss": 2.294, "step": 29740 }, { "epoch": 0.3808, "grad_norm": 1.84375, "learning_rate": 2.0661850105252818e-05, "loss": 2.325, "step": 29750 }, { "epoch": 0.380928, "grad_norm": 1.625, "learning_rate": 2.0656227998450472e-05, "loss": 2.2711, "step": 29760 }, { "epoch": 0.381056, "grad_norm": 1.4140625, "learning_rate": 2.065060496519507e-05, "loss": 2.2998, "step": 29770 }, { "epoch": 0.381184, "grad_norm": 1.5625, "learning_rate": 2.064498100640762e-05, "loss": 2.2503, "step": 29780 }, { "epoch": 0.381312, "grad_norm": 1.5234375, "learning_rate": 2.0639356123009293e-05, "loss": 2.2729, "step": 29790 }, { "epoch": 0.38144, "grad_norm": 1.6328125, "learning_rate": 2.0633730315921406e-05, "loss": 2.2749, "step": 29800 }, { "epoch": 0.381568, "grad_norm": 1.546875, "learning_rate": 2.0628103586065435e-05, "loss": 2.2707, "step": 29810 }, { "epoch": 0.381696, "grad_norm": 1.5703125, "learning_rate": 2.0622475934363e-05, "loss": 2.2651, "step": 29820 }, { "epoch": 0.381824, "grad_norm": 1.6796875, "learning_rate": 2.0616847361735867e-05, "loss": 2.2988, "step": 29830 }, { "epoch": 0.381952, "grad_norm": 1.5859375, "learning_rate": 2.0611217869105965e-05, "loss": 2.2917, "step": 29840 }, { "epoch": 0.38208, "grad_norm": 1.3828125, "learning_rate": 2.0605587457395365e-05, "loss": 2.2943, "step": 29850 }, { "epoch": 0.382208, "grad_norm": 1.71875, "learning_rate": 2.059995612752629e-05, "loss": 2.2965, "step": 29860 }, { "epoch": 0.382336, "grad_norm": 1.46875, "learning_rate": 2.0594323880421122e-05, "loss": 2.3046, "step": 29870 }, { "epoch": 0.382464, "grad_norm": 1.7421875, "learning_rate": 2.0588690717002386e-05, "loss": 2.3234, "step": 29880 }, { "epoch": 0.382592, "grad_norm": 1.5, "learning_rate": 2.0583056638192748e-05, "loss": 2.3168, "step": 29890 }, { "epoch": 0.38272, "grad_norm": 1.546875, "learning_rate": 2.057742164491504e-05, "loss": 2.2793, "step": 29900 }, { "epoch": 0.382848, "grad_norm": 1.5234375, "learning_rate": 2.0571785738092236e-05, "loss": 2.3217, "step": 29910 }, { "epoch": 0.382976, "grad_norm": 1.609375, "learning_rate": 2.056614891864746e-05, "loss": 2.25, "step": 29920 }, { "epoch": 0.383104, "grad_norm": 1.5078125, "learning_rate": 2.0560511187503988e-05, "loss": 2.2734, "step": 29930 }, { "epoch": 0.383232, "grad_norm": 1.671875, "learning_rate": 2.055487254558524e-05, "loss": 2.255, "step": 29940 }, { "epoch": 0.38336, "grad_norm": 1.640625, "learning_rate": 2.054923299381479e-05, "loss": 2.2814, "step": 29950 }, { "epoch": 0.383488, "grad_norm": 1.640625, "learning_rate": 2.0543592533116356e-05, "loss": 2.2455, "step": 29960 }, { "epoch": 0.383616, "grad_norm": 1.6171875, "learning_rate": 2.053795116441382e-05, "loss": 2.2654, "step": 29970 }, { "epoch": 0.383744, "grad_norm": 1.578125, "learning_rate": 2.053230888863119e-05, "loss": 2.2419, "step": 29980 }, { "epoch": 0.383872, "grad_norm": 1.6328125, "learning_rate": 2.052666570669263e-05, "loss": 2.3148, "step": 29990 }, { "epoch": 0.384, "grad_norm": 1.5625, "learning_rate": 2.0521021619522472e-05, "loss": 2.2971, "step": 30000 }, { "epoch": 0.384128, "grad_norm": 1.5234375, "learning_rate": 2.051537662804517e-05, "loss": 2.2761, "step": 30010 }, { "epoch": 0.384256, "grad_norm": 1.546875, "learning_rate": 2.0509730733185343e-05, "loss": 2.3057, "step": 30020 }, { "epoch": 0.384384, "grad_norm": 1.59375, "learning_rate": 2.0504083935867736e-05, "loss": 2.2704, "step": 30030 }, { "epoch": 0.384512, "grad_norm": 1.703125, "learning_rate": 2.0498436237017273e-05, "loss": 2.2595, "step": 30040 }, { "epoch": 0.38464, "grad_norm": 1.4609375, "learning_rate": 2.0492787637559012e-05, "loss": 2.2858, "step": 30050 }, { "epoch": 0.384768, "grad_norm": 1.4921875, "learning_rate": 2.0487138138418143e-05, "loss": 2.2914, "step": 30060 }, { "epoch": 0.384896, "grad_norm": 1.484375, "learning_rate": 2.0481487740520026e-05, "loss": 2.2855, "step": 30070 }, { "epoch": 0.385024, "grad_norm": 1.5859375, "learning_rate": 2.0475836444790153e-05, "loss": 2.2801, "step": 30080 }, { "epoch": 0.385152, "grad_norm": 1.65625, "learning_rate": 2.0470184252154182e-05, "loss": 2.2612, "step": 30090 }, { "epoch": 0.38528, "grad_norm": 1.7265625, "learning_rate": 2.046453116353789e-05, "loss": 2.3082, "step": 30100 }, { "epoch": 0.385408, "grad_norm": 1.59375, "learning_rate": 2.045887717986722e-05, "loss": 2.3049, "step": 30110 }, { "epoch": 0.385536, "grad_norm": 1.5234375, "learning_rate": 2.0453222302068266e-05, "loss": 2.3087, "step": 30120 }, { "epoch": 0.385664, "grad_norm": 1.6484375, "learning_rate": 2.0447566531067253e-05, "loss": 2.261, "step": 30130 }, { "epoch": 0.385792, "grad_norm": 1.59375, "learning_rate": 2.0441909867790556e-05, "loss": 2.3027, "step": 30140 }, { "epoch": 0.38592, "grad_norm": 1.4609375, "learning_rate": 2.0436252313164708e-05, "loss": 2.2811, "step": 30150 }, { "epoch": 0.386048, "grad_norm": 1.4765625, "learning_rate": 2.0430593868116364e-05, "loss": 2.3182, "step": 30160 }, { "epoch": 0.386176, "grad_norm": 1.78125, "learning_rate": 2.0424934533572364e-05, "loss": 2.3041, "step": 30170 }, { "epoch": 0.386304, "grad_norm": 1.625, "learning_rate": 2.0419274310459647e-05, "loss": 2.2752, "step": 30180 }, { "epoch": 0.386432, "grad_norm": 1.6875, "learning_rate": 2.0413613199705333e-05, "loss": 2.2689, "step": 30190 }, { "epoch": 0.38656, "grad_norm": 1.4765625, "learning_rate": 2.0407951202236673e-05, "loss": 2.3049, "step": 30200 }, { "epoch": 0.386688, "grad_norm": 1.53125, "learning_rate": 2.040228831898106e-05, "loss": 2.2739, "step": 30210 }, { "epoch": 0.386816, "grad_norm": 1.6875, "learning_rate": 2.039662455086605e-05, "loss": 2.275, "step": 30220 }, { "epoch": 0.386944, "grad_norm": 1.53125, "learning_rate": 2.039095989881931e-05, "loss": 2.2892, "step": 30230 }, { "epoch": 0.387072, "grad_norm": 1.6875, "learning_rate": 2.0385294363768694e-05, "loss": 2.2687, "step": 30240 }, { "epoch": 0.3872, "grad_norm": 1.578125, "learning_rate": 2.037962794664217e-05, "loss": 2.3019, "step": 30250 }, { "epoch": 0.387328, "grad_norm": 1.5078125, "learning_rate": 2.0373960648367857e-05, "loss": 2.2986, "step": 30260 }, { "epoch": 0.387456, "grad_norm": 1.5078125, "learning_rate": 2.0368292469874022e-05, "loss": 2.2663, "step": 30270 }, { "epoch": 0.387584, "grad_norm": 1.6640625, "learning_rate": 2.0362623412089084e-05, "loss": 2.2863, "step": 30280 }, { "epoch": 0.387712, "grad_norm": 1.625, "learning_rate": 2.0356953475941592e-05, "loss": 2.2942, "step": 30290 }, { "epoch": 0.38784, "grad_norm": 1.53125, "learning_rate": 2.035128266236024e-05, "loss": 2.3199, "step": 30300 }, { "epoch": 0.387968, "grad_norm": 1.53125, "learning_rate": 2.0345610972273875e-05, "loss": 2.285, "step": 30310 }, { "epoch": 0.388096, "grad_norm": 1.625, "learning_rate": 2.0339938406611484e-05, "loss": 2.288, "step": 30320 }, { "epoch": 0.388224, "grad_norm": 1.5234375, "learning_rate": 2.033426496630219e-05, "loss": 2.3064, "step": 30330 }, { "epoch": 0.388352, "grad_norm": 1.5390625, "learning_rate": 2.0328590652275268e-05, "loss": 2.2819, "step": 30340 }, { "epoch": 0.38848, "grad_norm": 1.515625, "learning_rate": 2.032291546546014e-05, "loss": 2.2829, "step": 30350 }, { "epoch": 0.388608, "grad_norm": 1.59375, "learning_rate": 2.031723940678636e-05, "loss": 2.2719, "step": 30360 }, { "epoch": 0.388736, "grad_norm": 1.65625, "learning_rate": 2.0311562477183624e-05, "loss": 2.2956, "step": 30370 }, { "epoch": 0.388864, "grad_norm": 1.578125, "learning_rate": 2.030588467758178e-05, "loss": 2.258, "step": 30380 }, { "epoch": 0.388992, "grad_norm": 1.5078125, "learning_rate": 2.0300206008910816e-05, "loss": 2.2767, "step": 30390 }, { "epoch": 0.38912, "grad_norm": 1.5390625, "learning_rate": 2.0294526472100862e-05, "loss": 2.2865, "step": 30400 }, { "epoch": 0.389248, "grad_norm": 1.625, "learning_rate": 2.028884606808218e-05, "loss": 2.2964, "step": 30410 }, { "epoch": 0.389376, "grad_norm": 1.7109375, "learning_rate": 2.028316479778519e-05, "loss": 2.2758, "step": 30420 }, { "epoch": 0.389504, "grad_norm": 1.5078125, "learning_rate": 2.0277482662140455e-05, "loss": 2.3018, "step": 30430 }, { "epoch": 0.389632, "grad_norm": 1.5546875, "learning_rate": 2.027179966207865e-05, "loss": 2.2825, "step": 30440 }, { "epoch": 0.38976, "grad_norm": 1.59375, "learning_rate": 2.026611579853063e-05, "loss": 2.2928, "step": 30450 }, { "epoch": 0.389888, "grad_norm": 1.5390625, "learning_rate": 2.026043107242737e-05, "loss": 2.2729, "step": 30460 }, { "epoch": 0.390016, "grad_norm": 1.53125, "learning_rate": 2.0254745484699992e-05, "loss": 2.2678, "step": 30470 }, { "epoch": 0.390144, "grad_norm": 1.625, "learning_rate": 2.0249059036279756e-05, "loss": 2.2825, "step": 30480 }, { "epoch": 0.390272, "grad_norm": 1.6171875, "learning_rate": 2.0243371728098067e-05, "loss": 2.2995, "step": 30490 }, { "epoch": 0.3904, "grad_norm": 1.5546875, "learning_rate": 2.0237683561086464e-05, "loss": 2.2531, "step": 30500 }, { "epoch": 0.390528, "grad_norm": 1.609375, "learning_rate": 2.023199453617664e-05, "loss": 2.2745, "step": 30510 }, { "epoch": 0.390656, "grad_norm": 1.640625, "learning_rate": 2.0226304654300413e-05, "loss": 2.2897, "step": 30520 }, { "epoch": 0.390784, "grad_norm": 1.515625, "learning_rate": 2.022061391638975e-05, "loss": 2.2786, "step": 30530 }, { "epoch": 0.390912, "grad_norm": 1.5, "learning_rate": 2.0214922323376758e-05, "loss": 2.3144, "step": 30540 }, { "epoch": 0.39104, "grad_norm": 1.546875, "learning_rate": 2.020922987619368e-05, "loss": 2.241, "step": 30550 }, { "epoch": 0.391168, "grad_norm": 1.546875, "learning_rate": 2.0203536575772908e-05, "loss": 2.2646, "step": 30560 }, { "epoch": 0.391296, "grad_norm": 1.5390625, "learning_rate": 2.0197842423046956e-05, "loss": 2.3374, "step": 30570 }, { "epoch": 0.391424, "grad_norm": 1.6328125, "learning_rate": 2.0192147418948497e-05, "loss": 2.2807, "step": 30580 }, { "epoch": 0.391552, "grad_norm": 1.75, "learning_rate": 2.0186451564410335e-05, "loss": 2.3032, "step": 30590 }, { "epoch": 0.39168, "grad_norm": 1.71875, "learning_rate": 2.018075486036541e-05, "loss": 2.2974, "step": 30600 }, { "epoch": 0.391808, "grad_norm": 1.6640625, "learning_rate": 2.0175057307746807e-05, "loss": 2.2892, "step": 30610 }, { "epoch": 0.391936, "grad_norm": 1.7265625, "learning_rate": 2.0169358907487754e-05, "loss": 2.2922, "step": 30620 }, { "epoch": 0.392064, "grad_norm": 1.578125, "learning_rate": 2.0163659660521597e-05, "loss": 2.2866, "step": 30630 }, { "epoch": 0.392192, "grad_norm": 1.5546875, "learning_rate": 2.0157959567781848e-05, "loss": 2.271, "step": 30640 }, { "epoch": 0.39232, "grad_norm": 1.7578125, "learning_rate": 2.015225863020214e-05, "loss": 2.2807, "step": 30650 }, { "epoch": 0.392448, "grad_norm": 1.6953125, "learning_rate": 2.014655684871625e-05, "loss": 2.2866, "step": 30660 }, { "epoch": 0.392576, "grad_norm": 1.7734375, "learning_rate": 2.014085422425809e-05, "loss": 2.2916, "step": 30670 }, { "epoch": 0.392704, "grad_norm": 1.703125, "learning_rate": 2.013515075776172e-05, "loss": 2.2752, "step": 30680 }, { "epoch": 0.392832, "grad_norm": 1.6015625, "learning_rate": 2.012944645016132e-05, "loss": 2.2807, "step": 30690 }, { "epoch": 0.39296, "grad_norm": 1.578125, "learning_rate": 2.0123741302391227e-05, "loss": 2.3285, "step": 30700 }, { "epoch": 0.393088, "grad_norm": 2.046875, "learning_rate": 2.01180353153859e-05, "loss": 2.279, "step": 30710 }, { "epoch": 0.393216, "grad_norm": 1.4921875, "learning_rate": 2.011232849007994e-05, "loss": 2.2951, "step": 30720 }, { "epoch": 0.393344, "grad_norm": 2.21875, "learning_rate": 2.01066208274081e-05, "loss": 2.3016, "step": 30730 }, { "epoch": 0.393472, "grad_norm": 1.6796875, "learning_rate": 2.0100912328305253e-05, "loss": 2.2937, "step": 30740 }, { "epoch": 0.3936, "grad_norm": 1.53125, "learning_rate": 2.0095202993706405e-05, "loss": 2.2498, "step": 30750 }, { "epoch": 0.393728, "grad_norm": 1.5546875, "learning_rate": 2.008949282454672e-05, "loss": 2.2901, "step": 30760 }, { "epoch": 0.393856, "grad_norm": 1.578125, "learning_rate": 2.0083781821761476e-05, "loss": 2.2971, "step": 30770 }, { "epoch": 0.393984, "grad_norm": 1.7109375, "learning_rate": 2.00780699862861e-05, "loss": 2.2838, "step": 30780 }, { "epoch": 0.394112, "grad_norm": 1.6328125, "learning_rate": 2.007235731905616e-05, "loss": 2.2746, "step": 30790 }, { "epoch": 0.39424, "grad_norm": 1.5078125, "learning_rate": 2.006664382100734e-05, "loss": 2.2869, "step": 30800 }, { "epoch": 0.394368, "grad_norm": 1.65625, "learning_rate": 2.0060929493075496e-05, "loss": 2.2873, "step": 30810 }, { "epoch": 0.394496, "grad_norm": 1.5625, "learning_rate": 2.0055214336196575e-05, "loss": 2.2617, "step": 30820 }, { "epoch": 0.394624, "grad_norm": 1.609375, "learning_rate": 2.0049498351306687e-05, "loss": 2.3221, "step": 30830 }, { "epoch": 0.394752, "grad_norm": 1.484375, "learning_rate": 2.0043781539342084e-05, "loss": 2.3082, "step": 30840 }, { "epoch": 0.39488, "grad_norm": 1.765625, "learning_rate": 2.003806390123913e-05, "loss": 2.2492, "step": 30850 }, { "epoch": 0.395008, "grad_norm": 1.578125, "learning_rate": 2.0032345437934348e-05, "loss": 2.304, "step": 30860 }, { "epoch": 0.395136, "grad_norm": 1.53125, "learning_rate": 2.002662615036437e-05, "loss": 2.2653, "step": 30870 }, { "epoch": 0.395264, "grad_norm": 1.9375, "learning_rate": 2.0020906039465992e-05, "loss": 2.2651, "step": 30880 }, { "epoch": 0.395392, "grad_norm": 1.4609375, "learning_rate": 2.0015185106176125e-05, "loss": 2.2588, "step": 30890 }, { "epoch": 0.39552, "grad_norm": 1.515625, "learning_rate": 2.0009463351431816e-05, "loss": 2.3066, "step": 30900 }, { "epoch": 0.395648, "grad_norm": 2.25, "learning_rate": 2.0003740776170258e-05, "loss": 2.3178, "step": 30910 }, { "epoch": 0.395776, "grad_norm": 1.5546875, "learning_rate": 1.9998017381328768e-05, "loss": 2.2906, "step": 30920 }, { "epoch": 0.395904, "grad_norm": 2.15625, "learning_rate": 1.9992293167844802e-05, "loss": 2.2549, "step": 30930 }, { "epoch": 0.396032, "grad_norm": 1.4765625, "learning_rate": 1.9986568136655944e-05, "loss": 2.327, "step": 30940 }, { "epoch": 0.39616, "grad_norm": 1.515625, "learning_rate": 1.9980842288699923e-05, "loss": 2.2709, "step": 30950 }, { "epoch": 0.396288, "grad_norm": 1.4921875, "learning_rate": 1.9975115624914595e-05, "loss": 2.2807, "step": 30960 }, { "epoch": 0.396416, "grad_norm": 1.65625, "learning_rate": 1.9969388146237944e-05, "loss": 2.2969, "step": 30970 }, { "epoch": 0.396544, "grad_norm": 1.8125, "learning_rate": 1.9963659853608096e-05, "loss": 2.2711, "step": 30980 }, { "epoch": 0.396672, "grad_norm": 1.5390625, "learning_rate": 1.995793074796331e-05, "loss": 2.307, "step": 30990 }, { "epoch": 0.3968, "grad_norm": 1.53125, "learning_rate": 1.9952200830241974e-05, "loss": 2.2446, "step": 31000 }, { "epoch": 0.396928, "grad_norm": 1.53125, "learning_rate": 1.9946470101382618e-05, "loss": 2.2895, "step": 31010 }, { "epoch": 0.397056, "grad_norm": 1.5703125, "learning_rate": 1.994073856232388e-05, "loss": 2.3037, "step": 31020 }, { "epoch": 0.397184, "grad_norm": 1.5625, "learning_rate": 1.9935006214004566e-05, "loss": 2.2947, "step": 31030 }, { "epoch": 0.397312, "grad_norm": 1.625, "learning_rate": 1.992927305736359e-05, "loss": 2.286, "step": 31040 }, { "epoch": 0.39744, "grad_norm": 1.5546875, "learning_rate": 1.992353909334001e-05, "loss": 2.3038, "step": 31050 }, { "epoch": 0.397568, "grad_norm": 1.6875, "learning_rate": 1.9917804322873003e-05, "loss": 2.2911, "step": 31060 }, { "epoch": 0.397696, "grad_norm": 1.546875, "learning_rate": 1.9912068746901897e-05, "loss": 2.2888, "step": 31070 }, { "epoch": 0.397824, "grad_norm": 2.25, "learning_rate": 1.9906332366366134e-05, "loss": 2.2936, "step": 31080 }, { "epoch": 0.397952, "grad_norm": 1.4921875, "learning_rate": 1.99005951822053e-05, "loss": 2.2936, "step": 31090 }, { "epoch": 0.39808, "grad_norm": 1.671875, "learning_rate": 1.98948571953591e-05, "loss": 2.2883, "step": 31100 }, { "epoch": 0.398208, "grad_norm": 1.5859375, "learning_rate": 1.988911840676739e-05, "loss": 2.3011, "step": 31110 }, { "epoch": 0.398336, "grad_norm": 1.734375, "learning_rate": 1.9883378817370148e-05, "loss": 2.3215, "step": 31120 }, { "epoch": 0.398464, "grad_norm": 1.53125, "learning_rate": 1.987763842810746e-05, "loss": 2.2918, "step": 31130 }, { "epoch": 0.398592, "grad_norm": 1.4765625, "learning_rate": 1.987189723991959e-05, "loss": 2.2976, "step": 31140 }, { "epoch": 0.39872, "grad_norm": 1.53125, "learning_rate": 1.986615525374689e-05, "loss": 2.3014, "step": 31150 }, { "epoch": 0.398848, "grad_norm": 1.640625, "learning_rate": 1.9860412470529865e-05, "loss": 2.2705, "step": 31160 }, { "epoch": 0.398976, "grad_norm": 1.734375, "learning_rate": 1.9854668891209148e-05, "loss": 2.2933, "step": 31170 }, { "epoch": 0.399104, "grad_norm": 1.546875, "learning_rate": 1.9848924516725495e-05, "loss": 2.2634, "step": 31180 }, { "epoch": 0.399232, "grad_norm": 1.6484375, "learning_rate": 1.98431793480198e-05, "loss": 2.2865, "step": 31190 }, { "epoch": 0.39936, "grad_norm": 1.7421875, "learning_rate": 1.9837433386033075e-05, "loss": 2.2962, "step": 31200 }, { "epoch": 0.399488, "grad_norm": 1.65625, "learning_rate": 1.9831686631706483e-05, "loss": 2.3073, "step": 31210 }, { "epoch": 0.399616, "grad_norm": 1.53125, "learning_rate": 1.9825939085981304e-05, "loss": 2.2778, "step": 31220 }, { "epoch": 0.399744, "grad_norm": 1.6875, "learning_rate": 1.982019074979894e-05, "loss": 2.2886, "step": 31230 }, { "epoch": 0.399872, "grad_norm": 1.5546875, "learning_rate": 1.981444162410094e-05, "loss": 2.2644, "step": 31240 }, { "epoch": 0.4, "grad_norm": 16.5, "learning_rate": 1.9808691709828966e-05, "loss": 2.3079, "step": 31250 }, { "epoch": 0.400128, "grad_norm": 1.4921875, "learning_rate": 1.9802941007924815e-05, "loss": 2.2576, "step": 31260 }, { "epoch": 0.400256, "grad_norm": 1.671875, "learning_rate": 1.9797189519330422e-05, "loss": 2.3034, "step": 31270 }, { "epoch": 0.400384, "grad_norm": 1.6796875, "learning_rate": 1.9791437244987837e-05, "loss": 2.2926, "step": 31280 }, { "epoch": 0.400512, "grad_norm": 1.765625, "learning_rate": 1.978568418583925e-05, "loss": 2.2733, "step": 31290 }, { "epoch": 0.40064, "grad_norm": 1.59375, "learning_rate": 1.9779930342826972e-05, "loss": 2.2749, "step": 31300 }, { "epoch": 0.400768, "grad_norm": 1.5390625, "learning_rate": 1.9774175716893436e-05, "loss": 2.2892, "step": 31310 }, { "epoch": 0.400896, "grad_norm": 1.453125, "learning_rate": 1.9768420308981227e-05, "loss": 2.275, "step": 31320 }, { "epoch": 0.401024, "grad_norm": 1.578125, "learning_rate": 1.9762664120033033e-05, "loss": 2.2954, "step": 31330 }, { "epoch": 0.401152, "grad_norm": 1.7578125, "learning_rate": 1.9756907150991683e-05, "loss": 2.2942, "step": 31340 }, { "epoch": 0.40128, "grad_norm": 1.6171875, "learning_rate": 1.975114940280013e-05, "loss": 2.2835, "step": 31350 }, { "epoch": 0.401408, "grad_norm": 1.6328125, "learning_rate": 1.974539087640146e-05, "loss": 2.2637, "step": 31360 }, { "epoch": 0.401536, "grad_norm": 1.703125, "learning_rate": 1.9739631572738876e-05, "loss": 2.3136, "step": 31370 }, { "epoch": 0.401664, "grad_norm": 1.4765625, "learning_rate": 1.9733871492755716e-05, "loss": 2.3049, "step": 31380 }, { "epoch": 0.401792, "grad_norm": 1.71875, "learning_rate": 1.9728110637395443e-05, "loss": 2.2931, "step": 31390 }, { "epoch": 0.40192, "grad_norm": 1.671875, "learning_rate": 1.972234900760165e-05, "loss": 2.3041, "step": 31400 }, { "epoch": 0.402048, "grad_norm": 1.7578125, "learning_rate": 1.9716586604318047e-05, "loss": 2.285, "step": 31410 }, { "epoch": 0.402176, "grad_norm": 1.609375, "learning_rate": 1.971082342848849e-05, "loss": 2.3023, "step": 31420 }, { "epoch": 0.402304, "grad_norm": 1.578125, "learning_rate": 1.9705059481056943e-05, "loss": 2.3008, "step": 31430 }, { "epoch": 0.402432, "grad_norm": 1.3828125, "learning_rate": 1.9699294762967495e-05, "loss": 2.2934, "step": 31440 }, { "epoch": 0.40256, "grad_norm": 1.734375, "learning_rate": 1.969352927516438e-05, "loss": 2.2809, "step": 31450 }, { "epoch": 0.402688, "grad_norm": 2.3125, "learning_rate": 1.9687763018591947e-05, "loss": 2.2976, "step": 31460 }, { "epoch": 0.402816, "grad_norm": 1.5390625, "learning_rate": 1.968199599419466e-05, "loss": 2.2929, "step": 31470 }, { "epoch": 0.402944, "grad_norm": 1.421875, "learning_rate": 1.9676228202917127e-05, "loss": 2.2939, "step": 31480 }, { "epoch": 0.403072, "grad_norm": 1.7421875, "learning_rate": 1.9670459645704083e-05, "loss": 2.2902, "step": 31490 }, { "epoch": 0.4032, "grad_norm": 1.5, "learning_rate": 1.9664690323500366e-05, "loss": 2.2631, "step": 31500 }, { "epoch": 0.403328, "grad_norm": 1.5, "learning_rate": 1.9658920237250958e-05, "loss": 2.3101, "step": 31510 }, { "epoch": 0.403456, "grad_norm": 1.609375, "learning_rate": 1.9653149387900965e-05, "loss": 2.2821, "step": 31520 }, { "epoch": 0.403584, "grad_norm": 1.4609375, "learning_rate": 1.964737777639561e-05, "loss": 2.3024, "step": 31530 }, { "epoch": 0.403712, "grad_norm": 4.15625, "learning_rate": 1.964160540368024e-05, "loss": 2.2831, "step": 31540 }, { "epoch": 0.40384, "grad_norm": 1.53125, "learning_rate": 1.9635832270700346e-05, "loss": 2.2746, "step": 31550 }, { "epoch": 0.403968, "grad_norm": 1.53125, "learning_rate": 1.963005837840152e-05, "loss": 2.3108, "step": 31560 }, { "epoch": 0.404096, "grad_norm": 1.59375, "learning_rate": 1.962428372772949e-05, "loss": 2.251, "step": 31570 }, { "epoch": 0.404224, "grad_norm": 1.9375, "learning_rate": 1.96185083196301e-05, "loss": 2.2444, "step": 31580 }, { "epoch": 0.404352, "grad_norm": 3.75, "learning_rate": 1.961273215504933e-05, "loss": 2.2768, "step": 31590 }, { "epoch": 0.40448, "grad_norm": 1.7890625, "learning_rate": 1.960695523493328e-05, "loss": 2.3022, "step": 31600 }, { "epoch": 0.404608, "grad_norm": 1.890625, "learning_rate": 1.9601177560228166e-05, "loss": 2.2539, "step": 31610 }, { "epoch": 0.404736, "grad_norm": 1.5625, "learning_rate": 1.9595399131880336e-05, "loss": 2.281, "step": 31620 }, { "epoch": 0.404864, "grad_norm": 1.609375, "learning_rate": 1.958961995083626e-05, "loss": 2.3094, "step": 31630 }, { "epoch": 0.404992, "grad_norm": 1.8359375, "learning_rate": 1.9583840018042527e-05, "loss": 2.2637, "step": 31640 }, { "epoch": 0.40512, "grad_norm": 1.546875, "learning_rate": 1.957805933444586e-05, "loss": 2.2782, "step": 31650 }, { "epoch": 0.405248, "grad_norm": 1.90625, "learning_rate": 1.9572277900993082e-05, "loss": 2.2746, "step": 31660 }, { "epoch": 0.405376, "grad_norm": 1.6015625, "learning_rate": 1.9566495718631165e-05, "loss": 2.3124, "step": 31670 }, { "epoch": 0.405504, "grad_norm": 1.59375, "learning_rate": 1.956071278830719e-05, "loss": 2.3003, "step": 31680 }, { "epoch": 0.405632, "grad_norm": 1.625, "learning_rate": 1.9554929110968364e-05, "loss": 2.2881, "step": 31690 }, { "epoch": 0.40576, "grad_norm": 1.6328125, "learning_rate": 1.9549144687562013e-05, "loss": 2.3055, "step": 31700 }, { "epoch": 0.405888, "grad_norm": 1.9140625, "learning_rate": 1.9543359519035587e-05, "loss": 2.2797, "step": 31710 }, { "epoch": 0.406016, "grad_norm": 1.6875, "learning_rate": 1.9537573606336667e-05, "loss": 2.2908, "step": 31720 }, { "epoch": 0.406144, "grad_norm": 1.4609375, "learning_rate": 1.9531786950412936e-05, "loss": 2.2564, "step": 31730 }, { "epoch": 0.406272, "grad_norm": 1.5625, "learning_rate": 1.9525999552212216e-05, "loss": 2.2906, "step": 31740 }, { "epoch": 0.4064, "grad_norm": 1.625, "learning_rate": 1.9520211412682448e-05, "loss": 2.2528, "step": 31750 }, { "epoch": 0.406528, "grad_norm": 1.59375, "learning_rate": 1.9514422532771687e-05, "loss": 2.2973, "step": 31760 }, { "epoch": 0.406656, "grad_norm": 1.5703125, "learning_rate": 1.9508632913428115e-05, "loss": 2.285, "step": 31770 }, { "epoch": 0.406784, "grad_norm": 1.6953125, "learning_rate": 1.950284255560003e-05, "loss": 2.2756, "step": 31780 }, { "epoch": 0.406912, "grad_norm": 1.5, "learning_rate": 1.9497051460235856e-05, "loss": 2.3071, "step": 31790 }, { "epoch": 0.40704, "grad_norm": 1.4609375, "learning_rate": 1.9491259628284146e-05, "loss": 2.2588, "step": 31800 }, { "epoch": 0.407168, "grad_norm": 1.515625, "learning_rate": 1.948546706069355e-05, "loss": 2.2834, "step": 31810 }, { "epoch": 0.407296, "grad_norm": 1.4921875, "learning_rate": 1.9479673758412862e-05, "loss": 2.2882, "step": 31820 }, { "epoch": 0.407424, "grad_norm": 1.7421875, "learning_rate": 1.947387972239098e-05, "loss": 2.2845, "step": 31830 }, { "epoch": 0.407552, "grad_norm": 1.421875, "learning_rate": 1.946808495357694e-05, "loss": 2.2851, "step": 31840 }, { "epoch": 0.40768, "grad_norm": 1.53125, "learning_rate": 1.9462289452919872e-05, "loss": 2.2742, "step": 31850 }, { "epoch": 0.407808, "grad_norm": 1.453125, "learning_rate": 1.945649322136905e-05, "loss": 2.2878, "step": 31860 }, { "epoch": 0.407936, "grad_norm": 1.59375, "learning_rate": 1.945069625987386e-05, "loss": 2.286, "step": 31870 }, { "epoch": 0.408064, "grad_norm": 1.5546875, "learning_rate": 1.9444898569383806e-05, "loss": 2.2647, "step": 31880 }, { "epoch": 0.408192, "grad_norm": 2.140625, "learning_rate": 1.943910015084851e-05, "loss": 2.2819, "step": 31890 }, { "epoch": 0.40832, "grad_norm": 1.53125, "learning_rate": 1.943330100521771e-05, "loss": 2.2749, "step": 31900 }, { "epoch": 0.408448, "grad_norm": 1.6875, "learning_rate": 1.9427501133441275e-05, "loss": 2.2721, "step": 31910 }, { "epoch": 0.408576, "grad_norm": 1.5078125, "learning_rate": 1.9421700536469185e-05, "loss": 2.2792, "step": 31920 }, { "epoch": 0.408704, "grad_norm": 1.59375, "learning_rate": 1.941589921525154e-05, "loss": 2.2875, "step": 31930 }, { "epoch": 0.408832, "grad_norm": 1.5234375, "learning_rate": 1.9410097170738553e-05, "loss": 2.3035, "step": 31940 }, { "epoch": 0.40896, "grad_norm": 1.640625, "learning_rate": 1.940429440388057e-05, "loss": 2.28, "step": 31950 }, { "epoch": 0.409088, "grad_norm": 1.640625, "learning_rate": 1.9398490915628033e-05, "loss": 2.2876, "step": 31960 }, { "epoch": 0.409216, "grad_norm": 1.59375, "learning_rate": 1.939268670693153e-05, "loss": 2.3013, "step": 31970 }, { "epoch": 0.409344, "grad_norm": 1.59375, "learning_rate": 1.938688177874174e-05, "loss": 2.2694, "step": 31980 }, { "epoch": 0.409472, "grad_norm": 1.453125, "learning_rate": 1.9381076132009483e-05, "loss": 2.2928, "step": 31990 }, { "epoch": 0.4096, "grad_norm": 1.765625, "learning_rate": 1.9375269767685675e-05, "loss": 2.3062, "step": 32000 }, { "epoch": 0.409728, "grad_norm": 1.53125, "learning_rate": 1.936946268672137e-05, "loss": 2.2821, "step": 32010 }, { "epoch": 0.409856, "grad_norm": 1.6015625, "learning_rate": 1.9363654890067723e-05, "loss": 2.282, "step": 32020 }, { "epoch": 0.409984, "grad_norm": 1.5546875, "learning_rate": 1.9357846378676022e-05, "loss": 2.2757, "step": 32030 }, { "epoch": 0.410112, "grad_norm": 1.578125, "learning_rate": 1.935203715349765e-05, "loss": 2.316, "step": 32040 }, { "epoch": 0.41024, "grad_norm": 1.7734375, "learning_rate": 1.934622721548413e-05, "loss": 2.308, "step": 32050 }, { "epoch": 0.410368, "grad_norm": 1.5703125, "learning_rate": 1.934041656558709e-05, "loss": 2.2788, "step": 32060 }, { "epoch": 0.410496, "grad_norm": 1.703125, "learning_rate": 1.933460520475827e-05, "loss": 2.3209, "step": 32070 }, { "epoch": 0.410624, "grad_norm": 1.5625, "learning_rate": 1.932879313394954e-05, "loss": 2.2954, "step": 32080 }, { "epoch": 0.410752, "grad_norm": 1.53125, "learning_rate": 1.9322980354112877e-05, "loss": 2.2861, "step": 32090 }, { "epoch": 0.41088, "grad_norm": 1.5078125, "learning_rate": 1.931716686620038e-05, "loss": 2.2831, "step": 32100 }, { "epoch": 0.411008, "grad_norm": 2.03125, "learning_rate": 1.9311352671164253e-05, "loss": 2.3027, "step": 32110 }, { "epoch": 0.411136, "grad_norm": 1.5390625, "learning_rate": 1.9305537769956826e-05, "loss": 2.2569, "step": 32120 }, { "epoch": 0.411264, "grad_norm": 1.640625, "learning_rate": 1.929972216353054e-05, "loss": 2.268, "step": 32130 }, { "epoch": 0.411392, "grad_norm": 1.7890625, "learning_rate": 1.9293905852837958e-05, "loss": 2.2619, "step": 32140 }, { "epoch": 0.41152, "grad_norm": 1.5625, "learning_rate": 1.9288088838831754e-05, "loss": 2.3009, "step": 32150 }, { "epoch": 0.411648, "grad_norm": 1.7421875, "learning_rate": 1.9282271122464705e-05, "loss": 2.2878, "step": 32160 }, { "epoch": 0.411776, "grad_norm": 1.578125, "learning_rate": 1.9276452704689724e-05, "loss": 2.2525, "step": 32170 }, { "epoch": 0.411904, "grad_norm": 1.703125, "learning_rate": 1.9270633586459838e-05, "loss": 2.281, "step": 32180 }, { "epoch": 0.412032, "grad_norm": 1.6015625, "learning_rate": 1.9264813768728164e-05, "loss": 2.2783, "step": 32190 }, { "epoch": 0.41216, "grad_norm": 2.453125, "learning_rate": 1.925899325244796e-05, "loss": 2.3211, "step": 32200 }, { "epoch": 0.412288, "grad_norm": 1.5625, "learning_rate": 1.9253172038572587e-05, "loss": 2.257, "step": 32210 }, { "epoch": 0.412416, "grad_norm": 1.6953125, "learning_rate": 1.9247350128055516e-05, "loss": 2.3001, "step": 32220 }, { "epoch": 0.412544, "grad_norm": 1.5859375, "learning_rate": 1.9241527521850344e-05, "loss": 2.3071, "step": 32230 }, { "epoch": 0.412672, "grad_norm": 1.53125, "learning_rate": 1.9235704220910774e-05, "loss": 2.2797, "step": 32240 }, { "epoch": 0.4128, "grad_norm": 1.53125, "learning_rate": 1.9229880226190627e-05, "loss": 2.2754, "step": 32250 }, { "epoch": 0.412928, "grad_norm": 1.5625, "learning_rate": 1.922405553864383e-05, "loss": 2.2995, "step": 32260 }, { "epoch": 0.413056, "grad_norm": 1.5234375, "learning_rate": 1.921823015922443e-05, "loss": 2.2756, "step": 32270 }, { "epoch": 0.413184, "grad_norm": 1.7109375, "learning_rate": 1.9212404088886587e-05, "loss": 2.2758, "step": 32280 }, { "epoch": 0.413312, "grad_norm": 1.7109375, "learning_rate": 1.920657732858458e-05, "loss": 2.3167, "step": 32290 }, { "epoch": 0.41344, "grad_norm": 1.671875, "learning_rate": 1.9200749879272784e-05, "loss": 2.2835, "step": 32300 }, { "epoch": 0.413568, "grad_norm": 2.125, "learning_rate": 1.9194921741905698e-05, "loss": 2.3117, "step": 32310 }, { "epoch": 0.413696, "grad_norm": 1.6796875, "learning_rate": 1.918909291743794e-05, "loss": 2.2232, "step": 32320 }, { "epoch": 0.413824, "grad_norm": 1.4765625, "learning_rate": 1.9183263406824224e-05, "loss": 2.2618, "step": 32330 }, { "epoch": 0.413952, "grad_norm": 1.5546875, "learning_rate": 1.917743321101939e-05, "loss": 2.2701, "step": 32340 }, { "epoch": 0.41408, "grad_norm": 1.703125, "learning_rate": 1.9171602330978385e-05, "loss": 2.2937, "step": 32350 }, { "epoch": 0.414208, "grad_norm": 1.5078125, "learning_rate": 1.9165770767656273e-05, "loss": 2.296, "step": 32360 }, { "epoch": 0.414336, "grad_norm": 1.609375, "learning_rate": 1.915993852200822e-05, "loss": 2.2932, "step": 32370 }, { "epoch": 0.414464, "grad_norm": 1.7578125, "learning_rate": 1.9154105594989516e-05, "loss": 2.3209, "step": 32380 }, { "epoch": 0.414592, "grad_norm": 1.6015625, "learning_rate": 1.914827198755555e-05, "loss": 2.2559, "step": 32390 }, { "epoch": 0.41472, "grad_norm": 1.5859375, "learning_rate": 1.9142437700661825e-05, "loss": 2.2764, "step": 32400 }, { "epoch": 0.414848, "grad_norm": 1.5703125, "learning_rate": 1.9136602735263973e-05, "loss": 2.2735, "step": 32410 }, { "epoch": 0.414976, "grad_norm": 1.5703125, "learning_rate": 1.9130767092317714e-05, "loss": 2.281, "step": 32420 }, { "epoch": 0.415104, "grad_norm": 1.5546875, "learning_rate": 1.9124930772778887e-05, "loss": 2.2876, "step": 32430 }, { "epoch": 0.415232, "grad_norm": 1.8125, "learning_rate": 1.9119093777603444e-05, "loss": 2.2711, "step": 32440 }, { "epoch": 0.41536, "grad_norm": 1.4296875, "learning_rate": 1.911325610774745e-05, "loss": 2.286, "step": 32450 }, { "epoch": 0.415488, "grad_norm": 1.578125, "learning_rate": 1.9107417764167073e-05, "loss": 2.3029, "step": 32460 }, { "epoch": 0.415616, "grad_norm": 1.46875, "learning_rate": 1.91015787478186e-05, "loss": 2.2651, "step": 32470 }, { "epoch": 0.415744, "grad_norm": 1.5546875, "learning_rate": 1.909573905965842e-05, "loss": 2.2886, "step": 32480 }, { "epoch": 0.415872, "grad_norm": 1.515625, "learning_rate": 1.9089898700643034e-05, "loss": 2.2718, "step": 32490 }, { "epoch": 0.416, "grad_norm": 1.5625, "learning_rate": 1.9084057671729055e-05, "loss": 2.2766, "step": 32500 }, { "epoch": 0.416128, "grad_norm": 1.6875, "learning_rate": 1.9078215973873208e-05, "loss": 2.2835, "step": 32510 }, { "epoch": 0.416256, "grad_norm": 1.5703125, "learning_rate": 1.907237360803232e-05, "loss": 2.2697, "step": 32520 }, { "epoch": 0.416384, "grad_norm": 1.6328125, "learning_rate": 1.9066530575163347e-05, "loss": 2.275, "step": 32530 }, { "epoch": 0.416512, "grad_norm": 1.65625, "learning_rate": 1.906068687622332e-05, "loss": 2.2807, "step": 32540 }, { "epoch": 0.41664, "grad_norm": 3.875, "learning_rate": 1.9054842512169406e-05, "loss": 2.2722, "step": 32550 }, { "epoch": 0.416768, "grad_norm": 1.5859375, "learning_rate": 1.9048997483958883e-05, "loss": 2.2893, "step": 32560 }, { "epoch": 0.416896, "grad_norm": 1.7578125, "learning_rate": 1.904315179254911e-05, "loss": 2.2668, "step": 32570 }, { "epoch": 0.417024, "grad_norm": 1.59375, "learning_rate": 1.9037305438897588e-05, "loss": 2.2522, "step": 32580 }, { "epoch": 0.417152, "grad_norm": 1.4921875, "learning_rate": 1.903145842396191e-05, "loss": 2.2925, "step": 32590 }, { "epoch": 0.41728, "grad_norm": 1.40625, "learning_rate": 1.9025610748699768e-05, "loss": 2.2949, "step": 32600 }, { "epoch": 0.417408, "grad_norm": 1.4921875, "learning_rate": 1.901976241406898e-05, "loss": 2.273, "step": 32610 }, { "epoch": 0.417536, "grad_norm": 1.5703125, "learning_rate": 1.9013913421027464e-05, "loss": 2.3026, "step": 32620 }, { "epoch": 0.417664, "grad_norm": 2.015625, "learning_rate": 1.900806377053325e-05, "loss": 2.28, "step": 32630 }, { "epoch": 0.417792, "grad_norm": 1.6328125, "learning_rate": 1.900221346354447e-05, "loss": 2.2763, "step": 32640 }, { "epoch": 0.41792, "grad_norm": 1.5546875, "learning_rate": 1.899636250101936e-05, "loss": 2.2801, "step": 32650 }, { "epoch": 0.418048, "grad_norm": 1.5859375, "learning_rate": 1.899051088391628e-05, "loss": 2.2678, "step": 32660 }, { "epoch": 0.418176, "grad_norm": 1.796875, "learning_rate": 1.8984658613193683e-05, "loss": 2.2856, "step": 32670 }, { "epoch": 0.418304, "grad_norm": 1.625, "learning_rate": 1.897880568981013e-05, "loss": 2.2865, "step": 32680 }, { "epoch": 0.418432, "grad_norm": 1.625, "learning_rate": 1.8972952114724292e-05, "loss": 2.2868, "step": 32690 }, { "epoch": 0.41856, "grad_norm": 1.5625, "learning_rate": 1.8967097888894947e-05, "loss": 2.2887, "step": 32700 }, { "epoch": 0.418688, "grad_norm": 1.5546875, "learning_rate": 1.896124301328098e-05, "loss": 2.2874, "step": 32710 }, { "epoch": 0.418816, "grad_norm": 1.5, "learning_rate": 1.8955387488841375e-05, "loss": 2.3176, "step": 32720 }, { "epoch": 0.418944, "grad_norm": 2.140625, "learning_rate": 1.8949531316535233e-05, "loss": 2.2684, "step": 32730 }, { "epoch": 0.419072, "grad_norm": 1.65625, "learning_rate": 1.8943674497321763e-05, "loss": 2.2856, "step": 32740 }, { "epoch": 0.4192, "grad_norm": 1.5234375, "learning_rate": 1.8937817032160262e-05, "loss": 2.2763, "step": 32750 }, { "epoch": 0.419328, "grad_norm": 1.5703125, "learning_rate": 1.8931958922010152e-05, "loss": 2.2788, "step": 32760 }, { "epoch": 0.419456, "grad_norm": 1.5078125, "learning_rate": 1.892610016783095e-05, "loss": 2.2979, "step": 32770 }, { "epoch": 0.419584, "grad_norm": 1.7578125, "learning_rate": 1.892024077058228e-05, "loss": 2.2536, "step": 32780 }, { "epoch": 0.419712, "grad_norm": 2.5, "learning_rate": 1.891438073122388e-05, "loss": 2.2547, "step": 32790 }, { "epoch": 0.41984, "grad_norm": 1.5859375, "learning_rate": 1.890852005071558e-05, "loss": 2.2842, "step": 32800 }, { "epoch": 0.419968, "grad_norm": 1.65625, "learning_rate": 1.8902658730017324e-05, "loss": 2.2915, "step": 32810 }, { "epoch": 0.420096, "grad_norm": 1.6640625, "learning_rate": 1.8896796770089153e-05, "loss": 2.2668, "step": 32820 }, { "epoch": 0.420224, "grad_norm": 1.59375, "learning_rate": 1.889093417189122e-05, "loss": 2.2685, "step": 32830 }, { "epoch": 0.420352, "grad_norm": 1.7109375, "learning_rate": 1.8885070936383788e-05, "loss": 2.2898, "step": 32840 }, { "epoch": 0.42048, "grad_norm": 1.5546875, "learning_rate": 1.88792070645272e-05, "loss": 2.2513, "step": 32850 }, { "epoch": 0.420608, "grad_norm": 1.5546875, "learning_rate": 1.8873342557281935e-05, "loss": 2.2818, "step": 32860 }, { "epoch": 0.420736, "grad_norm": 1.59375, "learning_rate": 1.8867477415608553e-05, "loss": 2.266, "step": 32870 }, { "epoch": 0.420864, "grad_norm": 1.9296875, "learning_rate": 1.886161164046773e-05, "loss": 2.2554, "step": 32880 }, { "epoch": 0.420992, "grad_norm": 1.609375, "learning_rate": 1.8855745232820236e-05, "loss": 2.283, "step": 32890 }, { "epoch": 0.42112, "grad_norm": 1.546875, "learning_rate": 1.8849878193626955e-05, "loss": 2.2718, "step": 32900 }, { "epoch": 0.421248, "grad_norm": 1.5234375, "learning_rate": 1.884401052384887e-05, "loss": 2.2802, "step": 32910 }, { "epoch": 0.421376, "grad_norm": 1.5625, "learning_rate": 1.883814222444706e-05, "loss": 2.2871, "step": 32920 }, { "epoch": 0.421504, "grad_norm": 1.6015625, "learning_rate": 1.8832273296382722e-05, "loss": 2.2831, "step": 32930 }, { "epoch": 0.421632, "grad_norm": 1.5390625, "learning_rate": 1.8826403740617147e-05, "loss": 2.3079, "step": 32940 }, { "epoch": 0.42176, "grad_norm": 1.609375, "learning_rate": 1.8820533558111724e-05, "loss": 2.2728, "step": 32950 }, { "epoch": 0.421888, "grad_norm": 1.859375, "learning_rate": 1.8814662749827955e-05, "loss": 2.2602, "step": 32960 }, { "epoch": 0.422016, "grad_norm": 1.6484375, "learning_rate": 1.880879131672744e-05, "loss": 2.2968, "step": 32970 }, { "epoch": 0.422144, "grad_norm": 1.7109375, "learning_rate": 1.880291925977188e-05, "loss": 2.3093, "step": 32980 }, { "epoch": 0.422272, "grad_norm": 2.0625, "learning_rate": 1.8797046579923078e-05, "loss": 2.2754, "step": 32990 }, { "epoch": 0.4224, "grad_norm": 7.46875, "learning_rate": 1.879117327814294e-05, "loss": 2.2966, "step": 33000 }, { "epoch": 0.422528, "grad_norm": 1.53125, "learning_rate": 1.878529935539348e-05, "loss": 2.2845, "step": 33010 }, { "epoch": 0.422656, "grad_norm": 1.5390625, "learning_rate": 1.8779424812636804e-05, "loss": 2.3149, "step": 33020 }, { "epoch": 0.422784, "grad_norm": 1.46875, "learning_rate": 1.8773549650835122e-05, "loss": 2.2801, "step": 33030 }, { "epoch": 0.422912, "grad_norm": 1.578125, "learning_rate": 1.8767673870950748e-05, "loss": 2.2799, "step": 33040 }, { "epoch": 0.42304, "grad_norm": 1.578125, "learning_rate": 1.8761797473946097e-05, "loss": 2.2898, "step": 33050 }, { "epoch": 0.423168, "grad_norm": 1.5234375, "learning_rate": 1.875592046078369e-05, "loss": 2.2831, "step": 33060 }, { "epoch": 0.423296, "grad_norm": 1.5625, "learning_rate": 1.875004283242613e-05, "loss": 2.2602, "step": 33070 }, { "epoch": 0.423424, "grad_norm": 1.5859375, "learning_rate": 1.8744164589836147e-05, "loss": 2.3093, "step": 33080 }, { "epoch": 0.423552, "grad_norm": 1.5546875, "learning_rate": 1.8738285733976555e-05, "loss": 2.2672, "step": 33090 }, { "epoch": 0.42368, "grad_norm": 1.4765625, "learning_rate": 1.8732406265810262e-05, "loss": 2.2952, "step": 33100 }, { "epoch": 0.423808, "grad_norm": 1.6875, "learning_rate": 1.87265261863003e-05, "loss": 2.2775, "step": 33110 }, { "epoch": 0.423936, "grad_norm": 1.5703125, "learning_rate": 1.8720645496409783e-05, "loss": 2.3058, "step": 33120 }, { "epoch": 0.424064, "grad_norm": 1.75, "learning_rate": 1.8714764197101928e-05, "loss": 2.2724, "step": 33130 }, { "epoch": 0.424192, "grad_norm": 1.7265625, "learning_rate": 1.8708882289340052e-05, "loss": 2.2819, "step": 33140 }, { "epoch": 0.42432, "grad_norm": 1.6484375, "learning_rate": 1.8702999774087575e-05, "loss": 2.2786, "step": 33150 }, { "epoch": 0.424448, "grad_norm": 1.6875, "learning_rate": 1.8697116652308012e-05, "loss": 2.2988, "step": 33160 }, { "epoch": 0.424576, "grad_norm": 12.4375, "learning_rate": 1.8691232924964985e-05, "loss": 2.3019, "step": 33170 }, { "epoch": 0.424704, "grad_norm": 1.59375, "learning_rate": 1.868534859302221e-05, "loss": 2.3063, "step": 33180 }, { "epoch": 0.424832, "grad_norm": 1.546875, "learning_rate": 1.8679463657443496e-05, "loss": 2.3044, "step": 33190 }, { "epoch": 0.42496, "grad_norm": 1.75, "learning_rate": 1.8673578119192755e-05, "loss": 2.2796, "step": 33200 }, { "epoch": 0.425088, "grad_norm": 1.5546875, "learning_rate": 1.866769197923401e-05, "loss": 2.2639, "step": 33210 }, { "epoch": 0.425216, "grad_norm": 1.5078125, "learning_rate": 1.8661805238531364e-05, "loss": 2.3053, "step": 33220 }, { "epoch": 0.425344, "grad_norm": 1.578125, "learning_rate": 1.8655917898049025e-05, "loss": 2.2778, "step": 33230 }, { "epoch": 0.425472, "grad_norm": 1.734375, "learning_rate": 1.8650029958751313e-05, "loss": 2.2692, "step": 33240 }, { "epoch": 0.4256, "grad_norm": 1.546875, "learning_rate": 1.8644141421602622e-05, "loss": 2.2763, "step": 33250 }, { "epoch": 0.425728, "grad_norm": 3.546875, "learning_rate": 1.8638252287567458e-05, "loss": 2.3067, "step": 33260 }, { "epoch": 0.425856, "grad_norm": 1.65625, "learning_rate": 1.863236255761042e-05, "loss": 2.2648, "step": 33270 }, { "epoch": 0.425984, "grad_norm": 1.5234375, "learning_rate": 1.8626472232696222e-05, "loss": 2.2993, "step": 33280 }, { "epoch": 0.426112, "grad_norm": 3.140625, "learning_rate": 1.8620581313789644e-05, "loss": 2.2683, "step": 33290 }, { "epoch": 0.42624, "grad_norm": 1.5546875, "learning_rate": 1.861468980185558e-05, "loss": 2.3003, "step": 33300 }, { "epoch": 0.426368, "grad_norm": 1.5234375, "learning_rate": 1.860879769785903e-05, "loss": 2.2547, "step": 33310 }, { "epoch": 0.426496, "grad_norm": 1.390625, "learning_rate": 1.8602905002765086e-05, "loss": 2.3006, "step": 33320 }, { "epoch": 0.426624, "grad_norm": 1.5625, "learning_rate": 1.8597011717538918e-05, "loss": 2.2841, "step": 33330 }, { "epoch": 0.426752, "grad_norm": 1.46875, "learning_rate": 1.8591117843145814e-05, "loss": 2.2586, "step": 33340 }, { "epoch": 0.42688, "grad_norm": 1.4921875, "learning_rate": 1.8585223380551157e-05, "loss": 2.2792, "step": 33350 }, { "epoch": 0.427008, "grad_norm": 1.671875, "learning_rate": 1.8579328330720416e-05, "loss": 2.2837, "step": 33360 }, { "epoch": 0.427136, "grad_norm": 1.6015625, "learning_rate": 1.8573432694619162e-05, "loss": 2.289, "step": 33370 }, { "epoch": 0.427264, "grad_norm": 1.765625, "learning_rate": 1.856753647321306e-05, "loss": 2.2691, "step": 33380 }, { "epoch": 0.427392, "grad_norm": 1.6484375, "learning_rate": 1.8561639667467877e-05, "loss": 2.2805, "step": 33390 }, { "epoch": 0.42752, "grad_norm": 1.5234375, "learning_rate": 1.8555742278349467e-05, "loss": 2.2999, "step": 33400 }, { "epoch": 0.427648, "grad_norm": 1.6640625, "learning_rate": 1.8549844306823787e-05, "loss": 2.2747, "step": 33410 }, { "epoch": 0.427776, "grad_norm": 4.03125, "learning_rate": 1.8543945753856887e-05, "loss": 2.3258, "step": 33420 }, { "epoch": 0.427904, "grad_norm": 1.609375, "learning_rate": 1.8538046620414903e-05, "loss": 2.2802, "step": 33430 }, { "epoch": 0.428032, "grad_norm": 1.53125, "learning_rate": 1.8532146907464083e-05, "loss": 2.2783, "step": 33440 }, { "epoch": 0.42816, "grad_norm": 1.53125, "learning_rate": 1.8526246615970756e-05, "loss": 2.289, "step": 33450 }, { "epoch": 0.428288, "grad_norm": 2.03125, "learning_rate": 1.852034574690136e-05, "loss": 2.2606, "step": 33460 }, { "epoch": 0.428416, "grad_norm": 1.5234375, "learning_rate": 1.851444430122241e-05, "loss": 2.2765, "step": 33470 }, { "epoch": 0.428544, "grad_norm": 3.203125, "learning_rate": 1.8508542279900522e-05, "loss": 2.2525, "step": 33480 }, { "epoch": 0.428672, "grad_norm": 1.59375, "learning_rate": 1.8502639683902416e-05, "loss": 2.3016, "step": 33490 }, { "epoch": 0.4288, "grad_norm": 1.546875, "learning_rate": 1.8496736514194895e-05, "loss": 2.2474, "step": 33500 }, { "epoch": 0.428928, "grad_norm": 1.6640625, "learning_rate": 1.8490832771744858e-05, "loss": 2.2645, "step": 33510 }, { "epoch": 0.429056, "grad_norm": 1.546875, "learning_rate": 1.8484928457519303e-05, "loss": 2.2745, "step": 33520 }, { "epoch": 0.429184, "grad_norm": 1.4375, "learning_rate": 1.8479023572485313e-05, "loss": 2.3075, "step": 33530 }, { "epoch": 0.429312, "grad_norm": 1.421875, "learning_rate": 1.8473118117610074e-05, "loss": 2.2689, "step": 33540 }, { "epoch": 0.42944, "grad_norm": 1.53125, "learning_rate": 1.8467212093860864e-05, "loss": 2.2537, "step": 33550 }, { "epoch": 0.429568, "grad_norm": 1.5703125, "learning_rate": 1.846130550220504e-05, "loss": 2.2826, "step": 33560 }, { "epoch": 0.429696, "grad_norm": 1.65625, "learning_rate": 1.845539834361007e-05, "loss": 2.2473, "step": 33570 }, { "epoch": 0.429824, "grad_norm": 1.5546875, "learning_rate": 1.844949061904351e-05, "loss": 2.2692, "step": 33580 }, { "epoch": 0.429952, "grad_norm": 1.6015625, "learning_rate": 1.8443582329473e-05, "loss": 2.3054, "step": 33590 }, { "epoch": 0.43008, "grad_norm": 1.4140625, "learning_rate": 1.8437673475866282e-05, "loss": 2.2851, "step": 33600 }, { "epoch": 0.430208, "grad_norm": 1.578125, "learning_rate": 1.843176405919119e-05, "loss": 2.2905, "step": 33610 }, { "epoch": 0.430336, "grad_norm": 1.546875, "learning_rate": 1.842585408041565e-05, "loss": 2.2781, "step": 33620 }, { "epoch": 0.430464, "grad_norm": 1.546875, "learning_rate": 1.841994354050767e-05, "loss": 2.2762, "step": 33630 }, { "epoch": 0.430592, "grad_norm": 1.421875, "learning_rate": 1.8414032440435358e-05, "loss": 2.2937, "step": 33640 }, { "epoch": 0.43072, "grad_norm": 1.484375, "learning_rate": 1.840812078116692e-05, "loss": 2.2586, "step": 33650 }, { "epoch": 0.430848, "grad_norm": 2.53125, "learning_rate": 1.8402208563670645e-05, "loss": 2.2705, "step": 33660 }, { "epoch": 0.430976, "grad_norm": 1.6171875, "learning_rate": 1.8396295788914915e-05, "loss": 2.2532, "step": 33670 }, { "epoch": 0.431104, "grad_norm": 1.6875, "learning_rate": 1.8390382457868196e-05, "loss": 2.2969, "step": 33680 }, { "epoch": 0.431232, "grad_norm": 1.4296875, "learning_rate": 1.838446857149906e-05, "loss": 2.261, "step": 33690 }, { "epoch": 0.43136, "grad_norm": 1.859375, "learning_rate": 1.8378554130776176e-05, "loss": 2.2924, "step": 33700 }, { "epoch": 0.431488, "grad_norm": 1.640625, "learning_rate": 1.8372639136668266e-05, "loss": 2.3063, "step": 33710 }, { "epoch": 0.431616, "grad_norm": 1.5234375, "learning_rate": 1.836672359014418e-05, "loss": 2.2692, "step": 33720 }, { "epoch": 0.431744, "grad_norm": 1.5625, "learning_rate": 1.8360807492172846e-05, "loss": 2.2591, "step": 33730 }, { "epoch": 0.431872, "grad_norm": 1.5703125, "learning_rate": 1.835489084372328e-05, "loss": 2.2969, "step": 33740 }, { "epoch": 0.432, "grad_norm": 1.4765625, "learning_rate": 1.8348973645764586e-05, "loss": 2.2849, "step": 33750 }, { "epoch": 0.432128, "grad_norm": 1.5546875, "learning_rate": 1.8343055899265967e-05, "loss": 2.2828, "step": 33760 }, { "epoch": 0.432256, "grad_norm": 1.65625, "learning_rate": 1.833713760519672e-05, "loss": 2.3076, "step": 33770 }, { "epoch": 0.432384, "grad_norm": 1.5390625, "learning_rate": 1.8331218764526206e-05, "loss": 2.2779, "step": 33780 }, { "epoch": 0.432512, "grad_norm": 1.7109375, "learning_rate": 1.8325299378223904e-05, "loss": 2.2911, "step": 33790 }, { "epoch": 0.43264, "grad_norm": 1.609375, "learning_rate": 1.831937944725936e-05, "loss": 2.302, "step": 33800 }, { "epoch": 0.432768, "grad_norm": 1.671875, "learning_rate": 1.8313458972602233e-05, "loss": 2.3023, "step": 33810 }, { "epoch": 0.432896, "grad_norm": 1.5546875, "learning_rate": 1.8307537955222248e-05, "loss": 2.2777, "step": 33820 }, { "epoch": 0.433024, "grad_norm": 1.625, "learning_rate": 1.8301616396089233e-05, "loss": 2.2824, "step": 33830 }, { "epoch": 0.433152, "grad_norm": 7.125, "learning_rate": 1.8295694296173102e-05, "loss": 2.2866, "step": 33840 }, { "epoch": 0.43328, "grad_norm": 1.4921875, "learning_rate": 1.8289771656443852e-05, "loss": 2.286, "step": 33850 }, { "epoch": 0.433408, "grad_norm": 1.6171875, "learning_rate": 1.828384847787157e-05, "loss": 2.287, "step": 33860 }, { "epoch": 0.433536, "grad_norm": 1.484375, "learning_rate": 1.8277924761426443e-05, "loss": 2.2823, "step": 33870 }, { "epoch": 0.433664, "grad_norm": 1.6875, "learning_rate": 1.827200050807873e-05, "loss": 2.2526, "step": 33880 }, { "epoch": 0.433792, "grad_norm": 1.53125, "learning_rate": 1.8266075718798792e-05, "loss": 2.2994, "step": 33890 }, { "epoch": 0.43392, "grad_norm": 5.1875, "learning_rate": 1.826015039455706e-05, "loss": 2.2839, "step": 33900 }, { "epoch": 0.434048, "grad_norm": 1.6328125, "learning_rate": 1.8254224536324072e-05, "loss": 2.2773, "step": 33910 }, { "epoch": 0.434176, "grad_norm": 1.53125, "learning_rate": 1.824829814507044e-05, "loss": 2.2598, "step": 33920 }, { "epoch": 0.434304, "grad_norm": 1.515625, "learning_rate": 1.824237122176687e-05, "loss": 2.2713, "step": 33930 }, { "epoch": 0.434432, "grad_norm": 1.8359375, "learning_rate": 1.8236443767384154e-05, "loss": 2.2949, "step": 33940 }, { "epoch": 0.43456, "grad_norm": 1.734375, "learning_rate": 1.823051578289317e-05, "loss": 2.2893, "step": 33950 }, { "epoch": 0.434688, "grad_norm": 1.6171875, "learning_rate": 1.8224587269264875e-05, "loss": 2.2978, "step": 33960 }, { "epoch": 0.434816, "grad_norm": 1.5625, "learning_rate": 1.8218658227470336e-05, "loss": 2.2905, "step": 33970 }, { "epoch": 0.434944, "grad_norm": 1.6015625, "learning_rate": 1.8212728658480675e-05, "loss": 2.3049, "step": 33980 }, { "epoch": 0.435072, "grad_norm": 1.6171875, "learning_rate": 1.820679856326713e-05, "loss": 2.2706, "step": 33990 }, { "epoch": 0.4352, "grad_norm": 1.65625, "learning_rate": 1.8200867942801007e-05, "loss": 2.3025, "step": 34000 }, { "epoch": 0.435328, "grad_norm": 1.8359375, "learning_rate": 1.81949367980537e-05, "loss": 2.2666, "step": 34010 }, { "epoch": 0.435456, "grad_norm": 1.6171875, "learning_rate": 1.818900512999669e-05, "loss": 2.2801, "step": 34020 }, { "epoch": 0.435584, "grad_norm": 1.7421875, "learning_rate": 1.8183072939601548e-05, "loss": 2.3034, "step": 34030 }, { "epoch": 0.435712, "grad_norm": 1.546875, "learning_rate": 1.8177140227839935e-05, "loss": 2.2781, "step": 34040 }, { "epoch": 0.43584, "grad_norm": 1.53125, "learning_rate": 1.817120699568358e-05, "loss": 2.2789, "step": 34050 }, { "epoch": 0.435968, "grad_norm": 1.640625, "learning_rate": 1.8165273244104314e-05, "loss": 2.2794, "step": 34060 }, { "epoch": 0.436096, "grad_norm": 1.6015625, "learning_rate": 1.815933897407404e-05, "loss": 2.2728, "step": 34070 }, { "epoch": 0.436224, "grad_norm": 2.375, "learning_rate": 1.8153404186564765e-05, "loss": 2.3047, "step": 34080 }, { "epoch": 0.436352, "grad_norm": 1.59375, "learning_rate": 1.8147468882548552e-05, "loss": 2.2937, "step": 34090 }, { "epoch": 0.43648, "grad_norm": 1.71875, "learning_rate": 1.814153306299757e-05, "loss": 2.2979, "step": 34100 }, { "epoch": 0.436608, "grad_norm": 1.6328125, "learning_rate": 1.8135596728884078e-05, "loss": 2.2693, "step": 34110 }, { "epoch": 0.436736, "grad_norm": 1.53125, "learning_rate": 1.81296598811804e-05, "loss": 2.3042, "step": 34120 }, { "epoch": 0.436864, "grad_norm": 1.6953125, "learning_rate": 1.8123722520858947e-05, "loss": 2.3071, "step": 34130 }, { "epoch": 0.436992, "grad_norm": 1.53125, "learning_rate": 1.811778464889223e-05, "loss": 2.2819, "step": 34140 }, { "epoch": 0.43712, "grad_norm": 1.53125, "learning_rate": 1.811184626625283e-05, "loss": 2.2838, "step": 34150 }, { "epoch": 0.437248, "grad_norm": 1.46875, "learning_rate": 1.8105907373913416e-05, "loss": 2.2675, "step": 34160 }, { "epoch": 0.437376, "grad_norm": 1.5078125, "learning_rate": 1.8099967972846735e-05, "loss": 2.3119, "step": 34170 }, { "epoch": 0.437504, "grad_norm": 1.6171875, "learning_rate": 1.8094028064025628e-05, "loss": 2.2797, "step": 34180 }, { "epoch": 0.437632, "grad_norm": 1.4140625, "learning_rate": 1.8088087648423008e-05, "loss": 2.3044, "step": 34190 }, { "epoch": 0.43776, "grad_norm": 1.7578125, "learning_rate": 1.8082146727011882e-05, "loss": 2.2922, "step": 34200 }, { "epoch": 0.437888, "grad_norm": 1.59375, "learning_rate": 1.8076205300765325e-05, "loss": 2.2911, "step": 34210 }, { "epoch": 0.438016, "grad_norm": 1.5078125, "learning_rate": 1.8070263370656516e-05, "loss": 2.288, "step": 34220 }, { "epoch": 0.438144, "grad_norm": 1.453125, "learning_rate": 1.8064320937658694e-05, "loss": 2.2941, "step": 34230 }, { "epoch": 0.438272, "grad_norm": 1.484375, "learning_rate": 1.8058378002745196e-05, "loss": 2.2784, "step": 34240 }, { "epoch": 0.4384, "grad_norm": 1.65625, "learning_rate": 1.805243456688943e-05, "loss": 2.2617, "step": 34250 }, { "epoch": 0.438528, "grad_norm": 1.78125, "learning_rate": 1.8046490631064896e-05, "loss": 2.3049, "step": 34260 }, { "epoch": 0.438656, "grad_norm": 1.578125, "learning_rate": 1.8040546196245174e-05, "loss": 2.291, "step": 34270 }, { "epoch": 0.438784, "grad_norm": 1.484375, "learning_rate": 1.803460126340392e-05, "loss": 2.2557, "step": 34280 }, { "epoch": 0.438912, "grad_norm": 1.578125, "learning_rate": 1.8028655833514877e-05, "loss": 2.3298, "step": 34290 }, { "epoch": 0.43904, "grad_norm": 1.421875, "learning_rate": 1.802270990755186e-05, "loss": 2.2968, "step": 34300 }, { "epoch": 0.439168, "grad_norm": 1.5234375, "learning_rate": 1.8016763486488785e-05, "loss": 2.2852, "step": 34310 }, { "epoch": 0.439296, "grad_norm": 1.5546875, "learning_rate": 1.801081657129963e-05, "loss": 2.2927, "step": 34320 }, { "epoch": 0.439424, "grad_norm": 1.578125, "learning_rate": 1.800486916295846e-05, "loss": 2.3096, "step": 34330 }, { "epoch": 0.439552, "grad_norm": 1.6484375, "learning_rate": 1.7998921262439422e-05, "loss": 2.2667, "step": 34340 }, { "epoch": 0.43968, "grad_norm": 1.46875, "learning_rate": 1.7992972870716746e-05, "loss": 2.2964, "step": 34350 }, { "epoch": 0.439808, "grad_norm": 1.703125, "learning_rate": 1.7987023988764736e-05, "loss": 2.2979, "step": 34360 }, { "epoch": 0.439936, "grad_norm": 1.6484375, "learning_rate": 1.7981074617557778e-05, "loss": 2.2759, "step": 34370 }, { "epoch": 0.440064, "grad_norm": 1.46875, "learning_rate": 1.797512475807035e-05, "loss": 2.3057, "step": 34380 }, { "epoch": 0.440192, "grad_norm": 1.4609375, "learning_rate": 1.796917441127699e-05, "loss": 2.3015, "step": 34390 }, { "epoch": 0.44032, "grad_norm": 1.453125, "learning_rate": 1.796322357815233e-05, "loss": 2.3061, "step": 34400 }, { "epoch": 0.440448, "grad_norm": 1.5703125, "learning_rate": 1.7957272259671074e-05, "loss": 2.2966, "step": 34410 }, { "epoch": 0.440576, "grad_norm": 1.53125, "learning_rate": 1.7951320456808016e-05, "loss": 2.2588, "step": 34420 }, { "epoch": 0.440704, "grad_norm": 1.875, "learning_rate": 1.7945368170538017e-05, "loss": 2.3181, "step": 34430 }, { "epoch": 0.440832, "grad_norm": 1.484375, "learning_rate": 1.793941540183602e-05, "loss": 2.308, "step": 34440 }, { "epoch": 0.44096, "grad_norm": 1.5, "learning_rate": 1.793346215167705e-05, "loss": 2.2892, "step": 34450 }, { "epoch": 0.441088, "grad_norm": 1.5703125, "learning_rate": 1.792750842103622e-05, "loss": 2.3046, "step": 34460 }, { "epoch": 0.441216, "grad_norm": 1.5546875, "learning_rate": 1.7921554210888705e-05, "loss": 2.3262, "step": 34470 }, { "epoch": 0.441344, "grad_norm": 1.515625, "learning_rate": 1.7915599522209763e-05, "loss": 2.2728, "step": 34480 }, { "epoch": 0.441472, "grad_norm": 1.6328125, "learning_rate": 1.7909644355974736e-05, "loss": 2.278, "step": 34490 }, { "epoch": 0.4416, "grad_norm": 1.5, "learning_rate": 1.7903688713159044e-05, "loss": 2.2677, "step": 34500 }, { "epoch": 0.441728, "grad_norm": 1.5546875, "learning_rate": 1.7897732594738174e-05, "loss": 2.2862, "step": 34510 }, { "epoch": 0.441856, "grad_norm": 1.5234375, "learning_rate": 1.7891776001687704e-05, "loss": 2.2873, "step": 34520 }, { "epoch": 0.441984, "grad_norm": 1.5625, "learning_rate": 1.788581893498329e-05, "loss": 2.2799, "step": 34530 }, { "epoch": 0.442112, "grad_norm": 1.5078125, "learning_rate": 1.7879861395600654e-05, "loss": 2.2997, "step": 34540 }, { "epoch": 0.44224, "grad_norm": 1.703125, "learning_rate": 1.78739033845156e-05, "loss": 2.3158, "step": 34550 }, { "epoch": 0.442368, "grad_norm": 3.890625, "learning_rate": 1.7867944902704017e-05, "loss": 2.3123, "step": 34560 }, { "epoch": 0.442496, "grad_norm": 1.4921875, "learning_rate": 1.7861985951141863e-05, "loss": 2.2794, "step": 34570 }, { "epoch": 0.442624, "grad_norm": 1.5078125, "learning_rate": 1.7856026530805178e-05, "loss": 2.294, "step": 34580 }, { "epoch": 0.442752, "grad_norm": 1.5390625, "learning_rate": 1.785006664267007e-05, "loss": 2.278, "step": 34590 }, { "epoch": 0.44288, "grad_norm": 1.4765625, "learning_rate": 1.7844106287712735e-05, "loss": 2.2868, "step": 34600 }, { "epoch": 0.443008, "grad_norm": 1.5703125, "learning_rate": 1.7838145466909437e-05, "loss": 2.3156, "step": 34610 }, { "epoch": 0.443136, "grad_norm": 1.484375, "learning_rate": 1.7832184181236517e-05, "loss": 2.2682, "step": 34620 }, { "epoch": 0.443264, "grad_norm": 1.5234375, "learning_rate": 1.78262224316704e-05, "loss": 2.3178, "step": 34630 }, { "epoch": 0.443392, "grad_norm": 1.59375, "learning_rate": 1.7820260219187576e-05, "loss": 2.27, "step": 34640 }, { "epoch": 0.44352, "grad_norm": 1.6328125, "learning_rate": 1.7814297544764624e-05, "loss": 2.2724, "step": 34650 }, { "epoch": 0.443648, "grad_norm": 2.078125, "learning_rate": 1.7808334409378186e-05, "loss": 2.2917, "step": 34660 }, { "epoch": 0.443776, "grad_norm": 1.7578125, "learning_rate": 1.7802370814004983e-05, "loss": 2.2854, "step": 34670 }, { "epoch": 0.443904, "grad_norm": 1.546875, "learning_rate": 1.7796406759621815e-05, "loss": 2.2957, "step": 34680 }, { "epoch": 0.444032, "grad_norm": 1.5390625, "learning_rate": 1.7790442247205556e-05, "loss": 2.2868, "step": 34690 }, { "epoch": 0.44416, "grad_norm": 1.5859375, "learning_rate": 1.778447727773315e-05, "loss": 2.2756, "step": 34700 }, { "epoch": 0.444288, "grad_norm": 1.515625, "learning_rate": 1.7778511852181623e-05, "loss": 2.3299, "step": 34710 }, { "epoch": 0.444416, "grad_norm": 1.8046875, "learning_rate": 1.7772545971528077e-05, "loss": 2.2852, "step": 34720 }, { "epoch": 0.444544, "grad_norm": 1.6484375, "learning_rate": 1.7766579636749676e-05, "loss": 2.2824, "step": 34730 }, { "epoch": 0.444672, "grad_norm": 1.4765625, "learning_rate": 1.776061284882367e-05, "loss": 2.3065, "step": 34740 }, { "epoch": 0.4448, "grad_norm": 1.5234375, "learning_rate": 1.7754645608727374e-05, "loss": 2.2958, "step": 34750 }, { "epoch": 0.444928, "grad_norm": 1.625, "learning_rate": 1.7748677917438196e-05, "loss": 2.2946, "step": 34760 }, { "epoch": 0.445056, "grad_norm": 3.375, "learning_rate": 1.7742709775933598e-05, "loss": 2.2799, "step": 34770 }, { "epoch": 0.445184, "grad_norm": 13.375, "learning_rate": 1.7736741185191114e-05, "loss": 2.2839, "step": 34780 }, { "epoch": 0.445312, "grad_norm": 1.4296875, "learning_rate": 1.7730772146188373e-05, "loss": 2.2951, "step": 34790 }, { "epoch": 0.44544, "grad_norm": 1.59375, "learning_rate": 1.772480265990306e-05, "loss": 2.268, "step": 34800 }, { "epoch": 0.445568, "grad_norm": 1.484375, "learning_rate": 1.7718832727312936e-05, "loss": 2.2708, "step": 34810 }, { "epoch": 0.445696, "grad_norm": 1.734375, "learning_rate": 1.7712862349395833e-05, "loss": 2.2737, "step": 34820 }, { "epoch": 0.445824, "grad_norm": 1.9140625, "learning_rate": 1.7706891527129667e-05, "loss": 2.251, "step": 34830 }, { "epoch": 0.445952, "grad_norm": 1.6015625, "learning_rate": 1.7700920261492423e-05, "loss": 2.2889, "step": 34840 }, { "epoch": 0.44608, "grad_norm": 1.6640625, "learning_rate": 1.769494855346214e-05, "loss": 2.2562, "step": 34850 }, { "epoch": 0.446208, "grad_norm": 1.6484375, "learning_rate": 1.7688976404016956e-05, "loss": 2.2969, "step": 34860 }, { "epoch": 0.446336, "grad_norm": 1.484375, "learning_rate": 1.768300381413507e-05, "loss": 2.2759, "step": 34870 }, { "epoch": 0.446464, "grad_norm": 1.6328125, "learning_rate": 1.767703078479475e-05, "loss": 2.276, "step": 34880 }, { "epoch": 0.446592, "grad_norm": 1.546875, "learning_rate": 1.767105731697434e-05, "loss": 2.2704, "step": 34890 }, { "epoch": 0.44672, "grad_norm": 4.875, "learning_rate": 1.7665083411652254e-05, "loss": 2.2899, "step": 34900 }, { "epoch": 0.446848, "grad_norm": 5.625, "learning_rate": 1.7659109069806977e-05, "loss": 2.3018, "step": 34910 }, { "epoch": 0.446976, "grad_norm": 1.671875, "learning_rate": 1.7653134292417073e-05, "loss": 2.288, "step": 34920 }, { "epoch": 0.447104, "grad_norm": 1.5703125, "learning_rate": 1.764715908046116e-05, "loss": 2.2878, "step": 34930 }, { "epoch": 0.447232, "grad_norm": 1.59375, "learning_rate": 1.7641183434917954e-05, "loss": 2.2801, "step": 34940 }, { "epoch": 0.44736, "grad_norm": 1.6015625, "learning_rate": 1.7635207356766214e-05, "loss": 2.3261, "step": 34950 }, { "epoch": 0.447488, "grad_norm": 1.5234375, "learning_rate": 1.762923084698479e-05, "loss": 2.2686, "step": 34960 }, { "epoch": 0.447616, "grad_norm": 1.671875, "learning_rate": 1.7623253906552593e-05, "loss": 2.2428, "step": 34970 }, { "epoch": 0.447744, "grad_norm": 1.609375, "learning_rate": 1.76172765364486e-05, "loss": 2.3021, "step": 34980 }, { "epoch": 0.447872, "grad_norm": 1.6015625, "learning_rate": 1.761129873765188e-05, "loss": 2.2971, "step": 34990 }, { "epoch": 0.448, "grad_norm": 1.484375, "learning_rate": 1.760532051114154e-05, "loss": 2.3189, "step": 35000 }, { "epoch": 0.448128, "grad_norm": 1.578125, "learning_rate": 1.7599341857896783e-05, "loss": 2.3111, "step": 35010 }, { "epoch": 0.448256, "grad_norm": 1.7109375, "learning_rate": 1.7593362778896874e-05, "loss": 2.2829, "step": 35020 }, { "epoch": 0.448384, "grad_norm": 1.515625, "learning_rate": 1.758738327512115e-05, "loss": 2.3058, "step": 35030 }, { "epoch": 0.448512, "grad_norm": 1.484375, "learning_rate": 1.7581403347549008e-05, "loss": 2.2466, "step": 35040 }, { "epoch": 0.44864, "grad_norm": 1.6015625, "learning_rate": 1.7575422997159916e-05, "loss": 2.2936, "step": 35050 }, { "epoch": 0.448768, "grad_norm": 1.5703125, "learning_rate": 1.7569442224933427e-05, "loss": 2.2347, "step": 35060 }, { "epoch": 0.448896, "grad_norm": 1.5703125, "learning_rate": 1.7563461031849152e-05, "loss": 2.2823, "step": 35070 }, { "epoch": 0.449024, "grad_norm": 1.65625, "learning_rate": 1.7557479418886766e-05, "loss": 2.2757, "step": 35080 }, { "epoch": 0.449152, "grad_norm": 1.5625, "learning_rate": 1.7551497387026014e-05, "loss": 2.2925, "step": 35090 }, { "epoch": 0.44928, "grad_norm": 1.765625, "learning_rate": 1.7545514937246725e-05, "loss": 2.2451, "step": 35100 }, { "epoch": 0.449408, "grad_norm": 1.46875, "learning_rate": 1.753953207052878e-05, "loss": 2.2454, "step": 35110 }, { "epoch": 0.449536, "grad_norm": 1.46875, "learning_rate": 1.753354878785213e-05, "loss": 2.285, "step": 35120 }, { "epoch": 0.449664, "grad_norm": 1.921875, "learning_rate": 1.7527565090196797e-05, "loss": 2.2864, "step": 35130 }, { "epoch": 0.449792, "grad_norm": 1.578125, "learning_rate": 1.752158097854288e-05, "loss": 2.2398, "step": 35140 }, { "epoch": 0.44992, "grad_norm": 1.5390625, "learning_rate": 1.751559645387053e-05, "loss": 2.2723, "step": 35150 }, { "epoch": 0.450048, "grad_norm": 1.59375, "learning_rate": 1.7509611517159973e-05, "loss": 2.3164, "step": 35160 }, { "epoch": 0.450176, "grad_norm": 1.4453125, "learning_rate": 1.7503626169391503e-05, "loss": 2.2401, "step": 35170 }, { "epoch": 0.450304, "grad_norm": 1.6015625, "learning_rate": 1.7497640411545485e-05, "loss": 2.2619, "step": 35180 }, { "epoch": 0.450432, "grad_norm": 1.5078125, "learning_rate": 1.7491654244602344e-05, "loss": 2.2875, "step": 35190 }, { "epoch": 0.45056, "grad_norm": 1.5546875, "learning_rate": 1.748566766954257e-05, "loss": 2.3182, "step": 35200 }, { "epoch": 0.450688, "grad_norm": 1.96875, "learning_rate": 1.7479680687346735e-05, "loss": 2.3219, "step": 35210 }, { "epoch": 0.450816, "grad_norm": 1.625, "learning_rate": 1.747369329899546e-05, "loss": 2.2982, "step": 35220 }, { "epoch": 0.450944, "grad_norm": 1.609375, "learning_rate": 1.746770550546944e-05, "loss": 2.2895, "step": 35230 }, { "epoch": 0.451072, "grad_norm": 1.5, "learning_rate": 1.7461717307749434e-05, "loss": 2.2863, "step": 35240 }, { "epoch": 0.4512, "grad_norm": 1.6484375, "learning_rate": 1.745572870681628e-05, "loss": 2.2777, "step": 35250 }, { "epoch": 0.451328, "grad_norm": 1.4921875, "learning_rate": 1.7449739703650866e-05, "loss": 2.2747, "step": 35260 }, { "epoch": 0.451456, "grad_norm": 1.609375, "learning_rate": 1.7443750299234145e-05, "loss": 2.3012, "step": 35270 }, { "epoch": 0.451584, "grad_norm": 1.7109375, "learning_rate": 1.743776049454715e-05, "loss": 2.269, "step": 35280 }, { "epoch": 0.451712, "grad_norm": 1.53125, "learning_rate": 1.7431770290570972e-05, "loss": 2.2899, "step": 35290 }, { "epoch": 0.45184, "grad_norm": 1.578125, "learning_rate": 1.742577968828676e-05, "loss": 2.2725, "step": 35300 }, { "epoch": 0.451968, "grad_norm": 2.203125, "learning_rate": 1.7419788688675743e-05, "loss": 2.268, "step": 35310 }, { "epoch": 0.452096, "grad_norm": 1.640625, "learning_rate": 1.7413797292719202e-05, "loss": 2.2907, "step": 35320 }, { "epoch": 0.452224, "grad_norm": 1.59375, "learning_rate": 1.7407805501398493e-05, "loss": 2.3041, "step": 35330 }, { "epoch": 0.452352, "grad_norm": 1.796875, "learning_rate": 1.740181331569503e-05, "loss": 2.3029, "step": 35340 }, { "epoch": 0.45248, "grad_norm": 1.59375, "learning_rate": 1.7395820736590295e-05, "loss": 2.2614, "step": 35350 }, { "epoch": 0.452608, "grad_norm": 1.625, "learning_rate": 1.7389827765065828e-05, "loss": 2.2823, "step": 35360 }, { "epoch": 0.452736, "grad_norm": 1.578125, "learning_rate": 1.7383834402103244e-05, "loss": 2.2793, "step": 35370 }, { "epoch": 0.452864, "grad_norm": 3.5, "learning_rate": 1.7377840648684215e-05, "loss": 2.3038, "step": 35380 }, { "epoch": 0.452992, "grad_norm": 1.9609375, "learning_rate": 1.737184650579048e-05, "loss": 2.2954, "step": 35390 }, { "epoch": 0.45312, "grad_norm": 1.7890625, "learning_rate": 1.736585197440383e-05, "loss": 2.3031, "step": 35400 }, { "epoch": 0.453248, "grad_norm": 1.625, "learning_rate": 1.7359857055506152e-05, "loss": 2.2584, "step": 35410 }, { "epoch": 0.453376, "grad_norm": 1.6171875, "learning_rate": 1.7353861750079354e-05, "loss": 2.2523, "step": 35420 }, { "epoch": 0.453504, "grad_norm": 1.71875, "learning_rate": 1.734786605910543e-05, "loss": 2.3311, "step": 35430 }, { "epoch": 0.453632, "grad_norm": 1.6484375, "learning_rate": 1.7341869983566445e-05, "loss": 2.2885, "step": 35440 }, { "epoch": 0.45376, "grad_norm": 1.5703125, "learning_rate": 1.7335873524444516e-05, "loss": 2.2663, "step": 35450 }, { "epoch": 0.453888, "grad_norm": 1.4765625, "learning_rate": 1.7329876682721812e-05, "loss": 2.2802, "step": 35460 }, { "epoch": 0.454016, "grad_norm": 1.5546875, "learning_rate": 1.7323879459380585e-05, "loss": 2.308, "step": 35470 }, { "epoch": 0.454144, "grad_norm": 1.6640625, "learning_rate": 1.731788185540314e-05, "loss": 2.2623, "step": 35480 }, { "epoch": 0.454272, "grad_norm": 1.4453125, "learning_rate": 1.7311883871771845e-05, "loss": 2.2844, "step": 35490 }, { "epoch": 0.4544, "grad_norm": 1.5, "learning_rate": 1.7305885509469127e-05, "loss": 2.2667, "step": 35500 }, { "epoch": 0.454528, "grad_norm": 2.1875, "learning_rate": 1.729988676947748e-05, "loss": 2.302, "step": 35510 }, { "epoch": 0.454656, "grad_norm": 1.4921875, "learning_rate": 1.7293887652779468e-05, "loss": 2.2882, "step": 35520 }, { "epoch": 0.454784, "grad_norm": 1.5390625, "learning_rate": 1.7287888160357694e-05, "loss": 2.3012, "step": 35530 }, { "epoch": 0.454912, "grad_norm": 1.6171875, "learning_rate": 1.7281888293194836e-05, "loss": 2.2804, "step": 35540 }, { "epoch": 0.45504, "grad_norm": 1.46875, "learning_rate": 1.727588805227364e-05, "loss": 2.2925, "step": 35550 }, { "epoch": 0.455168, "grad_norm": 1.5703125, "learning_rate": 1.7269887438576906e-05, "loss": 2.2792, "step": 35560 }, { "epoch": 0.455296, "grad_norm": 1.6640625, "learning_rate": 1.7263886453087492e-05, "loss": 2.2807, "step": 35570 }, { "epoch": 0.455424, "grad_norm": 1.53125, "learning_rate": 1.7257885096788315e-05, "loss": 2.2511, "step": 35580 }, { "epoch": 0.455552, "grad_norm": 1.5625, "learning_rate": 1.7251883370662367e-05, "loss": 2.2907, "step": 35590 }, { "epoch": 0.45568, "grad_norm": 1.6171875, "learning_rate": 1.7245881275692692e-05, "loss": 2.2583, "step": 35600 }, { "epoch": 0.455808, "grad_norm": 1.6171875, "learning_rate": 1.7239878812862382e-05, "loss": 2.3345, "step": 35610 }, { "epoch": 0.455936, "grad_norm": 1.5078125, "learning_rate": 1.7233875983154607e-05, "loss": 2.295, "step": 35620 }, { "epoch": 0.456064, "grad_norm": 1.6484375, "learning_rate": 1.72278727875526e-05, "loss": 2.2979, "step": 35630 }, { "epoch": 0.456192, "grad_norm": 1.484375, "learning_rate": 1.7221869227039637e-05, "loss": 2.2805, "step": 35640 }, { "epoch": 0.45632, "grad_norm": 1.59375, "learning_rate": 1.7215865302599058e-05, "loss": 2.3144, "step": 35650 }, { "epoch": 0.456448, "grad_norm": 1.5625, "learning_rate": 1.7209861015214273e-05, "loss": 2.2832, "step": 35660 }, { "epoch": 0.456576, "grad_norm": 1.515625, "learning_rate": 1.7203856365868742e-05, "loss": 2.2902, "step": 35670 }, { "epoch": 0.456704, "grad_norm": 1.6015625, "learning_rate": 1.719785135554599e-05, "loss": 2.2495, "step": 35680 }, { "epoch": 0.456832, "grad_norm": 1.65625, "learning_rate": 1.7191845985229595e-05, "loss": 2.3075, "step": 35690 }, { "epoch": 0.45696, "grad_norm": 1.53125, "learning_rate": 1.71858402559032e-05, "loss": 2.2492, "step": 35700 }, { "epoch": 0.457088, "grad_norm": 1.578125, "learning_rate": 1.71798341685505e-05, "loss": 2.2932, "step": 35710 }, { "epoch": 0.457216, "grad_norm": 1.6484375, "learning_rate": 1.717382772415526e-05, "loss": 2.2728, "step": 35720 }, { "epoch": 0.457344, "grad_norm": 1.6171875, "learning_rate": 1.7167820923701287e-05, "loss": 2.2558, "step": 35730 }, { "epoch": 0.457472, "grad_norm": 1.6640625, "learning_rate": 1.7161813768172463e-05, "loss": 2.2492, "step": 35740 }, { "epoch": 0.4576, "grad_norm": 1.703125, "learning_rate": 1.7155806258552717e-05, "loss": 2.3246, "step": 35750 }, { "epoch": 0.457728, "grad_norm": 1.578125, "learning_rate": 1.7149798395826043e-05, "loss": 2.3132, "step": 35760 }, { "epoch": 0.457856, "grad_norm": 1.46875, "learning_rate": 1.7143790180976483e-05, "loss": 2.2784, "step": 35770 }, { "epoch": 0.457984, "grad_norm": 1.515625, "learning_rate": 1.7137781614988148e-05, "loss": 2.2764, "step": 35780 }, { "epoch": 0.458112, "grad_norm": 1.4609375, "learning_rate": 1.7131772698845205e-05, "loss": 2.2495, "step": 35790 }, { "epoch": 0.45824, "grad_norm": 1.5078125, "learning_rate": 1.712576343353187e-05, "loss": 2.289, "step": 35800 }, { "epoch": 0.458368, "grad_norm": 1.96875, "learning_rate": 1.711975382003242e-05, "loss": 2.3116, "step": 35810 }, { "epoch": 0.458496, "grad_norm": 1.5078125, "learning_rate": 1.7113743859331187e-05, "loss": 2.2907, "step": 35820 }, { "epoch": 0.458624, "grad_norm": 2.375, "learning_rate": 1.710773355241258e-05, "loss": 2.26, "step": 35830 }, { "epoch": 0.458752, "grad_norm": 1.5546875, "learning_rate": 1.7101722900261036e-05, "loss": 2.2746, "step": 35840 }, { "epoch": 0.45888, "grad_norm": 1.578125, "learning_rate": 1.709571190386105e-05, "loss": 2.2894, "step": 35850 }, { "epoch": 0.459008, "grad_norm": 1.65625, "learning_rate": 1.7089700564197204e-05, "loss": 2.3051, "step": 35860 }, { "epoch": 0.459136, "grad_norm": 1.7109375, "learning_rate": 1.708368888225411e-05, "loss": 2.2775, "step": 35870 }, { "epoch": 0.459264, "grad_norm": 1.484375, "learning_rate": 1.707767685901643e-05, "loss": 2.29, "step": 35880 }, { "epoch": 0.459392, "grad_norm": 1.6484375, "learning_rate": 1.7071664495468906e-05, "loss": 2.2847, "step": 35890 }, { "epoch": 0.45952, "grad_norm": 1.53125, "learning_rate": 1.7065651792596323e-05, "loss": 2.3013, "step": 35900 }, { "epoch": 0.459648, "grad_norm": 1.5, "learning_rate": 1.705963875138352e-05, "loss": 2.3034, "step": 35910 }, { "epoch": 0.459776, "grad_norm": 1.578125, "learning_rate": 1.705362537281539e-05, "loss": 2.2919, "step": 35920 }, { "epoch": 0.459904, "grad_norm": 1.53125, "learning_rate": 1.704761165787689e-05, "loss": 2.2807, "step": 35930 }, { "epoch": 0.460032, "grad_norm": 1.5390625, "learning_rate": 1.7041597607553028e-05, "loss": 2.2808, "step": 35940 }, { "epoch": 0.46016, "grad_norm": 1.4921875, "learning_rate": 1.7035583222828863e-05, "loss": 2.2435, "step": 35950 }, { "epoch": 0.460288, "grad_norm": 1.7734375, "learning_rate": 1.702956850468951e-05, "loss": 2.289, "step": 35960 }, { "epoch": 0.460416, "grad_norm": 1.625, "learning_rate": 1.702355345412015e-05, "loss": 2.2614, "step": 35970 }, { "epoch": 0.460544, "grad_norm": 1.671875, "learning_rate": 1.7017538072105995e-05, "loss": 2.263, "step": 35980 }, { "epoch": 0.460672, "grad_norm": 1.5625, "learning_rate": 1.7011522359632335e-05, "loss": 2.2561, "step": 35990 }, { "epoch": 0.4608, "grad_norm": 1.6484375, "learning_rate": 1.70055063176845e-05, "loss": 2.2882, "step": 36000 }, { "epoch": 0.460928, "grad_norm": 9.1875, "learning_rate": 1.699948994724788e-05, "loss": 2.2894, "step": 36010 }, { "epoch": 0.461056, "grad_norm": 1.6796875, "learning_rate": 1.699347324930792e-05, "loss": 2.2639, "step": 36020 }, { "epoch": 0.461184, "grad_norm": 1.5546875, "learning_rate": 1.6987456224850106e-05, "loss": 2.2757, "step": 36030 }, { "epoch": 0.461312, "grad_norm": 1.4609375, "learning_rate": 1.6981438874859996e-05, "loss": 2.287, "step": 36040 }, { "epoch": 0.46144, "grad_norm": 1.484375, "learning_rate": 1.6975421200323188e-05, "loss": 2.2952, "step": 36050 }, { "epoch": 0.461568, "grad_norm": 1.6484375, "learning_rate": 1.6969403202225344e-05, "loss": 2.288, "step": 36060 }, { "epoch": 0.461696, "grad_norm": 1.6328125, "learning_rate": 1.6963384881552163e-05, "loss": 2.3044, "step": 36070 }, { "epoch": 0.461824, "grad_norm": 1.5625, "learning_rate": 1.6957366239289415e-05, "loss": 2.3328, "step": 36080 }, { "epoch": 0.461952, "grad_norm": 1.5859375, "learning_rate": 1.6951347276422907e-05, "loss": 2.2745, "step": 36090 }, { "epoch": 0.46208, "grad_norm": 1.609375, "learning_rate": 1.6945327993938515e-05, "loss": 2.2975, "step": 36100 }, { "epoch": 0.462208, "grad_norm": 1.4765625, "learning_rate": 1.693930839282215e-05, "loss": 2.2878, "step": 36110 }, { "epoch": 0.462336, "grad_norm": 1.59375, "learning_rate": 1.6933288474059783e-05, "loss": 2.3005, "step": 36120 }, { "epoch": 0.462464, "grad_norm": 1.59375, "learning_rate": 1.6927268238637444e-05, "loss": 2.2914, "step": 36130 }, { "epoch": 0.462592, "grad_norm": 1.8046875, "learning_rate": 1.6921247687541202e-05, "loss": 2.2978, "step": 36140 }, { "epoch": 0.46272, "grad_norm": 1.578125, "learning_rate": 1.6915226821757184e-05, "loss": 2.2785, "step": 36150 }, { "epoch": 0.462848, "grad_norm": 1.59375, "learning_rate": 1.690920564227157e-05, "loss": 2.3001, "step": 36160 }, { "epoch": 0.462976, "grad_norm": 1.5703125, "learning_rate": 1.690318415007059e-05, "loss": 2.285, "step": 36170 }, { "epoch": 0.463104, "grad_norm": 1.59375, "learning_rate": 1.6897162346140526e-05, "loss": 2.3029, "step": 36180 }, { "epoch": 0.463232, "grad_norm": 1.5546875, "learning_rate": 1.6891140231467705e-05, "loss": 2.261, "step": 36190 }, { "epoch": 0.46336, "grad_norm": 1.484375, "learning_rate": 1.6885117807038517e-05, "loss": 2.2784, "step": 36200 }, { "epoch": 0.463488, "grad_norm": 1.59375, "learning_rate": 1.6879095073839395e-05, "loss": 2.2368, "step": 36210 }, { "epoch": 0.463616, "grad_norm": 1.625, "learning_rate": 1.6873072032856817e-05, "loss": 2.292, "step": 36220 }, { "epoch": 0.463744, "grad_norm": 1.6953125, "learning_rate": 1.6867048685077318e-05, "loss": 2.2943, "step": 36230 }, { "epoch": 0.463872, "grad_norm": 1.671875, "learning_rate": 1.6861025031487493e-05, "loss": 2.2914, "step": 36240 }, { "epoch": 0.464, "grad_norm": 1.5546875, "learning_rate": 1.6855001073073966e-05, "loss": 2.2883, "step": 36250 }, { "epoch": 0.464128, "grad_norm": 1.5546875, "learning_rate": 1.6848976810823426e-05, "loss": 2.2899, "step": 36260 }, { "epoch": 0.464256, "grad_norm": 1.65625, "learning_rate": 1.68429522457226e-05, "loss": 2.2707, "step": 36270 }, { "epoch": 0.464384, "grad_norm": 1.53125, "learning_rate": 1.6836927378758292e-05, "loss": 2.2764, "step": 36280 }, { "epoch": 0.464512, "grad_norm": 1.4921875, "learning_rate": 1.6830902210917317e-05, "loss": 2.2659, "step": 36290 }, { "epoch": 0.46464, "grad_norm": 1.515625, "learning_rate": 1.6824876743186562e-05, "loss": 2.2934, "step": 36300 }, { "epoch": 0.464768, "grad_norm": 1.515625, "learning_rate": 1.6818850976552964e-05, "loss": 2.2669, "step": 36310 }, { "epoch": 0.464896, "grad_norm": 1.5234375, "learning_rate": 1.6812824912003507e-05, "loss": 2.2579, "step": 36320 }, { "epoch": 0.465024, "grad_norm": 1.4765625, "learning_rate": 1.680679855052521e-05, "loss": 2.2548, "step": 36330 }, { "epoch": 0.465152, "grad_norm": 2.984375, "learning_rate": 1.6800771893105157e-05, "loss": 2.3028, "step": 36340 }, { "epoch": 0.46528, "grad_norm": 1.59375, "learning_rate": 1.6794744940730476e-05, "loss": 2.2931, "step": 36350 }, { "epoch": 0.465408, "grad_norm": 2.59375, "learning_rate": 1.678871769438834e-05, "loss": 2.3049, "step": 36360 }, { "epoch": 0.465536, "grad_norm": 1.5, "learning_rate": 1.6782690155065973e-05, "loss": 2.2775, "step": 36370 }, { "epoch": 0.465664, "grad_norm": 1.7734375, "learning_rate": 1.677666232375065e-05, "loss": 2.2634, "step": 36380 }, { "epoch": 0.465792, "grad_norm": 2.046875, "learning_rate": 1.6770634201429684e-05, "loss": 2.2843, "step": 36390 }, { "epoch": 0.46592, "grad_norm": 1.5703125, "learning_rate": 1.6764605789090448e-05, "loss": 2.2892, "step": 36400 }, { "epoch": 0.466048, "grad_norm": 1.515625, "learning_rate": 1.675857708772035e-05, "loss": 2.2661, "step": 36410 }, { "epoch": 0.466176, "grad_norm": 1.5, "learning_rate": 1.6752548098306858e-05, "loss": 2.3054, "step": 36420 }, { "epoch": 0.466304, "grad_norm": 1.6875, "learning_rate": 1.6746518821837477e-05, "loss": 2.274, "step": 36430 }, { "epoch": 0.466432, "grad_norm": 1.40625, "learning_rate": 1.6740489259299767e-05, "loss": 2.2871, "step": 36440 }, { "epoch": 0.46656, "grad_norm": 1.546875, "learning_rate": 1.6734459411681328e-05, "loss": 2.2952, "step": 36450 }, { "epoch": 0.466688, "grad_norm": 1.5625, "learning_rate": 1.6728429279969803e-05, "loss": 2.2723, "step": 36460 }, { "epoch": 0.466816, "grad_norm": 2.1875, "learning_rate": 1.6722398865152902e-05, "loss": 2.2815, "step": 36470 }, { "epoch": 0.466944, "grad_norm": 1.6796875, "learning_rate": 1.6716368168218355e-05, "loss": 2.3155, "step": 36480 }, { "epoch": 0.467072, "grad_norm": 1.5546875, "learning_rate": 1.671033719015396e-05, "loss": 2.2986, "step": 36490 }, { "epoch": 0.4672, "grad_norm": 1.6875, "learning_rate": 1.6704305931947542e-05, "loss": 2.2789, "step": 36500 }, { "epoch": 0.467328, "grad_norm": 1.5859375, "learning_rate": 1.6698274394586993e-05, "loss": 2.273, "step": 36510 }, { "epoch": 0.467456, "grad_norm": 1.5234375, "learning_rate": 1.6692242579060228e-05, "loss": 2.2775, "step": 36520 }, { "epoch": 0.467584, "grad_norm": 1.578125, "learning_rate": 1.6686210486355226e-05, "loss": 2.2893, "step": 36530 }, { "epoch": 0.467712, "grad_norm": 1.5, "learning_rate": 1.668017811746e-05, "loss": 2.2661, "step": 36540 }, { "epoch": 0.46784, "grad_norm": 1.546875, "learning_rate": 1.6674145473362618e-05, "loss": 2.2932, "step": 36550 }, { "epoch": 0.467968, "grad_norm": 1.4453125, "learning_rate": 1.6668112555051183e-05, "loss": 2.2863, "step": 36560 }, { "epoch": 0.468096, "grad_norm": 1.671875, "learning_rate": 1.666207936351385e-05, "loss": 2.2802, "step": 36570 }, { "epoch": 0.468224, "grad_norm": 1.59375, "learning_rate": 1.6656045899738808e-05, "loss": 2.3165, "step": 36580 }, { "epoch": 0.468352, "grad_norm": 33.5, "learning_rate": 1.6650012164714316e-05, "loss": 2.2862, "step": 36590 }, { "epoch": 0.46848, "grad_norm": 1.703125, "learning_rate": 1.664397815942864e-05, "loss": 2.2798, "step": 36600 }, { "epoch": 0.468608, "grad_norm": 1.59375, "learning_rate": 1.6637943884870126e-05, "loss": 2.2635, "step": 36610 }, { "epoch": 0.468736, "grad_norm": 1.6796875, "learning_rate": 1.663190934202714e-05, "loss": 2.2902, "step": 36620 }, { "epoch": 0.468864, "grad_norm": 1.6796875, "learning_rate": 1.662587453188811e-05, "loss": 2.3004, "step": 36630 }, { "epoch": 0.468992, "grad_norm": 1.90625, "learning_rate": 1.6619839455441484e-05, "loss": 2.2888, "step": 36640 }, { "epoch": 0.46912, "grad_norm": 1.5390625, "learning_rate": 1.661380411367578e-05, "loss": 2.2876, "step": 36650 }, { "epoch": 0.469248, "grad_norm": 1.7109375, "learning_rate": 1.6607768507579543e-05, "loss": 2.2892, "step": 36660 }, { "epoch": 0.469376, "grad_norm": 1.7265625, "learning_rate": 1.660173263814137e-05, "loss": 2.293, "step": 36670 }, { "epoch": 0.469504, "grad_norm": 1.5859375, "learning_rate": 1.6595696506349882e-05, "loss": 2.2961, "step": 36680 }, { "epoch": 0.469632, "grad_norm": 1.640625, "learning_rate": 1.6589660113193775e-05, "loss": 2.2804, "step": 36690 }, { "epoch": 0.46976, "grad_norm": 1.5546875, "learning_rate": 1.6583623459661766e-05, "loss": 2.2547, "step": 36700 }, { "epoch": 0.469888, "grad_norm": 1.578125, "learning_rate": 1.6577586546742617e-05, "loss": 2.3082, "step": 36710 }, { "epoch": 0.470016, "grad_norm": 1.640625, "learning_rate": 1.6571549375425134e-05, "loss": 2.296, "step": 36720 }, { "epoch": 0.470144, "grad_norm": 1.421875, "learning_rate": 1.656551194669817e-05, "loss": 2.2975, "step": 36730 }, { "epoch": 0.470272, "grad_norm": 1.6875, "learning_rate": 1.655947426155061e-05, "loss": 2.2732, "step": 36740 }, { "epoch": 0.4704, "grad_norm": 1.5078125, "learning_rate": 1.655343632097139e-05, "loss": 2.2595, "step": 36750 }, { "epoch": 0.470528, "grad_norm": 1.6015625, "learning_rate": 1.6547398125949488e-05, "loss": 2.2781, "step": 36760 }, { "epoch": 0.470656, "grad_norm": 1.8359375, "learning_rate": 1.6541359677473914e-05, "loss": 2.2983, "step": 36770 }, { "epoch": 0.470784, "grad_norm": 1.484375, "learning_rate": 1.653532097653374e-05, "loss": 2.3051, "step": 36780 }, { "epoch": 0.470912, "grad_norm": 1.5390625, "learning_rate": 1.652928202411805e-05, "loss": 2.2686, "step": 36790 }, { "epoch": 0.47104, "grad_norm": 1.578125, "learning_rate": 1.6523242821215992e-05, "loss": 2.2942, "step": 36800 }, { "epoch": 0.471168, "grad_norm": 1.625, "learning_rate": 1.6517203368816746e-05, "loss": 2.2969, "step": 36810 }, { "epoch": 0.471296, "grad_norm": 1.53125, "learning_rate": 1.6511163667909542e-05, "loss": 2.297, "step": 36820 }, { "epoch": 0.471424, "grad_norm": 1.546875, "learning_rate": 1.650512371948363e-05, "loss": 2.2891, "step": 36830 }, { "epoch": 0.471552, "grad_norm": 1.5390625, "learning_rate": 1.6499083524528327e-05, "loss": 2.28, "step": 36840 }, { "epoch": 0.47168, "grad_norm": 1.53125, "learning_rate": 1.6493043084032967e-05, "loss": 2.2929, "step": 36850 }, { "epoch": 0.471808, "grad_norm": 1.515625, "learning_rate": 1.6487002398986944e-05, "loss": 2.2849, "step": 36860 }, { "epoch": 0.471936, "grad_norm": 1.5234375, "learning_rate": 1.6480961470379676e-05, "loss": 2.2863, "step": 36870 }, { "epoch": 0.472064, "grad_norm": 1.5234375, "learning_rate": 1.6474920299200624e-05, "loss": 2.2752, "step": 36880 }, { "epoch": 0.472192, "grad_norm": 1.65625, "learning_rate": 1.6468878886439306e-05, "loss": 2.2952, "step": 36890 }, { "epoch": 0.47232, "grad_norm": 1.625, "learning_rate": 1.646283723308525e-05, "loss": 2.2726, "step": 36900 }, { "epoch": 0.472448, "grad_norm": 1.5234375, "learning_rate": 1.645679534012805e-05, "loss": 2.303, "step": 36910 }, { "epoch": 0.472576, "grad_norm": 1.4375, "learning_rate": 1.6450753208557326e-05, "loss": 2.2909, "step": 36920 }, { "epoch": 0.472704, "grad_norm": 1.59375, "learning_rate": 1.6444710839362738e-05, "loss": 2.2826, "step": 36930 }, { "epoch": 0.472832, "grad_norm": 1.6171875, "learning_rate": 1.6438668233533985e-05, "loss": 2.2475, "step": 36940 }, { "epoch": 0.47296, "grad_norm": 1.609375, "learning_rate": 1.6432625392060808e-05, "loss": 2.296, "step": 36950 }, { "epoch": 0.473088, "grad_norm": 1.4765625, "learning_rate": 1.6426582315932983e-05, "loss": 2.2864, "step": 36960 }, { "epoch": 0.473216, "grad_norm": 1.5234375, "learning_rate": 1.6420539006140335e-05, "loss": 2.2936, "step": 36970 }, { "epoch": 0.473344, "grad_norm": 1.5546875, "learning_rate": 1.6414495463672706e-05, "loss": 2.2671, "step": 36980 }, { "epoch": 0.473472, "grad_norm": 1.4140625, "learning_rate": 1.6408451689519994e-05, "loss": 2.2765, "step": 36990 }, { "epoch": 0.4736, "grad_norm": 2.6875, "learning_rate": 1.6402407684672134e-05, "loss": 2.2649, "step": 37000 }, { "epoch": 0.473728, "grad_norm": 1.6796875, "learning_rate": 1.639636345011909e-05, "loss": 2.2757, "step": 37010 }, { "epoch": 0.473856, "grad_norm": 1.53125, "learning_rate": 1.6390318986850864e-05, "loss": 2.2638, "step": 37020 }, { "epoch": 0.473984, "grad_norm": 1.609375, "learning_rate": 1.6384274295857506e-05, "loss": 2.2979, "step": 37030 }, { "epoch": 0.474112, "grad_norm": 1.4453125, "learning_rate": 1.6378229378129095e-05, "loss": 2.3035, "step": 37040 }, { "epoch": 0.47424, "grad_norm": 2.03125, "learning_rate": 1.637218423465575e-05, "loss": 2.2701, "step": 37050 }, { "epoch": 0.474368, "grad_norm": 1.5703125, "learning_rate": 1.636613886642762e-05, "loss": 2.2986, "step": 37060 }, { "epoch": 0.474496, "grad_norm": 1.5625, "learning_rate": 1.6360093274434908e-05, "loss": 2.2533, "step": 37070 }, { "epoch": 0.474624, "grad_norm": 1.625, "learning_rate": 1.6354047459667828e-05, "loss": 2.2628, "step": 37080 }, { "epoch": 0.474752, "grad_norm": 1.6171875, "learning_rate": 1.6348001423116653e-05, "loss": 2.2682, "step": 37090 }, { "epoch": 0.47488, "grad_norm": 1.578125, "learning_rate": 1.6341955165771684e-05, "loss": 2.3091, "step": 37100 }, { "epoch": 0.475008, "grad_norm": 1.5078125, "learning_rate": 1.633590868862326e-05, "loss": 2.3048, "step": 37110 }, { "epoch": 0.475136, "grad_norm": 1.6015625, "learning_rate": 1.632986199266175e-05, "loss": 2.2797, "step": 37120 }, { "epoch": 0.475264, "grad_norm": 1.5, "learning_rate": 1.6323815078877564e-05, "loss": 2.2703, "step": 37130 }, { "epoch": 0.475392, "grad_norm": 1.5, "learning_rate": 1.631776794826115e-05, "loss": 2.2823, "step": 37140 }, { "epoch": 0.47552, "grad_norm": 1.5078125, "learning_rate": 1.6311720601802984e-05, "loss": 2.2883, "step": 37150 }, { "epoch": 0.475648, "grad_norm": 1.6328125, "learning_rate": 1.6305673040493587e-05, "loss": 2.266, "step": 37160 }, { "epoch": 0.475776, "grad_norm": 1.5546875, "learning_rate": 1.62996252653235e-05, "loss": 2.2985, "step": 37170 }, { "epoch": 0.475904, "grad_norm": 1.6328125, "learning_rate": 1.6293577277283323e-05, "loss": 2.2697, "step": 37180 }, { "epoch": 0.476032, "grad_norm": 1.65625, "learning_rate": 1.6287529077363665e-05, "loss": 2.2556, "step": 37190 }, { "epoch": 0.47616, "grad_norm": 1.5859375, "learning_rate": 1.6281480666555192e-05, "loss": 2.2604, "step": 37200 }, { "epoch": 0.476288, "grad_norm": 1.5546875, "learning_rate": 1.6275432045848583e-05, "loss": 2.2761, "step": 37210 }, { "epoch": 0.476416, "grad_norm": 1.5546875, "learning_rate": 1.6269383216234566e-05, "loss": 2.291, "step": 37220 }, { "epoch": 0.476544, "grad_norm": 1.546875, "learning_rate": 1.62633341787039e-05, "loss": 2.2836, "step": 37230 }, { "epoch": 0.476672, "grad_norm": 1.3828125, "learning_rate": 1.6257284934247384e-05, "loss": 2.2602, "step": 37240 }, { "epoch": 0.4768, "grad_norm": 1.5546875, "learning_rate": 1.6251235483855838e-05, "loss": 2.2776, "step": 37250 }, { "epoch": 0.476928, "grad_norm": 1.5078125, "learning_rate": 1.624518582852012e-05, "loss": 2.2869, "step": 37260 }, { "epoch": 0.477056, "grad_norm": 1.5546875, "learning_rate": 1.6239135969231132e-05, "loss": 2.2922, "step": 37270 }, { "epoch": 0.477184, "grad_norm": 1.59375, "learning_rate": 1.6233085906979798e-05, "loss": 2.3099, "step": 37280 }, { "epoch": 0.477312, "grad_norm": 1.453125, "learning_rate": 1.6227035642757072e-05, "loss": 2.261, "step": 37290 }, { "epoch": 0.47744, "grad_norm": 1.4296875, "learning_rate": 1.622098517755395e-05, "loss": 2.298, "step": 37300 }, { "epoch": 0.477568, "grad_norm": 1.6015625, "learning_rate": 1.621493451236147e-05, "loss": 2.2885, "step": 37310 }, { "epoch": 0.477696, "grad_norm": 1.5546875, "learning_rate": 1.6208883648170682e-05, "loss": 2.2624, "step": 37320 }, { "epoch": 0.477824, "grad_norm": 3.53125, "learning_rate": 1.6202832585972674e-05, "loss": 2.2392, "step": 37330 }, { "epoch": 0.477952, "grad_norm": 1.6484375, "learning_rate": 1.6196781326758574e-05, "loss": 2.2518, "step": 37340 }, { "epoch": 0.47808, "grad_norm": 1.5546875, "learning_rate": 1.6190729871519548e-05, "loss": 2.2786, "step": 37350 }, { "epoch": 0.478208, "grad_norm": 2.375, "learning_rate": 1.618467822124677e-05, "loss": 2.2962, "step": 37360 }, { "epoch": 0.478336, "grad_norm": 1.5703125, "learning_rate": 1.6178626376931463e-05, "loss": 2.2924, "step": 37370 }, { "epoch": 0.478464, "grad_norm": 1.6171875, "learning_rate": 1.6172574339564887e-05, "loss": 2.2925, "step": 37380 }, { "epoch": 0.478592, "grad_norm": 1.671875, "learning_rate": 1.6166522110138327e-05, "loss": 2.2963, "step": 37390 }, { "epoch": 0.47872, "grad_norm": 1.546875, "learning_rate": 1.616046968964309e-05, "loss": 2.2941, "step": 37400 }, { "epoch": 0.478848, "grad_norm": 5.75, "learning_rate": 1.615441707907052e-05, "loss": 2.2796, "step": 37410 }, { "epoch": 0.478976, "grad_norm": 2.1875, "learning_rate": 1.6148364279412013e-05, "loss": 2.3061, "step": 37420 }, { "epoch": 0.479104, "grad_norm": 1.5703125, "learning_rate": 1.614231129165896e-05, "loss": 2.2893, "step": 37430 }, { "epoch": 0.479232, "grad_norm": 1.65625, "learning_rate": 1.6136258116802806e-05, "loss": 2.2645, "step": 37440 }, { "epoch": 0.47936, "grad_norm": 1.546875, "learning_rate": 1.6130204755835025e-05, "loss": 2.3015, "step": 37450 }, { "epoch": 0.479488, "grad_norm": 1.609375, "learning_rate": 1.6124151209747116e-05, "loss": 2.2943, "step": 37460 }, { "epoch": 0.479616, "grad_norm": 1.578125, "learning_rate": 1.611809747953061e-05, "loss": 2.2557, "step": 37470 }, { "epoch": 0.479744, "grad_norm": 1.5390625, "learning_rate": 1.6112043566177065e-05, "loss": 2.2987, "step": 37480 }, { "epoch": 0.479872, "grad_norm": 1.84375, "learning_rate": 1.6105989470678077e-05, "loss": 2.2435, "step": 37490 }, { "epoch": 0.48, "grad_norm": 1.875, "learning_rate": 1.6099935194025267e-05, "loss": 2.2835, "step": 37500 }, { "epoch": 0.480128, "grad_norm": 1.9453125, "learning_rate": 1.6093880737210278e-05, "loss": 2.2939, "step": 37510 }, { "epoch": 0.480256, "grad_norm": 1.5078125, "learning_rate": 1.60878261012248e-05, "loss": 2.2957, "step": 37520 }, { "epoch": 0.480384, "grad_norm": 1.5625, "learning_rate": 1.6081771287060536e-05, "loss": 2.2912, "step": 37530 }, { "epoch": 0.480512, "grad_norm": 1.609375, "learning_rate": 1.6075716295709234e-05, "loss": 2.2699, "step": 37540 }, { "epoch": 0.48064, "grad_norm": 1.546875, "learning_rate": 1.606966112816265e-05, "loss": 2.2795, "step": 37550 }, { "epoch": 0.480768, "grad_norm": 1.5546875, "learning_rate": 1.606360578541259e-05, "loss": 2.2741, "step": 37560 }, { "epoch": 0.480896, "grad_norm": 1.5078125, "learning_rate": 1.6057550268450877e-05, "loss": 2.2811, "step": 37570 }, { "epoch": 0.481024, "grad_norm": 1.796875, "learning_rate": 1.6051494578269363e-05, "loss": 2.2949, "step": 37580 }, { "epoch": 0.481152, "grad_norm": 1.6015625, "learning_rate": 1.6045438715859932e-05, "loss": 2.2749, "step": 37590 }, { "epoch": 0.48128, "grad_norm": 1.5390625, "learning_rate": 1.603938268221449e-05, "loss": 2.2803, "step": 37600 }, { "epoch": 0.481408, "grad_norm": 1.5078125, "learning_rate": 1.603332647832498e-05, "loss": 2.2617, "step": 37610 }, { "epoch": 0.481536, "grad_norm": 1.578125, "learning_rate": 1.6027270105183372e-05, "loss": 2.3041, "step": 37620 }, { "epoch": 0.481664, "grad_norm": 1.484375, "learning_rate": 1.6021213563781654e-05, "loss": 2.3113, "step": 37630 }, { "epoch": 0.481792, "grad_norm": 1.5703125, "learning_rate": 1.601515685511185e-05, "loss": 2.2773, "step": 37640 }, { "epoch": 0.48192, "grad_norm": 1.6875, "learning_rate": 1.6009099980166008e-05, "loss": 2.2495, "step": 37650 }, { "epoch": 0.482048, "grad_norm": 1.4453125, "learning_rate": 1.600304293993621e-05, "loss": 2.3086, "step": 37660 }, { "epoch": 0.482176, "grad_norm": 1.5859375, "learning_rate": 1.5996985735414552e-05, "loss": 2.2615, "step": 37670 }, { "epoch": 0.482304, "grad_norm": 1.5234375, "learning_rate": 1.5990928367593163e-05, "loss": 2.2731, "step": 37680 }, { "epoch": 0.482432, "grad_norm": 2.4375, "learning_rate": 1.598487083746421e-05, "loss": 2.2733, "step": 37690 }, { "epoch": 0.48256, "grad_norm": 1.59375, "learning_rate": 1.597881314601987e-05, "loss": 2.3343, "step": 37700 }, { "epoch": 0.482688, "grad_norm": 1.671875, "learning_rate": 1.5972755294252355e-05, "loss": 2.3039, "step": 37710 }, { "epoch": 0.482816, "grad_norm": 1.4921875, "learning_rate": 1.5966697283153898e-05, "loss": 2.296, "step": 37720 }, { "epoch": 0.482944, "grad_norm": 1.7734375, "learning_rate": 1.596063911371677e-05, "loss": 2.2686, "step": 37730 }, { "epoch": 0.483072, "grad_norm": 1.515625, "learning_rate": 1.595458078693325e-05, "loss": 2.2578, "step": 37740 }, { "epoch": 0.4832, "grad_norm": 1.859375, "learning_rate": 1.5948522303795653e-05, "loss": 2.2796, "step": 37750 }, { "epoch": 0.483328, "grad_norm": 1.625, "learning_rate": 1.5942463665296324e-05, "loss": 2.253, "step": 37760 }, { "epoch": 0.483456, "grad_norm": 1.75, "learning_rate": 1.5936404872427628e-05, "loss": 2.2915, "step": 37770 }, { "epoch": 0.483584, "grad_norm": 1.6640625, "learning_rate": 1.593034592618195e-05, "loss": 2.2902, "step": 37780 }, { "epoch": 0.483712, "grad_norm": 1.640625, "learning_rate": 1.592428682755171e-05, "loss": 2.2898, "step": 37790 }, { "epoch": 0.48384, "grad_norm": 4.15625, "learning_rate": 1.591822757752935e-05, "loss": 2.2625, "step": 37800 }, { "epoch": 0.483968, "grad_norm": 1.4453125, "learning_rate": 1.5912168177107334e-05, "loss": 2.2903, "step": 37810 }, { "epoch": 0.484096, "grad_norm": 1.6171875, "learning_rate": 1.5906108627278145e-05, "loss": 2.307, "step": 37820 }, { "epoch": 0.484224, "grad_norm": 1.6484375, "learning_rate": 1.5900048929034313e-05, "loss": 2.2823, "step": 37830 }, { "epoch": 0.484352, "grad_norm": 1.6953125, "learning_rate": 1.589398908336836e-05, "loss": 2.294, "step": 37840 }, { "epoch": 0.48448, "grad_norm": 1.4765625, "learning_rate": 1.5887929091272862e-05, "loss": 2.2598, "step": 37850 }, { "epoch": 0.484608, "grad_norm": 1.4453125, "learning_rate": 1.5881868953740396e-05, "loss": 2.282, "step": 37860 }, { "epoch": 0.484736, "grad_norm": 1.5625, "learning_rate": 1.5875808671763582e-05, "loss": 2.2718, "step": 37870 }, { "epoch": 0.484864, "grad_norm": 1.4921875, "learning_rate": 1.586974824633505e-05, "loss": 2.2746, "step": 37880 }, { "epoch": 0.484992, "grad_norm": 1.6484375, "learning_rate": 1.5863687678447454e-05, "loss": 2.2779, "step": 37890 }, { "epoch": 0.48512, "grad_norm": 1.4453125, "learning_rate": 1.585762696909348e-05, "loss": 2.3059, "step": 37900 }, { "epoch": 0.485248, "grad_norm": 1.5859375, "learning_rate": 1.5851566119265833e-05, "loss": 2.3151, "step": 37910 }, { "epoch": 0.485376, "grad_norm": 1.703125, "learning_rate": 1.584550512995724e-05, "loss": 2.2913, "step": 37920 }, { "epoch": 0.485504, "grad_norm": 1.578125, "learning_rate": 1.5839444002160445e-05, "loss": 2.3159, "step": 37930 }, { "epoch": 0.485632, "grad_norm": 1.5546875, "learning_rate": 1.5833382736868232e-05, "loss": 2.2608, "step": 37940 }, { "epoch": 0.48576, "grad_norm": 1.6953125, "learning_rate": 1.582732133507339e-05, "loss": 2.2871, "step": 37950 }, { "epoch": 0.485888, "grad_norm": 1.5390625, "learning_rate": 1.5821259797768733e-05, "loss": 2.2721, "step": 37960 }, { "epoch": 0.486016, "grad_norm": 1.5390625, "learning_rate": 1.5815198125947116e-05, "loss": 2.3042, "step": 37970 }, { "epoch": 0.486144, "grad_norm": 1.421875, "learning_rate": 1.580913632060138e-05, "loss": 2.2945, "step": 37980 }, { "epoch": 0.486272, "grad_norm": 1.6640625, "learning_rate": 1.580307438272442e-05, "loss": 2.3021, "step": 37990 }, { "epoch": 0.4864, "grad_norm": 1.6328125, "learning_rate": 1.5797012313309153e-05, "loss": 2.285, "step": 38000 }, { "epoch": 0.486528, "grad_norm": 1.59375, "learning_rate": 1.5790950113348487e-05, "loss": 2.2739, "step": 38010 }, { "epoch": 0.486656, "grad_norm": 2.921875, "learning_rate": 1.5784887783835378e-05, "loss": 2.2515, "step": 38020 }, { "epoch": 0.486784, "grad_norm": 1.546875, "learning_rate": 1.5778825325762797e-05, "loss": 2.2723, "step": 38030 }, { "epoch": 0.486912, "grad_norm": 1.765625, "learning_rate": 1.5772762740123737e-05, "loss": 2.265, "step": 38040 }, { "epoch": 0.48704, "grad_norm": 1.515625, "learning_rate": 1.5766700027911202e-05, "loss": 2.2945, "step": 38050 }, { "epoch": 0.487168, "grad_norm": 1.625, "learning_rate": 1.576063719011823e-05, "loss": 2.2809, "step": 38060 }, { "epoch": 0.487296, "grad_norm": 1.5859375, "learning_rate": 1.5754574227737878e-05, "loss": 2.2641, "step": 38070 }, { "epoch": 0.487424, "grad_norm": 3.328125, "learning_rate": 1.574851114176321e-05, "loss": 2.293, "step": 38080 }, { "epoch": 0.487552, "grad_norm": 1.5703125, "learning_rate": 1.5742447933187327e-05, "loss": 2.2865, "step": 38090 }, { "epoch": 0.48768, "grad_norm": 1.5859375, "learning_rate": 1.5736384603003338e-05, "loss": 2.2906, "step": 38100 }, { "epoch": 0.487808, "grad_norm": 1.5234375, "learning_rate": 1.5730321152204382e-05, "loss": 2.2864, "step": 38110 }, { "epoch": 0.487936, "grad_norm": 1.484375, "learning_rate": 1.572425758178361e-05, "loss": 2.28, "step": 38120 }, { "epoch": 0.488064, "grad_norm": 1.46875, "learning_rate": 1.5718193892734192e-05, "loss": 2.2804, "step": 38130 }, { "epoch": 0.488192, "grad_norm": 1.71875, "learning_rate": 1.571213008604933e-05, "loss": 2.2709, "step": 38140 }, { "epoch": 0.48832, "grad_norm": 1.515625, "learning_rate": 1.5706066162722224e-05, "loss": 2.2651, "step": 38150 }, { "epoch": 0.488448, "grad_norm": 1.546875, "learning_rate": 1.5700002123746107e-05, "loss": 2.3028, "step": 38160 }, { "epoch": 0.488576, "grad_norm": 1.5546875, "learning_rate": 1.5693937970114234e-05, "loss": 2.2854, "step": 38170 }, { "epoch": 0.488704, "grad_norm": 1.546875, "learning_rate": 1.568787370281987e-05, "loss": 2.2965, "step": 38180 }, { "epoch": 0.488832, "grad_norm": 1.5625, "learning_rate": 1.568180932285631e-05, "loss": 2.2992, "step": 38190 }, { "epoch": 0.48896, "grad_norm": 9.0, "learning_rate": 1.5675744831216847e-05, "loss": 2.2668, "step": 38200 }, { "epoch": 0.489088, "grad_norm": 1.6484375, "learning_rate": 1.5669680228894817e-05, "loss": 2.2957, "step": 38210 }, { "epoch": 0.489216, "grad_norm": 1.59375, "learning_rate": 1.5663615516883548e-05, "loss": 2.2623, "step": 38220 }, { "epoch": 0.489344, "grad_norm": 1.71875, "learning_rate": 1.5657550696176414e-05, "loss": 2.297, "step": 38230 }, { "epoch": 0.489472, "grad_norm": 1.5703125, "learning_rate": 1.5651485767766784e-05, "loss": 2.2543, "step": 38240 }, { "epoch": 0.4896, "grad_norm": 1.4609375, "learning_rate": 1.5645420732648062e-05, "loss": 2.2809, "step": 38250 }, { "epoch": 0.489728, "grad_norm": 1.375, "learning_rate": 1.5639355591813655e-05, "loss": 2.2832, "step": 38260 }, { "epoch": 0.489856, "grad_norm": 1.640625, "learning_rate": 1.5633290346256987e-05, "loss": 2.29, "step": 38270 }, { "epoch": 0.489984, "grad_norm": 1.4921875, "learning_rate": 1.562722499697152e-05, "loss": 2.2725, "step": 38280 }, { "epoch": 0.490112, "grad_norm": 3.671875, "learning_rate": 1.5621159544950705e-05, "loss": 2.3122, "step": 38290 }, { "epoch": 0.49024, "grad_norm": 1.453125, "learning_rate": 1.5615093991188034e-05, "loss": 2.2909, "step": 38300 }, { "epoch": 0.490368, "grad_norm": 1.640625, "learning_rate": 1.5609028336677e-05, "loss": 2.274, "step": 38310 }, { "epoch": 0.490496, "grad_norm": 1.7578125, "learning_rate": 1.5602962582411115e-05, "loss": 2.2882, "step": 38320 }, { "epoch": 0.490624, "grad_norm": 1.671875, "learning_rate": 1.5596896729383912e-05, "loss": 2.2733, "step": 38330 }, { "epoch": 0.490752, "grad_norm": 1.5703125, "learning_rate": 1.559083077858894e-05, "loss": 2.3031, "step": 38340 }, { "epoch": 0.49088, "grad_norm": 1.5078125, "learning_rate": 1.5584764731019766e-05, "loss": 2.3013, "step": 38350 }, { "epoch": 0.491008, "grad_norm": 1.625, "learning_rate": 1.5578698587669956e-05, "loss": 2.2575, "step": 38360 }, { "epoch": 0.491136, "grad_norm": 1.7109375, "learning_rate": 1.557263234953311e-05, "loss": 2.2402, "step": 38370 }, { "epoch": 0.491264, "grad_norm": 1.5625, "learning_rate": 1.556656601760285e-05, "loss": 2.306, "step": 38380 }, { "epoch": 0.491392, "grad_norm": 1.5390625, "learning_rate": 1.556049959287278e-05, "loss": 2.266, "step": 38390 }, { "epoch": 0.49152, "grad_norm": 1.5, "learning_rate": 1.5554433076336552e-05, "loss": 2.2845, "step": 38400 }, { "epoch": 0.491648, "grad_norm": 1.8359375, "learning_rate": 1.554836646898783e-05, "loss": 2.2917, "step": 38410 }, { "epoch": 0.491776, "grad_norm": 1.625, "learning_rate": 1.554229977182027e-05, "loss": 2.2921, "step": 38420 }, { "epoch": 0.491904, "grad_norm": 1.53125, "learning_rate": 1.5536232985827562e-05, "loss": 2.313, "step": 38430 }, { "epoch": 0.492032, "grad_norm": 1.7421875, "learning_rate": 1.5530166112003407e-05, "loss": 2.2935, "step": 38440 }, { "epoch": 0.49216, "grad_norm": 1.703125, "learning_rate": 1.552409915134152e-05, "loss": 2.2892, "step": 38450 }, { "epoch": 0.492288, "grad_norm": 2.21875, "learning_rate": 1.551803210483563e-05, "loss": 2.2733, "step": 38460 }, { "epoch": 0.492416, "grad_norm": 1.671875, "learning_rate": 1.5511964973479477e-05, "loss": 2.2747, "step": 38470 }, { "epoch": 0.492544, "grad_norm": 1.53125, "learning_rate": 1.5505897758266813e-05, "loss": 2.2667, "step": 38480 }, { "epoch": 0.492672, "grad_norm": 1.53125, "learning_rate": 1.5499830460191424e-05, "loss": 2.2958, "step": 38490 }, { "epoch": 0.4928, "grad_norm": 1.5078125, "learning_rate": 1.5493763080247078e-05, "loss": 2.2498, "step": 38500 }, { "epoch": 0.492928, "grad_norm": 1.5703125, "learning_rate": 1.548769561942757e-05, "loss": 2.2651, "step": 38510 }, { "epoch": 0.493056, "grad_norm": 1.6171875, "learning_rate": 1.5481628078726726e-05, "loss": 2.2979, "step": 38520 }, { "epoch": 0.493184, "grad_norm": 1.6015625, "learning_rate": 1.5475560459138358e-05, "loss": 2.2888, "step": 38530 }, { "epoch": 0.493312, "grad_norm": 1.59375, "learning_rate": 1.54694927616563e-05, "loss": 2.2714, "step": 38540 }, { "epoch": 0.49344, "grad_norm": 1.6328125, "learning_rate": 1.5463424987274407e-05, "loss": 2.2663, "step": 38550 }, { "epoch": 0.493568, "grad_norm": 8.625, "learning_rate": 1.5457357136986547e-05, "loss": 2.3145, "step": 38560 }, { "epoch": 0.493696, "grad_norm": 6.0, "learning_rate": 1.5451289211786584e-05, "loss": 2.266, "step": 38570 }, { "epoch": 0.493824, "grad_norm": 2.609375, "learning_rate": 1.54452212126684e-05, "loss": 2.2923, "step": 38580 }, { "epoch": 0.493952, "grad_norm": 1.6015625, "learning_rate": 1.543915314062591e-05, "loss": 2.3016, "step": 38590 }, { "epoch": 0.49408, "grad_norm": 1.65625, "learning_rate": 1.543308499665301e-05, "loss": 2.2981, "step": 38600 }, { "epoch": 0.494208, "grad_norm": 1.6875, "learning_rate": 1.5427016781743628e-05, "loss": 2.2618, "step": 38610 }, { "epoch": 0.494336, "grad_norm": 1.578125, "learning_rate": 1.54209484968917e-05, "loss": 2.2628, "step": 38620 }, { "epoch": 0.494464, "grad_norm": 1.578125, "learning_rate": 1.541488014309116e-05, "loss": 2.2785, "step": 38630 }, { "epoch": 0.494592, "grad_norm": 1.4921875, "learning_rate": 1.540881172133598e-05, "loss": 2.2883, "step": 38640 }, { "epoch": 0.49472, "grad_norm": 1.65625, "learning_rate": 1.5402743232620115e-05, "loss": 2.2726, "step": 38650 }, { "epoch": 0.494848, "grad_norm": 1.4453125, "learning_rate": 1.5396674677937554e-05, "loss": 2.2538, "step": 38660 }, { "epoch": 0.494976, "grad_norm": 1.9140625, "learning_rate": 1.5390606058282278e-05, "loss": 2.2867, "step": 38670 }, { "epoch": 0.495104, "grad_norm": 1.8125, "learning_rate": 1.5384537374648293e-05, "loss": 2.2483, "step": 38680 }, { "epoch": 0.495232, "grad_norm": 1.6796875, "learning_rate": 1.5378468628029612e-05, "loss": 2.2644, "step": 38690 }, { "epoch": 0.49536, "grad_norm": 1.625, "learning_rate": 1.5372399819420242e-05, "loss": 2.2725, "step": 38700 }, { "epoch": 0.495488, "grad_norm": 1.71875, "learning_rate": 1.5366330949814227e-05, "loss": 2.2711, "step": 38710 }, { "epoch": 0.495616, "grad_norm": 1.5078125, "learning_rate": 1.5360262020205604e-05, "loss": 2.2782, "step": 38720 }, { "epoch": 0.495744, "grad_norm": 1.53125, "learning_rate": 1.5354193031588427e-05, "loss": 2.317, "step": 38730 }, { "epoch": 0.495872, "grad_norm": 1.5234375, "learning_rate": 1.534812398495675e-05, "loss": 2.2845, "step": 38740 }, { "epoch": 0.496, "grad_norm": 1.765625, "learning_rate": 1.5342054881304647e-05, "loss": 2.2842, "step": 38750 }, { "epoch": 0.496128, "grad_norm": 1.59375, "learning_rate": 1.5335985721626202e-05, "loss": 2.2474, "step": 38760 }, { "epoch": 0.496256, "grad_norm": 1.578125, "learning_rate": 1.5329916506915494e-05, "loss": 2.2546, "step": 38770 }, { "epoch": 0.496384, "grad_norm": 1.5859375, "learning_rate": 1.5323847238166626e-05, "loss": 2.2597, "step": 38780 }, { "epoch": 0.496512, "grad_norm": 1.6875, "learning_rate": 1.53177779163737e-05, "loss": 2.2872, "step": 38790 }, { "epoch": 0.49664, "grad_norm": 1.6953125, "learning_rate": 1.5311708542530843e-05, "loss": 2.2613, "step": 38800 }, { "epoch": 0.496768, "grad_norm": 1.765625, "learning_rate": 1.5305639117632163e-05, "loss": 2.3062, "step": 38810 }, { "epoch": 0.496896, "grad_norm": 1.53125, "learning_rate": 1.5299569642671802e-05, "loss": 2.2871, "step": 38820 }, { "epoch": 0.497024, "grad_norm": 1.625, "learning_rate": 1.5293500118643903e-05, "loss": 2.2459, "step": 38830 }, { "epoch": 0.497152, "grad_norm": 1.65625, "learning_rate": 1.5287430546542603e-05, "loss": 2.2818, "step": 38840 }, { "epoch": 0.49728, "grad_norm": 1.5703125, "learning_rate": 1.5281360927362065e-05, "loss": 2.2552, "step": 38850 }, { "epoch": 0.497408, "grad_norm": 1.5, "learning_rate": 1.527529126209645e-05, "loss": 2.2963, "step": 38860 }, { "epoch": 0.497536, "grad_norm": 2.765625, "learning_rate": 1.5269221551739942e-05, "loss": 2.2856, "step": 38870 }, { "epoch": 0.497664, "grad_norm": 1.578125, "learning_rate": 1.5263151797286698e-05, "loss": 2.3038, "step": 38880 }, { "epoch": 0.497792, "grad_norm": 1.578125, "learning_rate": 1.5257081999730914e-05, "loss": 2.2968, "step": 38890 }, { "epoch": 0.49792, "grad_norm": 2.625, "learning_rate": 1.5251012160066789e-05, "loss": 2.2822, "step": 38900 }, { "epoch": 0.498048, "grad_norm": 1.46875, "learning_rate": 1.524494227928852e-05, "loss": 2.2875, "step": 38910 }, { "epoch": 0.498176, "grad_norm": 1.4921875, "learning_rate": 1.5238872358390305e-05, "loss": 2.2851, "step": 38920 }, { "epoch": 0.498304, "grad_norm": 1.84375, "learning_rate": 1.523280239836637e-05, "loss": 2.2702, "step": 38930 }, { "epoch": 0.498432, "grad_norm": 1.5, "learning_rate": 1.5226732400210925e-05, "loss": 2.2728, "step": 38940 }, { "epoch": 0.49856, "grad_norm": 1.7109375, "learning_rate": 1.52206623649182e-05, "loss": 2.2719, "step": 38950 }, { "epoch": 0.498688, "grad_norm": 1.6796875, "learning_rate": 1.5214592293482427e-05, "loss": 2.2702, "step": 38960 }, { "epoch": 0.498816, "grad_norm": 1.515625, "learning_rate": 1.5208522186897842e-05, "loss": 2.286, "step": 38970 }, { "epoch": 0.498944, "grad_norm": 1.7109375, "learning_rate": 1.520245204615869e-05, "loss": 2.2919, "step": 38980 }, { "epoch": 0.499072, "grad_norm": 1.828125, "learning_rate": 1.5196381872259224e-05, "loss": 2.2853, "step": 38990 }, { "epoch": 0.4992, "grad_norm": 1.6171875, "learning_rate": 1.5190311666193693e-05, "loss": 2.3216, "step": 39000 }, { "epoch": 0.499328, "grad_norm": 1.6328125, "learning_rate": 1.5184241428956353e-05, "loss": 2.2725, "step": 39010 }, { "epoch": 0.499456, "grad_norm": 1.6171875, "learning_rate": 1.5178171161541484e-05, "loss": 2.2721, "step": 39020 }, { "epoch": 0.499584, "grad_norm": 1.6484375, "learning_rate": 1.5172100864943339e-05, "loss": 2.2933, "step": 39030 }, { "epoch": 0.499712, "grad_norm": 1.578125, "learning_rate": 1.516603054015621e-05, "loss": 2.2978, "step": 39040 }, { "epoch": 0.49984, "grad_norm": 2.5625, "learning_rate": 1.5159960188174357e-05, "loss": 2.2906, "step": 39050 }, { "epoch": 0.499968, "grad_norm": 1.484375, "learning_rate": 1.5153889809992082e-05, "loss": 2.2991, "step": 39060 }, { "epoch": 0.500096, "grad_norm": 12.25, "learning_rate": 1.5147819406603663e-05, "loss": 2.2866, "step": 39070 }, { "epoch": 0.500224, "grad_norm": 1.53125, "learning_rate": 1.514174897900339e-05, "loss": 2.2838, "step": 39080 }, { "epoch": 0.500352, "grad_norm": 1.578125, "learning_rate": 1.5135678528185568e-05, "loss": 2.2605, "step": 39090 }, { "epoch": 0.50048, "grad_norm": 1.53125, "learning_rate": 1.5129608055144491e-05, "loss": 2.2608, "step": 39100 }, { "epoch": 0.500608, "grad_norm": 1.4140625, "learning_rate": 1.5123537560874465e-05, "loss": 2.3363, "step": 39110 }, { "epoch": 0.500736, "grad_norm": 1.6171875, "learning_rate": 1.5117467046369796e-05, "loss": 2.3207, "step": 39120 }, { "epoch": 0.500864, "grad_norm": 1.40625, "learning_rate": 1.5111396512624792e-05, "loss": 2.2688, "step": 39130 }, { "epoch": 0.500992, "grad_norm": 1.4609375, "learning_rate": 1.510532596063378e-05, "loss": 2.3031, "step": 39140 }, { "epoch": 0.50112, "grad_norm": 1.59375, "learning_rate": 1.5099255391391055e-05, "loss": 2.2881, "step": 39150 }, { "epoch": 0.501248, "grad_norm": 1.4921875, "learning_rate": 1.509318480589095e-05, "loss": 2.2672, "step": 39160 }, { "epoch": 0.501376, "grad_norm": 1.4921875, "learning_rate": 1.5087114205127788e-05, "loss": 2.258, "step": 39170 }, { "epoch": 0.501504, "grad_norm": 1.5078125, "learning_rate": 1.508104359009589e-05, "loss": 2.262, "step": 39180 }, { "epoch": 0.501632, "grad_norm": 1.5390625, "learning_rate": 1.5074972961789581e-05, "loss": 2.2646, "step": 39190 }, { "epoch": 0.50176, "grad_norm": 1.5703125, "learning_rate": 1.5068902321203192e-05, "loss": 2.3034, "step": 39200 }, { "epoch": 0.501888, "grad_norm": 1.8515625, "learning_rate": 1.506283166933106e-05, "loss": 2.2758, "step": 39210 }, { "epoch": 0.502016, "grad_norm": 1.4921875, "learning_rate": 1.505676100716751e-05, "loss": 2.2594, "step": 39220 }, { "epoch": 0.502144, "grad_norm": 1.5625, "learning_rate": 1.5050690335706878e-05, "loss": 2.271, "step": 39230 }, { "epoch": 0.502272, "grad_norm": 1.625, "learning_rate": 1.5044619655943499e-05, "loss": 2.2793, "step": 39240 }, { "epoch": 0.5024, "grad_norm": 1.515625, "learning_rate": 1.5038548968871718e-05, "loss": 2.2731, "step": 39250 }, { "epoch": 0.502528, "grad_norm": 1.5703125, "learning_rate": 1.5032478275485865e-05, "loss": 2.2786, "step": 39260 }, { "epoch": 0.502656, "grad_norm": 1.4609375, "learning_rate": 1.5026407576780284e-05, "loss": 2.2791, "step": 39270 }, { "epoch": 0.502784, "grad_norm": 1.515625, "learning_rate": 1.5020336873749314e-05, "loss": 2.2585, "step": 39280 }, { "epoch": 0.502912, "grad_norm": 1.6484375, "learning_rate": 1.50142661673873e-05, "loss": 2.2986, "step": 39290 }, { "epoch": 0.50304, "grad_norm": 1.671875, "learning_rate": 1.5008195458688581e-05, "loss": 2.3079, "step": 39300 }, { "epoch": 0.503168, "grad_norm": 1.625, "learning_rate": 1.50021247486475e-05, "loss": 2.3089, "step": 39310 }, { "epoch": 0.503296, "grad_norm": 1.4765625, "learning_rate": 1.4996054038258396e-05, "loss": 2.2857, "step": 39320 }, { "epoch": 0.503424, "grad_norm": 1.5234375, "learning_rate": 1.4989983328515617e-05, "loss": 2.2658, "step": 39330 }, { "epoch": 0.503552, "grad_norm": 1.5625, "learning_rate": 1.4983912620413504e-05, "loss": 2.2758, "step": 39340 }, { "epoch": 0.50368, "grad_norm": 1.5625, "learning_rate": 1.4977841914946392e-05, "loss": 2.2902, "step": 39350 }, { "epoch": 0.503808, "grad_norm": 2.15625, "learning_rate": 1.4971771213108635e-05, "loss": 2.2554, "step": 39360 }, { "epoch": 0.503936, "grad_norm": 1.734375, "learning_rate": 1.4965700515894573e-05, "loss": 2.3102, "step": 39370 }, { "epoch": 0.504064, "grad_norm": 1.640625, "learning_rate": 1.495962982429853e-05, "loss": 2.2655, "step": 39380 }, { "epoch": 0.504192, "grad_norm": 1.5, "learning_rate": 1.4953559139314865e-05, "loss": 2.3187, "step": 39390 }, { "epoch": 0.50432, "grad_norm": 1.484375, "learning_rate": 1.4947488461937908e-05, "loss": 2.296, "step": 39400 }, { "epoch": 0.504448, "grad_norm": 1.5234375, "learning_rate": 1.4941417793161991e-05, "loss": 2.2806, "step": 39410 }, { "epoch": 0.504576, "grad_norm": 1.65625, "learning_rate": 1.4935347133981462e-05, "loss": 2.3225, "step": 39420 }, { "epoch": 0.504704, "grad_norm": 1.640625, "learning_rate": 1.4929276485390647e-05, "loss": 2.2389, "step": 39430 }, { "epoch": 0.504832, "grad_norm": 1.4375, "learning_rate": 1.4923205848383877e-05, "loss": 2.2833, "step": 39440 }, { "epoch": 0.50496, "grad_norm": 1.578125, "learning_rate": 1.491713522395549e-05, "loss": 2.2606, "step": 39450 }, { "epoch": 0.505088, "grad_norm": 1.7734375, "learning_rate": 1.4911064613099803e-05, "loss": 2.2567, "step": 39460 }, { "epoch": 0.505216, "grad_norm": 1.5703125, "learning_rate": 1.4904994016811155e-05, "loss": 2.2899, "step": 39470 }, { "epoch": 0.505344, "grad_norm": 1.671875, "learning_rate": 1.4898923436083868e-05, "loss": 2.2948, "step": 39480 }, { "epoch": 0.505472, "grad_norm": 2.359375, "learning_rate": 1.4892852871912248e-05, "loss": 2.2832, "step": 39490 }, { "epoch": 0.5056, "grad_norm": 1.5234375, "learning_rate": 1.4886782325290634e-05, "loss": 2.2597, "step": 39500 }, { "epoch": 0.505728, "grad_norm": 1.4140625, "learning_rate": 1.488071179721333e-05, "loss": 2.271, "step": 39510 }, { "epoch": 0.505856, "grad_norm": 1.5625, "learning_rate": 1.4874641288674651e-05, "loss": 2.2854, "step": 39520 }, { "epoch": 0.505984, "grad_norm": 1.6328125, "learning_rate": 1.486857080066891e-05, "loss": 2.2551, "step": 39530 }, { "epoch": 0.506112, "grad_norm": 1.484375, "learning_rate": 1.4862500334190407e-05, "loss": 2.2853, "step": 39540 }, { "epoch": 0.50624, "grad_norm": 1.4921875, "learning_rate": 1.4856429890233447e-05, "loss": 2.2497, "step": 39550 }, { "epoch": 0.506368, "grad_norm": 1.5546875, "learning_rate": 1.4850359469792334e-05, "loss": 2.2592, "step": 39560 }, { "epoch": 0.506496, "grad_norm": 1.7734375, "learning_rate": 1.4844289073861357e-05, "loss": 2.3023, "step": 39570 }, { "epoch": 0.506624, "grad_norm": 1.625, "learning_rate": 1.4838218703434802e-05, "loss": 2.2958, "step": 39580 }, { "epoch": 0.506752, "grad_norm": 1.71875, "learning_rate": 1.483214835950697e-05, "loss": 2.2775, "step": 39590 }, { "epoch": 0.50688, "grad_norm": 1.515625, "learning_rate": 1.4826078043072132e-05, "loss": 2.2794, "step": 39600 }, { "epoch": 0.507008, "grad_norm": 1.71875, "learning_rate": 1.4820007755124575e-05, "loss": 2.2721, "step": 39610 }, { "epoch": 0.507136, "grad_norm": 40.0, "learning_rate": 1.4813937496658567e-05, "loss": 2.2296, "step": 39620 }, { "epoch": 0.507264, "grad_norm": 1.5, "learning_rate": 1.4807867268668372e-05, "loss": 2.2937, "step": 39630 }, { "epoch": 0.507392, "grad_norm": 1.6640625, "learning_rate": 1.4801797072148265e-05, "loss": 2.2716, "step": 39640 }, { "epoch": 0.50752, "grad_norm": 1.4921875, "learning_rate": 1.4795726908092495e-05, "loss": 2.2764, "step": 39650 }, { "epoch": 0.507648, "grad_norm": 1.6015625, "learning_rate": 1.4789656777495318e-05, "loss": 2.2584, "step": 39660 }, { "epoch": 0.507776, "grad_norm": 1.6484375, "learning_rate": 1.4783586681350982e-05, "loss": 2.2533, "step": 39670 }, { "epoch": 0.507904, "grad_norm": 1.578125, "learning_rate": 1.477751662065373e-05, "loss": 2.2651, "step": 39680 }, { "epoch": 0.508032, "grad_norm": 1.5390625, "learning_rate": 1.477144659639779e-05, "loss": 2.3038, "step": 39690 }, { "epoch": 0.50816, "grad_norm": 1.5859375, "learning_rate": 1.4765376609577407e-05, "loss": 2.2506, "step": 39700 }, { "epoch": 0.508288, "grad_norm": 1.5, "learning_rate": 1.4759306661186796e-05, "loss": 2.2781, "step": 39710 }, { "epoch": 0.508416, "grad_norm": 1.5078125, "learning_rate": 1.4753236752220172e-05, "loss": 2.2957, "step": 39720 }, { "epoch": 0.508544, "grad_norm": 1.5234375, "learning_rate": 1.4747166883671752e-05, "loss": 2.2546, "step": 39730 }, { "epoch": 0.508672, "grad_norm": 1.59375, "learning_rate": 1.4741097056535743e-05, "loss": 2.2684, "step": 39740 }, { "epoch": 0.5088, "grad_norm": 1.5703125, "learning_rate": 1.4735027271806331e-05, "loss": 2.3187, "step": 39750 }, { "epoch": 0.508928, "grad_norm": 1.4453125, "learning_rate": 1.472895753047772e-05, "loss": 2.3012, "step": 39760 }, { "epoch": 0.509056, "grad_norm": 1.5859375, "learning_rate": 1.472288783354408e-05, "loss": 2.2819, "step": 39770 }, { "epoch": 0.509184, "grad_norm": 1.5078125, "learning_rate": 1.4716818181999608e-05, "loss": 2.276, "step": 39780 }, { "epoch": 0.509312, "grad_norm": 1.46875, "learning_rate": 1.4710748576838457e-05, "loss": 2.2345, "step": 39790 }, { "epoch": 0.50944, "grad_norm": 1.59375, "learning_rate": 1.4704679019054786e-05, "loss": 2.2809, "step": 39800 }, { "epoch": 0.509568, "grad_norm": 1.5390625, "learning_rate": 1.4698609509642763e-05, "loss": 2.2908, "step": 39810 }, { "epoch": 0.509696, "grad_norm": 1.5625, "learning_rate": 1.4692540049596523e-05, "loss": 2.3103, "step": 39820 }, { "epoch": 0.509824, "grad_norm": 1.515625, "learning_rate": 1.4686470639910207e-05, "loss": 2.2686, "step": 39830 }, { "epoch": 0.509952, "grad_norm": 1.484375, "learning_rate": 1.4680401281577947e-05, "loss": 2.2953, "step": 39840 }, { "epoch": 0.51008, "grad_norm": 5.53125, "learning_rate": 1.467433197559386e-05, "loss": 2.2927, "step": 39850 }, { "epoch": 0.510208, "grad_norm": 1.6328125, "learning_rate": 1.4668262722952059e-05, "loss": 2.2864, "step": 39860 }, { "epoch": 0.510336, "grad_norm": 1.5546875, "learning_rate": 1.466219352464665e-05, "loss": 2.2747, "step": 39870 }, { "epoch": 0.510464, "grad_norm": 1.640625, "learning_rate": 1.4656124381671729e-05, "loss": 2.2845, "step": 39880 }, { "epoch": 0.510592, "grad_norm": 1.5078125, "learning_rate": 1.4650055295021373e-05, "loss": 2.2729, "step": 39890 }, { "epoch": 0.51072, "grad_norm": 1.5, "learning_rate": 1.4643986265689674e-05, "loss": 2.2454, "step": 39900 }, { "epoch": 0.510848, "grad_norm": 1.671875, "learning_rate": 1.4637917294670681e-05, "loss": 2.3068, "step": 39910 }, { "epoch": 0.510976, "grad_norm": 1.71875, "learning_rate": 1.4631848382958467e-05, "loss": 2.2418, "step": 39920 }, { "epoch": 0.511104, "grad_norm": 1.6015625, "learning_rate": 1.4625779531547075e-05, "loss": 2.2846, "step": 39930 }, { "epoch": 0.511232, "grad_norm": 1.5546875, "learning_rate": 1.4619710741430537e-05, "loss": 2.2855, "step": 39940 }, { "epoch": 0.51136, "grad_norm": 1.6953125, "learning_rate": 1.4613642013602892e-05, "loss": 2.2702, "step": 39950 }, { "epoch": 0.511488, "grad_norm": 1.53125, "learning_rate": 1.4607573349058149e-05, "loss": 2.2574, "step": 39960 }, { "epoch": 0.511616, "grad_norm": 1.7578125, "learning_rate": 1.4601504748790319e-05, "loss": 2.2914, "step": 39970 }, { "epoch": 0.511744, "grad_norm": 1.671875, "learning_rate": 1.4595436213793399e-05, "loss": 2.2775, "step": 39980 }, { "epoch": 0.511872, "grad_norm": 1.5703125, "learning_rate": 1.4589367745061374e-05, "loss": 2.247, "step": 39990 }, { "epoch": 0.512, "grad_norm": 1.625, "learning_rate": 1.4583299343588216e-05, "loss": 2.2816, "step": 40000 }, { "epoch": 0.512128, "grad_norm": 2.703125, "learning_rate": 1.45772310103679e-05, "loss": 2.263, "step": 40010 }, { "epoch": 0.512256, "grad_norm": 1.640625, "learning_rate": 1.4571162746394372e-05, "loss": 2.306, "step": 40020 }, { "epoch": 0.512384, "grad_norm": 1.5625, "learning_rate": 1.4565094552661567e-05, "loss": 2.2927, "step": 40030 }, { "epoch": 0.512512, "grad_norm": 1.453125, "learning_rate": 1.455902643016343e-05, "loss": 2.2845, "step": 40040 }, { "epoch": 0.51264, "grad_norm": 1.6875, "learning_rate": 1.4552958379893871e-05, "loss": 2.2791, "step": 40050 }, { "epoch": 0.512768, "grad_norm": 1.6796875, "learning_rate": 1.4546890402846795e-05, "loss": 2.3003, "step": 40060 }, { "epoch": 0.512896, "grad_norm": 1.546875, "learning_rate": 1.4540822500016105e-05, "loss": 2.2852, "step": 40070 }, { "epoch": 0.513024, "grad_norm": 1.6953125, "learning_rate": 1.4534754672395675e-05, "loss": 2.3078, "step": 40080 }, { "epoch": 0.513152, "grad_norm": 1.640625, "learning_rate": 1.4528686920979381e-05, "loss": 2.303, "step": 40090 }, { "epoch": 0.51328, "grad_norm": 1.90625, "learning_rate": 1.4522619246761081e-05, "loss": 2.2976, "step": 40100 }, { "epoch": 0.513408, "grad_norm": 1.625, "learning_rate": 1.4516551650734612e-05, "loss": 2.2731, "step": 40110 }, { "epoch": 0.513536, "grad_norm": 1.4921875, "learning_rate": 1.4510484133893819e-05, "loss": 2.2695, "step": 40120 }, { "epoch": 0.513664, "grad_norm": 1.6171875, "learning_rate": 1.4504416697232518e-05, "loss": 2.2677, "step": 40130 }, { "epoch": 0.513792, "grad_norm": 3.515625, "learning_rate": 1.4498349341744503e-05, "loss": 2.3002, "step": 40140 }, { "epoch": 0.51392, "grad_norm": 1.4921875, "learning_rate": 1.4492282068423583e-05, "loss": 2.304, "step": 40150 }, { "epoch": 0.514048, "grad_norm": 1.5625, "learning_rate": 1.4486214878263532e-05, "loss": 2.3011, "step": 40160 }, { "epoch": 0.514176, "grad_norm": 1.4453125, "learning_rate": 1.4480147772258111e-05, "loss": 2.2757, "step": 40170 }, { "epoch": 0.514304, "grad_norm": 1.5234375, "learning_rate": 1.447408075140108e-05, "loss": 2.261, "step": 40180 }, { "epoch": 0.514432, "grad_norm": 1.53125, "learning_rate": 1.4468013816686174e-05, "loss": 2.2592, "step": 40190 }, { "epoch": 0.51456, "grad_norm": 1.640625, "learning_rate": 1.4461946969107114e-05, "loss": 2.2974, "step": 40200 }, { "epoch": 0.514688, "grad_norm": 1.5546875, "learning_rate": 1.4455880209657614e-05, "loss": 2.2716, "step": 40210 }, { "epoch": 0.514816, "grad_norm": 1.7109375, "learning_rate": 1.4449813539331364e-05, "loss": 2.2769, "step": 40220 }, { "epoch": 0.514944, "grad_norm": 1.578125, "learning_rate": 1.4443746959122053e-05, "loss": 2.247, "step": 40230 }, { "epoch": 0.515072, "grad_norm": 1.46875, "learning_rate": 1.4437680470023349e-05, "loss": 2.2704, "step": 40240 }, { "epoch": 0.5152, "grad_norm": 1.578125, "learning_rate": 1.4431614073028886e-05, "loss": 2.3081, "step": 40250 }, { "epoch": 0.515328, "grad_norm": 1.484375, "learning_rate": 1.4425547769132316e-05, "loss": 2.2666, "step": 40260 }, { "epoch": 0.515456, "grad_norm": 1.4921875, "learning_rate": 1.4419481559327259e-05, "loss": 2.2519, "step": 40270 }, { "epoch": 0.515584, "grad_norm": 1.5390625, "learning_rate": 1.4413415444607309e-05, "loss": 2.2828, "step": 40280 }, { "epoch": 0.515712, "grad_norm": 1.6171875, "learning_rate": 1.440734942596607e-05, "loss": 2.2891, "step": 40290 }, { "epoch": 0.51584, "grad_norm": 1.671875, "learning_rate": 1.4401283504397106e-05, "loss": 2.2738, "step": 40300 }, { "epoch": 0.515968, "grad_norm": 1.859375, "learning_rate": 1.4395217680893978e-05, "loss": 2.3031, "step": 40310 }, { "epoch": 0.516096, "grad_norm": 1.71875, "learning_rate": 1.438915195645023e-05, "loss": 2.3101, "step": 40320 }, { "epoch": 0.516224, "grad_norm": 2.484375, "learning_rate": 1.4383086332059387e-05, "loss": 2.2844, "step": 40330 }, { "epoch": 0.516352, "grad_norm": 1.4375, "learning_rate": 1.437702080871495e-05, "loss": 2.2819, "step": 40340 }, { "epoch": 0.51648, "grad_norm": 1.4765625, "learning_rate": 1.4370955387410427e-05, "loss": 2.2804, "step": 40350 }, { "epoch": 0.516608, "grad_norm": 1.5625, "learning_rate": 1.4364890069139293e-05, "loss": 2.264, "step": 40360 }, { "epoch": 0.516736, "grad_norm": 1.6328125, "learning_rate": 1.4358824854894988e-05, "loss": 2.2704, "step": 40370 }, { "epoch": 0.516864, "grad_norm": 1.46875, "learning_rate": 1.4352759745670976e-05, "loss": 2.2875, "step": 40380 }, { "epoch": 0.516992, "grad_norm": 1.4609375, "learning_rate": 1.4346694742460672e-05, "loss": 2.3133, "step": 40390 }, { "epoch": 0.51712, "grad_norm": 1.484375, "learning_rate": 1.4340629846257487e-05, "loss": 2.2871, "step": 40400 }, { "epoch": 0.517248, "grad_norm": 1.65625, "learning_rate": 1.433456505805481e-05, "loss": 2.3295, "step": 40410 }, { "epoch": 0.517376, "grad_norm": 2.359375, "learning_rate": 1.4328500378846013e-05, "loss": 2.3007, "step": 40420 }, { "epoch": 0.517504, "grad_norm": 1.5859375, "learning_rate": 1.4322435809624451e-05, "loss": 2.3104, "step": 40430 }, { "epoch": 0.517632, "grad_norm": 1.640625, "learning_rate": 1.4316371351383463e-05, "loss": 2.2877, "step": 40440 }, { "epoch": 0.51776, "grad_norm": 1.9609375, "learning_rate": 1.4310307005116359e-05, "loss": 2.2432, "step": 40450 }, { "epoch": 0.517888, "grad_norm": 1.6640625, "learning_rate": 1.430424277181645e-05, "loss": 2.3163, "step": 40460 }, { "epoch": 0.518016, "grad_norm": 1.3984375, "learning_rate": 1.4298178652477014e-05, "loss": 2.2868, "step": 40470 }, { "epoch": 0.518144, "grad_norm": 1.4921875, "learning_rate": 1.429211464809131e-05, "loss": 2.2883, "step": 40480 }, { "epoch": 0.518272, "grad_norm": 1.5546875, "learning_rate": 1.428605075965259e-05, "loss": 2.2912, "step": 40490 }, { "epoch": 0.5184, "grad_norm": 1.5546875, "learning_rate": 1.4279986988154073e-05, "loss": 2.3004, "step": 40500 }, { "epoch": 0.518528, "grad_norm": 1.890625, "learning_rate": 1.4273923334588965e-05, "loss": 2.2813, "step": 40510 }, { "epoch": 0.518656, "grad_norm": 1.5546875, "learning_rate": 1.4267859799950454e-05, "loss": 2.2981, "step": 40520 }, { "epoch": 0.518784, "grad_norm": 1.5390625, "learning_rate": 1.4261796385231704e-05, "loss": 2.264, "step": 40530 }, { "epoch": 0.518912, "grad_norm": 1.5546875, "learning_rate": 1.4255733091425875e-05, "loss": 2.2565, "step": 40540 }, { "epoch": 0.51904, "grad_norm": 1.6796875, "learning_rate": 1.4249669919526081e-05, "loss": 2.2775, "step": 40550 }, { "epoch": 0.519168, "grad_norm": 1.5859375, "learning_rate": 1.424360687052543e-05, "loss": 2.2724, "step": 40560 }, { "epoch": 0.519296, "grad_norm": 2.0625, "learning_rate": 1.423754394541702e-05, "loss": 2.2662, "step": 40570 }, { "epoch": 0.519424, "grad_norm": 1.671875, "learning_rate": 1.4231481145193913e-05, "loss": 2.2772, "step": 40580 }, { "epoch": 0.519552, "grad_norm": 1.6796875, "learning_rate": 1.4225418470849153e-05, "loss": 2.2662, "step": 40590 }, { "epoch": 0.51968, "grad_norm": 1.6171875, "learning_rate": 1.421935592337577e-05, "loss": 2.287, "step": 40600 }, { "epoch": 0.519808, "grad_norm": 1.4453125, "learning_rate": 1.4213293503766768e-05, "loss": 2.3076, "step": 40610 }, { "epoch": 0.519936, "grad_norm": 2.359375, "learning_rate": 1.420723121301513e-05, "loss": 2.2684, "step": 40620 }, { "epoch": 0.520064, "grad_norm": 1.515625, "learning_rate": 1.4201169052113826e-05, "loss": 2.2756, "step": 40630 }, { "epoch": 0.520192, "grad_norm": 1.5078125, "learning_rate": 1.419510702205579e-05, "loss": 2.2876, "step": 40640 }, { "epoch": 0.52032, "grad_norm": 1.640625, "learning_rate": 1.4189045123833944e-05, "loss": 2.2981, "step": 40650 }, { "epoch": 0.520448, "grad_norm": 1.6171875, "learning_rate": 1.4182983358441197e-05, "loss": 2.2624, "step": 40660 }, { "epoch": 0.520576, "grad_norm": 1.46875, "learning_rate": 1.4176921726870418e-05, "loss": 2.2736, "step": 40670 }, { "epoch": 0.520704, "grad_norm": 1.5078125, "learning_rate": 1.4170860230114455e-05, "loss": 2.273, "step": 40680 }, { "epoch": 0.520832, "grad_norm": 2.0, "learning_rate": 1.4164798869166157e-05, "loss": 2.2878, "step": 40690 }, { "epoch": 0.52096, "grad_norm": 1.625, "learning_rate": 1.4158737645018325e-05, "loss": 2.2752, "step": 40700 }, { "epoch": 0.521088, "grad_norm": 2.09375, "learning_rate": 1.4152676558663754e-05, "loss": 2.3102, "step": 40710 }, { "epoch": 0.521216, "grad_norm": 1.59375, "learning_rate": 1.4146615611095207e-05, "loss": 2.2864, "step": 40720 }, { "epoch": 0.521344, "grad_norm": 1.6328125, "learning_rate": 1.4140554803305424e-05, "loss": 2.2594, "step": 40730 }, { "epoch": 0.521472, "grad_norm": 1.5703125, "learning_rate": 1.4134494136287134e-05, "loss": 2.2799, "step": 40740 }, { "epoch": 0.5216, "grad_norm": 1.703125, "learning_rate": 1.4128433611033028e-05, "loss": 2.2794, "step": 40750 }, { "epoch": 0.521728, "grad_norm": 1.5, "learning_rate": 1.4122373228535776e-05, "loss": 2.2833, "step": 40760 }, { "epoch": 0.521856, "grad_norm": 1.6875, "learning_rate": 1.4116312989788045e-05, "loss": 2.2968, "step": 40770 }, { "epoch": 0.521984, "grad_norm": 1.6484375, "learning_rate": 1.411025289578245e-05, "loss": 2.2902, "step": 40780 }, { "epoch": 0.522112, "grad_norm": 1.6484375, "learning_rate": 1.410419294751159e-05, "loss": 2.293, "step": 40790 }, { "epoch": 0.52224, "grad_norm": 1.5703125, "learning_rate": 1.4098133145968056e-05, "loss": 2.3038, "step": 40800 }, { "epoch": 0.522368, "grad_norm": 1.5078125, "learning_rate": 1.40920734921444e-05, "loss": 2.2805, "step": 40810 }, { "epoch": 0.522496, "grad_norm": 7.53125, "learning_rate": 1.408601398703315e-05, "loss": 2.2985, "step": 40820 }, { "epoch": 0.522624, "grad_norm": 1.578125, "learning_rate": 1.4079954631626822e-05, "loss": 2.29, "step": 40830 }, { "epoch": 0.522752, "grad_norm": 1.5078125, "learning_rate": 1.4073895426917889e-05, "loss": 2.299, "step": 40840 }, { "epoch": 0.52288, "grad_norm": 1.7578125, "learning_rate": 1.4067836373898815e-05, "loss": 2.2942, "step": 40850 }, { "epoch": 0.523008, "grad_norm": 1.6171875, "learning_rate": 1.4061777473562031e-05, "loss": 2.2743, "step": 40860 }, { "epoch": 0.523136, "grad_norm": 1.7421875, "learning_rate": 1.405571872689994e-05, "loss": 2.2735, "step": 40870 }, { "epoch": 0.523264, "grad_norm": 1.6015625, "learning_rate": 1.4049660134904934e-05, "loss": 2.2509, "step": 40880 }, { "epoch": 0.523392, "grad_norm": 1.6953125, "learning_rate": 1.4043601698569373e-05, "loss": 2.2843, "step": 40890 }, { "epoch": 0.52352, "grad_norm": 1.6171875, "learning_rate": 1.4037543418885573e-05, "loss": 2.2933, "step": 40900 }, { "epoch": 0.523648, "grad_norm": 1.6484375, "learning_rate": 1.4031485296845856e-05, "loss": 2.2808, "step": 40910 }, { "epoch": 0.523776, "grad_norm": 1.546875, "learning_rate": 1.4025427333442499e-05, "loss": 2.3094, "step": 40920 }, { "epoch": 0.523904, "grad_norm": 1.7265625, "learning_rate": 1.401936952966775e-05, "loss": 2.2575, "step": 40930 }, { "epoch": 0.524032, "grad_norm": 1.5, "learning_rate": 1.4013311886513846e-05, "loss": 2.2774, "step": 40940 }, { "epoch": 0.52416, "grad_norm": 1.546875, "learning_rate": 1.4007254404972986e-05, "loss": 2.2742, "step": 40950 }, { "epoch": 0.524288, "grad_norm": 1.6484375, "learning_rate": 1.400119708603734e-05, "loss": 2.2847, "step": 40960 }, { "epoch": 0.524416, "grad_norm": 1.5078125, "learning_rate": 1.399513993069907e-05, "loss": 2.2865, "step": 40970 }, { "epoch": 0.524544, "grad_norm": 1.640625, "learning_rate": 1.3989082939950288e-05, "loss": 2.2556, "step": 40980 }, { "epoch": 0.524672, "grad_norm": 1.5546875, "learning_rate": 1.3983026114783086e-05, "loss": 2.2531, "step": 40990 }, { "epoch": 0.5248, "grad_norm": 1.7421875, "learning_rate": 1.3976969456189549e-05, "loss": 2.2374, "step": 41000 }, { "epoch": 0.524928, "grad_norm": 1.53125, "learning_rate": 1.3970912965161697e-05, "loss": 2.3073, "step": 41010 }, { "epoch": 0.525056, "grad_norm": 1.453125, "learning_rate": 1.396485664269156e-05, "loss": 2.2693, "step": 41020 }, { "epoch": 0.525184, "grad_norm": 1.46875, "learning_rate": 1.3958800489771119e-05, "loss": 2.3038, "step": 41030 }, { "epoch": 0.525312, "grad_norm": 1.4921875, "learning_rate": 1.3952744507392329e-05, "loss": 2.2995, "step": 41040 }, { "epoch": 0.52544, "grad_norm": 1.4609375, "learning_rate": 1.3946688696547123e-05, "loss": 2.2849, "step": 41050 }, { "epoch": 0.525568, "grad_norm": 1.5390625, "learning_rate": 1.3940633058227403e-05, "loss": 2.2713, "step": 41060 }, { "epoch": 0.525696, "grad_norm": 1.6484375, "learning_rate": 1.393457759342504e-05, "loss": 2.266, "step": 41070 }, { "epoch": 0.525824, "grad_norm": 1.59375, "learning_rate": 1.3928522303131883e-05, "loss": 2.2612, "step": 41080 }, { "epoch": 0.525952, "grad_norm": 1.6015625, "learning_rate": 1.392246718833975e-05, "loss": 2.2788, "step": 41090 }, { "epoch": 0.52608, "grad_norm": 1.671875, "learning_rate": 1.3916412250040418e-05, "loss": 2.3027, "step": 41100 }, { "epoch": 0.526208, "grad_norm": 1.53125, "learning_rate": 1.3910357489225663e-05, "loss": 2.2619, "step": 41110 }, { "epoch": 0.526336, "grad_norm": 1.59375, "learning_rate": 1.390430290688721e-05, "loss": 2.2624, "step": 41120 }, { "epoch": 0.526464, "grad_norm": 1.484375, "learning_rate": 1.3898248504016745e-05, "loss": 2.2783, "step": 41130 }, { "epoch": 0.526592, "grad_norm": 1.484375, "learning_rate": 1.3892194281605959e-05, "loss": 2.2862, "step": 41140 }, { "epoch": 0.52672, "grad_norm": 1.515625, "learning_rate": 1.3886140240646482e-05, "loss": 2.2771, "step": 41150 }, { "epoch": 0.526848, "grad_norm": 1.5546875, "learning_rate": 1.3880086382129937e-05, "loss": 2.2643, "step": 41160 }, { "epoch": 0.526976, "grad_norm": 1.6640625, "learning_rate": 1.3874032707047897e-05, "loss": 2.2725, "step": 41170 }, { "epoch": 0.527104, "grad_norm": 1.5703125, "learning_rate": 1.3867979216391915e-05, "loss": 2.3117, "step": 41180 }, { "epoch": 0.527232, "grad_norm": 1.5078125, "learning_rate": 1.3861925911153516e-05, "loss": 2.2988, "step": 41190 }, { "epoch": 0.52736, "grad_norm": 1.546875, "learning_rate": 1.3855872792324193e-05, "loss": 2.2987, "step": 41200 }, { "epoch": 0.527488, "grad_norm": 1.6015625, "learning_rate": 1.3849819860895398e-05, "loss": 2.3071, "step": 41210 }, { "epoch": 0.527616, "grad_norm": 1.640625, "learning_rate": 1.3843767117858576e-05, "loss": 2.2749, "step": 41220 }, { "epoch": 0.527744, "grad_norm": 2.265625, "learning_rate": 1.3837714564205122e-05, "loss": 2.2917, "step": 41230 }, { "epoch": 0.527872, "grad_norm": 1.46875, "learning_rate": 1.383166220092639e-05, "loss": 2.2897, "step": 41240 }, { "epoch": 0.528, "grad_norm": 1.5546875, "learning_rate": 1.3825610029013737e-05, "loss": 2.2882, "step": 41250 }, { "epoch": 0.528128, "grad_norm": 1.4296875, "learning_rate": 1.3819558049458463e-05, "loss": 2.2489, "step": 41260 }, { "epoch": 0.528256, "grad_norm": 1.890625, "learning_rate": 1.3813506263251835e-05, "loss": 2.2923, "step": 41270 }, { "epoch": 0.528384, "grad_norm": 1.7265625, "learning_rate": 1.3807454671385106e-05, "loss": 2.2737, "step": 41280 }, { "epoch": 0.528512, "grad_norm": 1.5390625, "learning_rate": 1.3801403274849484e-05, "loss": 2.2845, "step": 41290 }, { "epoch": 0.52864, "grad_norm": 1.6640625, "learning_rate": 1.3795352074636141e-05, "loss": 2.2603, "step": 41300 }, { "epoch": 0.528768, "grad_norm": 1.5078125, "learning_rate": 1.3789301071736235e-05, "loss": 2.2444, "step": 41310 }, { "epoch": 0.528896, "grad_norm": 1.5625, "learning_rate": 1.3783250267140867e-05, "loss": 2.2464, "step": 41320 }, { "epoch": 0.529024, "grad_norm": 1.578125, "learning_rate": 1.3777199661841136e-05, "loss": 2.276, "step": 41330 }, { "epoch": 0.529152, "grad_norm": 1.5625, "learning_rate": 1.377114925682808e-05, "loss": 2.2948, "step": 41340 }, { "epoch": 0.52928, "grad_norm": 1.5546875, "learning_rate": 1.376509905309272e-05, "loss": 2.2523, "step": 41350 }, { "epoch": 0.529408, "grad_norm": 1.578125, "learning_rate": 1.3759049051626039e-05, "loss": 2.2824, "step": 41360 }, { "epoch": 0.529536, "grad_norm": 1.65625, "learning_rate": 1.3752999253418987e-05, "loss": 2.2869, "step": 41370 }, { "epoch": 0.529664, "grad_norm": 1.6015625, "learning_rate": 1.3746949659462478e-05, "loss": 2.2799, "step": 41380 }, { "epoch": 0.529792, "grad_norm": 1.5390625, "learning_rate": 1.3740900270747401e-05, "loss": 2.2673, "step": 41390 }, { "epoch": 0.52992, "grad_norm": 1.6953125, "learning_rate": 1.3734851088264608e-05, "loss": 2.2674, "step": 41400 }, { "epoch": 0.530048, "grad_norm": 1.625, "learning_rate": 1.3728802113004902e-05, "loss": 2.2678, "step": 41410 }, { "epoch": 0.530176, "grad_norm": 1.640625, "learning_rate": 1.3722753345959084e-05, "loss": 2.2773, "step": 41420 }, { "epoch": 0.530304, "grad_norm": 1.71875, "learning_rate": 1.3716704788117892e-05, "loss": 2.271, "step": 41430 }, { "epoch": 0.530432, "grad_norm": 1.515625, "learning_rate": 1.3710656440472036e-05, "loss": 2.3094, "step": 41440 }, { "epoch": 0.53056, "grad_norm": 1.4453125, "learning_rate": 1.3704608304012206e-05, "loss": 2.273, "step": 41450 }, { "epoch": 0.530688, "grad_norm": 1.640625, "learning_rate": 1.3698560379729041e-05, "loss": 2.2486, "step": 41460 }, { "epoch": 0.530816, "grad_norm": 1.4921875, "learning_rate": 1.3692512668613154e-05, "loss": 2.2727, "step": 41470 }, { "epoch": 0.530944, "grad_norm": 1.4375, "learning_rate": 1.368646517165512e-05, "loss": 2.2953, "step": 41480 }, { "epoch": 0.531072, "grad_norm": 1.5390625, "learning_rate": 1.3680417889845477e-05, "loss": 2.3027, "step": 41490 }, { "epoch": 0.5312, "grad_norm": 2.03125, "learning_rate": 1.3674370824174734e-05, "loss": 2.2807, "step": 41500 }, { "epoch": 0.531328, "grad_norm": 1.640625, "learning_rate": 1.3668323975633357e-05, "loss": 2.2813, "step": 41510 }, { "epoch": 0.531456, "grad_norm": 1.5859375, "learning_rate": 1.3662277345211776e-05, "loss": 2.3, "step": 41520 }, { "epoch": 0.531584, "grad_norm": 1.5390625, "learning_rate": 1.3656230933900405e-05, "loss": 2.2708, "step": 41530 }, { "epoch": 0.531712, "grad_norm": 1.6640625, "learning_rate": 1.3650184742689593e-05, "loss": 2.3004, "step": 41540 }, { "epoch": 0.53184, "grad_norm": 1.484375, "learning_rate": 1.3644138772569662e-05, "loss": 2.2656, "step": 41550 }, { "epoch": 0.531968, "grad_norm": 1.5625, "learning_rate": 1.363809302453092e-05, "loss": 2.2926, "step": 41560 }, { "epoch": 0.532096, "grad_norm": 1.5546875, "learning_rate": 1.3632047499563608e-05, "loss": 2.2774, "step": 41570 }, { "epoch": 0.532224, "grad_norm": 1.5546875, "learning_rate": 1.3626002198657944e-05, "loss": 2.2793, "step": 41580 }, { "epoch": 0.532352, "grad_norm": 1.5625, "learning_rate": 1.3619957122804115e-05, "loss": 2.3068, "step": 41590 }, { "epoch": 0.53248, "grad_norm": 1.5234375, "learning_rate": 1.3613912272992259e-05, "loss": 2.2435, "step": 41600 }, { "epoch": 0.532608, "grad_norm": 1.5703125, "learning_rate": 1.3607867650212484e-05, "loss": 2.3032, "step": 41610 }, { "epoch": 0.532736, "grad_norm": 1.6640625, "learning_rate": 1.3601823255454863e-05, "loss": 2.2825, "step": 41620 }, { "epoch": 0.532864, "grad_norm": 1.5234375, "learning_rate": 1.3595779089709417e-05, "loss": 2.3047, "step": 41630 }, { "epoch": 0.532992, "grad_norm": 1.484375, "learning_rate": 1.3589735153966158e-05, "loss": 2.3055, "step": 41640 }, { "epoch": 0.53312, "grad_norm": 1.625, "learning_rate": 1.3583691449215035e-05, "loss": 2.2614, "step": 41650 }, { "epoch": 0.533248, "grad_norm": 1.515625, "learning_rate": 1.357764797644596e-05, "loss": 2.2956, "step": 41660 }, { "epoch": 0.533376, "grad_norm": 1.515625, "learning_rate": 1.3571604736648825e-05, "loss": 2.2701, "step": 41670 }, { "epoch": 0.533504, "grad_norm": 1.4765625, "learning_rate": 1.356556173081347e-05, "loss": 2.287, "step": 41680 }, { "epoch": 0.533632, "grad_norm": 1.640625, "learning_rate": 1.3559518959929693e-05, "loss": 2.2964, "step": 41690 }, { "epoch": 0.53376, "grad_norm": 1.6875, "learning_rate": 1.3553476424987271e-05, "loss": 2.3202, "step": 41700 }, { "epoch": 0.533888, "grad_norm": 1.5390625, "learning_rate": 1.3547434126975926e-05, "loss": 2.2898, "step": 41710 }, { "epoch": 0.534016, "grad_norm": 1.65625, "learning_rate": 1.3541392066885342e-05, "loss": 2.2858, "step": 41720 }, { "epoch": 0.534144, "grad_norm": 1.5703125, "learning_rate": 1.3535350245705179e-05, "loss": 2.3128, "step": 41730 }, { "epoch": 0.534272, "grad_norm": 1.546875, "learning_rate": 1.3529308664425042e-05, "loss": 2.325, "step": 41740 }, { "epoch": 0.5344, "grad_norm": 1.5234375, "learning_rate": 1.3523267324034497e-05, "loss": 2.3103, "step": 41750 }, { "epoch": 0.534528, "grad_norm": 1.515625, "learning_rate": 1.351722622552309e-05, "loss": 2.3085, "step": 41760 }, { "epoch": 0.534656, "grad_norm": 1.71875, "learning_rate": 1.3511185369880299e-05, "loss": 2.2702, "step": 41770 }, { "epoch": 0.534784, "grad_norm": 1.5625, "learning_rate": 1.3505144758095586e-05, "loss": 2.2553, "step": 41780 }, { "epoch": 0.534912, "grad_norm": 1.5625, "learning_rate": 1.3499104391158363e-05, "loss": 2.3059, "step": 41790 }, { "epoch": 0.53504, "grad_norm": 1.546875, "learning_rate": 1.3493064270057996e-05, "loss": 2.2889, "step": 41800 }, { "epoch": 0.535168, "grad_norm": 1.5078125, "learning_rate": 1.3487024395783824e-05, "loss": 2.2986, "step": 41810 }, { "epoch": 0.535296, "grad_norm": 1.546875, "learning_rate": 1.3480984769325135e-05, "loss": 2.2706, "step": 41820 }, { "epoch": 0.535424, "grad_norm": 1.6328125, "learning_rate": 1.3474945391671179e-05, "loss": 2.2938, "step": 41830 }, { "epoch": 0.535552, "grad_norm": 1.4921875, "learning_rate": 1.3468906263811172e-05, "loss": 2.2936, "step": 41840 }, { "epoch": 0.53568, "grad_norm": 1.53125, "learning_rate": 1.3462867386734279e-05, "loss": 2.301, "step": 41850 }, { "epoch": 0.535808, "grad_norm": 1.609375, "learning_rate": 1.3456828761429625e-05, "loss": 2.3246, "step": 41860 }, { "epoch": 0.535936, "grad_norm": 1.609375, "learning_rate": 1.3450790388886308e-05, "loss": 2.2702, "step": 41870 }, { "epoch": 0.536064, "grad_norm": 1.609375, "learning_rate": 1.344475227009337e-05, "loss": 2.2457, "step": 41880 }, { "epoch": 0.536192, "grad_norm": 1.484375, "learning_rate": 1.3438714406039804e-05, "loss": 2.2852, "step": 41890 }, { "epoch": 0.53632, "grad_norm": 1.671875, "learning_rate": 1.3432676797714588e-05, "loss": 2.2832, "step": 41900 }, { "epoch": 0.536448, "grad_norm": 1.71875, "learning_rate": 1.3426639446106635e-05, "loss": 2.2901, "step": 41910 }, { "epoch": 0.536576, "grad_norm": 1.5703125, "learning_rate": 1.3420602352204824e-05, "loss": 2.2641, "step": 41920 }, { "epoch": 0.536704, "grad_norm": 1.5703125, "learning_rate": 1.3414565516997996e-05, "loss": 2.2676, "step": 41930 }, { "epoch": 0.536832, "grad_norm": 1.4453125, "learning_rate": 1.3408528941474934e-05, "loss": 2.2484, "step": 41940 }, { "epoch": 0.53696, "grad_norm": 1.6875, "learning_rate": 1.3402492626624402e-05, "loss": 2.2922, "step": 41950 }, { "epoch": 0.537088, "grad_norm": 1.671875, "learning_rate": 1.3396456573435104e-05, "loss": 2.257, "step": 41960 }, { "epoch": 0.537216, "grad_norm": 1.546875, "learning_rate": 1.33904207828957e-05, "loss": 2.2914, "step": 41970 }, { "epoch": 0.537344, "grad_norm": 1.5078125, "learning_rate": 1.3384385255994823e-05, "loss": 2.2572, "step": 41980 }, { "epoch": 0.537472, "grad_norm": 1.5703125, "learning_rate": 1.3378349993721052e-05, "loss": 2.2644, "step": 41990 }, { "epoch": 0.5376, "grad_norm": 1.46875, "learning_rate": 1.337231499706291e-05, "loss": 2.3168, "step": 42000 }, { "epoch": 0.537728, "grad_norm": 1.6875, "learning_rate": 1.3366280267008905e-05, "loss": 2.2909, "step": 42010 }, { "epoch": 0.537856, "grad_norm": 1.5703125, "learning_rate": 1.336024580454748e-05, "loss": 2.2995, "step": 42020 }, { "epoch": 0.537984, "grad_norm": 1.5859375, "learning_rate": 1.3354211610667041e-05, "loss": 2.3144, "step": 42030 }, { "epoch": 0.538112, "grad_norm": 1.4921875, "learning_rate": 1.334817768635595e-05, "loss": 2.2815, "step": 42040 }, { "epoch": 0.53824, "grad_norm": 1.625, "learning_rate": 1.3342144032602524e-05, "loss": 2.3057, "step": 42050 }, { "epoch": 0.538368, "grad_norm": 1.59375, "learning_rate": 1.3336110650395033e-05, "loss": 2.2738, "step": 42060 }, { "epoch": 0.538496, "grad_norm": 2.015625, "learning_rate": 1.3330077540721712e-05, "loss": 2.2903, "step": 42070 }, { "epoch": 0.538624, "grad_norm": 1.53125, "learning_rate": 1.3324044704570732e-05, "loss": 2.2988, "step": 42080 }, { "epoch": 0.538752, "grad_norm": 1.4609375, "learning_rate": 1.3318012142930248e-05, "loss": 2.3085, "step": 42090 }, { "epoch": 0.53888, "grad_norm": 1.484375, "learning_rate": 1.3311979856788348e-05, "loss": 2.2752, "step": 42100 }, { "epoch": 0.539008, "grad_norm": 1.6953125, "learning_rate": 1.3305947847133076e-05, "loss": 2.3116, "step": 42110 }, { "epoch": 0.539136, "grad_norm": 1.65625, "learning_rate": 1.329991611495244e-05, "loss": 2.2747, "step": 42120 }, { "epoch": 0.539264, "grad_norm": 1.8046875, "learning_rate": 1.3293884661234401e-05, "loss": 2.2759, "step": 42130 }, { "epoch": 0.539392, "grad_norm": 1.6640625, "learning_rate": 1.3287853486966865e-05, "loss": 2.3024, "step": 42140 }, { "epoch": 0.53952, "grad_norm": 1.6484375, "learning_rate": 1.3281822593137704e-05, "loss": 2.272, "step": 42150 }, { "epoch": 0.539648, "grad_norm": 1.7421875, "learning_rate": 1.3275791980734738e-05, "loss": 2.2592, "step": 42160 }, { "epoch": 0.539776, "grad_norm": 1.4921875, "learning_rate": 1.3269761650745733e-05, "loss": 2.3148, "step": 42170 }, { "epoch": 0.539904, "grad_norm": 1.7265625, "learning_rate": 1.3263731604158435e-05, "loss": 2.2721, "step": 42180 }, { "epoch": 0.540032, "grad_norm": 1.6953125, "learning_rate": 1.3257701841960512e-05, "loss": 2.2692, "step": 42190 }, { "epoch": 0.54016, "grad_norm": 1.46875, "learning_rate": 1.3251672365139597e-05, "loss": 2.2659, "step": 42200 }, { "epoch": 0.540288, "grad_norm": 1.671875, "learning_rate": 1.3245643174683295e-05, "loss": 2.3012, "step": 42210 }, { "epoch": 0.540416, "grad_norm": 1.40625, "learning_rate": 1.3239614271579137e-05, "loss": 2.2801, "step": 42220 }, { "epoch": 0.540544, "grad_norm": 1.484375, "learning_rate": 1.3233585656814615e-05, "loss": 2.3049, "step": 42230 }, { "epoch": 0.540672, "grad_norm": 6.53125, "learning_rate": 1.3227557331377184e-05, "loss": 2.2831, "step": 42240 }, { "epoch": 0.5408, "grad_norm": 1.5859375, "learning_rate": 1.3221529296254239e-05, "loss": 2.2814, "step": 42250 }, { "epoch": 0.540928, "grad_norm": 1.6015625, "learning_rate": 1.3215501552433135e-05, "loss": 2.3112, "step": 42260 }, { "epoch": 0.541056, "grad_norm": 3.53125, "learning_rate": 1.320947410090118e-05, "loss": 2.3087, "step": 42270 }, { "epoch": 0.541184, "grad_norm": 1.703125, "learning_rate": 1.3203446942645619e-05, "loss": 2.2759, "step": 42280 }, { "epoch": 0.541312, "grad_norm": 1.671875, "learning_rate": 1.319742007865368e-05, "loss": 2.2893, "step": 42290 }, { "epoch": 0.54144, "grad_norm": 1.5390625, "learning_rate": 1.3191393509912509e-05, "loss": 2.2621, "step": 42300 }, { "epoch": 0.541568, "grad_norm": 1.578125, "learning_rate": 1.3185367237409217e-05, "loss": 2.2951, "step": 42310 }, { "epoch": 0.541696, "grad_norm": 1.578125, "learning_rate": 1.3179341262130881e-05, "loss": 2.3044, "step": 42320 }, { "epoch": 0.541824, "grad_norm": 1.5390625, "learning_rate": 1.3173315585064507e-05, "loss": 2.3213, "step": 42330 }, { "epoch": 0.541952, "grad_norm": 1.4921875, "learning_rate": 1.3167290207197062e-05, "loss": 2.2948, "step": 42340 }, { "epoch": 0.54208, "grad_norm": 1.5390625, "learning_rate": 1.3161265129515468e-05, "loss": 2.3006, "step": 42350 }, { "epoch": 0.542208, "grad_norm": 1.578125, "learning_rate": 1.3155240353006588e-05, "loss": 2.2649, "step": 42360 }, { "epoch": 0.542336, "grad_norm": 1.5703125, "learning_rate": 1.3149215878657241e-05, "loss": 2.2814, "step": 42370 }, { "epoch": 0.542464, "grad_norm": 1.890625, "learning_rate": 1.3143191707454201e-05, "loss": 2.254, "step": 42380 }, { "epoch": 0.542592, "grad_norm": 1.484375, "learning_rate": 1.3137167840384179e-05, "loss": 2.2876, "step": 42390 }, { "epoch": 0.54272, "grad_norm": 1.46875, "learning_rate": 1.3131144278433856e-05, "loss": 2.3011, "step": 42400 }, { "epoch": 0.542848, "grad_norm": 1.46875, "learning_rate": 1.3125121022589852e-05, "loss": 2.3102, "step": 42410 }, { "epoch": 0.542976, "grad_norm": 1.671875, "learning_rate": 1.311909807383872e-05, "loss": 2.271, "step": 42420 }, { "epoch": 0.543104, "grad_norm": 1.5703125, "learning_rate": 1.3113075433167e-05, "loss": 2.3005, "step": 42430 }, { "epoch": 0.543232, "grad_norm": 1.5234375, "learning_rate": 1.3107053101561151e-05, "loss": 2.2892, "step": 42440 }, { "epoch": 0.54336, "grad_norm": 1.5703125, "learning_rate": 1.3101031080007588e-05, "loss": 2.2682, "step": 42450 }, { "epoch": 0.543488, "grad_norm": 1.4921875, "learning_rate": 1.3095009369492689e-05, "loss": 2.2632, "step": 42460 }, { "epoch": 0.543616, "grad_norm": 1.578125, "learning_rate": 1.3088987971002762e-05, "loss": 2.2944, "step": 42470 }, { "epoch": 0.543744, "grad_norm": 1.4921875, "learning_rate": 1.3082966885524075e-05, "loss": 2.2887, "step": 42480 }, { "epoch": 0.543872, "grad_norm": 1.4921875, "learning_rate": 1.3076946114042847e-05, "loss": 2.277, "step": 42490 }, { "epoch": 0.544, "grad_norm": 1.6796875, "learning_rate": 1.3070925657545233e-05, "loss": 2.2861, "step": 42500 }, { "epoch": 0.544128, "grad_norm": 1.65625, "learning_rate": 1.3064905517017343e-05, "loss": 2.2751, "step": 42510 }, { "epoch": 0.544256, "grad_norm": 1.640625, "learning_rate": 1.3058885693445254e-05, "loss": 2.2921, "step": 42520 }, { "epoch": 0.544384, "grad_norm": 1.40625, "learning_rate": 1.3052866187814955e-05, "loss": 2.2743, "step": 42530 }, { "epoch": 0.544512, "grad_norm": 1.5390625, "learning_rate": 1.3046847001112404e-05, "loss": 2.261, "step": 42540 }, { "epoch": 0.54464, "grad_norm": 1.625, "learning_rate": 1.3040828134323512e-05, "loss": 2.3233, "step": 42550 }, { "epoch": 0.544768, "grad_norm": 1.546875, "learning_rate": 1.3034809588434126e-05, "loss": 2.2992, "step": 42560 }, { "epoch": 0.544896, "grad_norm": 1.6875, "learning_rate": 1.3028791364430046e-05, "loss": 2.2602, "step": 42570 }, { "epoch": 0.545024, "grad_norm": 1.5625, "learning_rate": 1.3022773463297015e-05, "loss": 2.2852, "step": 42580 }, { "epoch": 0.545152, "grad_norm": 2.703125, "learning_rate": 1.3016755886020724e-05, "loss": 2.2905, "step": 42590 }, { "epoch": 0.54528, "grad_norm": 1.5859375, "learning_rate": 1.301073863358682e-05, "loss": 2.2624, "step": 42600 }, { "epoch": 0.545408, "grad_norm": 1.640625, "learning_rate": 1.3004721706980884e-05, "loss": 2.2908, "step": 42610 }, { "epoch": 0.545536, "grad_norm": 1.46875, "learning_rate": 1.2998705107188445e-05, "loss": 2.2735, "step": 42620 }, { "epoch": 0.545664, "grad_norm": 1.53125, "learning_rate": 1.2992688835194992e-05, "loss": 2.2571, "step": 42630 }, { "epoch": 0.545792, "grad_norm": 1.8515625, "learning_rate": 1.2986672891985952e-05, "loss": 2.267, "step": 42640 }, { "epoch": 0.54592, "grad_norm": 1.5703125, "learning_rate": 1.298065727854668e-05, "loss": 2.283, "step": 42650 }, { "epoch": 0.546048, "grad_norm": 1.7890625, "learning_rate": 1.2974641995862513e-05, "loss": 2.2805, "step": 42660 }, { "epoch": 0.546176, "grad_norm": 1.5390625, "learning_rate": 1.2968627044918708e-05, "loss": 2.2829, "step": 42670 }, { "epoch": 0.546304, "grad_norm": 1.5625, "learning_rate": 1.296261242670047e-05, "loss": 2.2731, "step": 42680 }, { "epoch": 0.546432, "grad_norm": 1.71875, "learning_rate": 1.2956598142192963e-05, "loss": 2.2755, "step": 42690 }, { "epoch": 0.54656, "grad_norm": 1.5390625, "learning_rate": 1.295058419238128e-05, "loss": 2.2762, "step": 42700 }, { "epoch": 0.546688, "grad_norm": 1.609375, "learning_rate": 1.294457057825047e-05, "loss": 2.312, "step": 42710 }, { "epoch": 0.546816, "grad_norm": 1.6640625, "learning_rate": 1.2938557300785522e-05, "loss": 2.2662, "step": 42720 }, { "epoch": 0.546944, "grad_norm": 2.546875, "learning_rate": 1.2932544360971368e-05, "loss": 2.2795, "step": 42730 }, { "epoch": 0.547072, "grad_norm": 1.6484375, "learning_rate": 1.2926531759792898e-05, "loss": 2.2882, "step": 42740 }, { "epoch": 0.5472, "grad_norm": 4.25, "learning_rate": 1.2920519498234933e-05, "loss": 2.2652, "step": 42750 }, { "epoch": 0.547328, "grad_norm": 1.578125, "learning_rate": 1.2914507577282233e-05, "loss": 2.2688, "step": 42760 }, { "epoch": 0.547456, "grad_norm": 1.5625, "learning_rate": 1.290849599791952e-05, "loss": 2.2783, "step": 42770 }, { "epoch": 0.547584, "grad_norm": 1.6875, "learning_rate": 1.2902484761131453e-05, "loss": 2.2756, "step": 42780 }, { "epoch": 0.547712, "grad_norm": 1.515625, "learning_rate": 1.2896473867902625e-05, "loss": 2.2844, "step": 42790 }, { "epoch": 0.54784, "grad_norm": 1.6015625, "learning_rate": 1.2890463319217588e-05, "loss": 2.295, "step": 42800 }, { "epoch": 0.547968, "grad_norm": 1.515625, "learning_rate": 1.2884453116060829e-05, "loss": 2.3074, "step": 42810 }, { "epoch": 0.548096, "grad_norm": 1.53125, "learning_rate": 1.2878443259416775e-05, "loss": 2.3019, "step": 42820 }, { "epoch": 0.548224, "grad_norm": 1.4609375, "learning_rate": 1.287243375026981e-05, "loss": 2.3082, "step": 42830 }, { "epoch": 0.548352, "grad_norm": 1.6875, "learning_rate": 1.2866424589604244e-05, "loss": 2.2745, "step": 42840 }, { "epoch": 0.54848, "grad_norm": 1.5078125, "learning_rate": 1.2860415778404336e-05, "loss": 2.2763, "step": 42850 }, { "epoch": 0.548608, "grad_norm": 1.4921875, "learning_rate": 1.2854407317654302e-05, "loss": 2.3081, "step": 42860 }, { "epoch": 0.548736, "grad_norm": 1.4765625, "learning_rate": 1.284839920833828e-05, "loss": 2.2462, "step": 42870 }, { "epoch": 0.548864, "grad_norm": 1.4921875, "learning_rate": 1.2842391451440361e-05, "loss": 2.3006, "step": 42880 }, { "epoch": 0.548992, "grad_norm": 1.640625, "learning_rate": 1.2836384047944576e-05, "loss": 2.3076, "step": 42890 }, { "epoch": 0.54912, "grad_norm": 1.6171875, "learning_rate": 1.2830376998834896e-05, "loss": 2.2693, "step": 42900 }, { "epoch": 0.549248, "grad_norm": 1.984375, "learning_rate": 1.2824370305095242e-05, "loss": 2.2752, "step": 42910 }, { "epoch": 0.549376, "grad_norm": 1.484375, "learning_rate": 1.2818363967709463e-05, "loss": 2.2996, "step": 42920 }, { "epoch": 0.549504, "grad_norm": 1.5078125, "learning_rate": 1.2812357987661359e-05, "loss": 2.2874, "step": 42930 }, { "epoch": 0.549632, "grad_norm": 1.5234375, "learning_rate": 1.2806352365934683e-05, "loss": 2.2692, "step": 42940 }, { "epoch": 0.54976, "grad_norm": 1.5703125, "learning_rate": 1.2800347103513103e-05, "loss": 2.2678, "step": 42950 }, { "epoch": 0.549888, "grad_norm": 1.4453125, "learning_rate": 1.2794342201380239e-05, "loss": 2.3191, "step": 42960 }, { "epoch": 0.550016, "grad_norm": 1.8515625, "learning_rate": 1.2788337660519666e-05, "loss": 2.2959, "step": 42970 }, { "epoch": 0.550144, "grad_norm": 1.59375, "learning_rate": 1.2782333481914884e-05, "loss": 2.2885, "step": 42980 }, { "epoch": 0.550272, "grad_norm": 1.546875, "learning_rate": 1.2776329666549335e-05, "loss": 2.3001, "step": 42990 }, { "epoch": 0.5504, "grad_norm": 1.4375, "learning_rate": 1.2770326215406408e-05, "loss": 2.2756, "step": 43000 }, { "epoch": 0.550528, "grad_norm": 1.53125, "learning_rate": 1.2764323129469425e-05, "loss": 2.2778, "step": 43010 }, { "epoch": 0.550656, "grad_norm": 1.4609375, "learning_rate": 1.2758320409721659e-05, "loss": 2.2663, "step": 43020 }, { "epoch": 0.550784, "grad_norm": 1.4609375, "learning_rate": 1.2752318057146312e-05, "loss": 2.2896, "step": 43030 }, { "epoch": 0.550912, "grad_norm": 1.7734375, "learning_rate": 1.2746316072726524e-05, "loss": 2.2436, "step": 43040 }, { "epoch": 0.55104, "grad_norm": 1.53125, "learning_rate": 1.2740314457445395e-05, "loss": 2.2739, "step": 43050 }, { "epoch": 0.551168, "grad_norm": 1.5390625, "learning_rate": 1.273431321228594e-05, "loss": 2.2839, "step": 43060 }, { "epoch": 0.551296, "grad_norm": 1.8203125, "learning_rate": 1.2728312338231119e-05, "loss": 2.2801, "step": 43070 }, { "epoch": 0.551424, "grad_norm": 1.84375, "learning_rate": 1.2722311836263849e-05, "loss": 2.2815, "step": 43080 }, { "epoch": 0.551552, "grad_norm": 1.5390625, "learning_rate": 1.2716311707366966e-05, "loss": 2.298, "step": 43090 }, { "epoch": 0.55168, "grad_norm": 1.5703125, "learning_rate": 1.271031195252325e-05, "loss": 2.295, "step": 43100 }, { "epoch": 0.551808, "grad_norm": 1.484375, "learning_rate": 1.2704312572715426e-05, "loss": 2.2947, "step": 43110 }, { "epoch": 0.551936, "grad_norm": 1.5546875, "learning_rate": 1.2698313568926151e-05, "loss": 2.305, "step": 43120 }, { "epoch": 0.552064, "grad_norm": 1.578125, "learning_rate": 1.2692314942138019e-05, "loss": 2.3041, "step": 43130 }, { "epoch": 0.552192, "grad_norm": 1.5703125, "learning_rate": 1.268631669333357e-05, "loss": 2.2996, "step": 43140 }, { "epoch": 0.55232, "grad_norm": 1.4296875, "learning_rate": 1.268031882349528e-05, "loss": 2.2727, "step": 43150 }, { "epoch": 0.552448, "grad_norm": 1.5, "learning_rate": 1.2674321333605548e-05, "loss": 2.2544, "step": 43160 }, { "epoch": 0.552576, "grad_norm": 1.640625, "learning_rate": 1.2668324224646745e-05, "loss": 2.3182, "step": 43170 }, { "epoch": 0.552704, "grad_norm": 1.5234375, "learning_rate": 1.2662327497601136e-05, "loss": 2.313, "step": 43180 }, { "epoch": 0.552832, "grad_norm": 1.6015625, "learning_rate": 1.2656331153450961e-05, "loss": 2.3112, "step": 43190 }, { "epoch": 0.55296, "grad_norm": 1.65625, "learning_rate": 1.2650335193178377e-05, "loss": 2.2632, "step": 43200 }, { "epoch": 0.553088, "grad_norm": 1.609375, "learning_rate": 1.2644339617765481e-05, "loss": 2.2725, "step": 43210 }, { "epoch": 0.553216, "grad_norm": 1.5859375, "learning_rate": 1.263834442819431e-05, "loss": 2.2694, "step": 43220 }, { "epoch": 0.553344, "grad_norm": 1.5546875, "learning_rate": 1.263234962544684e-05, "loss": 2.2691, "step": 43230 }, { "epoch": 0.553472, "grad_norm": 1.5, "learning_rate": 1.2626355210504976e-05, "loss": 2.287, "step": 43240 }, { "epoch": 0.5536, "grad_norm": 1.59375, "learning_rate": 1.2620361184350563e-05, "loss": 2.2659, "step": 43250 }, { "epoch": 0.553728, "grad_norm": 1.8203125, "learning_rate": 1.261436754796539e-05, "loss": 2.2706, "step": 43260 }, { "epoch": 0.553856, "grad_norm": 1.578125, "learning_rate": 1.2608374302331163e-05, "loss": 2.2579, "step": 43270 }, { "epoch": 0.553984, "grad_norm": 1.6015625, "learning_rate": 1.2602381448429557e-05, "loss": 2.2739, "step": 43280 }, { "epoch": 0.554112, "grad_norm": 1.5234375, "learning_rate": 1.2596388987242144e-05, "loss": 2.291, "step": 43290 }, { "epoch": 0.55424, "grad_norm": 1.6796875, "learning_rate": 1.2590396919750448e-05, "loss": 2.2618, "step": 43300 }, { "epoch": 0.554368, "grad_norm": 3.09375, "learning_rate": 1.2584405246935945e-05, "loss": 2.2556, "step": 43310 }, { "epoch": 0.554496, "grad_norm": 1.3984375, "learning_rate": 1.2578413969780022e-05, "loss": 2.2867, "step": 43320 }, { "epoch": 0.554624, "grad_norm": 1.640625, "learning_rate": 1.2572423089264016e-05, "loss": 2.2623, "step": 43330 }, { "epoch": 0.554752, "grad_norm": 1.7265625, "learning_rate": 1.256643260636919e-05, "loss": 2.2954, "step": 43340 }, { "epoch": 0.55488, "grad_norm": 1.6953125, "learning_rate": 1.2560442522076747e-05, "loss": 2.2546, "step": 43350 }, { "epoch": 0.555008, "grad_norm": 1.65625, "learning_rate": 1.2554452837367825e-05, "loss": 2.3131, "step": 43360 }, { "epoch": 0.555136, "grad_norm": 1.515625, "learning_rate": 1.2548463553223493e-05, "loss": 2.2719, "step": 43370 }, { "epoch": 0.555264, "grad_norm": 1.4609375, "learning_rate": 1.2542474670624752e-05, "loss": 2.2534, "step": 43380 }, { "epoch": 0.555392, "grad_norm": 1.4296875, "learning_rate": 1.2536486190552553e-05, "loss": 2.3187, "step": 43390 }, { "epoch": 0.55552, "grad_norm": 1.546875, "learning_rate": 1.2530498113987766e-05, "loss": 2.2984, "step": 43400 }, { "epoch": 0.555648, "grad_norm": 1.6796875, "learning_rate": 1.2524510441911189e-05, "loss": 2.2892, "step": 43410 }, { "epoch": 0.555776, "grad_norm": 1.7421875, "learning_rate": 1.2518523175303577e-05, "loss": 2.2973, "step": 43420 }, { "epoch": 0.555904, "grad_norm": 1.6328125, "learning_rate": 1.2512536315145597e-05, "loss": 2.2899, "step": 43430 }, { "epoch": 0.556032, "grad_norm": 1.5625, "learning_rate": 1.2506549862417856e-05, "loss": 2.2595, "step": 43440 }, { "epoch": 0.55616, "grad_norm": 1.6171875, "learning_rate": 1.2500563818100903e-05, "loss": 2.2536, "step": 43450 }, { "epoch": 0.556288, "grad_norm": 2.578125, "learning_rate": 1.2494578183175209e-05, "loss": 2.3032, "step": 43460 }, { "epoch": 0.556416, "grad_norm": 1.5234375, "learning_rate": 1.2488592958621177e-05, "loss": 2.2883, "step": 43470 }, { "epoch": 0.556544, "grad_norm": 1.59375, "learning_rate": 1.2482608145419158e-05, "loss": 2.2723, "step": 43480 }, { "epoch": 0.556672, "grad_norm": 1.6328125, "learning_rate": 1.247662374454941e-05, "loss": 2.2534, "step": 43490 }, { "epoch": 0.5568, "grad_norm": 1.53125, "learning_rate": 1.2470639756992156e-05, "loss": 2.2877, "step": 43500 }, { "epoch": 0.556928, "grad_norm": 2.1875, "learning_rate": 1.2464656183727529e-05, "loss": 2.2953, "step": 43510 }, { "epoch": 0.557056, "grad_norm": 1.7421875, "learning_rate": 1.2458673025735584e-05, "loss": 2.3057, "step": 43520 }, { "epoch": 0.557184, "grad_norm": 1.6640625, "learning_rate": 1.2452690283996343e-05, "loss": 2.2748, "step": 43530 }, { "epoch": 0.557312, "grad_norm": 1.5546875, "learning_rate": 1.2446707959489734e-05, "loss": 2.2937, "step": 43540 }, { "epoch": 0.55744, "grad_norm": 1.46875, "learning_rate": 1.2440726053195614e-05, "loss": 2.3031, "step": 43550 }, { "epoch": 0.557568, "grad_norm": 1.578125, "learning_rate": 1.243474456609379e-05, "loss": 2.2737, "step": 43560 }, { "epoch": 0.557696, "grad_norm": 2.15625, "learning_rate": 1.2428763499163989e-05, "loss": 2.2971, "step": 43570 }, { "epoch": 0.557824, "grad_norm": 1.4296875, "learning_rate": 1.2422782853385862e-05, "loss": 2.2673, "step": 43580 }, { "epoch": 0.557952, "grad_norm": 1.6875, "learning_rate": 1.241680262973901e-05, "loss": 2.284, "step": 43590 }, { "epoch": 0.55808, "grad_norm": 1.5546875, "learning_rate": 1.2410822829202951e-05, "loss": 2.2764, "step": 43600 }, { "epoch": 0.558208, "grad_norm": 1.75, "learning_rate": 1.240484345275713e-05, "loss": 2.2642, "step": 43610 }, { "epoch": 0.558336, "grad_norm": 1.5625, "learning_rate": 1.2398864501380942e-05, "loss": 2.27, "step": 43620 }, { "epoch": 0.558464, "grad_norm": 1.5703125, "learning_rate": 1.2392885976053689e-05, "loss": 2.2586, "step": 43630 }, { "epoch": 0.558592, "grad_norm": 1.5, "learning_rate": 1.2386907877754625e-05, "loss": 2.2838, "step": 43640 }, { "epoch": 0.55872, "grad_norm": 1.5625, "learning_rate": 1.2380930207462917e-05, "loss": 2.2994, "step": 43650 }, { "epoch": 0.558848, "grad_norm": 1.3984375, "learning_rate": 1.2374952966157665e-05, "loss": 2.2872, "step": 43660 }, { "epoch": 0.558976, "grad_norm": 1.5390625, "learning_rate": 1.236897615481791e-05, "loss": 2.3021, "step": 43670 }, { "epoch": 0.559104, "grad_norm": 1.5078125, "learning_rate": 1.2362999774422609e-05, "loss": 2.2576, "step": 43680 }, { "epoch": 0.559232, "grad_norm": 1.8671875, "learning_rate": 1.2357023825950655e-05, "loss": 2.2872, "step": 43690 }, { "epoch": 0.55936, "grad_norm": 1.5, "learning_rate": 1.2351048310380868e-05, "loss": 2.2823, "step": 43700 }, { "epoch": 0.559488, "grad_norm": 1.5625, "learning_rate": 1.2345073228692004e-05, "loss": 2.2565, "step": 43710 }, { "epoch": 0.559616, "grad_norm": 2.484375, "learning_rate": 1.2339098581862728e-05, "loss": 2.2711, "step": 43720 }, { "epoch": 0.559744, "grad_norm": 1.4765625, "learning_rate": 1.2333124370871666e-05, "loss": 2.2697, "step": 43730 }, { "epoch": 0.559872, "grad_norm": 1.8125, "learning_rate": 1.2327150596697346e-05, "loss": 2.2839, "step": 43740 }, { "epoch": 0.56, "grad_norm": 1.578125, "learning_rate": 1.232117726031823e-05, "loss": 2.2757, "step": 43750 }, { "epoch": 0.560128, "grad_norm": 1.6640625, "learning_rate": 1.2315204362712718e-05, "loss": 2.2938, "step": 43760 }, { "epoch": 0.560256, "grad_norm": 1.5, "learning_rate": 1.2309231904859128e-05, "loss": 2.275, "step": 43770 }, { "epoch": 0.560384, "grad_norm": 1.5703125, "learning_rate": 1.2303259887735707e-05, "loss": 2.2895, "step": 43780 }, { "epoch": 0.560512, "grad_norm": 1.6484375, "learning_rate": 1.229728831232064e-05, "loss": 2.2766, "step": 43790 }, { "epoch": 0.56064, "grad_norm": 2.578125, "learning_rate": 1.2291317179592017e-05, "loss": 2.2827, "step": 43800 }, { "epoch": 0.560768, "grad_norm": 1.5625, "learning_rate": 1.2285346490527891e-05, "loss": 2.2823, "step": 43810 }, { "epoch": 0.560896, "grad_norm": 1.46875, "learning_rate": 1.2279376246106208e-05, "loss": 2.303, "step": 43820 }, { "epoch": 0.561024, "grad_norm": 1.5390625, "learning_rate": 1.2273406447304852e-05, "loss": 2.2865, "step": 43830 }, { "epoch": 0.561152, "grad_norm": 1.5390625, "learning_rate": 1.2267437095101647e-05, "loss": 2.2814, "step": 43840 }, { "epoch": 0.56128, "grad_norm": 1.625, "learning_rate": 1.2261468190474332e-05, "loss": 2.2924, "step": 43850 }, { "epoch": 0.561408, "grad_norm": 1.5390625, "learning_rate": 1.2255499734400568e-05, "loss": 2.2158, "step": 43860 }, { "epoch": 0.561536, "grad_norm": 1.4609375, "learning_rate": 1.2249531727857956e-05, "loss": 2.2658, "step": 43870 }, { "epoch": 0.561664, "grad_norm": 1.46875, "learning_rate": 1.2243564171824014e-05, "loss": 2.271, "step": 43880 }, { "epoch": 0.561792, "grad_norm": 1.546875, "learning_rate": 1.2237597067276183e-05, "loss": 2.283, "step": 43890 }, { "epoch": 0.56192, "grad_norm": 1.765625, "learning_rate": 1.2231630415191844e-05, "loss": 2.2632, "step": 43900 }, { "epoch": 0.562048, "grad_norm": 1.5859375, "learning_rate": 1.2225664216548292e-05, "loss": 2.2708, "step": 43910 }, { "epoch": 0.562176, "grad_norm": 1.5859375, "learning_rate": 1.2219698472322745e-05, "loss": 2.2791, "step": 43920 }, { "epoch": 0.562304, "grad_norm": 1.6015625, "learning_rate": 1.221373318349237e-05, "loss": 2.2915, "step": 43930 }, { "epoch": 0.562432, "grad_norm": 1.4765625, "learning_rate": 1.2207768351034219e-05, "loss": 2.2744, "step": 43940 }, { "epoch": 0.56256, "grad_norm": 1.6796875, "learning_rate": 1.2201803975925311e-05, "loss": 2.2997, "step": 43950 }, { "epoch": 0.562688, "grad_norm": 1.5078125, "learning_rate": 1.2195840059142568e-05, "loss": 2.3076, "step": 43960 }, { "epoch": 0.562816, "grad_norm": 1.5703125, "learning_rate": 1.2189876601662831e-05, "loss": 2.2469, "step": 43970 }, { "epoch": 0.562944, "grad_norm": 1.4375, "learning_rate": 1.2183913604462887e-05, "loss": 2.2643, "step": 43980 }, { "epoch": 0.563072, "grad_norm": 4.125, "learning_rate": 1.2177951068519431e-05, "loss": 2.307, "step": 43990 }, { "epoch": 0.5632, "grad_norm": 1.5234375, "learning_rate": 1.2171988994809082e-05, "loss": 2.2994, "step": 44000 }, { "epoch": 0.563328, "grad_norm": 1.734375, "learning_rate": 1.2166027384308397e-05, "loss": 2.2728, "step": 44010 }, { "epoch": 0.563456, "grad_norm": 1.578125, "learning_rate": 1.2160066237993845e-05, "loss": 2.291, "step": 44020 }, { "epoch": 0.563584, "grad_norm": 1.640625, "learning_rate": 1.2154105556841817e-05, "loss": 2.3074, "step": 44030 }, { "epoch": 0.563712, "grad_norm": 1.5625, "learning_rate": 1.214814534182865e-05, "loss": 2.2706, "step": 44040 }, { "epoch": 0.56384, "grad_norm": 1.7265625, "learning_rate": 1.2142185593930573e-05, "loss": 2.2903, "step": 44050 }, { "epoch": 0.563968, "grad_norm": 1.4765625, "learning_rate": 1.2136226314123753e-05, "loss": 2.2839, "step": 44060 }, { "epoch": 0.564096, "grad_norm": 1.4375, "learning_rate": 1.213026750338429e-05, "loss": 2.2782, "step": 44070 }, { "epoch": 0.564224, "grad_norm": 2.234375, "learning_rate": 1.2124309162688198e-05, "loss": 2.303, "step": 44080 }, { "epoch": 0.564352, "grad_norm": 1.8203125, "learning_rate": 1.2118351293011404e-05, "loss": 2.3023, "step": 44090 }, { "epoch": 0.56448, "grad_norm": 1.578125, "learning_rate": 1.2112393895329779e-05, "loss": 2.312, "step": 44100 }, { "epoch": 0.564608, "grad_norm": 1.484375, "learning_rate": 1.2106436970619096e-05, "loss": 2.2889, "step": 44110 }, { "epoch": 0.564736, "grad_norm": 3.75, "learning_rate": 1.2100480519855071e-05, "loss": 2.3136, "step": 44120 }, { "epoch": 0.564864, "grad_norm": 1.4453125, "learning_rate": 1.2094524544013326e-05, "loss": 2.2846, "step": 44130 }, { "epoch": 0.564992, "grad_norm": 1.640625, "learning_rate": 1.2088569044069401e-05, "loss": 2.2595, "step": 44140 }, { "epoch": 0.56512, "grad_norm": 1.609375, "learning_rate": 1.2082614020998787e-05, "loss": 2.2931, "step": 44150 }, { "epoch": 0.565248, "grad_norm": 3.890625, "learning_rate": 1.207665947577687e-05, "loss": 2.2638, "step": 44160 }, { "epoch": 0.565376, "grad_norm": 1.6953125, "learning_rate": 1.2070705409378955e-05, "loss": 2.2898, "step": 44170 }, { "epoch": 0.565504, "grad_norm": 1.5546875, "learning_rate": 1.2064751822780294e-05, "loss": 2.3002, "step": 44180 }, { "epoch": 0.565632, "grad_norm": 1.59375, "learning_rate": 1.2058798716956039e-05, "loss": 2.279, "step": 44190 }, { "epoch": 0.56576, "grad_norm": 1.5078125, "learning_rate": 1.2052846092881266e-05, "loss": 2.2605, "step": 44200 }, { "epoch": 0.565888, "grad_norm": 1.5703125, "learning_rate": 1.2046893951530982e-05, "loss": 2.2642, "step": 44210 }, { "epoch": 0.566016, "grad_norm": 1.578125, "learning_rate": 1.2040942293880109e-05, "loss": 2.3005, "step": 44220 }, { "epoch": 0.566144, "grad_norm": 1.609375, "learning_rate": 1.2034991120903482e-05, "loss": 2.2878, "step": 44230 }, { "epoch": 0.566272, "grad_norm": 1.65625, "learning_rate": 1.2029040433575871e-05, "loss": 2.284, "step": 44240 }, { "epoch": 0.5664, "grad_norm": 1.59375, "learning_rate": 1.2023090232871954e-05, "loss": 2.2654, "step": 44250 }, { "epoch": 0.566528, "grad_norm": 1.671875, "learning_rate": 1.2017140519766344e-05, "loss": 2.2757, "step": 44260 }, { "epoch": 0.566656, "grad_norm": 1.5390625, "learning_rate": 1.2011191295233564e-05, "loss": 2.294, "step": 44270 }, { "epoch": 0.566784, "grad_norm": 1.609375, "learning_rate": 1.200524256024804e-05, "loss": 2.2408, "step": 44280 }, { "epoch": 0.566912, "grad_norm": 1.4453125, "learning_rate": 1.1999294315784158e-05, "loss": 2.2674, "step": 44290 }, { "epoch": 0.56704, "grad_norm": 1.5546875, "learning_rate": 1.1993346562816193e-05, "loss": 2.2727, "step": 44300 }, { "epoch": 0.567168, "grad_norm": 1.6015625, "learning_rate": 1.1987399302318343e-05, "loss": 2.2834, "step": 44310 }, { "epoch": 0.567296, "grad_norm": 1.59375, "learning_rate": 1.198145253526474e-05, "loss": 2.2546, "step": 44320 }, { "epoch": 0.567424, "grad_norm": 1.5390625, "learning_rate": 1.1975506262629418e-05, "loss": 2.2637, "step": 44330 }, { "epoch": 0.567552, "grad_norm": 1.734375, "learning_rate": 1.1969560485386337e-05, "loss": 2.2781, "step": 44340 }, { "epoch": 0.56768, "grad_norm": 1.515625, "learning_rate": 1.1963615204509384e-05, "loss": 2.3032, "step": 44350 }, { "epoch": 0.567808, "grad_norm": 2.0, "learning_rate": 1.195767042097235e-05, "loss": 2.2997, "step": 44360 }, { "epoch": 0.567936, "grad_norm": 1.5078125, "learning_rate": 1.195172613574895e-05, "loss": 2.2854, "step": 44370 }, { "epoch": 0.568064, "grad_norm": 1.4765625, "learning_rate": 1.1945782349812826e-05, "loss": 2.254, "step": 44380 }, { "epoch": 0.568192, "grad_norm": 1.5078125, "learning_rate": 1.1939839064137533e-05, "loss": 2.2953, "step": 44390 }, { "epoch": 0.56832, "grad_norm": 1.3984375, "learning_rate": 1.1933896279696525e-05, "loss": 2.2997, "step": 44400 }, { "epoch": 0.568448, "grad_norm": 1.5, "learning_rate": 1.1927953997463212e-05, "loss": 2.2951, "step": 44410 }, { "epoch": 0.568576, "grad_norm": 1.53125, "learning_rate": 1.192201221841089e-05, "loss": 2.3121, "step": 44420 }, { "epoch": 0.568704, "grad_norm": 1.5390625, "learning_rate": 1.1916070943512786e-05, "loss": 2.3187, "step": 44430 }, { "epoch": 0.568832, "grad_norm": 1.5234375, "learning_rate": 1.1910130173742041e-05, "loss": 2.2842, "step": 44440 }, { "epoch": 0.56896, "grad_norm": 1.6953125, "learning_rate": 1.1904189910071713e-05, "loss": 2.2787, "step": 44450 }, { "epoch": 0.569088, "grad_norm": 1.6953125, "learning_rate": 1.1898250153474783e-05, "loss": 2.2591, "step": 44460 }, { "epoch": 0.569216, "grad_norm": 1.5859375, "learning_rate": 1.189231090492414e-05, "loss": 2.2857, "step": 44470 }, { "epoch": 0.569344, "grad_norm": 1.8125, "learning_rate": 1.1886372165392589e-05, "loss": 2.261, "step": 44480 }, { "epoch": 0.569472, "grad_norm": 2.328125, "learning_rate": 1.1880433935852869e-05, "loss": 2.2877, "step": 44490 }, { "epoch": 0.5696, "grad_norm": 1.6875, "learning_rate": 1.1874496217277616e-05, "loss": 2.2666, "step": 44500 }, { "epoch": 0.569728, "grad_norm": 2.140625, "learning_rate": 1.1868559010639387e-05, "loss": 2.2769, "step": 44510 }, { "epoch": 0.569856, "grad_norm": 1.46875, "learning_rate": 1.1862622316910663e-05, "loss": 2.2855, "step": 44520 }, { "epoch": 0.569984, "grad_norm": 1.7265625, "learning_rate": 1.1856686137063834e-05, "loss": 2.2723, "step": 44530 }, { "epoch": 0.570112, "grad_norm": 1.5078125, "learning_rate": 1.1850750472071202e-05, "loss": 2.2982, "step": 44540 }, { "epoch": 0.57024, "grad_norm": 1.5078125, "learning_rate": 1.1844815322904999e-05, "loss": 2.3247, "step": 44550 }, { "epoch": 0.570368, "grad_norm": 1.6015625, "learning_rate": 1.1838880690537352e-05, "loss": 2.3146, "step": 44560 }, { "epoch": 0.570496, "grad_norm": 2.359375, "learning_rate": 1.183294657594033e-05, "loss": 2.3062, "step": 44570 }, { "epoch": 0.570624, "grad_norm": 1.5546875, "learning_rate": 1.1827012980085892e-05, "loss": 2.2806, "step": 44580 }, { "epoch": 0.570752, "grad_norm": 1.65625, "learning_rate": 1.1821079903945918e-05, "loss": 2.2555, "step": 44590 }, { "epoch": 0.57088, "grad_norm": 1.3515625, "learning_rate": 1.1815147348492218e-05, "loss": 2.2948, "step": 44600 }, { "epoch": 0.571008, "grad_norm": 1.5234375, "learning_rate": 1.1809215314696499e-05, "loss": 2.2452, "step": 44610 }, { "epoch": 0.571136, "grad_norm": 1.53125, "learning_rate": 1.180328380353039e-05, "loss": 2.2571, "step": 44620 }, { "epoch": 0.571264, "grad_norm": 1.4765625, "learning_rate": 1.1797352815965436e-05, "loss": 2.2626, "step": 44630 }, { "epoch": 0.571392, "grad_norm": 1.6484375, "learning_rate": 1.1791422352973092e-05, "loss": 2.2859, "step": 44640 }, { "epoch": 0.57152, "grad_norm": 1.78125, "learning_rate": 1.1785492415524729e-05, "loss": 2.2548, "step": 44650 }, { "epoch": 0.571648, "grad_norm": 1.6796875, "learning_rate": 1.1779563004591632e-05, "loss": 2.2538, "step": 44660 }, { "epoch": 0.571776, "grad_norm": 1.7734375, "learning_rate": 1.1773634121145002e-05, "loss": 2.2399, "step": 44670 }, { "epoch": 0.571904, "grad_norm": 2.296875, "learning_rate": 1.176770576615594e-05, "loss": 2.2723, "step": 44680 }, { "epoch": 0.572032, "grad_norm": 2.15625, "learning_rate": 1.1761777940595494e-05, "loss": 2.2644, "step": 44690 }, { "epoch": 0.57216, "grad_norm": 1.4921875, "learning_rate": 1.1755850645434583e-05, "loss": 2.3148, "step": 44700 }, { "epoch": 0.572288, "grad_norm": 1.5234375, "learning_rate": 1.174992388164406e-05, "loss": 2.2768, "step": 44710 }, { "epoch": 0.572416, "grad_norm": 1.703125, "learning_rate": 1.1743997650194703e-05, "loss": 2.2621, "step": 44720 }, { "epoch": 0.572544, "grad_norm": 1.65625, "learning_rate": 1.1738071952057178e-05, "loss": 2.2928, "step": 44730 }, { "epoch": 0.572672, "grad_norm": 1.546875, "learning_rate": 1.1732146788202084e-05, "loss": 2.2657, "step": 44740 }, { "epoch": 0.5728, "grad_norm": 1.5859375, "learning_rate": 1.1726222159599921e-05, "loss": 2.2529, "step": 44750 }, { "epoch": 0.572928, "grad_norm": 4.21875, "learning_rate": 1.17202980672211e-05, "loss": 2.2623, "step": 44760 }, { "epoch": 0.573056, "grad_norm": 2.09375, "learning_rate": 1.1714374512035955e-05, "loss": 2.2909, "step": 44770 }, { "epoch": 0.573184, "grad_norm": 1.53125, "learning_rate": 1.1708451495014722e-05, "loss": 2.2552, "step": 44780 }, { "epoch": 0.573312, "grad_norm": 1.484375, "learning_rate": 1.1702529017127544e-05, "loss": 2.2425, "step": 44790 }, { "epoch": 0.57344, "grad_norm": 1.4765625, "learning_rate": 1.1696607079344505e-05, "loss": 2.2485, "step": 44800 }, { "epoch": 0.573568, "grad_norm": 1.46875, "learning_rate": 1.1690685682635565e-05, "loss": 2.2676, "step": 44810 }, { "epoch": 0.573696, "grad_norm": 2.109375, "learning_rate": 1.1684764827970605e-05, "loss": 2.2671, "step": 44820 }, { "epoch": 0.573824, "grad_norm": 1.6328125, "learning_rate": 1.1678844516319436e-05, "loss": 2.2938, "step": 44830 }, { "epoch": 0.573952, "grad_norm": 1.6171875, "learning_rate": 1.167292474865176e-05, "loss": 2.2768, "step": 44840 }, { "epoch": 0.57408, "grad_norm": 1.6328125, "learning_rate": 1.1667005525937191e-05, "loss": 2.2729, "step": 44850 }, { "epoch": 0.574208, "grad_norm": 1.4296875, "learning_rate": 1.1661086849145267e-05, "loss": 2.2616, "step": 44860 }, { "epoch": 0.574336, "grad_norm": 1.6484375, "learning_rate": 1.1655168719245424e-05, "loss": 2.2747, "step": 44870 }, { "epoch": 0.574464, "grad_norm": 1.5234375, "learning_rate": 1.1649251137207016e-05, "loss": 2.3021, "step": 44880 }, { "epoch": 0.574592, "grad_norm": 1.484375, "learning_rate": 1.1643334103999302e-05, "loss": 2.2964, "step": 44890 }, { "epoch": 0.57472, "grad_norm": 2.09375, "learning_rate": 1.1637417620591448e-05, "loss": 2.2785, "step": 44900 }, { "epoch": 0.574848, "grad_norm": 1.609375, "learning_rate": 1.1631501687952546e-05, "loss": 2.2581, "step": 44910 }, { "epoch": 0.574976, "grad_norm": 1.609375, "learning_rate": 1.1625586307051584e-05, "loss": 2.3053, "step": 44920 }, { "epoch": 0.575104, "grad_norm": 1.4765625, "learning_rate": 1.1619671478857453e-05, "loss": 2.3177, "step": 44930 }, { "epoch": 0.575232, "grad_norm": 4.78125, "learning_rate": 1.1613757204338975e-05, "loss": 2.3171, "step": 44940 }, { "epoch": 0.57536, "grad_norm": 1.5078125, "learning_rate": 1.1607843484464865e-05, "loss": 2.3015, "step": 44950 }, { "epoch": 0.575488, "grad_norm": 5.46875, "learning_rate": 1.1601930320203748e-05, "loss": 2.3087, "step": 44960 }, { "epoch": 0.575616, "grad_norm": 1.4921875, "learning_rate": 1.159601771252417e-05, "loss": 2.2725, "step": 44970 }, { "epoch": 0.575744, "grad_norm": 1.71875, "learning_rate": 1.159010566239457e-05, "loss": 2.3056, "step": 44980 }, { "epoch": 0.575872, "grad_norm": 1.7265625, "learning_rate": 1.1584194170783303e-05, "loss": 2.3002, "step": 44990 }, { "epoch": 0.576, "grad_norm": 1.5234375, "learning_rate": 1.1578283238658637e-05, "loss": 2.298, "step": 45000 }, { "epoch": 0.576128, "grad_norm": 1.609375, "learning_rate": 1.1572372866988742e-05, "loss": 2.284, "step": 45010 }, { "epoch": 0.576256, "grad_norm": 1.53125, "learning_rate": 1.1566463056741689e-05, "loss": 2.2833, "step": 45020 }, { "epoch": 0.576384, "grad_norm": 1.6484375, "learning_rate": 1.1560553808885489e-05, "loss": 2.2857, "step": 45030 }, { "epoch": 0.576512, "grad_norm": 1.71875, "learning_rate": 1.1554645124388012e-05, "loss": 2.2614, "step": 45040 }, { "epoch": 0.57664, "grad_norm": 1.6484375, "learning_rate": 1.1548737004217078e-05, "loss": 2.2635, "step": 45050 }, { "epoch": 0.576768, "grad_norm": 1.5078125, "learning_rate": 1.1542829449340399e-05, "loss": 2.2679, "step": 45060 }, { "epoch": 0.576896, "grad_norm": 1.609375, "learning_rate": 1.1536922460725584e-05, "loss": 2.2829, "step": 45070 }, { "epoch": 0.577024, "grad_norm": 1.4453125, "learning_rate": 1.1531016039340165e-05, "loss": 2.294, "step": 45080 }, { "epoch": 0.577152, "grad_norm": 1.625, "learning_rate": 1.1525110186151577e-05, "loss": 2.2924, "step": 45090 }, { "epoch": 0.57728, "grad_norm": 5.8125, "learning_rate": 1.151920490212715e-05, "loss": 2.2761, "step": 45100 }, { "epoch": 0.577408, "grad_norm": 1.59375, "learning_rate": 1.1513300188234145e-05, "loss": 2.2986, "step": 45110 }, { "epoch": 0.577536, "grad_norm": 1.5390625, "learning_rate": 1.1507396045439706e-05, "loss": 2.2643, "step": 45120 }, { "epoch": 0.577664, "grad_norm": 1.4140625, "learning_rate": 1.1501492474710889e-05, "loss": 2.2903, "step": 45130 }, { "epoch": 0.577792, "grad_norm": 1.546875, "learning_rate": 1.1495589477014674e-05, "loss": 2.275, "step": 45140 }, { "epoch": 0.57792, "grad_norm": 1.6328125, "learning_rate": 1.1489687053317928e-05, "loss": 2.2384, "step": 45150 }, { "epoch": 0.578048, "grad_norm": 1.71875, "learning_rate": 1.1483785204587416e-05, "loss": 2.3324, "step": 45160 }, { "epoch": 0.578176, "grad_norm": 1.625, "learning_rate": 1.1477883931789841e-05, "loss": 2.3044, "step": 45170 }, { "epoch": 0.578304, "grad_norm": 5.90625, "learning_rate": 1.147198323589178e-05, "loss": 2.275, "step": 45180 }, { "epoch": 0.578432, "grad_norm": 1.46875, "learning_rate": 1.1466083117859739e-05, "loss": 2.3061, "step": 45190 }, { "epoch": 0.57856, "grad_norm": 1.9453125, "learning_rate": 1.1460183578660112e-05, "loss": 2.2636, "step": 45200 }, { "epoch": 0.578688, "grad_norm": 1.5390625, "learning_rate": 1.1454284619259202e-05, "loss": 2.27, "step": 45210 }, { "epoch": 0.578816, "grad_norm": 1.7265625, "learning_rate": 1.1448386240623226e-05, "loss": 2.2723, "step": 45220 }, { "epoch": 0.578944, "grad_norm": 1.515625, "learning_rate": 1.1442488443718297e-05, "loss": 2.2735, "step": 45230 }, { "epoch": 0.579072, "grad_norm": 1.5390625, "learning_rate": 1.1436591229510432e-05, "loss": 2.2539, "step": 45240 }, { "epoch": 0.5792, "grad_norm": 1.6640625, "learning_rate": 1.1430694598965563e-05, "loss": 2.2665, "step": 45250 }, { "epoch": 0.579328, "grad_norm": 1.5234375, "learning_rate": 1.1424798553049518e-05, "loss": 2.2756, "step": 45260 }, { "epoch": 0.579456, "grad_norm": 1.5, "learning_rate": 1.1418903092728026e-05, "loss": 2.3014, "step": 45270 }, { "epoch": 0.579584, "grad_norm": 1.640625, "learning_rate": 1.141300821896673e-05, "loss": 2.2626, "step": 45280 }, { "epoch": 0.579712, "grad_norm": 1.4296875, "learning_rate": 1.1407113932731169e-05, "loss": 2.2468, "step": 45290 }, { "epoch": 0.57984, "grad_norm": 2.5625, "learning_rate": 1.1401220234986784e-05, "loss": 2.287, "step": 45300 }, { "epoch": 0.579968, "grad_norm": 1.578125, "learning_rate": 1.1395327126698935e-05, "loss": 2.3044, "step": 45310 }, { "epoch": 0.580096, "grad_norm": 1.53125, "learning_rate": 1.1389434608832867e-05, "loss": 2.2851, "step": 45320 }, { "epoch": 0.580224, "grad_norm": 1.59375, "learning_rate": 1.1383542682353734e-05, "loss": 2.2585, "step": 45330 }, { "epoch": 0.580352, "grad_norm": 1.515625, "learning_rate": 1.13776513482266e-05, "loss": 2.2924, "step": 45340 }, { "epoch": 0.58048, "grad_norm": 1.6796875, "learning_rate": 1.137176060741642e-05, "loss": 2.298, "step": 45350 }, { "epoch": 0.580608, "grad_norm": 1.515625, "learning_rate": 1.1365870460888069e-05, "loss": 2.3004, "step": 45360 }, { "epoch": 0.580736, "grad_norm": 1.75, "learning_rate": 1.1359980909606308e-05, "loss": 2.2974, "step": 45370 }, { "epoch": 0.580864, "grad_norm": 1.5390625, "learning_rate": 1.1354091954535806e-05, "loss": 2.2776, "step": 45380 }, { "epoch": 0.580992, "grad_norm": 1.5859375, "learning_rate": 1.134820359664114e-05, "loss": 2.2793, "step": 45390 }, { "epoch": 0.58112, "grad_norm": 1.5859375, "learning_rate": 1.1342315836886783e-05, "loss": 2.2776, "step": 45400 }, { "epoch": 0.581248, "grad_norm": 1.5390625, "learning_rate": 1.1336428676237106e-05, "loss": 2.2622, "step": 45410 }, { "epoch": 0.581376, "grad_norm": 1.484375, "learning_rate": 1.1330542115656392e-05, "loss": 2.2755, "step": 45420 }, { "epoch": 0.581504, "grad_norm": 1.6875, "learning_rate": 1.1324656156108823e-05, "loss": 2.2933, "step": 45430 }, { "epoch": 0.581632, "grad_norm": 1.703125, "learning_rate": 1.131877079855847e-05, "loss": 2.2592, "step": 45440 }, { "epoch": 0.58176, "grad_norm": 1.4453125, "learning_rate": 1.1312886043969334e-05, "loss": 2.2702, "step": 45450 }, { "epoch": 0.581888, "grad_norm": 1.6875, "learning_rate": 1.1307001893305286e-05, "loss": 2.2742, "step": 45460 }, { "epoch": 0.582016, "grad_norm": 1.53125, "learning_rate": 1.1301118347530107e-05, "loss": 2.2893, "step": 45470 }, { "epoch": 0.582144, "grad_norm": 1.671875, "learning_rate": 1.12952354076075e-05, "loss": 2.2559, "step": 45480 }, { "epoch": 0.582272, "grad_norm": 1.4921875, "learning_rate": 1.1289353074501036e-05, "loss": 2.3008, "step": 45490 }, { "epoch": 0.5824, "grad_norm": 1.6015625, "learning_rate": 1.1283471349174213e-05, "loss": 2.2989, "step": 45500 }, { "epoch": 0.582528, "grad_norm": 1.5546875, "learning_rate": 1.1277590232590415e-05, "loss": 2.2761, "step": 45510 }, { "epoch": 0.582656, "grad_norm": 1.8125, "learning_rate": 1.1271709725712929e-05, "loss": 2.2434, "step": 45520 }, { "epoch": 0.582784, "grad_norm": 1.6328125, "learning_rate": 1.1265829829504946e-05, "loss": 2.2674, "step": 45530 }, { "epoch": 0.582912, "grad_norm": 1.578125, "learning_rate": 1.1259950544929555e-05, "loss": 2.2796, "step": 45540 }, { "epoch": 0.58304, "grad_norm": 1.4921875, "learning_rate": 1.1254071872949737e-05, "loss": 2.2968, "step": 45550 }, { "epoch": 0.583168, "grad_norm": 1.5390625, "learning_rate": 1.1248193814528394e-05, "loss": 2.2681, "step": 45560 }, { "epoch": 0.583296, "grad_norm": 1.5, "learning_rate": 1.1242316370628303e-05, "loss": 2.2633, "step": 45570 }, { "epoch": 0.583424, "grad_norm": 1.7109375, "learning_rate": 1.1236439542212144e-05, "loss": 2.3047, "step": 45580 }, { "epoch": 0.583552, "grad_norm": 1.484375, "learning_rate": 1.123056333024252e-05, "loss": 2.2846, "step": 45590 }, { "epoch": 0.58368, "grad_norm": 4.25, "learning_rate": 1.1224687735681907e-05, "loss": 2.2794, "step": 45600 }, { "epoch": 0.583808, "grad_norm": 1.640625, "learning_rate": 1.1218812759492688e-05, "loss": 2.3107, "step": 45610 }, { "epoch": 0.583936, "grad_norm": 1.578125, "learning_rate": 1.1212938402637148e-05, "loss": 2.3119, "step": 45620 }, { "epoch": 0.584064, "grad_norm": 1.6953125, "learning_rate": 1.1207064666077472e-05, "loss": 2.2437, "step": 45630 }, { "epoch": 0.584192, "grad_norm": 1.5859375, "learning_rate": 1.1201191550775726e-05, "loss": 2.2676, "step": 45640 }, { "epoch": 0.58432, "grad_norm": 1.6875, "learning_rate": 1.1195319057693902e-05, "loss": 2.2762, "step": 45650 }, { "epoch": 0.584448, "grad_norm": 1.5, "learning_rate": 1.1189447187793866e-05, "loss": 2.2873, "step": 45660 }, { "epoch": 0.584576, "grad_norm": 1.546875, "learning_rate": 1.1183575942037402e-05, "loss": 2.2854, "step": 45670 }, { "epoch": 0.584704, "grad_norm": 1.5859375, "learning_rate": 1.1177705321386177e-05, "loss": 2.2691, "step": 45680 }, { "epoch": 0.584832, "grad_norm": 1.6484375, "learning_rate": 1.1171835326801751e-05, "loss": 2.3015, "step": 45690 }, { "epoch": 0.58496, "grad_norm": 1.6484375, "learning_rate": 1.1165965959245606e-05, "loss": 2.2914, "step": 45700 }, { "epoch": 0.585088, "grad_norm": 1.5, "learning_rate": 1.1160097219679098e-05, "loss": 2.2801, "step": 45710 }, { "epoch": 0.585216, "grad_norm": 1.5625, "learning_rate": 1.1154229109063485e-05, "loss": 2.2763, "step": 45720 }, { "epoch": 0.585344, "grad_norm": 1.484375, "learning_rate": 1.1148361628359931e-05, "loss": 2.2554, "step": 45730 }, { "epoch": 0.585472, "grad_norm": 1.78125, "learning_rate": 1.114249477852949e-05, "loss": 2.2844, "step": 45740 }, { "epoch": 0.5856, "grad_norm": 1.578125, "learning_rate": 1.1136628560533109e-05, "loss": 2.2778, "step": 45750 }, { "epoch": 0.585728, "grad_norm": 1.5078125, "learning_rate": 1.1130762975331643e-05, "loss": 2.2886, "step": 45760 }, { "epoch": 0.585856, "grad_norm": 1.78125, "learning_rate": 1.1124898023885831e-05, "loss": 2.3046, "step": 45770 }, { "epoch": 0.585984, "grad_norm": 1.984375, "learning_rate": 1.1119033707156312e-05, "loss": 2.2561, "step": 45780 }, { "epoch": 0.586112, "grad_norm": 1.6171875, "learning_rate": 1.1113170026103631e-05, "loss": 2.2854, "step": 45790 }, { "epoch": 0.58624, "grad_norm": 1.578125, "learning_rate": 1.1107306981688207e-05, "loss": 2.292, "step": 45800 }, { "epoch": 0.586368, "grad_norm": 1.5859375, "learning_rate": 1.1101444574870384e-05, "loss": 2.2566, "step": 45810 }, { "epoch": 0.586496, "grad_norm": 1.46875, "learning_rate": 1.1095582806610377e-05, "loss": 2.3148, "step": 45820 }, { "epoch": 0.586624, "grad_norm": 3.328125, "learning_rate": 1.1089721677868307e-05, "loss": 2.2915, "step": 45830 }, { "epoch": 0.586752, "grad_norm": 1.5703125, "learning_rate": 1.1083861189604187e-05, "loss": 2.2784, "step": 45840 }, { "epoch": 0.58688, "grad_norm": 1.4375, "learning_rate": 1.1078001342777927e-05, "loss": 2.2762, "step": 45850 }, { "epoch": 0.587008, "grad_norm": 1.53125, "learning_rate": 1.1072142138349334e-05, "loss": 2.296, "step": 45860 }, { "epoch": 0.587136, "grad_norm": 1.625, "learning_rate": 1.1066283577278103e-05, "loss": 2.2569, "step": 45870 }, { "epoch": 0.587264, "grad_norm": 1.6015625, "learning_rate": 1.1060425660523833e-05, "loss": 2.2911, "step": 45880 }, { "epoch": 0.587392, "grad_norm": 1.5390625, "learning_rate": 1.1054568389046005e-05, "loss": 2.2837, "step": 45890 }, { "epoch": 0.58752, "grad_norm": 1.625, "learning_rate": 1.104871176380401e-05, "loss": 2.2929, "step": 45900 }, { "epoch": 0.587648, "grad_norm": 1.5625, "learning_rate": 1.1042855785757124e-05, "loss": 2.3286, "step": 45910 }, { "epoch": 0.587776, "grad_norm": 1.5390625, "learning_rate": 1.1037000455864507e-05, "loss": 2.2811, "step": 45920 }, { "epoch": 0.587904, "grad_norm": 1.8203125, "learning_rate": 1.1031145775085237e-05, "loss": 2.2803, "step": 45930 }, { "epoch": 0.588032, "grad_norm": 2.328125, "learning_rate": 1.1025291744378266e-05, "loss": 2.3125, "step": 45940 }, { "epoch": 0.58816, "grad_norm": 1.5, "learning_rate": 1.1019438364702443e-05, "loss": 2.3072, "step": 45950 }, { "epoch": 0.588288, "grad_norm": 1.5234375, "learning_rate": 1.1013585637016523e-05, "loss": 2.2594, "step": 45960 }, { "epoch": 0.588416, "grad_norm": 1.6015625, "learning_rate": 1.1007733562279134e-05, "loss": 2.2967, "step": 45970 }, { "epoch": 0.588544, "grad_norm": 1.671875, "learning_rate": 1.1001882141448813e-05, "loss": 2.3208, "step": 45980 }, { "epoch": 0.588672, "grad_norm": 1.796875, "learning_rate": 1.0996031375483986e-05, "loss": 2.2749, "step": 45990 }, { "epoch": 0.5888, "grad_norm": 1.609375, "learning_rate": 1.0990181265342957e-05, "loss": 2.3066, "step": 46000 }, { "epoch": 0.588928, "grad_norm": 1.625, "learning_rate": 1.0984331811983956e-05, "loss": 2.2774, "step": 46010 }, { "epoch": 0.589056, "grad_norm": 1.8046875, "learning_rate": 1.0978483016365075e-05, "loss": 2.2922, "step": 46020 }, { "epoch": 0.589184, "grad_norm": 1.609375, "learning_rate": 1.0972634879444304e-05, "loss": 2.2766, "step": 46030 }, { "epoch": 0.589312, "grad_norm": 1.6796875, "learning_rate": 1.0966787402179537e-05, "loss": 2.2965, "step": 46040 }, { "epoch": 0.58944, "grad_norm": 1.5859375, "learning_rate": 1.096094058552855e-05, "loss": 2.2881, "step": 46050 }, { "epoch": 0.589568, "grad_norm": 1.6484375, "learning_rate": 1.0955094430449008e-05, "loss": 2.2778, "step": 46060 }, { "epoch": 0.589696, "grad_norm": 1.3984375, "learning_rate": 1.0949248937898481e-05, "loss": 2.2791, "step": 46070 }, { "epoch": 0.589824, "grad_norm": 1.578125, "learning_rate": 1.094340410883442e-05, "loss": 2.2705, "step": 46080 }, { "epoch": 0.589952, "grad_norm": 1.7109375, "learning_rate": 1.0937559944214165e-05, "loss": 2.2887, "step": 46090 }, { "epoch": 0.59008, "grad_norm": 1.5703125, "learning_rate": 1.0931716444994959e-05, "loss": 2.268, "step": 46100 }, { "epoch": 0.590208, "grad_norm": 1.5234375, "learning_rate": 1.0925873612133917e-05, "loss": 2.2686, "step": 46110 }, { "epoch": 0.590336, "grad_norm": 1.484375, "learning_rate": 1.0920031446588074e-05, "loss": 2.3118, "step": 46120 }, { "epoch": 0.590464, "grad_norm": 1.5078125, "learning_rate": 1.0914189949314327e-05, "loss": 2.2726, "step": 46130 }, { "epoch": 0.590592, "grad_norm": 1.5546875, "learning_rate": 1.0908349121269474e-05, "loss": 2.2627, "step": 46140 }, { "epoch": 0.59072, "grad_norm": 1.59375, "learning_rate": 1.0902508963410212e-05, "loss": 2.2605, "step": 46150 }, { "epoch": 0.590848, "grad_norm": 1.671875, "learning_rate": 1.0896669476693114e-05, "loss": 2.2809, "step": 46160 }, { "epoch": 0.590976, "grad_norm": 1.578125, "learning_rate": 1.0890830662074651e-05, "loss": 2.295, "step": 46170 }, { "epoch": 0.591104, "grad_norm": 1.5703125, "learning_rate": 1.0884992520511185e-05, "loss": 2.2951, "step": 46180 }, { "epoch": 0.591232, "grad_norm": 1.59375, "learning_rate": 1.0879155052958967e-05, "loss": 2.2975, "step": 46190 }, { "epoch": 0.59136, "grad_norm": 1.625, "learning_rate": 1.0873318260374124e-05, "loss": 2.2651, "step": 46200 }, { "epoch": 0.591488, "grad_norm": 1.4609375, "learning_rate": 1.08674821437127e-05, "loss": 2.2761, "step": 46210 }, { "epoch": 0.591616, "grad_norm": 1.578125, "learning_rate": 1.0861646703930605e-05, "loss": 2.2745, "step": 46220 }, { "epoch": 0.591744, "grad_norm": 1.6953125, "learning_rate": 1.0855811941983639e-05, "loss": 2.2759, "step": 46230 }, { "epoch": 0.591872, "grad_norm": 1.6796875, "learning_rate": 1.0849977858827512e-05, "loss": 2.2918, "step": 46240 }, { "epoch": 0.592, "grad_norm": 1.53125, "learning_rate": 1.0844144455417803e-05, "loss": 2.2872, "step": 46250 }, { "epoch": 0.592128, "grad_norm": 1.5859375, "learning_rate": 1.0838311732709979e-05, "loss": 2.284, "step": 46260 }, { "epoch": 0.592256, "grad_norm": 1.546875, "learning_rate": 1.0832479691659411e-05, "loss": 2.3334, "step": 46270 }, { "epoch": 0.592384, "grad_norm": 1.609375, "learning_rate": 1.0826648333221339e-05, "loss": 2.2826, "step": 46280 }, { "epoch": 0.592512, "grad_norm": 1.4453125, "learning_rate": 1.0820817658350913e-05, "loss": 2.2843, "step": 46290 }, { "epoch": 0.59264, "grad_norm": 3.1875, "learning_rate": 1.081498766800315e-05, "loss": 2.2807, "step": 46300 }, { "epoch": 0.592768, "grad_norm": 1.6015625, "learning_rate": 1.0809158363132967e-05, "loss": 2.2897, "step": 46310 }, { "epoch": 0.592896, "grad_norm": 1.6484375, "learning_rate": 1.0803329744695172e-05, "loss": 2.2751, "step": 46320 }, { "epoch": 0.593024, "grad_norm": 1.578125, "learning_rate": 1.0797501813644446e-05, "loss": 2.2955, "step": 46330 }, { "epoch": 0.593152, "grad_norm": 1.484375, "learning_rate": 1.0791674570935361e-05, "loss": 2.2532, "step": 46340 }, { "epoch": 0.59328, "grad_norm": 1.609375, "learning_rate": 1.0785848017522398e-05, "loss": 2.2559, "step": 46350 }, { "epoch": 0.593408, "grad_norm": 1.546875, "learning_rate": 1.0780022154359898e-05, "loss": 2.2943, "step": 46360 }, { "epoch": 0.593536, "grad_norm": 1.6171875, "learning_rate": 1.0774196982402097e-05, "loss": 2.3074, "step": 46370 }, { "epoch": 0.593664, "grad_norm": 1.640625, "learning_rate": 1.0768372502603127e-05, "loss": 2.2869, "step": 46380 }, { "epoch": 0.593792, "grad_norm": 1.578125, "learning_rate": 1.0762548715916996e-05, "loss": 2.3014, "step": 46390 }, { "epoch": 0.59392, "grad_norm": 1.6328125, "learning_rate": 1.07567256232976e-05, "loss": 2.283, "step": 46400 }, { "epoch": 0.594048, "grad_norm": 1.59375, "learning_rate": 1.0750903225698726e-05, "loss": 2.276, "step": 46410 }, { "epoch": 0.594176, "grad_norm": 1.6328125, "learning_rate": 1.074508152407404e-05, "loss": 2.2491, "step": 46420 }, { "epoch": 0.594304, "grad_norm": 1.4921875, "learning_rate": 1.0739260519377108e-05, "loss": 2.2683, "step": 46430 }, { "epoch": 0.594432, "grad_norm": 2.1875, "learning_rate": 1.0733440212561369e-05, "loss": 2.2596, "step": 46440 }, { "epoch": 0.59456, "grad_norm": 1.5703125, "learning_rate": 1.072762060458014e-05, "loss": 2.2736, "step": 46450 }, { "epoch": 0.594688, "grad_norm": 1.578125, "learning_rate": 1.0721801696386651e-05, "loss": 2.2782, "step": 46460 }, { "epoch": 0.594816, "grad_norm": 1.6875, "learning_rate": 1.0715983488933993e-05, "loss": 2.2757, "step": 46470 }, { "epoch": 0.594944, "grad_norm": 1.7421875, "learning_rate": 1.0710165983175147e-05, "loss": 2.2996, "step": 46480 }, { "epoch": 0.595072, "grad_norm": 1.640625, "learning_rate": 1.0704349180062992e-05, "loss": 2.2608, "step": 46490 }, { "epoch": 0.5952, "grad_norm": 1.4375, "learning_rate": 1.0698533080550273e-05, "loss": 2.3108, "step": 46500 }, { "epoch": 0.595328, "grad_norm": 1.5625, "learning_rate": 1.0692717685589633e-05, "loss": 2.2713, "step": 46510 }, { "epoch": 0.595456, "grad_norm": 1.4296875, "learning_rate": 1.0686902996133597e-05, "loss": 2.2885, "step": 46520 }, { "epoch": 0.595584, "grad_norm": 1.671875, "learning_rate": 1.0681089013134567e-05, "loss": 2.2791, "step": 46530 }, { "epoch": 0.595712, "grad_norm": 1.7421875, "learning_rate": 1.0675275737544837e-05, "loss": 2.2699, "step": 46540 }, { "epoch": 0.59584, "grad_norm": 1.5859375, "learning_rate": 1.0669463170316595e-05, "loss": 2.2624, "step": 46550 }, { "epoch": 0.595968, "grad_norm": 1.625, "learning_rate": 1.0663651312401887e-05, "loss": 2.2883, "step": 46560 }, { "epoch": 0.596096, "grad_norm": 1.5703125, "learning_rate": 1.0657840164752658e-05, "loss": 2.2878, "step": 46570 }, { "epoch": 0.596224, "grad_norm": 1.875, "learning_rate": 1.0652029728320744e-05, "loss": 2.2917, "step": 46580 }, { "epoch": 0.596352, "grad_norm": 1.6875, "learning_rate": 1.0646220004057851e-05, "loss": 2.262, "step": 46590 }, { "epoch": 0.59648, "grad_norm": 1.6953125, "learning_rate": 1.064041099291558e-05, "loss": 2.283, "step": 46600 }, { "epoch": 0.596608, "grad_norm": 1.515625, "learning_rate": 1.0634602695845403e-05, "loss": 2.2943, "step": 46610 }, { "epoch": 0.596736, "grad_norm": 1.6484375, "learning_rate": 1.0628795113798679e-05, "loss": 2.2686, "step": 46620 }, { "epoch": 0.596864, "grad_norm": 1.578125, "learning_rate": 1.0622988247726658e-05, "loss": 2.284, "step": 46630 }, { "epoch": 0.596992, "grad_norm": 1.4375, "learning_rate": 1.0617182098580464e-05, "loss": 2.2472, "step": 46640 }, { "epoch": 0.59712, "grad_norm": 1.4921875, "learning_rate": 1.0611376667311098e-05, "loss": 2.2685, "step": 46650 }, { "epoch": 0.597248, "grad_norm": 1.5703125, "learning_rate": 1.0605571954869467e-05, "loss": 2.2683, "step": 46660 }, { "epoch": 0.597376, "grad_norm": 2.234375, "learning_rate": 1.0599767962206342e-05, "loss": 2.2715, "step": 46670 }, { "epoch": 0.597504, "grad_norm": 1.671875, "learning_rate": 1.0593964690272362e-05, "loss": 2.2574, "step": 46680 }, { "epoch": 0.597632, "grad_norm": 1.578125, "learning_rate": 1.0588162140018086e-05, "loss": 2.2713, "step": 46690 }, { "epoch": 0.59776, "grad_norm": 1.4453125, "learning_rate": 1.0582360312393921e-05, "loss": 2.3008, "step": 46700 }, { "epoch": 0.597888, "grad_norm": 1.5234375, "learning_rate": 1.0576559208350174e-05, "loss": 2.2833, "step": 46710 }, { "epoch": 0.598016, "grad_norm": 1.6640625, "learning_rate": 1.0570758828837025e-05, "loss": 2.2911, "step": 46720 }, { "epoch": 0.598144, "grad_norm": 1.6015625, "learning_rate": 1.0564959174804538e-05, "loss": 2.3149, "step": 46730 }, { "epoch": 0.598272, "grad_norm": 1.65625, "learning_rate": 1.0559160247202662e-05, "loss": 2.3018, "step": 46740 }, { "epoch": 0.5984, "grad_norm": 1.578125, "learning_rate": 1.055336204698122e-05, "loss": 2.2601, "step": 46750 }, { "epoch": 0.598528, "grad_norm": 1.578125, "learning_rate": 1.0547564575089915e-05, "loss": 2.2582, "step": 46760 }, { "epoch": 0.598656, "grad_norm": 1.65625, "learning_rate": 1.0541767832478344e-05, "loss": 2.2441, "step": 46770 }, { "epoch": 0.598784, "grad_norm": 1.484375, "learning_rate": 1.0535971820095974e-05, "loss": 2.3209, "step": 46780 }, { "epoch": 0.598912, "grad_norm": 1.515625, "learning_rate": 1.0530176538892147e-05, "loss": 2.2738, "step": 46790 }, { "epoch": 0.59904, "grad_norm": 1.75, "learning_rate": 1.0524381989816097e-05, "loss": 2.2629, "step": 46800 }, { "epoch": 0.599168, "grad_norm": 2.0625, "learning_rate": 1.0518588173816934e-05, "loss": 2.2772, "step": 46810 }, { "epoch": 0.599296, "grad_norm": 1.5625, "learning_rate": 1.0512795091843642e-05, "loss": 2.2591, "step": 46820 }, { "epoch": 0.599424, "grad_norm": 1.515625, "learning_rate": 1.0507002744845097e-05, "loss": 2.2839, "step": 46830 }, { "epoch": 0.599552, "grad_norm": 1.5546875, "learning_rate": 1.0501211133770042e-05, "loss": 2.2951, "step": 46840 }, { "epoch": 0.59968, "grad_norm": 1.609375, "learning_rate": 1.0495420259567103e-05, "loss": 2.2813, "step": 46850 }, { "epoch": 0.599808, "grad_norm": 16.875, "learning_rate": 1.0489630123184795e-05, "loss": 2.3064, "step": 46860 }, { "epoch": 0.599936, "grad_norm": 1.609375, "learning_rate": 1.0483840725571501e-05, "loss": 2.296, "step": 46870 }, { "epoch": 0.600064, "grad_norm": 1.609375, "learning_rate": 1.0478052067675479e-05, "loss": 2.2759, "step": 46880 }, { "epoch": 0.600192, "grad_norm": 1.515625, "learning_rate": 1.0472264150444884e-05, "loss": 2.2861, "step": 46890 }, { "epoch": 0.60032, "grad_norm": 1.6171875, "learning_rate": 1.0466476974827733e-05, "loss": 2.3242, "step": 46900 }, { "epoch": 0.600448, "grad_norm": 1.5, "learning_rate": 1.0460690541771932e-05, "loss": 2.3154, "step": 46910 }, { "epoch": 0.600576, "grad_norm": 1.578125, "learning_rate": 1.0454904852225257e-05, "loss": 2.2998, "step": 46920 }, { "epoch": 0.600704, "grad_norm": 1.5078125, "learning_rate": 1.0449119907135364e-05, "loss": 2.3079, "step": 46930 }, { "epoch": 0.600832, "grad_norm": 1.6640625, "learning_rate": 1.0443335707449794e-05, "loss": 2.2654, "step": 46940 }, { "epoch": 0.60096, "grad_norm": 1.484375, "learning_rate": 1.0437552254115963e-05, "loss": 2.3032, "step": 46950 }, { "epoch": 0.601088, "grad_norm": 1.5078125, "learning_rate": 1.043176954808115e-05, "loss": 2.2602, "step": 46960 }, { "epoch": 0.601216, "grad_norm": 1.6015625, "learning_rate": 1.042598759029254e-05, "loss": 2.2996, "step": 46970 }, { "epoch": 0.601344, "grad_norm": 1.5625, "learning_rate": 1.0420206381697172e-05, "loss": 2.2637, "step": 46980 }, { "epoch": 0.601472, "grad_norm": 2.03125, "learning_rate": 1.0414425923241965e-05, "loss": 2.3088, "step": 46990 }, { "epoch": 0.6016, "grad_norm": 1.7578125, "learning_rate": 1.0408646215873732e-05, "loss": 2.2536, "step": 47000 }, { "epoch": 0.601728, "grad_norm": 1.5390625, "learning_rate": 1.0402867260539143e-05, "loss": 2.2476, "step": 47010 }, { "epoch": 0.601856, "grad_norm": 1.546875, "learning_rate": 1.0397089058184754e-05, "loss": 2.3042, "step": 47020 }, { "epoch": 0.601984, "grad_norm": 1.59375, "learning_rate": 1.0391311609757e-05, "loss": 2.2573, "step": 47030 }, { "epoch": 0.602112, "grad_norm": 1.640625, "learning_rate": 1.0385534916202185e-05, "loss": 2.3219, "step": 47040 }, { "epoch": 0.60224, "grad_norm": 1.5703125, "learning_rate": 1.0379758978466498e-05, "loss": 2.2879, "step": 47050 }, { "epoch": 0.602368, "grad_norm": 1.4296875, "learning_rate": 1.0373983797495995e-05, "loss": 2.2779, "step": 47060 }, { "epoch": 0.602496, "grad_norm": 1.625, "learning_rate": 1.0368209374236613e-05, "loss": 2.2738, "step": 47070 }, { "epoch": 0.602624, "grad_norm": 1.59375, "learning_rate": 1.0362435709634174e-05, "loss": 2.2493, "step": 47080 }, { "epoch": 0.602752, "grad_norm": 2.09375, "learning_rate": 1.0356662804634355e-05, "loss": 2.2732, "step": 47090 }, { "epoch": 0.60288, "grad_norm": 1.6953125, "learning_rate": 1.035089066018272e-05, "loss": 2.2856, "step": 47100 }, { "epoch": 0.603008, "grad_norm": 2.078125, "learning_rate": 1.0345119277224716e-05, "loss": 2.2858, "step": 47110 }, { "epoch": 0.603136, "grad_norm": 1.65625, "learning_rate": 1.0339348656705656e-05, "loss": 2.2819, "step": 47120 }, { "epoch": 0.603264, "grad_norm": 2.125, "learning_rate": 1.0333578799570724e-05, "loss": 2.2767, "step": 47130 }, { "epoch": 0.603392, "grad_norm": 1.5390625, "learning_rate": 1.0327809706764992e-05, "loss": 2.2427, "step": 47140 }, { "epoch": 0.60352, "grad_norm": 1.5546875, "learning_rate": 1.0322041379233397e-05, "loss": 2.2457, "step": 47150 }, { "epoch": 0.603648, "grad_norm": 1.4609375, "learning_rate": 1.0316273817920751e-05, "loss": 2.2563, "step": 47160 }, { "epoch": 0.603776, "grad_norm": 1.59375, "learning_rate": 1.0310507023771744e-05, "loss": 2.2964, "step": 47170 }, { "epoch": 0.603904, "grad_norm": 1.8828125, "learning_rate": 1.0304740997730941e-05, "loss": 2.2793, "step": 47180 }, { "epoch": 0.604032, "grad_norm": 1.890625, "learning_rate": 1.0298975740742771e-05, "loss": 2.261, "step": 47190 }, { "epoch": 0.60416, "grad_norm": 1.4921875, "learning_rate": 1.029321125375156e-05, "loss": 2.2887, "step": 47200 }, { "epoch": 0.604288, "grad_norm": 1.6640625, "learning_rate": 1.028744753770148e-05, "loss": 2.2729, "step": 47210 }, { "epoch": 0.604416, "grad_norm": 1.4453125, "learning_rate": 1.0281684593536596e-05, "loss": 2.2962, "step": 47220 }, { "epoch": 0.604544, "grad_norm": 1.5234375, "learning_rate": 1.0275922422200843e-05, "loss": 2.2721, "step": 47230 }, { "epoch": 0.604672, "grad_norm": 1.6171875, "learning_rate": 1.027016102463802e-05, "loss": 2.266, "step": 47240 }, { "epoch": 0.6048, "grad_norm": 1.4921875, "learning_rate": 1.0264400401791811e-05, "loss": 2.2942, "step": 47250 }, { "epoch": 0.604928, "grad_norm": 1.5390625, "learning_rate": 1.0258640554605767e-05, "loss": 2.265, "step": 47260 }, { "epoch": 0.605056, "grad_norm": 1.625, "learning_rate": 1.0252881484023311e-05, "loss": 2.3061, "step": 47270 }, { "epoch": 0.605184, "grad_norm": 1.703125, "learning_rate": 1.0247123190987746e-05, "loss": 2.2789, "step": 47280 }, { "epoch": 0.605312, "grad_norm": 1.5, "learning_rate": 1.0241365676442239e-05, "loss": 2.2761, "step": 47290 }, { "epoch": 0.60544, "grad_norm": 1.4921875, "learning_rate": 1.0235608941329828e-05, "loss": 2.2856, "step": 47300 }, { "epoch": 0.605568, "grad_norm": 1.4765625, "learning_rate": 1.0229852986593442e-05, "loss": 2.2679, "step": 47310 }, { "epoch": 0.605696, "grad_norm": 1.4921875, "learning_rate": 1.0224097813175859e-05, "loss": 2.2712, "step": 47320 }, { "epoch": 0.605824, "grad_norm": 1.5078125, "learning_rate": 1.0218343422019734e-05, "loss": 2.2665, "step": 47330 }, { "epoch": 0.605952, "grad_norm": 1.546875, "learning_rate": 1.0212589814067612e-05, "loss": 2.2683, "step": 47340 }, { "epoch": 0.60608, "grad_norm": 8.875, "learning_rate": 1.0206836990261884e-05, "loss": 2.2907, "step": 47350 }, { "epoch": 0.606208, "grad_norm": 3.953125, "learning_rate": 1.0201084951544837e-05, "loss": 2.302, "step": 47360 }, { "epoch": 0.606336, "grad_norm": 1.5546875, "learning_rate": 1.0195333698858606e-05, "loss": 2.2834, "step": 47370 }, { "epoch": 0.606464, "grad_norm": 1.5, "learning_rate": 1.0189583233145214e-05, "loss": 2.2583, "step": 47380 }, { "epoch": 0.606592, "grad_norm": 1.5703125, "learning_rate": 1.0183833555346548e-05, "loss": 2.2907, "step": 47390 }, { "epoch": 0.60672, "grad_norm": 1.4921875, "learning_rate": 1.0178084666404369e-05, "loss": 2.2641, "step": 47400 }, { "epoch": 0.606848, "grad_norm": 1.59375, "learning_rate": 1.01723365672603e-05, "loss": 2.3031, "step": 47410 }, { "epoch": 0.606976, "grad_norm": 1.546875, "learning_rate": 1.0166589258855856e-05, "loss": 2.2843, "step": 47420 }, { "epoch": 0.607104, "grad_norm": 1.8984375, "learning_rate": 1.0160842742132402e-05, "loss": 2.2986, "step": 47430 }, { "epoch": 0.607232, "grad_norm": 1.7890625, "learning_rate": 1.015509701803117e-05, "loss": 2.3029, "step": 47440 }, { "epoch": 0.60736, "grad_norm": 2.0, "learning_rate": 1.0149352087493286e-05, "loss": 2.3024, "step": 47450 }, { "epoch": 0.607488, "grad_norm": 1.6171875, "learning_rate": 1.0143607951459727e-05, "loss": 2.3007, "step": 47460 }, { "epoch": 0.607616, "grad_norm": 1.65625, "learning_rate": 1.013786461087134e-05, "loss": 2.2813, "step": 47470 }, { "epoch": 0.607744, "grad_norm": 7.1875, "learning_rate": 1.0132122066668853e-05, "loss": 2.294, "step": 47480 }, { "epoch": 0.607872, "grad_norm": 1.5078125, "learning_rate": 1.0126380319792856e-05, "loss": 2.2665, "step": 47490 }, { "epoch": 0.608, "grad_norm": 3.453125, "learning_rate": 1.0120639371183806e-05, "loss": 2.2923, "step": 47500 }, { "epoch": 0.608128, "grad_norm": 1.5234375, "learning_rate": 1.0114899221782038e-05, "loss": 2.2543, "step": 47510 }, { "epoch": 0.608256, "grad_norm": 1.6015625, "learning_rate": 1.0109159872527739e-05, "loss": 2.2724, "step": 47520 }, { "epoch": 0.608384, "grad_norm": 1.5078125, "learning_rate": 1.0103421324360992e-05, "loss": 2.3059, "step": 47530 }, { "epoch": 0.608512, "grad_norm": 1.4765625, "learning_rate": 1.009768357822173e-05, "loss": 2.2663, "step": 47540 }, { "epoch": 0.60864, "grad_norm": 1.59375, "learning_rate": 1.0091946635049752e-05, "loss": 2.3057, "step": 47550 }, { "epoch": 0.608768, "grad_norm": 1.5703125, "learning_rate": 1.0086210495784736e-05, "loss": 2.2913, "step": 47560 }, { "epoch": 0.608896, "grad_norm": 1.5703125, "learning_rate": 1.0080475161366225e-05, "loss": 2.283, "step": 47570 }, { "epoch": 0.609024, "grad_norm": 1.5390625, "learning_rate": 1.0074740632733621e-05, "loss": 2.2993, "step": 47580 }, { "epoch": 0.609152, "grad_norm": 1.6875, "learning_rate": 1.0069006910826215e-05, "loss": 2.3152, "step": 47590 }, { "epoch": 0.60928, "grad_norm": 1.6953125, "learning_rate": 1.0063273996583145e-05, "loss": 2.2787, "step": 47600 }, { "epoch": 0.609408, "grad_norm": 1.8046875, "learning_rate": 1.0057541890943422e-05, "loss": 2.2848, "step": 47610 }, { "epoch": 0.609536, "grad_norm": 1.609375, "learning_rate": 1.0051810594845936e-05, "loss": 2.2574, "step": 47620 }, { "epoch": 0.609664, "grad_norm": 1.59375, "learning_rate": 1.0046080109229432e-05, "loss": 2.2752, "step": 47630 }, { "epoch": 0.609792, "grad_norm": 1.5390625, "learning_rate": 1.0040350435032518e-05, "loss": 2.2692, "step": 47640 }, { "epoch": 0.60992, "grad_norm": 1.515625, "learning_rate": 1.003462157319369e-05, "loss": 2.2644, "step": 47650 }, { "epoch": 0.610048, "grad_norm": 1.5, "learning_rate": 1.002889352465129e-05, "loss": 2.3011, "step": 47660 }, { "epoch": 0.610176, "grad_norm": 1.59375, "learning_rate": 1.002316629034354e-05, "loss": 2.3015, "step": 47670 }, { "epoch": 0.610304, "grad_norm": 1.703125, "learning_rate": 1.001743987120852e-05, "loss": 2.2725, "step": 47680 }, { "epoch": 0.610432, "grad_norm": 1.59375, "learning_rate": 1.001171426818418e-05, "loss": 2.2961, "step": 47690 }, { "epoch": 0.61056, "grad_norm": 1.84375, "learning_rate": 1.0005989482208337e-05, "loss": 2.3091, "step": 47700 }, { "epoch": 0.610688, "grad_norm": 1.5390625, "learning_rate": 1.0000265514218673e-05, "loss": 2.2523, "step": 47710 }, { "epoch": 0.610816, "grad_norm": 2.578125, "learning_rate": 9.99454236515273e-06, "loss": 2.2688, "step": 47720 }, { "epoch": 0.610944, "grad_norm": 1.40625, "learning_rate": 9.988820035947939e-06, "loss": 2.2928, "step": 47730 }, { "epoch": 0.611072, "grad_norm": 1.5703125, "learning_rate": 9.983098527541566e-06, "loss": 2.283, "step": 47740 }, { "epoch": 0.6112, "grad_norm": 1.4296875, "learning_rate": 9.977377840870755e-06, "loss": 2.2801, "step": 47750 }, { "epoch": 0.611328, "grad_norm": 1.5703125, "learning_rate": 9.971657976872528e-06, "loss": 2.2744, "step": 47760 }, { "epoch": 0.611456, "grad_norm": 1.5078125, "learning_rate": 9.965938936483756e-06, "loss": 2.2996, "step": 47770 }, { "epoch": 0.611584, "grad_norm": 2.125, "learning_rate": 9.960220720641177e-06, "loss": 2.2991, "step": 47780 }, { "epoch": 0.611712, "grad_norm": 1.6640625, "learning_rate": 9.954503330281403e-06, "loss": 2.2805, "step": 47790 }, { "epoch": 0.61184, "grad_norm": 1.6796875, "learning_rate": 9.948786766340903e-06, "loss": 2.3235, "step": 47800 }, { "epoch": 0.611968, "grad_norm": 1.734375, "learning_rate": 9.943071029756007e-06, "loss": 2.2892, "step": 47810 }, { "epoch": 0.612096, "grad_norm": 1.6171875, "learning_rate": 9.937356121462924e-06, "loss": 2.2837, "step": 47820 }, { "epoch": 0.612224, "grad_norm": 1.5859375, "learning_rate": 9.931642042397707e-06, "loss": 2.2714, "step": 47830 }, { "epoch": 0.612352, "grad_norm": 10.875, "learning_rate": 9.925928793496304e-06, "loss": 2.2565, "step": 47840 }, { "epoch": 0.61248, "grad_norm": 1.515625, "learning_rate": 9.920216375694492e-06, "loss": 2.2811, "step": 47850 }, { "epoch": 0.612608, "grad_norm": 1.5859375, "learning_rate": 9.914504789927925e-06, "loss": 2.3101, "step": 47860 }, { "epoch": 0.612736, "grad_norm": 1.4921875, "learning_rate": 9.908794037132135e-06, "loss": 2.2934, "step": 47870 }, { "epoch": 0.612864, "grad_norm": 1.5546875, "learning_rate": 9.903084118242502e-06, "loss": 2.3014, "step": 47880 }, { "epoch": 0.612992, "grad_norm": 1.46875, "learning_rate": 9.897375034194269e-06, "loss": 2.2837, "step": 47890 }, { "epoch": 0.61312, "grad_norm": 1.578125, "learning_rate": 9.891666785922552e-06, "loss": 2.237, "step": 47900 }, { "epoch": 0.613248, "grad_norm": 1.609375, "learning_rate": 9.885959374362321e-06, "loss": 2.2641, "step": 47910 }, { "epoch": 0.613376, "grad_norm": 1.546875, "learning_rate": 9.880252800448413e-06, "loss": 2.2864, "step": 47920 }, { "epoch": 0.613504, "grad_norm": 1.609375, "learning_rate": 9.87454706511553e-06, "loss": 2.2953, "step": 47930 }, { "epoch": 0.613632, "grad_norm": 1.59375, "learning_rate": 9.868842169298233e-06, "loss": 2.258, "step": 47940 }, { "epoch": 0.61376, "grad_norm": 1.46875, "learning_rate": 9.863138113930938e-06, "loss": 2.2951, "step": 47950 }, { "epoch": 0.613888, "grad_norm": 1.578125, "learning_rate": 9.85743489994795e-06, "loss": 2.2811, "step": 47960 }, { "epoch": 0.614016, "grad_norm": 1.5625, "learning_rate": 9.851732528283399e-06, "loss": 2.2871, "step": 47970 }, { "epoch": 0.614144, "grad_norm": 1.6875, "learning_rate": 9.846030999871309e-06, "loss": 2.2565, "step": 47980 }, { "epoch": 0.614272, "grad_norm": 1.59375, "learning_rate": 9.84033031564555e-06, "loss": 2.2615, "step": 47990 }, { "epoch": 0.6144, "grad_norm": 1.65625, "learning_rate": 9.834630476539851e-06, "loss": 2.3131, "step": 48000 }, { "epoch": 0.614528, "grad_norm": 1.4609375, "learning_rate": 9.828931483487817e-06, "loss": 2.2746, "step": 48010 }, { "epoch": 0.614656, "grad_norm": 1.5703125, "learning_rate": 9.823233337422901e-06, "loss": 2.277, "step": 48020 }, { "epoch": 0.614784, "grad_norm": 1.578125, "learning_rate": 9.81753603927842e-06, "loss": 2.2841, "step": 48030 }, { "epoch": 0.614912, "grad_norm": 1.4921875, "learning_rate": 9.811839589987558e-06, "loss": 2.2857, "step": 48040 }, { "epoch": 0.61504, "grad_norm": 1.5, "learning_rate": 9.806143990483356e-06, "loss": 2.2816, "step": 48050 }, { "epoch": 0.615168, "grad_norm": 1.53125, "learning_rate": 9.800449241698708e-06, "loss": 2.2614, "step": 48060 }, { "epoch": 0.615296, "grad_norm": 2.40625, "learning_rate": 9.794755344566391e-06, "loss": 2.3254, "step": 48070 }, { "epoch": 0.615424, "grad_norm": 1.5, "learning_rate": 9.789062300019018e-06, "loss": 2.2854, "step": 48080 }, { "epoch": 0.615552, "grad_norm": 1.7890625, "learning_rate": 9.783370108989066e-06, "loss": 2.2818, "step": 48090 }, { "epoch": 0.61568, "grad_norm": 1.625, "learning_rate": 9.777678772408895e-06, "loss": 2.3241, "step": 48100 }, { "epoch": 0.615808, "grad_norm": 1.453125, "learning_rate": 9.7719882912107e-06, "loss": 2.2748, "step": 48110 }, { "epoch": 0.615936, "grad_norm": 1.5859375, "learning_rate": 9.766298666326541e-06, "loss": 2.2792, "step": 48120 }, { "epoch": 0.616064, "grad_norm": 1.625, "learning_rate": 9.760609898688348e-06, "loss": 2.2966, "step": 48130 }, { "epoch": 0.616192, "grad_norm": 1.421875, "learning_rate": 9.7549219892279e-06, "loss": 2.2741, "step": 48140 }, { "epoch": 0.61632, "grad_norm": 1.484375, "learning_rate": 9.749234938876843e-06, "loss": 2.2727, "step": 48150 }, { "epoch": 0.616448, "grad_norm": 1.53125, "learning_rate": 9.743548748566674e-06, "loss": 2.2847, "step": 48160 }, { "epoch": 0.616576, "grad_norm": 1.5546875, "learning_rate": 9.73786341922875e-06, "loss": 2.2748, "step": 48170 }, { "epoch": 0.616704, "grad_norm": 1.4375, "learning_rate": 9.7321789517943e-06, "loss": 2.2823, "step": 48180 }, { "epoch": 0.616832, "grad_norm": 1.5078125, "learning_rate": 9.726495347194406e-06, "loss": 2.2862, "step": 48190 }, { "epoch": 0.61696, "grad_norm": 1.5390625, "learning_rate": 9.720812606359987e-06, "loss": 2.3108, "step": 48200 }, { "epoch": 0.617088, "grad_norm": 1.640625, "learning_rate": 9.715130730221854e-06, "loss": 2.3081, "step": 48210 }, { "epoch": 0.617216, "grad_norm": 1.4921875, "learning_rate": 9.709449719710657e-06, "loss": 2.2598, "step": 48220 }, { "epoch": 0.617344, "grad_norm": 1.5546875, "learning_rate": 9.703769575756907e-06, "loss": 2.3112, "step": 48230 }, { "epoch": 0.617472, "grad_norm": 1.4609375, "learning_rate": 9.698090299290978e-06, "loss": 2.2536, "step": 48240 }, { "epoch": 0.6176, "grad_norm": 1.5546875, "learning_rate": 9.692411891243094e-06, "loss": 2.3082, "step": 48250 }, { "epoch": 0.617728, "grad_norm": 1.5, "learning_rate": 9.68673435254334e-06, "loss": 2.2924, "step": 48260 }, { "epoch": 0.617856, "grad_norm": 1.3984375, "learning_rate": 9.681057684121666e-06, "loss": 2.2883, "step": 48270 }, { "epoch": 0.617984, "grad_norm": 1.546875, "learning_rate": 9.675381886907862e-06, "loss": 2.2925, "step": 48280 }, { "epoch": 0.618112, "grad_norm": 1.6796875, "learning_rate": 9.6697069618316e-06, "loss": 2.288, "step": 48290 }, { "epoch": 0.61824, "grad_norm": 3.4375, "learning_rate": 9.664032909822389e-06, "loss": 2.2536, "step": 48300 }, { "epoch": 0.618368, "grad_norm": 1.546875, "learning_rate": 9.658359731809599e-06, "loss": 2.2795, "step": 48310 }, { "epoch": 0.618496, "grad_norm": 1.9921875, "learning_rate": 9.652687428722463e-06, "loss": 2.2529, "step": 48320 }, { "epoch": 0.618624, "grad_norm": 1.640625, "learning_rate": 9.647016001490066e-06, "loss": 2.3264, "step": 48330 }, { "epoch": 0.618752, "grad_norm": 1.5234375, "learning_rate": 9.64134545104135e-06, "loss": 2.2791, "step": 48340 }, { "epoch": 0.61888, "grad_norm": 1.453125, "learning_rate": 9.635675778305116e-06, "loss": 2.2763, "step": 48350 }, { "epoch": 0.619008, "grad_norm": 1.546875, "learning_rate": 9.630006984210018e-06, "loss": 2.3074, "step": 48360 }, { "epoch": 0.619136, "grad_norm": 1.5703125, "learning_rate": 9.624339069684566e-06, "loss": 2.2864, "step": 48370 }, { "epoch": 0.619264, "grad_norm": 2.046875, "learning_rate": 9.61867203565713e-06, "loss": 2.2606, "step": 48380 }, { "epoch": 0.619392, "grad_norm": 1.53125, "learning_rate": 9.61300588305593e-06, "loss": 2.2991, "step": 48390 }, { "epoch": 0.61952, "grad_norm": 1.5078125, "learning_rate": 9.607340612809038e-06, "loss": 2.2835, "step": 48400 }, { "epoch": 0.619648, "grad_norm": 1.5859375, "learning_rate": 9.601676225844406e-06, "loss": 2.2656, "step": 48410 }, { "epoch": 0.619776, "grad_norm": 1.484375, "learning_rate": 9.596012723089818e-06, "loss": 2.2599, "step": 48420 }, { "epoch": 0.619904, "grad_norm": 1.515625, "learning_rate": 9.5903501054729e-06, "loss": 2.2995, "step": 48430 }, { "epoch": 0.620032, "grad_norm": 1.6328125, "learning_rate": 9.584688373921175e-06, "loss": 2.2874, "step": 48440 }, { "epoch": 0.62016, "grad_norm": 1.609375, "learning_rate": 9.579027529361985e-06, "loss": 2.2836, "step": 48450 }, { "epoch": 0.620288, "grad_norm": 1.65625, "learning_rate": 9.573367572722544e-06, "loss": 2.2964, "step": 48460 }, { "epoch": 0.620416, "grad_norm": 1.6640625, "learning_rate": 9.567708504929918e-06, "loss": 2.264, "step": 48470 }, { "epoch": 0.620544, "grad_norm": 1.484375, "learning_rate": 9.562050326911012e-06, "loss": 2.2859, "step": 48480 }, { "epoch": 0.620672, "grad_norm": 1.734375, "learning_rate": 9.556393039592617e-06, "loss": 2.2801, "step": 48490 }, { "epoch": 0.6208, "grad_norm": 1.5, "learning_rate": 9.550736643901348e-06, "loss": 2.2663, "step": 48500 }, { "epoch": 0.620928, "grad_norm": 1.6171875, "learning_rate": 9.545081140763683e-06, "loss": 2.3092, "step": 48510 }, { "epoch": 0.621056, "grad_norm": 1.546875, "learning_rate": 9.539426531105967e-06, "loss": 2.2628, "step": 48520 }, { "epoch": 0.621184, "grad_norm": 1.3984375, "learning_rate": 9.533772815854383e-06, "loss": 2.2821, "step": 48530 }, { "epoch": 0.621312, "grad_norm": 1.8828125, "learning_rate": 9.528119995934968e-06, "loss": 2.2913, "step": 48540 }, { "epoch": 0.62144, "grad_norm": 1.5546875, "learning_rate": 9.522468072273625e-06, "loss": 2.2986, "step": 48550 }, { "epoch": 0.621568, "grad_norm": 1.7578125, "learning_rate": 9.516817045796097e-06, "loss": 2.2698, "step": 48560 }, { "epoch": 0.621696, "grad_norm": 1.5625, "learning_rate": 9.511166917427982e-06, "loss": 2.2878, "step": 48570 }, { "epoch": 0.621824, "grad_norm": 2.5625, "learning_rate": 9.505517688094742e-06, "loss": 2.2334, "step": 48580 }, { "epoch": 0.621952, "grad_norm": 1.5625, "learning_rate": 9.49986935872167e-06, "loss": 2.2399, "step": 48590 }, { "epoch": 0.62208, "grad_norm": 1.6484375, "learning_rate": 9.494221930233948e-06, "loss": 2.262, "step": 48600 }, { "epoch": 0.622208, "grad_norm": 1.546875, "learning_rate": 9.488575403556572e-06, "loss": 2.2649, "step": 48610 }, { "epoch": 0.622336, "grad_norm": 1.734375, "learning_rate": 9.482929779614401e-06, "loss": 2.2777, "step": 48620 }, { "epoch": 0.622464, "grad_norm": 1.5078125, "learning_rate": 9.477285059332165e-06, "loss": 2.2581, "step": 48630 }, { "epoch": 0.622592, "grad_norm": 1.5859375, "learning_rate": 9.471641243634424e-06, "loss": 2.2844, "step": 48640 }, { "epoch": 0.62272, "grad_norm": 1.5234375, "learning_rate": 9.4659983334456e-06, "loss": 2.2831, "step": 48650 }, { "epoch": 0.622848, "grad_norm": 1.5390625, "learning_rate": 9.460356329689966e-06, "loss": 2.2296, "step": 48660 }, { "epoch": 0.622976, "grad_norm": 1.515625, "learning_rate": 9.454715233291646e-06, "loss": 2.2569, "step": 48670 }, { "epoch": 0.623104, "grad_norm": 1.6328125, "learning_rate": 9.449075045174609e-06, "loss": 2.2798, "step": 48680 }, { "epoch": 0.623232, "grad_norm": 1.84375, "learning_rate": 9.443435766262687e-06, "loss": 2.2802, "step": 48690 }, { "epoch": 0.62336, "grad_norm": 1.5390625, "learning_rate": 9.437797397479553e-06, "loss": 2.3167, "step": 48700 }, { "epoch": 0.623488, "grad_norm": 1.734375, "learning_rate": 9.43215993974873e-06, "loss": 2.2638, "step": 48710 }, { "epoch": 0.623616, "grad_norm": 1.6953125, "learning_rate": 9.426523393993613e-06, "loss": 2.2968, "step": 48720 }, { "epoch": 0.623744, "grad_norm": 1.5078125, "learning_rate": 9.420887761137417e-06, "loss": 2.2783, "step": 48730 }, { "epoch": 0.623872, "grad_norm": 1.53125, "learning_rate": 9.415253042103222e-06, "loss": 2.2503, "step": 48740 }, { "epoch": 0.624, "grad_norm": 1.9140625, "learning_rate": 9.409619237813966e-06, "loss": 2.2899, "step": 48750 }, { "epoch": 0.624128, "grad_norm": 1.7109375, "learning_rate": 9.403986349192423e-06, "loss": 2.292, "step": 48760 }, { "epoch": 0.624256, "grad_norm": 1.515625, "learning_rate": 9.398354377161227e-06, "loss": 2.3102, "step": 48770 }, { "epoch": 0.624384, "grad_norm": 1.640625, "learning_rate": 9.392723322642856e-06, "loss": 2.2762, "step": 48780 }, { "epoch": 0.624512, "grad_norm": 1.46875, "learning_rate": 9.387093186559638e-06, "loss": 2.2871, "step": 48790 }, { "epoch": 0.62464, "grad_norm": 1.5, "learning_rate": 9.381463969833756e-06, "loss": 2.2717, "step": 48800 }, { "epoch": 0.624768, "grad_norm": 1.5703125, "learning_rate": 9.375835673387239e-06, "loss": 2.257, "step": 48810 }, { "epoch": 0.624896, "grad_norm": 1.5, "learning_rate": 9.370208298141956e-06, "loss": 2.2755, "step": 48820 }, { "epoch": 0.625024, "grad_norm": 1.59375, "learning_rate": 9.364581845019653e-06, "loss": 2.3059, "step": 48830 }, { "epoch": 0.625152, "grad_norm": 1.4921875, "learning_rate": 9.358956314941887e-06, "loss": 2.2809, "step": 48840 }, { "epoch": 0.62528, "grad_norm": 1.65625, "learning_rate": 9.35333170883009e-06, "loss": 2.293, "step": 48850 }, { "epoch": 0.625408, "grad_norm": 1.5546875, "learning_rate": 9.347708027605538e-06, "loss": 2.3053, "step": 48860 }, { "epoch": 0.625536, "grad_norm": 1.609375, "learning_rate": 9.342085272189354e-06, "loss": 2.2649, "step": 48870 }, { "epoch": 0.625664, "grad_norm": 1.5, "learning_rate": 9.336463443502499e-06, "loss": 2.2512, "step": 48880 }, { "epoch": 0.625792, "grad_norm": 4.84375, "learning_rate": 9.330842542465804e-06, "loss": 2.2775, "step": 48890 }, { "epoch": 0.62592, "grad_norm": 1.515625, "learning_rate": 9.325222569999926e-06, "loss": 2.2866, "step": 48900 }, { "epoch": 0.626048, "grad_norm": 1.609375, "learning_rate": 9.319603527025388e-06, "loss": 2.2806, "step": 48910 }, { "epoch": 0.626176, "grad_norm": 1.65625, "learning_rate": 9.313985414462549e-06, "loss": 2.2957, "step": 48920 }, { "epoch": 0.626304, "grad_norm": 1.609375, "learning_rate": 9.30836823323161e-06, "loss": 2.2798, "step": 48930 }, { "epoch": 0.626432, "grad_norm": 1.515625, "learning_rate": 9.302751984252642e-06, "loss": 2.2682, "step": 48940 }, { "epoch": 0.62656, "grad_norm": 1.546875, "learning_rate": 9.29713666844555e-06, "loss": 2.2554, "step": 48950 }, { "epoch": 0.626688, "grad_norm": 1.703125, "learning_rate": 9.291522286730071e-06, "loss": 2.2828, "step": 48960 }, { "epoch": 0.626816, "grad_norm": 1.4375, "learning_rate": 9.285908840025817e-06, "loss": 2.2872, "step": 48970 }, { "epoch": 0.626944, "grad_norm": 1.5703125, "learning_rate": 9.280296329252232e-06, "loss": 2.2879, "step": 48980 }, { "epoch": 0.627072, "grad_norm": 1.53125, "learning_rate": 9.274684755328602e-06, "loss": 2.299, "step": 48990 }, { "epoch": 0.6272, "grad_norm": 2.984375, "learning_rate": 9.269074119174076e-06, "loss": 2.2759, "step": 49000 }, { "epoch": 0.627328, "grad_norm": 1.6171875, "learning_rate": 9.26346442170763e-06, "loss": 2.2727, "step": 49010 }, { "epoch": 0.627456, "grad_norm": 1.5, "learning_rate": 9.2578556638481e-06, "loss": 2.3002, "step": 49020 }, { "epoch": 0.627584, "grad_norm": 1.578125, "learning_rate": 9.252247846514164e-06, "loss": 2.2704, "step": 49030 }, { "epoch": 0.627712, "grad_norm": 1.5546875, "learning_rate": 9.246640970624346e-06, "loss": 2.2817, "step": 49040 }, { "epoch": 0.62784, "grad_norm": 1.6171875, "learning_rate": 9.241035037097008e-06, "loss": 2.2914, "step": 49050 }, { "epoch": 0.627968, "grad_norm": 1.578125, "learning_rate": 9.23543004685038e-06, "loss": 2.2743, "step": 49060 }, { "epoch": 0.628096, "grad_norm": 1.5078125, "learning_rate": 9.22982600080251e-06, "loss": 2.2931, "step": 49070 }, { "epoch": 0.628224, "grad_norm": 1.53125, "learning_rate": 9.224222899871309e-06, "loss": 2.2493, "step": 49080 }, { "epoch": 0.628352, "grad_norm": 1.5859375, "learning_rate": 9.21862074497453e-06, "loss": 2.2767, "step": 49090 }, { "epoch": 0.62848, "grad_norm": 1.6875, "learning_rate": 9.213019537029763e-06, "loss": 2.2955, "step": 49100 }, { "epoch": 0.628608, "grad_norm": 2.140625, "learning_rate": 9.207419276954454e-06, "loss": 2.2916, "step": 49110 }, { "epoch": 0.628736, "grad_norm": 1.5546875, "learning_rate": 9.201819965665892e-06, "loss": 2.2769, "step": 49120 }, { "epoch": 0.628864, "grad_norm": 1.578125, "learning_rate": 9.196221604081197e-06, "loss": 2.2504, "step": 49130 }, { "epoch": 0.628992, "grad_norm": 1.4296875, "learning_rate": 9.190624193117354e-06, "loss": 2.2932, "step": 49140 }, { "epoch": 0.62912, "grad_norm": 1.484375, "learning_rate": 9.185027733691179e-06, "loss": 2.2888, "step": 49150 }, { "epoch": 0.629248, "grad_norm": 1.65625, "learning_rate": 9.179432226719327e-06, "loss": 2.2977, "step": 49160 }, { "epoch": 0.629376, "grad_norm": 1.4609375, "learning_rate": 9.173837673118321e-06, "loss": 2.2788, "step": 49170 }, { "epoch": 0.629504, "grad_norm": 1.5390625, "learning_rate": 9.168244073804507e-06, "loss": 2.271, "step": 49180 }, { "epoch": 0.629632, "grad_norm": 1.734375, "learning_rate": 9.162651429694068e-06, "loss": 2.2593, "step": 49190 }, { "epoch": 0.62976, "grad_norm": 1.640625, "learning_rate": 9.157059741703057e-06, "loss": 2.2808, "step": 49200 }, { "epoch": 0.629888, "grad_norm": 1.65625, "learning_rate": 9.151469010747348e-06, "loss": 2.2762, "step": 49210 }, { "epoch": 0.630016, "grad_norm": 1.6015625, "learning_rate": 9.14587923774267e-06, "loss": 2.2564, "step": 49220 }, { "epoch": 0.630144, "grad_norm": 1.4140625, "learning_rate": 9.140290423604593e-06, "loss": 2.29, "step": 49230 }, { "epoch": 0.630272, "grad_norm": 1.546875, "learning_rate": 9.134702569248517e-06, "loss": 2.2926, "step": 49240 }, { "epoch": 0.6304, "grad_norm": 1.5234375, "learning_rate": 9.129115675589713e-06, "loss": 2.2717, "step": 49250 }, { "epoch": 0.630528, "grad_norm": 1.59375, "learning_rate": 9.123529743543266e-06, "loss": 2.3069, "step": 49260 }, { "epoch": 0.630656, "grad_norm": 1.4609375, "learning_rate": 9.11794477402411e-06, "loss": 2.2857, "step": 49270 }, { "epoch": 0.630784, "grad_norm": 1.5078125, "learning_rate": 9.11236076794704e-06, "loss": 2.282, "step": 49280 }, { "epoch": 0.630912, "grad_norm": 1.625, "learning_rate": 9.106777726226673e-06, "loss": 2.2732, "step": 49290 }, { "epoch": 0.63104, "grad_norm": 1.59375, "learning_rate": 9.101195649777473e-06, "loss": 2.2847, "step": 49300 }, { "epoch": 0.631168, "grad_norm": 1.546875, "learning_rate": 9.095614539513751e-06, "loss": 2.262, "step": 49310 }, { "epoch": 0.631296, "grad_norm": 1.7578125, "learning_rate": 9.090034396349654e-06, "loss": 2.2784, "step": 49320 }, { "epoch": 0.631424, "grad_norm": 1.609375, "learning_rate": 9.084455221199174e-06, "loss": 2.2859, "step": 49330 }, { "epoch": 0.631552, "grad_norm": 1.515625, "learning_rate": 9.078877014976145e-06, "loss": 2.2912, "step": 49340 }, { "epoch": 0.63168, "grad_norm": 1.5703125, "learning_rate": 9.073299778594236e-06, "loss": 2.2641, "step": 49350 }, { "epoch": 0.631808, "grad_norm": 1.6015625, "learning_rate": 9.067723512966962e-06, "loss": 2.2614, "step": 49360 }, { "epoch": 0.631936, "grad_norm": 1.4375, "learning_rate": 9.062148219007686e-06, "loss": 2.2672, "step": 49370 }, { "epoch": 0.632064, "grad_norm": 1.5703125, "learning_rate": 9.056573897629591e-06, "loss": 2.2814, "step": 49380 }, { "epoch": 0.632192, "grad_norm": 1.546875, "learning_rate": 9.051000549745732e-06, "loss": 2.293, "step": 49390 }, { "epoch": 0.63232, "grad_norm": 1.6953125, "learning_rate": 9.045428176268974e-06, "loss": 2.2634, "step": 49400 }, { "epoch": 0.632448, "grad_norm": 1.609375, "learning_rate": 9.039856778112037e-06, "loss": 2.2664, "step": 49410 }, { "epoch": 0.632576, "grad_norm": 1.6328125, "learning_rate": 9.034286356187487e-06, "loss": 2.2856, "step": 49420 }, { "epoch": 0.632704, "grad_norm": 1.59375, "learning_rate": 9.028716911407715e-06, "loss": 2.283, "step": 49430 }, { "epoch": 0.632832, "grad_norm": 1.578125, "learning_rate": 9.023148444684958e-06, "loss": 2.3137, "step": 49440 }, { "epoch": 0.63296, "grad_norm": 1.6875, "learning_rate": 9.017580956931302e-06, "loss": 2.2586, "step": 49450 }, { "epoch": 0.633088, "grad_norm": 1.5859375, "learning_rate": 9.01201444905866e-06, "loss": 2.2521, "step": 49460 }, { "epoch": 0.633216, "grad_norm": 1.6640625, "learning_rate": 9.006448921978785e-06, "loss": 2.2557, "step": 49470 }, { "epoch": 0.633344, "grad_norm": 1.5078125, "learning_rate": 9.00088437660329e-06, "loss": 2.2866, "step": 49480 }, { "epoch": 0.633472, "grad_norm": 1.5703125, "learning_rate": 8.995320813843596e-06, "loss": 2.2938, "step": 49490 }, { "epoch": 0.6336, "grad_norm": 1.84375, "learning_rate": 8.989758234610974e-06, "loss": 2.3149, "step": 49500 }, { "epoch": 0.633728, "grad_norm": 1.578125, "learning_rate": 8.984196639816557e-06, "loss": 2.2527, "step": 49510 }, { "epoch": 0.633856, "grad_norm": 1.625, "learning_rate": 8.978636030371279e-06, "loss": 2.2935, "step": 49520 }, { "epoch": 0.633984, "grad_norm": 1.515625, "learning_rate": 8.973076407185946e-06, "loss": 2.3109, "step": 49530 }, { "epoch": 0.634112, "grad_norm": 1.484375, "learning_rate": 8.96751777117118e-06, "loss": 2.2492, "step": 49540 }, { "epoch": 0.63424, "grad_norm": 1.484375, "learning_rate": 8.961960123237446e-06, "loss": 2.3167, "step": 49550 }, { "epoch": 0.634368, "grad_norm": 1.65625, "learning_rate": 8.956403464295062e-06, "loss": 2.2955, "step": 49560 }, { "epoch": 0.634496, "grad_norm": 1.5078125, "learning_rate": 8.950847795254163e-06, "loss": 2.2702, "step": 49570 }, { "epoch": 0.634624, "grad_norm": 1.6171875, "learning_rate": 8.945293117024727e-06, "loss": 2.3044, "step": 49580 }, { "epoch": 0.634752, "grad_norm": 1.6328125, "learning_rate": 8.939739430516588e-06, "loss": 2.2949, "step": 49590 }, { "epoch": 0.63488, "grad_norm": 3.09375, "learning_rate": 8.934186736639394e-06, "loss": 2.2569, "step": 49600 }, { "epoch": 0.635008, "grad_norm": 1.515625, "learning_rate": 8.928635036302636e-06, "loss": 2.2974, "step": 49610 }, { "epoch": 0.635136, "grad_norm": 1.53125, "learning_rate": 8.923084330415658e-06, "loss": 2.2965, "step": 49620 }, { "epoch": 0.635264, "grad_norm": 1.40625, "learning_rate": 8.91753461988762e-06, "loss": 2.3049, "step": 49630 }, { "epoch": 0.635392, "grad_norm": 1.609375, "learning_rate": 8.911985905627528e-06, "loss": 2.2886, "step": 49640 }, { "epoch": 0.63552, "grad_norm": 1.6796875, "learning_rate": 8.906438188544234e-06, "loss": 2.2954, "step": 49650 }, { "epoch": 0.635648, "grad_norm": 1.5859375, "learning_rate": 8.90089146954641e-06, "loss": 2.2754, "step": 49660 }, { "epoch": 0.635776, "grad_norm": 1.4375, "learning_rate": 8.89534574954257e-06, "loss": 2.2944, "step": 49670 }, { "epoch": 0.635904, "grad_norm": 1.453125, "learning_rate": 8.889801029441075e-06, "loss": 2.2599, "step": 49680 }, { "epoch": 0.636032, "grad_norm": 1.5078125, "learning_rate": 8.884257310150102e-06, "loss": 2.2838, "step": 49690 }, { "epoch": 0.63616, "grad_norm": 1.9375, "learning_rate": 8.878714592577689e-06, "loss": 2.2787, "step": 49700 }, { "epoch": 0.636288, "grad_norm": 1.65625, "learning_rate": 8.873172877631696e-06, "loss": 2.3077, "step": 49710 }, { "epoch": 0.636416, "grad_norm": 1.6171875, "learning_rate": 8.867632166219801e-06, "loss": 2.2926, "step": 49720 }, { "epoch": 0.636544, "grad_norm": 1.484375, "learning_rate": 8.86209245924956e-06, "loss": 2.2841, "step": 49730 }, { "epoch": 0.636672, "grad_norm": 1.546875, "learning_rate": 8.856553757628329e-06, "loss": 2.2622, "step": 49740 }, { "epoch": 0.6368, "grad_norm": 1.625, "learning_rate": 8.85101606226331e-06, "loss": 2.236, "step": 49750 }, { "epoch": 0.636928, "grad_norm": 1.4140625, "learning_rate": 8.845479374061545e-06, "loss": 2.2882, "step": 49760 }, { "epoch": 0.637056, "grad_norm": 1.65625, "learning_rate": 8.839943693929906e-06, "loss": 2.2854, "step": 49770 }, { "epoch": 0.637184, "grad_norm": 1.5390625, "learning_rate": 8.834409022775099e-06, "loss": 2.2885, "step": 49780 }, { "epoch": 0.637312, "grad_norm": 1.5859375, "learning_rate": 8.82887536150367e-06, "loss": 2.2929, "step": 49790 }, { "epoch": 0.63744, "grad_norm": 2.15625, "learning_rate": 8.823342711021998e-06, "loss": 2.3097, "step": 49800 }, { "epoch": 0.637568, "grad_norm": 1.453125, "learning_rate": 8.817811072236285e-06, "loss": 2.2692, "step": 49810 }, { "epoch": 0.637696, "grad_norm": 1.5859375, "learning_rate": 8.812280446052589e-06, "loss": 2.2868, "step": 49820 }, { "epoch": 0.637824, "grad_norm": 1.5625, "learning_rate": 8.806750833376782e-06, "loss": 2.3127, "step": 49830 }, { "epoch": 0.637952, "grad_norm": 1.4921875, "learning_rate": 8.801222235114586e-06, "loss": 2.286, "step": 49840 }, { "epoch": 0.63808, "grad_norm": 1.4921875, "learning_rate": 8.795694652171545e-06, "loss": 2.2912, "step": 49850 }, { "epoch": 0.638208, "grad_norm": 1.5078125, "learning_rate": 8.790168085453038e-06, "loss": 2.308, "step": 49860 }, { "epoch": 0.638336, "grad_norm": 1.6953125, "learning_rate": 8.784642535864287e-06, "loss": 2.2719, "step": 49870 }, { "epoch": 0.638464, "grad_norm": 1.5703125, "learning_rate": 8.779118004310335e-06, "loss": 2.2602, "step": 49880 }, { "epoch": 0.638592, "grad_norm": 1.5390625, "learning_rate": 8.773594491696063e-06, "loss": 2.3065, "step": 49890 }, { "epoch": 0.63872, "grad_norm": 1.6796875, "learning_rate": 8.768071998926194e-06, "loss": 2.2723, "step": 49900 }, { "epoch": 0.638848, "grad_norm": 1.7421875, "learning_rate": 8.76255052690527e-06, "loss": 2.2608, "step": 49910 }, { "epoch": 0.638976, "grad_norm": 1.515625, "learning_rate": 8.757030076537672e-06, "loss": 2.2566, "step": 49920 }, { "epoch": 0.639104, "grad_norm": 2.59375, "learning_rate": 8.751510648727613e-06, "loss": 2.2984, "step": 49930 }, { "epoch": 0.639232, "grad_norm": 4.125, "learning_rate": 8.745992244379148e-06, "loss": 2.2735, "step": 49940 }, { "epoch": 0.63936, "grad_norm": 1.671875, "learning_rate": 8.74047486439614e-06, "loss": 2.2849, "step": 49950 }, { "epoch": 0.639488, "grad_norm": 1.53125, "learning_rate": 8.73495850968231e-06, "loss": 2.2653, "step": 49960 }, { "epoch": 0.639616, "grad_norm": 1.4921875, "learning_rate": 8.729443181141203e-06, "loss": 2.2927, "step": 49970 }, { "epoch": 0.639744, "grad_norm": 1.6015625, "learning_rate": 8.723928879676179e-06, "loss": 2.2687, "step": 49980 }, { "epoch": 0.639872, "grad_norm": 1.640625, "learning_rate": 8.718415606190468e-06, "loss": 2.2863, "step": 49990 }, { "epoch": 0.64, "grad_norm": 1.546875, "learning_rate": 8.712903361587087e-06, "loss": 2.2935, "step": 50000 }, { "epoch": 0.640128, "grad_norm": 1.8828125, "learning_rate": 8.707392146768913e-06, "loss": 2.283, "step": 50010 }, { "epoch": 0.640256, "grad_norm": 1.6015625, "learning_rate": 8.701881962638654e-06, "loss": 2.2976, "step": 50020 }, { "epoch": 0.640384, "grad_norm": 1.59375, "learning_rate": 8.696372810098828e-06, "loss": 2.2873, "step": 50030 }, { "epoch": 0.640512, "grad_norm": 1.6328125, "learning_rate": 8.690864690051808e-06, "loss": 2.3002, "step": 50040 }, { "epoch": 0.64064, "grad_norm": 1.875, "learning_rate": 8.685357603399789e-06, "loss": 2.2611, "step": 50050 }, { "epoch": 0.640768, "grad_norm": 1.59375, "learning_rate": 8.679851551044788e-06, "loss": 2.2814, "step": 50060 }, { "epoch": 0.640896, "grad_norm": 1.5859375, "learning_rate": 8.674346533888661e-06, "loss": 2.2842, "step": 50070 }, { "epoch": 0.641024, "grad_norm": 1.5859375, "learning_rate": 8.668842552833106e-06, "loss": 2.2833, "step": 50080 }, { "epoch": 0.641152, "grad_norm": 1.5625, "learning_rate": 8.663339608779617e-06, "loss": 2.293, "step": 50090 }, { "epoch": 0.64128, "grad_norm": 2.203125, "learning_rate": 8.657837702629564e-06, "loss": 2.289, "step": 50100 }, { "epoch": 0.641408, "grad_norm": 2.25, "learning_rate": 8.652336835284108e-06, "loss": 2.2559, "step": 50110 }, { "epoch": 0.641536, "grad_norm": 1.53125, "learning_rate": 8.646837007644258e-06, "loss": 2.3163, "step": 50120 }, { "epoch": 0.641664, "grad_norm": 1.4609375, "learning_rate": 8.641338220610858e-06, "loss": 2.2391, "step": 50130 }, { "epoch": 0.641792, "grad_norm": 1.546875, "learning_rate": 8.63584047508456e-06, "loss": 2.2739, "step": 50140 }, { "epoch": 0.64192, "grad_norm": 1.6640625, "learning_rate": 8.630343771965862e-06, "loss": 2.2964, "step": 50150 }, { "epoch": 0.642048, "grad_norm": 1.59375, "learning_rate": 8.624848112155101e-06, "loss": 2.2795, "step": 50160 }, { "epoch": 0.642176, "grad_norm": 1.484375, "learning_rate": 8.619353496552407e-06, "loss": 2.2806, "step": 50170 }, { "epoch": 0.642304, "grad_norm": 1.6015625, "learning_rate": 8.613859926057786e-06, "loss": 2.2645, "step": 50180 }, { "epoch": 0.642432, "grad_norm": 1.59375, "learning_rate": 8.608367401571038e-06, "loss": 2.2508, "step": 50190 }, { "epoch": 0.64256, "grad_norm": 1.5390625, "learning_rate": 8.602875923991791e-06, "loss": 2.2684, "step": 50200 }, { "epoch": 0.642688, "grad_norm": 1.6015625, "learning_rate": 8.597385494219536e-06, "loss": 2.292, "step": 50210 }, { "epoch": 0.642816, "grad_norm": 1.59375, "learning_rate": 8.59189611315355e-06, "loss": 2.2949, "step": 50220 }, { "epoch": 0.642944, "grad_norm": 1.5859375, "learning_rate": 8.586407781692967e-06, "loss": 2.2638, "step": 50230 }, { "epoch": 0.643072, "grad_norm": 1.6796875, "learning_rate": 8.580920500736741e-06, "loss": 2.2924, "step": 50240 }, { "epoch": 0.6432, "grad_norm": 1.5, "learning_rate": 8.575434271183646e-06, "loss": 2.2702, "step": 50250 }, { "epoch": 0.643328, "grad_norm": 1.53125, "learning_rate": 8.569949093932294e-06, "loss": 2.2824, "step": 50260 }, { "epoch": 0.643456, "grad_norm": 1.640625, "learning_rate": 8.564464969881128e-06, "loss": 2.3216, "step": 50270 }, { "epoch": 0.643584, "grad_norm": 1.7109375, "learning_rate": 8.558981899928398e-06, "loss": 2.271, "step": 50280 }, { "epoch": 0.643712, "grad_norm": 1.546875, "learning_rate": 8.5534998849722e-06, "loss": 2.241, "step": 50290 }, { "epoch": 0.64384, "grad_norm": 1.71875, "learning_rate": 8.548018925910464e-06, "loss": 2.2905, "step": 50300 }, { "epoch": 0.643968, "grad_norm": 1.5390625, "learning_rate": 8.542539023640908e-06, "loss": 2.2702, "step": 50310 }, { "epoch": 0.644096, "grad_norm": 1.6953125, "learning_rate": 8.537060179061136e-06, "loss": 2.2962, "step": 50320 }, { "epoch": 0.644224, "grad_norm": 1.90625, "learning_rate": 8.531582393068525e-06, "loss": 2.2817, "step": 50330 }, { "epoch": 0.644352, "grad_norm": 1.578125, "learning_rate": 8.526105666560306e-06, "loss": 2.2954, "step": 50340 }, { "epoch": 0.64448, "grad_norm": 1.75, "learning_rate": 8.520630000433538e-06, "loss": 2.2826, "step": 50350 }, { "epoch": 0.644608, "grad_norm": 1.4453125, "learning_rate": 8.515155395585087e-06, "loss": 2.2526, "step": 50360 }, { "epoch": 0.644736, "grad_norm": 1.90625, "learning_rate": 8.509681852911665e-06, "loss": 2.2953, "step": 50370 }, { "epoch": 0.644864, "grad_norm": 1.5234375, "learning_rate": 8.504209373309807e-06, "loss": 2.2691, "step": 50380 }, { "epoch": 0.644992, "grad_norm": 1.5, "learning_rate": 8.498737957675857e-06, "loss": 2.2619, "step": 50390 }, { "epoch": 0.64512, "grad_norm": 1.5859375, "learning_rate": 8.493267606906003e-06, "loss": 2.3202, "step": 50400 }, { "epoch": 0.645248, "grad_norm": 1.5546875, "learning_rate": 8.48779832189626e-06, "loss": 2.294, "step": 50410 }, { "epoch": 0.645376, "grad_norm": 1.7109375, "learning_rate": 8.482330103542448e-06, "loss": 2.2813, "step": 50420 }, { "epoch": 0.645504, "grad_norm": 1.625, "learning_rate": 8.476862952740231e-06, "loss": 2.2634, "step": 50430 }, { "epoch": 0.645632, "grad_norm": 1.5859375, "learning_rate": 8.471396870385092e-06, "loss": 2.2597, "step": 50440 }, { "epoch": 0.64576, "grad_norm": 1.4765625, "learning_rate": 8.465931857372339e-06, "loss": 2.2747, "step": 50450 }, { "epoch": 0.645888, "grad_norm": 1.484375, "learning_rate": 8.460467914597114e-06, "loss": 2.2739, "step": 50460 }, { "epoch": 0.646016, "grad_norm": 1.625, "learning_rate": 8.455005042954362e-06, "loss": 2.3088, "step": 50470 }, { "epoch": 0.646144, "grad_norm": 1.640625, "learning_rate": 8.44954324333887e-06, "loss": 2.2765, "step": 50480 }, { "epoch": 0.646272, "grad_norm": 1.5703125, "learning_rate": 8.444082516645253e-06, "loss": 2.2841, "step": 50490 }, { "epoch": 0.6464, "grad_norm": 1.546875, "learning_rate": 8.438622863767927e-06, "loss": 2.2847, "step": 50500 }, { "epoch": 0.646528, "grad_norm": 1.5, "learning_rate": 8.433164285601159e-06, "loss": 2.2798, "step": 50510 }, { "epoch": 0.646656, "grad_norm": 1.6171875, "learning_rate": 8.427706783039022e-06, "loss": 2.3061, "step": 50520 }, { "epoch": 0.646784, "grad_norm": 6.25, "learning_rate": 8.42225035697543e-06, "loss": 2.287, "step": 50530 }, { "epoch": 0.646912, "grad_norm": 1.6796875, "learning_rate": 8.416795008304094e-06, "loss": 2.2928, "step": 50540 }, { "epoch": 0.64704, "grad_norm": 1.5546875, "learning_rate": 8.411340737918574e-06, "loss": 2.2641, "step": 50550 }, { "epoch": 0.647168, "grad_norm": 1.609375, "learning_rate": 8.405887546712245e-06, "loss": 2.285, "step": 50560 }, { "epoch": 0.647296, "grad_norm": 1.578125, "learning_rate": 8.400435435578298e-06, "loss": 2.2883, "step": 50570 }, { "epoch": 0.647424, "grad_norm": 1.6328125, "learning_rate": 8.394984405409757e-06, "loss": 2.2789, "step": 50580 }, { "epoch": 0.647552, "grad_norm": 1.6640625, "learning_rate": 8.389534457099466e-06, "loss": 2.281, "step": 50590 }, { "epoch": 0.64768, "grad_norm": 1.640625, "learning_rate": 8.384085591540084e-06, "loss": 2.2943, "step": 50600 }, { "epoch": 0.647808, "grad_norm": 1.8984375, "learning_rate": 8.378637809624106e-06, "loss": 2.2766, "step": 50610 }, { "epoch": 0.647936, "grad_norm": 1.65625, "learning_rate": 8.373191112243838e-06, "loss": 2.28, "step": 50620 }, { "epoch": 0.648064, "grad_norm": 1.6796875, "learning_rate": 8.367745500291417e-06, "loss": 2.298, "step": 50630 }, { "epoch": 0.648192, "grad_norm": 1.4609375, "learning_rate": 8.362300974658808e-06, "loss": 2.2771, "step": 50640 }, { "epoch": 0.64832, "grad_norm": 1.6328125, "learning_rate": 8.356857536237769e-06, "loss": 2.2556, "step": 50650 }, { "epoch": 0.648448, "grad_norm": 1.546875, "learning_rate": 8.351415185919909e-06, "loss": 2.2623, "step": 50660 }, { "epoch": 0.648576, "grad_norm": 1.484375, "learning_rate": 8.345973924596658e-06, "loss": 2.2635, "step": 50670 }, { "epoch": 0.648704, "grad_norm": 1.5703125, "learning_rate": 8.340533753159241e-06, "loss": 2.2946, "step": 50680 }, { "epoch": 0.648832, "grad_norm": 1.625, "learning_rate": 8.335094672498733e-06, "loss": 2.285, "step": 50690 }, { "epoch": 0.64896, "grad_norm": 1.515625, "learning_rate": 8.329656683506026e-06, "loss": 2.3041, "step": 50700 }, { "epoch": 0.649088, "grad_norm": 1.484375, "learning_rate": 8.324219787071811e-06, "loss": 2.2832, "step": 50710 }, { "epoch": 0.649216, "grad_norm": 1.5234375, "learning_rate": 8.318783984086626e-06, "loss": 2.3028, "step": 50720 }, { "epoch": 0.649344, "grad_norm": 1.4453125, "learning_rate": 8.313349275440823e-06, "loss": 2.2923, "step": 50730 }, { "epoch": 0.649472, "grad_norm": 1.6875, "learning_rate": 8.307915662024558e-06, "loss": 2.3099, "step": 50740 }, { "epoch": 0.6496, "grad_norm": 1.453125, "learning_rate": 8.30248314472784e-06, "loss": 2.2912, "step": 50750 }, { "epoch": 0.649728, "grad_norm": 1.4765625, "learning_rate": 8.297051724440463e-06, "loss": 2.3051, "step": 50760 }, { "epoch": 0.649856, "grad_norm": 1.625, "learning_rate": 8.291621402052067e-06, "loss": 2.2956, "step": 50770 }, { "epoch": 0.649984, "grad_norm": 1.4453125, "learning_rate": 8.286192178452105e-06, "loss": 2.2943, "step": 50780 }, { "epoch": 0.650112, "grad_norm": 1.6640625, "learning_rate": 8.28076405452984e-06, "loss": 2.288, "step": 50790 }, { "epoch": 0.65024, "grad_norm": 1.6640625, "learning_rate": 8.275337031174366e-06, "loss": 2.2487, "step": 50800 }, { "epoch": 0.650368, "grad_norm": 1.59375, "learning_rate": 8.269911109274604e-06, "loss": 2.3467, "step": 50810 }, { "epoch": 0.650496, "grad_norm": 1.5703125, "learning_rate": 8.264486289719268e-06, "loss": 2.2913, "step": 50820 }, { "epoch": 0.650624, "grad_norm": 1.53125, "learning_rate": 8.259062573396917e-06, "loss": 2.3167, "step": 50830 }, { "epoch": 0.650752, "grad_norm": 1.5546875, "learning_rate": 8.253639961195924e-06, "loss": 2.297, "step": 50840 }, { "epoch": 0.65088, "grad_norm": 1.5546875, "learning_rate": 8.24821845400446e-06, "loss": 2.2677, "step": 50850 }, { "epoch": 0.651008, "grad_norm": 1.546875, "learning_rate": 8.242798052710555e-06, "loss": 2.2714, "step": 50860 }, { "epoch": 0.651136, "grad_norm": 1.5234375, "learning_rate": 8.23737875820202e-06, "loss": 2.2756, "step": 50870 }, { "epoch": 0.651264, "grad_norm": 1.4453125, "learning_rate": 8.231960571366504e-06, "loss": 2.2768, "step": 50880 }, { "epoch": 0.651392, "grad_norm": 1.5234375, "learning_rate": 8.226543493091478e-06, "loss": 2.315, "step": 50890 }, { "epoch": 0.65152, "grad_norm": 1.5546875, "learning_rate": 8.221127524264213e-06, "loss": 2.2765, "step": 50900 }, { "epoch": 0.651648, "grad_norm": 1.671875, "learning_rate": 8.215712665771811e-06, "loss": 2.2736, "step": 50910 }, { "epoch": 0.651776, "grad_norm": 1.5703125, "learning_rate": 8.210298918501198e-06, "loss": 2.2961, "step": 50920 }, { "epoch": 0.651904, "grad_norm": 5.96875, "learning_rate": 8.204886283339092e-06, "loss": 2.2771, "step": 50930 }, { "epoch": 0.652032, "grad_norm": 1.5078125, "learning_rate": 8.199474761172072e-06, "loss": 2.2544, "step": 50940 }, { "epoch": 0.65216, "grad_norm": 1.5078125, "learning_rate": 8.1940643528865e-06, "loss": 2.2698, "step": 50950 }, { "epoch": 0.652288, "grad_norm": 1.5625, "learning_rate": 8.18865505936855e-06, "loss": 2.2687, "step": 50960 }, { "epoch": 0.652416, "grad_norm": 1.5, "learning_rate": 8.183246881504253e-06, "loss": 2.2649, "step": 50970 }, { "epoch": 0.652544, "grad_norm": 1.625, "learning_rate": 8.177839820179418e-06, "loss": 2.2813, "step": 50980 }, { "epoch": 0.652672, "grad_norm": 1.5390625, "learning_rate": 8.172433876279689e-06, "loss": 2.2804, "step": 50990 }, { "epoch": 0.6528, "grad_norm": 1.5078125, "learning_rate": 8.167029050690532e-06, "loss": 2.284, "step": 51000 }, { "epoch": 0.652928, "grad_norm": 1.609375, "learning_rate": 8.161625344297209e-06, "loss": 2.3106, "step": 51010 }, { "epoch": 0.653056, "grad_norm": 1.5546875, "learning_rate": 8.156222757984817e-06, "loss": 2.289, "step": 51020 }, { "epoch": 0.653184, "grad_norm": 1.515625, "learning_rate": 8.150821292638273e-06, "loss": 2.2684, "step": 51030 }, { "epoch": 0.653312, "grad_norm": 10.125, "learning_rate": 8.145420949142286e-06, "loss": 2.3037, "step": 51040 }, { "epoch": 0.65344, "grad_norm": 1.5078125, "learning_rate": 8.140021728381408e-06, "loss": 2.297, "step": 51050 }, { "epoch": 0.653568, "grad_norm": 1.59375, "learning_rate": 8.134623631239996e-06, "loss": 2.2794, "step": 51060 }, { "epoch": 0.653696, "grad_norm": 1.640625, "learning_rate": 8.129226658602207e-06, "loss": 2.2844, "step": 51070 }, { "epoch": 0.653824, "grad_norm": 1.5, "learning_rate": 8.123830811352056e-06, "loss": 2.2896, "step": 51080 }, { "epoch": 0.653952, "grad_norm": 10.125, "learning_rate": 8.11843609037333e-06, "loss": 2.3177, "step": 51090 }, { "epoch": 0.65408, "grad_norm": 1.5625, "learning_rate": 8.113042496549648e-06, "loss": 2.2639, "step": 51100 }, { "epoch": 0.654208, "grad_norm": 1.5859375, "learning_rate": 8.10765003076446e-06, "loss": 2.2941, "step": 51110 }, { "epoch": 0.654336, "grad_norm": 1.4765625, "learning_rate": 8.102258693900999e-06, "loss": 2.2734, "step": 51120 }, { "epoch": 0.654464, "grad_norm": 1.6328125, "learning_rate": 8.096868486842337e-06, "loss": 2.3308, "step": 51130 }, { "epoch": 0.654592, "grad_norm": 1.6015625, "learning_rate": 8.091479410471363e-06, "loss": 2.2869, "step": 51140 }, { "epoch": 0.65472, "grad_norm": 1.6484375, "learning_rate": 8.08609146567076e-06, "loss": 2.2876, "step": 51150 }, { "epoch": 0.654848, "grad_norm": 1.4375, "learning_rate": 8.08070465332304e-06, "loss": 2.2752, "step": 51160 }, { "epoch": 0.654976, "grad_norm": 1.4765625, "learning_rate": 8.075318974310542e-06, "loss": 2.272, "step": 51170 }, { "epoch": 0.655104, "grad_norm": 1.4921875, "learning_rate": 8.069934429515383e-06, "loss": 2.2817, "step": 51180 }, { "epoch": 0.655232, "grad_norm": 1.4765625, "learning_rate": 8.064551019819529e-06, "loss": 2.3114, "step": 51190 }, { "epoch": 0.65536, "grad_norm": 1.6484375, "learning_rate": 8.059168746104745e-06, "loss": 2.2768, "step": 51200 }, { "epoch": 0.655488, "grad_norm": 1.53125, "learning_rate": 8.053787609252617e-06, "loss": 2.3216, "step": 51210 }, { "epoch": 0.655616, "grad_norm": 1.5, "learning_rate": 8.048407610144529e-06, "loss": 2.2894, "step": 51220 }, { "epoch": 0.655744, "grad_norm": 1.6640625, "learning_rate": 8.043028749661694e-06, "loss": 2.2957, "step": 51230 }, { "epoch": 0.655872, "grad_norm": 1.53125, "learning_rate": 8.037651028685139e-06, "loss": 2.2842, "step": 51240 }, { "epoch": 0.656, "grad_norm": 1.4765625, "learning_rate": 8.032274448095701e-06, "loss": 2.2639, "step": 51250 }, { "epoch": 0.656128, "grad_norm": 1.625, "learning_rate": 8.026899008774018e-06, "loss": 2.3095, "step": 51260 }, { "epoch": 0.656256, "grad_norm": 1.578125, "learning_rate": 8.021524711600556e-06, "loss": 2.3293, "step": 51270 }, { "epoch": 0.656384, "grad_norm": 1.7578125, "learning_rate": 8.016151557455594e-06, "loss": 2.2854, "step": 51280 }, { "epoch": 0.656512, "grad_norm": 1.5546875, "learning_rate": 8.01077954721922e-06, "loss": 2.2705, "step": 51290 }, { "epoch": 0.65664, "grad_norm": 1.5703125, "learning_rate": 8.005408681771325e-06, "loss": 2.2888, "step": 51300 }, { "epoch": 0.656768, "grad_norm": 1.53125, "learning_rate": 8.000038961991629e-06, "loss": 2.2902, "step": 51310 }, { "epoch": 0.656896, "grad_norm": 1.4765625, "learning_rate": 7.994670388759659e-06, "loss": 2.2777, "step": 51320 }, { "epoch": 0.657024, "grad_norm": 1.546875, "learning_rate": 7.989302962954743e-06, "loss": 2.2766, "step": 51330 }, { "epoch": 0.657152, "grad_norm": 1.5859375, "learning_rate": 7.983936685456036e-06, "loss": 2.2573, "step": 51340 }, { "epoch": 0.65728, "grad_norm": 1.546875, "learning_rate": 7.978571557142502e-06, "loss": 2.279, "step": 51350 }, { "epoch": 0.657408, "grad_norm": 1.625, "learning_rate": 7.973207578892905e-06, "loss": 2.2932, "step": 51360 }, { "epoch": 0.657536, "grad_norm": 1.59375, "learning_rate": 7.967844751585838e-06, "loss": 2.2608, "step": 51370 }, { "epoch": 0.657664, "grad_norm": 1.4453125, "learning_rate": 7.96248307609969e-06, "loss": 2.2869, "step": 51380 }, { "epoch": 0.657792, "grad_norm": 1.640625, "learning_rate": 7.957122553312672e-06, "loss": 2.2817, "step": 51390 }, { "epoch": 0.65792, "grad_norm": 1.6328125, "learning_rate": 7.95176318410281e-06, "loss": 2.3044, "step": 51400 }, { "epoch": 0.658048, "grad_norm": 1.6875, "learning_rate": 7.94640496934792e-06, "loss": 2.2897, "step": 51410 }, { "epoch": 0.658176, "grad_norm": 1.5234375, "learning_rate": 7.941047909925648e-06, "loss": 2.312, "step": 51420 }, { "epoch": 0.658304, "grad_norm": 1.515625, "learning_rate": 7.93569200671345e-06, "loss": 2.2506, "step": 51430 }, { "epoch": 0.658432, "grad_norm": 1.484375, "learning_rate": 7.930337260588579e-06, "loss": 2.2632, "step": 51440 }, { "epoch": 0.65856, "grad_norm": 1.4296875, "learning_rate": 7.924983672428113e-06, "loss": 2.2857, "step": 51450 }, { "epoch": 0.658688, "grad_norm": 1.5078125, "learning_rate": 7.919631243108936e-06, "loss": 2.2882, "step": 51460 }, { "epoch": 0.658816, "grad_norm": 1.6484375, "learning_rate": 7.914279973507735e-06, "loss": 2.3119, "step": 51470 }, { "epoch": 0.658944, "grad_norm": 1.40625, "learning_rate": 7.908929864501015e-06, "loss": 2.2623, "step": 51480 }, { "epoch": 0.659072, "grad_norm": 15.75, "learning_rate": 7.903580916965096e-06, "loss": 2.307, "step": 51490 }, { "epoch": 0.6592, "grad_norm": 1.5390625, "learning_rate": 7.898233131776084e-06, "loss": 2.2987, "step": 51500 }, { "epoch": 0.659328, "grad_norm": 1.5390625, "learning_rate": 7.89288650980993e-06, "loss": 2.2622, "step": 51510 }, { "epoch": 0.659456, "grad_norm": 1.8828125, "learning_rate": 7.88754105194237e-06, "loss": 2.2891, "step": 51520 }, { "epoch": 0.659584, "grad_norm": 1.546875, "learning_rate": 7.88219675904894e-06, "loss": 2.2505, "step": 51530 }, { "epoch": 0.659712, "grad_norm": 2.0625, "learning_rate": 7.876853632005021e-06, "loss": 2.2922, "step": 51540 }, { "epoch": 0.65984, "grad_norm": 1.625, "learning_rate": 7.871511671685771e-06, "loss": 2.2767, "step": 51550 }, { "epoch": 0.659968, "grad_norm": 2.078125, "learning_rate": 7.86617087896617e-06, "loss": 2.2952, "step": 51560 }, { "epoch": 0.660096, "grad_norm": 1.734375, "learning_rate": 7.86083125472101e-06, "loss": 2.2621, "step": 51570 }, { "epoch": 0.660224, "grad_norm": 1.9140625, "learning_rate": 7.855492799824877e-06, "loss": 2.2523, "step": 51580 }, { "epoch": 0.660352, "grad_norm": 1.5, "learning_rate": 7.850155515152179e-06, "loss": 2.2724, "step": 51590 }, { "epoch": 0.66048, "grad_norm": 1.5625, "learning_rate": 7.844819401577136e-06, "loss": 2.2794, "step": 51600 }, { "epoch": 0.660608, "grad_norm": 1.5625, "learning_rate": 7.839484459973747e-06, "loss": 2.2859, "step": 51610 }, { "epoch": 0.660736, "grad_norm": 1.515625, "learning_rate": 7.834150691215868e-06, "loss": 2.3003, "step": 51620 }, { "epoch": 0.660864, "grad_norm": 1.578125, "learning_rate": 7.828818096177118e-06, "loss": 2.2888, "step": 51630 }, { "epoch": 0.660992, "grad_norm": 1.609375, "learning_rate": 7.823486675730943e-06, "loss": 2.2996, "step": 51640 }, { "epoch": 0.66112, "grad_norm": 1.65625, "learning_rate": 7.818156430750605e-06, "loss": 2.308, "step": 51650 }, { "epoch": 0.661248, "grad_norm": 1.734375, "learning_rate": 7.812827362109146e-06, "loss": 2.2965, "step": 51660 }, { "epoch": 0.661376, "grad_norm": 1.5859375, "learning_rate": 7.807499470679444e-06, "loss": 2.2621, "step": 51670 }, { "epoch": 0.661504, "grad_norm": 1.484375, "learning_rate": 7.802172757334175e-06, "loss": 2.2907, "step": 51680 }, { "epoch": 0.661632, "grad_norm": 1.5390625, "learning_rate": 7.796847222945805e-06, "loss": 2.2755, "step": 51690 }, { "epoch": 0.66176, "grad_norm": 1.5234375, "learning_rate": 7.791522868386643e-06, "loss": 2.2882, "step": 51700 }, { "epoch": 0.661888, "grad_norm": 1.5625, "learning_rate": 7.786199694528772e-06, "loss": 2.2737, "step": 51710 }, { "epoch": 0.662016, "grad_norm": 1.5859375, "learning_rate": 7.78087770224408e-06, "loss": 2.3001, "step": 51720 }, { "epoch": 0.662144, "grad_norm": 1.6484375, "learning_rate": 7.7755568924043e-06, "loss": 2.2927, "step": 51730 }, { "epoch": 0.662272, "grad_norm": 10.9375, "learning_rate": 7.770237265880927e-06, "loss": 2.2678, "step": 51740 }, { "epoch": 0.6624, "grad_norm": 1.484375, "learning_rate": 7.764918823545286e-06, "loss": 2.2842, "step": 51750 }, { "epoch": 0.662528, "grad_norm": 1.46875, "learning_rate": 7.759601566268512e-06, "loss": 2.2665, "step": 51760 }, { "epoch": 0.662656, "grad_norm": 1.5, "learning_rate": 7.754285494921522e-06, "loss": 2.2977, "step": 51770 }, { "epoch": 0.662784, "grad_norm": 1.546875, "learning_rate": 7.748970610375062e-06, "loss": 2.2453, "step": 51780 }, { "epoch": 0.662912, "grad_norm": 1.53125, "learning_rate": 7.743656913499678e-06, "loss": 2.2812, "step": 51790 }, { "epoch": 0.66304, "grad_norm": 1.5390625, "learning_rate": 7.73834440516571e-06, "loss": 2.3024, "step": 51800 }, { "epoch": 0.663168, "grad_norm": 1.453125, "learning_rate": 7.733033086243316e-06, "loss": 2.3033, "step": 51810 }, { "epoch": 0.663296, "grad_norm": 1.546875, "learning_rate": 7.727722957602462e-06, "loss": 2.2748, "step": 51820 }, { "epoch": 0.663424, "grad_norm": 1.53125, "learning_rate": 7.722414020112901e-06, "loss": 2.2744, "step": 51830 }, { "epoch": 0.663552, "grad_norm": 1.5078125, "learning_rate": 7.717106274644207e-06, "loss": 2.2685, "step": 51840 }, { "epoch": 0.66368, "grad_norm": 1.5390625, "learning_rate": 7.711799722065754e-06, "loss": 2.2715, "step": 51850 }, { "epoch": 0.663808, "grad_norm": 1.671875, "learning_rate": 7.70649436324672e-06, "loss": 2.2649, "step": 51860 }, { "epoch": 0.663936, "grad_norm": 1.5546875, "learning_rate": 7.701190199056095e-06, "loss": 2.3021, "step": 51870 }, { "epoch": 0.664064, "grad_norm": 1.4609375, "learning_rate": 7.695887230362655e-06, "loss": 2.3096, "step": 51880 }, { "epoch": 0.664192, "grad_norm": 1.5390625, "learning_rate": 7.690585458034994e-06, "loss": 2.2987, "step": 51890 }, { "epoch": 0.66432, "grad_norm": 2.84375, "learning_rate": 7.685284882941517e-06, "loss": 2.2883, "step": 51900 }, { "epoch": 0.664448, "grad_norm": 1.4921875, "learning_rate": 7.679985505950411e-06, "loss": 2.2778, "step": 51910 }, { "epoch": 0.664576, "grad_norm": 1.4375, "learning_rate": 7.674687327929685e-06, "loss": 2.2635, "step": 51920 }, { "epoch": 0.664704, "grad_norm": 1.7578125, "learning_rate": 7.66939034974715e-06, "loss": 2.2901, "step": 51930 }, { "epoch": 0.664832, "grad_norm": 1.6328125, "learning_rate": 7.664094572270408e-06, "loss": 2.2692, "step": 51940 }, { "epoch": 0.66496, "grad_norm": 1.5703125, "learning_rate": 7.658799996366876e-06, "loss": 2.2716, "step": 51950 }, { "epoch": 0.665088, "grad_norm": 3.4375, "learning_rate": 7.653506622903772e-06, "loss": 2.3124, "step": 51960 }, { "epoch": 0.665216, "grad_norm": 1.9375, "learning_rate": 7.648214452748118e-06, "loss": 2.306, "step": 51970 }, { "epoch": 0.665344, "grad_norm": 1.6796875, "learning_rate": 7.64292348676673e-06, "loss": 2.2854, "step": 51980 }, { "epoch": 0.665472, "grad_norm": 1.8359375, "learning_rate": 7.637633725826237e-06, "loss": 2.2788, "step": 51990 }, { "epoch": 0.6656, "grad_norm": 1.5625, "learning_rate": 7.632345170793068e-06, "loss": 2.2793, "step": 52000 }, { "epoch": 0.665728, "grad_norm": 1.5, "learning_rate": 7.6270578225334595e-06, "loss": 2.2828, "step": 52010 }, { "epoch": 0.665856, "grad_norm": 1.4765625, "learning_rate": 7.6217716819134315e-06, "loss": 2.2699, "step": 52020 }, { "epoch": 0.665984, "grad_norm": 1.4375, "learning_rate": 7.616486749798826e-06, "loss": 2.2909, "step": 52030 }, { "epoch": 0.666112, "grad_norm": 2.078125, "learning_rate": 7.611203027055281e-06, "loss": 2.2984, "step": 52040 }, { "epoch": 0.66624, "grad_norm": 1.609375, "learning_rate": 7.6059205145482395e-06, "loss": 2.2536, "step": 52050 }, { "epoch": 0.666368, "grad_norm": 1.5859375, "learning_rate": 7.600639213142932e-06, "loss": 2.2731, "step": 52060 }, { "epoch": 0.666496, "grad_norm": 1.5234375, "learning_rate": 7.595359123704407e-06, "loss": 2.2747, "step": 52070 }, { "epoch": 0.666624, "grad_norm": 1.546875, "learning_rate": 7.590080247097514e-06, "loss": 2.279, "step": 52080 }, { "epoch": 0.666752, "grad_norm": 1.4609375, "learning_rate": 7.5848025841868875e-06, "loss": 2.2861, "step": 52090 }, { "epoch": 0.66688, "grad_norm": 1.5625, "learning_rate": 7.5795261358369775e-06, "loss": 2.2735, "step": 52100 }, { "epoch": 0.667008, "grad_norm": 1.6015625, "learning_rate": 7.574250902912041e-06, "loss": 2.3018, "step": 52110 }, { "epoch": 0.667136, "grad_norm": 1.5078125, "learning_rate": 7.568976886276111e-06, "loss": 2.2906, "step": 52120 }, { "epoch": 0.667264, "grad_norm": 1.5078125, "learning_rate": 7.563704086793047e-06, "loss": 2.2861, "step": 52130 }, { "epoch": 0.667392, "grad_norm": 1.484375, "learning_rate": 7.5584325053265e-06, "loss": 2.2931, "step": 52140 }, { "epoch": 0.66752, "grad_norm": 1.578125, "learning_rate": 7.553162142739909e-06, "loss": 2.3024, "step": 52150 }, { "epoch": 0.667648, "grad_norm": 1.5859375, "learning_rate": 7.547892999896541e-06, "loss": 2.2454, "step": 52160 }, { "epoch": 0.667776, "grad_norm": 1.640625, "learning_rate": 7.542625077659435e-06, "loss": 2.2714, "step": 52170 }, { "epoch": 0.667904, "grad_norm": 1.71875, "learning_rate": 7.537358376891447e-06, "loss": 2.3006, "step": 52180 }, { "epoch": 0.668032, "grad_norm": 1.5546875, "learning_rate": 7.532092898455232e-06, "loss": 2.2991, "step": 52190 }, { "epoch": 0.66816, "grad_norm": 1.6328125, "learning_rate": 7.526828643213231e-06, "loss": 2.2894, "step": 52200 }, { "epoch": 0.668288, "grad_norm": 1.6015625, "learning_rate": 7.521565612027699e-06, "loss": 2.2831, "step": 52210 }, { "epoch": 0.668416, "grad_norm": 1.515625, "learning_rate": 7.516303805760694e-06, "loss": 2.27, "step": 52220 }, { "epoch": 0.668544, "grad_norm": 1.6328125, "learning_rate": 7.511043225274051e-06, "loss": 2.2731, "step": 52230 }, { "epoch": 0.668672, "grad_norm": 2.3125, "learning_rate": 7.505783871429426e-06, "loss": 2.2533, "step": 52240 }, { "epoch": 0.6688, "grad_norm": 3.3125, "learning_rate": 7.500525745088271e-06, "loss": 2.2861, "step": 52250 }, { "epoch": 0.668928, "grad_norm": 1.4921875, "learning_rate": 7.495268847111817e-06, "loss": 2.268, "step": 52260 }, { "epoch": 0.669056, "grad_norm": 21.25, "learning_rate": 7.490013178361132e-06, "loss": 2.2923, "step": 52270 }, { "epoch": 0.669184, "grad_norm": 1.640625, "learning_rate": 7.484758739697048e-06, "loss": 2.2802, "step": 52280 }, { "epoch": 0.669312, "grad_norm": 1.5390625, "learning_rate": 7.479505531980197e-06, "loss": 2.2672, "step": 52290 }, { "epoch": 0.66944, "grad_norm": 1.546875, "learning_rate": 7.474253556071041e-06, "loss": 2.2649, "step": 52300 }, { "epoch": 0.669568, "grad_norm": 1.5234375, "learning_rate": 7.469002812829805e-06, "loss": 2.2895, "step": 52310 }, { "epoch": 0.669696, "grad_norm": 1.4765625, "learning_rate": 7.46375330311653e-06, "loss": 2.2528, "step": 52320 }, { "epoch": 0.669824, "grad_norm": 1.6015625, "learning_rate": 7.458505027791056e-06, "loss": 2.2828, "step": 52330 }, { "epoch": 0.669952, "grad_norm": 1.515625, "learning_rate": 7.453257987713008e-06, "loss": 2.2461, "step": 52340 }, { "epoch": 0.67008, "grad_norm": 1.4921875, "learning_rate": 7.448012183741822e-06, "loss": 2.2799, "step": 52350 }, { "epoch": 0.670208, "grad_norm": 1.65625, "learning_rate": 7.442767616736728e-06, "loss": 2.2759, "step": 52360 }, { "epoch": 0.670336, "grad_norm": 1.6328125, "learning_rate": 7.437524287556738e-06, "loss": 2.3003, "step": 52370 }, { "epoch": 0.670464, "grad_norm": 1.4921875, "learning_rate": 7.432282197060697e-06, "loss": 2.3176, "step": 52380 }, { "epoch": 0.670592, "grad_norm": 2.109375, "learning_rate": 7.427041346107208e-06, "loss": 2.2714, "step": 52390 }, { "epoch": 0.67072, "grad_norm": 1.5703125, "learning_rate": 7.421801735554692e-06, "loss": 2.26, "step": 52400 }, { "epoch": 0.670848, "grad_norm": 1.484375, "learning_rate": 7.416563366261372e-06, "loss": 2.2793, "step": 52410 }, { "epoch": 0.670976, "grad_norm": 1.546875, "learning_rate": 7.411326239085245e-06, "loss": 2.2802, "step": 52420 }, { "epoch": 0.671104, "grad_norm": 1.6875, "learning_rate": 7.406090354884123e-06, "loss": 2.2704, "step": 52430 }, { "epoch": 0.671232, "grad_norm": 1.734375, "learning_rate": 7.400855714515619e-06, "loss": 2.2787, "step": 52440 }, { "epoch": 0.67136, "grad_norm": 1.4921875, "learning_rate": 7.395622318837117e-06, "loss": 2.306, "step": 52450 }, { "epoch": 0.671488, "grad_norm": 1.6796875, "learning_rate": 7.390390168705819e-06, "loss": 2.2805, "step": 52460 }, { "epoch": 0.671616, "grad_norm": 1.4765625, "learning_rate": 7.385159264978727e-06, "loss": 2.2598, "step": 52470 }, { "epoch": 0.671744, "grad_norm": 1.5625, "learning_rate": 7.379929608512606e-06, "loss": 2.3031, "step": 52480 }, { "epoch": 0.671872, "grad_norm": 1.4765625, "learning_rate": 7.374701200164065e-06, "loss": 2.2869, "step": 52490 }, { "epoch": 0.672, "grad_norm": 1.59375, "learning_rate": 7.369474040789467e-06, "loss": 2.3177, "step": 52500 }, { "epoch": 0.672128, "grad_norm": 1.515625, "learning_rate": 7.36424813124499e-06, "loss": 2.2733, "step": 52510 }, { "epoch": 0.672256, "grad_norm": 1.59375, "learning_rate": 7.35902347238661e-06, "loss": 2.2776, "step": 52520 }, { "epoch": 0.672384, "grad_norm": 1.5234375, "learning_rate": 7.353800065070081e-06, "loss": 2.2933, "step": 52530 }, { "epoch": 0.672512, "grad_norm": 1.9375, "learning_rate": 7.3485779101509675e-06, "loss": 2.285, "step": 52540 }, { "epoch": 0.67264, "grad_norm": 1.5546875, "learning_rate": 7.3433570084846305e-06, "loss": 2.2791, "step": 52550 }, { "epoch": 0.672768, "grad_norm": 1.5546875, "learning_rate": 7.3381373609262094e-06, "loss": 2.266, "step": 52560 }, { "epoch": 0.672896, "grad_norm": 1.640625, "learning_rate": 7.332918968330653e-06, "loss": 2.2577, "step": 52570 }, { "epoch": 0.673024, "grad_norm": 2.5625, "learning_rate": 7.327701831552705e-06, "loss": 2.2904, "step": 52580 }, { "epoch": 0.673152, "grad_norm": 1.5, "learning_rate": 7.3224859514468854e-06, "loss": 2.2746, "step": 52590 }, { "epoch": 0.67328, "grad_norm": 1.59375, "learning_rate": 7.317271328867531e-06, "loss": 2.2884, "step": 52600 }, { "epoch": 0.673408, "grad_norm": 1.46875, "learning_rate": 7.312057964668759e-06, "loss": 2.2773, "step": 52610 }, { "epoch": 0.673536, "grad_norm": 1.5859375, "learning_rate": 7.3068458597044854e-06, "loss": 2.2661, "step": 52620 }, { "epoch": 0.673664, "grad_norm": 1.5625, "learning_rate": 7.301635014828424e-06, "loss": 2.2994, "step": 52630 }, { "epoch": 0.673792, "grad_norm": 1.578125, "learning_rate": 7.296425430894067e-06, "loss": 2.2768, "step": 52640 }, { "epoch": 0.67392, "grad_norm": 1.5546875, "learning_rate": 7.291217108754718e-06, "loss": 2.2777, "step": 52650 }, { "epoch": 0.674048, "grad_norm": 1.4296875, "learning_rate": 7.286010049263465e-06, "loss": 2.2663, "step": 52660 }, { "epoch": 0.674176, "grad_norm": 1.5546875, "learning_rate": 7.280804253273183e-06, "loss": 2.3153, "step": 52670 }, { "epoch": 0.674304, "grad_norm": 1.4921875, "learning_rate": 7.275599721636555e-06, "loss": 2.2594, "step": 52680 }, { "epoch": 0.674432, "grad_norm": 1.65625, "learning_rate": 7.270396455206051e-06, "loss": 2.2426, "step": 52690 }, { "epoch": 0.67456, "grad_norm": 1.578125, "learning_rate": 7.265194454833921e-06, "loss": 2.2942, "step": 52700 }, { "epoch": 0.674688, "grad_norm": 1.625, "learning_rate": 7.259993721372227e-06, "loss": 2.3018, "step": 52710 }, { "epoch": 0.674816, "grad_norm": 1.5546875, "learning_rate": 7.254794255672814e-06, "loss": 2.262, "step": 52720 }, { "epoch": 0.674944, "grad_norm": 1.5078125, "learning_rate": 7.249596058587322e-06, "loss": 2.295, "step": 52730 }, { "epoch": 0.675072, "grad_norm": 1.515625, "learning_rate": 7.244399130967177e-06, "loss": 2.2667, "step": 52740 }, { "epoch": 0.6752, "grad_norm": 1.640625, "learning_rate": 7.239203473663602e-06, "loss": 2.2857, "step": 52750 }, { "epoch": 0.675328, "grad_norm": 1.78125, "learning_rate": 7.2340090875276195e-06, "loss": 2.2721, "step": 52760 }, { "epoch": 0.675456, "grad_norm": 1.484375, "learning_rate": 7.228815973410025e-06, "loss": 2.2878, "step": 52770 }, { "epoch": 0.675584, "grad_norm": 1.515625, "learning_rate": 7.22362413216142e-06, "loss": 2.3025, "step": 52780 }, { "epoch": 0.675712, "grad_norm": 3.0625, "learning_rate": 7.218433564632198e-06, "loss": 2.2845, "step": 52790 }, { "epoch": 0.67584, "grad_norm": 1.5625, "learning_rate": 7.213244271672534e-06, "loss": 2.2739, "step": 52800 }, { "epoch": 0.675968, "grad_norm": 1.5390625, "learning_rate": 7.208056254132413e-06, "loss": 2.274, "step": 52810 }, { "epoch": 0.676096, "grad_norm": 1.484375, "learning_rate": 7.202869512861582e-06, "loss": 2.2898, "step": 52820 }, { "epoch": 0.676224, "grad_norm": 1.6484375, "learning_rate": 7.197684048709601e-06, "loss": 2.2574, "step": 52830 }, { "epoch": 0.676352, "grad_norm": 1.5625, "learning_rate": 7.192499862525822e-06, "loss": 2.298, "step": 52840 }, { "epoch": 0.67648, "grad_norm": 1.5078125, "learning_rate": 7.18731695515937e-06, "loss": 2.2704, "step": 52850 }, { "epoch": 0.676608, "grad_norm": 2.25, "learning_rate": 7.182135327459176e-06, "loss": 2.2986, "step": 52860 }, { "epoch": 0.676736, "grad_norm": 1.5703125, "learning_rate": 7.176954980273962e-06, "loss": 2.2739, "step": 52870 }, { "epoch": 0.676864, "grad_norm": 1.53125, "learning_rate": 7.1717759144522234e-06, "loss": 2.2529, "step": 52880 }, { "epoch": 0.676992, "grad_norm": 1.4765625, "learning_rate": 7.166598130842264e-06, "loss": 2.2554, "step": 52890 }, { "epoch": 0.67712, "grad_norm": 1.484375, "learning_rate": 7.161421630292176e-06, "loss": 2.3169, "step": 52900 }, { "epoch": 0.677248, "grad_norm": 1.6640625, "learning_rate": 7.15624641364982e-06, "loss": 2.2852, "step": 52910 }, { "epoch": 0.677376, "grad_norm": 1.6171875, "learning_rate": 7.151072481762882e-06, "loss": 2.2621, "step": 52920 }, { "epoch": 0.677504, "grad_norm": 1.4765625, "learning_rate": 7.145899835478805e-06, "loss": 2.2394, "step": 52930 }, { "epoch": 0.677632, "grad_norm": 1.6015625, "learning_rate": 7.1407284756448396e-06, "loss": 2.3033, "step": 52940 }, { "epoch": 0.67776, "grad_norm": 1.5078125, "learning_rate": 7.135558403108025e-06, "loss": 2.2994, "step": 52950 }, { "epoch": 0.677888, "grad_norm": 1.5546875, "learning_rate": 7.130389618715173e-06, "loss": 2.2757, "step": 52960 }, { "epoch": 0.678016, "grad_norm": 1.578125, "learning_rate": 7.125222123312905e-06, "loss": 2.2821, "step": 52970 }, { "epoch": 0.678144, "grad_norm": 1.6015625, "learning_rate": 7.120055917747626e-06, "loss": 2.2951, "step": 52980 }, { "epoch": 0.678272, "grad_norm": 1.515625, "learning_rate": 7.1148910028655165e-06, "loss": 2.2796, "step": 52990 }, { "epoch": 0.6784, "grad_norm": 2.25, "learning_rate": 7.109727379512562e-06, "loss": 2.2648, "step": 53000 }, { "epoch": 0.678528, "grad_norm": 1.5078125, "learning_rate": 7.1045650485345345e-06, "loss": 2.2849, "step": 53010 }, { "epoch": 0.678656, "grad_norm": 1.4921875, "learning_rate": 7.099404010776974e-06, "loss": 2.2893, "step": 53020 }, { "epoch": 0.678784, "grad_norm": 1.5, "learning_rate": 7.094244267085245e-06, "loss": 2.2649, "step": 53030 }, { "epoch": 0.678912, "grad_norm": 1.703125, "learning_rate": 7.0890858183044715e-06, "loss": 2.282, "step": 53040 }, { "epoch": 0.67904, "grad_norm": 1.4921875, "learning_rate": 7.083928665279562e-06, "loss": 2.2594, "step": 53050 }, { "epoch": 0.679168, "grad_norm": 1.5859375, "learning_rate": 7.0787728088552444e-06, "loss": 2.2846, "step": 53060 }, { "epoch": 0.679296, "grad_norm": 1.703125, "learning_rate": 7.073618249875998e-06, "loss": 2.311, "step": 53070 }, { "epoch": 0.679424, "grad_norm": 1.6015625, "learning_rate": 7.0684649891861144e-06, "loss": 2.331, "step": 53080 }, { "epoch": 0.679552, "grad_norm": 1.46875, "learning_rate": 7.063313027629665e-06, "loss": 2.3214, "step": 53090 }, { "epoch": 0.67968, "grad_norm": 12.25, "learning_rate": 7.0581623660505e-06, "loss": 2.28, "step": 53100 }, { "epoch": 0.679808, "grad_norm": 1.9296875, "learning_rate": 7.0530130052922676e-06, "loss": 2.2975, "step": 53110 }, { "epoch": 0.679936, "grad_norm": 1.515625, "learning_rate": 7.047864946198404e-06, "loss": 2.2837, "step": 53120 }, { "epoch": 0.680064, "grad_norm": 1.5625, "learning_rate": 7.042718189612114e-06, "loss": 2.2802, "step": 53130 }, { "epoch": 0.680192, "grad_norm": 1.484375, "learning_rate": 7.037572736376423e-06, "loss": 2.289, "step": 53140 }, { "epoch": 0.68032, "grad_norm": 2.296875, "learning_rate": 7.032428587334104e-06, "loss": 2.275, "step": 53150 }, { "epoch": 0.680448, "grad_norm": 1.609375, "learning_rate": 7.027285743327743e-06, "loss": 2.2731, "step": 53160 }, { "epoch": 0.680576, "grad_norm": 1.3984375, "learning_rate": 7.0221442051997094e-06, "loss": 2.2864, "step": 53170 }, { "epoch": 0.680704, "grad_norm": 1.828125, "learning_rate": 7.017003973792141e-06, "loss": 2.2854, "step": 53180 }, { "epoch": 0.680832, "grad_norm": 1.5859375, "learning_rate": 7.01186504994698e-06, "loss": 2.2931, "step": 53190 }, { "epoch": 0.68096, "grad_norm": 1.5390625, "learning_rate": 7.006727434505954e-06, "loss": 2.2721, "step": 53200 }, { "epoch": 0.681088, "grad_norm": 1.546875, "learning_rate": 7.001591128310563e-06, "loss": 2.2825, "step": 53210 }, { "epoch": 0.681216, "grad_norm": 1.578125, "learning_rate": 6.9964561322021e-06, "loss": 2.298, "step": 53220 }, { "epoch": 0.681344, "grad_norm": 1.6171875, "learning_rate": 6.991322447021653e-06, "loss": 2.2555, "step": 53230 }, { "epoch": 0.681472, "grad_norm": 1.546875, "learning_rate": 6.986190073610068e-06, "loss": 2.3133, "step": 53240 }, { "epoch": 0.6816, "grad_norm": 1.703125, "learning_rate": 6.981059012808016e-06, "loss": 2.2952, "step": 53250 }, { "epoch": 0.681728, "grad_norm": 1.7734375, "learning_rate": 6.975929265455916e-06, "loss": 2.2924, "step": 53260 }, { "epoch": 0.681856, "grad_norm": 1.4453125, "learning_rate": 6.970800832393991e-06, "loss": 2.2897, "step": 53270 }, { "epoch": 0.681984, "grad_norm": 1.5, "learning_rate": 6.965673714462252e-06, "loss": 2.2672, "step": 53280 }, { "epoch": 0.682112, "grad_norm": 1.5703125, "learning_rate": 6.960547912500475e-06, "loss": 2.2934, "step": 53290 }, { "epoch": 0.68224, "grad_norm": 1.5234375, "learning_rate": 6.955423427348237e-06, "loss": 2.2581, "step": 53300 }, { "epoch": 0.682368, "grad_norm": 1.59375, "learning_rate": 6.950300259844903e-06, "loss": 2.3071, "step": 53310 }, { "epoch": 0.682496, "grad_norm": 1.453125, "learning_rate": 6.945178410829603e-06, "loss": 2.2686, "step": 53320 }, { "epoch": 0.682624, "grad_norm": 1.5546875, "learning_rate": 6.940057881141267e-06, "loss": 2.2866, "step": 53330 }, { "epoch": 0.682752, "grad_norm": 1.59375, "learning_rate": 6.934938671618607e-06, "loss": 2.2951, "step": 53340 }, { "epoch": 0.68288, "grad_norm": 1.625, "learning_rate": 6.929820783100111e-06, "loss": 2.2858, "step": 53350 }, { "epoch": 0.683008, "grad_norm": 1.546875, "learning_rate": 6.924704216424057e-06, "loss": 2.3112, "step": 53360 }, { "epoch": 0.683136, "grad_norm": 2.953125, "learning_rate": 6.919588972428504e-06, "loss": 2.3016, "step": 53370 }, { "epoch": 0.683264, "grad_norm": 1.4375, "learning_rate": 6.914475051951304e-06, "loss": 2.2659, "step": 53380 }, { "epoch": 0.683392, "grad_norm": 1.875, "learning_rate": 6.909362455830071e-06, "loss": 2.2822, "step": 53390 }, { "epoch": 0.68352, "grad_norm": 2.34375, "learning_rate": 6.904251184902222e-06, "loss": 2.2586, "step": 53400 }, { "epoch": 0.683648, "grad_norm": 2.09375, "learning_rate": 6.899141240004948e-06, "loss": 2.2827, "step": 53410 }, { "epoch": 0.683776, "grad_norm": 1.5234375, "learning_rate": 6.89403262197523e-06, "loss": 2.2664, "step": 53420 }, { "epoch": 0.683904, "grad_norm": 1.5859375, "learning_rate": 6.888925331649815e-06, "loss": 2.2271, "step": 53430 }, { "epoch": 0.684032, "grad_norm": 1.4921875, "learning_rate": 6.883819369865249e-06, "loss": 2.2525, "step": 53440 }, { "epoch": 0.68416, "grad_norm": 1.5546875, "learning_rate": 6.878714737457862e-06, "loss": 2.2736, "step": 53450 }, { "epoch": 0.684288, "grad_norm": 1.5078125, "learning_rate": 6.873611435263747e-06, "loss": 2.2837, "step": 53460 }, { "epoch": 0.684416, "grad_norm": 1.4140625, "learning_rate": 6.868509464118798e-06, "loss": 2.2683, "step": 53470 }, { "epoch": 0.684544, "grad_norm": 1.5078125, "learning_rate": 6.863408824858683e-06, "loss": 2.2854, "step": 53480 }, { "epoch": 0.684672, "grad_norm": 1.59375, "learning_rate": 6.85830951831886e-06, "loss": 2.3153, "step": 53490 }, { "epoch": 0.6848, "grad_norm": 8.4375, "learning_rate": 6.8532115453345515e-06, "loss": 2.3229, "step": 53500 }, { "epoch": 0.684928, "grad_norm": 1.703125, "learning_rate": 6.8481149067407746e-06, "loss": 2.3043, "step": 53510 }, { "epoch": 0.685056, "grad_norm": 2.390625, "learning_rate": 6.843019603372335e-06, "loss": 2.2751, "step": 53520 }, { "epoch": 0.685184, "grad_norm": 1.7109375, "learning_rate": 6.837925636063794e-06, "loss": 2.2856, "step": 53530 }, { "epoch": 0.685312, "grad_norm": 1.578125, "learning_rate": 6.832833005649521e-06, "loss": 2.2531, "step": 53540 }, { "epoch": 0.68544, "grad_norm": 1.484375, "learning_rate": 6.827741712963652e-06, "loss": 2.2721, "step": 53550 }, { "epoch": 0.685568, "grad_norm": 8.3125, "learning_rate": 6.822651758840108e-06, "loss": 2.3026, "step": 53560 }, { "epoch": 0.685696, "grad_norm": 1.5703125, "learning_rate": 6.817563144112596e-06, "loss": 2.3053, "step": 53570 }, { "epoch": 0.685824, "grad_norm": 1.5859375, "learning_rate": 6.812475869614586e-06, "loss": 2.3032, "step": 53580 }, { "epoch": 0.685952, "grad_norm": 1.4296875, "learning_rate": 6.807389936179346e-06, "loss": 2.263, "step": 53590 }, { "epoch": 0.68608, "grad_norm": 1.6484375, "learning_rate": 6.802305344639925e-06, "loss": 2.2662, "step": 53600 }, { "epoch": 0.686208, "grad_norm": 1.6328125, "learning_rate": 6.7972220958291336e-06, "loss": 2.2895, "step": 53610 }, { "epoch": 0.686336, "grad_norm": 1.6640625, "learning_rate": 6.79214019057958e-06, "loss": 2.3004, "step": 53620 }, { "epoch": 0.686464, "grad_norm": 1.5546875, "learning_rate": 6.787059629723654e-06, "loss": 2.2952, "step": 53630 }, { "epoch": 0.686592, "grad_norm": 1.6328125, "learning_rate": 6.781980414093506e-06, "loss": 2.291, "step": 53640 }, { "epoch": 0.68672, "grad_norm": 2.078125, "learning_rate": 6.776902544521083e-06, "loss": 2.2774, "step": 53650 }, { "epoch": 0.686848, "grad_norm": 1.6171875, "learning_rate": 6.771826021838115e-06, "loss": 2.2926, "step": 53660 }, { "epoch": 0.686976, "grad_norm": 1.609375, "learning_rate": 6.766750846876082e-06, "loss": 2.2786, "step": 53670 }, { "epoch": 0.687104, "grad_norm": 1.5546875, "learning_rate": 6.761677020466292e-06, "loss": 2.2737, "step": 53680 }, { "epoch": 0.687232, "grad_norm": 1.640625, "learning_rate": 6.756604543439789e-06, "loss": 2.2696, "step": 53690 }, { "epoch": 0.68736, "grad_norm": 1.4765625, "learning_rate": 6.751533416627403e-06, "loss": 2.2754, "step": 53700 }, { "epoch": 0.687488, "grad_norm": 1.4453125, "learning_rate": 6.746463640859771e-06, "loss": 2.2971, "step": 53710 }, { "epoch": 0.687616, "grad_norm": 1.5078125, "learning_rate": 6.741395216967275e-06, "loss": 2.2977, "step": 53720 }, { "epoch": 0.687744, "grad_norm": 1.5078125, "learning_rate": 6.736328145780093e-06, "loss": 2.297, "step": 53730 }, { "epoch": 0.687872, "grad_norm": 1.453125, "learning_rate": 6.7312624281281865e-06, "loss": 2.2908, "step": 53740 }, { "epoch": 0.688, "grad_norm": 1.578125, "learning_rate": 6.726198064841272e-06, "loss": 2.2766, "step": 53750 }, { "epoch": 0.688128, "grad_norm": 1.53125, "learning_rate": 6.721135056748867e-06, "loss": 2.2635, "step": 53760 }, { "epoch": 0.688256, "grad_norm": 1.5546875, "learning_rate": 6.716073404680266e-06, "loss": 2.2585, "step": 53770 }, { "epoch": 0.688384, "grad_norm": 1.4609375, "learning_rate": 6.711013109464513e-06, "loss": 2.2597, "step": 53780 }, { "epoch": 0.688512, "grad_norm": 1.4921875, "learning_rate": 6.705954171930476e-06, "loss": 2.3317, "step": 53790 }, { "epoch": 0.68864, "grad_norm": 1.53125, "learning_rate": 6.700896592906768e-06, "loss": 2.2401, "step": 53800 }, { "epoch": 0.688768, "grad_norm": 1.53125, "learning_rate": 6.695840373221771e-06, "loss": 2.275, "step": 53810 }, { "epoch": 0.688896, "grad_norm": 1.4765625, "learning_rate": 6.690785513703685e-06, "loss": 2.256, "step": 53820 }, { "epoch": 0.689024, "grad_norm": 1.53125, "learning_rate": 6.685732015180446e-06, "loss": 2.2713, "step": 53830 }, { "epoch": 0.689152, "grad_norm": 1.5546875, "learning_rate": 6.68067987847979e-06, "loss": 2.2905, "step": 53840 }, { "epoch": 0.68928, "grad_norm": 1.6171875, "learning_rate": 6.675629104429229e-06, "loss": 2.2553, "step": 53850 }, { "epoch": 0.689408, "grad_norm": 1.5, "learning_rate": 6.6705796938560344e-06, "loss": 2.3023, "step": 53860 }, { "epoch": 0.689536, "grad_norm": 1.4765625, "learning_rate": 6.665531647587274e-06, "loss": 2.2697, "step": 53870 }, { "epoch": 0.689664, "grad_norm": 1.65625, "learning_rate": 6.66048496644979e-06, "loss": 2.3036, "step": 53880 }, { "epoch": 0.689792, "grad_norm": 1.5703125, "learning_rate": 6.655439651270178e-06, "loss": 2.2606, "step": 53890 }, { "epoch": 0.68992, "grad_norm": 1.5625, "learning_rate": 6.650395702874851e-06, "loss": 2.2982, "step": 53900 }, { "epoch": 0.690048, "grad_norm": 1.5859375, "learning_rate": 6.645353122089962e-06, "loss": 2.2966, "step": 53910 }, { "epoch": 0.690176, "grad_norm": 1.4375, "learning_rate": 6.6403119097414415e-06, "loss": 2.2787, "step": 53920 }, { "epoch": 0.690304, "grad_norm": 1.4140625, "learning_rate": 6.635272066655032e-06, "loss": 2.2776, "step": 53930 }, { "epoch": 0.690432, "grad_norm": 1.71875, "learning_rate": 6.630233593656207e-06, "loss": 2.2648, "step": 53940 }, { "epoch": 0.69056, "grad_norm": 1.578125, "learning_rate": 6.625196491570243e-06, "loss": 2.2655, "step": 53950 }, { "epoch": 0.690688, "grad_norm": 1.71875, "learning_rate": 6.620160761222188e-06, "loss": 2.2815, "step": 53960 }, { "epoch": 0.690816, "grad_norm": 1.484375, "learning_rate": 6.615126403436852e-06, "loss": 2.2936, "step": 53970 }, { "epoch": 0.690944, "grad_norm": 1.5, "learning_rate": 6.610093419038834e-06, "loss": 2.275, "step": 53980 }, { "epoch": 0.691072, "grad_norm": 1.4453125, "learning_rate": 6.605061808852512e-06, "loss": 2.2719, "step": 53990 }, { "epoch": 0.6912, "grad_norm": 1.578125, "learning_rate": 6.600031573702015e-06, "loss": 2.265, "step": 54000 }, { "epoch": 0.691328, "grad_norm": 1.6328125, "learning_rate": 6.595002714411274e-06, "loss": 2.2885, "step": 54010 }, { "epoch": 0.691456, "grad_norm": 4.125, "learning_rate": 6.58997523180398e-06, "loss": 2.3098, "step": 54020 }, { "epoch": 0.691584, "grad_norm": 1.6875, "learning_rate": 6.584949126703599e-06, "loss": 2.2625, "step": 54030 }, { "epoch": 0.691712, "grad_norm": 1.578125, "learning_rate": 6.579924399933385e-06, "loss": 2.2442, "step": 54040 }, { "epoch": 0.69184, "grad_norm": 1.5546875, "learning_rate": 6.574901052316341e-06, "loss": 2.2822, "step": 54050 }, { "epoch": 0.691968, "grad_norm": 1.515625, "learning_rate": 6.569879084675265e-06, "loss": 2.2807, "step": 54060 }, { "epoch": 0.692096, "grad_norm": 1.6015625, "learning_rate": 6.564858497832727e-06, "loss": 2.2862, "step": 54070 }, { "epoch": 0.692224, "grad_norm": 1.5078125, "learning_rate": 6.559839292611054e-06, "loss": 2.2499, "step": 54080 }, { "epoch": 0.692352, "grad_norm": 1.484375, "learning_rate": 6.5548214698323684e-06, "loss": 2.307, "step": 54090 }, { "epoch": 0.69248, "grad_norm": 1.546875, "learning_rate": 6.549805030318558e-06, "loss": 2.2873, "step": 54100 }, { "epoch": 0.692608, "grad_norm": 1.5625, "learning_rate": 6.544789974891276e-06, "loss": 2.2709, "step": 54110 }, { "epoch": 0.692736, "grad_norm": 1.53125, "learning_rate": 6.539776304371957e-06, "loss": 2.2805, "step": 54120 }, { "epoch": 0.692864, "grad_norm": 1.7578125, "learning_rate": 6.534764019581809e-06, "loss": 2.2713, "step": 54130 }, { "epoch": 0.692992, "grad_norm": 1.546875, "learning_rate": 6.529753121341817e-06, "loss": 2.2687, "step": 54140 }, { "epoch": 0.69312, "grad_norm": 1.515625, "learning_rate": 6.524743610472724e-06, "loss": 2.288, "step": 54150 }, { "epoch": 0.693248, "grad_norm": 1.6015625, "learning_rate": 6.519735487795056e-06, "loss": 2.2924, "step": 54160 }, { "epoch": 0.693376, "grad_norm": 1.90625, "learning_rate": 6.5147287541291145e-06, "loss": 2.2937, "step": 54170 }, { "epoch": 0.693504, "grad_norm": 1.5234375, "learning_rate": 6.509723410294975e-06, "loss": 2.2619, "step": 54180 }, { "epoch": 0.693632, "grad_norm": 1.453125, "learning_rate": 6.504719457112466e-06, "loss": 2.2485, "step": 54190 }, { "epoch": 0.69376, "grad_norm": 2.90625, "learning_rate": 6.499716895401211e-06, "loss": 2.2926, "step": 54200 }, { "epoch": 0.693888, "grad_norm": 1.5390625, "learning_rate": 6.4947157259806e-06, "loss": 2.3055, "step": 54210 }, { "epoch": 0.694016, "grad_norm": 1.7734375, "learning_rate": 6.489715949669783e-06, "loss": 2.2898, "step": 54220 }, { "epoch": 0.694144, "grad_norm": 1.6015625, "learning_rate": 6.4847175672876946e-06, "loss": 2.2572, "step": 54230 }, { "epoch": 0.694272, "grad_norm": 1.5390625, "learning_rate": 6.479720579653037e-06, "loss": 2.2928, "step": 54240 }, { "epoch": 0.6944, "grad_norm": 1.5703125, "learning_rate": 6.474724987584291e-06, "loss": 2.2914, "step": 54250 }, { "epoch": 0.694528, "grad_norm": 1.546875, "learning_rate": 6.469730791899688e-06, "loss": 2.2665, "step": 54260 }, { "epoch": 0.694656, "grad_norm": 1.578125, "learning_rate": 6.464737993417253e-06, "loss": 2.2563, "step": 54270 }, { "epoch": 0.694784, "grad_norm": 1.609375, "learning_rate": 6.459746592954777e-06, "loss": 2.2687, "step": 54280 }, { "epoch": 0.694912, "grad_norm": 1.7265625, "learning_rate": 6.4547565913298086e-06, "loss": 2.2614, "step": 54290 }, { "epoch": 0.69504, "grad_norm": 1.5078125, "learning_rate": 6.449767989359683e-06, "loss": 2.295, "step": 54300 }, { "epoch": 0.695168, "grad_norm": 1.5234375, "learning_rate": 6.444780787861507e-06, "loss": 2.2923, "step": 54310 }, { "epoch": 0.695296, "grad_norm": 1.6015625, "learning_rate": 6.439794987652133e-06, "loss": 2.2451, "step": 54320 }, { "epoch": 0.695424, "grad_norm": 1.4375, "learning_rate": 6.4348105895482226e-06, "loss": 2.2893, "step": 54330 }, { "epoch": 0.695552, "grad_norm": 1.546875, "learning_rate": 6.429827594366177e-06, "loss": 2.2909, "step": 54340 }, { "epoch": 0.69568, "grad_norm": 1.7578125, "learning_rate": 6.424846002922179e-06, "loss": 2.2707, "step": 54350 }, { "epoch": 0.695808, "grad_norm": 3.984375, "learning_rate": 6.419865816032187e-06, "loss": 2.2528, "step": 54360 }, { "epoch": 0.695936, "grad_norm": 1.53125, "learning_rate": 6.414887034511912e-06, "loss": 2.2856, "step": 54370 }, { "epoch": 0.696064, "grad_norm": 1.6328125, "learning_rate": 6.409909659176853e-06, "loss": 2.2678, "step": 54380 }, { "epoch": 0.696192, "grad_norm": 1.765625, "learning_rate": 6.404933690842275e-06, "loss": 2.2897, "step": 54390 }, { "epoch": 0.69632, "grad_norm": 1.5546875, "learning_rate": 6.399959130323197e-06, "loss": 2.2983, "step": 54400 }, { "epoch": 0.696448, "grad_norm": 1.5390625, "learning_rate": 6.394985978434427e-06, "loss": 2.2587, "step": 54410 }, { "epoch": 0.696576, "grad_norm": 1.53125, "learning_rate": 6.3900142359905405e-06, "loss": 2.2958, "step": 54420 }, { "epoch": 0.696704, "grad_norm": 1.5078125, "learning_rate": 6.385043903805859e-06, "loss": 2.2666, "step": 54430 }, { "epoch": 0.696832, "grad_norm": 1.515625, "learning_rate": 6.3800749826945105e-06, "loss": 2.2584, "step": 54440 }, { "epoch": 0.69696, "grad_norm": 1.6484375, "learning_rate": 6.375107473470364e-06, "loss": 2.2715, "step": 54450 }, { "epoch": 0.697088, "grad_norm": 17.0, "learning_rate": 6.370141376947053e-06, "loss": 2.2753, "step": 54460 }, { "epoch": 0.697216, "grad_norm": 1.359375, "learning_rate": 6.365176693938012e-06, "loss": 2.3073, "step": 54470 }, { "epoch": 0.697344, "grad_norm": 1.4296875, "learning_rate": 6.360213425256407e-06, "loss": 2.3027, "step": 54480 }, { "epoch": 0.697472, "grad_norm": 1.4921875, "learning_rate": 6.355251571715196e-06, "loss": 2.3006, "step": 54490 }, { "epoch": 0.6976, "grad_norm": 1.53125, "learning_rate": 6.350291134127102e-06, "loss": 2.2991, "step": 54500 }, { "epoch": 0.697728, "grad_norm": 1.5703125, "learning_rate": 6.345332113304603e-06, "loss": 2.3057, "step": 54510 }, { "epoch": 0.697856, "grad_norm": 1.6875, "learning_rate": 6.3403745100599585e-06, "loss": 2.2792, "step": 54520 }, { "epoch": 0.697984, "grad_norm": 1.4765625, "learning_rate": 6.335418325205196e-06, "loss": 2.2474, "step": 54530 }, { "epoch": 0.698112, "grad_norm": 1.71875, "learning_rate": 6.330463559552093e-06, "loss": 2.2881, "step": 54540 }, { "epoch": 0.69824, "grad_norm": 1.7421875, "learning_rate": 6.325510213912226e-06, "loss": 2.3276, "step": 54550 }, { "epoch": 0.698368, "grad_norm": 1.4921875, "learning_rate": 6.32055828909691e-06, "loss": 2.2905, "step": 54560 }, { "epoch": 0.698496, "grad_norm": 1.65625, "learning_rate": 6.315607785917227e-06, "loss": 2.3023, "step": 54570 }, { "epoch": 0.698624, "grad_norm": 1.6796875, "learning_rate": 6.310658705184063e-06, "loss": 2.2756, "step": 54580 }, { "epoch": 0.698752, "grad_norm": 1.5859375, "learning_rate": 6.305711047708022e-06, "loss": 2.2949, "step": 54590 }, { "epoch": 0.69888, "grad_norm": 2.734375, "learning_rate": 6.300764814299506e-06, "loss": 2.2806, "step": 54600 }, { "epoch": 0.699008, "grad_norm": 1.5234375, "learning_rate": 6.295820005768684e-06, "loss": 2.2587, "step": 54610 }, { "epoch": 0.699136, "grad_norm": 1.5078125, "learning_rate": 6.2908766229254685e-06, "loss": 2.2781, "step": 54620 }, { "epoch": 0.699264, "grad_norm": 1.5, "learning_rate": 6.285934666579559e-06, "loss": 2.2957, "step": 54630 }, { "epoch": 0.699392, "grad_norm": 1.5234375, "learning_rate": 6.280994137540421e-06, "loss": 2.297, "step": 54640 }, { "epoch": 0.69952, "grad_norm": 1.5546875, "learning_rate": 6.276055036617268e-06, "loss": 2.2953, "step": 54650 }, { "epoch": 0.699648, "grad_norm": 1.5859375, "learning_rate": 6.271117364619111e-06, "loss": 2.2829, "step": 54660 }, { "epoch": 0.699776, "grad_norm": 1.671875, "learning_rate": 6.266181122354697e-06, "loss": 2.2869, "step": 54670 }, { "epoch": 0.699904, "grad_norm": 1.5703125, "learning_rate": 6.26124631063254e-06, "loss": 2.2583, "step": 54680 }, { "epoch": 0.700032, "grad_norm": 1.5234375, "learning_rate": 6.256312930260953e-06, "loss": 2.2886, "step": 54690 }, { "epoch": 0.70016, "grad_norm": 1.484375, "learning_rate": 6.251380982047972e-06, "loss": 2.2796, "step": 54700 }, { "epoch": 0.700288, "grad_norm": 2.078125, "learning_rate": 6.246450466801425e-06, "loss": 2.2838, "step": 54710 }, { "epoch": 0.700416, "grad_norm": 1.5, "learning_rate": 6.241521385328905e-06, "loss": 2.2607, "step": 54720 }, { "epoch": 0.700544, "grad_norm": 1.515625, "learning_rate": 6.23659373843775e-06, "loss": 2.2644, "step": 54730 }, { "epoch": 0.700672, "grad_norm": 1.53125, "learning_rate": 6.231667526935081e-06, "loss": 2.2783, "step": 54740 }, { "epoch": 0.7008, "grad_norm": 1.578125, "learning_rate": 6.226742751627788e-06, "loss": 2.27, "step": 54750 }, { "epoch": 0.700928, "grad_norm": 1.5859375, "learning_rate": 6.221819413322502e-06, "loss": 2.2888, "step": 54760 }, { "epoch": 0.701056, "grad_norm": 1.546875, "learning_rate": 6.216897512825643e-06, "loss": 2.2693, "step": 54770 }, { "epoch": 0.701184, "grad_norm": 1.65625, "learning_rate": 6.211977050943384e-06, "loss": 2.2748, "step": 54780 }, { "epoch": 0.701312, "grad_norm": 1.5078125, "learning_rate": 6.2070580284816645e-06, "loss": 2.2954, "step": 54790 }, { "epoch": 0.70144, "grad_norm": 1.4453125, "learning_rate": 6.202140446246191e-06, "loss": 2.2733, "step": 54800 }, { "epoch": 0.701568, "grad_norm": 1.6171875, "learning_rate": 6.1972243050424255e-06, "loss": 2.258, "step": 54810 }, { "epoch": 0.701696, "grad_norm": 1.5859375, "learning_rate": 6.192309605675603e-06, "loss": 2.266, "step": 54820 }, { "epoch": 0.701824, "grad_norm": 1.75, "learning_rate": 6.187396348950721e-06, "loss": 2.2968, "step": 54830 }, { "epoch": 0.701952, "grad_norm": 1.5078125, "learning_rate": 6.182484535672533e-06, "loss": 2.2857, "step": 54840 }, { "epoch": 0.70208, "grad_norm": 1.7109375, "learning_rate": 6.177574166645566e-06, "loss": 2.3003, "step": 54850 }, { "epoch": 0.702208, "grad_norm": 1.5390625, "learning_rate": 6.172665242674109e-06, "loss": 2.2879, "step": 54860 }, { "epoch": 0.702336, "grad_norm": 1.59375, "learning_rate": 6.167757764562204e-06, "loss": 2.2911, "step": 54870 }, { "epoch": 0.702464, "grad_norm": 1.6015625, "learning_rate": 6.162851733113666e-06, "loss": 2.2823, "step": 54880 }, { "epoch": 0.702592, "grad_norm": 1.484375, "learning_rate": 6.1579471491320725e-06, "loss": 2.2634, "step": 54890 }, { "epoch": 0.70272, "grad_norm": 1.5390625, "learning_rate": 6.153044013420768e-06, "loss": 2.2694, "step": 54900 }, { "epoch": 0.702848, "grad_norm": 2.234375, "learning_rate": 6.148142326782843e-06, "loss": 2.2556, "step": 54910 }, { "epoch": 0.702976, "grad_norm": 1.578125, "learning_rate": 6.143242090021168e-06, "loss": 2.3058, "step": 54920 }, { "epoch": 0.703104, "grad_norm": 1.71875, "learning_rate": 6.138343303938372e-06, "loss": 2.2508, "step": 54930 }, { "epoch": 0.703232, "grad_norm": 1.5, "learning_rate": 6.133445969336835e-06, "loss": 2.3096, "step": 54940 }, { "epoch": 0.70336, "grad_norm": 1.484375, "learning_rate": 6.128550087018715e-06, "loss": 2.2922, "step": 54950 }, { "epoch": 0.703488, "grad_norm": 1.46875, "learning_rate": 6.123655657785923e-06, "loss": 2.2902, "step": 54960 }, { "epoch": 0.703616, "grad_norm": 1.5859375, "learning_rate": 6.118762682440142e-06, "loss": 2.2942, "step": 54970 }, { "epoch": 0.703744, "grad_norm": 1.546875, "learning_rate": 6.1138711617827964e-06, "loss": 2.2857, "step": 54980 }, { "epoch": 0.703872, "grad_norm": 1.7578125, "learning_rate": 6.108981096615093e-06, "loss": 2.2895, "step": 54990 }, { "epoch": 0.704, "grad_norm": 1.515625, "learning_rate": 6.1040924877379886e-06, "loss": 2.2595, "step": 55000 }, { "epoch": 0.704128, "grad_norm": 1.609375, "learning_rate": 6.099205335952216e-06, "loss": 2.2745, "step": 55010 }, { "epoch": 0.704256, "grad_norm": 1.5, "learning_rate": 6.094319642058244e-06, "loss": 2.2484, "step": 55020 }, { "epoch": 0.704384, "grad_norm": 1.6015625, "learning_rate": 6.089435406856324e-06, "loss": 2.2632, "step": 55030 }, { "epoch": 0.704512, "grad_norm": 1.53125, "learning_rate": 6.0845526311464655e-06, "loss": 2.2955, "step": 55040 }, { "epoch": 0.70464, "grad_norm": 1.5703125, "learning_rate": 6.079671315728425e-06, "loss": 2.2819, "step": 55050 }, { "epoch": 0.704768, "grad_norm": 1.7109375, "learning_rate": 6.074791461401738e-06, "loss": 2.2551, "step": 55060 }, { "epoch": 0.704896, "grad_norm": 1.5859375, "learning_rate": 6.069913068965695e-06, "loss": 2.2862, "step": 55070 }, { "epoch": 0.705024, "grad_norm": 1.84375, "learning_rate": 6.06503613921933e-06, "loss": 2.2976, "step": 55080 }, { "epoch": 0.705152, "grad_norm": 1.453125, "learning_rate": 6.060160672961473e-06, "loss": 2.2816, "step": 55090 }, { "epoch": 0.70528, "grad_norm": 1.5234375, "learning_rate": 6.055286670990677e-06, "loss": 2.2899, "step": 55100 }, { "epoch": 0.705408, "grad_norm": 1.4375, "learning_rate": 6.050414134105277e-06, "loss": 2.2587, "step": 55110 }, { "epoch": 0.705536, "grad_norm": 1.5625, "learning_rate": 6.0455430631033685e-06, "loss": 2.2818, "step": 55120 }, { "epoch": 0.705664, "grad_norm": 1.578125, "learning_rate": 6.04067345878279e-06, "loss": 2.272, "step": 55130 }, { "epoch": 0.705792, "grad_norm": 1.5859375, "learning_rate": 6.035805321941158e-06, "loss": 2.2947, "step": 55140 }, { "epoch": 0.70592, "grad_norm": 1.6796875, "learning_rate": 6.030938653375843e-06, "loss": 2.2622, "step": 55150 }, { "epoch": 0.706048, "grad_norm": 1.4453125, "learning_rate": 6.026073453883964e-06, "loss": 2.2578, "step": 55160 }, { "epoch": 0.706176, "grad_norm": 1.578125, "learning_rate": 6.021209724262416e-06, "loss": 2.278, "step": 55170 }, { "epoch": 0.706304, "grad_norm": 1.5390625, "learning_rate": 6.016347465307852e-06, "loss": 2.2973, "step": 55180 }, { "epoch": 0.706432, "grad_norm": 1.4453125, "learning_rate": 6.011486677816658e-06, "loss": 2.2905, "step": 55190 }, { "epoch": 0.70656, "grad_norm": 1.6015625, "learning_rate": 6.006627362585021e-06, "loss": 2.2745, "step": 55200 }, { "epoch": 0.706688, "grad_norm": 1.609375, "learning_rate": 6.00176952040886e-06, "loss": 2.293, "step": 55210 }, { "epoch": 0.706816, "grad_norm": 1.625, "learning_rate": 5.9969131520838406e-06, "loss": 2.2764, "step": 55220 }, { "epoch": 0.706944, "grad_norm": 1.6328125, "learning_rate": 5.992058258405428e-06, "loss": 2.2782, "step": 55230 }, { "epoch": 0.707072, "grad_norm": 1.703125, "learning_rate": 5.9872048401688075e-06, "loss": 2.303, "step": 55240 }, { "epoch": 0.7072, "grad_norm": 1.484375, "learning_rate": 5.982352898168938e-06, "loss": 2.2893, "step": 55250 }, { "epoch": 0.707328, "grad_norm": 1.4921875, "learning_rate": 5.977502433200546e-06, "loss": 2.2943, "step": 55260 }, { "epoch": 0.707456, "grad_norm": 1.65625, "learning_rate": 5.972653446058093e-06, "loss": 2.2967, "step": 55270 }, { "epoch": 0.707584, "grad_norm": 1.5625, "learning_rate": 5.967805937535816e-06, "loss": 2.2812, "step": 55280 }, { "epoch": 0.707712, "grad_norm": 1.6484375, "learning_rate": 5.962959908427713e-06, "loss": 2.2881, "step": 55290 }, { "epoch": 0.70784, "grad_norm": 1.6640625, "learning_rate": 5.958115359527512e-06, "loss": 2.2775, "step": 55300 }, { "epoch": 0.707968, "grad_norm": 1.546875, "learning_rate": 5.953272291628744e-06, "loss": 2.2907, "step": 55310 }, { "epoch": 0.708096, "grad_norm": 1.5625, "learning_rate": 5.948430705524657e-06, "loss": 2.321, "step": 55320 }, { "epoch": 0.708224, "grad_norm": 1.46875, "learning_rate": 5.9435906020082625e-06, "loss": 2.2806, "step": 55330 }, { "epoch": 0.708352, "grad_norm": 1.78125, "learning_rate": 5.938751981872358e-06, "loss": 2.3089, "step": 55340 }, { "epoch": 0.70848, "grad_norm": 1.609375, "learning_rate": 5.933914845909464e-06, "loss": 2.2905, "step": 55350 }, { "epoch": 0.708608, "grad_norm": 1.5390625, "learning_rate": 5.929079194911875e-06, "loss": 2.2768, "step": 55360 }, { "epoch": 0.708736, "grad_norm": 1.4453125, "learning_rate": 5.924245029671645e-06, "loss": 2.2635, "step": 55370 }, { "epoch": 0.708864, "grad_norm": 1.5234375, "learning_rate": 5.919412350980567e-06, "loss": 2.2868, "step": 55380 }, { "epoch": 0.708992, "grad_norm": 1.4921875, "learning_rate": 5.914581159630208e-06, "loss": 2.2512, "step": 55390 }, { "epoch": 0.70912, "grad_norm": 1.53125, "learning_rate": 5.909751456411892e-06, "loss": 2.2696, "step": 55400 }, { "epoch": 0.709248, "grad_norm": 1.75, "learning_rate": 5.904923242116675e-06, "loss": 2.2819, "step": 55410 }, { "epoch": 0.709376, "grad_norm": 1.4609375, "learning_rate": 5.900096517535409e-06, "loss": 2.3126, "step": 55420 }, { "epoch": 0.709504, "grad_norm": 1.4921875, "learning_rate": 5.895271283458668e-06, "loss": 2.3004, "step": 55430 }, { "epoch": 0.709632, "grad_norm": 1.546875, "learning_rate": 5.890447540676784e-06, "loss": 2.275, "step": 55440 }, { "epoch": 0.70976, "grad_norm": 3.46875, "learning_rate": 5.885625289979877e-06, "loss": 2.2524, "step": 55450 }, { "epoch": 0.709888, "grad_norm": 1.6796875, "learning_rate": 5.880804532157783e-06, "loss": 2.3016, "step": 55460 }, { "epoch": 0.710016, "grad_norm": 1.671875, "learning_rate": 5.875985268000114e-06, "loss": 2.2889, "step": 55470 }, { "epoch": 0.710144, "grad_norm": 1.4921875, "learning_rate": 5.871167498296242e-06, "loss": 2.3019, "step": 55480 }, { "epoch": 0.710272, "grad_norm": 1.4296875, "learning_rate": 5.866351223835274e-06, "loss": 2.2816, "step": 55490 }, { "epoch": 0.7104, "grad_norm": 1.578125, "learning_rate": 5.86153644540609e-06, "loss": 2.276, "step": 55500 }, { "epoch": 0.710528, "grad_norm": 1.609375, "learning_rate": 5.8567231637973244e-06, "loss": 2.242, "step": 55510 }, { "epoch": 0.710656, "grad_norm": 1.484375, "learning_rate": 5.851911379797351e-06, "loss": 2.2735, "step": 55520 }, { "epoch": 0.710784, "grad_norm": 1.5546875, "learning_rate": 5.847101094194313e-06, "loss": 2.2546, "step": 55530 }, { "epoch": 0.710912, "grad_norm": 1.6796875, "learning_rate": 5.842292307776102e-06, "loss": 2.2705, "step": 55540 }, { "epoch": 0.71104, "grad_norm": 1.515625, "learning_rate": 5.837485021330375e-06, "loss": 2.2685, "step": 55550 }, { "epoch": 0.711168, "grad_norm": 1.46875, "learning_rate": 5.83267923564452e-06, "loss": 2.277, "step": 55560 }, { "epoch": 0.711296, "grad_norm": 1.6171875, "learning_rate": 5.8278749515057e-06, "loss": 2.2662, "step": 55570 }, { "epoch": 0.711424, "grad_norm": 1.4609375, "learning_rate": 5.823072169700825e-06, "loss": 2.2723, "step": 55580 }, { "epoch": 0.711552, "grad_norm": 1.609375, "learning_rate": 5.818270891016566e-06, "loss": 2.2624, "step": 55590 }, { "epoch": 0.71168, "grad_norm": 2.421875, "learning_rate": 5.813471116239326e-06, "loss": 2.2783, "step": 55600 }, { "epoch": 0.711808, "grad_norm": 1.5625, "learning_rate": 5.8086728461552865e-06, "loss": 2.2677, "step": 55610 }, { "epoch": 0.711936, "grad_norm": 1.5703125, "learning_rate": 5.803876081550375e-06, "loss": 2.2685, "step": 55620 }, { "epoch": 0.712064, "grad_norm": 1.6171875, "learning_rate": 5.799080823210262e-06, "loss": 2.2415, "step": 55630 }, { "epoch": 0.712192, "grad_norm": 1.6875, "learning_rate": 5.794287071920383e-06, "loss": 2.2921, "step": 55640 }, { "epoch": 0.71232, "grad_norm": 1.5625, "learning_rate": 5.789494828465923e-06, "loss": 2.2916, "step": 55650 }, { "epoch": 0.712448, "grad_norm": 2.578125, "learning_rate": 5.784704093631826e-06, "loss": 2.2843, "step": 55660 }, { "epoch": 0.712576, "grad_norm": 1.5859375, "learning_rate": 5.779914868202773e-06, "loss": 2.282, "step": 55670 }, { "epoch": 0.712704, "grad_norm": 1.5546875, "learning_rate": 5.7751271529632105e-06, "loss": 2.3074, "step": 55680 }, { "epoch": 0.712832, "grad_norm": 1.53125, "learning_rate": 5.770340948697342e-06, "loss": 2.3282, "step": 55690 }, { "epoch": 0.71296, "grad_norm": 1.515625, "learning_rate": 5.765556256189107e-06, "loss": 2.2855, "step": 55700 }, { "epoch": 0.713088, "grad_norm": 1.5625, "learning_rate": 5.76077307622221e-06, "loss": 2.2658, "step": 55710 }, { "epoch": 0.713216, "grad_norm": 1.5859375, "learning_rate": 5.755991409580105e-06, "loss": 2.2727, "step": 55720 }, { "epoch": 0.713344, "grad_norm": 1.484375, "learning_rate": 5.751211257046003e-06, "loss": 2.286, "step": 55730 }, { "epoch": 0.713472, "grad_norm": 1.5390625, "learning_rate": 5.746432619402852e-06, "loss": 2.2808, "step": 55740 }, { "epoch": 0.7136, "grad_norm": 1.4609375, "learning_rate": 5.7416554974333635e-06, "loss": 2.2812, "step": 55750 }, { "epoch": 0.713728, "grad_norm": 1.453125, "learning_rate": 5.736879891920004e-06, "loss": 2.2737, "step": 55760 }, { "epoch": 0.713856, "grad_norm": 1.75, "learning_rate": 5.732105803644987e-06, "loss": 2.2669, "step": 55770 }, { "epoch": 0.713984, "grad_norm": 1.578125, "learning_rate": 5.727333233390269e-06, "loss": 2.2701, "step": 55780 }, { "epoch": 0.714112, "grad_norm": 1.6875, "learning_rate": 5.72256218193757e-06, "loss": 2.2878, "step": 55790 }, { "epoch": 0.71424, "grad_norm": 1.6328125, "learning_rate": 5.717792650068364e-06, "loss": 2.3067, "step": 55800 }, { "epoch": 0.714368, "grad_norm": 1.5234375, "learning_rate": 5.7130246385638546e-06, "loss": 2.2999, "step": 55810 }, { "epoch": 0.714496, "grad_norm": 1.59375, "learning_rate": 5.70825814820502e-06, "loss": 2.3159, "step": 55820 }, { "epoch": 0.714624, "grad_norm": 1.625, "learning_rate": 5.703493179772582e-06, "loss": 2.2756, "step": 55830 }, { "epoch": 0.714752, "grad_norm": 1.625, "learning_rate": 5.698729734046998e-06, "loss": 2.2852, "step": 55840 }, { "epoch": 0.71488, "grad_norm": 1.515625, "learning_rate": 5.69396781180851e-06, "loss": 2.2755, "step": 55850 }, { "epoch": 0.715008, "grad_norm": 1.546875, "learning_rate": 5.689207413837077e-06, "loss": 2.2822, "step": 55860 }, { "epoch": 0.715136, "grad_norm": 1.46875, "learning_rate": 5.6844485409124125e-06, "loss": 2.2724, "step": 55870 }, { "epoch": 0.715264, "grad_norm": 1.5625, "learning_rate": 5.67969119381401e-06, "loss": 2.2339, "step": 55880 }, { "epoch": 0.715392, "grad_norm": 1.484375, "learning_rate": 5.674935373321075e-06, "loss": 2.2506, "step": 55890 }, { "epoch": 0.71552, "grad_norm": 1.4921875, "learning_rate": 5.6701810802125844e-06, "loss": 2.2513, "step": 55900 }, { "epoch": 0.715648, "grad_norm": 1.515625, "learning_rate": 5.6654283152672655e-06, "loss": 2.2737, "step": 55910 }, { "epoch": 0.715776, "grad_norm": 1.6171875, "learning_rate": 5.660677079263582e-06, "loss": 2.2809, "step": 55920 }, { "epoch": 0.715904, "grad_norm": 1.6328125, "learning_rate": 5.655927372979757e-06, "loss": 2.3003, "step": 55930 }, { "epoch": 0.716032, "grad_norm": 1.5234375, "learning_rate": 5.651179197193769e-06, "loss": 2.3307, "step": 55940 }, { "epoch": 0.71616, "grad_norm": 1.515625, "learning_rate": 5.646432552683323e-06, "loss": 2.2571, "step": 55950 }, { "epoch": 0.716288, "grad_norm": 1.5234375, "learning_rate": 5.641687440225905e-06, "loss": 2.2481, "step": 55960 }, { "epoch": 0.716416, "grad_norm": 1.609375, "learning_rate": 5.636943860598727e-06, "loss": 2.3066, "step": 55970 }, { "epoch": 0.716544, "grad_norm": 1.65625, "learning_rate": 5.632201814578745e-06, "loss": 2.2683, "step": 55980 }, { "epoch": 0.716672, "grad_norm": 1.6015625, "learning_rate": 5.627461302942695e-06, "loss": 2.278, "step": 55990 }, { "epoch": 0.7168, "grad_norm": 1.7265625, "learning_rate": 5.6227223264670254e-06, "loss": 2.29, "step": 56000 }, { "epoch": 0.716928, "grad_norm": 1.5546875, "learning_rate": 5.6179848859279555e-06, "loss": 2.2484, "step": 56010 }, { "epoch": 0.717056, "grad_norm": 1.546875, "learning_rate": 5.6132489821014524e-06, "loss": 2.2857, "step": 56020 }, { "epoch": 0.717184, "grad_norm": 1.921875, "learning_rate": 5.6085146157632164e-06, "loss": 2.2511, "step": 56030 }, { "epoch": 0.717312, "grad_norm": 1.75, "learning_rate": 5.603781787688709e-06, "loss": 2.2789, "step": 56040 }, { "epoch": 0.71744, "grad_norm": 1.703125, "learning_rate": 5.599050498653144e-06, "loss": 2.2874, "step": 56050 }, { "epoch": 0.717568, "grad_norm": 1.4921875, "learning_rate": 5.594320749431457e-06, "loss": 2.3034, "step": 56060 }, { "epoch": 0.717696, "grad_norm": 1.5, "learning_rate": 5.589592540798374e-06, "loss": 2.2966, "step": 56070 }, { "epoch": 0.717824, "grad_norm": 1.6953125, "learning_rate": 5.584865873528332e-06, "loss": 2.301, "step": 56080 }, { "epoch": 0.717952, "grad_norm": 1.4765625, "learning_rate": 5.580140748395519e-06, "loss": 2.262, "step": 56090 }, { "epoch": 0.71808, "grad_norm": 1.6875, "learning_rate": 5.575417166173897e-06, "loss": 2.309, "step": 56100 }, { "epoch": 0.718208, "grad_norm": 1.5625, "learning_rate": 5.570695127637148e-06, "loss": 2.25, "step": 56110 }, { "epoch": 0.718336, "grad_norm": 1.6015625, "learning_rate": 5.565974633558711e-06, "loss": 2.2998, "step": 56120 }, { "epoch": 0.718464, "grad_norm": 1.6484375, "learning_rate": 5.561255684711778e-06, "loss": 2.2747, "step": 56130 }, { "epoch": 0.718592, "grad_norm": 1.5546875, "learning_rate": 5.556538281869274e-06, "loss": 2.2607, "step": 56140 }, { "epoch": 0.71872, "grad_norm": 1.6953125, "learning_rate": 5.551822425803883e-06, "loss": 2.2658, "step": 56150 }, { "epoch": 0.718848, "grad_norm": 1.484375, "learning_rate": 5.547108117288035e-06, "loss": 2.3003, "step": 56160 }, { "epoch": 0.718976, "grad_norm": 1.6484375, "learning_rate": 5.542395357093892e-06, "loss": 2.2634, "step": 56170 }, { "epoch": 0.719104, "grad_norm": 1.5546875, "learning_rate": 5.5376841459933815e-06, "loss": 2.3013, "step": 56180 }, { "epoch": 0.719232, "grad_norm": 1.609375, "learning_rate": 5.532974484758173e-06, "loss": 2.2958, "step": 56190 }, { "epoch": 0.71936, "grad_norm": 1.5, "learning_rate": 5.528266374159661e-06, "loss": 2.2851, "step": 56200 }, { "epoch": 0.719488, "grad_norm": 1.65625, "learning_rate": 5.523559814969024e-06, "loss": 2.2902, "step": 56210 }, { "epoch": 0.719616, "grad_norm": 1.5390625, "learning_rate": 5.518854807957151e-06, "loss": 2.2855, "step": 56220 }, { "epoch": 0.719744, "grad_norm": 1.4609375, "learning_rate": 5.514151353894698e-06, "loss": 2.2568, "step": 56230 }, { "epoch": 0.719872, "grad_norm": 1.65625, "learning_rate": 5.509449453552062e-06, "loss": 2.2708, "step": 56240 }, { "epoch": 0.72, "grad_norm": 1.421875, "learning_rate": 5.504749107699377e-06, "loss": 2.2779, "step": 56250 }, { "epoch": 0.720128, "grad_norm": 1.46875, "learning_rate": 5.500050317106529e-06, "loss": 2.2713, "step": 56260 }, { "epoch": 0.720256, "grad_norm": 1.53125, "learning_rate": 5.495353082543157e-06, "loss": 2.2861, "step": 56270 }, { "epoch": 0.720384, "grad_norm": 1.5234375, "learning_rate": 5.490657404778628e-06, "loss": 2.2933, "step": 56280 }, { "epoch": 0.720512, "grad_norm": 3.015625, "learning_rate": 5.4859632845820665e-06, "loss": 2.2958, "step": 56290 }, { "epoch": 0.72064, "grad_norm": 1.5, "learning_rate": 5.481270722722339e-06, "loss": 2.2853, "step": 56300 }, { "epoch": 0.720768, "grad_norm": 1.546875, "learning_rate": 5.476579719968062e-06, "loss": 2.277, "step": 56310 }, { "epoch": 0.720896, "grad_norm": 1.609375, "learning_rate": 5.471890277087579e-06, "loss": 2.2829, "step": 56320 }, { "epoch": 0.721024, "grad_norm": 1.4765625, "learning_rate": 5.467202394848998e-06, "loss": 2.2799, "step": 56330 }, { "epoch": 0.721152, "grad_norm": 1.4921875, "learning_rate": 5.462516074020161e-06, "loss": 2.292, "step": 56340 }, { "epoch": 0.72128, "grad_norm": 1.5, "learning_rate": 5.457831315368664e-06, "loss": 2.2815, "step": 56350 }, { "epoch": 0.721408, "grad_norm": 1.546875, "learning_rate": 5.4531481196618284e-06, "loss": 2.2495, "step": 56360 }, { "epoch": 0.721536, "grad_norm": 1.5, "learning_rate": 5.448466487666735e-06, "loss": 2.2816, "step": 56370 }, { "epoch": 0.721664, "grad_norm": 1.578125, "learning_rate": 5.443786420150211e-06, "loss": 2.266, "step": 56380 }, { "epoch": 0.721792, "grad_norm": 1.4921875, "learning_rate": 5.4391079178788085e-06, "loss": 2.2792, "step": 56390 }, { "epoch": 0.72192, "grad_norm": 1.671875, "learning_rate": 5.4344309816188444e-06, "loss": 2.2503, "step": 56400 }, { "epoch": 0.722048, "grad_norm": 1.5546875, "learning_rate": 5.429755612136366e-06, "loss": 2.2619, "step": 56410 }, { "epoch": 0.722176, "grad_norm": 1.53125, "learning_rate": 5.4250818101971755e-06, "loss": 2.2983, "step": 56420 }, { "epoch": 0.722304, "grad_norm": 1.5703125, "learning_rate": 5.4204095765667995e-06, "loss": 2.2726, "step": 56430 }, { "epoch": 0.722432, "grad_norm": 1.59375, "learning_rate": 5.415738912010528e-06, "loss": 2.2831, "step": 56440 }, { "epoch": 0.72256, "grad_norm": 1.578125, "learning_rate": 5.411069817293385e-06, "loss": 2.2703, "step": 56450 }, { "epoch": 0.722688, "grad_norm": 1.59375, "learning_rate": 5.406402293180131e-06, "loss": 2.2952, "step": 56460 }, { "epoch": 0.722816, "grad_norm": 1.640625, "learning_rate": 5.4017363404352805e-06, "loss": 2.2842, "step": 56470 }, { "epoch": 0.722944, "grad_norm": 1.53125, "learning_rate": 5.397071959823091e-06, "loss": 2.2633, "step": 56480 }, { "epoch": 0.723072, "grad_norm": 1.5390625, "learning_rate": 5.392409152107546e-06, "loss": 2.2897, "step": 56490 }, { "epoch": 0.7232, "grad_norm": 2.21875, "learning_rate": 5.387747918052387e-06, "loss": 2.3043, "step": 56500 }, { "epoch": 0.723328, "grad_norm": 1.5390625, "learning_rate": 5.383088258421096e-06, "loss": 2.3049, "step": 56510 }, { "epoch": 0.723456, "grad_norm": 1.7734375, "learning_rate": 5.378430173976895e-06, "loss": 2.2492, "step": 56520 }, { "epoch": 0.723584, "grad_norm": 1.6484375, "learning_rate": 5.373773665482753e-06, "loss": 2.267, "step": 56530 }, { "epoch": 0.723712, "grad_norm": 1.5234375, "learning_rate": 5.369118733701363e-06, "loss": 2.2942, "step": 56540 }, { "epoch": 0.72384, "grad_norm": 1.46875, "learning_rate": 5.364465379395177e-06, "loss": 2.2846, "step": 56550 }, { "epoch": 0.723968, "grad_norm": 1.9375, "learning_rate": 5.359813603326393e-06, "loss": 2.2944, "step": 56560 }, { "epoch": 0.724096, "grad_norm": 1.4609375, "learning_rate": 5.355163406256927e-06, "loss": 2.2761, "step": 56570 }, { "epoch": 0.724224, "grad_norm": 1.6796875, "learning_rate": 5.350514788948458e-06, "loss": 2.2903, "step": 56580 }, { "epoch": 0.724352, "grad_norm": 1.6875, "learning_rate": 5.345867752162405e-06, "loss": 2.2804, "step": 56590 }, { "epoch": 0.72448, "grad_norm": 1.8125, "learning_rate": 5.341222296659904e-06, "loss": 2.2767, "step": 56600 }, { "epoch": 0.724608, "grad_norm": 1.484375, "learning_rate": 5.336578423201874e-06, "loss": 2.2905, "step": 56610 }, { "epoch": 0.724736, "grad_norm": 2.546875, "learning_rate": 5.331936132548938e-06, "loss": 2.2881, "step": 56620 }, { "epoch": 0.724864, "grad_norm": 1.5859375, "learning_rate": 5.327295425461464e-06, "loss": 2.2632, "step": 56630 }, { "epoch": 0.724992, "grad_norm": 1.4921875, "learning_rate": 5.322656302699588e-06, "loss": 2.2882, "step": 56640 }, { "epoch": 0.72512, "grad_norm": 1.4140625, "learning_rate": 5.318018765023156e-06, "loss": 2.2766, "step": 56650 }, { "epoch": 0.725248, "grad_norm": 1.546875, "learning_rate": 5.313382813191769e-06, "loss": 2.2901, "step": 56660 }, { "epoch": 0.725376, "grad_norm": 1.46875, "learning_rate": 5.3087484479647694e-06, "loss": 2.2913, "step": 56670 }, { "epoch": 0.725504, "grad_norm": 1.59375, "learning_rate": 5.30411567010123e-06, "loss": 2.254, "step": 56680 }, { "epoch": 0.725632, "grad_norm": 1.59375, "learning_rate": 5.29948448035997e-06, "loss": 2.2832, "step": 56690 }, { "epoch": 0.72576, "grad_norm": 1.4296875, "learning_rate": 5.294854879499553e-06, "loss": 2.2957, "step": 56700 }, { "epoch": 0.725888, "grad_norm": 1.7265625, "learning_rate": 5.2902268682782675e-06, "loss": 2.2975, "step": 56710 }, { "epoch": 0.726016, "grad_norm": 1.4609375, "learning_rate": 5.285600447454165e-06, "loss": 2.272, "step": 56720 }, { "epoch": 0.726144, "grad_norm": 1.875, "learning_rate": 5.280975617785017e-06, "loss": 2.307, "step": 56730 }, { "epoch": 0.726272, "grad_norm": 1.5, "learning_rate": 5.276352380028329e-06, "loss": 2.2628, "step": 56740 }, { "epoch": 0.7264, "grad_norm": 1.671875, "learning_rate": 5.271730734941378e-06, "loss": 2.2828, "step": 56750 }, { "epoch": 0.726528, "grad_norm": 1.5234375, "learning_rate": 5.267110683281142e-06, "loss": 2.2777, "step": 56760 }, { "epoch": 0.726656, "grad_norm": 1.5234375, "learning_rate": 5.262492225804361e-06, "loss": 2.2501, "step": 56770 }, { "epoch": 0.726784, "grad_norm": 1.5703125, "learning_rate": 5.257875363267514e-06, "loss": 2.2741, "step": 56780 }, { "epoch": 0.726912, "grad_norm": 1.546875, "learning_rate": 5.253260096426802e-06, "loss": 2.2921, "step": 56790 }, { "epoch": 0.72704, "grad_norm": 1.5625, "learning_rate": 5.248646426038181e-06, "loss": 2.2638, "step": 56800 }, { "epoch": 0.727168, "grad_norm": 1.921875, "learning_rate": 5.244034352857344e-06, "loss": 2.3138, "step": 56810 }, { "epoch": 0.727296, "grad_norm": 1.53125, "learning_rate": 5.239423877639704e-06, "loss": 2.3012, "step": 56820 }, { "epoch": 0.727424, "grad_norm": 1.5859375, "learning_rate": 5.234815001140448e-06, "loss": 2.2702, "step": 56830 }, { "epoch": 0.727552, "grad_norm": 1.5234375, "learning_rate": 5.230207724114468e-06, "loss": 2.273, "step": 56840 }, { "epoch": 0.72768, "grad_norm": 1.5546875, "learning_rate": 5.225602047316395e-06, "loss": 2.3139, "step": 56850 }, { "epoch": 0.727808, "grad_norm": 2.53125, "learning_rate": 5.220997971500631e-06, "loss": 2.2732, "step": 56860 }, { "epoch": 0.727936, "grad_norm": 1.5390625, "learning_rate": 5.216395497421277e-06, "loss": 2.2991, "step": 56870 }, { "epoch": 0.728064, "grad_norm": 1.4921875, "learning_rate": 5.211794625832192e-06, "loss": 2.2719, "step": 56880 }, { "epoch": 0.728192, "grad_norm": 1.4609375, "learning_rate": 5.207195357486977e-06, "loss": 2.2881, "step": 56890 }, { "epoch": 0.72832, "grad_norm": 1.5625, "learning_rate": 5.202597693138948e-06, "loss": 2.2723, "step": 56900 }, { "epoch": 0.728448, "grad_norm": 1.546875, "learning_rate": 5.19800163354118e-06, "loss": 2.2818, "step": 56910 }, { "epoch": 0.728576, "grad_norm": 1.59375, "learning_rate": 5.193407179446481e-06, "loss": 2.2977, "step": 56920 }, { "epoch": 0.728704, "grad_norm": 1.59375, "learning_rate": 5.188814331607384e-06, "loss": 2.2713, "step": 56930 }, { "epoch": 0.728832, "grad_norm": 1.9609375, "learning_rate": 5.184223090776171e-06, "loss": 2.2649, "step": 56940 }, { "epoch": 0.72896, "grad_norm": 1.53125, "learning_rate": 5.179633457704863e-06, "loss": 2.2897, "step": 56950 }, { "epoch": 0.729088, "grad_norm": 1.59375, "learning_rate": 5.175045433145195e-06, "loss": 2.2716, "step": 56960 }, { "epoch": 0.729216, "grad_norm": 1.453125, "learning_rate": 5.170459017848679e-06, "loss": 2.2746, "step": 56970 }, { "epoch": 0.729344, "grad_norm": 1.640625, "learning_rate": 5.165874212566522e-06, "loss": 2.26, "step": 56980 }, { "epoch": 0.729472, "grad_norm": 49.5, "learning_rate": 5.161291018049688e-06, "loss": 2.293, "step": 56990 }, { "epoch": 0.7296, "grad_norm": 1.515625, "learning_rate": 5.156709435048884e-06, "loss": 2.2882, "step": 57000 }, { "epoch": 0.729728, "grad_norm": 1.4375, "learning_rate": 5.152129464314531e-06, "loss": 2.2709, "step": 57010 }, { "epoch": 0.729856, "grad_norm": 1.515625, "learning_rate": 5.147551106596802e-06, "loss": 2.3035, "step": 57020 }, { "epoch": 0.729984, "grad_norm": 1.6171875, "learning_rate": 5.142974362645609e-06, "loss": 2.3047, "step": 57030 }, { "epoch": 0.730112, "grad_norm": 1.8984375, "learning_rate": 5.138399233210581e-06, "loss": 2.2776, "step": 57040 }, { "epoch": 0.73024, "grad_norm": 1.6328125, "learning_rate": 5.133825719041101e-06, "loss": 2.2594, "step": 57050 }, { "epoch": 0.730368, "grad_norm": 1.53125, "learning_rate": 5.1292538208862785e-06, "loss": 2.2646, "step": 57060 }, { "epoch": 0.730496, "grad_norm": 1.4921875, "learning_rate": 5.124683539494968e-06, "loss": 2.2892, "step": 57070 }, { "epoch": 0.730624, "grad_norm": 1.4921875, "learning_rate": 5.120114875615739e-06, "loss": 2.2587, "step": 57080 }, { "epoch": 0.730752, "grad_norm": 1.5546875, "learning_rate": 5.1155478299969164e-06, "loss": 2.2537, "step": 57090 }, { "epoch": 0.73088, "grad_norm": 1.484375, "learning_rate": 5.110982403386554e-06, "loss": 2.3005, "step": 57100 }, { "epoch": 0.731008, "grad_norm": 1.65625, "learning_rate": 5.1064185965324306e-06, "loss": 2.261, "step": 57110 }, { "epoch": 0.731136, "grad_norm": 1.5078125, "learning_rate": 5.101856410182073e-06, "loss": 2.28, "step": 57120 }, { "epoch": 0.731264, "grad_norm": 1.7265625, "learning_rate": 5.0972958450827386e-06, "loss": 2.299, "step": 57130 }, { "epoch": 0.731392, "grad_norm": 1.546875, "learning_rate": 5.092736901981421e-06, "loss": 2.296, "step": 57140 }, { "epoch": 0.73152, "grad_norm": 1.515625, "learning_rate": 5.088179581624835e-06, "loss": 2.2945, "step": 57150 }, { "epoch": 0.731648, "grad_norm": 1.4453125, "learning_rate": 5.083623884759448e-06, "loss": 2.2745, "step": 57160 }, { "epoch": 0.731776, "grad_norm": 1.5859375, "learning_rate": 5.079069812131448e-06, "loss": 2.2811, "step": 57170 }, { "epoch": 0.731904, "grad_norm": 1.546875, "learning_rate": 5.074517364486771e-06, "loss": 2.2692, "step": 57180 }, { "epoch": 0.732032, "grad_norm": 1.765625, "learning_rate": 5.069966542571068e-06, "loss": 2.2772, "step": 57190 }, { "epoch": 0.73216, "grad_norm": 1.6015625, "learning_rate": 5.065417347129737e-06, "loss": 2.2983, "step": 57200 }, { "epoch": 0.732288, "grad_norm": 1.578125, "learning_rate": 5.060869778907913e-06, "loss": 2.2791, "step": 57210 }, { "epoch": 0.732416, "grad_norm": 1.6875, "learning_rate": 5.056323838650448e-06, "loss": 2.2641, "step": 57220 }, { "epoch": 0.732544, "grad_norm": 1.546875, "learning_rate": 5.05177952710194e-06, "loss": 2.2763, "step": 57230 }, { "epoch": 0.732672, "grad_norm": 1.53125, "learning_rate": 5.047236845006726e-06, "loss": 2.2685, "step": 57240 }, { "epoch": 0.7328, "grad_norm": 1.6484375, "learning_rate": 5.042695793108853e-06, "loss": 2.2845, "step": 57250 }, { "epoch": 0.732928, "grad_norm": 1.5078125, "learning_rate": 5.0381563721521254e-06, "loss": 2.2882, "step": 57260 }, { "epoch": 0.733056, "grad_norm": 1.5859375, "learning_rate": 5.033618582880066e-06, "loss": 2.2893, "step": 57270 }, { "epoch": 0.733184, "grad_norm": 1.6875, "learning_rate": 5.029082426035937e-06, "loss": 2.2594, "step": 57280 }, { "epoch": 0.733312, "grad_norm": 1.59375, "learning_rate": 5.024547902362738e-06, "loss": 2.2942, "step": 57290 }, { "epoch": 0.73344, "grad_norm": 1.828125, "learning_rate": 5.020015012603182e-06, "loss": 2.2733, "step": 57300 }, { "epoch": 0.733568, "grad_norm": 1.6015625, "learning_rate": 5.0154837574997324e-06, "loss": 2.284, "step": 57310 }, { "epoch": 0.733696, "grad_norm": 1.40625, "learning_rate": 5.010954137794583e-06, "loss": 2.2981, "step": 57320 }, { "epoch": 0.733824, "grad_norm": 1.6171875, "learning_rate": 5.006426154229648e-06, "loss": 2.3143, "step": 57330 }, { "epoch": 0.733952, "grad_norm": 1.6171875, "learning_rate": 5.001899807546583e-06, "loss": 2.2844, "step": 57340 }, { "epoch": 0.73408, "grad_norm": 1.5703125, "learning_rate": 4.997375098486782e-06, "loss": 2.2546, "step": 57350 }, { "epoch": 0.734208, "grad_norm": 1.421875, "learning_rate": 4.992852027791347e-06, "loss": 2.2865, "step": 57360 }, { "epoch": 0.734336, "grad_norm": 1.5546875, "learning_rate": 4.988330596201146e-06, "loss": 2.2889, "step": 57370 }, { "epoch": 0.734464, "grad_norm": 1.4921875, "learning_rate": 4.983810804456752e-06, "loss": 2.2665, "step": 57380 }, { "epoch": 0.734592, "grad_norm": 1.4765625, "learning_rate": 4.979292653298464e-06, "loss": 2.2894, "step": 57390 }, { "epoch": 0.73472, "grad_norm": 1.546875, "learning_rate": 4.97477614346635e-06, "loss": 2.2436, "step": 57400 }, { "epoch": 0.734848, "grad_norm": 1.5546875, "learning_rate": 4.970261275700166e-06, "loss": 2.2838, "step": 57410 }, { "epoch": 0.734976, "grad_norm": 1.71875, "learning_rate": 4.965748050739425e-06, "loss": 2.2825, "step": 57420 }, { "epoch": 0.735104, "grad_norm": 1.8515625, "learning_rate": 4.961236469323366e-06, "loss": 2.2715, "step": 57430 }, { "epoch": 0.735232, "grad_norm": 1.5078125, "learning_rate": 4.95672653219095e-06, "loss": 2.2757, "step": 57440 }, { "epoch": 0.73536, "grad_norm": 1.71875, "learning_rate": 4.952218240080876e-06, "loss": 2.2741, "step": 57450 }, { "epoch": 0.735488, "grad_norm": 1.4296875, "learning_rate": 4.9477115937315825e-06, "loss": 2.2929, "step": 57460 }, { "epoch": 0.735616, "grad_norm": 1.625, "learning_rate": 4.943206593881209e-06, "loss": 2.3048, "step": 57470 }, { "epoch": 0.735744, "grad_norm": 1.5703125, "learning_rate": 4.938703241267668e-06, "loss": 2.3075, "step": 57480 }, { "epoch": 0.735872, "grad_norm": 1.5390625, "learning_rate": 4.9342015366285685e-06, "loss": 2.3012, "step": 57490 }, { "epoch": 0.736, "grad_norm": 1.515625, "learning_rate": 4.92970148070125e-06, "loss": 2.2672, "step": 57500 }, { "epoch": 0.736128, "grad_norm": 1.5625, "learning_rate": 4.92520307422281e-06, "loss": 2.2616, "step": 57510 }, { "epoch": 0.736256, "grad_norm": 1.4609375, "learning_rate": 4.9207063179300465e-06, "loss": 2.2946, "step": 57520 }, { "epoch": 0.736384, "grad_norm": 1.6953125, "learning_rate": 4.916211212559502e-06, "loss": 2.2834, "step": 57530 }, { "epoch": 0.736512, "grad_norm": 1.6171875, "learning_rate": 4.911717758847451e-06, "loss": 2.302, "step": 57540 }, { "epoch": 0.73664, "grad_norm": 1.5546875, "learning_rate": 4.907225957529881e-06, "loss": 2.3102, "step": 57550 }, { "epoch": 0.736768, "grad_norm": 1.640625, "learning_rate": 4.902735809342524e-06, "loss": 2.2656, "step": 57560 }, { "epoch": 0.736896, "grad_norm": 1.546875, "learning_rate": 4.898247315020843e-06, "loss": 2.2581, "step": 57570 }, { "epoch": 0.737024, "grad_norm": 1.609375, "learning_rate": 4.89376047530001e-06, "loss": 2.2813, "step": 57580 }, { "epoch": 0.737152, "grad_norm": 1.515625, "learning_rate": 4.889275290914957e-06, "loss": 2.2666, "step": 57590 }, { "epoch": 0.73728, "grad_norm": 1.5859375, "learning_rate": 4.88479176260032e-06, "loss": 2.2977, "step": 57600 }, { "epoch": 0.737408, "grad_norm": 1.59375, "learning_rate": 4.880309891090461e-06, "loss": 2.2503, "step": 57610 }, { "epoch": 0.737536, "grad_norm": 1.4921875, "learning_rate": 4.8758296771195e-06, "loss": 2.3016, "step": 57620 }, { "epoch": 0.737664, "grad_norm": 2.15625, "learning_rate": 4.871351121421253e-06, "loss": 2.2878, "step": 57630 }, { "epoch": 0.737792, "grad_norm": 1.5859375, "learning_rate": 4.866874224729284e-06, "loss": 2.2776, "step": 57640 }, { "epoch": 0.73792, "grad_norm": 1.546875, "learning_rate": 4.862398987776882e-06, "loss": 2.2856, "step": 57650 }, { "epoch": 0.738048, "grad_norm": 1.671875, "learning_rate": 4.857925411297052e-06, "loss": 2.2721, "step": 57660 }, { "epoch": 0.738176, "grad_norm": 1.546875, "learning_rate": 4.853453496022543e-06, "loss": 2.2752, "step": 57670 }, { "epoch": 0.738304, "grad_norm": 1.46875, "learning_rate": 4.8489832426858295e-06, "loss": 2.3312, "step": 57680 }, { "epoch": 0.738432, "grad_norm": 1.6328125, "learning_rate": 4.844514652019099e-06, "loss": 2.2671, "step": 57690 }, { "epoch": 0.73856, "grad_norm": 1.5234375, "learning_rate": 4.840047724754284e-06, "loss": 2.2729, "step": 57700 }, { "epoch": 0.738688, "grad_norm": 1.53125, "learning_rate": 4.835582461623042e-06, "loss": 2.2618, "step": 57710 }, { "epoch": 0.738816, "grad_norm": 1.640625, "learning_rate": 4.831118863356743e-06, "loss": 2.2724, "step": 57720 }, { "epoch": 0.738944, "grad_norm": 1.5, "learning_rate": 4.826656930686501e-06, "loss": 2.3044, "step": 57730 }, { "epoch": 0.739072, "grad_norm": 1.53125, "learning_rate": 4.822196664343151e-06, "loss": 2.2633, "step": 57740 }, { "epoch": 0.7392, "grad_norm": 1.5703125, "learning_rate": 4.817738065057255e-06, "loss": 2.3046, "step": 57750 }, { "epoch": 0.739328, "grad_norm": 1.6015625, "learning_rate": 4.813281133559109e-06, "loss": 2.2839, "step": 57760 }, { "epoch": 0.739456, "grad_norm": 1.703125, "learning_rate": 4.808825870578718e-06, "loss": 2.303, "step": 57770 }, { "epoch": 0.739584, "grad_norm": 1.5546875, "learning_rate": 4.804372276845829e-06, "loss": 2.2555, "step": 57780 }, { "epoch": 0.739712, "grad_norm": 1.6875, "learning_rate": 4.7999203530899185e-06, "loss": 2.2813, "step": 57790 }, { "epoch": 0.73984, "grad_norm": 1.609375, "learning_rate": 4.79547010004017e-06, "loss": 2.2875, "step": 57800 }, { "epoch": 0.739968, "grad_norm": 1.8984375, "learning_rate": 4.791021518425514e-06, "loss": 2.2383, "step": 57810 }, { "epoch": 0.740096, "grad_norm": 1.390625, "learning_rate": 4.786574608974595e-06, "loss": 2.3008, "step": 57820 }, { "epoch": 0.740224, "grad_norm": 1.4921875, "learning_rate": 4.782129372415795e-06, "loss": 2.2585, "step": 57830 }, { "epoch": 0.740352, "grad_norm": 1.5703125, "learning_rate": 4.7776858094772045e-06, "loss": 2.2882, "step": 57840 }, { "epoch": 0.74048, "grad_norm": 1.5859375, "learning_rate": 4.773243920886657e-06, "loss": 2.2827, "step": 57850 }, { "epoch": 0.740608, "grad_norm": 1.5078125, "learning_rate": 4.768803707371704e-06, "loss": 2.2615, "step": 57860 }, { "epoch": 0.740736, "grad_norm": 1.5546875, "learning_rate": 4.764365169659619e-06, "loss": 2.2638, "step": 57870 }, { "epoch": 0.740864, "grad_norm": 1.546875, "learning_rate": 4.759928308477407e-06, "loss": 2.2724, "step": 57880 }, { "epoch": 0.740992, "grad_norm": 1.7890625, "learning_rate": 4.7554931245517984e-06, "loss": 2.2715, "step": 57890 }, { "epoch": 0.74112, "grad_norm": 1.515625, "learning_rate": 4.751059618609252e-06, "loss": 2.2416, "step": 57900 }, { "epoch": 0.741248, "grad_norm": 1.578125, "learning_rate": 4.746627791375936e-06, "loss": 2.2846, "step": 57910 }, { "epoch": 0.741376, "grad_norm": 1.6171875, "learning_rate": 4.742197643577761e-06, "loss": 2.2966, "step": 57920 }, { "epoch": 0.741504, "grad_norm": 1.53125, "learning_rate": 4.737769175940357e-06, "loss": 2.285, "step": 57930 }, { "epoch": 0.741632, "grad_norm": 1.546875, "learning_rate": 4.733342389189081e-06, "loss": 2.2813, "step": 57940 }, { "epoch": 0.74176, "grad_norm": 1.6640625, "learning_rate": 4.728917284049002e-06, "loss": 2.2628, "step": 57950 }, { "epoch": 0.741888, "grad_norm": 1.5, "learning_rate": 4.724493861244927e-06, "loss": 2.2683, "step": 57960 }, { "epoch": 0.742016, "grad_norm": 1.4921875, "learning_rate": 4.720072121501391e-06, "loss": 2.2981, "step": 57970 }, { "epoch": 0.742144, "grad_norm": 1.546875, "learning_rate": 4.715652065542637e-06, "loss": 2.3008, "step": 57980 }, { "epoch": 0.742272, "grad_norm": 1.6640625, "learning_rate": 4.711233694092641e-06, "loss": 2.2479, "step": 57990 }, { "epoch": 0.7424, "grad_norm": 1.546875, "learning_rate": 4.706817007875113e-06, "loss": 2.3009, "step": 58000 }, { "epoch": 0.742528, "grad_norm": 1.515625, "learning_rate": 4.702402007613466e-06, "loss": 2.2647, "step": 58010 }, { "epoch": 0.742656, "grad_norm": 1.53125, "learning_rate": 4.697988694030853e-06, "loss": 2.2668, "step": 58020 }, { "epoch": 0.742784, "grad_norm": 1.5546875, "learning_rate": 4.693577067850151e-06, "loss": 2.2973, "step": 58030 }, { "epoch": 0.742912, "grad_norm": 1.4453125, "learning_rate": 4.68916712979394e-06, "loss": 2.2833, "step": 58040 }, { "epoch": 0.74304, "grad_norm": 1.5078125, "learning_rate": 4.684758880584558e-06, "loss": 2.2811, "step": 58050 }, { "epoch": 0.743168, "grad_norm": 1.6953125, "learning_rate": 4.680352320944035e-06, "loss": 2.284, "step": 58060 }, { "epoch": 0.743296, "grad_norm": 1.4453125, "learning_rate": 4.67594745159414e-06, "loss": 2.2937, "step": 58070 }, { "epoch": 0.743424, "grad_norm": 1.5703125, "learning_rate": 4.671544273256365e-06, "loss": 2.3041, "step": 58080 }, { "epoch": 0.743552, "grad_norm": 1.5703125, "learning_rate": 4.667142786651914e-06, "loss": 2.2969, "step": 58090 }, { "epoch": 0.74368, "grad_norm": 1.484375, "learning_rate": 4.662742992501726e-06, "loss": 2.2767, "step": 58100 }, { "epoch": 0.743808, "grad_norm": 2.515625, "learning_rate": 4.658344891526465e-06, "loss": 2.2995, "step": 58110 }, { "epoch": 0.743936, "grad_norm": 1.4453125, "learning_rate": 4.6539484844464945e-06, "loss": 2.2745, "step": 58120 }, { "epoch": 0.744064, "grad_norm": 1.5, "learning_rate": 4.649553771981934e-06, "loss": 2.2825, "step": 58130 }, { "epoch": 0.744192, "grad_norm": 1.609375, "learning_rate": 4.645160754852606e-06, "loss": 2.293, "step": 58140 }, { "epoch": 0.74432, "grad_norm": 1.578125, "learning_rate": 4.640769433778042e-06, "loss": 2.2913, "step": 58150 }, { "epoch": 0.744448, "grad_norm": 1.5234375, "learning_rate": 4.636379809477533e-06, "loss": 2.266, "step": 58160 }, { "epoch": 0.744576, "grad_norm": 1.5234375, "learning_rate": 4.631991882670056e-06, "loss": 2.2737, "step": 58170 }, { "epoch": 0.744704, "grad_norm": 1.6796875, "learning_rate": 4.627605654074331e-06, "loss": 2.2734, "step": 58180 }, { "epoch": 0.744832, "grad_norm": 1.5703125, "learning_rate": 4.623221124408796e-06, "loss": 2.3185, "step": 58190 }, { "epoch": 0.74496, "grad_norm": 1.546875, "learning_rate": 4.618838294391601e-06, "loss": 2.2347, "step": 58200 }, { "epoch": 0.745088, "grad_norm": 1.8828125, "learning_rate": 4.614457164740627e-06, "loss": 2.2893, "step": 58210 }, { "epoch": 0.745216, "grad_norm": 1.4921875, "learning_rate": 4.610077736173481e-06, "loss": 2.2852, "step": 58220 }, { "epoch": 0.745344, "grad_norm": 1.5390625, "learning_rate": 4.605700009407467e-06, "loss": 2.2914, "step": 58230 }, { "epoch": 0.745472, "grad_norm": 1.5546875, "learning_rate": 4.601323985159652e-06, "loss": 2.3109, "step": 58240 }, { "epoch": 0.7456, "grad_norm": 2.765625, "learning_rate": 4.596949664146788e-06, "loss": 2.2882, "step": 58250 }, { "epoch": 0.745728, "grad_norm": 1.5703125, "learning_rate": 4.5925770470853506e-06, "loss": 2.2848, "step": 58260 }, { "epoch": 0.745856, "grad_norm": 1.4375, "learning_rate": 4.588206134691565e-06, "loss": 2.2935, "step": 58270 }, { "epoch": 0.745984, "grad_norm": 1.609375, "learning_rate": 4.583836927681345e-06, "loss": 2.2646, "step": 58280 }, { "epoch": 0.746112, "grad_norm": 1.5078125, "learning_rate": 4.579469426770341e-06, "loss": 2.2878, "step": 58290 }, { "epoch": 0.74624, "grad_norm": 1.4375, "learning_rate": 4.575103632673925e-06, "loss": 2.2703, "step": 58300 }, { "epoch": 0.746368, "grad_norm": 2.078125, "learning_rate": 4.570739546107178e-06, "loss": 2.2575, "step": 58310 }, { "epoch": 0.746496, "grad_norm": 1.4609375, "learning_rate": 4.566377167784914e-06, "loss": 2.2774, "step": 58320 }, { "epoch": 0.746624, "grad_norm": 1.5859375, "learning_rate": 4.562016498421664e-06, "loss": 2.2809, "step": 58330 }, { "epoch": 0.746752, "grad_norm": 3.59375, "learning_rate": 4.557657538731669e-06, "loss": 2.2782, "step": 58340 }, { "epoch": 0.74688, "grad_norm": 1.46875, "learning_rate": 4.553300289428904e-06, "loss": 2.2924, "step": 58350 }, { "epoch": 0.747008, "grad_norm": 1.6875, "learning_rate": 4.5489447512270624e-06, "loss": 2.259, "step": 58360 }, { "epoch": 0.747136, "grad_norm": 1.6171875, "learning_rate": 4.544590924839535e-06, "loss": 2.271, "step": 58370 }, { "epoch": 0.747264, "grad_norm": 1.5859375, "learning_rate": 4.540238810979473e-06, "loss": 2.2704, "step": 58380 }, { "epoch": 0.747392, "grad_norm": 2.171875, "learning_rate": 4.535888410359709e-06, "loss": 2.2893, "step": 58390 }, { "epoch": 0.74752, "grad_norm": 1.609375, "learning_rate": 4.5315397236928144e-06, "loss": 2.289, "step": 58400 }, { "epoch": 0.747648, "grad_norm": 1.6171875, "learning_rate": 4.527192751691079e-06, "loss": 2.2595, "step": 58410 }, { "epoch": 0.747776, "grad_norm": 1.4921875, "learning_rate": 4.5228474950665e-06, "loss": 2.2762, "step": 58420 }, { "epoch": 0.747904, "grad_norm": 1.53125, "learning_rate": 4.518503954530808e-06, "loss": 2.2627, "step": 58430 }, { "epoch": 0.748032, "grad_norm": 1.4609375, "learning_rate": 4.51416213079545e-06, "loss": 2.2621, "step": 58440 }, { "epoch": 0.74816, "grad_norm": 1.3828125, "learning_rate": 4.5098220245715785e-06, "loss": 2.2999, "step": 58450 }, { "epoch": 0.748288, "grad_norm": 1.609375, "learning_rate": 4.5054836365700776e-06, "loss": 2.2708, "step": 58460 }, { "epoch": 0.748416, "grad_norm": 1.59375, "learning_rate": 4.501146967501556e-06, "loss": 2.2968, "step": 58470 }, { "epoch": 0.748544, "grad_norm": 1.5234375, "learning_rate": 4.496812018076317e-06, "loss": 2.2782, "step": 58480 }, { "epoch": 0.748672, "grad_norm": 1.609375, "learning_rate": 4.492478789004407e-06, "loss": 2.2625, "step": 58490 }, { "epoch": 0.7488, "grad_norm": 1.5546875, "learning_rate": 4.488147280995576e-06, "loss": 2.3001, "step": 58500 }, { "epoch": 0.748928, "grad_norm": 1.484375, "learning_rate": 4.483817494759299e-06, "loss": 2.2417, "step": 58510 }, { "epoch": 0.749056, "grad_norm": 1.5859375, "learning_rate": 4.479489431004771e-06, "loss": 2.2372, "step": 58520 }, { "epoch": 0.749184, "grad_norm": 1.7265625, "learning_rate": 4.475163090440889e-06, "loss": 2.272, "step": 58530 }, { "epoch": 0.749312, "grad_norm": 1.921875, "learning_rate": 4.470838473776286e-06, "loss": 2.2898, "step": 58540 }, { "epoch": 0.74944, "grad_norm": 1.46875, "learning_rate": 4.4665155817193106e-06, "loss": 2.2638, "step": 58550 }, { "epoch": 0.749568, "grad_norm": 1.796875, "learning_rate": 4.462194414978014e-06, "loss": 2.2837, "step": 58560 }, { "epoch": 0.749696, "grad_norm": 1.5546875, "learning_rate": 4.457874974260181e-06, "loss": 2.2759, "step": 58570 }, { "epoch": 0.749824, "grad_norm": 1.6484375, "learning_rate": 4.4535572602733074e-06, "loss": 2.259, "step": 58580 }, { "epoch": 0.749952, "grad_norm": 1.4453125, "learning_rate": 4.449241273724609e-06, "loss": 2.2882, "step": 58590 }, { "epoch": 0.75008, "grad_norm": 1.5546875, "learning_rate": 4.444927015321008e-06, "loss": 2.3023, "step": 58600 }, { "epoch": 0.750208, "grad_norm": 1.5390625, "learning_rate": 4.440614485769157e-06, "loss": 2.2767, "step": 58610 }, { "epoch": 0.750336, "grad_norm": 1.546875, "learning_rate": 4.436303685775423e-06, "loss": 2.2676, "step": 58620 }, { "epoch": 0.750464, "grad_norm": 1.6328125, "learning_rate": 4.4319946160458804e-06, "loss": 2.2798, "step": 58630 }, { "epoch": 0.750592, "grad_norm": 1.5078125, "learning_rate": 4.427687277286329e-06, "loss": 2.3058, "step": 58640 }, { "epoch": 0.75072, "grad_norm": 1.484375, "learning_rate": 4.423381670202287e-06, "loss": 2.2604, "step": 58650 }, { "epoch": 0.750848, "grad_norm": 1.5859375, "learning_rate": 4.4190777954989776e-06, "loss": 2.2535, "step": 58660 }, { "epoch": 0.750976, "grad_norm": 1.6171875, "learning_rate": 4.414775653881348e-06, "loss": 2.2944, "step": 58670 }, { "epoch": 0.751104, "grad_norm": 1.5, "learning_rate": 4.410475246054064e-06, "loss": 2.2922, "step": 58680 }, { "epoch": 0.751232, "grad_norm": 1.5625, "learning_rate": 4.406176572721506e-06, "loss": 2.2865, "step": 58690 }, { "epoch": 0.75136, "grad_norm": 1.546875, "learning_rate": 4.401879634587768e-06, "loss": 2.2864, "step": 58700 }, { "epoch": 0.751488, "grad_norm": 1.953125, "learning_rate": 4.397584432356654e-06, "loss": 2.2592, "step": 58710 }, { "epoch": 0.751616, "grad_norm": 1.5234375, "learning_rate": 4.393290966731693e-06, "loss": 2.2825, "step": 58720 }, { "epoch": 0.751744, "grad_norm": 1.6328125, "learning_rate": 4.3889992384161335e-06, "loss": 2.3046, "step": 58730 }, { "epoch": 0.751872, "grad_norm": 1.5546875, "learning_rate": 4.3847092481129216e-06, "loss": 2.2703, "step": 58740 }, { "epoch": 0.752, "grad_norm": 1.6640625, "learning_rate": 4.380420996524735e-06, "loss": 2.2864, "step": 58750 }, { "epoch": 0.752128, "grad_norm": 1.453125, "learning_rate": 4.376134484353965e-06, "loss": 2.2833, "step": 58760 }, { "epoch": 0.752256, "grad_norm": 1.6015625, "learning_rate": 4.371849712302706e-06, "loss": 2.2753, "step": 58770 }, { "epoch": 0.752384, "grad_norm": 1.65625, "learning_rate": 4.367566681072779e-06, "loss": 2.2622, "step": 58780 }, { "epoch": 0.752512, "grad_norm": 1.5234375, "learning_rate": 4.363285391365722e-06, "loss": 2.2618, "step": 58790 }, { "epoch": 0.75264, "grad_norm": 1.4921875, "learning_rate": 4.359005843882767e-06, "loss": 2.2608, "step": 58800 }, { "epoch": 0.752768, "grad_norm": 1.609375, "learning_rate": 4.354728039324897e-06, "loss": 2.2698, "step": 58810 }, { "epoch": 0.752896, "grad_norm": 1.5390625, "learning_rate": 4.350451978392772e-06, "loss": 2.2689, "step": 58820 }, { "epoch": 0.753024, "grad_norm": 1.5, "learning_rate": 4.346177661786788e-06, "loss": 2.3022, "step": 58830 }, { "epoch": 0.753152, "grad_norm": 1.6171875, "learning_rate": 4.341905090207053e-06, "loss": 2.286, "step": 58840 }, { "epoch": 0.75328, "grad_norm": 1.453125, "learning_rate": 4.337634264353381e-06, "loss": 2.265, "step": 58850 }, { "epoch": 0.753408, "grad_norm": 1.578125, "learning_rate": 4.333365184925307e-06, "loss": 2.2799, "step": 58860 }, { "epoch": 0.753536, "grad_norm": 1.546875, "learning_rate": 4.329097852622083e-06, "loss": 2.2717, "step": 58870 }, { "epoch": 0.753664, "grad_norm": 1.6328125, "learning_rate": 4.324832268142657e-06, "loss": 2.2918, "step": 58880 }, { "epoch": 0.753792, "grad_norm": 1.578125, "learning_rate": 4.320568432185722e-06, "loss": 2.2944, "step": 58890 }, { "epoch": 0.75392, "grad_norm": 1.8125, "learning_rate": 4.316306345449657e-06, "loss": 2.3287, "step": 58900 }, { "epoch": 0.754048, "grad_norm": 1.703125, "learning_rate": 4.312046008632554e-06, "loss": 2.2793, "step": 58910 }, { "epoch": 0.754176, "grad_norm": 1.6640625, "learning_rate": 4.3077874224322485e-06, "loss": 2.2899, "step": 58920 }, { "epoch": 0.754304, "grad_norm": 1.5703125, "learning_rate": 4.303530587546252e-06, "loss": 2.2886, "step": 58930 }, { "epoch": 0.754432, "grad_norm": 1.484375, "learning_rate": 4.299275504671814e-06, "loss": 2.2458, "step": 58940 }, { "epoch": 0.75456, "grad_norm": 1.6875, "learning_rate": 4.2950221745058935e-06, "loss": 2.2977, "step": 58950 }, { "epoch": 0.754688, "grad_norm": 1.4765625, "learning_rate": 4.290770597745146e-06, "loss": 2.3102, "step": 58960 }, { "epoch": 0.754816, "grad_norm": 1.515625, "learning_rate": 4.2865207750859595e-06, "loss": 2.3007, "step": 58970 }, { "epoch": 0.754944, "grad_norm": 7.09375, "learning_rate": 4.2822727072244305e-06, "loss": 2.2933, "step": 58980 }, { "epoch": 0.755072, "grad_norm": 1.421875, "learning_rate": 4.2780263948563505e-06, "loss": 2.2786, "step": 58990 }, { "epoch": 0.7552, "grad_norm": 1.6328125, "learning_rate": 4.273781838677257e-06, "loss": 2.3121, "step": 59000 }, { "epoch": 0.755328, "grad_norm": 1.640625, "learning_rate": 4.269539039382371e-06, "loss": 2.2527, "step": 59010 }, { "epoch": 0.755456, "grad_norm": 1.7109375, "learning_rate": 4.265297997666624e-06, "loss": 2.2737, "step": 59020 }, { "epoch": 0.755584, "grad_norm": 1.53125, "learning_rate": 4.261058714224695e-06, "loss": 2.2965, "step": 59030 }, { "epoch": 0.755712, "grad_norm": 1.6015625, "learning_rate": 4.256821189750929e-06, "loss": 2.2832, "step": 59040 }, { "epoch": 0.75584, "grad_norm": 1.53125, "learning_rate": 4.252585424939416e-06, "loss": 2.2823, "step": 59050 }, { "epoch": 0.755968, "grad_norm": 1.625, "learning_rate": 4.2483514204839466e-06, "loss": 2.3155, "step": 59060 }, { "epoch": 0.756096, "grad_norm": 1.5, "learning_rate": 4.244119177078015e-06, "loss": 2.2544, "step": 59070 }, { "epoch": 0.756224, "grad_norm": 1.5234375, "learning_rate": 4.239888695414843e-06, "loss": 2.2623, "step": 59080 }, { "epoch": 0.756352, "grad_norm": 1.5703125, "learning_rate": 4.235659976187356e-06, "loss": 2.2672, "step": 59090 }, { "epoch": 0.75648, "grad_norm": 1.4609375, "learning_rate": 4.231433020088182e-06, "loss": 2.2895, "step": 59100 }, { "epoch": 0.756608, "grad_norm": 1.5625, "learning_rate": 4.227207827809674e-06, "loss": 2.2789, "step": 59110 }, { "epoch": 0.756736, "grad_norm": 1.734375, "learning_rate": 4.222984400043896e-06, "loss": 2.2852, "step": 59120 }, { "epoch": 0.756864, "grad_norm": 2.046875, "learning_rate": 4.218762737482604e-06, "loss": 2.2672, "step": 59130 }, { "epoch": 0.756992, "grad_norm": 1.609375, "learning_rate": 4.214542840817296e-06, "loss": 2.3038, "step": 59140 }, { "epoch": 0.75712, "grad_norm": 1.53125, "learning_rate": 4.210324710739152e-06, "loss": 2.3385, "step": 59150 }, { "epoch": 0.757248, "grad_norm": 1.71875, "learning_rate": 4.206108347939074e-06, "loss": 2.2573, "step": 59160 }, { "epoch": 0.757376, "grad_norm": 1.6484375, "learning_rate": 4.2018937531076825e-06, "loss": 2.2807, "step": 59170 }, { "epoch": 0.757504, "grad_norm": 1.625, "learning_rate": 4.197680926935291e-06, "loss": 2.2661, "step": 59180 }, { "epoch": 0.757632, "grad_norm": 1.4765625, "learning_rate": 4.193469870111939e-06, "loss": 2.2823, "step": 59190 }, { "epoch": 0.75776, "grad_norm": 1.5625, "learning_rate": 4.189260583327371e-06, "loss": 2.2621, "step": 59200 }, { "epoch": 0.757888, "grad_norm": 1.5390625, "learning_rate": 4.185053067271033e-06, "loss": 2.3036, "step": 59210 }, { "epoch": 0.758016, "grad_norm": 1.625, "learning_rate": 4.180847322632093e-06, "loss": 2.2704, "step": 59220 }, { "epoch": 0.758144, "grad_norm": 1.59375, "learning_rate": 4.1766433500994305e-06, "loss": 2.2693, "step": 59230 }, { "epoch": 0.758272, "grad_norm": 1.546875, "learning_rate": 4.172441150361616e-06, "loss": 2.2902, "step": 59240 }, { "epoch": 0.7584, "grad_norm": 1.6171875, "learning_rate": 4.168240724106952e-06, "loss": 2.2901, "step": 59250 }, { "epoch": 0.758528, "grad_norm": 1.40625, "learning_rate": 4.164042072023436e-06, "loss": 2.3048, "step": 59260 }, { "epoch": 0.758656, "grad_norm": 1.8984375, "learning_rate": 4.159845194798782e-06, "loss": 2.3103, "step": 59270 }, { "epoch": 0.758784, "grad_norm": 1.4765625, "learning_rate": 4.155650093120414e-06, "loss": 2.2703, "step": 59280 }, { "epoch": 0.758912, "grad_norm": 1.640625, "learning_rate": 4.151456767675456e-06, "loss": 2.2948, "step": 59290 }, { "epoch": 0.75904, "grad_norm": 1.609375, "learning_rate": 4.147265219150749e-06, "loss": 2.2852, "step": 59300 }, { "epoch": 0.759168, "grad_norm": 1.59375, "learning_rate": 4.143075448232846e-06, "loss": 2.2766, "step": 59310 }, { "epoch": 0.759296, "grad_norm": 1.4375, "learning_rate": 4.138887455607996e-06, "loss": 2.2832, "step": 59320 }, { "epoch": 0.759424, "grad_norm": 1.546875, "learning_rate": 4.134701241962169e-06, "loss": 2.2773, "step": 59330 }, { "epoch": 0.759552, "grad_norm": 1.453125, "learning_rate": 4.130516807981039e-06, "loss": 2.2669, "step": 59340 }, { "epoch": 0.75968, "grad_norm": 1.734375, "learning_rate": 4.126334154349992e-06, "loss": 2.3049, "step": 59350 }, { "epoch": 0.759808, "grad_norm": 1.546875, "learning_rate": 4.122153281754114e-06, "loss": 2.275, "step": 59360 }, { "epoch": 0.759936, "grad_norm": 1.5078125, "learning_rate": 4.117974190878205e-06, "loss": 2.2941, "step": 59370 }, { "epoch": 0.760064, "grad_norm": 1.546875, "learning_rate": 4.11379688240678e-06, "loss": 2.2836, "step": 59380 }, { "epoch": 0.760192, "grad_norm": 1.5546875, "learning_rate": 4.109621357024044e-06, "loss": 2.2806, "step": 59390 }, { "epoch": 0.76032, "grad_norm": 1.6640625, "learning_rate": 4.105447615413927e-06, "loss": 2.2608, "step": 59400 }, { "epoch": 0.760448, "grad_norm": 1.546875, "learning_rate": 4.101275658260063e-06, "loss": 2.2769, "step": 59410 }, { "epoch": 0.760576, "grad_norm": 1.5390625, "learning_rate": 4.097105486245782e-06, "loss": 2.2565, "step": 59420 }, { "epoch": 0.760704, "grad_norm": 1.59375, "learning_rate": 4.092937100054137e-06, "loss": 2.2954, "step": 59430 }, { "epoch": 0.760832, "grad_norm": 1.625, "learning_rate": 4.088770500367882e-06, "loss": 2.2858, "step": 59440 }, { "epoch": 0.76096, "grad_norm": 1.5390625, "learning_rate": 4.084605687869479e-06, "loss": 2.2741, "step": 59450 }, { "epoch": 0.761088, "grad_norm": 1.4296875, "learning_rate": 4.080442663241101e-06, "loss": 2.277, "step": 59460 }, { "epoch": 0.761216, "grad_norm": 1.5390625, "learning_rate": 4.076281427164615e-06, "loss": 2.2745, "step": 59470 }, { "epoch": 0.761344, "grad_norm": 1.484375, "learning_rate": 4.072121980321607e-06, "loss": 2.3215, "step": 59480 }, { "epoch": 0.761472, "grad_norm": 1.6328125, "learning_rate": 4.067964323393374e-06, "loss": 2.2441, "step": 59490 }, { "epoch": 0.7616, "grad_norm": 1.4375, "learning_rate": 4.063808457060904e-06, "loss": 2.2578, "step": 59500 }, { "epoch": 0.761728, "grad_norm": 1.5546875, "learning_rate": 4.0596543820049045e-06, "loss": 2.2926, "step": 59510 }, { "epoch": 0.761856, "grad_norm": 1.6796875, "learning_rate": 4.055502098905791e-06, "loss": 2.271, "step": 59520 }, { "epoch": 0.761984, "grad_norm": 1.546875, "learning_rate": 4.051351608443669e-06, "loss": 2.2736, "step": 59530 }, { "epoch": 0.762112, "grad_norm": 2.53125, "learning_rate": 4.047202911298367e-06, "loss": 2.2933, "step": 59540 }, { "epoch": 0.76224, "grad_norm": 3.046875, "learning_rate": 4.043056008149421e-06, "loss": 2.2868, "step": 59550 }, { "epoch": 0.762368, "grad_norm": 1.5546875, "learning_rate": 4.038910899676053e-06, "loss": 2.2643, "step": 59560 }, { "epoch": 0.762496, "grad_norm": 1.65625, "learning_rate": 4.034767586557218e-06, "loss": 2.2907, "step": 59570 }, { "epoch": 0.762624, "grad_norm": 1.53125, "learning_rate": 4.030626069471555e-06, "loss": 2.3028, "step": 59580 }, { "epoch": 0.762752, "grad_norm": 1.65625, "learning_rate": 4.026486349097418e-06, "loss": 2.2791, "step": 59590 }, { "epoch": 0.76288, "grad_norm": 1.4921875, "learning_rate": 4.022348426112875e-06, "loss": 2.2671, "step": 59600 }, { "epoch": 0.763008, "grad_norm": 1.6328125, "learning_rate": 4.018212301195678e-06, "loss": 2.2808, "step": 59610 }, { "epoch": 0.763136, "grad_norm": 1.59375, "learning_rate": 4.014077975023302e-06, "loss": 2.2871, "step": 59620 }, { "epoch": 0.763264, "grad_norm": 1.5859375, "learning_rate": 4.009945448272929e-06, "loss": 2.2966, "step": 59630 }, { "epoch": 0.763392, "grad_norm": 1.4296875, "learning_rate": 4.005814721621426e-06, "loss": 2.2728, "step": 59640 }, { "epoch": 0.76352, "grad_norm": 1.46875, "learning_rate": 4.0016857957453945e-06, "loss": 2.2551, "step": 59650 }, { "epoch": 0.763648, "grad_norm": 1.578125, "learning_rate": 3.997558671321118e-06, "loss": 2.2769, "step": 59660 }, { "epoch": 0.763776, "grad_norm": 1.4609375, "learning_rate": 3.993433349024585e-06, "loss": 2.255, "step": 59670 }, { "epoch": 0.763904, "grad_norm": 1.6015625, "learning_rate": 3.9893098295315115e-06, "loss": 2.2901, "step": 59680 }, { "epoch": 0.764032, "grad_norm": 1.6171875, "learning_rate": 3.985188113517291e-06, "loss": 2.295, "step": 59690 }, { "epoch": 0.76416, "grad_norm": 1.5390625, "learning_rate": 3.981068201657037e-06, "loss": 2.2359, "step": 59700 }, { "epoch": 0.764288, "grad_norm": 1.59375, "learning_rate": 3.97695009462557e-06, "loss": 2.2995, "step": 59710 }, { "epoch": 0.764416, "grad_norm": 1.5, "learning_rate": 3.9728337930974e-06, "loss": 2.2593, "step": 59720 }, { "epoch": 0.764544, "grad_norm": 1.7421875, "learning_rate": 3.9687192977467525e-06, "loss": 2.2651, "step": 59730 }, { "epoch": 0.764672, "grad_norm": 1.7265625, "learning_rate": 3.964606609247561e-06, "loss": 2.2838, "step": 59740 }, { "epoch": 0.7648, "grad_norm": 1.5078125, "learning_rate": 3.9604957282734445e-06, "loss": 2.2588, "step": 59750 }, { "epoch": 0.764928, "grad_norm": 1.5, "learning_rate": 3.956386655497753e-06, "loss": 2.2929, "step": 59760 }, { "epoch": 0.765056, "grad_norm": 1.609375, "learning_rate": 3.952279391593522e-06, "loss": 2.2923, "step": 59770 }, { "epoch": 0.765184, "grad_norm": 2.078125, "learning_rate": 3.94817393723348e-06, "loss": 2.291, "step": 59780 }, { "epoch": 0.765312, "grad_norm": 1.5234375, "learning_rate": 3.944070293090093e-06, "loss": 2.2769, "step": 59790 }, { "epoch": 0.76544, "grad_norm": 1.5625, "learning_rate": 3.939968459835499e-06, "loss": 2.2832, "step": 59800 }, { "epoch": 0.765568, "grad_norm": 1.5625, "learning_rate": 3.935868438141555e-06, "loss": 2.2745, "step": 59810 }, { "epoch": 0.765696, "grad_norm": 1.4375, "learning_rate": 3.931770228679821e-06, "loss": 2.2937, "step": 59820 }, { "epoch": 0.765824, "grad_norm": 1.515625, "learning_rate": 3.927673832121551e-06, "loss": 2.2699, "step": 59830 }, { "epoch": 0.765952, "grad_norm": 1.6015625, "learning_rate": 3.92357924913771e-06, "loss": 2.3015, "step": 59840 }, { "epoch": 0.76608, "grad_norm": 1.390625, "learning_rate": 3.919486480398967e-06, "loss": 2.3043, "step": 59850 }, { "epoch": 0.766208, "grad_norm": 1.5078125, "learning_rate": 3.915395526575685e-06, "loss": 2.287, "step": 59860 }, { "epoch": 0.766336, "grad_norm": 1.5859375, "learning_rate": 3.91130638833794e-06, "loss": 2.2591, "step": 59870 }, { "epoch": 0.766464, "grad_norm": 1.6484375, "learning_rate": 3.9072190663555065e-06, "loss": 2.2512, "step": 59880 }, { "epoch": 0.766592, "grad_norm": 1.421875, "learning_rate": 3.9031335612978516e-06, "loss": 2.2832, "step": 59890 }, { "epoch": 0.76672, "grad_norm": 1.4453125, "learning_rate": 3.899049873834168e-06, "loss": 2.3092, "step": 59900 }, { "epoch": 0.766848, "grad_norm": 1.4765625, "learning_rate": 3.894968004633328e-06, "loss": 2.2888, "step": 59910 }, { "epoch": 0.766976, "grad_norm": 1.5625, "learning_rate": 3.890887954363917e-06, "loss": 2.258, "step": 59920 }, { "epoch": 0.767104, "grad_norm": 1.59375, "learning_rate": 3.886809723694227e-06, "loss": 2.2598, "step": 59930 }, { "epoch": 0.767232, "grad_norm": 1.578125, "learning_rate": 3.882733313292233e-06, "loss": 2.2624, "step": 59940 }, { "epoch": 0.76736, "grad_norm": 1.6015625, "learning_rate": 3.878658723825632e-06, "loss": 2.2749, "step": 59950 }, { "epoch": 0.767488, "grad_norm": 1.5234375, "learning_rate": 3.874585955961818e-06, "loss": 2.2854, "step": 59960 }, { "epoch": 0.767616, "grad_norm": 1.703125, "learning_rate": 3.870515010367874e-06, "loss": 2.291, "step": 59970 }, { "epoch": 0.767744, "grad_norm": 1.515625, "learning_rate": 3.866445887710601e-06, "loss": 2.2818, "step": 59980 }, { "epoch": 0.767872, "grad_norm": 1.5, "learning_rate": 3.8623785886564974e-06, "loss": 2.3077, "step": 59990 }, { "epoch": 0.768, "grad_norm": 1.65625, "learning_rate": 3.858313113871751e-06, "loss": 2.2702, "step": 60000 } ], "logging_steps": 10, "max_steps": 78125, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.9580082963332202e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }