dangvansam's picture
Upload folder using huggingface_hub
3af22b8 verified
Raw
History Blame Contribute Delete
263 kB
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.2999670003299966,
"eval_steps": 50000,
"global_step": 150000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0021999780002199976,
"grad_norm": 1.3319692611694336,
"learning_rate": 9.9e-06,
"loss": 0.5606,
"step": 100
},
{
"epoch": 0.004399956000439995,
"grad_norm": 1.4906107187271118,
"learning_rate": 9.99564212611423e-06,
"loss": 0.5478,
"step": 200
},
{
"epoch": 0.006599934000659994,
"grad_norm": 1.542100191116333,
"learning_rate": 9.99124023330032e-06,
"loss": 0.5508,
"step": 300
},
{
"epoch": 0.00879991200087999,
"grad_norm": 1.59752357006073,
"learning_rate": 9.98683834048641e-06,
"loss": 0.566,
"step": 400
},
{
"epoch": 0.010999890001099988,
"grad_norm": 1.535962462425232,
"learning_rate": 9.9824364476725e-06,
"loss": 0.5604,
"step": 500
},
{
"epoch": 0.013199868001319988,
"grad_norm": 1.6737797260284424,
"learning_rate": 9.97803455485859e-06,
"loss": 0.5651,
"step": 600
},
{
"epoch": 0.015399846001539985,
"grad_norm": 1.5698915719985962,
"learning_rate": 9.97363266204468e-06,
"loss": 0.5384,
"step": 700
},
{
"epoch": 0.01759982400175998,
"grad_norm": 1.501681923866272,
"learning_rate": 9.96923076923077e-06,
"loss": 0.5454,
"step": 800
},
{
"epoch": 0.01979980200197998,
"grad_norm": 1.6730457544326782,
"learning_rate": 9.96482887641686e-06,
"loss": 0.5515,
"step": 900
},
{
"epoch": 0.021999780002199976,
"grad_norm": 1.7415289878845215,
"learning_rate": 9.960426983602949e-06,
"loss": 0.5641,
"step": 1000
},
{
"epoch": 0.024199758002419976,
"grad_norm": 1.7273190021514893,
"learning_rate": 9.95602509078904e-06,
"loss": 0.559,
"step": 1100
},
{
"epoch": 0.026399736002639975,
"grad_norm": 1.7402335405349731,
"learning_rate": 9.95162319797513e-06,
"loss": 0.5391,
"step": 1200
},
{
"epoch": 0.02859971400285997,
"grad_norm": 1.8390350341796875,
"learning_rate": 9.94722130516122e-06,
"loss": 0.5563,
"step": 1300
},
{
"epoch": 0.03079969200307997,
"grad_norm": 1.3122905492782593,
"learning_rate": 9.94281941234731e-06,
"loss": 0.5594,
"step": 1400
},
{
"epoch": 0.032999670003299966,
"grad_norm": 1.3811813592910767,
"learning_rate": 9.9384175195334e-06,
"loss": 0.5592,
"step": 1500
},
{
"epoch": 0.03519964800351996,
"grad_norm": 1.8546792268753052,
"learning_rate": 9.934015626719489e-06,
"loss": 0.5522,
"step": 1600
},
{
"epoch": 0.037399626003739965,
"grad_norm": 1.6485520601272583,
"learning_rate": 9.92961373390558e-06,
"loss": 0.5354,
"step": 1700
},
{
"epoch": 0.03959960400395996,
"grad_norm": 1.366682767868042,
"learning_rate": 9.92521184109167e-06,
"loss": 0.5507,
"step": 1800
},
{
"epoch": 0.04179958200417996,
"grad_norm": 1.7690378427505493,
"learning_rate": 9.92080994827776e-06,
"loss": 0.5444,
"step": 1900
},
{
"epoch": 0.04399956000439995,
"grad_norm": 1.5437382459640503,
"learning_rate": 9.91640805546385e-06,
"loss": 0.5651,
"step": 2000
},
{
"epoch": 0.046199538004619956,
"grad_norm": 1.156587839126587,
"learning_rate": 9.91200616264994e-06,
"loss": 0.562,
"step": 2100
},
{
"epoch": 0.04839951600483995,
"grad_norm": 1.7941553592681885,
"learning_rate": 9.90760426983603e-06,
"loss": 0.5659,
"step": 2200
},
{
"epoch": 0.05059949400505995,
"grad_norm": 1.4848283529281616,
"learning_rate": 9.903202377022121e-06,
"loss": 0.5629,
"step": 2300
},
{
"epoch": 0.05279947200527995,
"grad_norm": 1.4486836194992065,
"learning_rate": 9.898800484208211e-06,
"loss": 0.5459,
"step": 2400
},
{
"epoch": 0.054999450005499946,
"grad_norm": 1.731554388999939,
"learning_rate": 9.894398591394302e-06,
"loss": 0.5626,
"step": 2500
},
{
"epoch": 0.05719942800571994,
"grad_norm": 1.6251667737960815,
"learning_rate": 9.88999669858039e-06,
"loss": 0.5516,
"step": 2600
},
{
"epoch": 0.05939940600593994,
"grad_norm": 1.256371021270752,
"learning_rate": 9.88559480576648e-06,
"loss": 0.5459,
"step": 2700
},
{
"epoch": 0.06159938400615994,
"grad_norm": 1.418700933456421,
"learning_rate": 9.88119291295257e-06,
"loss": 0.5495,
"step": 2800
},
{
"epoch": 0.06379936200637994,
"grad_norm": 1.6376900672912598,
"learning_rate": 9.876791020138661e-06,
"loss": 0.5641,
"step": 2900
},
{
"epoch": 0.06599934000659993,
"grad_norm": 1.5085667371749878,
"learning_rate": 9.872389127324751e-06,
"loss": 0.5625,
"step": 3000
},
{
"epoch": 0.06819931800681993,
"grad_norm": 1.5381278991699219,
"learning_rate": 9.86798723451084e-06,
"loss": 0.5603,
"step": 3100
},
{
"epoch": 0.07039929600703992,
"grad_norm": 1.5536515712738037,
"learning_rate": 9.86358534169693e-06,
"loss": 0.5529,
"step": 3200
},
{
"epoch": 0.07259927400725993,
"grad_norm": 1.9047861099243164,
"learning_rate": 9.85918344888302e-06,
"loss": 0.549,
"step": 3300
},
{
"epoch": 0.07479925200747993,
"grad_norm": 1.517338514328003,
"learning_rate": 9.85478155606911e-06,
"loss": 0.561,
"step": 3400
},
{
"epoch": 0.07699923000769993,
"grad_norm": 1.5779054164886475,
"learning_rate": 9.850379663255201e-06,
"loss": 0.5706,
"step": 3500
},
{
"epoch": 0.07919920800791992,
"grad_norm": 1.704124927520752,
"learning_rate": 9.845977770441291e-06,
"loss": 0.5523,
"step": 3600
},
{
"epoch": 0.08139918600813992,
"grad_norm": 1.5121921300888062,
"learning_rate": 9.84157587762738e-06,
"loss": 0.5539,
"step": 3700
},
{
"epoch": 0.08359916400835991,
"grad_norm": 1.6511967182159424,
"learning_rate": 9.83717398481347e-06,
"loss": 0.5443,
"step": 3800
},
{
"epoch": 0.08579914200857991,
"grad_norm": 1.719138503074646,
"learning_rate": 9.83277209199956e-06,
"loss": 0.55,
"step": 3900
},
{
"epoch": 0.0879991200087999,
"grad_norm": 1.6003084182739258,
"learning_rate": 9.82837019918565e-06,
"loss": 0.5588,
"step": 4000
},
{
"epoch": 0.09019909800901992,
"grad_norm": 1.787855625152588,
"learning_rate": 9.823968306371741e-06,
"loss": 0.5636,
"step": 4100
},
{
"epoch": 0.09239907600923991,
"grad_norm": 1.6582859754562378,
"learning_rate": 9.819566413557831e-06,
"loss": 0.5618,
"step": 4200
},
{
"epoch": 0.09459905400945991,
"grad_norm": 1.696978211402893,
"learning_rate": 9.81516452074392e-06,
"loss": 0.5546,
"step": 4300
},
{
"epoch": 0.0967990320096799,
"grad_norm": 1.8410296440124512,
"learning_rate": 9.81076262793001e-06,
"loss": 0.5471,
"step": 4400
},
{
"epoch": 0.0989990100098999,
"grad_norm": 1.736607313156128,
"learning_rate": 9.8063607351161e-06,
"loss": 0.5461,
"step": 4500
},
{
"epoch": 0.1011989880101199,
"grad_norm": 1.507016897201538,
"learning_rate": 9.80195884230219e-06,
"loss": 0.5609,
"step": 4600
},
{
"epoch": 0.10339896601033989,
"grad_norm": 1.6941606998443604,
"learning_rate": 9.797556949488281e-06,
"loss": 0.5656,
"step": 4700
},
{
"epoch": 0.1055989440105599,
"grad_norm": 1.6578975915908813,
"learning_rate": 9.793155056674371e-06,
"loss": 0.5624,
"step": 4800
},
{
"epoch": 0.1077989220107799,
"grad_norm": 1.6376292705535889,
"learning_rate": 9.78875316386046e-06,
"loss": 0.5483,
"step": 4900
},
{
"epoch": 0.10999890001099989,
"grad_norm": 1.8150690793991089,
"learning_rate": 9.78435127104655e-06,
"loss": 0.5739,
"step": 5000
},
{
"epoch": 0.11219887801121989,
"grad_norm": 1.8733948469161987,
"learning_rate": 9.77994937823264e-06,
"loss": 0.5511,
"step": 5100
},
{
"epoch": 0.11439885601143988,
"grad_norm": 1.3109201192855835,
"learning_rate": 9.77554748541873e-06,
"loss": 0.5584,
"step": 5200
},
{
"epoch": 0.11659883401165988,
"grad_norm": 2.0025064945220947,
"learning_rate": 9.771145592604821e-06,
"loss": 0.5638,
"step": 5300
},
{
"epoch": 0.11879881201187988,
"grad_norm": 1.584830641746521,
"learning_rate": 9.76674369979091e-06,
"loss": 0.575,
"step": 5400
},
{
"epoch": 0.12099879001209989,
"grad_norm": 1.7688754796981812,
"learning_rate": 9.762341806977e-06,
"loss": 0.5603,
"step": 5500
},
{
"epoch": 0.12319876801231988,
"grad_norm": 1.6688051223754883,
"learning_rate": 9.75793991416309e-06,
"loss": 0.5746,
"step": 5600
},
{
"epoch": 0.12539874601253986,
"grad_norm": 1.6409167051315308,
"learning_rate": 9.753538021349182e-06,
"loss": 0.5469,
"step": 5700
},
{
"epoch": 0.12759872401275987,
"grad_norm": 1.5867542028427124,
"learning_rate": 9.74913612853527e-06,
"loss": 0.5414,
"step": 5800
},
{
"epoch": 0.12979870201297988,
"grad_norm": 1.7665027379989624,
"learning_rate": 9.744734235721361e-06,
"loss": 0.5574,
"step": 5900
},
{
"epoch": 0.13199868001319986,
"grad_norm": 1.298757553100586,
"learning_rate": 9.740332342907451e-06,
"loss": 0.5356,
"step": 6000
},
{
"epoch": 0.13419865801341987,
"grad_norm": 1.381654143333435,
"learning_rate": 9.735930450093542e-06,
"loss": 0.5525,
"step": 6100
},
{
"epoch": 0.13639863601363986,
"grad_norm": 1.398958683013916,
"learning_rate": 9.731528557279632e-06,
"loss": 0.5427,
"step": 6200
},
{
"epoch": 0.13859861401385987,
"grad_norm": 1.4779409170150757,
"learning_rate": 9.727126664465722e-06,
"loss": 0.5583,
"step": 6300
},
{
"epoch": 0.14079859201407985,
"grad_norm": 1.5421425104141235,
"learning_rate": 9.72272477165181e-06,
"loss": 0.5484,
"step": 6400
},
{
"epoch": 0.14299857001429986,
"grad_norm": 1.7208441495895386,
"learning_rate": 9.718322878837901e-06,
"loss": 0.5478,
"step": 6500
},
{
"epoch": 0.14519854801451987,
"grad_norm": 1.643373727798462,
"learning_rate": 9.713920986023991e-06,
"loss": 0.5742,
"step": 6600
},
{
"epoch": 0.14739852601473985,
"grad_norm": 1.5801072120666504,
"learning_rate": 9.709519093210082e-06,
"loss": 0.5516,
"step": 6700
},
{
"epoch": 0.14959850401495986,
"grad_norm": 1.5034841299057007,
"learning_rate": 9.705117200396172e-06,
"loss": 0.558,
"step": 6800
},
{
"epoch": 0.15179848201517984,
"grad_norm": 1.6282888650894165,
"learning_rate": 9.70071530758226e-06,
"loss": 0.5575,
"step": 6900
},
{
"epoch": 0.15399846001539985,
"grad_norm": 1.4846858978271484,
"learning_rate": 9.69631341476835e-06,
"loss": 0.5487,
"step": 7000
},
{
"epoch": 0.15619843801561983,
"grad_norm": 1.6254215240478516,
"learning_rate": 9.691911521954441e-06,
"loss": 0.5443,
"step": 7100
},
{
"epoch": 0.15839841601583984,
"grad_norm": 1.7018550634384155,
"learning_rate": 9.687509629140531e-06,
"loss": 0.556,
"step": 7200
},
{
"epoch": 0.16059839401605983,
"grad_norm": 1.6466326713562012,
"learning_rate": 9.683107736326622e-06,
"loss": 0.5541,
"step": 7300
},
{
"epoch": 0.16279837201627984,
"grad_norm": 1.4446876049041748,
"learning_rate": 9.678705843512712e-06,
"loss": 0.5464,
"step": 7400
},
{
"epoch": 0.16499835001649985,
"grad_norm": 1.5896605253219604,
"learning_rate": 9.6743039506988e-06,
"loss": 0.5394,
"step": 7500
},
{
"epoch": 0.16719832801671983,
"grad_norm": 1.837875485420227,
"learning_rate": 9.66990205788489e-06,
"loss": 0.5351,
"step": 7600
},
{
"epoch": 0.16939830601693984,
"grad_norm": 1.5089105367660522,
"learning_rate": 9.665500165070981e-06,
"loss": 0.5434,
"step": 7700
},
{
"epoch": 0.17159828401715982,
"grad_norm": 1.5068552494049072,
"learning_rate": 9.661098272257071e-06,
"loss": 0.5542,
"step": 7800
},
{
"epoch": 0.17379826201737983,
"grad_norm": 1.7671160697937012,
"learning_rate": 9.656696379443162e-06,
"loss": 0.5434,
"step": 7900
},
{
"epoch": 0.1759982400175998,
"grad_norm": 1.612404227256775,
"learning_rate": 9.652294486629252e-06,
"loss": 0.5481,
"step": 8000
},
{
"epoch": 0.17819821801781982,
"grad_norm": 1.403520941734314,
"learning_rate": 9.64789259381534e-06,
"loss": 0.5436,
"step": 8100
},
{
"epoch": 0.18039819601803983,
"grad_norm": 1.786060094833374,
"learning_rate": 9.64349070100143e-06,
"loss": 0.5571,
"step": 8200
},
{
"epoch": 0.1825981740182598,
"grad_norm": 1.6619782447814941,
"learning_rate": 9.639088808187521e-06,
"loss": 0.5402,
"step": 8300
},
{
"epoch": 0.18479815201847982,
"grad_norm": 1.805365800857544,
"learning_rate": 9.634686915373611e-06,
"loss": 0.5705,
"step": 8400
},
{
"epoch": 0.1869981300186998,
"grad_norm": 1.5753322839736938,
"learning_rate": 9.630285022559702e-06,
"loss": 0.5477,
"step": 8500
},
{
"epoch": 0.18919810801891981,
"grad_norm": 1.688490629196167,
"learning_rate": 9.625883129745792e-06,
"loss": 0.5497,
"step": 8600
},
{
"epoch": 0.1913980860191398,
"grad_norm": 1.5862349271774292,
"learning_rate": 9.62148123693188e-06,
"loss": 0.5374,
"step": 8700
},
{
"epoch": 0.1935980640193598,
"grad_norm": 1.8771247863769531,
"learning_rate": 9.61707934411797e-06,
"loss": 0.5445,
"step": 8800
},
{
"epoch": 0.19579804201957982,
"grad_norm": 1.432055115699768,
"learning_rate": 9.612677451304061e-06,
"loss": 0.5478,
"step": 8900
},
{
"epoch": 0.1979980200197998,
"grad_norm": 1.7091459035873413,
"learning_rate": 9.608275558490151e-06,
"loss": 0.5509,
"step": 9000
},
{
"epoch": 0.2001979980200198,
"grad_norm": 1.5979877710342407,
"learning_rate": 9.603873665676242e-06,
"loss": 0.5439,
"step": 9100
},
{
"epoch": 0.2023979760202398,
"grad_norm": 1.5256608724594116,
"learning_rate": 9.599471772862332e-06,
"loss": 0.546,
"step": 9200
},
{
"epoch": 0.2045979540204598,
"grad_norm": 1.7038841247558594,
"learning_rate": 9.595069880048422e-06,
"loss": 0.5455,
"step": 9300
},
{
"epoch": 0.20679793202067978,
"grad_norm": 1.6116039752960205,
"learning_rate": 9.590667987234512e-06,
"loss": 0.5448,
"step": 9400
},
{
"epoch": 0.2089979100208998,
"grad_norm": 1.6021257638931274,
"learning_rate": 9.586266094420603e-06,
"loss": 0.5373,
"step": 9500
},
{
"epoch": 0.2111978880211198,
"grad_norm": 1.8599495887756348,
"learning_rate": 9.581864201606691e-06,
"loss": 0.5445,
"step": 9600
},
{
"epoch": 0.21339786602133978,
"grad_norm": 1.5737359523773193,
"learning_rate": 9.577462308792782e-06,
"loss": 0.554,
"step": 9700
},
{
"epoch": 0.2155978440215598,
"grad_norm": 1.9932422637939453,
"learning_rate": 9.573060415978872e-06,
"loss": 0.5466,
"step": 9800
},
{
"epoch": 0.21779782202177977,
"grad_norm": 1.2846128940582275,
"learning_rate": 9.568658523164962e-06,
"loss": 0.552,
"step": 9900
},
{
"epoch": 0.21999780002199978,
"grad_norm": 1.845566987991333,
"learning_rate": 9.564256630351052e-06,
"loss": 0.5351,
"step": 10000
},
{
"epoch": 0.22219777802221977,
"grad_norm": 1.7098534107208252,
"learning_rate": 9.559854737537143e-06,
"loss": 0.5701,
"step": 10100
},
{
"epoch": 0.22439775602243978,
"grad_norm": 1.6359370946884155,
"learning_rate": 9.555452844723231e-06,
"loss": 0.5399,
"step": 10200
},
{
"epoch": 0.22659773402265979,
"grad_norm": 1.8628222942352295,
"learning_rate": 9.551050951909322e-06,
"loss": 0.5428,
"step": 10300
},
{
"epoch": 0.22879771202287977,
"grad_norm": 1.7202619314193726,
"learning_rate": 9.546649059095412e-06,
"loss": 0.5473,
"step": 10400
},
{
"epoch": 0.23099769002309978,
"grad_norm": 1.6408450603485107,
"learning_rate": 9.542247166281502e-06,
"loss": 0.5566,
"step": 10500
},
{
"epoch": 0.23319766802331976,
"grad_norm": 1.6586904525756836,
"learning_rate": 9.537845273467592e-06,
"loss": 0.5357,
"step": 10600
},
{
"epoch": 0.23539764602353977,
"grad_norm": 1.8505043983459473,
"learning_rate": 9.533443380653683e-06,
"loss": 0.5596,
"step": 10700
},
{
"epoch": 0.23759762402375975,
"grad_norm": 1.9244803190231323,
"learning_rate": 9.529041487839771e-06,
"loss": 0.5428,
"step": 10800
},
{
"epoch": 0.23979760202397976,
"grad_norm": 1.5375540256500244,
"learning_rate": 9.524639595025862e-06,
"loss": 0.5478,
"step": 10900
},
{
"epoch": 0.24199758002419977,
"grad_norm": 1.7372453212738037,
"learning_rate": 9.520237702211952e-06,
"loss": 0.5458,
"step": 11000
},
{
"epoch": 0.24419755802441975,
"grad_norm": 1.5542049407958984,
"learning_rate": 9.515835809398042e-06,
"loss": 0.5412,
"step": 11100
},
{
"epoch": 0.24639753602463976,
"grad_norm": 1.5235602855682373,
"learning_rate": 9.511433916584132e-06,
"loss": 0.5631,
"step": 11200
},
{
"epoch": 0.24859751402485974,
"grad_norm": 1.7347521781921387,
"learning_rate": 9.507032023770221e-06,
"loss": 0.5508,
"step": 11300
},
{
"epoch": 0.2507974920250797,
"grad_norm": 1.8189500570297241,
"learning_rate": 9.502630130956311e-06,
"loss": 0.5346,
"step": 11400
},
{
"epoch": 0.25299747002529976,
"grad_norm": 1.5607105493545532,
"learning_rate": 9.498228238142402e-06,
"loss": 0.5454,
"step": 11500
},
{
"epoch": 0.25519744802551975,
"grad_norm": 1.5799516439437866,
"learning_rate": 9.493826345328492e-06,
"loss": 0.5271,
"step": 11600
},
{
"epoch": 0.25739742602573973,
"grad_norm": 1.4460997581481934,
"learning_rate": 9.489424452514582e-06,
"loss": 0.5437,
"step": 11700
},
{
"epoch": 0.25959740402595977,
"grad_norm": 1.368635892868042,
"learning_rate": 9.485022559700672e-06,
"loss": 0.5442,
"step": 11800
},
{
"epoch": 0.26179738202617975,
"grad_norm": 1.8246245384216309,
"learning_rate": 9.480620666886761e-06,
"loss": 0.5321,
"step": 11900
},
{
"epoch": 0.26399736002639973,
"grad_norm": 1.8881937265396118,
"learning_rate": 9.476218774072851e-06,
"loss": 0.5639,
"step": 12000
},
{
"epoch": 0.2661973380266197,
"grad_norm": 1.39218008518219,
"learning_rate": 9.471816881258942e-06,
"loss": 0.5634,
"step": 12100
},
{
"epoch": 0.26839731602683975,
"grad_norm": 1.5577659606933594,
"learning_rate": 9.467414988445032e-06,
"loss": 0.5422,
"step": 12200
},
{
"epoch": 0.27059729402705973,
"grad_norm": 1.9022492170333862,
"learning_rate": 9.463013095631122e-06,
"loss": 0.5429,
"step": 12300
},
{
"epoch": 0.2727972720272797,
"grad_norm": 1.7101701498031616,
"learning_rate": 9.458611202817212e-06,
"loss": 0.5473,
"step": 12400
},
{
"epoch": 0.27499725002749975,
"grad_norm": 2.0155210494995117,
"learning_rate": 9.454209310003301e-06,
"loss": 0.5689,
"step": 12500
},
{
"epoch": 0.27719722802771973,
"grad_norm": 1.994775414466858,
"learning_rate": 9.449807417189393e-06,
"loss": 0.514,
"step": 12600
},
{
"epoch": 0.2793972060279397,
"grad_norm": 1.5826818943023682,
"learning_rate": 9.445405524375483e-06,
"loss": 0.5413,
"step": 12700
},
{
"epoch": 0.2815971840281597,
"grad_norm": 1.589729905128479,
"learning_rate": 9.441003631561574e-06,
"loss": 0.5339,
"step": 12800
},
{
"epoch": 0.28379716202837973,
"grad_norm": 1.8156132698059082,
"learning_rate": 9.436601738747662e-06,
"loss": 0.5546,
"step": 12900
},
{
"epoch": 0.2859971400285997,
"grad_norm": 1.576416254043579,
"learning_rate": 9.432199845933752e-06,
"loss": 0.5465,
"step": 13000
},
{
"epoch": 0.2881971180288197,
"grad_norm": 1.9609074592590332,
"learning_rate": 9.427797953119843e-06,
"loss": 0.553,
"step": 13100
},
{
"epoch": 0.29039709602903974,
"grad_norm": 1.5881434679031372,
"learning_rate": 9.423396060305933e-06,
"loss": 0.5377,
"step": 13200
},
{
"epoch": 0.2925970740292597,
"grad_norm": 1.569200038909912,
"learning_rate": 9.418994167492023e-06,
"loss": 0.5467,
"step": 13300
},
{
"epoch": 0.2947970520294797,
"grad_norm": 1.7305947542190552,
"learning_rate": 9.414592274678112e-06,
"loss": 0.5388,
"step": 13400
},
{
"epoch": 0.2969970300296997,
"grad_norm": 1.9278624057769775,
"learning_rate": 9.410190381864202e-06,
"loss": 0.5419,
"step": 13500
},
{
"epoch": 0.2991970080299197,
"grad_norm": 1.6430861949920654,
"learning_rate": 9.405788489050292e-06,
"loss": 0.5579,
"step": 13600
},
{
"epoch": 0.3013969860301397,
"grad_norm": 1.4233689308166504,
"learning_rate": 9.401386596236383e-06,
"loss": 0.5385,
"step": 13700
},
{
"epoch": 0.3035969640303597,
"grad_norm": 1.705346941947937,
"learning_rate": 9.396984703422473e-06,
"loss": 0.5491,
"step": 13800
},
{
"epoch": 0.3057969420305797,
"grad_norm": 1.7933902740478516,
"learning_rate": 9.392582810608563e-06,
"loss": 0.5513,
"step": 13900
},
{
"epoch": 0.3079969200307997,
"grad_norm": 1.901663899421692,
"learning_rate": 9.388180917794652e-06,
"loss": 0.5614,
"step": 14000
},
{
"epoch": 0.3101968980310197,
"grad_norm": 1.6877708435058594,
"learning_rate": 9.383779024980742e-06,
"loss": 0.5334,
"step": 14100
},
{
"epoch": 0.31239687603123967,
"grad_norm": 1.7979609966278076,
"learning_rate": 9.379377132166832e-06,
"loss": 0.5527,
"step": 14200
},
{
"epoch": 0.3145968540314597,
"grad_norm": 1.7708429098129272,
"learning_rate": 9.374975239352923e-06,
"loss": 0.5386,
"step": 14300
},
{
"epoch": 0.3167968320316797,
"grad_norm": 1.3621147871017456,
"learning_rate": 9.370573346539013e-06,
"loss": 0.5626,
"step": 14400
},
{
"epoch": 0.31899681003189967,
"grad_norm": 1.5842787027359009,
"learning_rate": 9.366171453725103e-06,
"loss": 0.529,
"step": 14500
},
{
"epoch": 0.32119678803211965,
"grad_norm": 1.817987084388733,
"learning_rate": 9.361769560911192e-06,
"loss": 0.538,
"step": 14600
},
{
"epoch": 0.3233967660323397,
"grad_norm": 1.6293082237243652,
"learning_rate": 9.357367668097282e-06,
"loss": 0.5481,
"step": 14700
},
{
"epoch": 0.32559674403255967,
"grad_norm": 1.5916519165039062,
"learning_rate": 9.352965775283372e-06,
"loss": 0.5534,
"step": 14800
},
{
"epoch": 0.32779672203277965,
"grad_norm": 1.5773463249206543,
"learning_rate": 9.348563882469463e-06,
"loss": 0.5501,
"step": 14900
},
{
"epoch": 0.3299967000329997,
"grad_norm": 1.9787790775299072,
"learning_rate": 9.344161989655553e-06,
"loss": 0.541,
"step": 15000
},
{
"epoch": 0.3321966780332197,
"grad_norm": 1.3281339406967163,
"learning_rate": 9.339760096841642e-06,
"loss": 0.539,
"step": 15100
},
{
"epoch": 0.33439665603343965,
"grad_norm": 2.091588020324707,
"learning_rate": 9.335358204027732e-06,
"loss": 0.5393,
"step": 15200
},
{
"epoch": 0.33659663403365964,
"grad_norm": 1.912660837173462,
"learning_rate": 9.330956311213822e-06,
"loss": 0.5168,
"step": 15300
},
{
"epoch": 0.3387966120338797,
"grad_norm": 1.7248882055282593,
"learning_rate": 9.326554418399912e-06,
"loss": 0.538,
"step": 15400
},
{
"epoch": 0.34099659003409966,
"grad_norm": 1.8949754238128662,
"learning_rate": 9.322152525586003e-06,
"loss": 0.5444,
"step": 15500
},
{
"epoch": 0.34319656803431964,
"grad_norm": 1.4323865175247192,
"learning_rate": 9.317750632772093e-06,
"loss": 0.542,
"step": 15600
},
{
"epoch": 0.3453965460345397,
"grad_norm": 1.7454142570495605,
"learning_rate": 9.313348739958182e-06,
"loss": 0.5346,
"step": 15700
},
{
"epoch": 0.34759652403475966,
"grad_norm": 2.214750289916992,
"learning_rate": 9.308946847144272e-06,
"loss": 0.5391,
"step": 15800
},
{
"epoch": 0.34979650203497964,
"grad_norm": 1.7991106510162354,
"learning_rate": 9.304544954330362e-06,
"loss": 0.551,
"step": 15900
},
{
"epoch": 0.3519964800351996,
"grad_norm": 1.7487062215805054,
"learning_rate": 9.300143061516452e-06,
"loss": 0.5536,
"step": 16000
},
{
"epoch": 0.35419645803541966,
"grad_norm": 1.7137202024459839,
"learning_rate": 9.295741168702543e-06,
"loss": 0.5472,
"step": 16100
},
{
"epoch": 0.35639643603563964,
"grad_norm": 1.569287657737732,
"learning_rate": 9.291339275888633e-06,
"loss": 0.5286,
"step": 16200
},
{
"epoch": 0.3585964140358596,
"grad_norm": 1.805232286453247,
"learning_rate": 9.286937383074723e-06,
"loss": 0.535,
"step": 16300
},
{
"epoch": 0.36079639203607966,
"grad_norm": 1.8445895910263062,
"learning_rate": 9.282535490260814e-06,
"loss": 0.5297,
"step": 16400
},
{
"epoch": 0.36299637003629964,
"grad_norm": 1.8282471895217896,
"learning_rate": 9.278133597446904e-06,
"loss": 0.5341,
"step": 16500
},
{
"epoch": 0.3651963480365196,
"grad_norm": 1.5979552268981934,
"learning_rate": 9.273731704632994e-06,
"loss": 0.5471,
"step": 16600
},
{
"epoch": 0.3673963260367396,
"grad_norm": 1.6148823499679565,
"learning_rate": 9.269329811819083e-06,
"loss": 0.534,
"step": 16700
},
{
"epoch": 0.36959630403695964,
"grad_norm": 1.7306467294692993,
"learning_rate": 9.264927919005173e-06,
"loss": 0.5475,
"step": 16800
},
{
"epoch": 0.3717962820371796,
"grad_norm": 1.5774517059326172,
"learning_rate": 9.260526026191263e-06,
"loss": 0.5604,
"step": 16900
},
{
"epoch": 0.3739962600373996,
"grad_norm": 1.6581697463989258,
"learning_rate": 9.256124133377354e-06,
"loss": 0.5474,
"step": 17000
},
{
"epoch": 0.37619623803761965,
"grad_norm": 1.8324202299118042,
"learning_rate": 9.251722240563444e-06,
"loss": 0.5341,
"step": 17100
},
{
"epoch": 0.37839621603783963,
"grad_norm": 1.7121940851211548,
"learning_rate": 9.247320347749532e-06,
"loss": 0.5538,
"step": 17200
},
{
"epoch": 0.3805961940380596,
"grad_norm": 1.8483502864837646,
"learning_rate": 9.242918454935623e-06,
"loss": 0.5231,
"step": 17300
},
{
"epoch": 0.3827961720382796,
"grad_norm": 1.7600507736206055,
"learning_rate": 9.238516562121713e-06,
"loss": 0.5581,
"step": 17400
},
{
"epoch": 0.38499615003849963,
"grad_norm": 1.779398798942566,
"learning_rate": 9.234114669307803e-06,
"loss": 0.5468,
"step": 17500
},
{
"epoch": 0.3871961280387196,
"grad_norm": 1.7732363939285278,
"learning_rate": 9.229712776493894e-06,
"loss": 0.558,
"step": 17600
},
{
"epoch": 0.3893961060389396,
"grad_norm": 1.7597503662109375,
"learning_rate": 9.225310883679984e-06,
"loss": 0.5231,
"step": 17700
},
{
"epoch": 0.39159608403915963,
"grad_norm": 1.8344216346740723,
"learning_rate": 9.220908990866072e-06,
"loss": 0.5428,
"step": 17800
},
{
"epoch": 0.3937960620393796,
"grad_norm": 1.662919044494629,
"learning_rate": 9.216507098052163e-06,
"loss": 0.5314,
"step": 17900
},
{
"epoch": 0.3959960400395996,
"grad_norm": 1.3180632591247559,
"learning_rate": 9.212105205238253e-06,
"loss": 0.5335,
"step": 18000
},
{
"epoch": 0.3981960180398196,
"grad_norm": 1.8466808795928955,
"learning_rate": 9.207703312424343e-06,
"loss": 0.5251,
"step": 18100
},
{
"epoch": 0.4003959960400396,
"grad_norm": 1.942530632019043,
"learning_rate": 9.203301419610434e-06,
"loss": 0.5361,
"step": 18200
},
{
"epoch": 0.4025959740402596,
"grad_norm": 1.6795586347579956,
"learning_rate": 9.198899526796524e-06,
"loss": 0.5322,
"step": 18300
},
{
"epoch": 0.4047959520404796,
"grad_norm": 1.8028258085250854,
"learning_rate": 9.194497633982612e-06,
"loss": 0.5332,
"step": 18400
},
{
"epoch": 0.4069959300406996,
"grad_norm": 1.9072916507720947,
"learning_rate": 9.190095741168703e-06,
"loss": 0.5436,
"step": 18500
},
{
"epoch": 0.4091959080409196,
"grad_norm": 1.849950909614563,
"learning_rate": 9.185693848354793e-06,
"loss": 0.5464,
"step": 18600
},
{
"epoch": 0.4113958860411396,
"grad_norm": 1.8676297664642334,
"learning_rate": 9.181291955540883e-06,
"loss": 0.5598,
"step": 18700
},
{
"epoch": 0.41359586404135956,
"grad_norm": 1.8260865211486816,
"learning_rate": 9.176890062726974e-06,
"loss": 0.5433,
"step": 18800
},
{
"epoch": 0.4157958420415796,
"grad_norm": 1.6370753049850464,
"learning_rate": 9.172488169913064e-06,
"loss": 0.5473,
"step": 18900
},
{
"epoch": 0.4179958200417996,
"grad_norm": 1.583030104637146,
"learning_rate": 9.168086277099152e-06,
"loss": 0.5478,
"step": 19000
},
{
"epoch": 0.42019579804201956,
"grad_norm": 1.895065188407898,
"learning_rate": 9.163684384285243e-06,
"loss": 0.5391,
"step": 19100
},
{
"epoch": 0.4223957760422396,
"grad_norm": 1.6694116592407227,
"learning_rate": 9.159282491471333e-06,
"loss": 0.5206,
"step": 19200
},
{
"epoch": 0.4245957540424596,
"grad_norm": 1.630575180053711,
"learning_rate": 9.154880598657423e-06,
"loss": 0.5451,
"step": 19300
},
{
"epoch": 0.42679573204267957,
"grad_norm": 2.0224249362945557,
"learning_rate": 9.150478705843514e-06,
"loss": 0.5334,
"step": 19400
},
{
"epoch": 0.42899571004289955,
"grad_norm": 1.6329941749572754,
"learning_rate": 9.146076813029602e-06,
"loss": 0.5279,
"step": 19500
},
{
"epoch": 0.4311956880431196,
"grad_norm": 1.3999661207199097,
"learning_rate": 9.141674920215694e-06,
"loss": 0.5366,
"step": 19600
},
{
"epoch": 0.43339566604333957,
"grad_norm": 1.5041108131408691,
"learning_rate": 9.137273027401784e-06,
"loss": 0.5324,
"step": 19700
},
{
"epoch": 0.43559564404355955,
"grad_norm": 1.714513897895813,
"learning_rate": 9.132871134587875e-06,
"loss": 0.5341,
"step": 19800
},
{
"epoch": 0.4377956220437796,
"grad_norm": 1.7554248571395874,
"learning_rate": 9.128469241773963e-06,
"loss": 0.5436,
"step": 19900
},
{
"epoch": 0.43999560004399957,
"grad_norm": 1.665436029434204,
"learning_rate": 9.124067348960054e-06,
"loss": 0.5299,
"step": 20000
},
{
"epoch": 0.44219557804421955,
"grad_norm": 1.668437123298645,
"learning_rate": 9.119665456146144e-06,
"loss": 0.5188,
"step": 20100
},
{
"epoch": 0.44439555604443953,
"grad_norm": 1.9339295625686646,
"learning_rate": 9.115263563332234e-06,
"loss": 0.5574,
"step": 20200
},
{
"epoch": 0.44659553404465957,
"grad_norm": 1.7263190746307373,
"learning_rate": 9.110861670518324e-06,
"loss": 0.5469,
"step": 20300
},
{
"epoch": 0.44879551204487955,
"grad_norm": 1.5733555555343628,
"learning_rate": 9.106459777704415e-06,
"loss": 0.529,
"step": 20400
},
{
"epoch": 0.45099549004509953,
"grad_norm": 1.6786284446716309,
"learning_rate": 9.102057884890503e-06,
"loss": 0.539,
"step": 20500
},
{
"epoch": 0.45319546804531957,
"grad_norm": 1.6025316715240479,
"learning_rate": 9.097655992076594e-06,
"loss": 0.5394,
"step": 20600
},
{
"epoch": 0.45539544604553955,
"grad_norm": 1.7945187091827393,
"learning_rate": 9.093254099262684e-06,
"loss": 0.5233,
"step": 20700
},
{
"epoch": 0.45759542404575954,
"grad_norm": 1.6407737731933594,
"learning_rate": 9.088852206448774e-06,
"loss": 0.547,
"step": 20800
},
{
"epoch": 0.4597954020459795,
"grad_norm": 1.623547911643982,
"learning_rate": 9.084450313634864e-06,
"loss": 0.5609,
"step": 20900
},
{
"epoch": 0.46199538004619956,
"grad_norm": 1.7454668283462524,
"learning_rate": 9.080048420820953e-06,
"loss": 0.5484,
"step": 21000
},
{
"epoch": 0.46419535804641954,
"grad_norm": 2.0362443923950195,
"learning_rate": 9.075646528007043e-06,
"loss": 0.5199,
"step": 21100
},
{
"epoch": 0.4663953360466395,
"grad_norm": 1.8968782424926758,
"learning_rate": 9.071244635193134e-06,
"loss": 0.5471,
"step": 21200
},
{
"epoch": 0.46859531404685956,
"grad_norm": 1.7040385007858276,
"learning_rate": 9.066842742379224e-06,
"loss": 0.5167,
"step": 21300
},
{
"epoch": 0.47079529204707954,
"grad_norm": 1.8420989513397217,
"learning_rate": 9.062440849565314e-06,
"loss": 0.5359,
"step": 21400
},
{
"epoch": 0.4729952700472995,
"grad_norm": 1.6311464309692383,
"learning_rate": 9.058038956751404e-06,
"loss": 0.5375,
"step": 21500
},
{
"epoch": 0.4751952480475195,
"grad_norm": 2.0437209606170654,
"learning_rate": 9.053637063937493e-06,
"loss": 0.5427,
"step": 21600
},
{
"epoch": 0.47739522604773954,
"grad_norm": 1.6111825704574585,
"learning_rate": 9.049235171123583e-06,
"loss": 0.526,
"step": 21700
},
{
"epoch": 0.4795952040479595,
"grad_norm": 1.3677709102630615,
"learning_rate": 9.044833278309674e-06,
"loss": 0.5328,
"step": 21800
},
{
"epoch": 0.4817951820481795,
"grad_norm": 2.1056365966796875,
"learning_rate": 9.040431385495764e-06,
"loss": 0.5391,
"step": 21900
},
{
"epoch": 0.48399516004839954,
"grad_norm": 1.807760238647461,
"learning_rate": 9.036029492681854e-06,
"loss": 0.5606,
"step": 22000
},
{
"epoch": 0.4861951380486195,
"grad_norm": 1.8556056022644043,
"learning_rate": 9.031627599867944e-06,
"loss": 0.5351,
"step": 22100
},
{
"epoch": 0.4883951160488395,
"grad_norm": 2.0106847286224365,
"learning_rate": 9.027225707054033e-06,
"loss": 0.5542,
"step": 22200
},
{
"epoch": 0.4905950940490595,
"grad_norm": 1.6676563024520874,
"learning_rate": 9.022823814240123e-06,
"loss": 0.538,
"step": 22300
},
{
"epoch": 0.4927950720492795,
"grad_norm": 1.4103186130523682,
"learning_rate": 9.018421921426214e-06,
"loss": 0.5241,
"step": 22400
},
{
"epoch": 0.4949950500494995,
"grad_norm": 1.8032267093658447,
"learning_rate": 9.014020028612304e-06,
"loss": 0.5367,
"step": 22500
},
{
"epoch": 0.4971950280497195,
"grad_norm": 1.6195557117462158,
"learning_rate": 9.009618135798394e-06,
"loss": 0.5434,
"step": 22600
},
{
"epoch": 0.4993950060499395,
"grad_norm": 1.7808386087417603,
"learning_rate": 9.005216242984484e-06,
"loss": 0.5421,
"step": 22700
},
{
"epoch": 0.5015949840501595,
"grad_norm": 1.746341586112976,
"learning_rate": 9.000814350170573e-06,
"loss": 0.5362,
"step": 22800
},
{
"epoch": 0.5037949620503795,
"grad_norm": 2.1744487285614014,
"learning_rate": 8.996412457356663e-06,
"loss": 0.5243,
"step": 22900
},
{
"epoch": 0.5059949400505995,
"grad_norm": 1.7973219156265259,
"learning_rate": 8.992010564542755e-06,
"loss": 0.5504,
"step": 23000
},
{
"epoch": 0.5081949180508195,
"grad_norm": 1.6203027963638306,
"learning_rate": 8.987608671728844e-06,
"loss": 0.5426,
"step": 23100
},
{
"epoch": 0.5103948960510395,
"grad_norm": 1.6453986167907715,
"learning_rate": 8.983206778914934e-06,
"loss": 0.548,
"step": 23200
},
{
"epoch": 0.5125948740512595,
"grad_norm": 1.8163201808929443,
"learning_rate": 8.978804886101024e-06,
"loss": 0.5306,
"step": 23300
},
{
"epoch": 0.5147948520514795,
"grad_norm": 1.7606194019317627,
"learning_rate": 8.974402993287115e-06,
"loss": 0.5318,
"step": 23400
},
{
"epoch": 0.5169948300516994,
"grad_norm": 1.9621275663375854,
"learning_rate": 8.970001100473205e-06,
"loss": 0.5289,
"step": 23500
},
{
"epoch": 0.5191948080519195,
"grad_norm": 1.707217812538147,
"learning_rate": 8.965599207659295e-06,
"loss": 0.5374,
"step": 23600
},
{
"epoch": 0.5213947860521395,
"grad_norm": 1.9041409492492676,
"learning_rate": 8.961197314845384e-06,
"loss": 0.5512,
"step": 23700
},
{
"epoch": 0.5235947640523595,
"grad_norm": 1.7021831274032593,
"learning_rate": 8.956795422031474e-06,
"loss": 0.5363,
"step": 23800
},
{
"epoch": 0.5257947420525795,
"grad_norm": 1.6546313762664795,
"learning_rate": 8.952393529217564e-06,
"loss": 0.5355,
"step": 23900
},
{
"epoch": 0.5279947200527995,
"grad_norm": 2.1298437118530273,
"learning_rate": 8.947991636403655e-06,
"loss": 0.5336,
"step": 24000
},
{
"epoch": 0.5301946980530194,
"grad_norm": 1.6351710557937622,
"learning_rate": 8.943589743589745e-06,
"loss": 0.5298,
"step": 24100
},
{
"epoch": 0.5323946760532394,
"grad_norm": 1.7850167751312256,
"learning_rate": 8.939187850775835e-06,
"loss": 0.5295,
"step": 24200
},
{
"epoch": 0.5345946540534595,
"grad_norm": 1.6639127731323242,
"learning_rate": 8.934785957961924e-06,
"loss": 0.5482,
"step": 24300
},
{
"epoch": 0.5367946320536795,
"grad_norm": 1.6761794090270996,
"learning_rate": 8.930384065148014e-06,
"loss": 0.5398,
"step": 24400
},
{
"epoch": 0.5389946100538995,
"grad_norm": 2.0362918376922607,
"learning_rate": 8.925982172334104e-06,
"loss": 0.5387,
"step": 24500
},
{
"epoch": 0.5411945880541195,
"grad_norm": 1.5029228925704956,
"learning_rate": 8.921580279520195e-06,
"loss": 0.5296,
"step": 24600
},
{
"epoch": 0.5433945660543394,
"grad_norm": 1.7153294086456299,
"learning_rate": 8.917178386706285e-06,
"loss": 0.5395,
"step": 24700
},
{
"epoch": 0.5455945440545594,
"grad_norm": 1.6009351015090942,
"learning_rate": 8.912776493892375e-06,
"loss": 0.5301,
"step": 24800
},
{
"epoch": 0.5477945220547794,
"grad_norm": 1.7909400463104248,
"learning_rate": 8.908374601078464e-06,
"loss": 0.5292,
"step": 24900
},
{
"epoch": 0.5499945000549995,
"grad_norm": 2.1847472190856934,
"learning_rate": 8.903972708264554e-06,
"loss": 0.5326,
"step": 25000
},
{
"epoch": 0.5521944780552195,
"grad_norm": 2.270923614501953,
"learning_rate": 8.899570815450644e-06,
"loss": 0.545,
"step": 25100
},
{
"epoch": 0.5543944560554395,
"grad_norm": 2.044668436050415,
"learning_rate": 8.895168922636735e-06,
"loss": 0.5335,
"step": 25200
},
{
"epoch": 0.5565944340556594,
"grad_norm": 1.9989433288574219,
"learning_rate": 8.890767029822825e-06,
"loss": 0.5516,
"step": 25300
},
{
"epoch": 0.5587944120558794,
"grad_norm": 1.7529683113098145,
"learning_rate": 8.886365137008914e-06,
"loss": 0.5379,
"step": 25400
},
{
"epoch": 0.5609943900560994,
"grad_norm": 1.4954921007156372,
"learning_rate": 8.881963244195004e-06,
"loss": 0.5346,
"step": 25500
},
{
"epoch": 0.5631943680563194,
"grad_norm": 1.7510510683059692,
"learning_rate": 8.877561351381094e-06,
"loss": 0.5186,
"step": 25600
},
{
"epoch": 0.5653943460565395,
"grad_norm": 1.8264451026916504,
"learning_rate": 8.873159458567184e-06,
"loss": 0.5419,
"step": 25700
},
{
"epoch": 0.5675943240567595,
"grad_norm": 2.1004931926727295,
"learning_rate": 8.868757565753275e-06,
"loss": 0.5419,
"step": 25800
},
{
"epoch": 0.5697943020569795,
"grad_norm": 1.9316984415054321,
"learning_rate": 8.864355672939365e-06,
"loss": 0.5209,
"step": 25900
},
{
"epoch": 0.5719942800571994,
"grad_norm": 2.182731866836548,
"learning_rate": 8.859953780125454e-06,
"loss": 0.5356,
"step": 26000
},
{
"epoch": 0.5741942580574194,
"grad_norm": 1.6151630878448486,
"learning_rate": 8.855551887311544e-06,
"loss": 0.5419,
"step": 26100
},
{
"epoch": 0.5763942360576394,
"grad_norm": 1.8083909749984741,
"learning_rate": 8.851149994497634e-06,
"loss": 0.5218,
"step": 26200
},
{
"epoch": 0.5785942140578594,
"grad_norm": 1.6356123685836792,
"learning_rate": 8.846748101683724e-06,
"loss": 0.5256,
"step": 26300
},
{
"epoch": 0.5807941920580795,
"grad_norm": 2.2701175212860107,
"learning_rate": 8.842346208869815e-06,
"loss": 0.534,
"step": 26400
},
{
"epoch": 0.5829941700582995,
"grad_norm": 1.9146398305892944,
"learning_rate": 8.837944316055905e-06,
"loss": 0.5399,
"step": 26500
},
{
"epoch": 0.5851941480585194,
"grad_norm": 1.9954113960266113,
"learning_rate": 8.833542423241995e-06,
"loss": 0.537,
"step": 26600
},
{
"epoch": 0.5873941260587394,
"grad_norm": 1.6357481479644775,
"learning_rate": 8.829140530428086e-06,
"loss": 0.5322,
"step": 26700
},
{
"epoch": 0.5895941040589594,
"grad_norm": 1.7142163515090942,
"learning_rate": 8.824738637614176e-06,
"loss": 0.5475,
"step": 26800
},
{
"epoch": 0.5917940820591794,
"grad_norm": 1.7539161443710327,
"learning_rate": 8.820336744800266e-06,
"loss": 0.523,
"step": 26900
},
{
"epoch": 0.5939940600593994,
"grad_norm": 1.6141777038574219,
"learning_rate": 8.815934851986355e-06,
"loss": 0.5318,
"step": 27000
},
{
"epoch": 0.5961940380596195,
"grad_norm": 2.0629382133483887,
"learning_rate": 8.811532959172445e-06,
"loss": 0.5334,
"step": 27100
},
{
"epoch": 0.5983940160598394,
"grad_norm": 1.999254584312439,
"learning_rate": 8.807131066358535e-06,
"loss": 0.5504,
"step": 27200
},
{
"epoch": 0.6005939940600594,
"grad_norm": 1.8531382083892822,
"learning_rate": 8.802729173544626e-06,
"loss": 0.5376,
"step": 27300
},
{
"epoch": 0.6027939720602794,
"grad_norm": 1.4768983125686646,
"learning_rate": 8.798327280730716e-06,
"loss": 0.5344,
"step": 27400
},
{
"epoch": 0.6049939500604994,
"grad_norm": 1.7571672201156616,
"learning_rate": 8.793925387916804e-06,
"loss": 0.5342,
"step": 27500
},
{
"epoch": 0.6071939280607194,
"grad_norm": 1.7986180782318115,
"learning_rate": 8.789523495102895e-06,
"loss": 0.5474,
"step": 27600
},
{
"epoch": 0.6093939060609393,
"grad_norm": 1.9569381475448608,
"learning_rate": 8.785121602288985e-06,
"loss": 0.5403,
"step": 27700
},
{
"epoch": 0.6115938840611594,
"grad_norm": 2.1773102283477783,
"learning_rate": 8.780719709475075e-06,
"loss": 0.5239,
"step": 27800
},
{
"epoch": 0.6137938620613794,
"grad_norm": 2.050550937652588,
"learning_rate": 8.776317816661166e-06,
"loss": 0.5253,
"step": 27900
},
{
"epoch": 0.6159938400615994,
"grad_norm": 1.7763617038726807,
"learning_rate": 8.771915923847256e-06,
"loss": 0.5283,
"step": 28000
},
{
"epoch": 0.6181938180618194,
"grad_norm": 1.6701637506484985,
"learning_rate": 8.767514031033344e-06,
"loss": 0.5316,
"step": 28100
},
{
"epoch": 0.6203937960620394,
"grad_norm": 1.6922410726547241,
"learning_rate": 8.763112138219435e-06,
"loss": 0.5384,
"step": 28200
},
{
"epoch": 0.6225937740622594,
"grad_norm": 2.3351800441741943,
"learning_rate": 8.758710245405525e-06,
"loss": 0.5462,
"step": 28300
},
{
"epoch": 0.6247937520624793,
"grad_norm": 1.7946525812149048,
"learning_rate": 8.754308352591615e-06,
"loss": 0.5341,
"step": 28400
},
{
"epoch": 0.6269937300626994,
"grad_norm": 1.6485981941223145,
"learning_rate": 8.749906459777706e-06,
"loss": 0.5229,
"step": 28500
},
{
"epoch": 0.6291937080629194,
"grad_norm": 2.138338327407837,
"learning_rate": 8.745504566963796e-06,
"loss": 0.5489,
"step": 28600
},
{
"epoch": 0.6313936860631394,
"grad_norm": 1.7668613195419312,
"learning_rate": 8.741102674149884e-06,
"loss": 0.5239,
"step": 28700
},
{
"epoch": 0.6335936640633594,
"grad_norm": 2.0970587730407715,
"learning_rate": 8.736700781335975e-06,
"loss": 0.5313,
"step": 28800
},
{
"epoch": 0.6357936420635794,
"grad_norm": 1.7800394296646118,
"learning_rate": 8.732298888522065e-06,
"loss": 0.5322,
"step": 28900
},
{
"epoch": 0.6379936200637993,
"grad_norm": 1.7388654947280884,
"learning_rate": 8.727896995708155e-06,
"loss": 0.5291,
"step": 29000
},
{
"epoch": 0.6401935980640193,
"grad_norm": 1.6228729486465454,
"learning_rate": 8.723495102894246e-06,
"loss": 0.5318,
"step": 29100
},
{
"epoch": 0.6423935760642393,
"grad_norm": 2.1541671752929688,
"learning_rate": 8.719093210080334e-06,
"loss": 0.5376,
"step": 29200
},
{
"epoch": 0.6445935540644594,
"grad_norm": 2.0600032806396484,
"learning_rate": 8.714691317266424e-06,
"loss": 0.5342,
"step": 29300
},
{
"epoch": 0.6467935320646794,
"grad_norm": 1.673624873161316,
"learning_rate": 8.710289424452515e-06,
"loss": 0.5533,
"step": 29400
},
{
"epoch": 0.6489935100648994,
"grad_norm": 1.8217624425888062,
"learning_rate": 8.705887531638605e-06,
"loss": 0.526,
"step": 29500
},
{
"epoch": 0.6511934880651193,
"grad_norm": 2.1350643634796143,
"learning_rate": 8.701485638824695e-06,
"loss": 0.5254,
"step": 29600
},
{
"epoch": 0.6533934660653393,
"grad_norm": 1.7675269842147827,
"learning_rate": 8.697083746010786e-06,
"loss": 0.5191,
"step": 29700
},
{
"epoch": 0.6555934440655593,
"grad_norm": 2.134058952331543,
"learning_rate": 8.692681853196874e-06,
"loss": 0.5329,
"step": 29800
},
{
"epoch": 0.6577934220657793,
"grad_norm": 1.6623740196228027,
"learning_rate": 8.688279960382964e-06,
"loss": 0.5287,
"step": 29900
},
{
"epoch": 0.6599934000659994,
"grad_norm": 2.05334210395813,
"learning_rate": 8.683878067569056e-06,
"loss": 0.5393,
"step": 30000
},
{
"epoch": 0.6621933780662194,
"grad_norm": 1.7684849500656128,
"learning_rate": 8.679476174755147e-06,
"loss": 0.527,
"step": 30100
},
{
"epoch": 0.6643933560664393,
"grad_norm": 1.825725793838501,
"learning_rate": 8.675074281941235e-06,
"loss": 0.5314,
"step": 30200
},
{
"epoch": 0.6665933340666593,
"grad_norm": 1.9619163274765015,
"learning_rate": 8.670672389127326e-06,
"loss": 0.5238,
"step": 30300
},
{
"epoch": 0.6687933120668793,
"grad_norm": 1.7254787683486938,
"learning_rate": 8.666270496313416e-06,
"loss": 0.5253,
"step": 30400
},
{
"epoch": 0.6709932900670993,
"grad_norm": 1.739046335220337,
"learning_rate": 8.661868603499506e-06,
"loss": 0.5452,
"step": 30500
},
{
"epoch": 0.6731932680673193,
"grad_norm": 1.9458619356155396,
"learning_rate": 8.657466710685596e-06,
"loss": 0.5253,
"step": 30600
},
{
"epoch": 0.6753932460675394,
"grad_norm": 1.9501069784164429,
"learning_rate": 8.653064817871687e-06,
"loss": 0.5313,
"step": 30700
},
{
"epoch": 0.6775932240677593,
"grad_norm": 1.4754610061645508,
"learning_rate": 8.648662925057775e-06,
"loss": 0.5409,
"step": 30800
},
{
"epoch": 0.6797932020679793,
"grad_norm": 1.7951412200927734,
"learning_rate": 8.644261032243866e-06,
"loss": 0.558,
"step": 30900
},
{
"epoch": 0.6819931800681993,
"grad_norm": 1.5883880853652954,
"learning_rate": 8.639859139429956e-06,
"loss": 0.5668,
"step": 31000
},
{
"epoch": 0.6841931580684193,
"grad_norm": 1.7715564966201782,
"learning_rate": 8.635457246616046e-06,
"loss": 0.5567,
"step": 31100
},
{
"epoch": 0.6863931360686393,
"grad_norm": 1.7103959321975708,
"learning_rate": 8.631055353802136e-06,
"loss": 0.5646,
"step": 31200
},
{
"epoch": 0.6885931140688593,
"grad_norm": 2.053924322128296,
"learning_rate": 8.626653460988225e-06,
"loss": 0.5554,
"step": 31300
},
{
"epoch": 0.6907930920690794,
"grad_norm": 1.3964165449142456,
"learning_rate": 8.622251568174315e-06,
"loss": 0.5341,
"step": 31400
},
{
"epoch": 0.6929930700692993,
"grad_norm": 1.623286485671997,
"learning_rate": 8.617849675360406e-06,
"loss": 0.5475,
"step": 31500
},
{
"epoch": 0.6951930480695193,
"grad_norm": 1.5909929275512695,
"learning_rate": 8.613447782546496e-06,
"loss": 0.543,
"step": 31600
},
{
"epoch": 0.6973930260697393,
"grad_norm": 1.6793596744537354,
"learning_rate": 8.609045889732586e-06,
"loss": 0.5642,
"step": 31700
},
{
"epoch": 0.6995930040699593,
"grad_norm": 1.5003210306167603,
"learning_rate": 8.604643996918676e-06,
"loss": 0.5528,
"step": 31800
},
{
"epoch": 0.7017929820701793,
"grad_norm": 1.6098058223724365,
"learning_rate": 8.600242104104765e-06,
"loss": 0.5591,
"step": 31900
},
{
"epoch": 0.7039929600703992,
"grad_norm": 1.8180344104766846,
"learning_rate": 8.595840211290855e-06,
"loss": 0.5575,
"step": 32000
},
{
"epoch": 0.7061929380706193,
"grad_norm": 1.6185832023620605,
"learning_rate": 8.591438318476946e-06,
"loss": 0.5555,
"step": 32100
},
{
"epoch": 0.7083929160708393,
"grad_norm": 1.7686482667922974,
"learning_rate": 8.587036425663036e-06,
"loss": 0.5562,
"step": 32200
},
{
"epoch": 0.7105928940710593,
"grad_norm": 1.6809719800949097,
"learning_rate": 8.582634532849126e-06,
"loss": 0.5519,
"step": 32300
},
{
"epoch": 0.7127928720712793,
"grad_norm": 1.8532384634017944,
"learning_rate": 8.578232640035216e-06,
"loss": 0.5466,
"step": 32400
},
{
"epoch": 0.7149928500714993,
"grad_norm": 1.6389007568359375,
"learning_rate": 8.573830747221305e-06,
"loss": 0.5527,
"step": 32500
},
{
"epoch": 0.7171928280717192,
"grad_norm": 1.6388925313949585,
"learning_rate": 8.569428854407395e-06,
"loss": 0.5439,
"step": 32600
},
{
"epoch": 0.7193928060719392,
"grad_norm": 1.7384296655654907,
"learning_rate": 8.565026961593486e-06,
"loss": 0.5375,
"step": 32700
},
{
"epoch": 0.7215927840721593,
"grad_norm": 1.7327488660812378,
"learning_rate": 8.560625068779576e-06,
"loss": 0.5548,
"step": 32800
},
{
"epoch": 0.7237927620723793,
"grad_norm": 1.564349889755249,
"learning_rate": 8.556223175965666e-06,
"loss": 0.5573,
"step": 32900
},
{
"epoch": 0.7259927400725993,
"grad_norm": 1.8052953481674194,
"learning_rate": 8.551821283151756e-06,
"loss": 0.524,
"step": 33000
},
{
"epoch": 0.7281927180728193,
"grad_norm": 1.5981229543685913,
"learning_rate": 8.547419390337845e-06,
"loss": 0.5449,
"step": 33100
},
{
"epoch": 0.7303926960730392,
"grad_norm": 1.4789613485336304,
"learning_rate": 8.543017497523935e-06,
"loss": 0.5356,
"step": 33200
},
{
"epoch": 0.7325926740732592,
"grad_norm": 1.8192943334579468,
"learning_rate": 8.538615604710026e-06,
"loss": 0.5691,
"step": 33300
},
{
"epoch": 0.7347926520734792,
"grad_norm": 1.874607801437378,
"learning_rate": 8.534213711896116e-06,
"loss": 0.5539,
"step": 33400
},
{
"epoch": 0.7369926300736993,
"grad_norm": 1.6394860744476318,
"learning_rate": 8.529811819082206e-06,
"loss": 0.5653,
"step": 33500
},
{
"epoch": 0.7391926080739193,
"grad_norm": 1.9063067436218262,
"learning_rate": 8.525409926268296e-06,
"loss": 0.5515,
"step": 33600
},
{
"epoch": 0.7413925860741393,
"grad_norm": 1.6854544878005981,
"learning_rate": 8.521008033454387e-06,
"loss": 0.5534,
"step": 33700
},
{
"epoch": 0.7435925640743593,
"grad_norm": 1.7821418046951294,
"learning_rate": 8.516606140640477e-06,
"loss": 0.5521,
"step": 33800
},
{
"epoch": 0.7457925420745792,
"grad_norm": 1.5063166618347168,
"learning_rate": 8.512204247826567e-06,
"loss": 0.5667,
"step": 33900
},
{
"epoch": 0.7479925200747992,
"grad_norm": 1.9604572057724,
"learning_rate": 8.507802355012656e-06,
"loss": 0.5434,
"step": 34000
},
{
"epoch": 0.7501924980750192,
"grad_norm": 1.8538181781768799,
"learning_rate": 8.503400462198746e-06,
"loss": 0.5366,
"step": 34100
},
{
"epoch": 0.7523924760752393,
"grad_norm": 1.8284313678741455,
"learning_rate": 8.498998569384836e-06,
"loss": 0.5549,
"step": 34200
},
{
"epoch": 0.7545924540754593,
"grad_norm": 1.5392765998840332,
"learning_rate": 8.494596676570927e-06,
"loss": 0.5459,
"step": 34300
},
{
"epoch": 0.7567924320756793,
"grad_norm": 1.601608157157898,
"learning_rate": 8.490194783757017e-06,
"loss": 0.5478,
"step": 34400
},
{
"epoch": 0.7589924100758992,
"grad_norm": 1.602129340171814,
"learning_rate": 8.485792890943107e-06,
"loss": 0.5264,
"step": 34500
},
{
"epoch": 0.7611923880761192,
"grad_norm": 1.5455442667007446,
"learning_rate": 8.481390998129196e-06,
"loss": 0.5452,
"step": 34600
},
{
"epoch": 0.7633923660763392,
"grad_norm": 1.7308459281921387,
"learning_rate": 8.476989105315286e-06,
"loss": 0.5346,
"step": 34700
},
{
"epoch": 0.7655923440765592,
"grad_norm": 1.9421132802963257,
"learning_rate": 8.472587212501376e-06,
"loss": 0.5502,
"step": 34800
},
{
"epoch": 0.7677923220767793,
"grad_norm": 1.6126275062561035,
"learning_rate": 8.468185319687467e-06,
"loss": 0.5531,
"step": 34900
},
{
"epoch": 0.7699923000769993,
"grad_norm": 1.9307098388671875,
"learning_rate": 8.463783426873557e-06,
"loss": 0.5451,
"step": 35000
},
{
"epoch": 0.7721922780772192,
"grad_norm": 1.785501480102539,
"learning_rate": 8.459381534059646e-06,
"loss": 0.5657,
"step": 35100
},
{
"epoch": 0.7743922560774392,
"grad_norm": 1.3118321895599365,
"learning_rate": 8.454979641245736e-06,
"loss": 0.5425,
"step": 35200
},
{
"epoch": 0.7765922340776592,
"grad_norm": 1.6785212755203247,
"learning_rate": 8.450577748431826e-06,
"loss": 0.5608,
"step": 35300
},
{
"epoch": 0.7787922120778792,
"grad_norm": 1.687156081199646,
"learning_rate": 8.446175855617916e-06,
"loss": 0.5268,
"step": 35400
},
{
"epoch": 0.7809921900780992,
"grad_norm": 1.6766939163208008,
"learning_rate": 8.441773962804007e-06,
"loss": 0.5505,
"step": 35500
},
{
"epoch": 0.7831921680783193,
"grad_norm": 1.3873755931854248,
"learning_rate": 8.437372069990097e-06,
"loss": 0.5346,
"step": 35600
},
{
"epoch": 0.7853921460785392,
"grad_norm": 1.4507646560668945,
"learning_rate": 8.432970177176186e-06,
"loss": 0.5456,
"step": 35700
},
{
"epoch": 0.7875921240787592,
"grad_norm": 1.7354850769042969,
"learning_rate": 8.428568284362276e-06,
"loss": 0.5502,
"step": 35800
},
{
"epoch": 0.7897921020789792,
"grad_norm": 1.4922300577163696,
"learning_rate": 8.424166391548366e-06,
"loss": 0.5628,
"step": 35900
},
{
"epoch": 0.7919920800791992,
"grad_norm": 1.722380518913269,
"learning_rate": 8.419764498734456e-06,
"loss": 0.5556,
"step": 36000
},
{
"epoch": 0.7941920580794192,
"grad_norm": 1.905194640159607,
"learning_rate": 8.415362605920547e-06,
"loss": 0.5529,
"step": 36100
},
{
"epoch": 0.7963920360796392,
"grad_norm": 2.140815496444702,
"learning_rate": 8.410960713106637e-06,
"loss": 0.5567,
"step": 36200
},
{
"epoch": 0.7985920140798592,
"grad_norm": 1.5261491537094116,
"learning_rate": 8.406558820292726e-06,
"loss": 0.554,
"step": 36300
},
{
"epoch": 0.8007919920800792,
"grad_norm": 1.6273101568222046,
"learning_rate": 8.402156927478816e-06,
"loss": 0.5534,
"step": 36400
},
{
"epoch": 0.8029919700802992,
"grad_norm": 1.7818236351013184,
"learning_rate": 8.397755034664906e-06,
"loss": 0.5408,
"step": 36500
},
{
"epoch": 0.8051919480805192,
"grad_norm": 1.9317457675933838,
"learning_rate": 8.393353141850996e-06,
"loss": 0.5726,
"step": 36600
},
{
"epoch": 0.8073919260807392,
"grad_norm": 1.813769817352295,
"learning_rate": 8.388951249037087e-06,
"loss": 0.5605,
"step": 36700
},
{
"epoch": 0.8095919040809592,
"grad_norm": 1.9883424043655396,
"learning_rate": 8.384549356223177e-06,
"loss": 0.5489,
"step": 36800
},
{
"epoch": 0.8117918820811791,
"grad_norm": 1.709024429321289,
"learning_rate": 8.380147463409267e-06,
"loss": 0.5411,
"step": 36900
},
{
"epoch": 0.8139918600813992,
"grad_norm": 1.4431244134902954,
"learning_rate": 8.375745570595357e-06,
"loss": 0.5472,
"step": 37000
},
{
"epoch": 0.8161918380816192,
"grad_norm": 1.5251537561416626,
"learning_rate": 8.371343677781448e-06,
"loss": 0.5479,
"step": 37100
},
{
"epoch": 0.8183918160818392,
"grad_norm": 1.687023401260376,
"learning_rate": 8.366941784967536e-06,
"loss": 0.543,
"step": 37200
},
{
"epoch": 0.8205917940820592,
"grad_norm": 1.5462446212768555,
"learning_rate": 8.362539892153627e-06,
"loss": 0.55,
"step": 37300
},
{
"epoch": 0.8227917720822792,
"grad_norm": 1.984750747680664,
"learning_rate": 8.358137999339717e-06,
"loss": 0.5495,
"step": 37400
},
{
"epoch": 0.8249917500824991,
"grad_norm": 1.6375317573547363,
"learning_rate": 8.353736106525807e-06,
"loss": 0.5479,
"step": 37500
},
{
"epoch": 0.8271917280827191,
"grad_norm": 1.8285633325576782,
"learning_rate": 8.349334213711897e-06,
"loss": 0.5398,
"step": 37600
},
{
"epoch": 0.8293917060829392,
"grad_norm": 1.7603964805603027,
"learning_rate": 8.344932320897988e-06,
"loss": 0.5343,
"step": 37700
},
{
"epoch": 0.8315916840831592,
"grad_norm": 1.4836808443069458,
"learning_rate": 8.340530428084076e-06,
"loss": 0.5559,
"step": 37800
},
{
"epoch": 0.8337916620833792,
"grad_norm": 1.4867973327636719,
"learning_rate": 8.336128535270167e-06,
"loss": 0.5433,
"step": 37900
},
{
"epoch": 0.8359916400835992,
"grad_norm": 1.784264326095581,
"learning_rate": 8.331726642456257e-06,
"loss": 0.5451,
"step": 38000
},
{
"epoch": 0.8381916180838191,
"grad_norm": 1.3747423887252808,
"learning_rate": 8.327324749642347e-06,
"loss": 0.538,
"step": 38100
},
{
"epoch": 0.8403915960840391,
"grad_norm": 1.8073352575302124,
"learning_rate": 8.322922856828437e-06,
"loss": 0.545,
"step": 38200
},
{
"epoch": 0.8425915740842591,
"grad_norm": 1.6162651777267456,
"learning_rate": 8.318520964014528e-06,
"loss": 0.5448,
"step": 38300
},
{
"epoch": 0.8447915520844792,
"grad_norm": 1.6627821922302246,
"learning_rate": 8.314119071200616e-06,
"loss": 0.5504,
"step": 38400
},
{
"epoch": 0.8469915300846992,
"grad_norm": 1.594759464263916,
"learning_rate": 8.309717178386707e-06,
"loss": 0.5344,
"step": 38500
},
{
"epoch": 0.8491915080849192,
"grad_norm": 1.7449952363967896,
"learning_rate": 8.305315285572797e-06,
"loss": 0.5558,
"step": 38600
},
{
"epoch": 0.8513914860851391,
"grad_norm": 1.6787577867507935,
"learning_rate": 8.300913392758887e-06,
"loss": 0.5282,
"step": 38700
},
{
"epoch": 0.8535914640853591,
"grad_norm": 2.2145471572875977,
"learning_rate": 8.296511499944977e-06,
"loss": 0.5371,
"step": 38800
},
{
"epoch": 0.8557914420855791,
"grad_norm": 1.7959023714065552,
"learning_rate": 8.292109607131068e-06,
"loss": 0.5467,
"step": 38900
},
{
"epoch": 0.8579914200857991,
"grad_norm": 1.7362741231918335,
"learning_rate": 8.287707714317156e-06,
"loss": 0.5334,
"step": 39000
},
{
"epoch": 0.8601913980860192,
"grad_norm": 1.471660852432251,
"learning_rate": 8.283305821503247e-06,
"loss": 0.5563,
"step": 39100
},
{
"epoch": 0.8623913760862392,
"grad_norm": 1.9247560501098633,
"learning_rate": 8.278903928689337e-06,
"loss": 0.5422,
"step": 39200
},
{
"epoch": 0.8645913540864592,
"grad_norm": 1.4459770917892456,
"learning_rate": 8.274502035875427e-06,
"loss": 0.5549,
"step": 39300
},
{
"epoch": 0.8667913320866791,
"grad_norm": 1.8843663930892944,
"learning_rate": 8.270100143061517e-06,
"loss": 0.5463,
"step": 39400
},
{
"epoch": 0.8689913100868991,
"grad_norm": 1.6664437055587769,
"learning_rate": 8.265698250247606e-06,
"loss": 0.557,
"step": 39500
},
{
"epoch": 0.8711912880871191,
"grad_norm": 1.8281344175338745,
"learning_rate": 8.261296357433696e-06,
"loss": 0.5306,
"step": 39600
},
{
"epoch": 0.8733912660873391,
"grad_norm": 1.9608473777770996,
"learning_rate": 8.256894464619787e-06,
"loss": 0.5458,
"step": 39700
},
{
"epoch": 0.8755912440875592,
"grad_norm": 1.9003684520721436,
"learning_rate": 8.252492571805877e-06,
"loss": 0.55,
"step": 39800
},
{
"epoch": 0.8777912220877792,
"grad_norm": 1.8628289699554443,
"learning_rate": 8.248090678991967e-06,
"loss": 0.5379,
"step": 39900
},
{
"epoch": 0.8799912000879991,
"grad_norm": 1.5854053497314453,
"learning_rate": 8.243688786178057e-06,
"loss": 0.5352,
"step": 40000
},
{
"epoch": 0.8821911780882191,
"grad_norm": 1.957435965538025,
"learning_rate": 8.239286893364146e-06,
"loss": 0.5358,
"step": 40100
},
{
"epoch": 0.8843911560884391,
"grad_norm": 1.838132381439209,
"learning_rate": 8.234885000550236e-06,
"loss": 0.5423,
"step": 40200
},
{
"epoch": 0.8865911340886591,
"grad_norm": 1.936266541481018,
"learning_rate": 8.230483107736327e-06,
"loss": 0.5335,
"step": 40300
},
{
"epoch": 0.8887911120888791,
"grad_norm": 1.5629870891571045,
"learning_rate": 8.226081214922419e-06,
"loss": 0.5354,
"step": 40400
},
{
"epoch": 0.8909910900890992,
"grad_norm": 1.7080520391464233,
"learning_rate": 8.221679322108507e-06,
"loss": 0.5532,
"step": 40500
},
{
"epoch": 0.8931910680893191,
"grad_norm": 1.795921802520752,
"learning_rate": 8.217277429294597e-06,
"loss": 0.5528,
"step": 40600
},
{
"epoch": 0.8953910460895391,
"grad_norm": 1.955198884010315,
"learning_rate": 8.212875536480688e-06,
"loss": 0.5598,
"step": 40700
},
{
"epoch": 0.8975910240897591,
"grad_norm": 1.865143895149231,
"learning_rate": 8.208473643666778e-06,
"loss": 0.5371,
"step": 40800
},
{
"epoch": 0.8997910020899791,
"grad_norm": 1.8305407762527466,
"learning_rate": 8.204071750852868e-06,
"loss": 0.5459,
"step": 40900
},
{
"epoch": 0.9019909800901991,
"grad_norm": 2.158996820449829,
"learning_rate": 8.199669858038959e-06,
"loss": 0.5477,
"step": 41000
},
{
"epoch": 0.904190958090419,
"grad_norm": 1.5184693336486816,
"learning_rate": 8.195267965225047e-06,
"loss": 0.5536,
"step": 41100
},
{
"epoch": 0.9063909360906391,
"grad_norm": 1.2580761909484863,
"learning_rate": 8.190866072411137e-06,
"loss": 0.5444,
"step": 41200
},
{
"epoch": 0.9085909140908591,
"grad_norm": 1.5662882328033447,
"learning_rate": 8.186464179597228e-06,
"loss": 0.5474,
"step": 41300
},
{
"epoch": 0.9107908920910791,
"grad_norm": 1.775161623954773,
"learning_rate": 8.182062286783318e-06,
"loss": 0.5405,
"step": 41400
},
{
"epoch": 0.9129908700912991,
"grad_norm": 1.604435920715332,
"learning_rate": 8.177660393969408e-06,
"loss": 0.5425,
"step": 41500
},
{
"epoch": 0.9151908480915191,
"grad_norm": 1.9549158811569214,
"learning_rate": 8.173258501155497e-06,
"loss": 0.5398,
"step": 41600
},
{
"epoch": 0.917390826091739,
"grad_norm": 1.4547535181045532,
"learning_rate": 8.168856608341587e-06,
"loss": 0.5511,
"step": 41700
},
{
"epoch": 0.919590804091959,
"grad_norm": 1.8771201372146606,
"learning_rate": 8.164454715527677e-06,
"loss": 0.5481,
"step": 41800
},
{
"epoch": 0.9217907820921791,
"grad_norm": 2.0473129749298096,
"learning_rate": 8.160052822713768e-06,
"loss": 0.5418,
"step": 41900
},
{
"epoch": 0.9239907600923991,
"grad_norm": 1.8082759380340576,
"learning_rate": 8.155650929899858e-06,
"loss": 0.5346,
"step": 42000
},
{
"epoch": 0.9261907380926191,
"grad_norm": 1.8849467039108276,
"learning_rate": 8.151249037085948e-06,
"loss": 0.5563,
"step": 42100
},
{
"epoch": 0.9283907160928391,
"grad_norm": 1.6767569780349731,
"learning_rate": 8.146847144272037e-06,
"loss": 0.536,
"step": 42200
},
{
"epoch": 0.9305906940930591,
"grad_norm": 1.9930092096328735,
"learning_rate": 8.142445251458127e-06,
"loss": 0.5507,
"step": 42300
},
{
"epoch": 0.932790672093279,
"grad_norm": 1.9420870542526245,
"learning_rate": 8.138043358644217e-06,
"loss": 0.5405,
"step": 42400
},
{
"epoch": 0.934990650093499,
"grad_norm": 1.6965640783309937,
"learning_rate": 8.133641465830308e-06,
"loss": 0.5469,
"step": 42500
},
{
"epoch": 0.9371906280937191,
"grad_norm": 1.4808323383331299,
"learning_rate": 8.129239573016398e-06,
"loss": 0.5341,
"step": 42600
},
{
"epoch": 0.9393906060939391,
"grad_norm": 1.516119122505188,
"learning_rate": 8.124837680202488e-06,
"loss": 0.5515,
"step": 42700
},
{
"epoch": 0.9415905840941591,
"grad_norm": 1.6243934631347656,
"learning_rate": 8.120435787388577e-06,
"loss": 0.541,
"step": 42800
},
{
"epoch": 0.9437905620943791,
"grad_norm": 1.6918444633483887,
"learning_rate": 8.116033894574667e-06,
"loss": 0.5302,
"step": 42900
},
{
"epoch": 0.945990540094599,
"grad_norm": 1.6359889507293701,
"learning_rate": 8.111632001760757e-06,
"loss": 0.5295,
"step": 43000
},
{
"epoch": 0.948190518094819,
"grad_norm": 1.7587625980377197,
"learning_rate": 8.107230108946848e-06,
"loss": 0.5415,
"step": 43100
},
{
"epoch": 0.950390496095039,
"grad_norm": 1.8017805814743042,
"learning_rate": 8.102828216132938e-06,
"loss": 0.5422,
"step": 43200
},
{
"epoch": 0.9525904740952591,
"grad_norm": 1.970982313156128,
"learning_rate": 8.098426323319027e-06,
"loss": 0.5296,
"step": 43300
},
{
"epoch": 0.9547904520954791,
"grad_norm": 1.8112688064575195,
"learning_rate": 8.094024430505117e-06,
"loss": 0.5539,
"step": 43400
},
{
"epoch": 0.9569904300956991,
"grad_norm": 1.7808321714401245,
"learning_rate": 8.089622537691207e-06,
"loss": 0.5498,
"step": 43500
},
{
"epoch": 0.959190408095919,
"grad_norm": 1.9657952785491943,
"learning_rate": 8.085220644877297e-06,
"loss": 0.5424,
"step": 43600
},
{
"epoch": 0.961390386096139,
"grad_norm": 1.8520526885986328,
"learning_rate": 8.080818752063388e-06,
"loss": 0.5392,
"step": 43700
},
{
"epoch": 0.963590364096359,
"grad_norm": 1.7919948101043701,
"learning_rate": 8.076416859249478e-06,
"loss": 0.532,
"step": 43800
},
{
"epoch": 0.965790342096579,
"grad_norm": 1.600967288017273,
"learning_rate": 8.072014966435568e-06,
"loss": 0.5406,
"step": 43900
},
{
"epoch": 0.9679903200967991,
"grad_norm": 1.638075351715088,
"learning_rate": 8.067613073621659e-06,
"loss": 0.553,
"step": 44000
},
{
"epoch": 0.9701902980970191,
"grad_norm": 1.5249767303466797,
"learning_rate": 8.063211180807749e-06,
"loss": 0.5533,
"step": 44100
},
{
"epoch": 0.972390276097239,
"grad_norm": 1.6304973363876343,
"learning_rate": 8.05880928799384e-06,
"loss": 0.5377,
"step": 44200
},
{
"epoch": 0.974590254097459,
"grad_norm": 1.8152045011520386,
"learning_rate": 8.054407395179928e-06,
"loss": 0.5284,
"step": 44300
},
{
"epoch": 0.976790232097679,
"grad_norm": 1.652199625968933,
"learning_rate": 8.050005502366018e-06,
"loss": 0.5448,
"step": 44400
},
{
"epoch": 0.978990210097899,
"grad_norm": 1.7338589429855347,
"learning_rate": 8.045603609552108e-06,
"loss": 0.5395,
"step": 44500
},
{
"epoch": 0.981190188098119,
"grad_norm": 1.5801849365234375,
"learning_rate": 8.041201716738199e-06,
"loss": 0.5297,
"step": 44600
},
{
"epoch": 0.9833901660983391,
"grad_norm": 2.031813621520996,
"learning_rate": 8.036799823924289e-06,
"loss": 0.5617,
"step": 44700
},
{
"epoch": 0.985590144098559,
"grad_norm": 1.934370756149292,
"learning_rate": 8.03239793111038e-06,
"loss": 0.5329,
"step": 44800
},
{
"epoch": 0.987790122098779,
"grad_norm": 1.849741816520691,
"learning_rate": 8.027996038296468e-06,
"loss": 0.5413,
"step": 44900
},
{
"epoch": 0.989990100098999,
"grad_norm": 1.757784366607666,
"learning_rate": 8.023594145482558e-06,
"loss": 0.5319,
"step": 45000
},
{
"epoch": 0.992190078099219,
"grad_norm": 1.6084299087524414,
"learning_rate": 8.019192252668648e-06,
"loss": 0.5465,
"step": 45100
},
{
"epoch": 0.994390056099439,
"grad_norm": 1.9279767274856567,
"learning_rate": 8.014790359854739e-06,
"loss": 0.5425,
"step": 45200
},
{
"epoch": 0.996590034099659,
"grad_norm": 1.5739712715148926,
"learning_rate": 8.010388467040829e-06,
"loss": 0.5471,
"step": 45300
},
{
"epoch": 0.998790012099879,
"grad_norm": 1.5087926387786865,
"learning_rate": 8.005986574226917e-06,
"loss": 0.5417,
"step": 45400
},
{
"epoch": 1.000989990100099,
"grad_norm": 2.411069393157959,
"learning_rate": 8.001584681413008e-06,
"loss": 0.5328,
"step": 45500
},
{
"epoch": 1.003189968100319,
"grad_norm": 2.560279607772827,
"learning_rate": 7.997182788599098e-06,
"loss": 0.5018,
"step": 45600
},
{
"epoch": 1.005389946100539,
"grad_norm": 1.8764352798461914,
"learning_rate": 7.992780895785188e-06,
"loss": 0.4947,
"step": 45700
},
{
"epoch": 1.007589924100759,
"grad_norm": 2.0531773567199707,
"learning_rate": 7.988379002971279e-06,
"loss": 0.5016,
"step": 45800
},
{
"epoch": 1.009789902100979,
"grad_norm": 2.1719043254852295,
"learning_rate": 7.983977110157369e-06,
"loss": 0.504,
"step": 45900
},
{
"epoch": 1.011989880101199,
"grad_norm": 1.8235334157943726,
"learning_rate": 7.979575217343457e-06,
"loss": 0.4967,
"step": 46000
},
{
"epoch": 1.014189858101419,
"grad_norm": 2.329827308654785,
"learning_rate": 7.975173324529548e-06,
"loss": 0.5121,
"step": 46100
},
{
"epoch": 1.016389836101639,
"grad_norm": 2.2712931632995605,
"learning_rate": 7.970771431715638e-06,
"loss": 0.4901,
"step": 46200
},
{
"epoch": 1.018589814101859,
"grad_norm": 1.9942501783370972,
"learning_rate": 7.966369538901728e-06,
"loss": 0.5052,
"step": 46300
},
{
"epoch": 1.020789792102079,
"grad_norm": 2.014451742172241,
"learning_rate": 7.961967646087819e-06,
"loss": 0.5117,
"step": 46400
},
{
"epoch": 1.022989770102299,
"grad_norm": 2.1809909343719482,
"learning_rate": 7.957565753273909e-06,
"loss": 0.5106,
"step": 46500
},
{
"epoch": 1.025189748102519,
"grad_norm": 1.6118221282958984,
"learning_rate": 7.953163860459997e-06,
"loss": 0.4959,
"step": 46600
},
{
"epoch": 1.027389726102739,
"grad_norm": 1.9853328466415405,
"learning_rate": 7.948761967646088e-06,
"loss": 0.5127,
"step": 46700
},
{
"epoch": 1.029589704102959,
"grad_norm": 2.3931078910827637,
"learning_rate": 7.944360074832178e-06,
"loss": 0.5084,
"step": 46800
},
{
"epoch": 1.031789682103179,
"grad_norm": 1.6679604053497314,
"learning_rate": 7.939958182018268e-06,
"loss": 0.4913,
"step": 46900
},
{
"epoch": 1.0339896601033989,
"grad_norm": 2.377412796020508,
"learning_rate": 7.935556289204359e-06,
"loss": 0.4915,
"step": 47000
},
{
"epoch": 1.0361896381036189,
"grad_norm": 2.0759618282318115,
"learning_rate": 7.931154396390449e-06,
"loss": 0.5011,
"step": 47100
},
{
"epoch": 1.038389616103839,
"grad_norm": 2.061979055404663,
"learning_rate": 7.926752503576537e-06,
"loss": 0.4945,
"step": 47200
},
{
"epoch": 1.040589594104059,
"grad_norm": 1.912423849105835,
"learning_rate": 7.92235061076263e-06,
"loss": 0.496,
"step": 47300
},
{
"epoch": 1.042789572104279,
"grad_norm": 2.3455774784088135,
"learning_rate": 7.91794871794872e-06,
"loss": 0.5063,
"step": 47400
},
{
"epoch": 1.044989550104499,
"grad_norm": 1.7976536750793457,
"learning_rate": 7.913546825134808e-06,
"loss": 0.5053,
"step": 47500
},
{
"epoch": 1.047189528104719,
"grad_norm": 2.056267023086548,
"learning_rate": 7.909144932320899e-06,
"loss": 0.4939,
"step": 47600
},
{
"epoch": 1.049389506104939,
"grad_norm": 2.216721534729004,
"learning_rate": 7.904743039506989e-06,
"loss": 0.5007,
"step": 47700
},
{
"epoch": 1.051589484105159,
"grad_norm": 1.4782536029815674,
"learning_rate": 7.90034114669308e-06,
"loss": 0.4765,
"step": 47800
},
{
"epoch": 1.053789462105379,
"grad_norm": 1.739716649055481,
"learning_rate": 7.89593925387917e-06,
"loss": 0.5245,
"step": 47900
},
{
"epoch": 1.055989440105599,
"grad_norm": 1.5695744752883911,
"learning_rate": 7.89153736106526e-06,
"loss": 0.511,
"step": 48000
},
{
"epoch": 1.058189418105819,
"grad_norm": 2.0835139751434326,
"learning_rate": 7.887135468251348e-06,
"loss": 0.4989,
"step": 48100
},
{
"epoch": 1.0603893961060389,
"grad_norm": 1.9040948152542114,
"learning_rate": 7.882733575437439e-06,
"loss": 0.5001,
"step": 48200
},
{
"epoch": 1.0625893741062589,
"grad_norm": 2.1570136547088623,
"learning_rate": 7.878331682623529e-06,
"loss": 0.5031,
"step": 48300
},
{
"epoch": 1.0647893521064788,
"grad_norm": 1.8248552083969116,
"learning_rate": 7.873929789809619e-06,
"loss": 0.504,
"step": 48400
},
{
"epoch": 1.0669893301066988,
"grad_norm": 1.8128606081008911,
"learning_rate": 7.86952789699571e-06,
"loss": 0.4825,
"step": 48500
},
{
"epoch": 1.069189308106919,
"grad_norm": 2.15380597114563,
"learning_rate": 7.8651260041818e-06,
"loss": 0.4843,
"step": 48600
},
{
"epoch": 1.071389286107139,
"grad_norm": 2.4410858154296875,
"learning_rate": 7.860724111367888e-06,
"loss": 0.4973,
"step": 48700
},
{
"epoch": 1.073589264107359,
"grad_norm": 1.9602640867233276,
"learning_rate": 7.856322218553979e-06,
"loss": 0.5039,
"step": 48800
},
{
"epoch": 1.075789242107579,
"grad_norm": 2.189321994781494,
"learning_rate": 7.851920325740069e-06,
"loss": 0.5002,
"step": 48900
},
{
"epoch": 1.077989220107799,
"grad_norm": 2.153059244155884,
"learning_rate": 7.847518432926159e-06,
"loss": 0.5074,
"step": 49000
},
{
"epoch": 1.080189198108019,
"grad_norm": 1.9804766178131104,
"learning_rate": 7.84311654011225e-06,
"loss": 0.4981,
"step": 49100
},
{
"epoch": 1.082389176108239,
"grad_norm": 2.228227376937866,
"learning_rate": 7.838714647298338e-06,
"loss": 0.5115,
"step": 49200
},
{
"epoch": 1.084589154108459,
"grad_norm": 2.639230489730835,
"learning_rate": 7.834312754484428e-06,
"loss": 0.4956,
"step": 49300
},
{
"epoch": 1.086789132108679,
"grad_norm": 2.2388269901275635,
"learning_rate": 7.829910861670519e-06,
"loss": 0.4957,
"step": 49400
},
{
"epoch": 1.0889891101088989,
"grad_norm": 2.2344448566436768,
"learning_rate": 7.825508968856609e-06,
"loss": 0.5191,
"step": 49500
},
{
"epoch": 1.0911890881091189,
"grad_norm": 2.1383955478668213,
"learning_rate": 7.821107076042699e-06,
"loss": 0.5035,
"step": 49600
},
{
"epoch": 1.0933890661093388,
"grad_norm": 2.0469112396240234,
"learning_rate": 7.81670518322879e-06,
"loss": 0.4991,
"step": 49700
},
{
"epoch": 1.0955890441095588,
"grad_norm": 2.091733694076538,
"learning_rate": 7.812303290414878e-06,
"loss": 0.5213,
"step": 49800
},
{
"epoch": 1.0977890221097788,
"grad_norm": 2.2485196590423584,
"learning_rate": 7.807901397600968e-06,
"loss": 0.5159,
"step": 49900
},
{
"epoch": 1.099989000109999,
"grad_norm": 2.335508108139038,
"learning_rate": 7.803499504787059e-06,
"loss": 0.5035,
"step": 50000
},
{
"epoch": 1.099989000109999,
"eval_loss": 0.579010546207428,
"eval_runtime": 378.8096,
"eval_samples_per_second": 158.391,
"eval_steps_per_second": 4.95,
"step": 50000
},
{
"epoch": 1.102188978110219,
"grad_norm": 2.1119778156280518,
"learning_rate": 7.799097611973149e-06,
"loss": 0.5081,
"step": 50100
},
{
"epoch": 1.104388956110439,
"grad_norm": 2.182777166366577,
"learning_rate": 7.794695719159239e-06,
"loss": 0.4925,
"step": 50200
},
{
"epoch": 1.106588934110659,
"grad_norm": 2.2675302028656006,
"learning_rate": 7.79029382634533e-06,
"loss": 0.4865,
"step": 50300
},
{
"epoch": 1.108788912110879,
"grad_norm": 1.858472228050232,
"learning_rate": 7.785891933531418e-06,
"loss": 0.5118,
"step": 50400
},
{
"epoch": 1.110988890111099,
"grad_norm": 1.8882789611816406,
"learning_rate": 7.781490040717508e-06,
"loss": 0.5087,
"step": 50500
},
{
"epoch": 1.113188868111319,
"grad_norm": 1.9170640707015991,
"learning_rate": 7.777088147903599e-06,
"loss": 0.491,
"step": 50600
},
{
"epoch": 1.1153888461115389,
"grad_norm": 1.9825174808502197,
"learning_rate": 7.772686255089689e-06,
"loss": 0.5072,
"step": 50700
},
{
"epoch": 1.1175888241117589,
"grad_norm": 2.3916232585906982,
"learning_rate": 7.768284362275779e-06,
"loss": 0.5111,
"step": 50800
},
{
"epoch": 1.1197888021119788,
"grad_norm": 2.069160223007202,
"learning_rate": 7.76388246946187e-06,
"loss": 0.4927,
"step": 50900
},
{
"epoch": 1.1219887801121988,
"grad_norm": 1.780382752418518,
"learning_rate": 7.75948057664796e-06,
"loss": 0.4959,
"step": 51000
},
{
"epoch": 1.1241887581124188,
"grad_norm": 2.5268094539642334,
"learning_rate": 7.75507868383405e-06,
"loss": 0.4975,
"step": 51100
},
{
"epoch": 1.1263887361126388,
"grad_norm": 1.9989362955093384,
"learning_rate": 7.75067679102014e-06,
"loss": 0.504,
"step": 51200
},
{
"epoch": 1.1285887141128588,
"grad_norm": 2.230954647064209,
"learning_rate": 7.746274898206229e-06,
"loss": 0.5172,
"step": 51300
},
{
"epoch": 1.1307886921130788,
"grad_norm": 2.2332351207733154,
"learning_rate": 7.741873005392319e-06,
"loss": 0.5026,
"step": 51400
},
{
"epoch": 1.132988670113299,
"grad_norm": 2.234415054321289,
"learning_rate": 7.73747111257841e-06,
"loss": 0.5169,
"step": 51500
},
{
"epoch": 1.135188648113519,
"grad_norm": 1.9074784517288208,
"learning_rate": 7.7330692197645e-06,
"loss": 0.4878,
"step": 51600
},
{
"epoch": 1.137388626113739,
"grad_norm": 1.9809048175811768,
"learning_rate": 7.72866732695059e-06,
"loss": 0.4794,
"step": 51700
},
{
"epoch": 1.139588604113959,
"grad_norm": 1.90762460231781,
"learning_rate": 7.72426543413668e-06,
"loss": 0.4996,
"step": 51800
},
{
"epoch": 1.1417885821141789,
"grad_norm": 2.3830220699310303,
"learning_rate": 7.719863541322769e-06,
"loss": 0.5028,
"step": 51900
},
{
"epoch": 1.1439885601143989,
"grad_norm": 2.052335023880005,
"learning_rate": 7.715461648508859e-06,
"loss": 0.5189,
"step": 52000
},
{
"epoch": 1.1461885381146188,
"grad_norm": 2.3055222034454346,
"learning_rate": 7.71105975569495e-06,
"loss": 0.5117,
"step": 52100
},
{
"epoch": 1.1483885161148388,
"grad_norm": 2.7478485107421875,
"learning_rate": 7.70665786288104e-06,
"loss": 0.503,
"step": 52200
},
{
"epoch": 1.1505884941150588,
"grad_norm": 1.8724684715270996,
"learning_rate": 7.70225597006713e-06,
"loss": 0.5017,
"step": 52300
},
{
"epoch": 1.1527884721152788,
"grad_norm": 2.1905338764190674,
"learning_rate": 7.69785407725322e-06,
"loss": 0.4995,
"step": 52400
},
{
"epoch": 1.1549884501154988,
"grad_norm": 2.169680118560791,
"learning_rate": 7.693452184439309e-06,
"loss": 0.5012,
"step": 52500
},
{
"epoch": 1.1571884281157188,
"grad_norm": 2.3531687259674072,
"learning_rate": 7.689050291625399e-06,
"loss": 0.4835,
"step": 52600
},
{
"epoch": 1.159388406115939,
"grad_norm": 1.9876978397369385,
"learning_rate": 7.68464839881149e-06,
"loss": 0.4949,
"step": 52700
},
{
"epoch": 1.161588384116159,
"grad_norm": 2.463718891143799,
"learning_rate": 7.68024650599758e-06,
"loss": 0.5121,
"step": 52800
},
{
"epoch": 1.163788362116379,
"grad_norm": 2.4976985454559326,
"learning_rate": 7.67584461318367e-06,
"loss": 0.4882,
"step": 52900
},
{
"epoch": 1.165988340116599,
"grad_norm": 1.968513011932373,
"learning_rate": 7.67144272036976e-06,
"loss": 0.5052,
"step": 53000
},
{
"epoch": 1.168188318116819,
"grad_norm": 1.998396396636963,
"learning_rate": 7.667040827555849e-06,
"loss": 0.4912,
"step": 53100
},
{
"epoch": 1.1703882961170389,
"grad_norm": 2.0211946964263916,
"learning_rate": 7.662638934741939e-06,
"loss": 0.5087,
"step": 53200
},
{
"epoch": 1.1725882741172589,
"grad_norm": 1.97858464717865,
"learning_rate": 7.65823704192803e-06,
"loss": 0.5015,
"step": 53300
},
{
"epoch": 1.1747882521174788,
"grad_norm": 2.1665027141571045,
"learning_rate": 7.65383514911412e-06,
"loss": 0.5088,
"step": 53400
},
{
"epoch": 1.1769882301176988,
"grad_norm": 2.3747305870056152,
"learning_rate": 7.64943325630021e-06,
"loss": 0.4971,
"step": 53500
},
{
"epoch": 1.1791882081179188,
"grad_norm": 2.0653445720672607,
"learning_rate": 7.645031363486299e-06,
"loss": 0.4999,
"step": 53600
},
{
"epoch": 1.1813881861181388,
"grad_norm": 2.0202314853668213,
"learning_rate": 7.640629470672389e-06,
"loss": 0.4857,
"step": 53700
},
{
"epoch": 1.1835881641183588,
"grad_norm": 2.1644513607025146,
"learning_rate": 7.636227577858479e-06,
"loss": 0.4925,
"step": 53800
},
{
"epoch": 1.1857881421185787,
"grad_norm": 2.2284882068634033,
"learning_rate": 7.63182568504457e-06,
"loss": 0.5076,
"step": 53900
},
{
"epoch": 1.1879881201187987,
"grad_norm": 1.9216992855072021,
"learning_rate": 7.62742379223066e-06,
"loss": 0.4937,
"step": 54000
},
{
"epoch": 1.1901880981190187,
"grad_norm": 2.151033401489258,
"learning_rate": 7.623021899416749e-06,
"loss": 0.5042,
"step": 54100
},
{
"epoch": 1.1923880761192387,
"grad_norm": 2.544735908508301,
"learning_rate": 7.618620006602839e-06,
"loss": 0.5016,
"step": 54200
},
{
"epoch": 1.194588054119459,
"grad_norm": 2.404811382293701,
"learning_rate": 7.6142181137889306e-06,
"loss": 0.4859,
"step": 54300
},
{
"epoch": 1.1967880321196789,
"grad_norm": 2.071399450302124,
"learning_rate": 7.60981622097502e-06,
"loss": 0.5009,
"step": 54400
},
{
"epoch": 1.1989880101198989,
"grad_norm": 2.0729258060455322,
"learning_rate": 7.60541432816111e-06,
"loss": 0.5068,
"step": 54500
},
{
"epoch": 1.2011879881201188,
"grad_norm": 1.9438556432724,
"learning_rate": 7.6010124353472006e-06,
"loss": 0.5151,
"step": 54600
},
{
"epoch": 1.2033879661203388,
"grad_norm": 2.3928163051605225,
"learning_rate": 7.59661054253329e-06,
"loss": 0.5152,
"step": 54700
},
{
"epoch": 1.2055879441205588,
"grad_norm": 2.0218889713287354,
"learning_rate": 7.59220864971938e-06,
"loss": 0.4935,
"step": 54800
},
{
"epoch": 1.2077879221207788,
"grad_norm": 2.0265040397644043,
"learning_rate": 7.5878067569054706e-06,
"loss": 0.4995,
"step": 54900
},
{
"epoch": 1.2099879001209988,
"grad_norm": 2.6148312091827393,
"learning_rate": 7.58340486409156e-06,
"loss": 0.5082,
"step": 55000
},
{
"epoch": 1.2121878781212188,
"grad_norm": 2.4383389949798584,
"learning_rate": 7.57900297127765e-06,
"loss": 0.4982,
"step": 55100
},
{
"epoch": 1.2143878561214387,
"grad_norm": 2.649778366088867,
"learning_rate": 7.5746010784637406e-06,
"loss": 0.4974,
"step": 55200
},
{
"epoch": 1.2165878341216587,
"grad_norm": 2.525026559829712,
"learning_rate": 7.57019918564983e-06,
"loss": 0.4953,
"step": 55300
},
{
"epoch": 1.2187878121218787,
"grad_norm": 2.795290470123291,
"learning_rate": 7.56579729283592e-06,
"loss": 0.5118,
"step": 55400
},
{
"epoch": 1.220987790122099,
"grad_norm": 1.8484504222869873,
"learning_rate": 7.5613954000220105e-06,
"loss": 0.4897,
"step": 55500
},
{
"epoch": 1.2231877681223189,
"grad_norm": 2.673802614212036,
"learning_rate": 7.5569935072081e-06,
"loss": 0.4856,
"step": 55600
},
{
"epoch": 1.2253877461225389,
"grad_norm": 2.250032663345337,
"learning_rate": 7.55259161439419e-06,
"loss": 0.4942,
"step": 55700
},
{
"epoch": 1.2275877241227588,
"grad_norm": 2.281285285949707,
"learning_rate": 7.5481897215802805e-06,
"loss": 0.492,
"step": 55800
},
{
"epoch": 1.2297877021229788,
"grad_norm": 2.1768269538879395,
"learning_rate": 7.54378782876637e-06,
"loss": 0.5014,
"step": 55900
},
{
"epoch": 1.2319876801231988,
"grad_norm": 2.172852039337158,
"learning_rate": 7.53938593595246e-06,
"loss": 0.5055,
"step": 56000
},
{
"epoch": 1.2341876581234188,
"grad_norm": 2.2055068016052246,
"learning_rate": 7.5349840431385505e-06,
"loss": 0.4994,
"step": 56100
},
{
"epoch": 1.2363876361236388,
"grad_norm": 2.2056238651275635,
"learning_rate": 7.53058215032464e-06,
"loss": 0.5082,
"step": 56200
},
{
"epoch": 1.2385876141238588,
"grad_norm": 1.8684000968933105,
"learning_rate": 7.52618025751073e-06,
"loss": 0.5001,
"step": 56300
},
{
"epoch": 1.2407875921240787,
"grad_norm": 1.8799563646316528,
"learning_rate": 7.52177836469682e-06,
"loss": 0.4863,
"step": 56400
},
{
"epoch": 1.2429875701242987,
"grad_norm": 2.0053553581237793,
"learning_rate": 7.51737647188291e-06,
"loss": 0.5019,
"step": 56500
},
{
"epoch": 1.2451875481245187,
"grad_norm": 2.526304244995117,
"learning_rate": 7.512974579069e-06,
"loss": 0.4783,
"step": 56600
},
{
"epoch": 1.2473875261247387,
"grad_norm": 2.2301254272460938,
"learning_rate": 7.50857268625509e-06,
"loss": 0.4975,
"step": 56700
},
{
"epoch": 1.2495875041249587,
"grad_norm": 1.8377426862716675,
"learning_rate": 7.50417079344118e-06,
"loss": 0.4929,
"step": 56800
},
{
"epoch": 1.2517874821251787,
"grad_norm": 2.6410109996795654,
"learning_rate": 7.49976890062727e-06,
"loss": 0.4816,
"step": 56900
},
{
"epoch": 1.2539874601253986,
"grad_norm": 2.0295798778533936,
"learning_rate": 7.49536700781336e-06,
"loss": 0.5038,
"step": 57000
},
{
"epoch": 1.2561874381256186,
"grad_norm": 2.7886478900909424,
"learning_rate": 7.49096511499945e-06,
"loss": 0.5147,
"step": 57100
},
{
"epoch": 1.2583874161258388,
"grad_norm": 2.330388307571411,
"learning_rate": 7.48656322218554e-06,
"loss": 0.4929,
"step": 57200
},
{
"epoch": 1.2605873941260588,
"grad_norm": 1.756525993347168,
"learning_rate": 7.48216132937163e-06,
"loss": 0.4873,
"step": 57300
},
{
"epoch": 1.2627873721262788,
"grad_norm": 1.7345948219299316,
"learning_rate": 7.47775943655772e-06,
"loss": 0.4906,
"step": 57400
},
{
"epoch": 1.2649873501264988,
"grad_norm": 2.1234254837036133,
"learning_rate": 7.47335754374381e-06,
"loss": 0.5082,
"step": 57500
},
{
"epoch": 1.2671873281267187,
"grad_norm": 1.7519376277923584,
"learning_rate": 7.4689556509299e-06,
"loss": 0.5061,
"step": 57600
},
{
"epoch": 1.2693873061269387,
"grad_norm": 2.4111804962158203,
"learning_rate": 7.464553758115991e-06,
"loss": 0.4903,
"step": 57700
},
{
"epoch": 1.2715872841271587,
"grad_norm": 1.9729013442993164,
"learning_rate": 7.460151865302081e-06,
"loss": 0.4881,
"step": 57800
},
{
"epoch": 1.2737872621273787,
"grad_norm": 2.7246460914611816,
"learning_rate": 7.455749972488171e-06,
"loss": 0.517,
"step": 57900
},
{
"epoch": 1.2759872401275987,
"grad_norm": 1.660434603691101,
"learning_rate": 7.451348079674261e-06,
"loss": 0.502,
"step": 58000
},
{
"epoch": 1.2781872181278187,
"grad_norm": 2.782742500305176,
"learning_rate": 7.446946186860351e-06,
"loss": 0.4985,
"step": 58100
},
{
"epoch": 1.2803871961280386,
"grad_norm": 2.264404296875,
"learning_rate": 7.4425442940464405e-06,
"loss": 0.5016,
"step": 58200
},
{
"epoch": 1.2825871741282588,
"grad_norm": 2.0111939907073975,
"learning_rate": 7.438142401232531e-06,
"loss": 0.4905,
"step": 58300
},
{
"epoch": 1.2847871521284788,
"grad_norm": 2.0050606727600098,
"learning_rate": 7.433740508418621e-06,
"loss": 0.4864,
"step": 58400
},
{
"epoch": 1.2869871301286988,
"grad_norm": 1.3107115030288696,
"learning_rate": 7.4293386156047105e-06,
"loss": 0.4915,
"step": 58500
},
{
"epoch": 1.2891871081289188,
"grad_norm": 1.8996055126190186,
"learning_rate": 7.424936722790801e-06,
"loss": 0.49,
"step": 58600
},
{
"epoch": 1.2913870861291388,
"grad_norm": 1.7696682214736938,
"learning_rate": 7.420534829976891e-06,
"loss": 0.4968,
"step": 58700
},
{
"epoch": 1.2935870641293588,
"grad_norm": 2.1315739154815674,
"learning_rate": 7.4161329371629805e-06,
"loss": 0.4916,
"step": 58800
},
{
"epoch": 1.2957870421295787,
"grad_norm": 2.2679789066314697,
"learning_rate": 7.411731044349071e-06,
"loss": 0.5021,
"step": 58900
},
{
"epoch": 1.2979870201297987,
"grad_norm": 2.128899097442627,
"learning_rate": 7.407329151535161e-06,
"loss": 0.5148,
"step": 59000
},
{
"epoch": 1.3001869981300187,
"grad_norm": 2.085585832595825,
"learning_rate": 7.4029272587212505e-06,
"loss": 0.4804,
"step": 59100
},
{
"epoch": 1.3023869761302387,
"grad_norm": 2.367190361022949,
"learning_rate": 7.398525365907341e-06,
"loss": 0.4921,
"step": 59200
},
{
"epoch": 1.3045869541304587,
"grad_norm": 2.3802804946899414,
"learning_rate": 7.394123473093431e-06,
"loss": 0.4974,
"step": 59300
},
{
"epoch": 1.3067869321306786,
"grad_norm": 2.332484483718872,
"learning_rate": 7.3897215802795205e-06,
"loss": 0.5115,
"step": 59400
},
{
"epoch": 1.3089869101308986,
"grad_norm": 2.1906321048736572,
"learning_rate": 7.385319687465611e-06,
"loss": 0.505,
"step": 59500
},
{
"epoch": 1.3111868881311186,
"grad_norm": 1.942108154296875,
"learning_rate": 7.380917794651701e-06,
"loss": 0.4937,
"step": 59600
},
{
"epoch": 1.3133868661313386,
"grad_norm": 2.0868446826934814,
"learning_rate": 7.3765159018377905e-06,
"loss": 0.4963,
"step": 59700
},
{
"epoch": 1.3155868441315586,
"grad_norm": 2.3469884395599365,
"learning_rate": 7.372114009023881e-06,
"loss": 0.5038,
"step": 59800
},
{
"epoch": 1.3177868221317786,
"grad_norm": 2.1203341484069824,
"learning_rate": 7.367712116209971e-06,
"loss": 0.4891,
"step": 59900
},
{
"epoch": 1.3199868001319988,
"grad_norm": 1.7752751111984253,
"learning_rate": 7.3633102233960605e-06,
"loss": 0.5036,
"step": 60000
},
{
"epoch": 1.3221867781322187,
"grad_norm": 2.311631441116333,
"learning_rate": 7.358908330582151e-06,
"loss": 0.5104,
"step": 60100
},
{
"epoch": 1.3243867561324387,
"grad_norm": 1.9225836992263794,
"learning_rate": 7.354506437768241e-06,
"loss": 0.4926,
"step": 60200
},
{
"epoch": 1.3265867341326587,
"grad_norm": 1.9772847890853882,
"learning_rate": 7.3501045449543305e-06,
"loss": 0.4923,
"step": 60300
},
{
"epoch": 1.3287867121328787,
"grad_norm": 1.6036473512649536,
"learning_rate": 7.345702652140421e-06,
"loss": 0.4955,
"step": 60400
},
{
"epoch": 1.3309866901330987,
"grad_norm": 1.8488271236419678,
"learning_rate": 7.34130075932651e-06,
"loss": 0.512,
"step": 60500
},
{
"epoch": 1.3331866681333187,
"grad_norm": 2.149338722229004,
"learning_rate": 7.3368988665126005e-06,
"loss": 0.4914,
"step": 60600
},
{
"epoch": 1.3353866461335386,
"grad_norm": 2.4873788356781006,
"learning_rate": 7.332496973698691e-06,
"loss": 0.4965,
"step": 60700
},
{
"epoch": 1.3375866241337586,
"grad_norm": 2.4446520805358887,
"learning_rate": 7.32809508088478e-06,
"loss": 0.4917,
"step": 60800
},
{
"epoch": 1.3397866021339786,
"grad_norm": 2.2292611598968506,
"learning_rate": 7.3236931880708705e-06,
"loss": 0.4876,
"step": 60900
},
{
"epoch": 1.3419865801341986,
"grad_norm": 2.0160257816314697,
"learning_rate": 7.319291295256961e-06,
"loss": 0.4875,
"step": 61000
},
{
"epoch": 1.3441865581344188,
"grad_norm": 2.0969207286834717,
"learning_rate": 7.31488940244305e-06,
"loss": 0.5031,
"step": 61100
},
{
"epoch": 1.3463865361346388,
"grad_norm": 2.283207416534424,
"learning_rate": 7.310487509629141e-06,
"loss": 0.4907,
"step": 61200
},
{
"epoch": 1.3485865141348587,
"grad_norm": 1.9769617319107056,
"learning_rate": 7.306085616815232e-06,
"loss": 0.4942,
"step": 61300
},
{
"epoch": 1.3507864921350787,
"grad_norm": 2.156163454055786,
"learning_rate": 7.301683724001322e-06,
"loss": 0.4992,
"step": 61400
},
{
"epoch": 1.3529864701352987,
"grad_norm": 1.6328924894332886,
"learning_rate": 7.297281831187411e-06,
"loss": 0.4861,
"step": 61500
},
{
"epoch": 1.3551864481355187,
"grad_norm": 2.365056276321411,
"learning_rate": 7.292879938373502e-06,
"loss": 0.4915,
"step": 61600
},
{
"epoch": 1.3573864261357387,
"grad_norm": 2.6308701038360596,
"learning_rate": 7.288478045559592e-06,
"loss": 0.4837,
"step": 61700
},
{
"epoch": 1.3595864041359587,
"grad_norm": 2.454827070236206,
"learning_rate": 7.284076152745681e-06,
"loss": 0.4921,
"step": 61800
},
{
"epoch": 1.3617863821361786,
"grad_norm": 2.19412899017334,
"learning_rate": 7.279674259931772e-06,
"loss": 0.501,
"step": 61900
},
{
"epoch": 1.3639863601363986,
"grad_norm": 2.183582305908203,
"learning_rate": 7.275272367117862e-06,
"loss": 0.4934,
"step": 62000
},
{
"epoch": 1.3661863381366186,
"grad_norm": 2.2355942726135254,
"learning_rate": 7.270870474303951e-06,
"loss": 0.5037,
"step": 62100
},
{
"epoch": 1.3683863161368386,
"grad_norm": 1.8665735721588135,
"learning_rate": 7.266468581490042e-06,
"loss": 0.5054,
"step": 62200
},
{
"epoch": 1.3705862941370586,
"grad_norm": 2.457763433456421,
"learning_rate": 7.262066688676131e-06,
"loss": 0.4986,
"step": 62300
},
{
"epoch": 1.3727862721372786,
"grad_norm": 2.2373385429382324,
"learning_rate": 7.257664795862221e-06,
"loss": 0.4807,
"step": 62400
},
{
"epoch": 1.3749862501374985,
"grad_norm": 2.129803419113159,
"learning_rate": 7.253262903048312e-06,
"loss": 0.4877,
"step": 62500
},
{
"epoch": 1.3771862281377185,
"grad_norm": 2.2858309745788574,
"learning_rate": 7.248861010234401e-06,
"loss": 0.492,
"step": 62600
},
{
"epoch": 1.3793862061379385,
"grad_norm": 2.4332919120788574,
"learning_rate": 7.244459117420491e-06,
"loss": 0.4907,
"step": 62700
},
{
"epoch": 1.3815861841381587,
"grad_norm": 1.7995531558990479,
"learning_rate": 7.240057224606582e-06,
"loss": 0.5037,
"step": 62800
},
{
"epoch": 1.3837861621383787,
"grad_norm": 2.672942876815796,
"learning_rate": 7.235655331792671e-06,
"loss": 0.4968,
"step": 62900
},
{
"epoch": 1.3859861401385987,
"grad_norm": 2.1194186210632324,
"learning_rate": 7.231253438978761e-06,
"loss": 0.4944,
"step": 63000
},
{
"epoch": 1.3881861181388186,
"grad_norm": 2.5758581161499023,
"learning_rate": 7.226851546164852e-06,
"loss": 0.4844,
"step": 63100
},
{
"epoch": 1.3903860961390386,
"grad_norm": 2.359781503677368,
"learning_rate": 7.222449653350941e-06,
"loss": 0.4847,
"step": 63200
},
{
"epoch": 1.3925860741392586,
"grad_norm": 2.3243279457092285,
"learning_rate": 7.218047760537031e-06,
"loss": 0.4986,
"step": 63300
},
{
"epoch": 1.3947860521394786,
"grad_norm": 2.4134695529937744,
"learning_rate": 7.213645867723122e-06,
"loss": 0.4961,
"step": 63400
},
{
"epoch": 1.3969860301396986,
"grad_norm": 2.3432512283325195,
"learning_rate": 7.209243974909211e-06,
"loss": 0.5028,
"step": 63500
},
{
"epoch": 1.3991860081399186,
"grad_norm": 2.474076747894287,
"learning_rate": 7.204842082095301e-06,
"loss": 0.5004,
"step": 63600
},
{
"epoch": 1.4013859861401385,
"grad_norm": 2.43440580368042,
"learning_rate": 7.200440189281392e-06,
"loss": 0.5031,
"step": 63700
},
{
"epoch": 1.4035859641403585,
"grad_norm": 2.1737067699432373,
"learning_rate": 7.196038296467481e-06,
"loss": 0.4871,
"step": 63800
},
{
"epoch": 1.4057859421405787,
"grad_norm": 1.9419715404510498,
"learning_rate": 7.191636403653571e-06,
"loss": 0.4903,
"step": 63900
},
{
"epoch": 1.4079859201407987,
"grad_norm": 2.1449568271636963,
"learning_rate": 7.187234510839662e-06,
"loss": 0.4819,
"step": 64000
},
{
"epoch": 1.4101858981410187,
"grad_norm": 2.1790225505828857,
"learning_rate": 7.182832618025751e-06,
"loss": 0.5155,
"step": 64100
},
{
"epoch": 1.4123858761412387,
"grad_norm": 2.4493134021759033,
"learning_rate": 7.178430725211841e-06,
"loss": 0.4922,
"step": 64200
},
{
"epoch": 1.4145858541414587,
"grad_norm": 2.250734806060791,
"learning_rate": 7.174028832397932e-06,
"loss": 0.4911,
"step": 64300
},
{
"epoch": 1.4167858321416786,
"grad_norm": 2.312277317047119,
"learning_rate": 7.169626939584021e-06,
"loss": 0.4884,
"step": 64400
},
{
"epoch": 1.4189858101418986,
"grad_norm": 2.0889904499053955,
"learning_rate": 7.165225046770111e-06,
"loss": 0.5023,
"step": 64500
},
{
"epoch": 1.4211857881421186,
"grad_norm": 2.2084124088287354,
"learning_rate": 7.160823153956201e-06,
"loss": 0.4974,
"step": 64600
},
{
"epoch": 1.4233857661423386,
"grad_norm": 2.046213150024414,
"learning_rate": 7.156421261142292e-06,
"loss": 0.4935,
"step": 64700
},
{
"epoch": 1.4255857441425586,
"grad_norm": 2.1457226276397705,
"learning_rate": 7.152019368328382e-06,
"loss": 0.4903,
"step": 64800
},
{
"epoch": 1.4277857221427785,
"grad_norm": 2.058285713195801,
"learning_rate": 7.1476174755144725e-06,
"loss": 0.5002,
"step": 64900
},
{
"epoch": 1.4299857001429985,
"grad_norm": 2.269285202026367,
"learning_rate": 7.143215582700562e-06,
"loss": 0.4891,
"step": 65000
},
{
"epoch": 1.4321856781432185,
"grad_norm": 2.030383586883545,
"learning_rate": 7.138813689886652e-06,
"loss": 0.5101,
"step": 65100
},
{
"epoch": 1.4343856561434385,
"grad_norm": 2.0629866123199463,
"learning_rate": 7.1344117970727425e-06,
"loss": 0.4931,
"step": 65200
},
{
"epoch": 1.4365856341436585,
"grad_norm": 2.064944267272949,
"learning_rate": 7.130009904258832e-06,
"loss": 0.4992,
"step": 65300
},
{
"epoch": 1.4387856121438785,
"grad_norm": 2.1032135486602783,
"learning_rate": 7.125608011444922e-06,
"loss": 0.4919,
"step": 65400
},
{
"epoch": 1.4409855901440984,
"grad_norm": 2.3275599479675293,
"learning_rate": 7.1212061186310125e-06,
"loss": 0.5119,
"step": 65500
},
{
"epoch": 1.4431855681443184,
"grad_norm": 2.2477211952209473,
"learning_rate": 7.116804225817102e-06,
"loss": 0.5092,
"step": 65600
},
{
"epoch": 1.4453855461445386,
"grad_norm": 1.8756898641586304,
"learning_rate": 7.112402333003192e-06,
"loss": 0.4977,
"step": 65700
},
{
"epoch": 1.4475855241447586,
"grad_norm": 2.839963436126709,
"learning_rate": 7.1080004401892825e-06,
"loss": 0.4939,
"step": 65800
},
{
"epoch": 1.4497855021449786,
"grad_norm": 1.8775593042373657,
"learning_rate": 7.103598547375372e-06,
"loss": 0.4851,
"step": 65900
},
{
"epoch": 1.4519854801451986,
"grad_norm": 2.1938886642456055,
"learning_rate": 7.099196654561462e-06,
"loss": 0.4797,
"step": 66000
},
{
"epoch": 1.4541854581454186,
"grad_norm": 2.063523769378662,
"learning_rate": 7.0947947617475525e-06,
"loss": 0.4949,
"step": 66100
},
{
"epoch": 1.4563854361456385,
"grad_norm": 2.156369924545288,
"learning_rate": 7.090392868933642e-06,
"loss": 0.4936,
"step": 66200
},
{
"epoch": 1.4585854141458585,
"grad_norm": 2.4886789321899414,
"learning_rate": 7.085990976119732e-06,
"loss": 0.4979,
"step": 66300
},
{
"epoch": 1.4607853921460785,
"grad_norm": 2.3196351528167725,
"learning_rate": 7.081589083305822e-06,
"loss": 0.5121,
"step": 66400
},
{
"epoch": 1.4629853701462985,
"grad_norm": 2.057623863220215,
"learning_rate": 7.077187190491912e-06,
"loss": 0.4827,
"step": 66500
},
{
"epoch": 1.4651853481465185,
"grad_norm": 1.9187816381454468,
"learning_rate": 7.072785297678002e-06,
"loss": 0.4972,
"step": 66600
},
{
"epoch": 1.4673853261467387,
"grad_norm": 1.9243098497390747,
"learning_rate": 7.068383404864092e-06,
"loss": 0.48,
"step": 66700
},
{
"epoch": 1.4695853041469586,
"grad_norm": 2.221501111984253,
"learning_rate": 7.063981512050182e-06,
"loss": 0.4817,
"step": 66800
},
{
"epoch": 1.4717852821471786,
"grad_norm": 2.145901679992676,
"learning_rate": 7.059579619236272e-06,
"loss": 0.4974,
"step": 66900
},
{
"epoch": 1.4739852601473986,
"grad_norm": 2.7018229961395264,
"learning_rate": 7.055177726422362e-06,
"loss": 0.4776,
"step": 67000
},
{
"epoch": 1.4761852381476186,
"grad_norm": 1.826542854309082,
"learning_rate": 7.050775833608452e-06,
"loss": 0.494,
"step": 67100
},
{
"epoch": 1.4783852161478386,
"grad_norm": 2.528482437133789,
"learning_rate": 7.046373940794542e-06,
"loss": 0.4804,
"step": 67200
},
{
"epoch": 1.4805851941480586,
"grad_norm": 2.3805463314056396,
"learning_rate": 7.041972047980632e-06,
"loss": 0.5,
"step": 67300
},
{
"epoch": 1.4827851721482785,
"grad_norm": 2.379004716873169,
"learning_rate": 7.037570155166722e-06,
"loss": 0.5008,
"step": 67400
},
{
"epoch": 1.4849851501484985,
"grad_norm": 2.351308584213257,
"learning_rate": 7.033168262352812e-06,
"loss": 0.4917,
"step": 67500
},
{
"epoch": 1.4871851281487185,
"grad_norm": 2.390312910079956,
"learning_rate": 7.028766369538902e-06,
"loss": 0.4962,
"step": 67600
},
{
"epoch": 1.4893851061489385,
"grad_norm": 2.4329919815063477,
"learning_rate": 7.024364476724992e-06,
"loss": 0.4877,
"step": 67700
},
{
"epoch": 1.4915850841491585,
"grad_norm": 2.452253580093384,
"learning_rate": 7.019962583911082e-06,
"loss": 0.4908,
"step": 67800
},
{
"epoch": 1.4937850621493785,
"grad_norm": 2.1782665252685547,
"learning_rate": 7.015560691097172e-06,
"loss": 0.4804,
"step": 67900
},
{
"epoch": 1.4959850401495984,
"grad_norm": 2.0464863777160645,
"learning_rate": 7.011158798283262e-06,
"loss": 0.4947,
"step": 68000
},
{
"epoch": 1.4981850181498184,
"grad_norm": 1.713578701019287,
"learning_rate": 7.006756905469353e-06,
"loss": 0.4875,
"step": 68100
},
{
"epoch": 1.5003849961500384,
"grad_norm": 2.025834560394287,
"learning_rate": 7.002355012655443e-06,
"loss": 0.5027,
"step": 68200
},
{
"epoch": 1.5025849741502584,
"grad_norm": 2.509138822555542,
"learning_rate": 6.997953119841533e-06,
"loss": 0.4822,
"step": 68300
},
{
"epoch": 1.5047849521504784,
"grad_norm": 2.0234317779541016,
"learning_rate": 6.993551227027623e-06,
"loss": 0.4975,
"step": 68400
},
{
"epoch": 1.5069849301506983,
"grad_norm": 2.465769052505493,
"learning_rate": 6.9891493342137125e-06,
"loss": 0.5012,
"step": 68500
},
{
"epoch": 1.5091849081509183,
"grad_norm": 2.5200085639953613,
"learning_rate": 6.984747441399803e-06,
"loss": 0.5017,
"step": 68600
},
{
"epoch": 1.5113848861511385,
"grad_norm": 2.2190017700195312,
"learning_rate": 6.980345548585893e-06,
"loss": 0.4898,
"step": 68700
},
{
"epoch": 1.5135848641513585,
"grad_norm": 2.2302262783050537,
"learning_rate": 6.9759436557719825e-06,
"loss": 0.4989,
"step": 68800
},
{
"epoch": 1.5157848421515785,
"grad_norm": 2.4511725902557373,
"learning_rate": 6.971541762958073e-06,
"loss": 0.4934,
"step": 68900
},
{
"epoch": 1.5179848201517985,
"grad_norm": 2.3731210231781006,
"learning_rate": 6.967139870144163e-06,
"loss": 0.4724,
"step": 69000
},
{
"epoch": 1.5201847981520185,
"grad_norm": 2.2834906578063965,
"learning_rate": 6.9627379773302525e-06,
"loss": 0.4833,
"step": 69100
},
{
"epoch": 1.5223847761522384,
"grad_norm": 2.483689785003662,
"learning_rate": 6.958336084516343e-06,
"loss": 0.4923,
"step": 69200
},
{
"epoch": 1.5245847541524584,
"grad_norm": 2.316864490509033,
"learning_rate": 6.953934191702433e-06,
"loss": 0.5233,
"step": 69300
},
{
"epoch": 1.5267847321526786,
"grad_norm": 2.1905770301818848,
"learning_rate": 6.9495322988885225e-06,
"loss": 0.5233,
"step": 69400
},
{
"epoch": 1.5289847101528986,
"grad_norm": 2.5095105171203613,
"learning_rate": 6.945130406074613e-06,
"loss": 0.4927,
"step": 69500
},
{
"epoch": 1.5311846881531186,
"grad_norm": 2.210827112197876,
"learning_rate": 6.940728513260703e-06,
"loss": 0.4965,
"step": 69600
},
{
"epoch": 1.5333846661533386,
"grad_norm": 2.6142313480377197,
"learning_rate": 6.9363266204467925e-06,
"loss": 0.5025,
"step": 69700
},
{
"epoch": 1.5355846441535586,
"grad_norm": 2.3923892974853516,
"learning_rate": 6.931924727632883e-06,
"loss": 0.4793,
"step": 69800
},
{
"epoch": 1.5377846221537785,
"grad_norm": 2.1831846237182617,
"learning_rate": 6.927522834818973e-06,
"loss": 0.4935,
"step": 69900
},
{
"epoch": 1.5399846001539985,
"grad_norm": 2.030944347381592,
"learning_rate": 6.9231209420050625e-06,
"loss": 0.494,
"step": 70000
},
{
"epoch": 1.5421845781542185,
"grad_norm": 2.089087724685669,
"learning_rate": 6.918719049191153e-06,
"loss": 0.4989,
"step": 70100
},
{
"epoch": 1.5443845561544385,
"grad_norm": 2.7058706283569336,
"learning_rate": 6.914317156377243e-06,
"loss": 0.4982,
"step": 70200
},
{
"epoch": 1.5465845341546585,
"grad_norm": 2.312584638595581,
"learning_rate": 6.9099152635633325e-06,
"loss": 0.4981,
"step": 70300
},
{
"epoch": 1.5487845121548784,
"grad_norm": 2.5172085762023926,
"learning_rate": 6.905513370749423e-06,
"loss": 0.4871,
"step": 70400
},
{
"epoch": 1.5509844901550984,
"grad_norm": 2.035313367843628,
"learning_rate": 6.901111477935512e-06,
"loss": 0.4859,
"step": 70500
},
{
"epoch": 1.5531844681553184,
"grad_norm": 2.3374691009521484,
"learning_rate": 6.8967095851216025e-06,
"loss": 0.4831,
"step": 70600
},
{
"epoch": 1.5553844461555384,
"grad_norm": 2.2027342319488525,
"learning_rate": 6.892307692307693e-06,
"loss": 0.4974,
"step": 70700
},
{
"epoch": 1.5575844241557584,
"grad_norm": 2.4372105598449707,
"learning_rate": 6.887905799493782e-06,
"loss": 0.4902,
"step": 70800
},
{
"epoch": 1.5597844021559784,
"grad_norm": 2.320554256439209,
"learning_rate": 6.8835039066798725e-06,
"loss": 0.4917,
"step": 70900
},
{
"epoch": 1.5619843801561983,
"grad_norm": 2.323988437652588,
"learning_rate": 6.879102013865963e-06,
"loss": 0.5034,
"step": 71000
},
{
"epoch": 1.5641843581564183,
"grad_norm": 2.111454725265503,
"learning_rate": 6.874700121052052e-06,
"loss": 0.492,
"step": 71100
},
{
"epoch": 1.5663843361566383,
"grad_norm": 2.664884328842163,
"learning_rate": 6.8702982282381425e-06,
"loss": 0.4982,
"step": 71200
},
{
"epoch": 1.5685843141568583,
"grad_norm": 1.9500539302825928,
"learning_rate": 6.865896335424233e-06,
"loss": 0.5147,
"step": 71300
},
{
"epoch": 1.5707842921570783,
"grad_norm": 2.3592636585235596,
"learning_rate": 6.861494442610322e-06,
"loss": 0.4825,
"step": 71400
},
{
"epoch": 1.5729842701572985,
"grad_norm": 2.4548308849334717,
"learning_rate": 6.8570925497964125e-06,
"loss": 0.4949,
"step": 71500
},
{
"epoch": 1.5751842481575185,
"grad_norm": 2.971724033355713,
"learning_rate": 6.852690656982504e-06,
"loss": 0.4945,
"step": 71600
},
{
"epoch": 1.5773842261577384,
"grad_norm": 2.399245023727417,
"learning_rate": 6.848288764168594e-06,
"loss": 0.4888,
"step": 71700
},
{
"epoch": 1.5795842041579584,
"grad_norm": 2.2702841758728027,
"learning_rate": 6.843886871354683e-06,
"loss": 0.49,
"step": 71800
},
{
"epoch": 1.5817841821581784,
"grad_norm": 1.9252210855484009,
"learning_rate": 6.839484978540774e-06,
"loss": 0.494,
"step": 71900
},
{
"epoch": 1.5839841601583984,
"grad_norm": 2.4878454208374023,
"learning_rate": 6.835083085726864e-06,
"loss": 0.4984,
"step": 72000
},
{
"epoch": 1.5861841381586184,
"grad_norm": 2.035708427429199,
"learning_rate": 6.830681192912953e-06,
"loss": 0.4825,
"step": 72100
},
{
"epoch": 1.5883841161588386,
"grad_norm": 2.55355167388916,
"learning_rate": 6.826279300099044e-06,
"loss": 0.5056,
"step": 72200
},
{
"epoch": 1.5905840941590585,
"grad_norm": 2.4391555786132812,
"learning_rate": 6.821877407285133e-06,
"loss": 0.4928,
"step": 72300
},
{
"epoch": 1.5927840721592785,
"grad_norm": 2.2338058948516846,
"learning_rate": 6.817475514471223e-06,
"loss": 0.4874,
"step": 72400
},
{
"epoch": 1.5949840501594985,
"grad_norm": 2.7937569618225098,
"learning_rate": 6.813073621657314e-06,
"loss": 0.477,
"step": 72500
},
{
"epoch": 1.5971840281597185,
"grad_norm": 2.2559831142425537,
"learning_rate": 6.808671728843403e-06,
"loss": 0.501,
"step": 72600
},
{
"epoch": 1.5993840061599385,
"grad_norm": 2.1428000926971436,
"learning_rate": 6.804269836029493e-06,
"loss": 0.4872,
"step": 72700
},
{
"epoch": 1.6015839841601585,
"grad_norm": 2.306943655014038,
"learning_rate": 6.799867943215584e-06,
"loss": 0.5002,
"step": 72800
},
{
"epoch": 1.6037839621603784,
"grad_norm": 2.3396975994110107,
"learning_rate": 6.795466050401673e-06,
"loss": 0.4951,
"step": 72900
},
{
"epoch": 1.6059839401605984,
"grad_norm": 1.8894736766815186,
"learning_rate": 6.791064157587763e-06,
"loss": 0.4872,
"step": 73000
},
{
"epoch": 1.6081839181608184,
"grad_norm": 2.0049326419830322,
"learning_rate": 6.786662264773854e-06,
"loss": 0.4877,
"step": 73100
},
{
"epoch": 1.6103838961610384,
"grad_norm": 2.3615005016326904,
"learning_rate": 6.782260371959943e-06,
"loss": 0.4925,
"step": 73200
},
{
"epoch": 1.6125838741612584,
"grad_norm": 2.386545419692993,
"learning_rate": 6.777858479146033e-06,
"loss": 0.4881,
"step": 73300
},
{
"epoch": 1.6147838521614784,
"grad_norm": 2.3752076625823975,
"learning_rate": 6.773456586332124e-06,
"loss": 0.4813,
"step": 73400
},
{
"epoch": 1.6169838301616983,
"grad_norm": 2.156837224960327,
"learning_rate": 6.769054693518213e-06,
"loss": 0.4793,
"step": 73500
},
{
"epoch": 1.6191838081619183,
"grad_norm": 2.788848638534546,
"learning_rate": 6.764652800704303e-06,
"loss": 0.4946,
"step": 73600
},
{
"epoch": 1.6213837861621383,
"grad_norm": 2.1992275714874268,
"learning_rate": 6.760250907890394e-06,
"loss": 0.5019,
"step": 73700
},
{
"epoch": 1.6235837641623583,
"grad_norm": 2.664424419403076,
"learning_rate": 6.755849015076483e-06,
"loss": 0.4885,
"step": 73800
},
{
"epoch": 1.6257837421625783,
"grad_norm": 2.3380892276763916,
"learning_rate": 6.751447122262573e-06,
"loss": 0.4947,
"step": 73900
},
{
"epoch": 1.6279837201627982,
"grad_norm": 2.3588438034057617,
"learning_rate": 6.747045229448664e-06,
"loss": 0.4652,
"step": 74000
},
{
"epoch": 1.6301836981630182,
"grad_norm": 2.6669723987579346,
"learning_rate": 6.742643336634753e-06,
"loss": 0.491,
"step": 74100
},
{
"epoch": 1.6323836761632382,
"grad_norm": 2.4595651626586914,
"learning_rate": 6.738241443820843e-06,
"loss": 0.501,
"step": 74200
},
{
"epoch": 1.6345836541634584,
"grad_norm": 2.2686636447906494,
"learning_rate": 6.733839551006934e-06,
"loss": 0.482,
"step": 74300
},
{
"epoch": 1.6367836321636784,
"grad_norm": 2.4227776527404785,
"learning_rate": 6.729437658193023e-06,
"loss": 0.4958,
"step": 74400
},
{
"epoch": 1.6389836101638984,
"grad_norm": 1.9847477674484253,
"learning_rate": 6.725035765379113e-06,
"loss": 0.4834,
"step": 74500
},
{
"epoch": 1.6411835881641184,
"grad_norm": 2.6502370834350586,
"learning_rate": 6.720633872565203e-06,
"loss": 0.4815,
"step": 74600
},
{
"epoch": 1.6433835661643383,
"grad_norm": 2.2831785678863525,
"learning_rate": 6.716231979751293e-06,
"loss": 0.4826,
"step": 74700
},
{
"epoch": 1.6455835441645583,
"grad_norm": 1.8865406513214111,
"learning_rate": 6.711830086937383e-06,
"loss": 0.4986,
"step": 74800
},
{
"epoch": 1.6477835221647783,
"grad_norm": 2.026791572570801,
"learning_rate": 6.707428194123473e-06,
"loss": 0.4872,
"step": 74900
},
{
"epoch": 1.6499835001649985,
"grad_norm": 2.772639036178589,
"learning_rate": 6.703026301309563e-06,
"loss": 0.4891,
"step": 75000
},
{
"epoch": 1.6521834781652185,
"grad_norm": 2.4932167530059814,
"learning_rate": 6.698624408495654e-06,
"loss": 0.4868,
"step": 75100
},
{
"epoch": 1.6543834561654385,
"grad_norm": 2.5153396129608154,
"learning_rate": 6.6942225156817445e-06,
"loss": 0.5073,
"step": 75200
},
{
"epoch": 1.6565834341656585,
"grad_norm": 1.7845731973648071,
"learning_rate": 6.689820622867834e-06,
"loss": 0.496,
"step": 75300
},
{
"epoch": 1.6587834121658784,
"grad_norm": 2.392333745956421,
"learning_rate": 6.685418730053924e-06,
"loss": 0.5044,
"step": 75400
},
{
"epoch": 1.6609833901660984,
"grad_norm": 2.624262809753418,
"learning_rate": 6.6810168372400145e-06,
"loss": 0.5196,
"step": 75500
},
{
"epoch": 1.6631833681663184,
"grad_norm": 2.421013355255127,
"learning_rate": 6.676614944426104e-06,
"loss": 0.4938,
"step": 75600
},
{
"epoch": 1.6653833461665384,
"grad_norm": 2.1836936473846436,
"learning_rate": 6.672213051612194e-06,
"loss": 0.4735,
"step": 75700
},
{
"epoch": 1.6675833241667584,
"grad_norm": 2.523780345916748,
"learning_rate": 6.6678111587982845e-06,
"loss": 0.4868,
"step": 75800
},
{
"epoch": 1.6697833021669783,
"grad_norm": 3.20668363571167,
"learning_rate": 6.663409265984374e-06,
"loss": 0.4902,
"step": 75900
},
{
"epoch": 1.6719832801671983,
"grad_norm": 2.6450743675231934,
"learning_rate": 6.659007373170464e-06,
"loss": 0.4852,
"step": 76000
},
{
"epoch": 1.6741832581674183,
"grad_norm": 2.3257484436035156,
"learning_rate": 6.6546054803565545e-06,
"loss": 0.4913,
"step": 76100
},
{
"epoch": 1.6763832361676383,
"grad_norm": 1.7676602602005005,
"learning_rate": 6.650203587542644e-06,
"loss": 0.5051,
"step": 76200
},
{
"epoch": 1.6785832141678583,
"grad_norm": 2.2192280292510986,
"learning_rate": 6.645801694728734e-06,
"loss": 0.4959,
"step": 76300
},
{
"epoch": 1.6807831921680783,
"grad_norm": 2.4453659057617188,
"learning_rate": 6.641399801914824e-06,
"loss": 0.4841,
"step": 76400
},
{
"epoch": 1.6829831701682982,
"grad_norm": 1.9458132982254028,
"learning_rate": 6.636997909100914e-06,
"loss": 0.4911,
"step": 76500
},
{
"epoch": 1.6851831481685182,
"grad_norm": 2.2809267044067383,
"learning_rate": 6.632596016287004e-06,
"loss": 0.4871,
"step": 76600
},
{
"epoch": 1.6873831261687382,
"grad_norm": 2.630840301513672,
"learning_rate": 6.628194123473094e-06,
"loss": 0.4813,
"step": 76700
},
{
"epoch": 1.6895831041689582,
"grad_norm": 2.8288991451263428,
"learning_rate": 6.623792230659184e-06,
"loss": 0.4918,
"step": 76800
},
{
"epoch": 1.6917830821691782,
"grad_norm": 2.220552921295166,
"learning_rate": 6.619390337845274e-06,
"loss": 0.4958,
"step": 76900
},
{
"epoch": 1.6939830601693981,
"grad_norm": 2.3790931701660156,
"learning_rate": 6.614988445031364e-06,
"loss": 0.5098,
"step": 77000
},
{
"epoch": 1.6961830381696184,
"grad_norm": 2.605365753173828,
"learning_rate": 6.610586552217454e-06,
"loss": 0.4999,
"step": 77100
},
{
"epoch": 1.6983830161698383,
"grad_norm": 2.526428461074829,
"learning_rate": 6.606184659403544e-06,
"loss": 0.5008,
"step": 77200
},
{
"epoch": 1.7005829941700583,
"grad_norm": 2.2195465564727783,
"learning_rate": 6.601782766589634e-06,
"loss": 0.4846,
"step": 77300
},
{
"epoch": 1.7027829721702783,
"grad_norm": 2.925656318664551,
"learning_rate": 6.597380873775724e-06,
"loss": 0.4773,
"step": 77400
},
{
"epoch": 1.7049829501704983,
"grad_norm": 2.5258848667144775,
"learning_rate": 6.592978980961814e-06,
"loss": 0.4972,
"step": 77500
},
{
"epoch": 1.7071829281707183,
"grad_norm": 3.0461318492889404,
"learning_rate": 6.588577088147904e-06,
"loss": 0.4857,
"step": 77600
},
{
"epoch": 1.7093829061709382,
"grad_norm": 2.3932976722717285,
"learning_rate": 6.584175195333994e-06,
"loss": 0.4999,
"step": 77700
},
{
"epoch": 1.7115828841711584,
"grad_norm": 2.044865369796753,
"learning_rate": 6.579773302520084e-06,
"loss": 0.4898,
"step": 77800
},
{
"epoch": 1.7137828621713784,
"grad_norm": 2.366441011428833,
"learning_rate": 6.575371409706174e-06,
"loss": 0.4786,
"step": 77900
},
{
"epoch": 1.7159828401715984,
"grad_norm": 2.57084584236145,
"learning_rate": 6.570969516892264e-06,
"loss": 0.4766,
"step": 78000
},
{
"epoch": 1.7181828181718184,
"grad_norm": 2.560520887374878,
"learning_rate": 6.566567624078354e-06,
"loss": 0.4891,
"step": 78100
},
{
"epoch": 1.7203827961720384,
"grad_norm": 2.1307547092437744,
"learning_rate": 6.562165731264444e-06,
"loss": 0.4852,
"step": 78200
},
{
"epoch": 1.7225827741722584,
"grad_norm": 2.4924020767211914,
"learning_rate": 6.557763838450534e-06,
"loss": 0.4836,
"step": 78300
},
{
"epoch": 1.7247827521724783,
"grad_norm": 2.323122978210449,
"learning_rate": 6.553361945636624e-06,
"loss": 0.4926,
"step": 78400
},
{
"epoch": 1.7269827301726983,
"grad_norm": 2.1391868591308594,
"learning_rate": 6.5489600528227145e-06,
"loss": 0.4974,
"step": 78500
},
{
"epoch": 1.7291827081729183,
"grad_norm": 2.2388463020324707,
"learning_rate": 6.544558160008805e-06,
"loss": 0.4825,
"step": 78600
},
{
"epoch": 1.7313826861731383,
"grad_norm": 2.617159843444824,
"learning_rate": 6.540156267194895e-06,
"loss": 0.4969,
"step": 78700
},
{
"epoch": 1.7335826641733583,
"grad_norm": 1.9445505142211914,
"learning_rate": 6.5357543743809845e-06,
"loss": 0.494,
"step": 78800
},
{
"epoch": 1.7357826421735782,
"grad_norm": 1.8033205270767212,
"learning_rate": 6.531352481567075e-06,
"loss": 0.4901,
"step": 78900
},
{
"epoch": 1.7379826201737982,
"grad_norm": 2.480191469192505,
"learning_rate": 6.526950588753165e-06,
"loss": 0.4756,
"step": 79000
},
{
"epoch": 1.7401825981740182,
"grad_norm": 2.203779697418213,
"learning_rate": 6.5225486959392545e-06,
"loss": 0.4949,
"step": 79100
},
{
"epoch": 1.7423825761742382,
"grad_norm": 2.6420180797576904,
"learning_rate": 6.518146803125345e-06,
"loss": 0.476,
"step": 79200
},
{
"epoch": 1.7445825541744582,
"grad_norm": 2.4949381351470947,
"learning_rate": 6.513744910311435e-06,
"loss": 0.4805,
"step": 79300
},
{
"epoch": 1.7467825321746782,
"grad_norm": 1.6507716178894043,
"learning_rate": 6.5093430174975245e-06,
"loss": 0.4928,
"step": 79400
},
{
"epoch": 1.7489825101748981,
"grad_norm": 2.849067211151123,
"learning_rate": 6.504941124683615e-06,
"loss": 0.4879,
"step": 79500
},
{
"epoch": 1.7511824881751181,
"grad_norm": 2.404705047607422,
"learning_rate": 6.500539231869705e-06,
"loss": 0.4761,
"step": 79600
},
{
"epoch": 1.753382466175338,
"grad_norm": 2.653310537338257,
"learning_rate": 6.4961373390557945e-06,
"loss": 0.5017,
"step": 79700
},
{
"epoch": 1.755582444175558,
"grad_norm": 2.31355619430542,
"learning_rate": 6.491735446241885e-06,
"loss": 0.4802,
"step": 79800
},
{
"epoch": 1.7577824221757783,
"grad_norm": 2.361945867538452,
"learning_rate": 6.487333553427975e-06,
"loss": 0.4816,
"step": 79900
},
{
"epoch": 1.7599824001759983,
"grad_norm": 2.199768304824829,
"learning_rate": 6.4829316606140645e-06,
"loss": 0.4632,
"step": 80000
},
{
"epoch": 1.7621823781762183,
"grad_norm": 1.8634425401687622,
"learning_rate": 6.478529767800155e-06,
"loss": 0.4909,
"step": 80100
},
{
"epoch": 1.7643823561764382,
"grad_norm": 2.742694616317749,
"learning_rate": 6.474127874986245e-06,
"loss": 0.4939,
"step": 80200
},
{
"epoch": 1.7665823341766582,
"grad_norm": 2.8734514713287354,
"learning_rate": 6.4697259821723345e-06,
"loss": 0.4917,
"step": 80300
},
{
"epoch": 1.7687823121768782,
"grad_norm": 2.59197735786438,
"learning_rate": 6.465324089358425e-06,
"loss": 0.4781,
"step": 80400
},
{
"epoch": 1.7709822901770982,
"grad_norm": 2.3575127124786377,
"learning_rate": 6.460922196544514e-06,
"loss": 0.4801,
"step": 80500
},
{
"epoch": 1.7731822681773184,
"grad_norm": 2.599222421646118,
"learning_rate": 6.4565203037306045e-06,
"loss": 0.4891,
"step": 80600
},
{
"epoch": 1.7753822461775384,
"grad_norm": 2.7138659954071045,
"learning_rate": 6.452118410916695e-06,
"loss": 0.491,
"step": 80700
},
{
"epoch": 1.7775822241777584,
"grad_norm": 2.467128038406372,
"learning_rate": 6.447716518102784e-06,
"loss": 0.4984,
"step": 80800
},
{
"epoch": 1.7797822021779783,
"grad_norm": 2.4047677516937256,
"learning_rate": 6.4433146252888745e-06,
"loss": 0.4756,
"step": 80900
},
{
"epoch": 1.7819821801781983,
"grad_norm": 2.0229098796844482,
"learning_rate": 6.438912732474965e-06,
"loss": 0.4792,
"step": 81000
},
{
"epoch": 1.7841821581784183,
"grad_norm": 2.463090658187866,
"learning_rate": 6.434510839661054e-06,
"loss": 0.4824,
"step": 81100
},
{
"epoch": 1.7863821361786383,
"grad_norm": 2.3522398471832275,
"learning_rate": 6.4301089468471445e-06,
"loss": 0.4938,
"step": 81200
},
{
"epoch": 1.7885821141788583,
"grad_norm": 2.1566226482391357,
"learning_rate": 6.425707054033235e-06,
"loss": 0.4858,
"step": 81300
},
{
"epoch": 1.7907820921790782,
"grad_norm": 2.452099084854126,
"learning_rate": 6.421305161219324e-06,
"loss": 0.4879,
"step": 81400
},
{
"epoch": 1.7929820701792982,
"grad_norm": 2.3728647232055664,
"learning_rate": 6.4169032684054144e-06,
"loss": 0.499,
"step": 81500
},
{
"epoch": 1.7951820481795182,
"grad_norm": 2.499342441558838,
"learning_rate": 6.412501375591505e-06,
"loss": 0.4799,
"step": 81600
},
{
"epoch": 1.7973820261797382,
"grad_norm": 2.281799077987671,
"learning_rate": 6.408099482777594e-06,
"loss": 0.4823,
"step": 81700
},
{
"epoch": 1.7995820041799582,
"grad_norm": 2.5670275688171387,
"learning_rate": 6.4036975899636844e-06,
"loss": 0.4956,
"step": 81800
},
{
"epoch": 1.8017819821801782,
"grad_norm": 2.830780506134033,
"learning_rate": 6.399295697149775e-06,
"loss": 0.4909,
"step": 81900
},
{
"epoch": 1.8039819601803981,
"grad_norm": 2.3581204414367676,
"learning_rate": 6.394893804335866e-06,
"loss": 0.4906,
"step": 82000
},
{
"epoch": 1.8061819381806181,
"grad_norm": 2.6061856746673584,
"learning_rate": 6.390491911521955e-06,
"loss": 0.488,
"step": 82100
},
{
"epoch": 1.808381916180838,
"grad_norm": 2.3762636184692383,
"learning_rate": 6.386090018708046e-06,
"loss": 0.4957,
"step": 82200
},
{
"epoch": 1.810581894181058,
"grad_norm": 2.7238190174102783,
"learning_rate": 6.381688125894136e-06,
"loss": 0.4866,
"step": 82300
},
{
"epoch": 1.812781872181278,
"grad_norm": 2.1085996627807617,
"learning_rate": 6.377286233080225e-06,
"loss": 0.4666,
"step": 82400
},
{
"epoch": 1.814981850181498,
"grad_norm": 2.127675771713257,
"learning_rate": 6.372884340266316e-06,
"loss": 0.4975,
"step": 82500
},
{
"epoch": 1.817181828181718,
"grad_norm": 2.0977835655212402,
"learning_rate": 6.368482447452405e-06,
"loss": 0.5016,
"step": 82600
},
{
"epoch": 1.8193818061819382,
"grad_norm": 2.5928144454956055,
"learning_rate": 6.364080554638495e-06,
"loss": 0.4904,
"step": 82700
},
{
"epoch": 1.8215817841821582,
"grad_norm": 2.5363171100616455,
"learning_rate": 6.359678661824586e-06,
"loss": 0.4739,
"step": 82800
},
{
"epoch": 1.8237817621823782,
"grad_norm": 1.779845952987671,
"learning_rate": 6.355276769010675e-06,
"loss": 0.475,
"step": 82900
},
{
"epoch": 1.8259817401825982,
"grad_norm": 2.3891873359680176,
"learning_rate": 6.350874876196765e-06,
"loss": 0.4867,
"step": 83000
},
{
"epoch": 1.8281817181828182,
"grad_norm": 2.5663325786590576,
"learning_rate": 6.3464729833828556e-06,
"loss": 0.4706,
"step": 83100
},
{
"epoch": 1.8303816961830381,
"grad_norm": 2.2070469856262207,
"learning_rate": 6.342071090568945e-06,
"loss": 0.4894,
"step": 83200
},
{
"epoch": 1.8325816741832581,
"grad_norm": 2.3300230503082275,
"learning_rate": 6.337669197755035e-06,
"loss": 0.4843,
"step": 83300
},
{
"epoch": 1.8347816521834783,
"grad_norm": 2.1778311729431152,
"learning_rate": 6.3332673049411256e-06,
"loss": 0.5032,
"step": 83400
},
{
"epoch": 1.8369816301836983,
"grad_norm": 2.106933832168579,
"learning_rate": 6.328865412127215e-06,
"loss": 0.4875,
"step": 83500
},
{
"epoch": 1.8391816081839183,
"grad_norm": 2.6579482555389404,
"learning_rate": 6.324463519313305e-06,
"loss": 0.4892,
"step": 83600
},
{
"epoch": 1.8413815861841383,
"grad_norm": 2.3309366703033447,
"learning_rate": 6.3200616264993956e-06,
"loss": 0.4699,
"step": 83700
},
{
"epoch": 1.8435815641843583,
"grad_norm": 2.503455400466919,
"learning_rate": 6.315659733685485e-06,
"loss": 0.4801,
"step": 83800
},
{
"epoch": 1.8457815421845782,
"grad_norm": 2.5221006870269775,
"learning_rate": 6.311257840871575e-06,
"loss": 0.4834,
"step": 83900
},
{
"epoch": 1.8479815201847982,
"grad_norm": 2.271540403366089,
"learning_rate": 6.3068559480576656e-06,
"loss": 0.4759,
"step": 84000
},
{
"epoch": 1.8501814981850182,
"grad_norm": 2.2240519523620605,
"learning_rate": 6.302454055243755e-06,
"loss": 0.4858,
"step": 84100
},
{
"epoch": 1.8523814761852382,
"grad_norm": 2.41463041305542,
"learning_rate": 6.298052162429845e-06,
"loss": 0.4951,
"step": 84200
},
{
"epoch": 1.8545814541854582,
"grad_norm": 2.420825242996216,
"learning_rate": 6.2936502696159356e-06,
"loss": 0.4949,
"step": 84300
},
{
"epoch": 1.8567814321856781,
"grad_norm": 2.6283483505249023,
"learning_rate": 6.289248376802025e-06,
"loss": 0.4928,
"step": 84400
},
{
"epoch": 1.8589814101858981,
"grad_norm": 2.6053175926208496,
"learning_rate": 6.284846483988115e-06,
"loss": 0.4951,
"step": 84500
},
{
"epoch": 1.8611813881861181,
"grad_norm": 2.556842803955078,
"learning_rate": 6.280444591174205e-06,
"loss": 0.4766,
"step": 84600
},
{
"epoch": 1.863381366186338,
"grad_norm": 2.583364248275757,
"learning_rate": 6.276042698360295e-06,
"loss": 0.4964,
"step": 84700
},
{
"epoch": 1.865581344186558,
"grad_norm": 2.407144069671631,
"learning_rate": 6.271640805546385e-06,
"loss": 0.4882,
"step": 84800
},
{
"epoch": 1.867781322186778,
"grad_norm": 2.20274019241333,
"learning_rate": 6.267238912732475e-06,
"loss": 0.488,
"step": 84900
},
{
"epoch": 1.869981300186998,
"grad_norm": 2.537299871444702,
"learning_rate": 6.262837019918565e-06,
"loss": 0.4912,
"step": 85000
},
{
"epoch": 1.872181278187218,
"grad_norm": 2.4242103099823,
"learning_rate": 6.258435127104655e-06,
"loss": 0.4857,
"step": 85100
},
{
"epoch": 1.874381256187438,
"grad_norm": 1.9029467105865479,
"learning_rate": 6.254033234290745e-06,
"loss": 0.4969,
"step": 85200
},
{
"epoch": 1.876581234187658,
"grad_norm": 3.0369937419891357,
"learning_rate": 6.249631341476835e-06,
"loss": 0.4854,
"step": 85300
},
{
"epoch": 1.878781212187878,
"grad_norm": 2.6991753578186035,
"learning_rate": 6.245229448662925e-06,
"loss": 0.4771,
"step": 85400
},
{
"epoch": 1.8809811901880982,
"grad_norm": 2.336350679397583,
"learning_rate": 6.240827555849016e-06,
"loss": 0.4922,
"step": 85500
},
{
"epoch": 1.8831811681883182,
"grad_norm": 2.731637477874756,
"learning_rate": 6.236425663035106e-06,
"loss": 0.4877,
"step": 85600
},
{
"epoch": 1.8853811461885381,
"grad_norm": 2.438896417617798,
"learning_rate": 6.232023770221196e-06,
"loss": 0.4743,
"step": 85700
},
{
"epoch": 1.8875811241887581,
"grad_norm": 2.8118035793304443,
"learning_rate": 6.227621877407286e-06,
"loss": 0.4804,
"step": 85800
},
{
"epoch": 1.889781102188978,
"grad_norm": 2.5621535778045654,
"learning_rate": 6.223219984593376e-06,
"loss": 0.4849,
"step": 85900
},
{
"epoch": 1.891981080189198,
"grad_norm": 2.3240880966186523,
"learning_rate": 6.218818091779466e-06,
"loss": 0.4919,
"step": 86000
},
{
"epoch": 1.894181058189418,
"grad_norm": 2.481004238128662,
"learning_rate": 6.214416198965556e-06,
"loss": 0.4794,
"step": 86100
},
{
"epoch": 1.8963810361896383,
"grad_norm": 2.4835259914398193,
"learning_rate": 6.210014306151646e-06,
"loss": 0.479,
"step": 86200
},
{
"epoch": 1.8985810141898583,
"grad_norm": 2.3219950199127197,
"learning_rate": 6.205612413337736e-06,
"loss": 0.4743,
"step": 86300
},
{
"epoch": 1.9007809921900782,
"grad_norm": 2.9407191276550293,
"learning_rate": 6.201210520523826e-06,
"loss": 0.4641,
"step": 86400
},
{
"epoch": 1.9029809701902982,
"grad_norm": 2.64907169342041,
"learning_rate": 6.196808627709916e-06,
"loss": 0.4821,
"step": 86500
},
{
"epoch": 1.9051809481905182,
"grad_norm": 2.1783690452575684,
"learning_rate": 6.192406734896006e-06,
"loss": 0.4709,
"step": 86600
},
{
"epoch": 1.9073809261907382,
"grad_norm": 2.755631685256958,
"learning_rate": 6.1880048420820956e-06,
"loss": 0.4816,
"step": 86700
},
{
"epoch": 1.9095809041909582,
"grad_norm": 2.761409044265747,
"learning_rate": 6.183602949268186e-06,
"loss": 0.4833,
"step": 86800
},
{
"epoch": 1.9117808821911781,
"grad_norm": 2.676274061203003,
"learning_rate": 6.179201056454276e-06,
"loss": 0.4962,
"step": 86900
},
{
"epoch": 1.9139808601913981,
"grad_norm": 2.450660467147827,
"learning_rate": 6.1747991636403656e-06,
"loss": 0.473,
"step": 87000
},
{
"epoch": 1.916180838191618,
"grad_norm": 2.693134069442749,
"learning_rate": 6.170397270826456e-06,
"loss": 0.4781,
"step": 87100
},
{
"epoch": 1.918380816191838,
"grad_norm": 2.411348581314087,
"learning_rate": 6.165995378012546e-06,
"loss": 0.4804,
"step": 87200
},
{
"epoch": 1.920580794192058,
"grad_norm": 2.500234842300415,
"learning_rate": 6.1615934851986356e-06,
"loss": 0.4837,
"step": 87300
},
{
"epoch": 1.922780772192278,
"grad_norm": 3.033048391342163,
"learning_rate": 6.157191592384726e-06,
"loss": 0.471,
"step": 87400
},
{
"epoch": 1.924980750192498,
"grad_norm": 1.847033143043518,
"learning_rate": 6.152789699570816e-06,
"loss": 0.4823,
"step": 87500
},
{
"epoch": 1.927180728192718,
"grad_norm": 2.5302257537841797,
"learning_rate": 6.1483878067569056e-06,
"loss": 0.4826,
"step": 87600
},
{
"epoch": 1.929380706192938,
"grad_norm": 1.998494029045105,
"learning_rate": 6.143985913942996e-06,
"loss": 0.4891,
"step": 87700
},
{
"epoch": 1.931580684193158,
"grad_norm": 2.995784044265747,
"learning_rate": 6.139584021129086e-06,
"loss": 0.4847,
"step": 87800
},
{
"epoch": 1.933780662193378,
"grad_norm": 2.2645761966705322,
"learning_rate": 6.1351821283151756e-06,
"loss": 0.5042,
"step": 87900
},
{
"epoch": 1.935980640193598,
"grad_norm": 2.3474481105804443,
"learning_rate": 6.130780235501266e-06,
"loss": 0.4845,
"step": 88000
},
{
"epoch": 1.938180618193818,
"grad_norm": 2.570206880569458,
"learning_rate": 6.126378342687356e-06,
"loss": 0.4794,
"step": 88100
},
{
"epoch": 1.940380596194038,
"grad_norm": 1.8715978860855103,
"learning_rate": 6.1219764498734456e-06,
"loss": 0.4775,
"step": 88200
},
{
"epoch": 1.942580574194258,
"grad_norm": 2.443993330001831,
"learning_rate": 6.117574557059536e-06,
"loss": 0.4824,
"step": 88300
},
{
"epoch": 1.944780552194478,
"grad_norm": 2.4730186462402344,
"learning_rate": 6.113172664245626e-06,
"loss": 0.4914,
"step": 88400
},
{
"epoch": 1.946980530194698,
"grad_norm": 2.6471264362335205,
"learning_rate": 6.1087707714317156e-06,
"loss": 0.4826,
"step": 88500
},
{
"epoch": 1.949180508194918,
"grad_norm": 2.5795907974243164,
"learning_rate": 6.104368878617806e-06,
"loss": 0.4871,
"step": 88600
},
{
"epoch": 1.951380486195138,
"grad_norm": 2.3072896003723145,
"learning_rate": 6.099966985803895e-06,
"loss": 0.4937,
"step": 88700
},
{
"epoch": 1.953580464195358,
"grad_norm": 2.5398294925689697,
"learning_rate": 6.0955650929899856e-06,
"loss": 0.4919,
"step": 88800
},
{
"epoch": 1.955780442195578,
"grad_norm": 2.15952730178833,
"learning_rate": 6.091163200176077e-06,
"loss": 0.4934,
"step": 88900
},
{
"epoch": 1.957980420195798,
"grad_norm": 2.4487977027893066,
"learning_rate": 6.086761307362167e-06,
"loss": 0.4842,
"step": 89000
},
{
"epoch": 1.9601803981960182,
"grad_norm": 2.4906442165374756,
"learning_rate": 6.082359414548256e-06,
"loss": 0.484,
"step": 89100
},
{
"epoch": 1.9623803761962382,
"grad_norm": 2.605121374130249,
"learning_rate": 6.077957521734347e-06,
"loss": 0.4903,
"step": 89200
},
{
"epoch": 1.9645803541964582,
"grad_norm": 2.7144834995269775,
"learning_rate": 6.073555628920437e-06,
"loss": 0.4931,
"step": 89300
},
{
"epoch": 1.9667803321966781,
"grad_norm": 2.7881131172180176,
"learning_rate": 6.069153736106526e-06,
"loss": 0.495,
"step": 89400
},
{
"epoch": 1.9689803101968981,
"grad_norm": 3.044265031814575,
"learning_rate": 6.064751843292617e-06,
"loss": 0.4757,
"step": 89500
},
{
"epoch": 1.971180288197118,
"grad_norm": 2.3652849197387695,
"learning_rate": 6.060349950478707e-06,
"loss": 0.4761,
"step": 89600
},
{
"epoch": 1.973380266197338,
"grad_norm": 1.9909372329711914,
"learning_rate": 6.055948057664796e-06,
"loss": 0.492,
"step": 89700
},
{
"epoch": 1.975580244197558,
"grad_norm": 2.1215572357177734,
"learning_rate": 6.051546164850887e-06,
"loss": 0.4787,
"step": 89800
},
{
"epoch": 1.977780222197778,
"grad_norm": 2.807328701019287,
"learning_rate": 6.047144272036977e-06,
"loss": 0.4845,
"step": 89900
},
{
"epoch": 1.979980200197998,
"grad_norm": 2.344365358352661,
"learning_rate": 6.042742379223066e-06,
"loss": 0.4892,
"step": 90000
},
{
"epoch": 1.982180178198218,
"grad_norm": 2.1772940158843994,
"learning_rate": 6.038340486409157e-06,
"loss": 0.4849,
"step": 90100
},
{
"epoch": 1.984380156198438,
"grad_norm": 2.4292235374450684,
"learning_rate": 6.033938593595247e-06,
"loss": 0.4869,
"step": 90200
},
{
"epoch": 1.986580134198658,
"grad_norm": 2.350494861602783,
"learning_rate": 6.029536700781336e-06,
"loss": 0.4945,
"step": 90300
},
{
"epoch": 1.988780112198878,
"grad_norm": 2.447011709213257,
"learning_rate": 6.025134807967427e-06,
"loss": 0.4632,
"step": 90400
},
{
"epoch": 1.990980090199098,
"grad_norm": 2.229335069656372,
"learning_rate": 6.020732915153516e-06,
"loss": 0.491,
"step": 90500
},
{
"epoch": 1.993180068199318,
"grad_norm": 2.659064292907715,
"learning_rate": 6.016331022339606e-06,
"loss": 0.4788,
"step": 90600
},
{
"epoch": 1.995380046199538,
"grad_norm": 2.435239791870117,
"learning_rate": 6.011929129525697e-06,
"loss": 0.4947,
"step": 90700
},
{
"epoch": 1.9975800241997579,
"grad_norm": 2.0373647212982178,
"learning_rate": 6.007527236711786e-06,
"loss": 0.4832,
"step": 90800
},
{
"epoch": 1.9997800021999779,
"grad_norm": 2.644747734069824,
"learning_rate": 6.003125343897876e-06,
"loss": 0.4884,
"step": 90900
},
{
"epoch": 2.001979980200198,
"grad_norm": 2.4957003593444824,
"learning_rate": 5.998723451083967e-06,
"loss": 0.4441,
"step": 91000
},
{
"epoch": 2.004179958200418,
"grad_norm": 2.8672921657562256,
"learning_rate": 5.994321558270056e-06,
"loss": 0.4586,
"step": 91100
},
{
"epoch": 2.006379936200638,
"grad_norm": 2.2238707542419434,
"learning_rate": 5.989919665456146e-06,
"loss": 0.4508,
"step": 91200
},
{
"epoch": 2.008579914200858,
"grad_norm": 3.085266590118408,
"learning_rate": 5.985517772642237e-06,
"loss": 0.4454,
"step": 91300
},
{
"epoch": 2.010779892201078,
"grad_norm": 2.7190568447113037,
"learning_rate": 5.981115879828326e-06,
"loss": 0.4421,
"step": 91400
},
{
"epoch": 2.012979870201298,
"grad_norm": 2.966407537460327,
"learning_rate": 5.976713987014416e-06,
"loss": 0.4334,
"step": 91500
},
{
"epoch": 2.015179848201518,
"grad_norm": 2.963914394378662,
"learning_rate": 5.972312094200507e-06,
"loss": 0.4428,
"step": 91600
},
{
"epoch": 2.017379826201738,
"grad_norm": 3.2475080490112305,
"learning_rate": 5.967910201386596e-06,
"loss": 0.4387,
"step": 91700
},
{
"epoch": 2.019579804201958,
"grad_norm": 2.248386859893799,
"learning_rate": 5.963508308572686e-06,
"loss": 0.4509,
"step": 91800
},
{
"epoch": 2.021779782202178,
"grad_norm": 2.9276363849639893,
"learning_rate": 5.959106415758777e-06,
"loss": 0.4509,
"step": 91900
},
{
"epoch": 2.023979760202398,
"grad_norm": 3.2354319095611572,
"learning_rate": 5.954704522944866e-06,
"loss": 0.4396,
"step": 92000
},
{
"epoch": 2.026179738202618,
"grad_norm": 3.478252649307251,
"learning_rate": 5.950302630130956e-06,
"loss": 0.454,
"step": 92100
},
{
"epoch": 2.028379716202838,
"grad_norm": 2.1570658683776855,
"learning_rate": 5.945900737317047e-06,
"loss": 0.4426,
"step": 92200
},
{
"epoch": 2.030579694203058,
"grad_norm": 3.555510997772217,
"learning_rate": 5.941498844503136e-06,
"loss": 0.4278,
"step": 92300
},
{
"epoch": 2.032779672203278,
"grad_norm": 3.0837221145629883,
"learning_rate": 5.937096951689227e-06,
"loss": 0.4582,
"step": 92400
},
{
"epoch": 2.034979650203498,
"grad_norm": 3.023439407348633,
"learning_rate": 5.9326950588753175e-06,
"loss": 0.445,
"step": 92500
},
{
"epoch": 2.037179628203718,
"grad_norm": 2.8164618015289307,
"learning_rate": 5.928293166061407e-06,
"loss": 0.4474,
"step": 92600
},
{
"epoch": 2.039379606203938,
"grad_norm": 2.4497897624969482,
"learning_rate": 5.923891273247497e-06,
"loss": 0.4581,
"step": 92700
},
{
"epoch": 2.041579584204158,
"grad_norm": 2.560822010040283,
"learning_rate": 5.9194893804335875e-06,
"loss": 0.4402,
"step": 92800
},
{
"epoch": 2.043779562204378,
"grad_norm": 2.457819938659668,
"learning_rate": 5.915087487619677e-06,
"loss": 0.457,
"step": 92900
},
{
"epoch": 2.045979540204598,
"grad_norm": 2.840198278427124,
"learning_rate": 5.910685594805767e-06,
"loss": 0.4457,
"step": 93000
},
{
"epoch": 2.048179518204818,
"grad_norm": 3.289562940597534,
"learning_rate": 5.9062837019918575e-06,
"loss": 0.4458,
"step": 93100
},
{
"epoch": 2.050379496205038,
"grad_norm": 3.20574688911438,
"learning_rate": 5.901881809177947e-06,
"loss": 0.4464,
"step": 93200
},
{
"epoch": 2.052579474205258,
"grad_norm": 3.1382062435150146,
"learning_rate": 5.897479916364037e-06,
"loss": 0.4407,
"step": 93300
},
{
"epoch": 2.054779452205478,
"grad_norm": 2.4946656227111816,
"learning_rate": 5.8930780235501275e-06,
"loss": 0.4404,
"step": 93400
},
{
"epoch": 2.056979430205698,
"grad_norm": 3.4237630367279053,
"learning_rate": 5.888676130736217e-06,
"loss": 0.4549,
"step": 93500
},
{
"epoch": 2.059179408205918,
"grad_norm": 2.6181180477142334,
"learning_rate": 5.884274237922307e-06,
"loss": 0.4305,
"step": 93600
},
{
"epoch": 2.061379386206138,
"grad_norm": 2.9076225757598877,
"learning_rate": 5.8798723451083975e-06,
"loss": 0.4543,
"step": 93700
},
{
"epoch": 2.063579364206358,
"grad_norm": 2.6111700534820557,
"learning_rate": 5.875470452294487e-06,
"loss": 0.4426,
"step": 93800
},
{
"epoch": 2.0657793422065778,
"grad_norm": 3.1381430625915527,
"learning_rate": 5.871068559480577e-06,
"loss": 0.4509,
"step": 93900
},
{
"epoch": 2.0679793202067978,
"grad_norm": 2.934509754180908,
"learning_rate": 5.8666666666666675e-06,
"loss": 0.4538,
"step": 94000
},
{
"epoch": 2.0701792982070177,
"grad_norm": 2.8510279655456543,
"learning_rate": 5.862264773852757e-06,
"loss": 0.4396,
"step": 94100
},
{
"epoch": 2.0723792762072377,
"grad_norm": 2.753408670425415,
"learning_rate": 5.857862881038847e-06,
"loss": 0.4498,
"step": 94200
},
{
"epoch": 2.0745792542074577,
"grad_norm": 2.5191516876220703,
"learning_rate": 5.8534609882249375e-06,
"loss": 0.4355,
"step": 94300
},
{
"epoch": 2.076779232207678,
"grad_norm": 3.058117628097534,
"learning_rate": 5.849059095411027e-06,
"loss": 0.4496,
"step": 94400
},
{
"epoch": 2.078979210207898,
"grad_norm": 2.3892626762390137,
"learning_rate": 5.844657202597117e-06,
"loss": 0.448,
"step": 94500
},
{
"epoch": 2.081179188208118,
"grad_norm": 3.303252935409546,
"learning_rate": 5.840255309783207e-06,
"loss": 0.4423,
"step": 94600
},
{
"epoch": 2.083379166208338,
"grad_norm": 2.571668863296509,
"learning_rate": 5.835853416969297e-06,
"loss": 0.4477,
"step": 94700
},
{
"epoch": 2.085579144208558,
"grad_norm": 2.8675763607025146,
"learning_rate": 5.831451524155387e-06,
"loss": 0.4402,
"step": 94800
},
{
"epoch": 2.087779122208778,
"grad_norm": 1.920617938041687,
"learning_rate": 5.827049631341477e-06,
"loss": 0.4469,
"step": 94900
},
{
"epoch": 2.089979100208998,
"grad_norm": 2.4607462882995605,
"learning_rate": 5.822647738527567e-06,
"loss": 0.4578,
"step": 95000
},
{
"epoch": 2.092179078209218,
"grad_norm": 2.3950858116149902,
"learning_rate": 5.818245845713657e-06,
"loss": 0.449,
"step": 95100
},
{
"epoch": 2.094379056209438,
"grad_norm": 2.5188486576080322,
"learning_rate": 5.813843952899747e-06,
"loss": 0.4411,
"step": 95200
},
{
"epoch": 2.096579034209658,
"grad_norm": 2.665241003036499,
"learning_rate": 5.809442060085837e-06,
"loss": 0.4555,
"step": 95300
},
{
"epoch": 2.098779012209878,
"grad_norm": 3.0195603370666504,
"learning_rate": 5.805040167271927e-06,
"loss": 0.4605,
"step": 95400
},
{
"epoch": 2.100978990210098,
"grad_norm": 3.2705276012420654,
"learning_rate": 5.800638274458017e-06,
"loss": 0.437,
"step": 95500
},
{
"epoch": 2.103178968210318,
"grad_norm": 2.4358837604522705,
"learning_rate": 5.796236381644107e-06,
"loss": 0.4556,
"step": 95600
},
{
"epoch": 2.105378946210538,
"grad_norm": 2.609314203262329,
"learning_rate": 5.791834488830197e-06,
"loss": 0.4396,
"step": 95700
},
{
"epoch": 2.107578924210758,
"grad_norm": 2.715202808380127,
"learning_rate": 5.787432596016287e-06,
"loss": 0.4409,
"step": 95800
},
{
"epoch": 2.109778902210978,
"grad_norm": 2.89326548576355,
"learning_rate": 5.783030703202378e-06,
"loss": 0.4473,
"step": 95900
},
{
"epoch": 2.111978880211198,
"grad_norm": 2.722426414489746,
"learning_rate": 5.778628810388468e-06,
"loss": 0.4392,
"step": 96000
},
{
"epoch": 2.114178858211418,
"grad_norm": 2.5516304969787598,
"learning_rate": 5.774226917574558e-06,
"loss": 0.4327,
"step": 96100
},
{
"epoch": 2.116378836211638,
"grad_norm": 1.6953123807907104,
"learning_rate": 5.769825024760648e-06,
"loss": 0.4354,
"step": 96200
},
{
"epoch": 2.118578814211858,
"grad_norm": 3.260712146759033,
"learning_rate": 5.765423131946738e-06,
"loss": 0.4587,
"step": 96300
},
{
"epoch": 2.1207787922120778,
"grad_norm": 3.15496826171875,
"learning_rate": 5.761021239132828e-06,
"loss": 0.4455,
"step": 96400
},
{
"epoch": 2.1229787702122977,
"grad_norm": 3.02713680267334,
"learning_rate": 5.756619346318918e-06,
"loss": 0.443,
"step": 96500
},
{
"epoch": 2.1251787482125177,
"grad_norm": 2.6551177501678467,
"learning_rate": 5.752217453505008e-06,
"loss": 0.4361,
"step": 96600
},
{
"epoch": 2.1273787262127377,
"grad_norm": 3.143676996231079,
"learning_rate": 5.7478155606910975e-06,
"loss": 0.4463,
"step": 96700
},
{
"epoch": 2.1295787042129577,
"grad_norm": 3.07769775390625,
"learning_rate": 5.743413667877188e-06,
"loss": 0.4563,
"step": 96800
},
{
"epoch": 2.1317786822131777,
"grad_norm": 2.862227439880371,
"learning_rate": 5.739011775063278e-06,
"loss": 0.4393,
"step": 96900
},
{
"epoch": 2.1339786602133977,
"grad_norm": 2.652214288711548,
"learning_rate": 5.7346098822493675e-06,
"loss": 0.443,
"step": 97000
},
{
"epoch": 2.136178638213618,
"grad_norm": 2.3733363151550293,
"learning_rate": 5.730207989435458e-06,
"loss": 0.4449,
"step": 97100
},
{
"epoch": 2.138378616213838,
"grad_norm": 2.734473705291748,
"learning_rate": 5.725806096621548e-06,
"loss": 0.4357,
"step": 97200
},
{
"epoch": 2.140578594214058,
"grad_norm": 2.783421039581299,
"learning_rate": 5.7214042038076375e-06,
"loss": 0.434,
"step": 97300
},
{
"epoch": 2.142778572214278,
"grad_norm": 2.4740219116210938,
"learning_rate": 5.717002310993728e-06,
"loss": 0.4417,
"step": 97400
},
{
"epoch": 2.144978550214498,
"grad_norm": 2.809589147567749,
"learning_rate": 5.712600418179818e-06,
"loss": 0.4507,
"step": 97500
},
{
"epoch": 2.147178528214718,
"grad_norm": 2.179594039916992,
"learning_rate": 5.7081985253659075e-06,
"loss": 0.4552,
"step": 97600
},
{
"epoch": 2.149378506214938,
"grad_norm": 2.5812172889709473,
"learning_rate": 5.703796632551998e-06,
"loss": 0.4462,
"step": 97700
},
{
"epoch": 2.151578484215158,
"grad_norm": 2.6970343589782715,
"learning_rate": 5.699394739738088e-06,
"loss": 0.4448,
"step": 97800
},
{
"epoch": 2.153778462215378,
"grad_norm": 3.2081048488616943,
"learning_rate": 5.6949928469241775e-06,
"loss": 0.4477,
"step": 97900
},
{
"epoch": 2.155978440215598,
"grad_norm": 2.283027410507202,
"learning_rate": 5.690590954110268e-06,
"loss": 0.4554,
"step": 98000
},
{
"epoch": 2.158178418215818,
"grad_norm": 2.4790256023406982,
"learning_rate": 5.686189061296358e-06,
"loss": 0.4443,
"step": 98100
},
{
"epoch": 2.160378396216038,
"grad_norm": 3.0653131008148193,
"learning_rate": 5.6817871684824475e-06,
"loss": 0.4435,
"step": 98200
},
{
"epoch": 2.162578374216258,
"grad_norm": 3.14249849319458,
"learning_rate": 5.677385275668538e-06,
"loss": 0.4528,
"step": 98300
},
{
"epoch": 2.164778352216478,
"grad_norm": 3.3730337619781494,
"learning_rate": 5.672983382854628e-06,
"loss": 0.4397,
"step": 98400
},
{
"epoch": 2.166978330216698,
"grad_norm": 3.2641589641571045,
"learning_rate": 5.6685814900407175e-06,
"loss": 0.4365,
"step": 98500
},
{
"epoch": 2.169178308216918,
"grad_norm": 3.698474407196045,
"learning_rate": 5.664179597226808e-06,
"loss": 0.4416,
"step": 98600
},
{
"epoch": 2.171378286217138,
"grad_norm": 2.253495454788208,
"learning_rate": 5.659777704412897e-06,
"loss": 0.4534,
"step": 98700
},
{
"epoch": 2.173578264217358,
"grad_norm": 3.342864990234375,
"learning_rate": 5.6553758115989875e-06,
"loss": 0.4546,
"step": 98800
},
{
"epoch": 2.1757782422175778,
"grad_norm": 2.818357229232788,
"learning_rate": 5.650973918785078e-06,
"loss": 0.4327,
"step": 98900
},
{
"epoch": 2.1779782202177977,
"grad_norm": 3.623086452484131,
"learning_rate": 5.646572025971167e-06,
"loss": 0.4566,
"step": 99000
},
{
"epoch": 2.1801781982180177,
"grad_norm": 3.0294673442840576,
"learning_rate": 5.6421701331572575e-06,
"loss": 0.4437,
"step": 99100
},
{
"epoch": 2.1823781762182377,
"grad_norm": 2.562649726867676,
"learning_rate": 5.637768240343348e-06,
"loss": 0.4504,
"step": 99200
},
{
"epoch": 2.1845781542184577,
"grad_norm": 2.9399819374084473,
"learning_rate": 5.633366347529439e-06,
"loss": 0.4405,
"step": 99300
},
{
"epoch": 2.1867781322186777,
"grad_norm": 2.589012861251831,
"learning_rate": 5.628964454715528e-06,
"loss": 0.4332,
"step": 99400
},
{
"epoch": 2.1889781102188977,
"grad_norm": 3.24257230758667,
"learning_rate": 5.624562561901619e-06,
"loss": 0.4486,
"step": 99500
},
{
"epoch": 2.1911780882191176,
"grad_norm": 2.6864874362945557,
"learning_rate": 5.620160669087709e-06,
"loss": 0.4476,
"step": 99600
},
{
"epoch": 2.1933780662193376,
"grad_norm": 2.183894634246826,
"learning_rate": 5.615758776273798e-06,
"loss": 0.4517,
"step": 99700
},
{
"epoch": 2.1955780442195576,
"grad_norm": 2.297757625579834,
"learning_rate": 5.611356883459889e-06,
"loss": 0.4414,
"step": 99800
},
{
"epoch": 2.1977780222197776,
"grad_norm": 2.6887316703796387,
"learning_rate": 5.606954990645979e-06,
"loss": 0.4359,
"step": 99900
},
{
"epoch": 2.199978000219998,
"grad_norm": 2.8383491039276123,
"learning_rate": 5.602553097832068e-06,
"loss": 0.4455,
"step": 100000
},
{
"epoch": 2.199978000219998,
"eval_loss": 0.5539576411247253,
"eval_runtime": 386.4228,
"eval_samples_per_second": 155.27,
"eval_steps_per_second": 4.852,
"step": 100000
},
{
"epoch": 2.202177978220218,
"grad_norm": 2.4842607975006104,
"learning_rate": 5.598151205018159e-06,
"loss": 0.4421,
"step": 100100
},
{
"epoch": 2.204377956220438,
"grad_norm": 2.3061771392822266,
"learning_rate": 5.593749312204249e-06,
"loss": 0.4529,
"step": 100200
},
{
"epoch": 2.206577934220658,
"grad_norm": 2.9890830516815186,
"learning_rate": 5.589347419390338e-06,
"loss": 0.4251,
"step": 100300
},
{
"epoch": 2.208777912220878,
"grad_norm": 2.5472826957702637,
"learning_rate": 5.584945526576429e-06,
"loss": 0.4384,
"step": 100400
},
{
"epoch": 2.210977890221098,
"grad_norm": 3.314694881439209,
"learning_rate": 5.580543633762519e-06,
"loss": 0.4372,
"step": 100500
},
{
"epoch": 2.213177868221318,
"grad_norm": 3.1046979427337646,
"learning_rate": 5.576141740948608e-06,
"loss": 0.434,
"step": 100600
},
{
"epoch": 2.215377846221538,
"grad_norm": 2.180180788040161,
"learning_rate": 5.571739848134699e-06,
"loss": 0.43,
"step": 100700
},
{
"epoch": 2.217577824221758,
"grad_norm": 3.7238945960998535,
"learning_rate": 5.567337955320788e-06,
"loss": 0.4404,
"step": 100800
},
{
"epoch": 2.219777802221978,
"grad_norm": 3.2101945877075195,
"learning_rate": 5.562936062506878e-06,
"loss": 0.4393,
"step": 100900
},
{
"epoch": 2.221977780222198,
"grad_norm": 2.822737455368042,
"learning_rate": 5.558534169692969e-06,
"loss": 0.4407,
"step": 101000
},
{
"epoch": 2.224177758222418,
"grad_norm": 2.736593723297119,
"learning_rate": 5.554132276879058e-06,
"loss": 0.4603,
"step": 101100
},
{
"epoch": 2.226377736222638,
"grad_norm": 2.5259158611297607,
"learning_rate": 5.549730384065148e-06,
"loss": 0.438,
"step": 101200
},
{
"epoch": 2.2285777142228578,
"grad_norm": 2.8023760318756104,
"learning_rate": 5.545328491251239e-06,
"loss": 0.4476,
"step": 101300
},
{
"epoch": 2.2307776922230778,
"grad_norm": 3.469649076461792,
"learning_rate": 5.540926598437328e-06,
"loss": 0.4498,
"step": 101400
},
{
"epoch": 2.2329776702232977,
"grad_norm": 2.2170920372009277,
"learning_rate": 5.536524705623418e-06,
"loss": 0.4531,
"step": 101500
},
{
"epoch": 2.2351776482235177,
"grad_norm": 2.9399514198303223,
"learning_rate": 5.532122812809509e-06,
"loss": 0.4496,
"step": 101600
},
{
"epoch": 2.2373776262237377,
"grad_norm": 3.1350746154785156,
"learning_rate": 5.527720919995598e-06,
"loss": 0.4412,
"step": 101700
},
{
"epoch": 2.2395776042239577,
"grad_norm": 2.7231826782226562,
"learning_rate": 5.523319027181688e-06,
"loss": 0.4434,
"step": 101800
},
{
"epoch": 2.2417775822241777,
"grad_norm": 2.8241002559661865,
"learning_rate": 5.518917134367779e-06,
"loss": 0.4405,
"step": 101900
},
{
"epoch": 2.2439775602243976,
"grad_norm": 2.6854066848754883,
"learning_rate": 5.514515241553868e-06,
"loss": 0.4558,
"step": 102000
},
{
"epoch": 2.2461775382246176,
"grad_norm": 3.1952197551727295,
"learning_rate": 5.510113348739958e-06,
"loss": 0.4354,
"step": 102100
},
{
"epoch": 2.2483775162248376,
"grad_norm": 2.9026472568511963,
"learning_rate": 5.505711455926049e-06,
"loss": 0.4485,
"step": 102200
},
{
"epoch": 2.2505774942250576,
"grad_norm": 3.1712558269500732,
"learning_rate": 5.501309563112138e-06,
"loss": 0.4468,
"step": 102300
},
{
"epoch": 2.2527774722252776,
"grad_norm": 2.9717068672180176,
"learning_rate": 5.496907670298228e-06,
"loss": 0.4386,
"step": 102400
},
{
"epoch": 2.2549774502254976,
"grad_norm": 2.8104095458984375,
"learning_rate": 5.492505777484319e-06,
"loss": 0.4452,
"step": 102500
},
{
"epoch": 2.2571774282257175,
"grad_norm": 3.142512798309326,
"learning_rate": 5.488103884670408e-06,
"loss": 0.4487,
"step": 102600
},
{
"epoch": 2.259377406225938,
"grad_norm": 3.723659038543701,
"learning_rate": 5.483701991856498e-06,
"loss": 0.449,
"step": 102700
},
{
"epoch": 2.2615773842261575,
"grad_norm": 3.365520477294922,
"learning_rate": 5.4793000990425895e-06,
"loss": 0.4409,
"step": 102800
},
{
"epoch": 2.263777362226378,
"grad_norm": 2.1158196926116943,
"learning_rate": 5.474898206228679e-06,
"loss": 0.4526,
"step": 102900
},
{
"epoch": 2.265977340226598,
"grad_norm": 2.77187442779541,
"learning_rate": 5.470496313414769e-06,
"loss": 0.4597,
"step": 103000
},
{
"epoch": 2.268177318226818,
"grad_norm": 3.1668035984039307,
"learning_rate": 5.4660944206008595e-06,
"loss": 0.4515,
"step": 103100
},
{
"epoch": 2.270377296227038,
"grad_norm": 3.3199713230133057,
"learning_rate": 5.461692527786949e-06,
"loss": 0.4421,
"step": 103200
},
{
"epoch": 2.272577274227258,
"grad_norm": 3.0452702045440674,
"learning_rate": 5.457290634973039e-06,
"loss": 0.451,
"step": 103300
},
{
"epoch": 2.274777252227478,
"grad_norm": 2.889191150665283,
"learning_rate": 5.4528887421591295e-06,
"loss": 0.4433,
"step": 103400
},
{
"epoch": 2.276977230227698,
"grad_norm": 3.1005496978759766,
"learning_rate": 5.448486849345219e-06,
"loss": 0.459,
"step": 103500
},
{
"epoch": 2.279177208227918,
"grad_norm": 3.024289131164551,
"learning_rate": 5.444084956531309e-06,
"loss": 0.4369,
"step": 103600
},
{
"epoch": 2.281377186228138,
"grad_norm": 2.3427116870880127,
"learning_rate": 5.4396830637173995e-06,
"loss": 0.4461,
"step": 103700
},
{
"epoch": 2.2835771642283578,
"grad_norm": 3.6452486515045166,
"learning_rate": 5.435281170903489e-06,
"loss": 0.4626,
"step": 103800
},
{
"epoch": 2.2857771422285778,
"grad_norm": 3.5883066654205322,
"learning_rate": 5.430879278089579e-06,
"loss": 0.4439,
"step": 103900
},
{
"epoch": 2.2879771202287977,
"grad_norm": 3.1896305084228516,
"learning_rate": 5.4264773852756695e-06,
"loss": 0.4342,
"step": 104000
},
{
"epoch": 2.2901770982290177,
"grad_norm": 3.0149104595184326,
"learning_rate": 5.422075492461759e-06,
"loss": 0.4503,
"step": 104100
},
{
"epoch": 2.2923770762292377,
"grad_norm": 3.1118035316467285,
"learning_rate": 5.417673599647849e-06,
"loss": 0.4402,
"step": 104200
},
{
"epoch": 2.2945770542294577,
"grad_norm": 3.0000152587890625,
"learning_rate": 5.4132717068339395e-06,
"loss": 0.4321,
"step": 104300
},
{
"epoch": 2.2967770322296777,
"grad_norm": 3.1988613605499268,
"learning_rate": 5.408869814020029e-06,
"loss": 0.4458,
"step": 104400
},
{
"epoch": 2.2989770102298976,
"grad_norm": 2.5336127281188965,
"learning_rate": 5.404467921206119e-06,
"loss": 0.4412,
"step": 104500
},
{
"epoch": 2.3011769882301176,
"grad_norm": 2.478907823562622,
"learning_rate": 5.400066028392209e-06,
"loss": 0.4391,
"step": 104600
},
{
"epoch": 2.3033769662303376,
"grad_norm": 3.0680346488952637,
"learning_rate": 5.395664135578299e-06,
"loss": 0.4466,
"step": 104700
},
{
"epoch": 2.3055769442305576,
"grad_norm": 2.976754665374756,
"learning_rate": 5.391262242764389e-06,
"loss": 0.4534,
"step": 104800
},
{
"epoch": 2.3077769222307776,
"grad_norm": 2.921550989151001,
"learning_rate": 5.386860349950479e-06,
"loss": 0.4461,
"step": 104900
},
{
"epoch": 2.3099769002309976,
"grad_norm": 2.6085400581359863,
"learning_rate": 5.382458457136569e-06,
"loss": 0.439,
"step": 105000
},
{
"epoch": 2.3121768782312175,
"grad_norm": 3.231365203857422,
"learning_rate": 5.378056564322659e-06,
"loss": 0.4511,
"step": 105100
},
{
"epoch": 2.3143768562314375,
"grad_norm": 3.2471604347229004,
"learning_rate": 5.373654671508749e-06,
"loss": 0.434,
"step": 105200
},
{
"epoch": 2.3165768342316575,
"grad_norm": 3.265526056289673,
"learning_rate": 5.369252778694839e-06,
"loss": 0.4414,
"step": 105300
},
{
"epoch": 2.318776812231878,
"grad_norm": 2.632627010345459,
"learning_rate": 5.364850885880929e-06,
"loss": 0.4469,
"step": 105400
},
{
"epoch": 2.3209767902320975,
"grad_norm": 3.3575692176818848,
"learning_rate": 5.360448993067019e-06,
"loss": 0.4517,
"step": 105500
},
{
"epoch": 2.323176768232318,
"grad_norm": 2.751236915588379,
"learning_rate": 5.356047100253109e-06,
"loss": 0.4321,
"step": 105600
},
{
"epoch": 2.3253767462325374,
"grad_norm": 3.4512314796447754,
"learning_rate": 5.351645207439199e-06,
"loss": 0.4513,
"step": 105700
},
{
"epoch": 2.327576724232758,
"grad_norm": 2.5892844200134277,
"learning_rate": 5.347243314625289e-06,
"loss": 0.448,
"step": 105800
},
{
"epoch": 2.329776702232978,
"grad_norm": 3.1810543537139893,
"learning_rate": 5.342841421811379e-06,
"loss": 0.4489,
"step": 105900
},
{
"epoch": 2.331976680233198,
"grad_norm": 3.527425765991211,
"learning_rate": 5.338439528997469e-06,
"loss": 0.4362,
"step": 106000
},
{
"epoch": 2.334176658233418,
"grad_norm": 2.97705340385437,
"learning_rate": 5.334037636183559e-06,
"loss": 0.4424,
"step": 106100
},
{
"epoch": 2.336376636233638,
"grad_norm": 2.3554928302764893,
"learning_rate": 5.329635743369649e-06,
"loss": 0.4354,
"step": 106200
},
{
"epoch": 2.3385766142338578,
"grad_norm": 3.598785161972046,
"learning_rate": 5.32523385055574e-06,
"loss": 0.4429,
"step": 106300
},
{
"epoch": 2.3407765922340777,
"grad_norm": 3.603203058242798,
"learning_rate": 5.32083195774183e-06,
"loss": 0.4508,
"step": 106400
},
{
"epoch": 2.3429765702342977,
"grad_norm": 2.5761771202087402,
"learning_rate": 5.31643006492792e-06,
"loss": 0.448,
"step": 106500
},
{
"epoch": 2.3451765482345177,
"grad_norm": 3.6221818923950195,
"learning_rate": 5.31202817211401e-06,
"loss": 0.4305,
"step": 106600
},
{
"epoch": 2.3473765262347377,
"grad_norm": 3.062361717224121,
"learning_rate": 5.3076262793000995e-06,
"loss": 0.45,
"step": 106700
},
{
"epoch": 2.3495765042349577,
"grad_norm": 2.6559677124023438,
"learning_rate": 5.30322438648619e-06,
"loss": 0.4569,
"step": 106800
},
{
"epoch": 2.3517764822351777,
"grad_norm": 2.8080978393554688,
"learning_rate": 5.29882249367228e-06,
"loss": 0.4376,
"step": 106900
},
{
"epoch": 2.3539764602353976,
"grad_norm": 2.880061388015747,
"learning_rate": 5.2944206008583695e-06,
"loss": 0.4435,
"step": 107000
},
{
"epoch": 2.3561764382356176,
"grad_norm": 2.902592420578003,
"learning_rate": 5.29001870804446e-06,
"loss": 0.4446,
"step": 107100
},
{
"epoch": 2.3583764162358376,
"grad_norm": 2.560408592224121,
"learning_rate": 5.28561681523055e-06,
"loss": 0.4533,
"step": 107200
},
{
"epoch": 2.3605763942360576,
"grad_norm": 3.5301778316497803,
"learning_rate": 5.2812149224166395e-06,
"loss": 0.4499,
"step": 107300
},
{
"epoch": 2.3627763722362776,
"grad_norm": 3.1170268058776855,
"learning_rate": 5.27681302960273e-06,
"loss": 0.4392,
"step": 107400
},
{
"epoch": 2.3649763502364975,
"grad_norm": 2.9975242614746094,
"learning_rate": 5.27241113678882e-06,
"loss": 0.4443,
"step": 107500
},
{
"epoch": 2.3671763282367175,
"grad_norm": 2.9318737983703613,
"learning_rate": 5.2680092439749095e-06,
"loss": 0.4382,
"step": 107600
},
{
"epoch": 2.3693763062369375,
"grad_norm": 2.7009778022766113,
"learning_rate": 5.263607351161e-06,
"loss": 0.4486,
"step": 107700
},
{
"epoch": 2.3715762842371575,
"grad_norm": 3.265301465988159,
"learning_rate": 5.25920545834709e-06,
"loss": 0.4386,
"step": 107800
},
{
"epoch": 2.3737762622373775,
"grad_norm": 3.5099949836730957,
"learning_rate": 5.2548035655331795e-06,
"loss": 0.4354,
"step": 107900
},
{
"epoch": 2.3759762402375975,
"grad_norm": 2.997199296951294,
"learning_rate": 5.25040167271927e-06,
"loss": 0.4449,
"step": 108000
},
{
"epoch": 2.3781762182378174,
"grad_norm": 3.5661022663116455,
"learning_rate": 5.24599977990536e-06,
"loss": 0.4533,
"step": 108100
},
{
"epoch": 2.3803761962380374,
"grad_norm": 2.6311588287353516,
"learning_rate": 5.2415978870914495e-06,
"loss": 0.4535,
"step": 108200
},
{
"epoch": 2.382576174238258,
"grad_norm": 2.3854992389678955,
"learning_rate": 5.23719599427754e-06,
"loss": 0.441,
"step": 108300
},
{
"epoch": 2.3847761522384774,
"grad_norm": 3.3768720626831055,
"learning_rate": 5.23279410146363e-06,
"loss": 0.4467,
"step": 108400
},
{
"epoch": 2.386976130238698,
"grad_norm": 3.2119550704956055,
"learning_rate": 5.2283922086497195e-06,
"loss": 0.4608,
"step": 108500
},
{
"epoch": 2.389176108238918,
"grad_norm": 3.434720277786255,
"learning_rate": 5.22399031583581e-06,
"loss": 0.4415,
"step": 108600
},
{
"epoch": 2.3913760862391378,
"grad_norm": 2.7094149589538574,
"learning_rate": 5.219588423021899e-06,
"loss": 0.4562,
"step": 108700
},
{
"epoch": 2.3935760642393578,
"grad_norm": 2.9980342388153076,
"learning_rate": 5.2151865302079895e-06,
"loss": 0.4564,
"step": 108800
},
{
"epoch": 2.3957760422395777,
"grad_norm": 3.241988182067871,
"learning_rate": 5.21078463739408e-06,
"loss": 0.4494,
"step": 108900
},
{
"epoch": 2.3979760202397977,
"grad_norm": 3.08487606048584,
"learning_rate": 5.206382744580169e-06,
"loss": 0.439,
"step": 109000
},
{
"epoch": 2.4001759982400177,
"grad_norm": 3.0313308238983154,
"learning_rate": 5.2019808517662595e-06,
"loss": 0.4412,
"step": 109100
},
{
"epoch": 2.4023759762402377,
"grad_norm": 3.303107738494873,
"learning_rate": 5.19757895895235e-06,
"loss": 0.4437,
"step": 109200
},
{
"epoch": 2.4045759542404577,
"grad_norm": 2.7623887062072754,
"learning_rate": 5.193177066138439e-06,
"loss": 0.4426,
"step": 109300
},
{
"epoch": 2.4067759322406777,
"grad_norm": 3.3932597637176514,
"learning_rate": 5.1887751733245295e-06,
"loss": 0.4381,
"step": 109400
},
{
"epoch": 2.4089759102408976,
"grad_norm": 2.618532657623291,
"learning_rate": 5.18437328051062e-06,
"loss": 0.4466,
"step": 109500
},
{
"epoch": 2.4111758882411176,
"grad_norm": 2.4478089809417725,
"learning_rate": 5.179971387696709e-06,
"loss": 0.4494,
"step": 109600
},
{
"epoch": 2.4133758662413376,
"grad_norm": 3.646751642227173,
"learning_rate": 5.1755694948828e-06,
"loss": 0.4684,
"step": 109700
},
{
"epoch": 2.4155758442415576,
"grad_norm": 2.54317569732666,
"learning_rate": 5.171167602068891e-06,
"loss": 0.4467,
"step": 109800
},
{
"epoch": 2.4177758222417776,
"grad_norm": 2.939131021499634,
"learning_rate": 5.166765709254981e-06,
"loss": 0.4438,
"step": 109900
},
{
"epoch": 2.4199758002419975,
"grad_norm": 3.4897677898406982,
"learning_rate": 5.16236381644107e-06,
"loss": 0.4444,
"step": 110000
},
{
"epoch": 2.4221757782422175,
"grad_norm": 3.108614683151245,
"learning_rate": 5.157961923627161e-06,
"loss": 0.4639,
"step": 110100
},
{
"epoch": 2.4243757562424375,
"grad_norm": 3.135338068008423,
"learning_rate": 5.153560030813251e-06,
"loss": 0.4424,
"step": 110200
},
{
"epoch": 2.4265757342426575,
"grad_norm": 2.249314785003662,
"learning_rate": 5.14915813799934e-06,
"loss": 0.4298,
"step": 110300
},
{
"epoch": 2.4287757122428775,
"grad_norm": 2.582010269165039,
"learning_rate": 5.144756245185431e-06,
"loss": 0.4404,
"step": 110400
},
{
"epoch": 2.4309756902430975,
"grad_norm": 3.1904852390289307,
"learning_rate": 5.140354352371521e-06,
"loss": 0.4526,
"step": 110500
},
{
"epoch": 2.4331756682433174,
"grad_norm": 3.054769277572632,
"learning_rate": 5.13595245955761e-06,
"loss": 0.4332,
"step": 110600
},
{
"epoch": 2.4353756462435374,
"grad_norm": 2.8248226642608643,
"learning_rate": 5.131550566743701e-06,
"loss": 0.446,
"step": 110700
},
{
"epoch": 2.4375756242437574,
"grad_norm": 3.6720070838928223,
"learning_rate": 5.12714867392979e-06,
"loss": 0.445,
"step": 110800
},
{
"epoch": 2.4397756022439774,
"grad_norm": 2.920863389968872,
"learning_rate": 5.12274678111588e-06,
"loss": 0.4482,
"step": 110900
},
{
"epoch": 2.441975580244198,
"grad_norm": 2.426818370819092,
"learning_rate": 5.118344888301971e-06,
"loss": 0.452,
"step": 111000
},
{
"epoch": 2.4441755582444173,
"grad_norm": 3.390026330947876,
"learning_rate": 5.11394299548806e-06,
"loss": 0.4413,
"step": 111100
},
{
"epoch": 2.4463755362446378,
"grad_norm": 3.1187210083007812,
"learning_rate": 5.10954110267415e-06,
"loss": 0.4381,
"step": 111200
},
{
"epoch": 2.4485755142448573,
"grad_norm": 3.0595436096191406,
"learning_rate": 5.105139209860241e-06,
"loss": 0.4432,
"step": 111300
},
{
"epoch": 2.4507754922450777,
"grad_norm": 2.9489197731018066,
"learning_rate": 5.10073731704633e-06,
"loss": 0.438,
"step": 111400
},
{
"epoch": 2.4529754702452977,
"grad_norm": 2.4834353923797607,
"learning_rate": 5.09633542423242e-06,
"loss": 0.4452,
"step": 111500
},
{
"epoch": 2.4551754482455177,
"grad_norm": 3.135232925415039,
"learning_rate": 5.091933531418511e-06,
"loss": 0.4493,
"step": 111600
},
{
"epoch": 2.4573754262457377,
"grad_norm": 3.158200263977051,
"learning_rate": 5.0875316386046e-06,
"loss": 0.446,
"step": 111700
},
{
"epoch": 2.4595754042459577,
"grad_norm": 3.199408531188965,
"learning_rate": 5.08312974579069e-06,
"loss": 0.4457,
"step": 111800
},
{
"epoch": 2.4617753822461776,
"grad_norm": 2.692122459411621,
"learning_rate": 5.078727852976781e-06,
"loss": 0.4453,
"step": 111900
},
{
"epoch": 2.4639753602463976,
"grad_norm": 2.708963632583618,
"learning_rate": 5.07432596016287e-06,
"loss": 0.4375,
"step": 112000
},
{
"epoch": 2.4661753382466176,
"grad_norm": 2.9427683353424072,
"learning_rate": 5.06992406734896e-06,
"loss": 0.4419,
"step": 112100
},
{
"epoch": 2.4683753162468376,
"grad_norm": 3.154421329498291,
"learning_rate": 5.065522174535051e-06,
"loss": 0.4539,
"step": 112200
},
{
"epoch": 2.4705752942470576,
"grad_norm": 2.364830255508423,
"learning_rate": 5.06112028172114e-06,
"loss": 0.4535,
"step": 112300
},
{
"epoch": 2.4727752722472776,
"grad_norm": 3.534681797027588,
"learning_rate": 5.05671838890723e-06,
"loss": 0.4457,
"step": 112400
},
{
"epoch": 2.4749752502474975,
"grad_norm": 3.2701926231384277,
"learning_rate": 5.052316496093321e-06,
"loss": 0.447,
"step": 112500
},
{
"epoch": 2.4771752282477175,
"grad_norm": 2.395355701446533,
"learning_rate": 5.04791460327941e-06,
"loss": 0.4399,
"step": 112600
},
{
"epoch": 2.4793752062479375,
"grad_norm": 3.506509780883789,
"learning_rate": 5.0435127104655e-06,
"loss": 0.4527,
"step": 112700
},
{
"epoch": 2.4815751842481575,
"grad_norm": 2.844534397125244,
"learning_rate": 5.03911081765159e-06,
"loss": 0.4436,
"step": 112800
},
{
"epoch": 2.4837751622483775,
"grad_norm": 3.3356661796569824,
"learning_rate": 5.03470892483768e-06,
"loss": 0.4432,
"step": 112900
},
{
"epoch": 2.4859751402485974,
"grad_norm": 2.6262450218200684,
"learning_rate": 5.03030703202377e-06,
"loss": 0.4508,
"step": 113000
},
{
"epoch": 2.4881751182488174,
"grad_norm": 2.852914810180664,
"learning_rate": 5.02590513920986e-06,
"loss": 0.453,
"step": 113100
},
{
"epoch": 2.4903750962490374,
"grad_norm": 3.224490165710449,
"learning_rate": 5.021503246395951e-06,
"loss": 0.4637,
"step": 113200
},
{
"epoch": 2.4925750742492574,
"grad_norm": 2.180025577545166,
"learning_rate": 5.017101353582041e-06,
"loss": 0.4316,
"step": 113300
},
{
"epoch": 2.4947750522494774,
"grad_norm": 2.4598264694213867,
"learning_rate": 5.0126994607681314e-06,
"loss": 0.4381,
"step": 113400
},
{
"epoch": 2.4969750302496974,
"grad_norm": 2.587557315826416,
"learning_rate": 5.008297567954221e-06,
"loss": 0.4469,
"step": 113500
},
{
"epoch": 2.4991750082499173,
"grad_norm": 2.93766450881958,
"learning_rate": 5.003895675140311e-06,
"loss": 0.459,
"step": 113600
},
{
"epoch": 2.5013749862501378,
"grad_norm": 2.926692485809326,
"learning_rate": 4.999493782326401e-06,
"loss": 0.4444,
"step": 113700
},
{
"epoch": 2.5035749642503573,
"grad_norm": 3.22385311126709,
"learning_rate": 4.995091889512491e-06,
"loss": 0.4395,
"step": 113800
},
{
"epoch": 2.5057749422505777,
"grad_norm": 2.241689682006836,
"learning_rate": 4.99068999669858e-06,
"loss": 0.4461,
"step": 113900
},
{
"epoch": 2.5079749202507973,
"grad_norm": 3.1882591247558594,
"learning_rate": 4.986288103884671e-06,
"loss": 0.4482,
"step": 114000
},
{
"epoch": 2.5101748982510177,
"grad_norm": 2.0357823371887207,
"learning_rate": 4.981886211070761e-06,
"loss": 0.4291,
"step": 114100
},
{
"epoch": 2.5123748762512372,
"grad_norm": 3.0307114124298096,
"learning_rate": 4.977484318256851e-06,
"loss": 0.4556,
"step": 114200
},
{
"epoch": 2.5145748542514577,
"grad_norm": 3.274482488632202,
"learning_rate": 4.9730824254429414e-06,
"loss": 0.4561,
"step": 114300
},
{
"epoch": 2.5167748322516776,
"grad_norm": 2.6364364624023438,
"learning_rate": 4.968680532629031e-06,
"loss": 0.4462,
"step": 114400
},
{
"epoch": 2.5189748102518976,
"grad_norm": 3.74102520942688,
"learning_rate": 4.964278639815121e-06,
"loss": 0.4343,
"step": 114500
},
{
"epoch": 2.5211747882521176,
"grad_norm": 2.6400420665740967,
"learning_rate": 4.9598767470012114e-06,
"loss": 0.431,
"step": 114600
},
{
"epoch": 2.5233747662523376,
"grad_norm": 3.313936948776245,
"learning_rate": 4.955474854187301e-06,
"loss": 0.4361,
"step": 114700
},
{
"epoch": 2.5255747442525576,
"grad_norm": 3.1733415126800537,
"learning_rate": 4.951072961373391e-06,
"loss": 0.4346,
"step": 114800
},
{
"epoch": 2.5277747222527776,
"grad_norm": 3.2925596237182617,
"learning_rate": 4.946671068559481e-06,
"loss": 0.4382,
"step": 114900
},
{
"epoch": 2.5299747002529975,
"grad_norm": 3.0472724437713623,
"learning_rate": 4.942269175745571e-06,
"loss": 0.4294,
"step": 115000
},
{
"epoch": 2.5321746782532175,
"grad_norm": 2.684380054473877,
"learning_rate": 4.937867282931661e-06,
"loss": 0.446,
"step": 115100
},
{
"epoch": 2.5343746562534375,
"grad_norm": 2.86007022857666,
"learning_rate": 4.933465390117751e-06,
"loss": 0.4445,
"step": 115200
},
{
"epoch": 2.5365746342536575,
"grad_norm": 3.1201276779174805,
"learning_rate": 4.929063497303841e-06,
"loss": 0.4363,
"step": 115300
},
{
"epoch": 2.5387746122538775,
"grad_norm": 2.558084726333618,
"learning_rate": 4.924661604489931e-06,
"loss": 0.4272,
"step": 115400
},
{
"epoch": 2.5409745902540974,
"grad_norm": 3.3204970359802246,
"learning_rate": 4.920259711676021e-06,
"loss": 0.425,
"step": 115500
},
{
"epoch": 2.5431745682543174,
"grad_norm": 2.824089288711548,
"learning_rate": 4.915857818862111e-06,
"loss": 0.4428,
"step": 115600
},
{
"epoch": 2.5453745462545374,
"grad_norm": 3.430072784423828,
"learning_rate": 4.911455926048201e-06,
"loss": 0.435,
"step": 115700
},
{
"epoch": 2.5475745242547574,
"grad_norm": 3.415888547897339,
"learning_rate": 4.9070540332342914e-06,
"loss": 0.4546,
"step": 115800
},
{
"epoch": 2.5497745022549774,
"grad_norm": 3.0503039360046387,
"learning_rate": 4.902652140420382e-06,
"loss": 0.4359,
"step": 115900
},
{
"epoch": 2.5519744802551974,
"grad_norm": 3.0880868434906006,
"learning_rate": 4.898250247606471e-06,
"loss": 0.445,
"step": 116000
},
{
"epoch": 2.5541744582554173,
"grad_norm": 3.1325924396514893,
"learning_rate": 4.8938483547925614e-06,
"loss": 0.4339,
"step": 116100
},
{
"epoch": 2.5563744362556373,
"grad_norm": 3.6278367042541504,
"learning_rate": 4.889446461978652e-06,
"loss": 0.4573,
"step": 116200
},
{
"epoch": 2.5585744142558573,
"grad_norm": 2.8526251316070557,
"learning_rate": 4.885044569164741e-06,
"loss": 0.4359,
"step": 116300
},
{
"epoch": 2.5607743922560773,
"grad_norm": 2.335749864578247,
"learning_rate": 4.8806426763508314e-06,
"loss": 0.4336,
"step": 116400
},
{
"epoch": 2.5629743702562973,
"grad_norm": 2.912464141845703,
"learning_rate": 4.876240783536922e-06,
"loss": 0.4463,
"step": 116500
},
{
"epoch": 2.5651743482565177,
"grad_norm": 3.1221654415130615,
"learning_rate": 4.871838890723011e-06,
"loss": 0.4394,
"step": 116600
},
{
"epoch": 2.5673743262567372,
"grad_norm": 3.0189766883850098,
"learning_rate": 4.8674369979091014e-06,
"loss": 0.4279,
"step": 116700
},
{
"epoch": 2.5695743042569577,
"grad_norm": 2.7130327224731445,
"learning_rate": 4.863035105095191e-06,
"loss": 0.4397,
"step": 116800
},
{
"epoch": 2.571774282257177,
"grad_norm": 3.0579302310943604,
"learning_rate": 4.858633212281281e-06,
"loss": 0.4509,
"step": 116900
},
{
"epoch": 2.5739742602573976,
"grad_norm": 2.568333625793457,
"learning_rate": 4.8542313194673714e-06,
"loss": 0.4391,
"step": 117000
},
{
"epoch": 2.576174238257617,
"grad_norm": 3.574082612991333,
"learning_rate": 4.849829426653461e-06,
"loss": 0.4354,
"step": 117100
},
{
"epoch": 2.5783742162578376,
"grad_norm": 2.516016721725464,
"learning_rate": 4.845427533839551e-06,
"loss": 0.4417,
"step": 117200
},
{
"epoch": 2.5805741942580576,
"grad_norm": 2.464613199234009,
"learning_rate": 4.8410256410256414e-06,
"loss": 0.4464,
"step": 117300
},
{
"epoch": 2.5827741722582775,
"grad_norm": 2.841379404067993,
"learning_rate": 4.836623748211731e-06,
"loss": 0.45,
"step": 117400
},
{
"epoch": 2.5849741502584975,
"grad_norm": 2.6032309532165527,
"learning_rate": 4.832221855397821e-06,
"loss": 0.4258,
"step": 117500
},
{
"epoch": 2.5871741282587175,
"grad_norm": 2.964139938354492,
"learning_rate": 4.8278199625839114e-06,
"loss": 0.4495,
"step": 117600
},
{
"epoch": 2.5893741062589375,
"grad_norm": 4.020530700683594,
"learning_rate": 4.823418069770002e-06,
"loss": 0.435,
"step": 117700
},
{
"epoch": 2.5915740842591575,
"grad_norm": 2.493910312652588,
"learning_rate": 4.819016176956092e-06,
"loss": 0.4371,
"step": 117800
},
{
"epoch": 2.5937740622593775,
"grad_norm": 3.542248249053955,
"learning_rate": 4.8146142841421814e-06,
"loss": 0.4489,
"step": 117900
},
{
"epoch": 2.5959740402595974,
"grad_norm": 3.5736639499664307,
"learning_rate": 4.810212391328272e-06,
"loss": 0.43,
"step": 118000
},
{
"epoch": 2.5981740182598174,
"grad_norm": 2.6904780864715576,
"learning_rate": 4.805810498514362e-06,
"loss": 0.4326,
"step": 118100
},
{
"epoch": 2.6003739962600374,
"grad_norm": 3.0727078914642334,
"learning_rate": 4.8014086057004514e-06,
"loss": 0.4338,
"step": 118200
},
{
"epoch": 2.6025739742602574,
"grad_norm": 3.355623722076416,
"learning_rate": 4.797006712886542e-06,
"loss": 0.4489,
"step": 118300
},
{
"epoch": 2.6047739522604774,
"grad_norm": 2.657305955886841,
"learning_rate": 4.792604820072632e-06,
"loss": 0.4416,
"step": 118400
},
{
"epoch": 2.6069739302606973,
"grad_norm": 2.6770079135894775,
"learning_rate": 4.7882029272587214e-06,
"loss": 0.4317,
"step": 118500
},
{
"epoch": 2.6091739082609173,
"grad_norm": 3.4358301162719727,
"learning_rate": 4.783801034444812e-06,
"loss": 0.4307,
"step": 118600
},
{
"epoch": 2.6113738862611373,
"grad_norm": 2.719377040863037,
"learning_rate": 4.779399141630901e-06,
"loss": 0.4452,
"step": 118700
},
{
"epoch": 2.6135738642613573,
"grad_norm": 3.2705419063568115,
"learning_rate": 4.7749972488169914e-06,
"loss": 0.4391,
"step": 118800
},
{
"epoch": 2.6157738422615773,
"grad_norm": 2.5769264698028564,
"learning_rate": 4.770595356003082e-06,
"loss": 0.4566,
"step": 118900
},
{
"epoch": 2.6179738202617973,
"grad_norm": 3.0442352294921875,
"learning_rate": 4.766193463189171e-06,
"loss": 0.4394,
"step": 119000
},
{
"epoch": 2.6201737982620172,
"grad_norm": 2.913459539413452,
"learning_rate": 4.7617915703752614e-06,
"loss": 0.4445,
"step": 119100
},
{
"epoch": 2.622373776262237,
"grad_norm": 2.6879146099090576,
"learning_rate": 4.757389677561352e-06,
"loss": 0.4433,
"step": 119200
},
{
"epoch": 2.6245737542624576,
"grad_norm": 3.6051576137542725,
"learning_rate": 4.752987784747442e-06,
"loss": 0.4385,
"step": 119300
},
{
"epoch": 2.626773732262677,
"grad_norm": 2.6867752075195312,
"learning_rate": 4.748585891933532e-06,
"loss": 0.4569,
"step": 119400
},
{
"epoch": 2.6289737102628976,
"grad_norm": 2.795522928237915,
"learning_rate": 4.744183999119622e-06,
"loss": 0.4413,
"step": 119500
},
{
"epoch": 2.631173688263117,
"grad_norm": 3.2469236850738525,
"learning_rate": 4.739782106305712e-06,
"loss": 0.4539,
"step": 119600
},
{
"epoch": 2.6333736662633376,
"grad_norm": 3.3304011821746826,
"learning_rate": 4.735380213491802e-06,
"loss": 0.46,
"step": 119700
},
{
"epoch": 2.635573644263557,
"grad_norm": 3.8114166259765625,
"learning_rate": 4.730978320677892e-06,
"loss": 0.4472,
"step": 119800
},
{
"epoch": 2.6377736222637775,
"grad_norm": 3.1319470405578613,
"learning_rate": 4.726576427863982e-06,
"loss": 0.4381,
"step": 119900
},
{
"epoch": 2.6399736002639975,
"grad_norm": 3.4921023845672607,
"learning_rate": 4.722174535050072e-06,
"loss": 0.4397,
"step": 120000
},
{
"epoch": 2.6421735782642175,
"grad_norm": 2.191277265548706,
"learning_rate": 4.717772642236162e-06,
"loss": 0.4329,
"step": 120100
},
{
"epoch": 2.6443735562644375,
"grad_norm": 3.3526830673217773,
"learning_rate": 4.713370749422252e-06,
"loss": 0.4425,
"step": 120200
},
{
"epoch": 2.6465735342646575,
"grad_norm": 3.1728663444519043,
"learning_rate": 4.708968856608342e-06,
"loss": 0.4304,
"step": 120300
},
{
"epoch": 2.6487735122648774,
"grad_norm": 2.8686399459838867,
"learning_rate": 4.704566963794432e-06,
"loss": 0.4414,
"step": 120400
},
{
"epoch": 2.6509734902650974,
"grad_norm": 3.0160744190216064,
"learning_rate": 4.700165070980522e-06,
"loss": 0.4318,
"step": 120500
},
{
"epoch": 2.6531734682653174,
"grad_norm": 3.1512398719787598,
"learning_rate": 4.695763178166612e-06,
"loss": 0.4469,
"step": 120600
},
{
"epoch": 2.6553734462655374,
"grad_norm": 3.48527193069458,
"learning_rate": 4.691361285352702e-06,
"loss": 0.4239,
"step": 120700
},
{
"epoch": 2.6575734242657574,
"grad_norm": 3.1018311977386475,
"learning_rate": 4.686959392538792e-06,
"loss": 0.4555,
"step": 120800
},
{
"epoch": 2.6597734022659774,
"grad_norm": 2.867196559906006,
"learning_rate": 4.6825574997248814e-06,
"loss": 0.4392,
"step": 120900
},
{
"epoch": 2.6619733802661973,
"grad_norm": 3.6857316493988037,
"learning_rate": 4.6781556069109726e-06,
"loss": 0.4384,
"step": 121000
},
{
"epoch": 2.6641733582664173,
"grad_norm": 3.517019748687744,
"learning_rate": 4.673753714097062e-06,
"loss": 0.4323,
"step": 121100
},
{
"epoch": 2.6663733362666373,
"grad_norm": 2.7562782764434814,
"learning_rate": 4.669351821283152e-06,
"loss": 0.4425,
"step": 121200
},
{
"epoch": 2.6685733142668573,
"grad_norm": 3.2904388904571533,
"learning_rate": 4.6649499284692426e-06,
"loss": 0.4365,
"step": 121300
},
{
"epoch": 2.6707732922670773,
"grad_norm": 2.5950496196746826,
"learning_rate": 4.660548035655332e-06,
"loss": 0.4451,
"step": 121400
},
{
"epoch": 2.6729732702672973,
"grad_norm": 2.883255958557129,
"learning_rate": 4.656146142841422e-06,
"loss": 0.4327,
"step": 121500
},
{
"epoch": 2.6751732482675172,
"grad_norm": 2.8640213012695312,
"learning_rate": 4.6517442500275126e-06,
"loss": 0.4507,
"step": 121600
},
{
"epoch": 2.677373226267737,
"grad_norm": 2.547304630279541,
"learning_rate": 4.647342357213602e-06,
"loss": 0.4391,
"step": 121700
},
{
"epoch": 2.679573204267957,
"grad_norm": 2.599860668182373,
"learning_rate": 4.642940464399692e-06,
"loss": 0.4351,
"step": 121800
},
{
"epoch": 2.681773182268177,
"grad_norm": 2.898108959197998,
"learning_rate": 4.6385385715857826e-06,
"loss": 0.4486,
"step": 121900
},
{
"epoch": 2.683973160268397,
"grad_norm": 3.2468693256378174,
"learning_rate": 4.634136678771872e-06,
"loss": 0.4465,
"step": 122000
},
{
"epoch": 2.686173138268617,
"grad_norm": 2.5715901851654053,
"learning_rate": 4.629734785957962e-06,
"loss": 0.4464,
"step": 122100
},
{
"epoch": 2.6883731162688376,
"grad_norm": 3.2512638568878174,
"learning_rate": 4.6253328931440526e-06,
"loss": 0.436,
"step": 122200
},
{
"epoch": 2.690573094269057,
"grad_norm": 3.1215593814849854,
"learning_rate": 4.620931000330142e-06,
"loss": 0.4333,
"step": 122300
},
{
"epoch": 2.6927730722692775,
"grad_norm": 3.264613151550293,
"learning_rate": 4.616529107516232e-06,
"loss": 0.4288,
"step": 122400
},
{
"epoch": 2.694973050269497,
"grad_norm": 3.0146634578704834,
"learning_rate": 4.6121272147023226e-06,
"loss": 0.4436,
"step": 122500
},
{
"epoch": 2.6971730282697175,
"grad_norm": 3.1037158966064453,
"learning_rate": 4.607725321888412e-06,
"loss": 0.4545,
"step": 122600
},
{
"epoch": 2.699373006269937,
"grad_norm": 3.4465529918670654,
"learning_rate": 4.603323429074502e-06,
"loss": 0.421,
"step": 122700
},
{
"epoch": 2.7015729842701575,
"grad_norm": 3.160284996032715,
"learning_rate": 4.5989215362605926e-06,
"loss": 0.4323,
"step": 122800
},
{
"epoch": 2.7037729622703774,
"grad_norm": 2.4882216453552246,
"learning_rate": 4.594519643446683e-06,
"loss": 0.4477,
"step": 122900
},
{
"epoch": 2.7059729402705974,
"grad_norm": 2.794928550720215,
"learning_rate": 4.590117750632772e-06,
"loss": 0.4365,
"step": 123000
},
{
"epoch": 2.7081729182708174,
"grad_norm": 3.3920443058013916,
"learning_rate": 4.5857158578188626e-06,
"loss": 0.4419,
"step": 123100
},
{
"epoch": 2.7103728962710374,
"grad_norm": 3.186394214630127,
"learning_rate": 4.581313965004953e-06,
"loss": 0.4226,
"step": 123200
},
{
"epoch": 2.7125728742712574,
"grad_norm": 3.5185060501098633,
"learning_rate": 4.576912072191042e-06,
"loss": 0.4487,
"step": 123300
},
{
"epoch": 2.7147728522714774,
"grad_norm": 2.8069283962249756,
"learning_rate": 4.5725101793771326e-06,
"loss": 0.443,
"step": 123400
},
{
"epoch": 2.7169728302716973,
"grad_norm": 4.11994743347168,
"learning_rate": 4.568108286563223e-06,
"loss": 0.4387,
"step": 123500
},
{
"epoch": 2.7191728082719173,
"grad_norm": 3.6650454998016357,
"learning_rate": 4.563706393749312e-06,
"loss": 0.4431,
"step": 123600
},
{
"epoch": 2.7213727862721373,
"grad_norm": 2.7214787006378174,
"learning_rate": 4.5593045009354026e-06,
"loss": 0.4299,
"step": 123700
},
{
"epoch": 2.7235727642723573,
"grad_norm": 3.1517221927642822,
"learning_rate": 4.554902608121493e-06,
"loss": 0.4417,
"step": 123800
},
{
"epoch": 2.7257727422725773,
"grad_norm": 2.9995832443237305,
"learning_rate": 4.550500715307582e-06,
"loss": 0.4357,
"step": 123900
},
{
"epoch": 2.7279727202727972,
"grad_norm": 3.1918044090270996,
"learning_rate": 4.5460988224936726e-06,
"loss": 0.442,
"step": 124000
},
{
"epoch": 2.7301726982730172,
"grad_norm": 3.101876735687256,
"learning_rate": 4.541696929679763e-06,
"loss": 0.4333,
"step": 124100
},
{
"epoch": 2.732372676273237,
"grad_norm": 3.4324588775634766,
"learning_rate": 4.537295036865852e-06,
"loss": 0.431,
"step": 124200
},
{
"epoch": 2.734572654273457,
"grad_norm": 3.6786587238311768,
"learning_rate": 4.5328931440519426e-06,
"loss": 0.4499,
"step": 124300
},
{
"epoch": 2.736772632273677,
"grad_norm": 2.661198139190674,
"learning_rate": 4.528491251238033e-06,
"loss": 0.4446,
"step": 124400
},
{
"epoch": 2.738972610273897,
"grad_norm": 2.958374500274658,
"learning_rate": 4.524089358424123e-06,
"loss": 0.4364,
"step": 124500
},
{
"epoch": 2.741172588274117,
"grad_norm": 3.012861967086792,
"learning_rate": 4.519687465610213e-06,
"loss": 0.432,
"step": 124600
},
{
"epoch": 2.743372566274337,
"grad_norm": 2.911194324493408,
"learning_rate": 4.515285572796303e-06,
"loss": 0.4524,
"step": 124700
},
{
"epoch": 2.745572544274557,
"grad_norm": 2.891263961791992,
"learning_rate": 4.510883679982393e-06,
"loss": 0.435,
"step": 124800
},
{
"epoch": 2.7477725222747775,
"grad_norm": 3.8306422233581543,
"learning_rate": 4.5064817871684826e-06,
"loss": 0.4331,
"step": 124900
},
{
"epoch": 2.749972500274997,
"grad_norm": 3.3201122283935547,
"learning_rate": 4.502079894354573e-06,
"loss": 0.4483,
"step": 125000
},
{
"epoch": 2.7521724782752175,
"grad_norm": 3.1668500900268555,
"learning_rate": 4.497678001540663e-06,
"loss": 0.4525,
"step": 125100
},
{
"epoch": 2.754372456275437,
"grad_norm": 2.9911599159240723,
"learning_rate": 4.4932761087267525e-06,
"loss": 0.426,
"step": 125200
},
{
"epoch": 2.7565724342756575,
"grad_norm": 2.79228138923645,
"learning_rate": 4.488874215912843e-06,
"loss": 0.4494,
"step": 125300
},
{
"epoch": 2.758772412275877,
"grad_norm": 3.6654934883117676,
"learning_rate": 4.484472323098933e-06,
"loss": 0.4246,
"step": 125400
},
{
"epoch": 2.7609723902760974,
"grad_norm": 3.1065101623535156,
"learning_rate": 4.4800704302850225e-06,
"loss": 0.4419,
"step": 125500
},
{
"epoch": 2.7631723682763174,
"grad_norm": 3.4590702056884766,
"learning_rate": 4.475668537471113e-06,
"loss": 0.4429,
"step": 125600
},
{
"epoch": 2.7653723462765374,
"grad_norm": 2.956099033355713,
"learning_rate": 4.471266644657203e-06,
"loss": 0.444,
"step": 125700
},
{
"epoch": 2.7675723242767574,
"grad_norm": 3.131133556365967,
"learning_rate": 4.4668647518432925e-06,
"loss": 0.436,
"step": 125800
},
{
"epoch": 2.7697723022769773,
"grad_norm": 2.866058349609375,
"learning_rate": 4.462462859029383e-06,
"loss": 0.4308,
"step": 125900
},
{
"epoch": 2.7719722802771973,
"grad_norm": 2.9503538608551025,
"learning_rate": 4.458060966215473e-06,
"loss": 0.4339,
"step": 126000
},
{
"epoch": 2.7741722582774173,
"grad_norm": 2.9972116947174072,
"learning_rate": 4.4536590734015625e-06,
"loss": 0.4457,
"step": 126100
},
{
"epoch": 2.7763722362776373,
"grad_norm": 2.7631094455718994,
"learning_rate": 4.449257180587654e-06,
"loss": 0.4387,
"step": 126200
},
{
"epoch": 2.7785722142778573,
"grad_norm": 2.7902297973632812,
"learning_rate": 4.444855287773743e-06,
"loss": 0.44,
"step": 126300
},
{
"epoch": 2.7807721922780773,
"grad_norm": 2.688309907913208,
"learning_rate": 4.440453394959833e-06,
"loss": 0.4432,
"step": 126400
},
{
"epoch": 2.7829721702782972,
"grad_norm": 2.6654300689697266,
"learning_rate": 4.436051502145924e-06,
"loss": 0.4347,
"step": 126500
},
{
"epoch": 2.7851721482785172,
"grad_norm": 3.6261539459228516,
"learning_rate": 4.431649609332013e-06,
"loss": 0.4267,
"step": 126600
},
{
"epoch": 2.787372126278737,
"grad_norm": 3.265857696533203,
"learning_rate": 4.427247716518103e-06,
"loss": 0.4374,
"step": 126700
},
{
"epoch": 2.789572104278957,
"grad_norm": 3.311096668243408,
"learning_rate": 4.422845823704193e-06,
"loss": 0.4432,
"step": 126800
},
{
"epoch": 2.791772082279177,
"grad_norm": 3.3290562629699707,
"learning_rate": 4.418443930890283e-06,
"loss": 0.4326,
"step": 126900
},
{
"epoch": 2.793972060279397,
"grad_norm": 3.593282461166382,
"learning_rate": 4.414042038076373e-06,
"loss": 0.4422,
"step": 127000
},
{
"epoch": 2.796172038279617,
"grad_norm": 2.8509931564331055,
"learning_rate": 4.409640145262463e-06,
"loss": 0.4443,
"step": 127100
},
{
"epoch": 2.798372016279837,
"grad_norm": 2.7307536602020264,
"learning_rate": 4.405238252448553e-06,
"loss": 0.4362,
"step": 127200
},
{
"epoch": 2.800571994280057,
"grad_norm": 3.677067518234253,
"learning_rate": 4.400836359634643e-06,
"loss": 0.4391,
"step": 127300
},
{
"epoch": 2.802771972280277,
"grad_norm": 3.1093156337738037,
"learning_rate": 4.396434466820733e-06,
"loss": 0.4299,
"step": 127400
},
{
"epoch": 2.804971950280497,
"grad_norm": 3.1432149410247803,
"learning_rate": 4.392032574006823e-06,
"loss": 0.4454,
"step": 127500
},
{
"epoch": 2.807171928280717,
"grad_norm": 3.175234079360962,
"learning_rate": 4.387630681192913e-06,
"loss": 0.4361,
"step": 127600
},
{
"epoch": 2.809371906280937,
"grad_norm": 3.7147324085235596,
"learning_rate": 4.383228788379003e-06,
"loss": 0.434,
"step": 127700
},
{
"epoch": 2.8115718842811575,
"grad_norm": 3.4218947887420654,
"learning_rate": 4.378826895565093e-06,
"loss": 0.4392,
"step": 127800
},
{
"epoch": 2.813771862281377,
"grad_norm": 4.2883195877075195,
"learning_rate": 4.374425002751183e-06,
"loss": 0.4254,
"step": 127900
},
{
"epoch": 2.8159718402815974,
"grad_norm": 2.9192450046539307,
"learning_rate": 4.370023109937274e-06,
"loss": 0.4596,
"step": 128000
},
{
"epoch": 2.818171818281817,
"grad_norm": 3.3479373455047607,
"learning_rate": 4.365621217123364e-06,
"loss": 0.4299,
"step": 128100
},
{
"epoch": 2.8203717962820374,
"grad_norm": 3.0389039516448975,
"learning_rate": 4.361219324309453e-06,
"loss": 0.4348,
"step": 128200
},
{
"epoch": 2.822571774282257,
"grad_norm": 3.236820697784424,
"learning_rate": 4.356817431495544e-06,
"loss": 0.4244,
"step": 128300
},
{
"epoch": 2.8247717522824773,
"grad_norm": 3.319667339324951,
"learning_rate": 4.352415538681634e-06,
"loss": 0.4316,
"step": 128400
},
{
"epoch": 2.8269717302826973,
"grad_norm": 3.5438737869262695,
"learning_rate": 4.348013645867723e-06,
"loss": 0.4427,
"step": 128500
},
{
"epoch": 2.8291717082829173,
"grad_norm": 2.995439052581787,
"learning_rate": 4.343611753053814e-06,
"loss": 0.4402,
"step": 128600
},
{
"epoch": 2.8313716862831373,
"grad_norm": 2.894104480743408,
"learning_rate": 4.339209860239904e-06,
"loss": 0.4351,
"step": 128700
},
{
"epoch": 2.8335716642833573,
"grad_norm": 3.5675222873687744,
"learning_rate": 4.334807967425993e-06,
"loss": 0.4359,
"step": 128800
},
{
"epoch": 2.8357716422835773,
"grad_norm": 2.749405860900879,
"learning_rate": 4.330406074612084e-06,
"loss": 0.4353,
"step": 128900
},
{
"epoch": 2.8379716202837972,
"grad_norm": 3.581648826599121,
"learning_rate": 4.326004181798173e-06,
"loss": 0.4465,
"step": 129000
},
{
"epoch": 2.840171598284017,
"grad_norm": 2.5396058559417725,
"learning_rate": 4.321602288984263e-06,
"loss": 0.4505,
"step": 129100
},
{
"epoch": 2.842371576284237,
"grad_norm": 3.2663464546203613,
"learning_rate": 4.317200396170354e-06,
"loss": 0.4315,
"step": 129200
},
{
"epoch": 2.844571554284457,
"grad_norm": 3.687699556350708,
"learning_rate": 4.312798503356443e-06,
"loss": 0.4341,
"step": 129300
},
{
"epoch": 2.846771532284677,
"grad_norm": 3.822061538696289,
"learning_rate": 4.308396610542533e-06,
"loss": 0.4204,
"step": 129400
},
{
"epoch": 2.848971510284897,
"grad_norm": 4.063410758972168,
"learning_rate": 4.303994717728624e-06,
"loss": 0.4281,
"step": 129500
},
{
"epoch": 2.851171488285117,
"grad_norm": 3.304727554321289,
"learning_rate": 4.299592824914713e-06,
"loss": 0.4276,
"step": 129600
},
{
"epoch": 2.853371466285337,
"grad_norm": 3.195687770843506,
"learning_rate": 4.295190932100804e-06,
"loss": 0.4195,
"step": 129700
},
{
"epoch": 2.855571444285557,
"grad_norm": 3.390817403793335,
"learning_rate": 4.290789039286894e-06,
"loss": 0.4442,
"step": 129800
},
{
"epoch": 2.857771422285777,
"grad_norm": 3.095522403717041,
"learning_rate": 4.286387146472984e-06,
"loss": 0.4459,
"step": 129900
},
{
"epoch": 2.859971400285997,
"grad_norm": 2.7765722274780273,
"learning_rate": 4.281985253659074e-06,
"loss": 0.4347,
"step": 130000
},
{
"epoch": 2.862171378286217,
"grad_norm": 3.3501715660095215,
"learning_rate": 4.277583360845164e-06,
"loss": 0.4315,
"step": 130100
},
{
"epoch": 2.864371356286437,
"grad_norm": 2.8992860317230225,
"learning_rate": 4.273181468031254e-06,
"loss": 0.4196,
"step": 130200
},
{
"epoch": 2.866571334286657,
"grad_norm": 3.240837335586548,
"learning_rate": 4.268779575217344e-06,
"loss": 0.4253,
"step": 130300
},
{
"epoch": 2.868771312286877,
"grad_norm": 2.687161445617676,
"learning_rate": 4.264377682403434e-06,
"loss": 0.4297,
"step": 130400
},
{
"epoch": 2.870971290287097,
"grad_norm": 3.1937789916992188,
"learning_rate": 4.259975789589524e-06,
"loss": 0.4367,
"step": 130500
},
{
"epoch": 2.873171268287317,
"grad_norm": 2.9205288887023926,
"learning_rate": 4.255573896775614e-06,
"loss": 0.434,
"step": 130600
},
{
"epoch": 2.8753712462875374,
"grad_norm": 2.830369710922241,
"learning_rate": 4.251172003961704e-06,
"loss": 0.4487,
"step": 130700
},
{
"epoch": 2.877571224287757,
"grad_norm": 3.458214044570923,
"learning_rate": 4.246770111147794e-06,
"loss": 0.4326,
"step": 130800
},
{
"epoch": 2.8797712022879773,
"grad_norm": 3.2541399002075195,
"learning_rate": 4.242368218333883e-06,
"loss": 0.4541,
"step": 130900
},
{
"epoch": 2.881971180288197,
"grad_norm": 3.3345773220062256,
"learning_rate": 4.237966325519974e-06,
"loss": 0.434,
"step": 131000
},
{
"epoch": 2.8841711582884173,
"grad_norm": 3.172386646270752,
"learning_rate": 4.233564432706064e-06,
"loss": 0.4399,
"step": 131100
},
{
"epoch": 2.886371136288637,
"grad_norm": 2.489182472229004,
"learning_rate": 4.229162539892153e-06,
"loss": 0.4324,
"step": 131200
},
{
"epoch": 2.8885711142888573,
"grad_norm": 3.0109496116638184,
"learning_rate": 4.224760647078244e-06,
"loss": 0.4342,
"step": 131300
},
{
"epoch": 2.8907710922890772,
"grad_norm": 3.0989527702331543,
"learning_rate": 4.220358754264334e-06,
"loss": 0.4317,
"step": 131400
},
{
"epoch": 2.8929710702892972,
"grad_norm": 4.138854026794434,
"learning_rate": 4.215956861450424e-06,
"loss": 0.4243,
"step": 131500
},
{
"epoch": 2.895171048289517,
"grad_norm": 2.925975799560547,
"learning_rate": 4.2115549686365145e-06,
"loss": 0.4307,
"step": 131600
},
{
"epoch": 2.897371026289737,
"grad_norm": 2.7254014015197754,
"learning_rate": 4.207153075822604e-06,
"loss": 0.4311,
"step": 131700
},
{
"epoch": 2.899571004289957,
"grad_norm": 3.5956203937530518,
"learning_rate": 4.202751183008694e-06,
"loss": 0.43,
"step": 131800
},
{
"epoch": 2.901770982290177,
"grad_norm": 3.406620502471924,
"learning_rate": 4.1983492901947845e-06,
"loss": 0.4321,
"step": 131900
},
{
"epoch": 2.903970960290397,
"grad_norm": 3.0268537998199463,
"learning_rate": 4.193947397380874e-06,
"loss": 0.4328,
"step": 132000
},
{
"epoch": 2.906170938290617,
"grad_norm": 3.0812931060791016,
"learning_rate": 4.189545504566964e-06,
"loss": 0.4443,
"step": 132100
},
{
"epoch": 2.908370916290837,
"grad_norm": 2.5374112129211426,
"learning_rate": 4.1851436117530545e-06,
"loss": 0.4324,
"step": 132200
},
{
"epoch": 2.910570894291057,
"grad_norm": 2.906034231185913,
"learning_rate": 4.180741718939144e-06,
"loss": 0.4358,
"step": 132300
},
{
"epoch": 2.912770872291277,
"grad_norm": 3.593029499053955,
"learning_rate": 4.176339826125234e-06,
"loss": 0.4517,
"step": 132400
},
{
"epoch": 2.914970850291497,
"grad_norm": 2.914520025253296,
"learning_rate": 4.1719379333113245e-06,
"loss": 0.4485,
"step": 132500
},
{
"epoch": 2.917170828291717,
"grad_norm": 2.874202013015747,
"learning_rate": 4.167536040497414e-06,
"loss": 0.4506,
"step": 132600
},
{
"epoch": 2.919370806291937,
"grad_norm": 3.172924041748047,
"learning_rate": 4.163134147683504e-06,
"loss": 0.4416,
"step": 132700
},
{
"epoch": 2.921570784292157,
"grad_norm": 4.034905433654785,
"learning_rate": 4.1587322548695945e-06,
"loss": 0.4297,
"step": 132800
},
{
"epoch": 2.923770762292377,
"grad_norm": 2.940948963165283,
"learning_rate": 4.154330362055684e-06,
"loss": 0.4277,
"step": 132900
},
{
"epoch": 2.925970740292597,
"grad_norm": 4.134010314941406,
"learning_rate": 4.149928469241774e-06,
"loss": 0.4206,
"step": 133000
},
{
"epoch": 2.928170718292817,
"grad_norm": 3.641511917114258,
"learning_rate": 4.145526576427864e-06,
"loss": 0.4433,
"step": 133100
},
{
"epoch": 2.930370696293037,
"grad_norm": 3.1284308433532715,
"learning_rate": 4.141124683613955e-06,
"loss": 0.42,
"step": 133200
},
{
"epoch": 2.932570674293257,
"grad_norm": 3.499300956726074,
"learning_rate": 4.136722790800044e-06,
"loss": 0.4429,
"step": 133300
},
{
"epoch": 2.9347706522934773,
"grad_norm": 3.7782890796661377,
"learning_rate": 4.1323208979861345e-06,
"loss": 0.4207,
"step": 133400
},
{
"epoch": 2.936970630293697,
"grad_norm": 2.8001630306243896,
"learning_rate": 4.127919005172225e-06,
"loss": 0.423,
"step": 133500
},
{
"epoch": 2.9391706082939173,
"grad_norm": 3.0386412143707275,
"learning_rate": 4.123517112358314e-06,
"loss": 0.4353,
"step": 133600
},
{
"epoch": 2.941370586294137,
"grad_norm": 3.480564594268799,
"learning_rate": 4.1191152195444045e-06,
"loss": 0.4373,
"step": 133700
},
{
"epoch": 2.9435705642943573,
"grad_norm": 3.148545503616333,
"learning_rate": 4.114713326730495e-06,
"loss": 0.4359,
"step": 133800
},
{
"epoch": 2.945770542294577,
"grad_norm": 2.8668603897094727,
"learning_rate": 4.110311433916584e-06,
"loss": 0.4435,
"step": 133900
},
{
"epoch": 2.9479705202947972,
"grad_norm": 3.410372495651245,
"learning_rate": 4.1059095411026745e-06,
"loss": 0.4515,
"step": 134000
},
{
"epoch": 2.950170498295017,
"grad_norm": 3.0960798263549805,
"learning_rate": 4.101507648288765e-06,
"loss": 0.4359,
"step": 134100
},
{
"epoch": 2.952370476295237,
"grad_norm": 2.3949267864227295,
"learning_rate": 4.097105755474854e-06,
"loss": 0.4283,
"step": 134200
},
{
"epoch": 2.954570454295457,
"grad_norm": 3.325115442276001,
"learning_rate": 4.0927038626609445e-06,
"loss": 0.4281,
"step": 134300
},
{
"epoch": 2.956770432295677,
"grad_norm": 3.046936511993408,
"learning_rate": 4.088301969847035e-06,
"loss": 0.4431,
"step": 134400
},
{
"epoch": 2.958970410295897,
"grad_norm": 3.0470268726348877,
"learning_rate": 4.083900077033124e-06,
"loss": 0.4235,
"step": 134500
},
{
"epoch": 2.961170388296117,
"grad_norm": 2.8730931282043457,
"learning_rate": 4.0794981842192145e-06,
"loss": 0.4248,
"step": 134600
},
{
"epoch": 2.963370366296337,
"grad_norm": 2.930630922317505,
"learning_rate": 4.075096291405305e-06,
"loss": 0.4398,
"step": 134700
},
{
"epoch": 2.965570344296557,
"grad_norm": 3.920790672302246,
"learning_rate": 4.070694398591394e-06,
"loss": 0.4423,
"step": 134800
},
{
"epoch": 2.967770322296777,
"grad_norm": 3.2532870769500732,
"learning_rate": 4.0662925057774845e-06,
"loss": 0.424,
"step": 134900
},
{
"epoch": 2.969970300296997,
"grad_norm": 3.4274468421936035,
"learning_rate": 4.061890612963575e-06,
"loss": 0.4408,
"step": 135000
},
{
"epoch": 2.972170278297217,
"grad_norm": 3.6045656204223633,
"learning_rate": 4.057488720149665e-06,
"loss": 0.4439,
"step": 135100
},
{
"epoch": 2.974370256297437,
"grad_norm": 3.063582181930542,
"learning_rate": 4.0530868273357545e-06,
"loss": 0.4305,
"step": 135200
},
{
"epoch": 2.976570234297657,
"grad_norm": 3.4174232482910156,
"learning_rate": 4.048684934521845e-06,
"loss": 0.4334,
"step": 135300
},
{
"epoch": 2.978770212297877,
"grad_norm": 2.2402751445770264,
"learning_rate": 4.044283041707935e-06,
"loss": 0.4376,
"step": 135400
},
{
"epoch": 2.980970190298097,
"grad_norm": 3.14042067527771,
"learning_rate": 4.0398811488940245e-06,
"loss": 0.443,
"step": 135500
},
{
"epoch": 2.983170168298317,
"grad_norm": 4.143354892730713,
"learning_rate": 4.035479256080115e-06,
"loss": 0.4336,
"step": 135600
},
{
"epoch": 2.985370146298537,
"grad_norm": 3.4250524044036865,
"learning_rate": 4.031077363266205e-06,
"loss": 0.4286,
"step": 135700
},
{
"epoch": 2.987570124298757,
"grad_norm": 3.041456937789917,
"learning_rate": 4.0266754704522945e-06,
"loss": 0.4296,
"step": 135800
},
{
"epoch": 2.989770102298977,
"grad_norm": 2.1302220821380615,
"learning_rate": 4.022273577638385e-06,
"loss": 0.4336,
"step": 135900
},
{
"epoch": 2.991970080299197,
"grad_norm": 3.928239107131958,
"learning_rate": 4.017871684824475e-06,
"loss": 0.4352,
"step": 136000
},
{
"epoch": 2.994170058299417,
"grad_norm": 2.7161359786987305,
"learning_rate": 4.0134697920105645e-06,
"loss": 0.4409,
"step": 136100
},
{
"epoch": 2.996370036299637,
"grad_norm": 2.8443000316619873,
"learning_rate": 4.009067899196655e-06,
"loss": 0.4227,
"step": 136200
},
{
"epoch": 2.9985700142998573,
"grad_norm": 2.336637020111084,
"learning_rate": 4.004666006382745e-06,
"loss": 0.4296,
"step": 136300
},
{
"epoch": 3.000769992300077,
"grad_norm": 2.747061252593994,
"learning_rate": 4.0002641135688345e-06,
"loss": 0.4243,
"step": 136400
},
{
"epoch": 3.0029699703002968,
"grad_norm": 2.6186234951019287,
"learning_rate": 3.995862220754925e-06,
"loss": 0.3946,
"step": 136500
},
{
"epoch": 3.005169948300517,
"grad_norm": 3.578420400619507,
"learning_rate": 3.991460327941015e-06,
"loss": 0.3841,
"step": 136600
},
{
"epoch": 3.007369926300737,
"grad_norm": 3.9675164222717285,
"learning_rate": 3.987058435127105e-06,
"loss": 0.4017,
"step": 136700
},
{
"epoch": 3.009569904300957,
"grad_norm": 3.1522490978240967,
"learning_rate": 3.982656542313195e-06,
"loss": 0.3878,
"step": 136800
},
{
"epoch": 3.011769882301177,
"grad_norm": 3.3388068675994873,
"learning_rate": 3.978254649499285e-06,
"loss": 0.4005,
"step": 136900
},
{
"epoch": 3.013969860301397,
"grad_norm": 3.6714820861816406,
"learning_rate": 3.973852756685375e-06,
"loss": 0.406,
"step": 137000
},
{
"epoch": 3.016169838301617,
"grad_norm": 2.9617388248443604,
"learning_rate": 3.969450863871465e-06,
"loss": 0.3915,
"step": 137100
},
{
"epoch": 3.018369816301837,
"grad_norm": 3.648895263671875,
"learning_rate": 3.965048971057555e-06,
"loss": 0.3919,
"step": 137200
},
{
"epoch": 3.020569794302057,
"grad_norm": 3.127763509750366,
"learning_rate": 3.960647078243645e-06,
"loss": 0.3969,
"step": 137300
},
{
"epoch": 3.022769772302277,
"grad_norm": 4.054533004760742,
"learning_rate": 3.956245185429735e-06,
"loss": 0.4023,
"step": 137400
},
{
"epoch": 3.024969750302497,
"grad_norm": 3.8178627490997314,
"learning_rate": 3.951843292615825e-06,
"loss": 0.3848,
"step": 137500
},
{
"epoch": 3.027169728302717,
"grad_norm": 3.450464963912964,
"learning_rate": 3.947441399801915e-06,
"loss": 0.3983,
"step": 137600
},
{
"epoch": 3.029369706302937,
"grad_norm": 3.210991144180298,
"learning_rate": 3.943039506988005e-06,
"loss": 0.3993,
"step": 137700
},
{
"epoch": 3.031569684303157,
"grad_norm": 3.706838369369507,
"learning_rate": 3.938637614174095e-06,
"loss": 0.3916,
"step": 137800
},
{
"epoch": 3.033769662303377,
"grad_norm": 3.306352138519287,
"learning_rate": 3.934235721360185e-06,
"loss": 0.404,
"step": 137900
},
{
"epoch": 3.035969640303597,
"grad_norm": 4.016099452972412,
"learning_rate": 3.929833828546275e-06,
"loss": 0.4058,
"step": 138000
},
{
"epoch": 3.038169618303817,
"grad_norm": 3.057190418243408,
"learning_rate": 3.925431935732365e-06,
"loss": 0.4105,
"step": 138100
},
{
"epoch": 3.040369596304037,
"grad_norm": 3.2745933532714844,
"learning_rate": 3.921030042918455e-06,
"loss": 0.3888,
"step": 138200
},
{
"epoch": 3.042569574304257,
"grad_norm": 3.0036211013793945,
"learning_rate": 3.916628150104545e-06,
"loss": 0.4039,
"step": 138300
},
{
"epoch": 3.044769552304477,
"grad_norm": 3.3982667922973633,
"learning_rate": 3.912226257290636e-06,
"loss": 0.3955,
"step": 138400
},
{
"epoch": 3.046969530304697,
"grad_norm": 2.8318960666656494,
"learning_rate": 3.907824364476725e-06,
"loss": 0.406,
"step": 138500
},
{
"epoch": 3.049169508304917,
"grad_norm": 2.669373035430908,
"learning_rate": 3.903422471662816e-06,
"loss": 0.4009,
"step": 138600
},
{
"epoch": 3.051369486305137,
"grad_norm": 3.476454257965088,
"learning_rate": 3.899020578848906e-06,
"loss": 0.3958,
"step": 138700
},
{
"epoch": 3.053569464305357,
"grad_norm": 3.4908926486968994,
"learning_rate": 3.894618686034995e-06,
"loss": 0.4008,
"step": 138800
},
{
"epoch": 3.055769442305577,
"grad_norm": 3.15459942817688,
"learning_rate": 3.890216793221086e-06,
"loss": 0.3928,
"step": 138900
},
{
"epoch": 3.0579694203057968,
"grad_norm": 3.2345471382141113,
"learning_rate": 3.885814900407175e-06,
"loss": 0.4051,
"step": 139000
},
{
"epoch": 3.0601693983060168,
"grad_norm": 3.2796826362609863,
"learning_rate": 3.881413007593265e-06,
"loss": 0.3985,
"step": 139100
},
{
"epoch": 3.0623693763062367,
"grad_norm": 3.1456501483917236,
"learning_rate": 3.877011114779356e-06,
"loss": 0.4006,
"step": 139200
},
{
"epoch": 3.0645693543064567,
"grad_norm": 3.5905213356018066,
"learning_rate": 3.872609221965445e-06,
"loss": 0.4005,
"step": 139300
},
{
"epoch": 3.066769332306677,
"grad_norm": 3.593623399734497,
"learning_rate": 3.868207329151535e-06,
"loss": 0.3984,
"step": 139400
},
{
"epoch": 3.068969310306897,
"grad_norm": 3.059357166290283,
"learning_rate": 3.863805436337626e-06,
"loss": 0.4105,
"step": 139500
},
{
"epoch": 3.071169288307117,
"grad_norm": 3.4862234592437744,
"learning_rate": 3.859403543523715e-06,
"loss": 0.3943,
"step": 139600
},
{
"epoch": 3.073369266307337,
"grad_norm": 3.381134033203125,
"learning_rate": 3.855001650709805e-06,
"loss": 0.3865,
"step": 139700
},
{
"epoch": 3.075569244307557,
"grad_norm": 3.13862681388855,
"learning_rate": 3.850599757895896e-06,
"loss": 0.3895,
"step": 139800
},
{
"epoch": 3.077769222307777,
"grad_norm": 3.6578209400177,
"learning_rate": 3.846197865081985e-06,
"loss": 0.3972,
"step": 139900
},
{
"epoch": 3.079969200307997,
"grad_norm": 3.353710174560547,
"learning_rate": 3.841795972268075e-06,
"loss": 0.3935,
"step": 140000
},
{
"epoch": 3.082169178308217,
"grad_norm": 3.3863718509674072,
"learning_rate": 3.837394079454166e-06,
"loss": 0.401,
"step": 140100
},
{
"epoch": 3.084369156308437,
"grad_norm": 3.574791669845581,
"learning_rate": 3.832992186640256e-06,
"loss": 0.3932,
"step": 140200
},
{
"epoch": 3.086569134308657,
"grad_norm": 3.1452407836914062,
"learning_rate": 3.828590293826346e-06,
"loss": 0.3982,
"step": 140300
},
{
"epoch": 3.088769112308877,
"grad_norm": 3.7013602256774902,
"learning_rate": 3.824188401012436e-06,
"loss": 0.3871,
"step": 140400
},
{
"epoch": 3.090969090309097,
"grad_norm": 3.909804105758667,
"learning_rate": 3.819786508198526e-06,
"loss": 0.3843,
"step": 140500
},
{
"epoch": 3.093169068309317,
"grad_norm": 3.576997995376587,
"learning_rate": 3.815384615384616e-06,
"loss": 0.3895,
"step": 140600
},
{
"epoch": 3.095369046309537,
"grad_norm": 3.9983808994293213,
"learning_rate": 3.8109827225707056e-06,
"loss": 0.3864,
"step": 140700
},
{
"epoch": 3.097569024309757,
"grad_norm": 3.6163980960845947,
"learning_rate": 3.806580829756796e-06,
"loss": 0.3903,
"step": 140800
},
{
"epoch": 3.099769002309977,
"grad_norm": 3.2333366870880127,
"learning_rate": 3.8021789369428858e-06,
"loss": 0.3939,
"step": 140900
},
{
"epoch": 3.101968980310197,
"grad_norm": 3.2053537368774414,
"learning_rate": 3.7977770441289756e-06,
"loss": 0.3983,
"step": 141000
},
{
"epoch": 3.104168958310417,
"grad_norm": 3.423635721206665,
"learning_rate": 3.793375151315066e-06,
"loss": 0.3937,
"step": 141100
},
{
"epoch": 3.106368936310637,
"grad_norm": 3.9658424854278564,
"learning_rate": 3.7889732585011558e-06,
"loss": 0.3952,
"step": 141200
},
{
"epoch": 3.108568914310857,
"grad_norm": 3.184368848800659,
"learning_rate": 3.7845713656872456e-06,
"loss": 0.3854,
"step": 141300
},
{
"epoch": 3.110768892311077,
"grad_norm": 3.8191583156585693,
"learning_rate": 3.7801694728733355e-06,
"loss": 0.3965,
"step": 141400
},
{
"epoch": 3.1129688703112968,
"grad_norm": 3.2895469665527344,
"learning_rate": 3.7757675800594258e-06,
"loss": 0.4101,
"step": 141500
},
{
"epoch": 3.1151688483115167,
"grad_norm": 3.220507860183716,
"learning_rate": 3.7713656872455156e-06,
"loss": 0.3977,
"step": 141600
},
{
"epoch": 3.1173688263117367,
"grad_norm": 3.0123960971832275,
"learning_rate": 3.7669637944316055e-06,
"loss": 0.4042,
"step": 141700
},
{
"epoch": 3.1195688043119567,
"grad_norm": 3.4893038272857666,
"learning_rate": 3.762561901617696e-06,
"loss": 0.4052,
"step": 141800
},
{
"epoch": 3.1217687823121767,
"grad_norm": 3.987698793411255,
"learning_rate": 3.758160008803786e-06,
"loss": 0.3979,
"step": 141900
},
{
"epoch": 3.1239687603123967,
"grad_norm": 3.5184412002563477,
"learning_rate": 3.7537581159898763e-06,
"loss": 0.4114,
"step": 142000
},
{
"epoch": 3.126168738312617,
"grad_norm": 4.024544715881348,
"learning_rate": 3.749356223175966e-06,
"loss": 0.3996,
"step": 142100
},
{
"epoch": 3.1283687163128366,
"grad_norm": 3.358962059020996,
"learning_rate": 3.744954330362056e-06,
"loss": 0.3981,
"step": 142200
},
{
"epoch": 3.130568694313057,
"grad_norm": 2.8024742603302,
"learning_rate": 3.7405524375481463e-06,
"loss": 0.402,
"step": 142300
},
{
"epoch": 3.132768672313277,
"grad_norm": 3.1832938194274902,
"learning_rate": 3.736150544734236e-06,
"loss": 0.3951,
"step": 142400
},
{
"epoch": 3.134968650313497,
"grad_norm": 3.7349486351013184,
"learning_rate": 3.731748651920326e-06,
"loss": 0.3911,
"step": 142500
},
{
"epoch": 3.137168628313717,
"grad_norm": 3.722146987915039,
"learning_rate": 3.727346759106416e-06,
"loss": 0.3991,
"step": 142600
},
{
"epoch": 3.139368606313937,
"grad_norm": 3.13198184967041,
"learning_rate": 3.722944866292506e-06,
"loss": 0.3937,
"step": 142700
},
{
"epoch": 3.141568584314157,
"grad_norm": 3.511359453201294,
"learning_rate": 3.718542973478596e-06,
"loss": 0.3989,
"step": 142800
},
{
"epoch": 3.143768562314377,
"grad_norm": 4.363528251647949,
"learning_rate": 3.714141080664686e-06,
"loss": 0.4041,
"step": 142900
},
{
"epoch": 3.145968540314597,
"grad_norm": 3.696638584136963,
"learning_rate": 3.709739187850776e-06,
"loss": 0.3971,
"step": 143000
},
{
"epoch": 3.148168518314817,
"grad_norm": 3.170654058456421,
"learning_rate": 3.705337295036866e-06,
"loss": 0.4002,
"step": 143100
},
{
"epoch": 3.150368496315037,
"grad_norm": 4.185492992401123,
"learning_rate": 3.700935402222956e-06,
"loss": 0.3912,
"step": 143200
},
{
"epoch": 3.152568474315257,
"grad_norm": 3.829686403274536,
"learning_rate": 3.696533509409046e-06,
"loss": 0.3914,
"step": 143300
},
{
"epoch": 3.154768452315477,
"grad_norm": 2.9345476627349854,
"learning_rate": 3.692131616595136e-06,
"loss": 0.3962,
"step": 143400
},
{
"epoch": 3.156968430315697,
"grad_norm": 3.666574239730835,
"learning_rate": 3.687729723781226e-06,
"loss": 0.401,
"step": 143500
},
{
"epoch": 3.159168408315917,
"grad_norm": 3.359739303588867,
"learning_rate": 3.6833278309673166e-06,
"loss": 0.3992,
"step": 143600
},
{
"epoch": 3.161368386316137,
"grad_norm": 4.227367877960205,
"learning_rate": 3.6789259381534065e-06,
"loss": 0.401,
"step": 143700
},
{
"epoch": 3.163568364316357,
"grad_norm": 2.7858405113220215,
"learning_rate": 3.6745240453394963e-06,
"loss": 0.3907,
"step": 143800
},
{
"epoch": 3.1657683423165768,
"grad_norm": 3.08479905128479,
"learning_rate": 3.6701221525255866e-06,
"loss": 0.395,
"step": 143900
},
{
"epoch": 3.1679683203167968,
"grad_norm": 3.606621265411377,
"learning_rate": 3.6657202597116765e-06,
"loss": 0.3916,
"step": 144000
},
{
"epoch": 3.1701682983170167,
"grad_norm": 4.141706466674805,
"learning_rate": 3.6613183668977663e-06,
"loss": 0.3987,
"step": 144100
},
{
"epoch": 3.1723682763172367,
"grad_norm": 3.2608320713043213,
"learning_rate": 3.6569164740838566e-06,
"loss": 0.4112,
"step": 144200
},
{
"epoch": 3.1745682543174567,
"grad_norm": 3.0765554904937744,
"learning_rate": 3.6525145812699465e-06,
"loss": 0.4057,
"step": 144300
},
{
"epoch": 3.1767682323176767,
"grad_norm": 3.198472738265991,
"learning_rate": 3.6481126884560363e-06,
"loss": 0.3928,
"step": 144400
},
{
"epoch": 3.1789682103178967,
"grad_norm": 3.3553693294525146,
"learning_rate": 3.643710795642126e-06,
"loss": 0.3934,
"step": 144500
},
{
"epoch": 3.1811681883181167,
"grad_norm": 3.7630527019500732,
"learning_rate": 3.6393089028282165e-06,
"loss": 0.3957,
"step": 144600
},
{
"epoch": 3.1833681663183366,
"grad_norm": 3.3282408714294434,
"learning_rate": 3.6349070100143063e-06,
"loss": 0.4038,
"step": 144700
},
{
"epoch": 3.1855681443185566,
"grad_norm": 3.896204710006714,
"learning_rate": 3.630505117200396e-06,
"loss": 0.4176,
"step": 144800
},
{
"epoch": 3.1877681223187766,
"grad_norm": 3.9070045948028564,
"learning_rate": 3.6261032243864865e-06,
"loss": 0.394,
"step": 144900
},
{
"epoch": 3.189968100318997,
"grad_norm": 4.341803073883057,
"learning_rate": 3.6217013315725763e-06,
"loss": 0.4016,
"step": 145000
},
{
"epoch": 3.192168078319217,
"grad_norm": 3.0518646240234375,
"learning_rate": 3.617299438758666e-06,
"loss": 0.4021,
"step": 145100
},
{
"epoch": 3.194368056319437,
"grad_norm": 2.9907262325286865,
"learning_rate": 3.6128975459447565e-06,
"loss": 0.401,
"step": 145200
},
{
"epoch": 3.196568034319657,
"grad_norm": 3.994093894958496,
"learning_rate": 3.6084956531308467e-06,
"loss": 0.3869,
"step": 145300
},
{
"epoch": 3.198768012319877,
"grad_norm": 4.31938362121582,
"learning_rate": 3.6040937603169366e-06,
"loss": 0.3942,
"step": 145400
},
{
"epoch": 3.200967990320097,
"grad_norm": 3.5487558841705322,
"learning_rate": 3.599691867503027e-06,
"loss": 0.3958,
"step": 145500
},
{
"epoch": 3.203167968320317,
"grad_norm": 4.53445291519165,
"learning_rate": 3.5952899746891167e-06,
"loss": 0.3962,
"step": 145600
},
{
"epoch": 3.205367946320537,
"grad_norm": 3.816943645477295,
"learning_rate": 3.5908880818752066e-06,
"loss": 0.4112,
"step": 145700
},
{
"epoch": 3.207567924320757,
"grad_norm": 4.102901935577393,
"learning_rate": 3.586486189061297e-06,
"loss": 0.3914,
"step": 145800
},
{
"epoch": 3.209767902320977,
"grad_norm": 3.5486576557159424,
"learning_rate": 3.5820842962473867e-06,
"loss": 0.3993,
"step": 145900
},
{
"epoch": 3.211967880321197,
"grad_norm": 3.8645424842834473,
"learning_rate": 3.5776824034334766e-06,
"loss": 0.4078,
"step": 146000
},
{
"epoch": 3.214167858321417,
"grad_norm": 3.282376527786255,
"learning_rate": 3.573280510619567e-06,
"loss": 0.4007,
"step": 146100
},
{
"epoch": 3.216367836321637,
"grad_norm": 3.293292999267578,
"learning_rate": 3.5688786178056567e-06,
"loss": 0.3909,
"step": 146200
},
{
"epoch": 3.218567814321857,
"grad_norm": 3.7592716217041016,
"learning_rate": 3.5644767249917466e-06,
"loss": 0.3916,
"step": 146300
},
{
"epoch": 3.2207677923220768,
"grad_norm": 3.8671295642852783,
"learning_rate": 3.5600748321778365e-06,
"loss": 0.4028,
"step": 146400
},
{
"epoch": 3.2229677703222968,
"grad_norm": 3.9341673851013184,
"learning_rate": 3.5556729393639267e-06,
"loss": 0.3957,
"step": 146500
},
{
"epoch": 3.2251677483225167,
"grad_norm": 3.232847213745117,
"learning_rate": 3.5512710465500166e-06,
"loss": 0.39,
"step": 146600
},
{
"epoch": 3.2273677263227367,
"grad_norm": 3.2240495681762695,
"learning_rate": 3.5468691537361065e-06,
"loss": 0.3947,
"step": 146700
},
{
"epoch": 3.2295677043229567,
"grad_norm": 3.527489423751831,
"learning_rate": 3.5424672609221967e-06,
"loss": 0.3968,
"step": 146800
},
{
"epoch": 3.2317676823231767,
"grad_norm": 3.633652687072754,
"learning_rate": 3.5380653681082866e-06,
"loss": 0.3883,
"step": 146900
},
{
"epoch": 3.2339676603233967,
"grad_norm": 3.5555477142333984,
"learning_rate": 3.5336634752943773e-06,
"loss": 0.3965,
"step": 147000
},
{
"epoch": 3.2361676383236166,
"grad_norm": 3.5308194160461426,
"learning_rate": 3.529261582480467e-06,
"loss": 0.3881,
"step": 147100
},
{
"epoch": 3.2383676163238366,
"grad_norm": 4.819995880126953,
"learning_rate": 3.524859689666557e-06,
"loss": 0.4036,
"step": 147200
},
{
"epoch": 3.2405675943240566,
"grad_norm": 4.3929033279418945,
"learning_rate": 3.5204577968526473e-06,
"loss": 0.3931,
"step": 147300
},
{
"epoch": 3.2427675723242766,
"grad_norm": 2.9747936725616455,
"learning_rate": 3.516055904038737e-06,
"loss": 0.3986,
"step": 147400
},
{
"epoch": 3.2449675503244966,
"grad_norm": 3.78959321975708,
"learning_rate": 3.511654011224827e-06,
"loss": 0.4019,
"step": 147500
},
{
"epoch": 3.2471675283247166,
"grad_norm": 3.943894624710083,
"learning_rate": 3.507252118410917e-06,
"loss": 0.3924,
"step": 147600
},
{
"epoch": 3.249367506324937,
"grad_norm": 3.963569402694702,
"learning_rate": 3.502850225597007e-06,
"loss": 0.4118,
"step": 147700
},
{
"epoch": 3.2515674843251565,
"grad_norm": 3.0138792991638184,
"learning_rate": 3.498448332783097e-06,
"loss": 0.3914,
"step": 147800
},
{
"epoch": 3.253767462325377,
"grad_norm": 3.723484754562378,
"learning_rate": 3.494046439969187e-06,
"loss": 0.3847,
"step": 147900
},
{
"epoch": 3.255967440325597,
"grad_norm": 3.7332823276519775,
"learning_rate": 3.489644547155277e-06,
"loss": 0.3943,
"step": 148000
},
{
"epoch": 3.258167418325817,
"grad_norm": 4.216028690338135,
"learning_rate": 3.485242654341367e-06,
"loss": 0.3959,
"step": 148100
},
{
"epoch": 3.260367396326037,
"grad_norm": 2.8157236576080322,
"learning_rate": 3.480840761527457e-06,
"loss": 0.4163,
"step": 148200
},
{
"epoch": 3.262567374326257,
"grad_norm": 3.428497076034546,
"learning_rate": 3.476438868713547e-06,
"loss": 0.4064,
"step": 148300
},
{
"epoch": 3.264767352326477,
"grad_norm": 3.9073712825775146,
"learning_rate": 3.472036975899637e-06,
"loss": 0.4014,
"step": 148400
},
{
"epoch": 3.266967330326697,
"grad_norm": 4.03035306930542,
"learning_rate": 3.467635083085727e-06,
"loss": 0.3991,
"step": 148500
},
{
"epoch": 3.269167308326917,
"grad_norm": 2.9807870388031006,
"learning_rate": 3.4632331902718167e-06,
"loss": 0.4106,
"step": 148600
},
{
"epoch": 3.271367286327137,
"grad_norm": 2.9369282722473145,
"learning_rate": 3.458831297457907e-06,
"loss": 0.4048,
"step": 148700
},
{
"epoch": 3.273567264327357,
"grad_norm": 3.9154739379882812,
"learning_rate": 3.4544294046439973e-06,
"loss": 0.3984,
"step": 148800
},
{
"epoch": 3.2757672423275768,
"grad_norm": 4.0573601722717285,
"learning_rate": 3.4500275118300876e-06,
"loss": 0.3994,
"step": 148900
},
{
"epoch": 3.2779672203277967,
"grad_norm": 3.521632671356201,
"learning_rate": 3.4456256190161774e-06,
"loss": 0.4101,
"step": 149000
},
{
"epoch": 3.2801671983280167,
"grad_norm": 3.2614357471466064,
"learning_rate": 3.4412237262022673e-06,
"loss": 0.404,
"step": 149100
},
{
"epoch": 3.2823671763282367,
"grad_norm": 2.79972767829895,
"learning_rate": 3.4368218333883576e-06,
"loss": 0.3997,
"step": 149200
},
{
"epoch": 3.2845671543284567,
"grad_norm": 3.753329038619995,
"learning_rate": 3.4324199405744474e-06,
"loss": 0.3992,
"step": 149300
},
{
"epoch": 3.2867671323286767,
"grad_norm": 2.569004774093628,
"learning_rate": 3.4280180477605373e-06,
"loss": 0.4007,
"step": 149400
},
{
"epoch": 3.2889671103288967,
"grad_norm": 3.0934865474700928,
"learning_rate": 3.423616154946627e-06,
"loss": 0.3978,
"step": 149500
},
{
"epoch": 3.2911670883291166,
"grad_norm": 3.7940945625305176,
"learning_rate": 3.4192142621327174e-06,
"loss": 0.4009,
"step": 149600
},
{
"epoch": 3.2933670663293366,
"grad_norm": 3.0418498516082764,
"learning_rate": 3.4148123693188073e-06,
"loss": 0.3977,
"step": 149700
},
{
"epoch": 3.2955670443295566,
"grad_norm": 3.6287832260131836,
"learning_rate": 3.410410476504897e-06,
"loss": 0.399,
"step": 149800
},
{
"epoch": 3.2977670223297766,
"grad_norm": 3.9616570472717285,
"learning_rate": 3.4060085836909874e-06,
"loss": 0.392,
"step": 149900
},
{
"epoch": 3.2999670003299966,
"grad_norm": 3.6250250339508057,
"learning_rate": 3.4016066908770773e-06,
"loss": 0.3837,
"step": 150000
},
{
"epoch": 3.2999670003299966,
"eval_loss": 0.5354483723640442,
"eval_runtime": 386.9888,
"eval_samples_per_second": 155.043,
"eval_steps_per_second": 4.845,
"step": 150000
}
],
"logging_steps": 100,
"max_steps": 227275,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 50000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.913014241035995e+17,
"train_batch_size": 22,
"trial_name": null,
"trial_params": null
}