Tohrumi's picture
#2 full iwslt_15
92bba5e verified
raw
history blame contribute delete
455 kB
[
{
"loss": 1.2057,
"grad_norm": 1.1315475702285767,
"learning_rate": 0.00019992790844372354,
"epoch": 0.0,
"step": 25
},
{
"loss": 1.1553,
"grad_norm": 1.134278655052185,
"learning_rate": 0.00019985281307260223,
"epoch": 0.0,
"step": 50
},
{
"loss": 1.0896,
"grad_norm": 1.4255499839782715,
"learning_rate": 0.00019977771770148087,
"epoch": 0.0,
"step": 75
},
{
"loss": 1.1009,
"grad_norm": 1.0321508646011353,
"learning_rate": 0.00019970262233035956,
"epoch": 0.0,
"step": 100
},
{
"loss": 1.0906,
"grad_norm": 1.065851092338562,
"learning_rate": 0.00019962752695923823,
"epoch": 0.0,
"step": 125
},
{
"loss": 1.2285,
"grad_norm": 1.3161128759384155,
"learning_rate": 0.00019955243158811692,
"epoch": 0.0,
"step": 150
},
{
"loss": 1.2242,
"grad_norm": 1.0888772010803223,
"learning_rate": 0.0001994773362169956,
"epoch": 0.0,
"step": 175
},
{
"loss": 1.1169,
"grad_norm": 1.3239021301269531,
"learning_rate": 0.00019940224084587428,
"epoch": 0.0,
"step": 200
},
{
"loss": 1.2012,
"grad_norm": 1.0381895303726196,
"learning_rate": 0.00019932714547475294,
"epoch": 0.0,
"step": 225
},
{
"loss": 1.1967,
"grad_norm": 1.1826602220535278,
"learning_rate": 0.0001992520501036316,
"epoch": 0.0,
"step": 250
},
{
"loss": 1.1632,
"grad_norm": 1.3360055685043335,
"learning_rate": 0.0001991769547325103,
"epoch": 0.0,
"step": 275
},
{
"loss": 1.1724,
"grad_norm": 1.4393656253814697,
"learning_rate": 0.00019910185936138896,
"epoch": 0.0,
"step": 300
},
{
"loss": 1.1483,
"grad_norm": 0.7849452495574951,
"learning_rate": 0.00019902676399026766,
"epoch": 0.0,
"step": 325
},
{
"loss": 1.1145,
"grad_norm": 1.1918078660964966,
"learning_rate": 0.00019895166861914635,
"epoch": 0.01,
"step": 350
},
{
"loss": 1.1695,
"grad_norm": 0.9958588480949402,
"learning_rate": 0.000198876573248025,
"epoch": 0.01,
"step": 375
},
{
"loss": 1.1884,
"grad_norm": 1.6034517288208008,
"learning_rate": 0.00019880147787690368,
"epoch": 0.01,
"step": 400
},
{
"loss": 1.1034,
"grad_norm": 1.0951026678085327,
"learning_rate": 0.00019872638250578235,
"epoch": 0.01,
"step": 425
},
{
"loss": 1.1748,
"grad_norm": 1.3773316144943237,
"learning_rate": 0.00019865128713466104,
"epoch": 0.01,
"step": 450
},
{
"loss": 1.1022,
"grad_norm": 0.9198249578475952,
"learning_rate": 0.0001985761917635397,
"epoch": 0.01,
"step": 475
},
{
"loss": 1.1509,
"grad_norm": 1.4825350046157837,
"learning_rate": 0.0001985010963924184,
"epoch": 0.01,
"step": 500
},
{
"loss": 1.1985,
"grad_norm": 1.1909708976745605,
"learning_rate": 0.00019842600102129706,
"epoch": 0.01,
"step": 525
},
{
"loss": 1.2119,
"grad_norm": 1.6249839067459106,
"learning_rate": 0.00019835090565017573,
"epoch": 0.01,
"step": 550
},
{
"loss": 1.1233,
"grad_norm": 1.279699444770813,
"learning_rate": 0.00019827581027905442,
"epoch": 0.01,
"step": 575
},
{
"loss": 1.2015,
"grad_norm": 1.2097326517105103,
"learning_rate": 0.00019820071490793308,
"epoch": 0.01,
"step": 600
},
{
"loss": 1.0881,
"grad_norm": 1.3173500299453735,
"learning_rate": 0.00019812561953681178,
"epoch": 0.01,
"step": 625
},
{
"loss": 1.1941,
"grad_norm": 1.4401942491531372,
"learning_rate": 0.00019805052416569044,
"epoch": 0.01,
"step": 650
},
{
"loss": 1.1371,
"grad_norm": 1.4831886291503906,
"learning_rate": 0.0001979754287945691,
"epoch": 0.01,
"step": 675
},
{
"loss": 1.1284,
"grad_norm": 1.3925830125808716,
"learning_rate": 0.00019790033342344777,
"epoch": 0.01,
"step": 700
},
{
"loss": 1.151,
"grad_norm": 1.1581556797027588,
"learning_rate": 0.00019782523805232647,
"epoch": 0.01,
"step": 725
},
{
"loss": 1.1213,
"grad_norm": 0.9816817045211792,
"learning_rate": 0.00019775014268120513,
"epoch": 0.01,
"step": 750
},
{
"loss": 1.189,
"grad_norm": 1.771814227104187,
"learning_rate": 0.00019767504731008382,
"epoch": 0.01,
"step": 775
},
{
"loss": 1.1613,
"grad_norm": 1.2354599237442017,
"learning_rate": 0.00019759995193896252,
"epoch": 0.01,
"step": 800
},
{
"loss": 1.1801,
"grad_norm": 1.1712677478790283,
"learning_rate": 0.00019752485656784115,
"epoch": 0.01,
"step": 825
},
{
"loss": 1.1177,
"grad_norm": 1.372815489768982,
"learning_rate": 0.00019744976119671985,
"epoch": 0.01,
"step": 850
},
{
"loss": 1.2127,
"grad_norm": 1.7617968320846558,
"learning_rate": 0.0001973746658255985,
"epoch": 0.01,
"step": 875
},
{
"loss": 1.1913,
"grad_norm": 1.5143325328826904,
"learning_rate": 0.0001972995704544772,
"epoch": 0.01,
"step": 900
},
{
"loss": 1.1401,
"grad_norm": 1.9220173358917236,
"learning_rate": 0.00019722447508335587,
"epoch": 0.01,
"step": 925
},
{
"loss": 1.209,
"grad_norm": 1.1575367450714111,
"learning_rate": 0.0001971523835270794,
"epoch": 0.01,
"step": 950
},
{
"loss": 1.1926,
"grad_norm": 0.950126051902771,
"learning_rate": 0.00019707728815595806,
"epoch": 0.01,
"step": 975
},
{
"loss": 1.1288,
"grad_norm": 1.4479137659072876,
"learning_rate": 0.00019700219278483676,
"epoch": 0.02,
"step": 1000
},
{
"loss": 1.1002,
"grad_norm": 1.2142207622528076,
"learning_rate": 0.00019692709741371542,
"epoch": 0.02,
"step": 1025
},
{
"loss": 1.1094,
"grad_norm": 1.5564340353012085,
"learning_rate": 0.0001968520020425941,
"epoch": 0.02,
"step": 1050
},
{
"loss": 1.1565,
"grad_norm": 0.9252703189849854,
"learning_rate": 0.00019677690667147278,
"epoch": 0.02,
"step": 1075
},
{
"loss": 1.1714,
"grad_norm": 1.2928420305252075,
"learning_rate": 0.00019670181130035147,
"epoch": 0.02,
"step": 1100
},
{
"loss": 1.1822,
"grad_norm": 6.4705281257629395,
"learning_rate": 0.0001966267159292301,
"epoch": 0.02,
"step": 1125
},
{
"loss": 1.1806,
"grad_norm": 1.6441676616668701,
"learning_rate": 0.0001965516205581088,
"epoch": 0.02,
"step": 1150
},
{
"loss": 1.1523,
"grad_norm": 1.981781244277954,
"learning_rate": 0.0001964765251869875,
"epoch": 0.02,
"step": 1175
},
{
"loss": 1.1556,
"grad_norm": 1.6746410131454468,
"learning_rate": 0.00019640142981586616,
"epoch": 0.02,
"step": 1200
},
{
"loss": 1.1145,
"grad_norm": 1.4516457319259644,
"learning_rate": 0.00019632633444474485,
"epoch": 0.02,
"step": 1225
},
{
"loss": 1.1196,
"grad_norm": 1.1103088855743408,
"learning_rate": 0.00019625123907362352,
"epoch": 0.02,
"step": 1250
},
{
"loss": 1.1693,
"grad_norm": 2.238402843475342,
"learning_rate": 0.00019617614370250218,
"epoch": 0.02,
"step": 1275
},
{
"loss": 1.1292,
"grad_norm": 1.3010998964309692,
"learning_rate": 0.00019610104833138085,
"epoch": 0.02,
"step": 1300
},
{
"loss": 1.1058,
"grad_norm": 1.2125681638717651,
"learning_rate": 0.00019602595296025954,
"epoch": 0.02,
"step": 1325
},
{
"loss": 1.2092,
"grad_norm": 1.263877272605896,
"learning_rate": 0.00019595085758913823,
"epoch": 0.02,
"step": 1350
},
{
"loss": 1.1451,
"grad_norm": 1.3143881559371948,
"learning_rate": 0.0001958757622180169,
"epoch": 0.02,
"step": 1375
},
{
"loss": 1.1226,
"grad_norm": 1.081262230873108,
"learning_rate": 0.00019580066684689556,
"epoch": 0.02,
"step": 1400
},
{
"loss": 1.1559,
"grad_norm": 1.4016741514205933,
"learning_rate": 0.00019572557147577423,
"epoch": 0.02,
"step": 1425
},
{
"loss": 1.1568,
"grad_norm": 1.435803771018982,
"learning_rate": 0.00019565047610465292,
"epoch": 0.02,
"step": 1450
},
{
"loss": 1.1661,
"grad_norm": 0.9757218956947327,
"learning_rate": 0.00019557538073353159,
"epoch": 0.02,
"step": 1475
},
{
"loss": 1.1674,
"grad_norm": 1.1646333932876587,
"learning_rate": 0.00019550028536241028,
"epoch": 0.02,
"step": 1500
},
{
"loss": 1.229,
"grad_norm": 1.707133412361145,
"learning_rate": 0.00019542518999128894,
"epoch": 0.02,
"step": 1525
},
{
"loss": 1.3046,
"grad_norm": 1.5240947008132935,
"learning_rate": 0.00019535009462016764,
"epoch": 0.02,
"step": 1550
},
{
"loss": 1.3126,
"grad_norm": 1.0018959045410156,
"learning_rate": 0.0001952749992490463,
"epoch": 0.02,
"step": 1575
},
{
"loss": 1.2822,
"grad_norm": 1.017600655555725,
"learning_rate": 0.00019519990387792497,
"epoch": 0.02,
"step": 1600
},
{
"loss": 1.339,
"grad_norm": 1.2864093780517578,
"learning_rate": 0.00019512480850680366,
"epoch": 0.02,
"step": 1625
},
{
"loss": 1.277,
"grad_norm": 1.2390028238296509,
"learning_rate": 0.00019504971313568232,
"epoch": 0.02,
"step": 1650
},
{
"loss": 1.2122,
"grad_norm": 1.8026188611984253,
"learning_rate": 0.00019497461776456102,
"epoch": 0.03,
"step": 1675
},
{
"loss": 1.3177,
"grad_norm": 1.143813967704773,
"learning_rate": 0.00019489952239343968,
"epoch": 0.03,
"step": 1700
},
{
"loss": 1.3457,
"grad_norm": 1.5295052528381348,
"learning_rate": 0.00019482442702231835,
"epoch": 0.03,
"step": 1725
},
{
"loss": 1.2132,
"grad_norm": 1.7378249168395996,
"learning_rate": 0.000194749331651197,
"epoch": 0.03,
"step": 1750
},
{
"loss": 1.2284,
"grad_norm": 2.3345251083374023,
"learning_rate": 0.0001946742362800757,
"epoch": 0.03,
"step": 1775
},
{
"loss": 1.3233,
"grad_norm": 2.047725200653076,
"learning_rate": 0.0001945991409089544,
"epoch": 0.03,
"step": 1800
},
{
"loss": 1.2398,
"grad_norm": 1.7765051126480103,
"learning_rate": 0.00019452404553783306,
"epoch": 0.03,
"step": 1825
},
{
"loss": 1.226,
"grad_norm": 1.4397103786468506,
"learning_rate": 0.00019444895016671173,
"epoch": 0.03,
"step": 1850
},
{
"loss": 1.3669,
"grad_norm": 1.741700530052185,
"learning_rate": 0.0001943738547955904,
"epoch": 0.03,
"step": 1875
},
{
"loss": 1.2807,
"grad_norm": 1.4095584154129028,
"learning_rate": 0.00019429875942446909,
"epoch": 0.03,
"step": 1900
},
{
"loss": 1.1971,
"grad_norm": 1.1434788703918457,
"learning_rate": 0.00019422366405334775,
"epoch": 0.03,
"step": 1925
},
{
"loss": 1.3073,
"grad_norm": 1.1965715885162354,
"learning_rate": 0.00019414856868222644,
"epoch": 0.03,
"step": 1950
},
{
"loss": 1.1621,
"grad_norm": 1.6625946760177612,
"learning_rate": 0.0001940734733111051,
"epoch": 0.03,
"step": 1975
},
{
"loss": 1.2672,
"grad_norm": 1.454901933670044,
"learning_rate": 0.0001939983779399838,
"epoch": 0.03,
"step": 2000
},
{
"loss": 1.2422,
"grad_norm": 1.198033094406128,
"learning_rate": 0.00019392328256886247,
"epoch": 0.03,
"step": 2025
},
{
"loss": 1.1992,
"grad_norm": 1.9674854278564453,
"learning_rate": 0.00019384818719774113,
"epoch": 0.03,
"step": 2050
},
{
"loss": 1.2854,
"grad_norm": 1.4203040599822998,
"learning_rate": 0.00019377309182661982,
"epoch": 0.03,
"step": 2075
},
{
"loss": 1.2896,
"grad_norm": 2.059704065322876,
"learning_rate": 0.0001936979964554985,
"epoch": 0.03,
"step": 2100
},
{
"loss": 1.2304,
"grad_norm": 3.035451889038086,
"learning_rate": 0.00019362290108437718,
"epoch": 0.03,
"step": 2125
},
{
"loss": 1.2555,
"grad_norm": 0.9351598620414734,
"learning_rate": 0.00019354780571325585,
"epoch": 0.03,
"step": 2150
},
{
"loss": 1.2276,
"grad_norm": 1.0075334310531616,
"learning_rate": 0.0001934727103421345,
"epoch": 0.03,
"step": 2175
},
{
"loss": 1.2042,
"grad_norm": 1.5209815502166748,
"learning_rate": 0.00019339761497101318,
"epoch": 0.03,
"step": 2200
},
{
"loss": 1.2262,
"grad_norm": 0.9942296147346497,
"learning_rate": 0.00019332251959989187,
"epoch": 0.03,
"step": 2225
},
{
"loss": 1.2108,
"grad_norm": 1.755386233329773,
"learning_rate": 0.00019324742422877056,
"epoch": 0.03,
"step": 2250
},
{
"loss": 1.2354,
"grad_norm": 1.129966378211975,
"learning_rate": 0.00019317232885764923,
"epoch": 0.03,
"step": 2275
},
{
"loss": 1.1907,
"grad_norm": 0.871713399887085,
"learning_rate": 0.0001930972334865279,
"epoch": 0.03,
"step": 2300
},
{
"loss": 1.2465,
"grad_norm": 1.4679888486862183,
"learning_rate": 0.00019302213811540656,
"epoch": 0.03,
"step": 2325
},
{
"loss": 1.2956,
"grad_norm": 1.5835942029953003,
"learning_rate": 0.00019294704274428525,
"epoch": 0.04,
"step": 2350
},
{
"loss": 1.238,
"grad_norm": 1.7425931692123413,
"learning_rate": 0.00019287194737316392,
"epoch": 0.04,
"step": 2375
},
{
"loss": 1.3505,
"grad_norm": 0.9919810891151428,
"learning_rate": 0.0001927968520020426,
"epoch": 0.04,
"step": 2400
},
{
"loss": 1.3327,
"grad_norm": 1.3356170654296875,
"learning_rate": 0.00019272175663092127,
"epoch": 0.04,
"step": 2425
},
{
"loss": 1.2885,
"grad_norm": 0.9927514791488647,
"learning_rate": 0.00019264666125979997,
"epoch": 0.04,
"step": 2450
},
{
"loss": 1.245,
"grad_norm": 1.3974716663360596,
"learning_rate": 0.00019257156588867863,
"epoch": 0.04,
"step": 2475
},
{
"loss": 1.2714,
"grad_norm": 1.3735284805297852,
"learning_rate": 0.0001924964705175573,
"epoch": 0.04,
"step": 2500
},
{
"loss": 1.2481,
"grad_norm": 1.096691370010376,
"learning_rate": 0.000192421375146436,
"epoch": 0.04,
"step": 2525
},
{
"loss": 1.2765,
"grad_norm": 1.1713175773620605,
"learning_rate": 0.00019234928359015952,
"epoch": 0.04,
"step": 2550
},
{
"loss": 1.1508,
"grad_norm": 1.2733495235443115,
"learning_rate": 0.0001922741882190382,
"epoch": 0.04,
"step": 2575
},
{
"loss": 1.2932,
"grad_norm": 0.9217672944068909,
"learning_rate": 0.00019219909284791685,
"epoch": 0.04,
"step": 2600
},
{
"loss": 1.2598,
"grad_norm": 2.117608070373535,
"learning_rate": 0.00019212399747679554,
"epoch": 0.04,
"step": 2625
},
{
"loss": 1.3049,
"grad_norm": 1.0736849308013916,
"learning_rate": 0.0001920489021056742,
"epoch": 0.04,
"step": 2650
},
{
"loss": 1.2122,
"grad_norm": 0.9429724812507629,
"learning_rate": 0.0001919738067345529,
"epoch": 0.04,
"step": 2675
},
{
"loss": 1.2863,
"grad_norm": 1.8061331510543823,
"learning_rate": 0.00019189871136343156,
"epoch": 0.04,
"step": 2700
},
{
"loss": 1.2269,
"grad_norm": 1.2970006465911865,
"learning_rate": 0.00019182361599231026,
"epoch": 0.04,
"step": 2725
},
{
"loss": 1.2015,
"grad_norm": 1.2956987619400024,
"learning_rate": 0.00019174852062118892,
"epoch": 0.04,
"step": 2750
},
{
"loss": 1.2501,
"grad_norm": 2.2317731380462646,
"learning_rate": 0.0001916734252500676,
"epoch": 0.04,
"step": 2775
},
{
"loss": 1.256,
"grad_norm": 1.3463131189346313,
"learning_rate": 0.00019159832987894628,
"epoch": 0.04,
"step": 2800
},
{
"loss": 1.2649,
"grad_norm": 1.146892786026001,
"learning_rate": 0.00019152323450782495,
"epoch": 0.04,
"step": 2825
},
{
"loss": 1.3137,
"grad_norm": 1.524172306060791,
"learning_rate": 0.00019144813913670364,
"epoch": 0.04,
"step": 2850
},
{
"loss": 1.2658,
"grad_norm": 1.0975860357284546,
"learning_rate": 0.0001913730437655823,
"epoch": 0.04,
"step": 2875
},
{
"loss": 1.2279,
"grad_norm": 1.350852608680725,
"learning_rate": 0.00019129794839446097,
"epoch": 0.04,
"step": 2900
},
{
"loss": 1.2253,
"grad_norm": 1.8295092582702637,
"learning_rate": 0.00019122285302333963,
"epoch": 0.04,
"step": 2925
},
{
"loss": 1.3226,
"grad_norm": 2.276642322540283,
"learning_rate": 0.00019114775765221833,
"epoch": 0.04,
"step": 2950
},
{
"loss": 1.2016,
"grad_norm": 1.5693854093551636,
"learning_rate": 0.000191072662281097,
"epoch": 0.04,
"step": 2975
},
{
"loss": 1.1537,
"grad_norm": 1.3827277421951294,
"learning_rate": 0.00019099756690997568,
"epoch": 0.05,
"step": 3000
},
{
"loss": 1.3013,
"grad_norm": 1.0224173069000244,
"learning_rate": 0.00019092247153885438,
"epoch": 0.05,
"step": 3025
},
{
"loss": 1.248,
"grad_norm": 1.8332293033599854,
"learning_rate": 0.00019084737616773301,
"epoch": 0.05,
"step": 3050
},
{
"loss": 1.2415,
"grad_norm": 0.9008692502975464,
"learning_rate": 0.0001907722807966117,
"epoch": 0.05,
"step": 3075
},
{
"loss": 1.2225,
"grad_norm": 1.5711129903793335,
"learning_rate": 0.00019069718542549037,
"epoch": 0.05,
"step": 3100
},
{
"loss": 1.197,
"grad_norm": 2.6612651348114014,
"learning_rate": 0.00019062209005436906,
"epoch": 0.05,
"step": 3125
},
{
"loss": 1.2788,
"grad_norm": 1.3320034742355347,
"learning_rate": 0.00019054699468324773,
"epoch": 0.05,
"step": 3150
},
{
"loss": 1.3182,
"grad_norm": 1.8735719919204712,
"learning_rate": 0.00019047189931212642,
"epoch": 0.05,
"step": 3175
},
{
"loss": 1.2997,
"grad_norm": 1.550970435142517,
"learning_rate": 0.0001903968039410051,
"epoch": 0.05,
"step": 3200
},
{
"loss": 1.2142,
"grad_norm": 1.3348615169525146,
"learning_rate": 0.00019032170856988375,
"epoch": 0.05,
"step": 3225
},
{
"loss": 1.1912,
"grad_norm": 1.036444902420044,
"learning_rate": 0.00019024661319876245,
"epoch": 0.05,
"step": 3250
},
{
"loss": 1.2365,
"grad_norm": 1.920847773551941,
"learning_rate": 0.0001901715178276411,
"epoch": 0.05,
"step": 3275
},
{
"loss": 1.2733,
"grad_norm": 3.4518144130706787,
"learning_rate": 0.0001900964224565198,
"epoch": 0.05,
"step": 3300
},
{
"loss": 1.1872,
"grad_norm": 2.0837206840515137,
"learning_rate": 0.00019002132708539847,
"epoch": 0.05,
"step": 3325
},
{
"loss": 1.2276,
"grad_norm": 1.4272059202194214,
"learning_rate": 0.00018994623171427713,
"epoch": 0.05,
"step": 3350
},
{
"loss": 1.2622,
"grad_norm": 1.0555847883224487,
"learning_rate": 0.0001898711363431558,
"epoch": 0.05,
"step": 3375
},
{
"loss": 1.2896,
"grad_norm": 0.9901136755943298,
"learning_rate": 0.0001897960409720345,
"epoch": 0.05,
"step": 3400
},
{
"loss": 1.2141,
"grad_norm": 2.292473793029785,
"learning_rate": 0.00018972094560091316,
"epoch": 0.05,
"step": 3425
},
{
"loss": 1.353,
"grad_norm": 1.7795960903167725,
"learning_rate": 0.00018964585022979185,
"epoch": 0.05,
"step": 3450
},
{
"loss": 1.2276,
"grad_norm": 1.4300271272659302,
"learning_rate": 0.00018957075485867054,
"epoch": 0.05,
"step": 3475
},
{
"loss": 1.2115,
"grad_norm": 1.8741381168365479,
"learning_rate": 0.00018949565948754918,
"epoch": 0.05,
"step": 3500
},
{
"loss": 1.2229,
"grad_norm": 1.2686586380004883,
"learning_rate": 0.00018942056411642787,
"epoch": 0.05,
"step": 3525
},
{
"loss": 1.2484,
"grad_norm": 1.245797872543335,
"learning_rate": 0.00018934546874530654,
"epoch": 0.05,
"step": 3550
},
{
"loss": 1.2026,
"grad_norm": 1.744106650352478,
"learning_rate": 0.00018927037337418523,
"epoch": 0.05,
"step": 3575
},
{
"loss": 1.1809,
"grad_norm": 1.4250385761260986,
"learning_rate": 0.0001891952780030639,
"epoch": 0.05,
"step": 3600
},
{
"loss": 1.2408,
"grad_norm": 3.535332202911377,
"learning_rate": 0.0001891201826319426,
"epoch": 0.05,
"step": 3625
},
{
"loss": 1.3137,
"grad_norm": 1.612424612045288,
"learning_rate": 0.00018904508726082125,
"epoch": 0.05,
"step": 3650
},
{
"loss": 1.2419,
"grad_norm": 2.110978841781616,
"learning_rate": 0.00018896999188969992,
"epoch": 0.06,
"step": 3675
},
{
"loss": 1.2782,
"grad_norm": 1.3754994869232178,
"learning_rate": 0.0001888948965185786,
"epoch": 0.06,
"step": 3700
},
{
"loss": 1.1582,
"grad_norm": 0.849475085735321,
"learning_rate": 0.00018881980114745728,
"epoch": 0.06,
"step": 3725
},
{
"loss": 1.1607,
"grad_norm": 1.4278253316879272,
"learning_rate": 0.00018874470577633597,
"epoch": 0.06,
"step": 3750
},
{
"loss": 1.2154,
"grad_norm": 3.2679355144500732,
"learning_rate": 0.00018866961040521463,
"epoch": 0.06,
"step": 3775
},
{
"loss": 1.1998,
"grad_norm": 0.9529170393943787,
"learning_rate": 0.0001885945150340933,
"epoch": 0.06,
"step": 3800
},
{
"loss": 1.2879,
"grad_norm": 1.243181586265564,
"learning_rate": 0.00018851941966297196,
"epoch": 0.06,
"step": 3825
},
{
"loss": 1.2077,
"grad_norm": 1.7659603357315063,
"learning_rate": 0.00018844432429185066,
"epoch": 0.06,
"step": 3850
},
{
"loss": 1.2047,
"grad_norm": 1.5011489391326904,
"learning_rate": 0.00018836922892072935,
"epoch": 0.06,
"step": 3875
},
{
"loss": 1.2347,
"grad_norm": 1.761047601699829,
"learning_rate": 0.00018829413354960801,
"epoch": 0.06,
"step": 3900
},
{
"loss": 1.1974,
"grad_norm": 3.6463253498077393,
"learning_rate": 0.0001882190381784867,
"epoch": 0.06,
"step": 3925
},
{
"loss": 1.1995,
"grad_norm": 0.955506443977356,
"learning_rate": 0.00018814394280736537,
"epoch": 0.06,
"step": 3950
},
{
"loss": 1.2918,
"grad_norm": 1.0750863552093506,
"learning_rate": 0.00018806884743624404,
"epoch": 0.06,
"step": 3975
},
{
"loss": 1.2288,
"grad_norm": 1.1837942600250244,
"learning_rate": 0.0001879937520651227,
"epoch": 0.06,
"step": 4000
},
{
"loss": 1.2013,
"grad_norm": 1.5817101001739502,
"learning_rate": 0.0001879186566940014,
"epoch": 0.06,
"step": 4025
},
{
"loss": 1.1851,
"grad_norm": 1.1778972148895264,
"learning_rate": 0.00018784356132288006,
"epoch": 0.06,
"step": 4050
},
{
"loss": 1.2426,
"grad_norm": 1.3166766166687012,
"learning_rate": 0.00018776846595175875,
"epoch": 0.06,
"step": 4075
},
{
"loss": 1.2837,
"grad_norm": 2.850275993347168,
"learning_rate": 0.00018769337058063742,
"epoch": 0.06,
"step": 4100
},
{
"loss": 1.2267,
"grad_norm": 1.6599894762039185,
"learning_rate": 0.00018761827520951608,
"epoch": 0.06,
"step": 4125
},
{
"loss": 1.2885,
"grad_norm": 1.5684510469436646,
"learning_rate": 0.00018754317983839478,
"epoch": 0.06,
"step": 4150
},
{
"loss": 1.2471,
"grad_norm": 2.0227372646331787,
"learning_rate": 0.00018746808446727344,
"epoch": 0.06,
"step": 4175
},
{
"loss": 1.2944,
"grad_norm": 1.747456669807434,
"learning_rate": 0.00018739298909615213,
"epoch": 0.06,
"step": 4200
},
{
"loss": 1.2535,
"grad_norm": 1.6037201881408691,
"learning_rate": 0.0001873178937250308,
"epoch": 0.06,
"step": 4225
},
{
"loss": 1.2354,
"grad_norm": 0.9807868003845215,
"learning_rate": 0.00018724279835390946,
"epoch": 0.06,
"step": 4250
},
{
"loss": 1.1942,
"grad_norm": 0.8951900601387024,
"learning_rate": 0.00018716770298278813,
"epoch": 0.06,
"step": 4275
},
{
"loss": 1.2506,
"grad_norm": 1.4032387733459473,
"learning_rate": 0.00018709260761166682,
"epoch": 0.06,
"step": 4300
},
{
"loss": 1.2388,
"grad_norm": 1.407606840133667,
"learning_rate": 0.00018701751224054551,
"epoch": 0.06,
"step": 4325
},
{
"loss": 1.2856,
"grad_norm": 1.153029441833496,
"learning_rate": 0.00018694241686942418,
"epoch": 0.07,
"step": 4350
},
{
"loss": 1.2268,
"grad_norm": 1.075202226638794,
"learning_rate": 0.00018686732149830287,
"epoch": 0.07,
"step": 4375
},
{
"loss": 1.257,
"grad_norm": 1.2537505626678467,
"learning_rate": 0.00018679222612718154,
"epoch": 0.07,
"step": 4400
},
{
"loss": 1.2183,
"grad_norm": 0.9912234544754028,
"learning_rate": 0.0001867171307560602,
"epoch": 0.07,
"step": 4425
},
{
"loss": 1.1376,
"grad_norm": 1.3906209468841553,
"learning_rate": 0.00018664203538493887,
"epoch": 0.07,
"step": 4450
},
{
"loss": 1.2794,
"grad_norm": 1.3347073793411255,
"learning_rate": 0.00018656694001381756,
"epoch": 0.07,
"step": 4475
},
{
"loss": 1.2486,
"grad_norm": 1.259150505065918,
"learning_rate": 0.00018649184464269623,
"epoch": 0.07,
"step": 4500
},
{
"loss": 1.2408,
"grad_norm": 1.7800498008728027,
"learning_rate": 0.00018641674927157492,
"epoch": 0.07,
"step": 4525
},
{
"loss": 1.282,
"grad_norm": 0.9904906153678894,
"learning_rate": 0.00018634165390045358,
"epoch": 0.07,
"step": 4550
},
{
"loss": 1.2944,
"grad_norm": 1.2220566272735596,
"learning_rate": 0.00018626655852933225,
"epoch": 0.07,
"step": 4575
},
{
"loss": 1.266,
"grad_norm": 1.4289559125900269,
"learning_rate": 0.00018619146315821094,
"epoch": 0.07,
"step": 4600
},
{
"loss": 1.2164,
"grad_norm": 1.5805399417877197,
"learning_rate": 0.0001861163677870896,
"epoch": 0.07,
"step": 4625
},
{
"loss": 1.2176,
"grad_norm": 1.2486138343811035,
"learning_rate": 0.0001860412724159683,
"epoch": 0.07,
"step": 4650
},
{
"loss": 1.2165,
"grad_norm": 1.4444175958633423,
"learning_rate": 0.00018596617704484696,
"epoch": 0.07,
"step": 4675
},
{
"loss": 1.2247,
"grad_norm": 1.6640115976333618,
"learning_rate": 0.00018589108167372563,
"epoch": 0.07,
"step": 4700
},
{
"loss": 1.2123,
"grad_norm": 1.1432693004608154,
"learning_rate": 0.0001858159863026043,
"epoch": 0.07,
"step": 4725
},
{
"loss": 1.2347,
"grad_norm": 0.9574340581893921,
"learning_rate": 0.000185740890931483,
"epoch": 0.07,
"step": 4750
},
{
"loss": 1.2177,
"grad_norm": 1.5829005241394043,
"learning_rate": 0.00018566579556036168,
"epoch": 0.07,
"step": 4775
},
{
"loss": 1.2693,
"grad_norm": 1.0968513488769531,
"learning_rate": 0.00018559070018924035,
"epoch": 0.07,
"step": 4800
},
{
"loss": 1.2203,
"grad_norm": 1.2009191513061523,
"learning_rate": 0.00018551560481811904,
"epoch": 0.07,
"step": 4825
},
{
"loss": 1.2341,
"grad_norm": 1.4881080389022827,
"learning_rate": 0.0001854405094469977,
"epoch": 0.07,
"step": 4850
},
{
"loss": 1.1719,
"grad_norm": 1.083778977394104,
"learning_rate": 0.00018536541407587637,
"epoch": 0.07,
"step": 4875
},
{
"loss": 1.2091,
"grad_norm": 1.382657766342163,
"learning_rate": 0.00018529031870475503,
"epoch": 0.07,
"step": 4900
},
{
"loss": 1.2022,
"grad_norm": 1.0024495124816895,
"learning_rate": 0.00018521522333363373,
"epoch": 0.07,
"step": 4925
},
{
"loss": 1.2967,
"grad_norm": 1.2385984659194946,
"learning_rate": 0.0001851401279625124,
"epoch": 0.07,
"step": 4950
},
{
"loss": 1.2568,
"grad_norm": 2.7820701599121094,
"learning_rate": 0.00018506503259139108,
"epoch": 0.07,
"step": 4975
},
{
"loss": 1.2483,
"grad_norm": 1.5247501134872437,
"learning_rate": 0.00018498993722026975,
"epoch": 0.08,
"step": 5000
},
{
"loss": 1.2827,
"grad_norm": 1.855117678642273,
"learning_rate": 0.00018491484184914841,
"epoch": 0.08,
"step": 5025
},
{
"loss": 1.2518,
"grad_norm": 1.2518528699874878,
"learning_rate": 0.0001848397464780271,
"epoch": 0.08,
"step": 5050
},
{
"loss": 1.2962,
"grad_norm": 1.2159770727157593,
"learning_rate": 0.00018476465110690577,
"epoch": 0.08,
"step": 5075
},
{
"loss": 1.2611,
"grad_norm": 1.3085296154022217,
"learning_rate": 0.00018468955573578446,
"epoch": 0.08,
"step": 5100
},
{
"loss": 1.2261,
"grad_norm": 2.2151153087615967,
"learning_rate": 0.00018461446036466313,
"epoch": 0.08,
"step": 5125
},
{
"loss": 1.3178,
"grad_norm": 2.004448175430298,
"learning_rate": 0.0001845393649935418,
"epoch": 0.08,
"step": 5150
},
{
"loss": 1.2419,
"grad_norm": 3.063715934753418,
"learning_rate": 0.0001844642696224205,
"epoch": 0.08,
"step": 5175
},
{
"loss": 1.2084,
"grad_norm": 1.432442545890808,
"learning_rate": 0.00018438917425129915,
"epoch": 0.08,
"step": 5200
},
{
"loss": 1.2254,
"grad_norm": 2.0840189456939697,
"learning_rate": 0.00018431407888017785,
"epoch": 0.08,
"step": 5225
},
{
"loss": 1.1547,
"grad_norm": 1.479894995689392,
"learning_rate": 0.0001842389835090565,
"epoch": 0.08,
"step": 5250
},
{
"loss": 1.2749,
"grad_norm": 1.097493290901184,
"learning_rate": 0.0001841638881379352,
"epoch": 0.08,
"step": 5275
},
{
"loss": 1.2417,
"grad_norm": 4.6398539543151855,
"learning_rate": 0.00018408879276681387,
"epoch": 0.08,
"step": 5300
},
{
"loss": 1.2506,
"grad_norm": 0.9642776250839233,
"learning_rate": 0.00018401369739569253,
"epoch": 0.08,
"step": 5325
},
{
"loss": 1.2349,
"grad_norm": 1.2694449424743652,
"learning_rate": 0.0001839386020245712,
"epoch": 0.08,
"step": 5350
},
{
"loss": 1.2158,
"grad_norm": 1.2243396043777466,
"learning_rate": 0.0001838635066534499,
"epoch": 0.08,
"step": 5375
},
{
"loss": 1.2665,
"grad_norm": 1.859632134437561,
"learning_rate": 0.00018378841128232858,
"epoch": 0.08,
"step": 5400
},
{
"loss": 1.2325,
"grad_norm": 1.0260474681854248,
"learning_rate": 0.00018371331591120725,
"epoch": 0.08,
"step": 5425
},
{
"loss": 1.2785,
"grad_norm": 1.646101713180542,
"learning_rate": 0.00018363822054008591,
"epoch": 0.08,
"step": 5450
},
{
"loss": 1.1545,
"grad_norm": 0.9569182395935059,
"learning_rate": 0.00018356312516896458,
"epoch": 0.08,
"step": 5475
},
{
"loss": 1.2172,
"grad_norm": 1.4357048273086548,
"learning_rate": 0.00018348802979784327,
"epoch": 0.08,
"step": 5500
},
{
"loss": 1.2557,
"grad_norm": 1.1532384157180786,
"learning_rate": 0.00018341293442672194,
"epoch": 0.08,
"step": 5525
},
{
"loss": 1.2258,
"grad_norm": 1.1566283702850342,
"learning_rate": 0.00018333783905560063,
"epoch": 0.08,
"step": 5550
},
{
"loss": 1.1953,
"grad_norm": 1.5711147785186768,
"learning_rate": 0.0001832627436844793,
"epoch": 0.08,
"step": 5575
},
{
"loss": 1.1736,
"grad_norm": 1.2852180004119873,
"learning_rate": 0.00018318764831335796,
"epoch": 0.08,
"step": 5600
},
{
"loss": 1.2572,
"grad_norm": 1.2723329067230225,
"learning_rate": 0.00018311255294223665,
"epoch": 0.08,
"step": 5625
},
{
"loss": 1.241,
"grad_norm": 1.6078953742980957,
"learning_rate": 0.00018303745757111532,
"epoch": 0.08,
"step": 5650
},
{
"loss": 1.2206,
"grad_norm": 1.821363925933838,
"learning_rate": 0.000182962362199994,
"epoch": 0.09,
"step": 5675
},
{
"loss": 1.2775,
"grad_norm": 1.3025563955307007,
"learning_rate": 0.00018288726682887268,
"epoch": 0.09,
"step": 5700
},
{
"loss": 1.2296,
"grad_norm": 1.7497808933258057,
"learning_rate": 0.00018281217145775137,
"epoch": 0.09,
"step": 5725
},
{
"loss": 1.306,
"grad_norm": 1.5627915859222412,
"learning_rate": 0.00018273707608663003,
"epoch": 0.09,
"step": 5750
},
{
"loss": 1.2339,
"grad_norm": 1.217433214187622,
"learning_rate": 0.0001826619807155087,
"epoch": 0.09,
"step": 5775
},
{
"loss": 1.2396,
"grad_norm": 2.4516422748565674,
"learning_rate": 0.00018258688534438736,
"epoch": 0.09,
"step": 5800
},
{
"loss": 1.2847,
"grad_norm": 1.0460309982299805,
"learning_rate": 0.00018251178997326606,
"epoch": 0.09,
"step": 5825
},
{
"loss": 1.2075,
"grad_norm": 1.3472404479980469,
"learning_rate": 0.00018243669460214475,
"epoch": 0.09,
"step": 5850
},
{
"loss": 1.1984,
"grad_norm": 1.1247375011444092,
"learning_rate": 0.00018236159923102341,
"epoch": 0.09,
"step": 5875
},
{
"loss": 1.1911,
"grad_norm": 1.175132393836975,
"learning_rate": 0.00018228650385990208,
"epoch": 0.09,
"step": 5900
},
{
"loss": 1.2405,
"grad_norm": 1.176147699356079,
"learning_rate": 0.00018221140848878075,
"epoch": 0.09,
"step": 5925
},
{
"loss": 1.1924,
"grad_norm": 1.0580313205718994,
"learning_rate": 0.00018213631311765944,
"epoch": 0.09,
"step": 5950
},
{
"loss": 1.2185,
"grad_norm": 1.1505495309829712,
"learning_rate": 0.0001820612177465381,
"epoch": 0.09,
"step": 5975
},
{
"loss": 1.2511,
"grad_norm": 1.1643320322036743,
"learning_rate": 0.0001819861223754168,
"epoch": 0.09,
"step": 6000
},
{
"loss": 1.2177,
"grad_norm": 1.3354824781417847,
"learning_rate": 0.00018191102700429546,
"epoch": 0.09,
"step": 6025
},
{
"loss": 1.2254,
"grad_norm": 1.547897458076477,
"learning_rate": 0.00018183593163317413,
"epoch": 0.09,
"step": 6050
},
{
"loss": 1.2464,
"grad_norm": 1.5310362577438354,
"learning_rate": 0.00018176083626205282,
"epoch": 0.09,
"step": 6075
},
{
"loss": 1.2767,
"grad_norm": 1.947996735572815,
"learning_rate": 0.00018168574089093148,
"epoch": 0.09,
"step": 6100
},
{
"loss": 1.1888,
"grad_norm": 1.7055374383926392,
"learning_rate": 0.00018161064551981018,
"epoch": 0.09,
"step": 6125
},
{
"loss": 1.1704,
"grad_norm": 1.5656442642211914,
"learning_rate": 0.00018153555014868884,
"epoch": 0.09,
"step": 6150
},
{
"loss": 1.2117,
"grad_norm": 2.2562601566314697,
"learning_rate": 0.00018146045477756753,
"epoch": 0.09,
"step": 6175
},
{
"loss": 1.2164,
"grad_norm": 1.9555296897888184,
"learning_rate": 0.0001813853594064462,
"epoch": 0.09,
"step": 6200
},
{
"loss": 1.181,
"grad_norm": 1.350396990776062,
"learning_rate": 0.00018131026403532486,
"epoch": 0.09,
"step": 6225
},
{
"loss": 1.1364,
"grad_norm": 1.306662678718567,
"learning_rate": 0.00018123516866420353,
"epoch": 0.09,
"step": 6250
},
{
"loss": 1.3395,
"grad_norm": 2.2892208099365234,
"learning_rate": 0.00018116007329308222,
"epoch": 0.09,
"step": 6275
},
{
"loss": 1.2155,
"grad_norm": 1.4777361154556274,
"learning_rate": 0.00018108497792196091,
"epoch": 0.09,
"step": 6300
},
{
"loss": 1.2512,
"grad_norm": 1.4510390758514404,
"learning_rate": 0.00018100988255083958,
"epoch": 0.09,
"step": 6325
},
{
"loss": 1.3135,
"grad_norm": 1.2510138750076294,
"learning_rate": 0.00018093478717971825,
"epoch": 0.1,
"step": 6350
},
{
"loss": 1.2027,
"grad_norm": 1.6383109092712402,
"learning_rate": 0.0001808596918085969,
"epoch": 0.1,
"step": 6375
},
{
"loss": 1.1677,
"grad_norm": 1.6669763326644897,
"learning_rate": 0.0001807845964374756,
"epoch": 0.1,
"step": 6400
},
{
"loss": 1.2344,
"grad_norm": 1.2947137355804443,
"learning_rate": 0.00018070950106635427,
"epoch": 0.1,
"step": 6425
},
{
"loss": 1.2195,
"grad_norm": 1.673285961151123,
"learning_rate": 0.00018063440569523296,
"epoch": 0.1,
"step": 6450
},
{
"loss": 1.2719,
"grad_norm": 2.102374792098999,
"learning_rate": 0.00018055931032411165,
"epoch": 0.1,
"step": 6475
},
{
"loss": 1.2524,
"grad_norm": 1.37187659740448,
"learning_rate": 0.0001804842149529903,
"epoch": 0.1,
"step": 6500
},
{
"loss": 1.2169,
"grad_norm": 2.1124603748321533,
"learning_rate": 0.00018040911958186898,
"epoch": 0.1,
"step": 6525
},
{
"loss": 1.2206,
"grad_norm": 1.2506129741668701,
"learning_rate": 0.00018033402421074765,
"epoch": 0.1,
"step": 6550
},
{
"loss": 1.1774,
"grad_norm": 1.5893070697784424,
"learning_rate": 0.00018025892883962634,
"epoch": 0.1,
"step": 6575
},
{
"loss": 1.2101,
"grad_norm": 2.9019079208374023,
"learning_rate": 0.000180183833468505,
"epoch": 0.1,
"step": 6600
},
{
"loss": 1.205,
"grad_norm": 1.71237313747406,
"learning_rate": 0.0001801087380973837,
"epoch": 0.1,
"step": 6625
},
{
"loss": 1.2044,
"grad_norm": 1.9124983549118042,
"learning_rate": 0.00018003364272626236,
"epoch": 0.1,
"step": 6650
},
{
"loss": 1.2125,
"grad_norm": 1.4448764324188232,
"learning_rate": 0.00017995854735514103,
"epoch": 0.1,
"step": 6675
},
{
"loss": 1.2076,
"grad_norm": 3.0220255851745605,
"learning_rate": 0.00017988345198401972,
"epoch": 0.1,
"step": 6700
},
{
"loss": 1.1899,
"grad_norm": 1.3249489068984985,
"learning_rate": 0.0001798083566128984,
"epoch": 0.1,
"step": 6725
},
{
"loss": 1.2352,
"grad_norm": 1.4463756084442139,
"learning_rate": 0.00017973326124177708,
"epoch": 0.1,
"step": 6750
},
{
"loss": 1.3259,
"grad_norm": 1.4026572704315186,
"learning_rate": 0.00017965816587065575,
"epoch": 0.1,
"step": 6775
},
{
"loss": 1.2282,
"grad_norm": 0.9847255349159241,
"learning_rate": 0.0001795830704995344,
"epoch": 0.1,
"step": 6800
},
{
"loss": 1.1899,
"grad_norm": 0.91238933801651,
"learning_rate": 0.00017950797512841308,
"epoch": 0.1,
"step": 6825
},
{
"loss": 1.2386,
"grad_norm": 2.1862552165985107,
"learning_rate": 0.00017943287975729177,
"epoch": 0.1,
"step": 6850
},
{
"loss": 1.2094,
"grad_norm": 1.10003662109375,
"learning_rate": 0.00017935778438617043,
"epoch": 0.1,
"step": 6875
},
{
"loss": 1.2218,
"grad_norm": 1.8453437089920044,
"learning_rate": 0.00017928268901504913,
"epoch": 0.1,
"step": 6900
},
{
"loss": 1.262,
"grad_norm": 1.0639673471450806,
"learning_rate": 0.00017920759364392782,
"epoch": 0.1,
"step": 6925
},
{
"loss": 1.2188,
"grad_norm": 1.3008592128753662,
"learning_rate": 0.00017913249827280646,
"epoch": 0.1,
"step": 6950
},
{
"loss": 1.23,
"grad_norm": 1.7703325748443604,
"learning_rate": 0.00017905740290168515,
"epoch": 0.1,
"step": 6975
},
{
"loss": 1.2316,
"grad_norm": 1.1259385347366333,
"learning_rate": 0.00017898230753056381,
"epoch": 0.11,
"step": 7000
},
{
"loss": 1.2378,
"grad_norm": 2.1661126613616943,
"learning_rate": 0.0001789072121594425,
"epoch": 0.11,
"step": 7025
},
{
"loss": 1.1915,
"grad_norm": 2.1428678035736084,
"learning_rate": 0.00017883211678832117,
"epoch": 0.11,
"step": 7050
},
{
"loss": 1.2344,
"grad_norm": 1.4568270444869995,
"learning_rate": 0.00017875702141719986,
"epoch": 0.11,
"step": 7075
},
{
"loss": 1.2825,
"grad_norm": 1.5431725978851318,
"learning_rate": 0.00017868192604607853,
"epoch": 0.11,
"step": 7100
},
{
"loss": 1.2178,
"grad_norm": 1.2386250495910645,
"learning_rate": 0.0001786068306749572,
"epoch": 0.11,
"step": 7125
},
{
"loss": 1.2189,
"grad_norm": 1.1443992853164673,
"learning_rate": 0.0001785317353038359,
"epoch": 0.11,
"step": 7150
},
{
"loss": 1.2145,
"grad_norm": 1.0868651866912842,
"learning_rate": 0.00017845663993271455,
"epoch": 0.11,
"step": 7175
},
{
"loss": 1.2001,
"grad_norm": 1.2621536254882812,
"learning_rate": 0.00017838154456159325,
"epoch": 0.11,
"step": 7200
},
{
"loss": 1.2605,
"grad_norm": 1.3004405498504639,
"learning_rate": 0.0001783064491904719,
"epoch": 0.11,
"step": 7225
},
{
"loss": 1.1685,
"grad_norm": 1.7868775129318237,
"learning_rate": 0.00017823135381935058,
"epoch": 0.11,
"step": 7250
},
{
"loss": 1.1724,
"grad_norm": 1.525883674621582,
"learning_rate": 0.00017815625844822924,
"epoch": 0.11,
"step": 7275
},
{
"loss": 1.1917,
"grad_norm": 1.7897926568984985,
"learning_rate": 0.00017808116307710793,
"epoch": 0.11,
"step": 7300
},
{
"loss": 1.17,
"grad_norm": 1.770201325416565,
"learning_rate": 0.0001780060677059866,
"epoch": 0.11,
"step": 7325
},
{
"loss": 1.1758,
"grad_norm": 1.23914635181427,
"learning_rate": 0.0001779309723348653,
"epoch": 0.11,
"step": 7350
},
{
"loss": 1.266,
"grad_norm": 1.5685780048370361,
"learning_rate": 0.00017785587696374398,
"epoch": 0.11,
"step": 7375
},
{
"loss": 1.1515,
"grad_norm": 1.4432404041290283,
"learning_rate": 0.00017778078159262265,
"epoch": 0.11,
"step": 7400
},
{
"loss": 1.2221,
"grad_norm": 1.4710851907730103,
"learning_rate": 0.00017770568622150131,
"epoch": 0.11,
"step": 7425
},
{
"loss": 1.2636,
"grad_norm": 1.5943934917449951,
"learning_rate": 0.00017763059085037998,
"epoch": 0.11,
"step": 7450
},
{
"loss": 1.1915,
"grad_norm": 1.3364222049713135,
"learning_rate": 0.00017755549547925867,
"epoch": 0.11,
"step": 7475
},
{
"loss": 1.2061,
"grad_norm": 1.3201831579208374,
"learning_rate": 0.00017748040010813734,
"epoch": 0.11,
"step": 7500
},
{
"loss": 1.1852,
"grad_norm": 1.3895928859710693,
"learning_rate": 0.00017740530473701603,
"epoch": 0.11,
"step": 7525
},
{
"loss": 1.2233,
"grad_norm": 1.0795204639434814,
"learning_rate": 0.0001773302093658947,
"epoch": 0.11,
"step": 7550
},
{
"loss": 1.2034,
"grad_norm": 1.7997777462005615,
"learning_rate": 0.00017725511399477336,
"epoch": 0.11,
"step": 7575
},
{
"loss": 1.1786,
"grad_norm": 1.156964898109436,
"learning_rate": 0.00017718001862365205,
"epoch": 0.11,
"step": 7600
},
{
"loss": 1.1597,
"grad_norm": 1.6956669092178345,
"learning_rate": 0.00017710492325253072,
"epoch": 0.11,
"step": 7625
},
{
"loss": 1.272,
"grad_norm": 1.3330657482147217,
"learning_rate": 0.0001770298278814094,
"epoch": 0.11,
"step": 7650
},
{
"loss": 1.2597,
"grad_norm": 0.8610468506813049,
"learning_rate": 0.00017695473251028808,
"epoch": 0.12,
"step": 7675
},
{
"loss": 1.2418,
"grad_norm": 1.9568647146224976,
"learning_rate": 0.00017687963713916674,
"epoch": 0.12,
"step": 7700
},
{
"loss": 1.0969,
"grad_norm": 1.2442560195922852,
"learning_rate": 0.0001768045417680454,
"epoch": 0.12,
"step": 7725
},
{
"loss": 1.1941,
"grad_norm": 1.949724793434143,
"learning_rate": 0.0001767294463969241,
"epoch": 0.12,
"step": 7750
},
{
"loss": 1.2424,
"grad_norm": 1.4135985374450684,
"learning_rate": 0.0001766543510258028,
"epoch": 0.12,
"step": 7775
},
{
"loss": 1.1894,
"grad_norm": 1.8493655920028687,
"learning_rate": 0.00017657925565468146,
"epoch": 0.12,
"step": 7800
},
{
"loss": 1.2078,
"grad_norm": 1.921787977218628,
"learning_rate": 0.00017650416028356015,
"epoch": 0.12,
"step": 7825
},
{
"loss": 1.2593,
"grad_norm": 1.7355767488479614,
"learning_rate": 0.00017642906491243881,
"epoch": 0.12,
"step": 7850
},
{
"loss": 1.2583,
"grad_norm": 2.5150203704833984,
"learning_rate": 0.00017635396954131748,
"epoch": 0.12,
"step": 7875
},
{
"loss": 1.1617,
"grad_norm": 1.4067972898483276,
"learning_rate": 0.00017627887417019615,
"epoch": 0.12,
"step": 7900
},
{
"loss": 1.2691,
"grad_norm": 1.6826621294021606,
"learning_rate": 0.00017620377879907484,
"epoch": 0.12,
"step": 7925
},
{
"loss": 1.1767,
"grad_norm": 1.0282503366470337,
"learning_rate": 0.0001761286834279535,
"epoch": 0.12,
"step": 7950
},
{
"loss": 1.1887,
"grad_norm": 2.0548410415649414,
"learning_rate": 0.0001760535880568322,
"epoch": 0.12,
"step": 7975
},
{
"loss": 1.2639,
"grad_norm": 1.1392240524291992,
"learning_rate": 0.00017597849268571086,
"epoch": 0.12,
"step": 8000
},
{
"loss": 1.1991,
"grad_norm": 1.8351316452026367,
"learning_rate": 0.00017590339731458953,
"epoch": 0.12,
"step": 8025
},
{
"loss": 1.265,
"grad_norm": 1.801256775856018,
"learning_rate": 0.00017582830194346822,
"epoch": 0.12,
"step": 8050
},
{
"loss": 1.2895,
"grad_norm": 1.379420280456543,
"learning_rate": 0.00017575320657234688,
"epoch": 0.12,
"step": 8075
},
{
"loss": 1.1896,
"grad_norm": 1.7537370920181274,
"learning_rate": 0.00017567811120122558,
"epoch": 0.12,
"step": 8100
},
{
"loss": 1.2039,
"grad_norm": 2.159595489501953,
"learning_rate": 0.00017560301583010424,
"epoch": 0.12,
"step": 8125
},
{
"loss": 1.2145,
"grad_norm": 1.690534234046936,
"learning_rate": 0.0001755279204589829,
"epoch": 0.12,
"step": 8150
},
{
"loss": 1.1935,
"grad_norm": 1.0568920373916626,
"learning_rate": 0.00017545282508786157,
"epoch": 0.12,
"step": 8175
},
{
"loss": 1.2304,
"grad_norm": 1.6981247663497925,
"learning_rate": 0.00017537772971674026,
"epoch": 0.12,
"step": 8200
},
{
"loss": 1.1961,
"grad_norm": 2.1610305309295654,
"learning_rate": 0.00017530263434561896,
"epoch": 0.12,
"step": 8225
},
{
"loss": 1.2387,
"grad_norm": 1.8722481727600098,
"learning_rate": 0.00017522753897449762,
"epoch": 0.12,
"step": 8250
},
{
"loss": 1.2477,
"grad_norm": 2.1257529258728027,
"learning_rate": 0.00017515244360337631,
"epoch": 0.12,
"step": 8275
},
{
"loss": 1.173,
"grad_norm": 2.786665439605713,
"learning_rate": 0.00017507734823225498,
"epoch": 0.12,
"step": 8300
},
{
"loss": 1.3121,
"grad_norm": 1.4147156476974487,
"learning_rate": 0.00017500225286113365,
"epoch": 0.13,
"step": 8325
},
{
"loss": 1.2495,
"grad_norm": 1.6025553941726685,
"learning_rate": 0.0001749271574900123,
"epoch": 0.13,
"step": 8350
},
{
"loss": 1.2802,
"grad_norm": 1.6167206764221191,
"learning_rate": 0.000174852062118891,
"epoch": 0.13,
"step": 8375
},
{
"loss": 1.2587,
"grad_norm": 1.346677303314209,
"learning_rate": 0.00017477696674776967,
"epoch": 0.13,
"step": 8400
},
{
"loss": 1.1743,
"grad_norm": 1.8484021425247192,
"learning_rate": 0.00017470187137664836,
"epoch": 0.13,
"step": 8425
},
{
"loss": 1.2875,
"grad_norm": 2.0601062774658203,
"learning_rate": 0.00017462677600552703,
"epoch": 0.13,
"step": 8450
},
{
"loss": 1.1846,
"grad_norm": 1.455112338066101,
"learning_rate": 0.0001745516806344057,
"epoch": 0.13,
"step": 8475
},
{
"loss": 1.2606,
"grad_norm": 1.336016058921814,
"learning_rate": 0.00017447658526328438,
"epoch": 0.13,
"step": 8500
},
{
"loss": 1.2648,
"grad_norm": 0.9691543579101562,
"learning_rate": 0.00017440148989216305,
"epoch": 0.13,
"step": 8525
},
{
"loss": 1.2686,
"grad_norm": 1.4051158428192139,
"learning_rate": 0.00017432639452104174,
"epoch": 0.13,
"step": 8550
},
{
"loss": 1.2357,
"grad_norm": 1.1069400310516357,
"learning_rate": 0.0001742512991499204,
"epoch": 0.13,
"step": 8575
},
{
"loss": 1.243,
"grad_norm": 1.2926398515701294,
"learning_rate": 0.00017417920759364394,
"epoch": 0.13,
"step": 8600
},
{
"loss": 1.2178,
"grad_norm": 2.581450939178467,
"learning_rate": 0.0001741041122225226,
"epoch": 0.13,
"step": 8625
},
{
"loss": 1.2037,
"grad_norm": 3.525554656982422,
"learning_rate": 0.0001740290168514013,
"epoch": 0.13,
"step": 8650
},
{
"loss": 1.2447,
"grad_norm": 1.7870151996612549,
"learning_rate": 0.00017395392148027996,
"epoch": 0.13,
"step": 8675
},
{
"loss": 1.2299,
"grad_norm": 1.8541524410247803,
"learning_rate": 0.00017387882610915865,
"epoch": 0.13,
"step": 8700
},
{
"loss": 1.2425,
"grad_norm": 1.767638921737671,
"learning_rate": 0.00017380373073803732,
"epoch": 0.13,
"step": 8725
},
{
"loss": 1.2167,
"grad_norm": 1.1607838869094849,
"learning_rate": 0.00017372863536691598,
"epoch": 0.13,
"step": 8750
},
{
"loss": 1.2148,
"grad_norm": 2.044637441635132,
"learning_rate": 0.00017365353999579467,
"epoch": 0.13,
"step": 8775
},
{
"loss": 1.1762,
"grad_norm": 1.519467830657959,
"learning_rate": 0.00017357844462467334,
"epoch": 0.13,
"step": 8800
},
{
"loss": 1.2796,
"grad_norm": 1.461225986480713,
"learning_rate": 0.00017350334925355203,
"epoch": 0.13,
"step": 8825
},
{
"loss": 1.2516,
"grad_norm": 1.52583646774292,
"learning_rate": 0.0001734282538824307,
"epoch": 0.13,
"step": 8850
},
{
"loss": 1.1365,
"grad_norm": 1.617851734161377,
"learning_rate": 0.0001733531585113094,
"epoch": 0.13,
"step": 8875
},
{
"loss": 1.1494,
"grad_norm": 1.112012267112732,
"learning_rate": 0.00017327806314018803,
"epoch": 0.13,
"step": 8900
},
{
"loss": 1.199,
"grad_norm": 1.9818586111068726,
"learning_rate": 0.00017320296776906672,
"epoch": 0.13,
"step": 8925
},
{
"loss": 1.2784,
"grad_norm": 1.3736644983291626,
"learning_rate": 0.00017312787239794539,
"epoch": 0.13,
"step": 8950
},
{
"loss": 1.2714,
"grad_norm": 1.3875724077224731,
"learning_rate": 0.00017305277702682408,
"epoch": 0.13,
"step": 8975
},
{
"loss": 1.2155,
"grad_norm": 1.4930505752563477,
"learning_rate": 0.00017297768165570277,
"epoch": 0.14,
"step": 9000
},
{
"loss": 1.237,
"grad_norm": 1.9728326797485352,
"learning_rate": 0.00017290258628458144,
"epoch": 0.14,
"step": 9025
},
{
"loss": 1.1828,
"grad_norm": 1.2152589559555054,
"learning_rate": 0.0001728274909134601,
"epoch": 0.14,
"step": 9050
},
{
"loss": 1.2602,
"grad_norm": 2.241239547729492,
"learning_rate": 0.00017275239554233877,
"epoch": 0.14,
"step": 9075
},
{
"loss": 1.2174,
"grad_norm": 2.192455768585205,
"learning_rate": 0.00017267730017121746,
"epoch": 0.14,
"step": 9100
},
{
"loss": 1.2197,
"grad_norm": 1.2781050205230713,
"learning_rate": 0.00017260220480009612,
"epoch": 0.14,
"step": 9125
},
{
"loss": 1.1877,
"grad_norm": 1.4289566278457642,
"learning_rate": 0.00017252710942897482,
"epoch": 0.14,
"step": 9150
},
{
"loss": 1.2482,
"grad_norm": 1.4809447526931763,
"learning_rate": 0.00017245201405785348,
"epoch": 0.14,
"step": 9175
},
{
"loss": 1.2194,
"grad_norm": 1.5703109502792358,
"learning_rate": 0.00017237691868673215,
"epoch": 0.14,
"step": 9200
},
{
"loss": 1.2036,
"grad_norm": 1.5251587629318237,
"learning_rate": 0.00017230182331561084,
"epoch": 0.14,
"step": 9225
},
{
"loss": 1.2469,
"grad_norm": 0.9070261120796204,
"learning_rate": 0.0001722267279444895,
"epoch": 0.14,
"step": 9250
},
{
"loss": 1.2316,
"grad_norm": 2.398056745529175,
"learning_rate": 0.0001721516325733682,
"epoch": 0.14,
"step": 9275
},
{
"loss": 1.2225,
"grad_norm": 1.3680628538131714,
"learning_rate": 0.00017207653720224686,
"epoch": 0.14,
"step": 9300
},
{
"loss": 1.2154,
"grad_norm": 1.577989935874939,
"learning_rate": 0.00017200144183112556,
"epoch": 0.14,
"step": 9325
},
{
"loss": 1.1482,
"grad_norm": 1.868891716003418,
"learning_rate": 0.0001719263464600042,
"epoch": 0.14,
"step": 9350
},
{
"loss": 1.2566,
"grad_norm": 2.225888729095459,
"learning_rate": 0.00017185125108888289,
"epoch": 0.14,
"step": 9375
},
{
"loss": 1.3212,
"grad_norm": 1.519579529762268,
"learning_rate": 0.00017177615571776155,
"epoch": 0.14,
"step": 9400
},
{
"loss": 1.2287,
"grad_norm": 1.0716164112091064,
"learning_rate": 0.00017170106034664024,
"epoch": 0.14,
"step": 9425
},
{
"loss": 1.1992,
"grad_norm": 1.8280526399612427,
"learning_rate": 0.00017162596497551894,
"epoch": 0.14,
"step": 9450
},
{
"loss": 1.1633,
"grad_norm": 1.8167325258255005,
"learning_rate": 0.0001715508696043976,
"epoch": 0.14,
"step": 9475
},
{
"loss": 1.2104,
"grad_norm": 1.3616135120391846,
"learning_rate": 0.00017147577423327627,
"epoch": 0.14,
"step": 9500
},
{
"loss": 1.1943,
"grad_norm": 2.055335283279419,
"learning_rate": 0.00017140067886215493,
"epoch": 0.14,
"step": 9525
},
{
"loss": 1.1744,
"grad_norm": 1.173204779624939,
"learning_rate": 0.00017132558349103362,
"epoch": 0.14,
"step": 9550
},
{
"loss": 1.204,
"grad_norm": 1.9650391340255737,
"learning_rate": 0.0001712504881199123,
"epoch": 0.14,
"step": 9575
},
{
"loss": 1.1757,
"grad_norm": 1.167233943939209,
"learning_rate": 0.00017117539274879098,
"epoch": 0.14,
"step": 9600
},
{
"loss": 1.1676,
"grad_norm": 1.160571575164795,
"learning_rate": 0.00017110029737766965,
"epoch": 0.14,
"step": 9625
},
{
"loss": 1.2729,
"grad_norm": 1.3096935749053955,
"learning_rate": 0.0001710252020065483,
"epoch": 0.14,
"step": 9650
},
{
"loss": 1.1832,
"grad_norm": 1.2549477815628052,
"learning_rate": 0.000170950106635427,
"epoch": 0.15,
"step": 9675
},
{
"loss": 1.1984,
"grad_norm": 1.156880497932434,
"learning_rate": 0.00017087501126430567,
"epoch": 0.15,
"step": 9700
},
{
"loss": 1.2719,
"grad_norm": 1.974812626838684,
"learning_rate": 0.00017079991589318436,
"epoch": 0.15,
"step": 9725
},
{
"loss": 1.2269,
"grad_norm": 1.3916237354278564,
"learning_rate": 0.00017072482052206303,
"epoch": 0.15,
"step": 9750
},
{
"loss": 1.2587,
"grad_norm": 1.1099380254745483,
"learning_rate": 0.00017064972515094172,
"epoch": 0.15,
"step": 9775
},
{
"loss": 1.171,
"grad_norm": 1.169327735900879,
"learning_rate": 0.00017057462977982036,
"epoch": 0.15,
"step": 9800
},
{
"loss": 1.1879,
"grad_norm": 1.3058301210403442,
"learning_rate": 0.00017049953440869905,
"epoch": 0.15,
"step": 9825
},
{
"loss": 1.3067,
"grad_norm": 1.6860321760177612,
"learning_rate": 0.00017042443903757772,
"epoch": 0.15,
"step": 9850
},
{
"loss": 1.2436,
"grad_norm": 1.7748676538467407,
"learning_rate": 0.0001703493436664564,
"epoch": 0.15,
"step": 9875
},
{
"loss": 1.2847,
"grad_norm": 1.3805527687072754,
"learning_rate": 0.0001702742482953351,
"epoch": 0.15,
"step": 9900
},
{
"loss": 1.1507,
"grad_norm": 1.1719329357147217,
"learning_rate": 0.00017019915292421377,
"epoch": 0.15,
"step": 9925
},
{
"loss": 1.1982,
"grad_norm": 3.254032850265503,
"learning_rate": 0.00017012405755309243,
"epoch": 0.15,
"step": 9950
},
{
"loss": 1.2556,
"grad_norm": 1.6937150955200195,
"learning_rate": 0.0001700489621819711,
"epoch": 0.15,
"step": 9975
},
{
"loss": 1.1951,
"grad_norm": 0.9732112884521484,
"learning_rate": 0.0001699738668108498,
"epoch": 0.15,
"step": 10000
},
{
"loss": 1.2225,
"grad_norm": 1.152357816696167,
"learning_rate": 0.00016989877143972845,
"epoch": 0.15,
"step": 10025
},
{
"loss": 1.2185,
"grad_norm": 1.9174104928970337,
"learning_rate": 0.00016982367606860715,
"epoch": 0.15,
"step": 10050
},
{
"loss": 1.1886,
"grad_norm": 2.638831377029419,
"learning_rate": 0.0001697485806974858,
"epoch": 0.15,
"step": 10075
},
{
"loss": 1.2805,
"grad_norm": 1.4505808353424072,
"learning_rate": 0.00016967348532636448,
"epoch": 0.15,
"step": 10100
},
{
"loss": 1.2714,
"grad_norm": 1.9908664226531982,
"learning_rate": 0.00016959838995524317,
"epoch": 0.15,
"step": 10125
},
{
"loss": 1.2362,
"grad_norm": 0.9299131035804749,
"learning_rate": 0.00016952329458412184,
"epoch": 0.15,
"step": 10150
},
{
"loss": 1.212,
"grad_norm": 2.036597490310669,
"learning_rate": 0.00016944819921300053,
"epoch": 0.15,
"step": 10175
},
{
"loss": 1.1789,
"grad_norm": 0.9963513016700745,
"learning_rate": 0.0001693731038418792,
"epoch": 0.15,
"step": 10200
},
{
"loss": 1.2206,
"grad_norm": 1.2980750799179077,
"learning_rate": 0.00016929800847075789,
"epoch": 0.15,
"step": 10225
},
{
"loss": 1.2015,
"grad_norm": 2.1614062786102295,
"learning_rate": 0.00016922291309963652,
"epoch": 0.15,
"step": 10250
},
{
"loss": 1.2411,
"grad_norm": 2.045715093612671,
"learning_rate": 0.00016914781772851522,
"epoch": 0.15,
"step": 10275
},
{
"loss": 1.2117,
"grad_norm": 1.5198246240615845,
"learning_rate": 0.0001690727223573939,
"epoch": 0.15,
"step": 10300
},
{
"loss": 1.2269,
"grad_norm": 1.563063383102417,
"learning_rate": 0.00016900063080111744,
"epoch": 0.16,
"step": 10325
},
{
"loss": 1.2857,
"grad_norm": 1.2115886211395264,
"learning_rate": 0.0001689255354299961,
"epoch": 0.16,
"step": 10350
},
{
"loss": 1.2339,
"grad_norm": 1.6522163152694702,
"learning_rate": 0.00016885044005887477,
"epoch": 0.16,
"step": 10375
},
{
"loss": 1.2416,
"grad_norm": 1.334186315536499,
"learning_rate": 0.00016877534468775343,
"epoch": 0.16,
"step": 10400
},
{
"loss": 1.1811,
"grad_norm": 2.520540475845337,
"learning_rate": 0.00016870024931663213,
"epoch": 0.16,
"step": 10425
},
{
"loss": 1.2504,
"grad_norm": 1.4244968891143799,
"learning_rate": 0.00016862515394551082,
"epoch": 0.16,
"step": 10450
},
{
"loss": 1.3024,
"grad_norm": 1.7212327718734741,
"learning_rate": 0.00016855005857438948,
"epoch": 0.16,
"step": 10475
},
{
"loss": 1.2481,
"grad_norm": 1.369234323501587,
"learning_rate": 0.00016847496320326818,
"epoch": 0.16,
"step": 10500
},
{
"loss": 1.2456,
"grad_norm": 1.2592421770095825,
"learning_rate": 0.00016839986783214684,
"epoch": 0.16,
"step": 10525
},
{
"loss": 1.2845,
"grad_norm": 1.891320824623108,
"learning_rate": 0.0001683247724610255,
"epoch": 0.16,
"step": 10550
},
{
"loss": 1.1581,
"grad_norm": 1.653385877609253,
"learning_rate": 0.00016824967708990417,
"epoch": 0.16,
"step": 10575
},
{
"loss": 1.2469,
"grad_norm": 1.4522532224655151,
"learning_rate": 0.00016817458171878286,
"epoch": 0.16,
"step": 10600
},
{
"loss": 1.1964,
"grad_norm": 2.0278687477111816,
"learning_rate": 0.00016809948634766153,
"epoch": 0.16,
"step": 10625
},
{
"loss": 1.1826,
"grad_norm": 7.241672039031982,
"learning_rate": 0.00016802439097654022,
"epoch": 0.16,
"step": 10650
},
{
"loss": 1.2321,
"grad_norm": 1.5438281297683716,
"learning_rate": 0.0001679553032351086,
"epoch": 0.16,
"step": 10675
},
{
"loss": 1.1776,
"grad_norm": 1.0334819555282593,
"learning_rate": 0.00016788020786398728,
"epoch": 0.16,
"step": 10700
},
{
"loss": 1.2905,
"grad_norm": 1.578046202659607,
"learning_rate": 0.0001678081163077108,
"epoch": 0.16,
"step": 10725
},
{
"loss": 1.1721,
"grad_norm": 1.1447938680648804,
"learning_rate": 0.00016773302093658947,
"epoch": 0.16,
"step": 10750
},
{
"loss": 1.1305,
"grad_norm": 1.328674554824829,
"learning_rate": 0.00016765792556546817,
"epoch": 0.16,
"step": 10775
},
{
"loss": 1.1729,
"grad_norm": 1.5958043336868286,
"learning_rate": 0.00016758283019434683,
"epoch": 0.16,
"step": 10800
},
{
"loss": 1.1459,
"grad_norm": 1.4962025880813599,
"learning_rate": 0.0001675077348232255,
"epoch": 0.16,
"step": 10825
},
{
"loss": 1.1864,
"grad_norm": 1.540279507637024,
"learning_rate": 0.0001674326394521042,
"epoch": 0.16,
"step": 10850
},
{
"loss": 1.2842,
"grad_norm": 1.6456973552703857,
"learning_rate": 0.00016735754408098285,
"epoch": 0.16,
"step": 10875
},
{
"loss": 1.2236,
"grad_norm": 1.9610776901245117,
"learning_rate": 0.00016728244870986155,
"epoch": 0.16,
"step": 10900
},
{
"loss": 1.1825,
"grad_norm": 1.4995285272598267,
"learning_rate": 0.0001672073533387402,
"epoch": 0.16,
"step": 10925
},
{
"loss": 1.2341,
"grad_norm": 1.0755623579025269,
"learning_rate": 0.00016713225796761888,
"epoch": 0.16,
"step": 10950
},
{
"loss": 1.2119,
"grad_norm": 1.8127145767211914,
"learning_rate": 0.00016705716259649754,
"epoch": 0.16,
"step": 10975
},
{
"loss": 1.2211,
"grad_norm": 1.8502295017242432,
"learning_rate": 0.00016698206722537623,
"epoch": 0.17,
"step": 11000
},
{
"loss": 1.2158,
"grad_norm": 1.6311461925506592,
"learning_rate": 0.0001669069718542549,
"epoch": 0.17,
"step": 11025
},
{
"loss": 1.2579,
"grad_norm": 1.3985036611557007,
"learning_rate": 0.0001668318764831336,
"epoch": 0.17,
"step": 11050
},
{
"loss": 1.1595,
"grad_norm": 2.4565913677215576,
"learning_rate": 0.00016675678111201228,
"epoch": 0.17,
"step": 11075
},
{
"loss": 1.2151,
"grad_norm": 1.9943021535873413,
"learning_rate": 0.00016668168574089092,
"epoch": 0.17,
"step": 11100
},
{
"loss": 1.2253,
"grad_norm": 1.246576189994812,
"learning_rate": 0.00016660659036976962,
"epoch": 0.17,
"step": 11125
},
{
"loss": 1.1946,
"grad_norm": 1.1769171953201294,
"learning_rate": 0.00016653149499864828,
"epoch": 0.17,
"step": 11150
},
{
"loss": 1.2189,
"grad_norm": 1.2264093160629272,
"learning_rate": 0.00016645639962752697,
"epoch": 0.17,
"step": 11175
},
{
"loss": 1.1773,
"grad_norm": 1.0736924409866333,
"learning_rate": 0.00016638130425640564,
"epoch": 0.17,
"step": 11200
},
{
"loss": 1.2057,
"grad_norm": 1.527783751487732,
"learning_rate": 0.00016630620888528433,
"epoch": 0.17,
"step": 11225
},
{
"loss": 1.2292,
"grad_norm": 1.3747711181640625,
"learning_rate": 0.000166231113514163,
"epoch": 0.17,
"step": 11250
},
{
"loss": 1.2385,
"grad_norm": 1.5345367193222046,
"learning_rate": 0.00016615601814304166,
"epoch": 0.17,
"step": 11275
},
{
"loss": 1.2456,
"grad_norm": 1.1881415843963623,
"learning_rate": 0.00016608092277192035,
"epoch": 0.17,
"step": 11300
},
{
"loss": 1.2099,
"grad_norm": 1.1072256565093994,
"learning_rate": 0.00016600582740079902,
"epoch": 0.17,
"step": 11325
},
{
"loss": 1.2128,
"grad_norm": 2.2356455326080322,
"learning_rate": 0.0001659307320296777,
"epoch": 0.17,
"step": 11350
},
{
"loss": 1.1638,
"grad_norm": 3.343397855758667,
"learning_rate": 0.00016585563665855638,
"epoch": 0.17,
"step": 11375
},
{
"loss": 1.1879,
"grad_norm": 1.5066440105438232,
"learning_rate": 0.00016578054128743504,
"epoch": 0.17,
"step": 11400
},
{
"loss": 1.1868,
"grad_norm": 1.4099555015563965,
"learning_rate": 0.0001657054459163137,
"epoch": 0.17,
"step": 11425
},
{
"loss": 1.1882,
"grad_norm": 1.6867519617080688,
"learning_rate": 0.0001656303505451924,
"epoch": 0.17,
"step": 11450
},
{
"loss": 1.1986,
"grad_norm": 1.4436876773834229,
"learning_rate": 0.0001655552551740711,
"epoch": 0.17,
"step": 11475
},
{
"loss": 1.2023,
"grad_norm": 1.2447092533111572,
"learning_rate": 0.00016548015980294976,
"epoch": 0.17,
"step": 11500
},
{
"loss": 1.1136,
"grad_norm": 2.2803616523742676,
"learning_rate": 0.00016540506443182845,
"epoch": 0.17,
"step": 11525
},
{
"loss": 1.2135,
"grad_norm": 2.992870807647705,
"learning_rate": 0.00016532996906070712,
"epoch": 0.17,
"step": 11550
},
{
"loss": 1.1864,
"grad_norm": 1.6845029592514038,
"learning_rate": 0.00016525487368958578,
"epoch": 0.17,
"step": 11575
},
{
"loss": 1.1924,
"grad_norm": 1.52716863155365,
"learning_rate": 0.00016517977831846445,
"epoch": 0.17,
"step": 11600
},
{
"loss": 1.2462,
"grad_norm": 1.1273847818374634,
"learning_rate": 0.00016510468294734314,
"epoch": 0.17,
"step": 11625
},
{
"loss": 1.2411,
"grad_norm": 1.472161054611206,
"learning_rate": 0.0001650295875762218,
"epoch": 0.17,
"step": 11650
},
{
"loss": 1.2534,
"grad_norm": 1.0381845235824585,
"learning_rate": 0.0001649544922051005,
"epoch": 0.18,
"step": 11675
},
{
"loss": 1.1666,
"grad_norm": 1.7663735151290894,
"learning_rate": 0.00016487939683397916,
"epoch": 0.18,
"step": 11700
},
{
"loss": 1.1811,
"grad_norm": 1.8977352380752563,
"learning_rate": 0.00016480430146285783,
"epoch": 0.18,
"step": 11725
},
{
"loss": 1.2674,
"grad_norm": 1.2944955825805664,
"learning_rate": 0.00016472920609173652,
"epoch": 0.18,
"step": 11750
},
{
"loss": 1.259,
"grad_norm": 1.2735075950622559,
"learning_rate": 0.00016465411072061518,
"epoch": 0.18,
"step": 11775
},
{
"loss": 1.1718,
"grad_norm": 1.3027160167694092,
"learning_rate": 0.00016457901534949388,
"epoch": 0.18,
"step": 11800
},
{
"loss": 1.2034,
"grad_norm": 1.0722211599349976,
"learning_rate": 0.00016450391997837254,
"epoch": 0.18,
"step": 11825
},
{
"loss": 1.1761,
"grad_norm": 2.5194356441497803,
"learning_rate": 0.0001644288246072512,
"epoch": 0.18,
"step": 11850
},
{
"loss": 1.213,
"grad_norm": 1.5070539712905884,
"learning_rate": 0.00016435372923612987,
"epoch": 0.18,
"step": 11875
},
{
"loss": 1.1899,
"grad_norm": 2.126924991607666,
"learning_rate": 0.00016427863386500857,
"epoch": 0.18,
"step": 11900
},
{
"loss": 1.2114,
"grad_norm": 1.6474621295928955,
"learning_rate": 0.00016420353849388726,
"epoch": 0.18,
"step": 11925
},
{
"loss": 1.2885,
"grad_norm": 1.4247677326202393,
"learning_rate": 0.00016412844312276592,
"epoch": 0.18,
"step": 11950
},
{
"loss": 1.1922,
"grad_norm": 1.7299450635910034,
"learning_rate": 0.00016405334775164462,
"epoch": 0.18,
"step": 11975
},
{
"loss": 1.255,
"grad_norm": 1.1741243600845337,
"learning_rate": 0.00016397825238052328,
"epoch": 0.18,
"step": 12000
},
{
"loss": 1.1358,
"grad_norm": 3.301985502243042,
"learning_rate": 0.00016390315700940195,
"epoch": 0.18,
"step": 12025
},
{
"loss": 1.2252,
"grad_norm": 1.730089783668518,
"learning_rate": 0.0001638280616382806,
"epoch": 0.18,
"step": 12050
},
{
"loss": 1.2015,
"grad_norm": 0.9660411477088928,
"learning_rate": 0.0001637529662671593,
"epoch": 0.18,
"step": 12075
},
{
"loss": 1.1952,
"grad_norm": 1.2349923849105835,
"learning_rate": 0.00016367787089603797,
"epoch": 0.18,
"step": 12100
},
{
"loss": 1.1539,
"grad_norm": 1.5074914693832397,
"learning_rate": 0.00016360277552491666,
"epoch": 0.18,
"step": 12125
},
{
"loss": 1.2497,
"grad_norm": 1.2159485816955566,
"learning_rate": 0.00016352768015379533,
"epoch": 0.18,
"step": 12150
},
{
"loss": 1.2624,
"grad_norm": 1.7598042488098145,
"learning_rate": 0.000163452584782674,
"epoch": 0.18,
"step": 12175
},
{
"loss": 1.2243,
"grad_norm": 1.2727563381195068,
"learning_rate": 0.00016337748941155268,
"epoch": 0.18,
"step": 12200
},
{
"loss": 1.2093,
"grad_norm": 1.205769658088684,
"learning_rate": 0.00016330239404043135,
"epoch": 0.18,
"step": 12225
},
{
"loss": 1.1866,
"grad_norm": 5.114007949829102,
"learning_rate": 0.00016322729866931004,
"epoch": 0.18,
"step": 12250
},
{
"loss": 1.2824,
"grad_norm": 2.016160488128662,
"learning_rate": 0.0001631522032981887,
"epoch": 0.18,
"step": 12275
},
{
"loss": 1.1501,
"grad_norm": 1.1405665874481201,
"learning_rate": 0.00016307710792706737,
"epoch": 0.18,
"step": 12300
},
{
"loss": 1.1795,
"grad_norm": 2.2503092288970947,
"learning_rate": 0.00016300201255594604,
"epoch": 0.19,
"step": 12325
},
{
"loss": 1.2146,
"grad_norm": 1.0414721965789795,
"learning_rate": 0.00016292691718482473,
"epoch": 0.19,
"step": 12350
},
{
"loss": 1.2338,
"grad_norm": 1.7036515474319458,
"learning_rate": 0.00016285182181370342,
"epoch": 0.19,
"step": 12375
},
{
"loss": 1.2638,
"grad_norm": 1.2566134929656982,
"learning_rate": 0.0001627767264425821,
"epoch": 0.19,
"step": 12400
},
{
"loss": 1.1694,
"grad_norm": 1.4524366855621338,
"learning_rate": 0.00016270163107146078,
"epoch": 0.19,
"step": 12425
},
{
"loss": 1.1826,
"grad_norm": 1.2547303438186646,
"learning_rate": 0.00016262653570033945,
"epoch": 0.19,
"step": 12450
},
{
"loss": 1.1885,
"grad_norm": 3.284105062484741,
"learning_rate": 0.0001625514403292181,
"epoch": 0.19,
"step": 12475
},
{
"loss": 1.1785,
"grad_norm": 1.14161217212677,
"learning_rate": 0.00016247634495809678,
"epoch": 0.19,
"step": 12500
},
{
"loss": 1.156,
"grad_norm": 1.9379956722259521,
"learning_rate": 0.00016240124958697547,
"epoch": 0.19,
"step": 12525
},
{
"loss": 1.1911,
"grad_norm": 1.1594531536102295,
"learning_rate": 0.00016232615421585413,
"epoch": 0.19,
"step": 12550
},
{
"loss": 1.1905,
"grad_norm": 1.3584635257720947,
"learning_rate": 0.00016225105884473283,
"epoch": 0.19,
"step": 12575
},
{
"loss": 1.2023,
"grad_norm": 1.402160406112671,
"learning_rate": 0.0001621759634736115,
"epoch": 0.19,
"step": 12600
},
{
"loss": 1.235,
"grad_norm": 1.3611042499542236,
"learning_rate": 0.00016210086810249016,
"epoch": 0.19,
"step": 12625
},
{
"loss": 1.1894,
"grad_norm": 1.1458463668823242,
"learning_rate": 0.00016202577273136885,
"epoch": 0.19,
"step": 12650
},
{
"loss": 1.1829,
"grad_norm": 0.8500710725784302,
"learning_rate": 0.00016195067736024752,
"epoch": 0.19,
"step": 12675
},
{
"loss": 1.1632,
"grad_norm": 1.5723693370819092,
"learning_rate": 0.0001618755819891262,
"epoch": 0.19,
"step": 12700
},
{
"loss": 1.1982,
"grad_norm": 1.399224042892456,
"learning_rate": 0.00016180048661800487,
"epoch": 0.19,
"step": 12725
},
{
"loss": 1.2511,
"grad_norm": 2.703968048095703,
"learning_rate": 0.00016172539124688354,
"epoch": 0.19,
"step": 12750
},
{
"loss": 1.1905,
"grad_norm": 1.6090725660324097,
"learning_rate": 0.00016165029587576223,
"epoch": 0.19,
"step": 12775
},
{
"loss": 1.2074,
"grad_norm": 2.323432207107544,
"learning_rate": 0.0001615752005046409,
"epoch": 0.19,
"step": 12800
},
{
"loss": 1.2514,
"grad_norm": 1.0441837310791016,
"learning_rate": 0.0001615001051335196,
"epoch": 0.19,
"step": 12825
},
{
"loss": 1.2018,
"grad_norm": 1.3072987794876099,
"learning_rate": 0.00016142500976239825,
"epoch": 0.19,
"step": 12850
},
{
"loss": 1.169,
"grad_norm": 2.1105244159698486,
"learning_rate": 0.00016134991439127695,
"epoch": 0.19,
"step": 12875
},
{
"loss": 1.2361,
"grad_norm": 1.4109976291656494,
"learning_rate": 0.0001612748190201556,
"epoch": 0.19,
"step": 12900
},
{
"loss": 1.2543,
"grad_norm": 1.5119200944900513,
"learning_rate": 0.00016119972364903428,
"epoch": 0.19,
"step": 12925
},
{
"loss": 1.2326,
"grad_norm": 1.3456885814666748,
"learning_rate": 0.00016112462827791294,
"epoch": 0.19,
"step": 12950
},
{
"loss": 1.1761,
"grad_norm": 2.7535812854766846,
"learning_rate": 0.00016104953290679163,
"epoch": 0.19,
"step": 12975
},
{
"loss": 1.1413,
"grad_norm": 1.665337085723877,
"learning_rate": 0.00016097443753567033,
"epoch": 0.2,
"step": 13000
},
{
"loss": 1.1836,
"grad_norm": 1.1174890995025635,
"learning_rate": 0.000160899342164549,
"epoch": 0.2,
"step": 13025
},
{
"loss": 1.1405,
"grad_norm": 2.4042136669158936,
"learning_rate": 0.00016082424679342766,
"epoch": 0.2,
"step": 13050
},
{
"loss": 1.2339,
"grad_norm": 2.3713090419769287,
"learning_rate": 0.00016074915142230632,
"epoch": 0.2,
"step": 13075
},
{
"loss": 1.2351,
"grad_norm": 1.7716904878616333,
"learning_rate": 0.00016067405605118502,
"epoch": 0.2,
"step": 13100
},
{
"loss": 1.194,
"grad_norm": 1.2277339696884155,
"learning_rate": 0.00016059896068006368,
"epoch": 0.2,
"step": 13125
},
{
"loss": 1.2429,
"grad_norm": 1.2725192308425903,
"learning_rate": 0.00016052386530894237,
"epoch": 0.2,
"step": 13150
},
{
"loss": 1.1837,
"grad_norm": 1.4028089046478271,
"learning_rate": 0.00016044876993782104,
"epoch": 0.2,
"step": 13175
},
{
"loss": 1.2633,
"grad_norm": 3.1674065589904785,
"learning_rate": 0.0001603736745666997,
"epoch": 0.2,
"step": 13200
},
{
"loss": 1.2575,
"grad_norm": 1.3717881441116333,
"learning_rate": 0.0001602985791955784,
"epoch": 0.2,
"step": 13225
},
{
"loss": 1.2385,
"grad_norm": 1.5640596151351929,
"learning_rate": 0.00016022348382445706,
"epoch": 0.2,
"step": 13250
},
{
"loss": 1.231,
"grad_norm": 1.336003303527832,
"learning_rate": 0.00016014838845333575,
"epoch": 0.2,
"step": 13275
},
{
"loss": 1.1247,
"grad_norm": 1.0398321151733398,
"learning_rate": 0.00016007329308221442,
"epoch": 0.2,
"step": 13300
},
{
"loss": 1.1848,
"grad_norm": 1.5215067863464355,
"learning_rate": 0.0001599981977110931,
"epoch": 0.2,
"step": 13325
},
{
"loss": 1.2053,
"grad_norm": 1.184665560722351,
"learning_rate": 0.00015992310233997178,
"epoch": 0.2,
"step": 13350
},
{
"loss": 1.2213,
"grad_norm": 3.2756311893463135,
"learning_rate": 0.00015984800696885044,
"epoch": 0.2,
"step": 13375
},
{
"loss": 1.1623,
"grad_norm": 2.2092206478118896,
"learning_rate": 0.0001597729115977291,
"epoch": 0.2,
"step": 13400
},
{
"loss": 1.1939,
"grad_norm": 1.701504111289978,
"learning_rate": 0.0001596978162266078,
"epoch": 0.2,
"step": 13425
},
{
"loss": 1.194,
"grad_norm": 1.0575650930404663,
"learning_rate": 0.0001596227208554865,
"epoch": 0.2,
"step": 13450
},
{
"loss": 1.1561,
"grad_norm": 2.7198948860168457,
"learning_rate": 0.00015954762548436516,
"epoch": 0.2,
"step": 13475
},
{
"loss": 1.1449,
"grad_norm": 1.2031759023666382,
"learning_rate": 0.00015947253011324382,
"epoch": 0.2,
"step": 13500
},
{
"loss": 1.195,
"grad_norm": 1.3267816305160522,
"learning_rate": 0.0001593974347421225,
"epoch": 0.2,
"step": 13525
},
{
"loss": 1.1566,
"grad_norm": 1.4941660165786743,
"learning_rate": 0.00015932233937100118,
"epoch": 0.2,
"step": 13550
},
{
"loss": 1.1218,
"grad_norm": 0.9819481372833252,
"learning_rate": 0.00015924724399987985,
"epoch": 0.2,
"step": 13575
},
{
"loss": 1.2457,
"grad_norm": 1.1329920291900635,
"learning_rate": 0.00015917214862875854,
"epoch": 0.2,
"step": 13600
},
{
"loss": 1.2218,
"grad_norm": 1.0208684206008911,
"learning_rate": 0.0001590970532576372,
"epoch": 0.2,
"step": 13625
},
{
"loss": 1.3095,
"grad_norm": 1.9692599773406982,
"learning_rate": 0.00015902195788651587,
"epoch": 0.21,
"step": 13650
},
{
"loss": 1.1426,
"grad_norm": 1.1488243341445923,
"learning_rate": 0.00015894686251539456,
"epoch": 0.21,
"step": 13675
},
{
"loss": 1.1786,
"grad_norm": 2.137523651123047,
"learning_rate": 0.00015887176714427323,
"epoch": 0.21,
"step": 13700
},
{
"loss": 1.2123,
"grad_norm": 1.74925696849823,
"learning_rate": 0.00015879667177315192,
"epoch": 0.21,
"step": 13725
},
{
"loss": 1.2237,
"grad_norm": 1.931201457977295,
"learning_rate": 0.00015872157640203058,
"epoch": 0.21,
"step": 13750
},
{
"loss": 1.1822,
"grad_norm": 1.3742233514785767,
"learning_rate": 0.00015864648103090928,
"epoch": 0.21,
"step": 13775
},
{
"loss": 1.2393,
"grad_norm": 1.860449194908142,
"learning_rate": 0.00015857138565978794,
"epoch": 0.21,
"step": 13800
},
{
"loss": 1.16,
"grad_norm": 2.664776086807251,
"learning_rate": 0.0001584962902886666,
"epoch": 0.21,
"step": 13825
},
{
"loss": 1.172,
"grad_norm": 2.5164761543273926,
"learning_rate": 0.00015842119491754527,
"epoch": 0.21,
"step": 13850
},
{
"loss": 1.1531,
"grad_norm": 1.644278645515442,
"learning_rate": 0.00015834609954642397,
"epoch": 0.21,
"step": 13875
},
{
"loss": 1.2801,
"grad_norm": 1.2100858688354492,
"learning_rate": 0.00015827100417530266,
"epoch": 0.21,
"step": 13900
},
{
"loss": 1.2011,
"grad_norm": 1.9542933702468872,
"learning_rate": 0.00015819590880418132,
"epoch": 0.21,
"step": 13925
},
{
"loss": 1.2344,
"grad_norm": 1.1991852521896362,
"learning_rate": 0.00015812081343306,
"epoch": 0.21,
"step": 13950
},
{
"loss": 1.1884,
"grad_norm": 1.9113025665283203,
"learning_rate": 0.00015804571806193865,
"epoch": 0.21,
"step": 13975
},
{
"loss": 1.242,
"grad_norm": 1.4621787071228027,
"learning_rate": 0.00015797062269081735,
"epoch": 0.21,
"step": 14000
},
{
"loss": 1.1961,
"grad_norm": 1.9302442073822021,
"learning_rate": 0.000157895527319696,
"epoch": 0.21,
"step": 14025
},
{
"loss": 1.2159,
"grad_norm": 1.3267945051193237,
"learning_rate": 0.0001578204319485747,
"epoch": 0.21,
"step": 14050
},
{
"loss": 1.1573,
"grad_norm": 1.2569104433059692,
"learning_rate": 0.0001577453365774534,
"epoch": 0.21,
"step": 14075
},
{
"loss": 1.2149,
"grad_norm": 1.3353804349899292,
"learning_rate": 0.00015767024120633203,
"epoch": 0.21,
"step": 14100
},
{
"loss": 1.198,
"grad_norm": 1.9309898614883423,
"learning_rate": 0.00015759514583521073,
"epoch": 0.21,
"step": 14125
},
{
"loss": 1.1742,
"grad_norm": 1.2149921655654907,
"learning_rate": 0.0001575200504640894,
"epoch": 0.21,
"step": 14150
},
{
"loss": 1.1855,
"grad_norm": 1.9573317766189575,
"learning_rate": 0.00015744495509296808,
"epoch": 0.21,
"step": 14175
},
{
"loss": 1.2459,
"grad_norm": 1.384567379951477,
"learning_rate": 0.00015736985972184675,
"epoch": 0.21,
"step": 14200
},
{
"loss": 1.1853,
"grad_norm": 1.7285842895507812,
"learning_rate": 0.00015729476435072544,
"epoch": 0.21,
"step": 14225
},
{
"loss": 1.1728,
"grad_norm": 2.050541877746582,
"learning_rate": 0.0001572196689796041,
"epoch": 0.21,
"step": 14250
},
{
"loss": 1.2248,
"grad_norm": 1.735643744468689,
"learning_rate": 0.00015714457360848277,
"epoch": 0.21,
"step": 14275
},
{
"loss": 1.1792,
"grad_norm": 1.511836290359497,
"learning_rate": 0.00015706947823736147,
"epoch": 0.21,
"step": 14300
},
{
"loss": 1.1978,
"grad_norm": 1.1453663110733032,
"learning_rate": 0.00015699438286624013,
"epoch": 0.22,
"step": 14325
},
{
"loss": 1.1747,
"grad_norm": 1.8787868022918701,
"learning_rate": 0.00015691928749511882,
"epoch": 0.22,
"step": 14350
},
{
"loss": 1.1946,
"grad_norm": 2.0433459281921387,
"learning_rate": 0.0001568441921239975,
"epoch": 0.22,
"step": 14375
},
{
"loss": 1.1676,
"grad_norm": 1.6258106231689453,
"learning_rate": 0.00015676909675287615,
"epoch": 0.22,
"step": 14400
},
{
"loss": 1.1486,
"grad_norm": 1.0429004430770874,
"learning_rate": 0.00015669400138175482,
"epoch": 0.22,
"step": 14425
},
{
"loss": 1.2211,
"grad_norm": 1.5074403285980225,
"learning_rate": 0.0001566189060106335,
"epoch": 0.22,
"step": 14450
},
{
"loss": 1.2161,
"grad_norm": 1.4326659440994263,
"learning_rate": 0.00015654381063951218,
"epoch": 0.22,
"step": 14475
},
{
"loss": 1.2538,
"grad_norm": 1.8539921045303345,
"learning_rate": 0.00015646871526839087,
"epoch": 0.22,
"step": 14500
},
{
"loss": 1.1929,
"grad_norm": 1.7635362148284912,
"learning_rate": 0.00015639361989726956,
"epoch": 0.22,
"step": 14525
},
{
"loss": 1.2115,
"grad_norm": 1.3895171880722046,
"learning_rate": 0.00015631852452614823,
"epoch": 0.22,
"step": 14550
},
{
"loss": 1.1723,
"grad_norm": 1.5900187492370605,
"learning_rate": 0.0001562434291550269,
"epoch": 0.22,
"step": 14575
},
{
"loss": 1.186,
"grad_norm": 1.7074415683746338,
"learning_rate": 0.00015616833378390556,
"epoch": 0.22,
"step": 14600
},
{
"loss": 1.187,
"grad_norm": 1.3961682319641113,
"learning_rate": 0.00015609323841278425,
"epoch": 0.22,
"step": 14625
},
{
"loss": 1.1981,
"grad_norm": 1.4976271390914917,
"learning_rate": 0.00015601814304166292,
"epoch": 0.22,
"step": 14650
},
{
"loss": 1.1749,
"grad_norm": 1.286617398262024,
"learning_rate": 0.0001559430476705416,
"epoch": 0.22,
"step": 14675
},
{
"loss": 1.1506,
"grad_norm": 1.8841774463653564,
"learning_rate": 0.00015586795229942027,
"epoch": 0.22,
"step": 14700
},
{
"loss": 1.1846,
"grad_norm": 2.3921959400177,
"learning_rate": 0.00015579285692829894,
"epoch": 0.22,
"step": 14725
},
{
"loss": 1.1553,
"grad_norm": 1.139286756515503,
"learning_rate": 0.00015571776155717763,
"epoch": 0.22,
"step": 14750
},
{
"loss": 1.213,
"grad_norm": 1.5389468669891357,
"learning_rate": 0.0001556426661860563,
"epoch": 0.22,
"step": 14775
},
{
"loss": 1.2504,
"grad_norm": 1.1002377271652222,
"learning_rate": 0.000155567570814935,
"epoch": 0.22,
"step": 14800
},
{
"loss": 1.2369,
"grad_norm": 1.2907332181930542,
"learning_rate": 0.00015549247544381365,
"epoch": 0.22,
"step": 14825
},
{
"loss": 1.2327,
"grad_norm": 2.8189125061035156,
"learning_rate": 0.00015541738007269232,
"epoch": 0.22,
"step": 14850
},
{
"loss": 1.2142,
"grad_norm": 1.4760026931762695,
"learning_rate": 0.00015534228470157098,
"epoch": 0.22,
"step": 14875
},
{
"loss": 1.2538,
"grad_norm": 1.4497836828231812,
"learning_rate": 0.00015526718933044968,
"epoch": 0.22,
"step": 14900
},
{
"loss": 1.2757,
"grad_norm": 1.2099194526672363,
"learning_rate": 0.00015519209395932834,
"epoch": 0.22,
"step": 14925
},
{
"loss": 1.2636,
"grad_norm": 1.2008768320083618,
"learning_rate": 0.00015511699858820703,
"epoch": 0.22,
"step": 14950
},
{
"loss": 1.165,
"grad_norm": 4.421905040740967,
"learning_rate": 0.00015504190321708573,
"epoch": 0.22,
"step": 14975
},
{
"loss": 1.164,
"grad_norm": 1.2725390195846558,
"learning_rate": 0.0001549668078459644,
"epoch": 0.23,
"step": 15000
},
{
"loss": 1.2026,
"grad_norm": 2.9403913021087646,
"learning_rate": 0.00015489171247484306,
"epoch": 0.23,
"step": 15025
},
{
"loss": 1.1938,
"grad_norm": 1.8553730249404907,
"learning_rate": 0.00015481661710372172,
"epoch": 0.23,
"step": 15050
},
{
"loss": 1.1879,
"grad_norm": 1.242799162864685,
"learning_rate": 0.00015474152173260042,
"epoch": 0.23,
"step": 15075
},
{
"loss": 1.1512,
"grad_norm": 1.5785107612609863,
"learning_rate": 0.00015466642636147908,
"epoch": 0.23,
"step": 15100
},
{
"loss": 1.1802,
"grad_norm": 2.665036916732788,
"learning_rate": 0.00015459133099035777,
"epoch": 0.23,
"step": 15125
},
{
"loss": 1.1603,
"grad_norm": 1.8509407043457031,
"learning_rate": 0.00015451623561923644,
"epoch": 0.23,
"step": 15150
},
{
"loss": 1.1711,
"grad_norm": 1.2315629720687866,
"learning_rate": 0.0001544411402481151,
"epoch": 0.23,
"step": 15175
},
{
"loss": 1.1784,
"grad_norm": 1.6980071067810059,
"learning_rate": 0.0001543660448769938,
"epoch": 0.23,
"step": 15200
},
{
"loss": 1.2922,
"grad_norm": 1.1929773092269897,
"learning_rate": 0.00015429094950587246,
"epoch": 0.23,
"step": 15225
},
{
"loss": 1.1272,
"grad_norm": 1.722090244293213,
"learning_rate": 0.00015421585413475115,
"epoch": 0.23,
"step": 15250
},
{
"loss": 1.2982,
"grad_norm": 1.712141990661621,
"learning_rate": 0.00015414075876362982,
"epoch": 0.23,
"step": 15275
},
{
"loss": 1.237,
"grad_norm": 2.6743271350860596,
"learning_rate": 0.00015406566339250848,
"epoch": 0.23,
"step": 15300
},
{
"loss": 1.1982,
"grad_norm": 1.842942714691162,
"learning_rate": 0.00015399056802138715,
"epoch": 0.23,
"step": 15325
},
{
"loss": 1.2102,
"grad_norm": 1.8020812273025513,
"learning_rate": 0.00015391547265026584,
"epoch": 0.23,
"step": 15350
},
{
"loss": 1.2009,
"grad_norm": 1.4913078546524048,
"learning_rate": 0.00015384037727914453,
"epoch": 0.23,
"step": 15375
},
{
"loss": 1.2133,
"grad_norm": 1.1852643489837646,
"learning_rate": 0.0001537652819080232,
"epoch": 0.23,
"step": 15400
},
{
"loss": 1.2375,
"grad_norm": 1.9560911655426025,
"learning_rate": 0.0001536901865369019,
"epoch": 0.23,
"step": 15425
},
{
"loss": 1.2484,
"grad_norm": 1.743415355682373,
"learning_rate": 0.00015361509116578056,
"epoch": 0.23,
"step": 15450
},
{
"loss": 1.1939,
"grad_norm": 2.6720640659332275,
"learning_rate": 0.00015353999579465922,
"epoch": 0.23,
"step": 15475
},
{
"loss": 1.2031,
"grad_norm": 1.5238986015319824,
"learning_rate": 0.0001534649004235379,
"epoch": 0.23,
"step": 15500
},
{
"loss": 1.2155,
"grad_norm": 1.7103843688964844,
"learning_rate": 0.00015338980505241658,
"epoch": 0.23,
"step": 15525
},
{
"loss": 1.2001,
"grad_norm": 1.6735540628433228,
"learning_rate": 0.00015331470968129525,
"epoch": 0.23,
"step": 15550
},
{
"loss": 1.1737,
"grad_norm": 1.4866646528244019,
"learning_rate": 0.00015323961431017394,
"epoch": 0.23,
"step": 15575
},
{
"loss": 1.2778,
"grad_norm": 1.4038907289505005,
"learning_rate": 0.0001531645189390526,
"epoch": 0.23,
"step": 15600
},
{
"loss": 1.1966,
"grad_norm": 2.238800048828125,
"learning_rate": 0.00015308942356793127,
"epoch": 0.23,
"step": 15625
},
{
"loss": 1.2119,
"grad_norm": 1.6463327407836914,
"learning_rate": 0.00015301432819680996,
"epoch": 0.24,
"step": 15650
},
{
"loss": 1.2049,
"grad_norm": 1.1655962467193604,
"learning_rate": 0.00015293923282568863,
"epoch": 0.24,
"step": 15675
},
{
"loss": 1.1357,
"grad_norm": 1.2663848400115967,
"learning_rate": 0.00015286413745456732,
"epoch": 0.24,
"step": 15700
},
{
"loss": 1.2133,
"grad_norm": 1.140039324760437,
"learning_rate": 0.00015278904208344598,
"epoch": 0.24,
"step": 15725
},
{
"loss": 1.174,
"grad_norm": 2.119586944580078,
"learning_rate": 0.00015271394671232465,
"epoch": 0.24,
"step": 15750
},
{
"loss": 1.2107,
"grad_norm": 1.7722172737121582,
"learning_rate": 0.00015263885134120332,
"epoch": 0.24,
"step": 15775
},
{
"loss": 1.2139,
"grad_norm": 1.7310364246368408,
"learning_rate": 0.000152563755970082,
"epoch": 0.24,
"step": 15800
},
{
"loss": 1.126,
"grad_norm": 0.9670734405517578,
"learning_rate": 0.0001524886605989607,
"epoch": 0.24,
"step": 15825
},
{
"loss": 1.2521,
"grad_norm": 2.075798511505127,
"learning_rate": 0.00015241356522783937,
"epoch": 0.24,
"step": 15850
},
{
"loss": 1.2131,
"grad_norm": 1.7291430234909058,
"learning_rate": 0.00015233846985671806,
"epoch": 0.24,
"step": 15875
},
{
"loss": 1.2042,
"grad_norm": 2.976837635040283,
"learning_rate": 0.00015226337448559672,
"epoch": 0.24,
"step": 15900
},
{
"loss": 1.2391,
"grad_norm": 1.3992162942886353,
"learning_rate": 0.0001521882791144754,
"epoch": 0.24,
"step": 15925
},
{
"loss": 1.1702,
"grad_norm": 0.8179588317871094,
"learning_rate": 0.00015211618755819892,
"epoch": 0.24,
"step": 15950
},
{
"loss": 1.1432,
"grad_norm": 1.6531869173049927,
"learning_rate": 0.0001520410921870776,
"epoch": 0.24,
"step": 15975
},
{
"loss": 1.1531,
"grad_norm": 2.893293857574463,
"learning_rate": 0.00015196599681595627,
"epoch": 0.24,
"step": 16000
},
{
"loss": 1.1839,
"grad_norm": 1.686982274055481,
"learning_rate": 0.00015189090144483497,
"epoch": 0.24,
"step": 16025
},
{
"loss": 1.1529,
"grad_norm": 1.0813180208206177,
"learning_rate": 0.0001518158060737136,
"epoch": 0.24,
"step": 16050
},
{
"loss": 1.2914,
"grad_norm": 1.8390347957611084,
"learning_rate": 0.0001517407107025923,
"epoch": 0.24,
"step": 16075
},
{
"loss": 1.2263,
"grad_norm": 1.4947305917739868,
"learning_rate": 0.00015166561533147096,
"epoch": 0.24,
"step": 16100
},
{
"loss": 1.2098,
"grad_norm": 1.0743931531906128,
"learning_rate": 0.00015159051996034966,
"epoch": 0.24,
"step": 16125
},
{
"loss": 1.1824,
"grad_norm": 1.6704978942871094,
"learning_rate": 0.00015151542458922832,
"epoch": 0.24,
"step": 16150
},
{
"loss": 1.1727,
"grad_norm": 1.23310387134552,
"learning_rate": 0.000151440329218107,
"epoch": 0.24,
"step": 16175
},
{
"loss": 1.1947,
"grad_norm": 1.678554892539978,
"learning_rate": 0.00015136523384698568,
"epoch": 0.24,
"step": 16200
},
{
"loss": 1.2033,
"grad_norm": 1.4678512811660767,
"learning_rate": 0.00015129013847586434,
"epoch": 0.24,
"step": 16225
},
{
"loss": 1.2855,
"grad_norm": 2.2149295806884766,
"learning_rate": 0.00015121504310474304,
"epoch": 0.24,
"step": 16250
},
{
"loss": 1.1601,
"grad_norm": 0.9399513006210327,
"learning_rate": 0.0001511399477336217,
"epoch": 0.24,
"step": 16275
},
{
"loss": 1.1618,
"grad_norm": 1.5738555192947388,
"learning_rate": 0.0001510648523625004,
"epoch": 0.24,
"step": 16300
},
{
"loss": 1.1984,
"grad_norm": 2.3447060585021973,
"learning_rate": 0.00015098975699137906,
"epoch": 0.25,
"step": 16325
},
{
"loss": 1.2129,
"grad_norm": 2.5573129653930664,
"learning_rate": 0.00015091466162025772,
"epoch": 0.25,
"step": 16350
},
{
"loss": 1.2152,
"grad_norm": 1.161568284034729,
"learning_rate": 0.00015083956624913642,
"epoch": 0.25,
"step": 16375
},
{
"loss": 1.1788,
"grad_norm": 1.2641152143478394,
"learning_rate": 0.00015076447087801508,
"epoch": 0.25,
"step": 16400
},
{
"loss": 1.2635,
"grad_norm": 1.1497838497161865,
"learning_rate": 0.00015068937550689377,
"epoch": 0.25,
"step": 16425
},
{
"loss": 1.2427,
"grad_norm": 1.777820110321045,
"learning_rate": 0.00015061428013577244,
"epoch": 0.25,
"step": 16450
},
{
"loss": 1.2167,
"grad_norm": 1.704571008682251,
"learning_rate": 0.00015053918476465113,
"epoch": 0.25,
"step": 16475
},
{
"loss": 1.1348,
"grad_norm": 1.2531949281692505,
"learning_rate": 0.00015046408939352977,
"epoch": 0.25,
"step": 16500
},
{
"loss": 1.2118,
"grad_norm": 2.0152504444122314,
"learning_rate": 0.00015038899402240846,
"epoch": 0.25,
"step": 16525
},
{
"loss": 1.2169,
"grad_norm": 1.327596664428711,
"learning_rate": 0.00015031389865128713,
"epoch": 0.25,
"step": 16550
},
{
"loss": 1.1057,
"grad_norm": 2.2122318744659424,
"learning_rate": 0.00015023880328016582,
"epoch": 0.25,
"step": 16575
},
{
"loss": 1.1939,
"grad_norm": 1.4037036895751953,
"learning_rate": 0.0001501637079090445,
"epoch": 0.25,
"step": 16600
},
{
"loss": 1.1178,
"grad_norm": 1.947090983390808,
"learning_rate": 0.00015008861253792318,
"epoch": 0.25,
"step": 16625
},
{
"loss": 1.2499,
"grad_norm": 1.9275078773498535,
"learning_rate": 0.00015001351716680184,
"epoch": 0.25,
"step": 16650
},
{
"loss": 1.203,
"grad_norm": 1.6140542030334473,
"learning_rate": 0.0001499384217956805,
"epoch": 0.25,
"step": 16675
},
{
"loss": 1.1617,
"grad_norm": 1.370875358581543,
"learning_rate": 0.0001498633264245592,
"epoch": 0.25,
"step": 16700
},
{
"loss": 1.1351,
"grad_norm": 2.523732900619507,
"learning_rate": 0.00014978823105343787,
"epoch": 0.25,
"step": 16725
},
{
"loss": 1.1793,
"grad_norm": 1.3012944459915161,
"learning_rate": 0.00014971313568231656,
"epoch": 0.25,
"step": 16750
},
{
"loss": 1.1834,
"grad_norm": 1.382142424583435,
"learning_rate": 0.00014963804031119522,
"epoch": 0.25,
"step": 16775
},
{
"loss": 1.1841,
"grad_norm": 3.1386773586273193,
"learning_rate": 0.0001495629449400739,
"epoch": 0.25,
"step": 16800
},
{
"loss": 1.1846,
"grad_norm": 1.6328222751617432,
"learning_rate": 0.00014948784956895258,
"epoch": 0.25,
"step": 16825
},
{
"loss": 1.1879,
"grad_norm": 1.3339941501617432,
"learning_rate": 0.00014941275419783125,
"epoch": 0.25,
"step": 16850
},
{
"loss": 1.1679,
"grad_norm": 2.250485897064209,
"learning_rate": 0.00014933765882670994,
"epoch": 0.25,
"step": 16875
},
{
"loss": 1.1362,
"grad_norm": 2.045668363571167,
"learning_rate": 0.0001492625634555886,
"epoch": 0.25,
"step": 16900
},
{
"loss": 1.1654,
"grad_norm": 1.1913504600524902,
"learning_rate": 0.0001491874680844673,
"epoch": 0.25,
"step": 16925
},
{
"loss": 1.2208,
"grad_norm": 1.6065621376037598,
"learning_rate": 0.00014911237271334594,
"epoch": 0.25,
"step": 16950
},
{
"loss": 1.1542,
"grad_norm": 1.5805847644805908,
"learning_rate": 0.00014903727734222463,
"epoch": 0.25,
"step": 16975
},
{
"loss": 1.2015,
"grad_norm": 5.944768905639648,
"learning_rate": 0.0001489621819711033,
"epoch": 0.26,
"step": 17000
},
{
"loss": 1.1992,
"grad_norm": 3.976229667663574,
"learning_rate": 0.00014888708659998199,
"epoch": 0.26,
"step": 17025
},
{
"loss": 1.1746,
"grad_norm": 2.31911301612854,
"learning_rate": 0.00014881199122886068,
"epoch": 0.26,
"step": 17050
},
{
"loss": 1.1205,
"grad_norm": 1.8674370050430298,
"learning_rate": 0.00014873689585773934,
"epoch": 0.26,
"step": 17075
},
{
"loss": 1.2318,
"grad_norm": 1.6549973487854004,
"learning_rate": 0.000148661800486618,
"epoch": 0.26,
"step": 17100
},
{
"loss": 1.2004,
"grad_norm": 1.3497843742370605,
"learning_rate": 0.00014858670511549667,
"epoch": 0.26,
"step": 17125
},
{
"loss": 1.2421,
"grad_norm": 1.8397778272628784,
"learning_rate": 0.00014851160974437537,
"epoch": 0.26,
"step": 17150
},
{
"loss": 1.1316,
"grad_norm": 0.9151533842086792,
"learning_rate": 0.00014843651437325403,
"epoch": 0.26,
"step": 17175
},
{
"loss": 1.1847,
"grad_norm": 1.389743447303772,
"learning_rate": 0.00014836141900213272,
"epoch": 0.26,
"step": 17200
},
{
"loss": 1.1785,
"grad_norm": 0.9278027415275574,
"learning_rate": 0.0001482863236310114,
"epoch": 0.26,
"step": 17225
},
{
"loss": 1.1768,
"grad_norm": 1.018211841583252,
"learning_rate": 0.00014821122825989006,
"epoch": 0.26,
"step": 17250
},
{
"loss": 1.1549,
"grad_norm": 1.9112569093704224,
"learning_rate": 0.00014813613288876875,
"epoch": 0.26,
"step": 17275
},
{
"loss": 1.1876,
"grad_norm": 1.2178176641464233,
"learning_rate": 0.0001480610375176474,
"epoch": 0.26,
"step": 17300
},
{
"loss": 1.2158,
"grad_norm": 1.7924511432647705,
"learning_rate": 0.0001479859421465261,
"epoch": 0.26,
"step": 17325
},
{
"loss": 1.2083,
"grad_norm": 2.1684257984161377,
"learning_rate": 0.00014791084677540477,
"epoch": 0.26,
"step": 17350
},
{
"loss": 1.1649,
"grad_norm": 1.368639349937439,
"learning_rate": 0.00014783575140428346,
"epoch": 0.26,
"step": 17375
},
{
"loss": 1.2448,
"grad_norm": 1.5606473684310913,
"learning_rate": 0.0001477606560331621,
"epoch": 0.26,
"step": 17400
},
{
"loss": 1.2516,
"grad_norm": 1.3743770122528076,
"learning_rate": 0.0001476855606620408,
"epoch": 0.26,
"step": 17425
},
{
"loss": 1.1748,
"grad_norm": 1.4341908693313599,
"learning_rate": 0.00014761046529091946,
"epoch": 0.26,
"step": 17450
},
{
"loss": 1.1752,
"grad_norm": 2.299916982650757,
"learning_rate": 0.00014753536991979815,
"epoch": 0.26,
"step": 17475
},
{
"loss": 1.2068,
"grad_norm": 2.3646254539489746,
"learning_rate": 0.00014746027454867684,
"epoch": 0.26,
"step": 17500
},
{
"loss": 1.171,
"grad_norm": 2.4026846885681152,
"learning_rate": 0.0001473851791775555,
"epoch": 0.26,
"step": 17525
},
{
"loss": 1.2248,
"grad_norm": 1.358500599861145,
"learning_rate": 0.00014731008380643417,
"epoch": 0.26,
"step": 17550
},
{
"loss": 1.2743,
"grad_norm": 2.302159547805786,
"learning_rate": 0.00014723498843531284,
"epoch": 0.26,
"step": 17575
},
{
"loss": 1.1939,
"grad_norm": 1.4632925987243652,
"learning_rate": 0.00014715989306419153,
"epoch": 0.26,
"step": 17600
},
{
"loss": 1.1962,
"grad_norm": 3.442080020904541,
"learning_rate": 0.0001470847976930702,
"epoch": 0.26,
"step": 17625
},
{
"loss": 1.1649,
"grad_norm": 0.879815936088562,
"learning_rate": 0.0001470097023219489,
"epoch": 0.27,
"step": 17650
},
{
"loss": 1.2207,
"grad_norm": 1.877156376838684,
"learning_rate": 0.00014693460695082758,
"epoch": 0.27,
"step": 17675
},
{
"loss": 1.2056,
"grad_norm": 1.6536662578582764,
"learning_rate": 0.00014685951157970622,
"epoch": 0.27,
"step": 17700
},
{
"loss": 1.1719,
"grad_norm": 1.321970820426941,
"learning_rate": 0.0001467844162085849,
"epoch": 0.27,
"step": 17725
},
{
"loss": 1.2081,
"grad_norm": 1.4853167533874512,
"learning_rate": 0.00014670932083746358,
"epoch": 0.27,
"step": 17750
},
{
"loss": 1.1692,
"grad_norm": 1.9838991165161133,
"learning_rate": 0.00014663422546634227,
"epoch": 0.27,
"step": 17775
},
{
"loss": 1.1826,
"grad_norm": 2.436300039291382,
"learning_rate": 0.00014655913009522094,
"epoch": 0.27,
"step": 17800
},
{
"loss": 1.1814,
"grad_norm": 1.899038314819336,
"learning_rate": 0.00014648403472409963,
"epoch": 0.27,
"step": 17825
},
{
"loss": 1.2291,
"grad_norm": 1.3306931257247925,
"learning_rate": 0.00014640893935297827,
"epoch": 0.27,
"step": 17850
},
{
"loss": 1.1888,
"grad_norm": 1.6196904182434082,
"learning_rate": 0.00014633384398185696,
"epoch": 0.27,
"step": 17875
},
{
"loss": 1.2531,
"grad_norm": 1.9150115251541138,
"learning_rate": 0.00014625874861073565,
"epoch": 0.27,
"step": 17900
},
{
"loss": 1.1236,
"grad_norm": 1.7596296072006226,
"learning_rate": 0.00014618365323961432,
"epoch": 0.27,
"step": 17925
},
{
"loss": 1.264,
"grad_norm": 2.536665678024292,
"learning_rate": 0.000146108557868493,
"epoch": 0.27,
"step": 17950
},
{
"loss": 1.2295,
"grad_norm": 1.5203639268875122,
"learning_rate": 0.00014603346249737167,
"epoch": 0.27,
"step": 17975
},
{
"loss": 1.1534,
"grad_norm": 1.316978931427002,
"learning_rate": 0.00014595836712625034,
"epoch": 0.27,
"step": 18000
},
{
"loss": 1.2754,
"grad_norm": 1.4424588680267334,
"learning_rate": 0.000145883271755129,
"epoch": 0.27,
"step": 18025
},
{
"loss": 1.2349,
"grad_norm": 2.4499781131744385,
"learning_rate": 0.0001458081763840077,
"epoch": 0.27,
"step": 18050
},
{
"loss": 1.1908,
"grad_norm": 1.3816992044448853,
"learning_rate": 0.00014573308101288636,
"epoch": 0.27,
"step": 18075
},
{
"loss": 1.2685,
"grad_norm": 1.1324695348739624,
"learning_rate": 0.00014565798564176506,
"epoch": 0.27,
"step": 18100
},
{
"loss": 1.1553,
"grad_norm": 1.7215017080307007,
"learning_rate": 0.00014558289027064375,
"epoch": 0.27,
"step": 18125
},
{
"loss": 1.153,
"grad_norm": 0.9789482355117798,
"learning_rate": 0.00014550779489952239,
"epoch": 0.27,
"step": 18150
},
{
"loss": 1.2484,
"grad_norm": 3.6144516468048096,
"learning_rate": 0.00014543269952840108,
"epoch": 0.27,
"step": 18175
},
{
"loss": 1.1652,
"grad_norm": 5.405023574829102,
"learning_rate": 0.00014535760415727974,
"epoch": 0.27,
"step": 18200
},
{
"loss": 1.1736,
"grad_norm": 1.360303521156311,
"learning_rate": 0.00014528250878615844,
"epoch": 0.27,
"step": 18225
},
{
"loss": 1.1258,
"grad_norm": 2.1543657779693604,
"learning_rate": 0.0001452074134150371,
"epoch": 0.27,
"step": 18250
},
{
"loss": 1.2295,
"grad_norm": 1.6289156675338745,
"learning_rate": 0.0001451323180439158,
"epoch": 0.27,
"step": 18275
},
{
"loss": 1.1509,
"grad_norm": 1.6996594667434692,
"learning_rate": 0.00014505722267279446,
"epoch": 0.27,
"step": 18300
},
{
"loss": 1.1466,
"grad_norm": 1.9973461627960205,
"learning_rate": 0.00014498212730167312,
"epoch": 0.28,
"step": 18325
},
{
"loss": 1.1387,
"grad_norm": 1.3268439769744873,
"learning_rate": 0.00014490703193055182,
"epoch": 0.28,
"step": 18350
},
{
"loss": 1.2239,
"grad_norm": 1.3260868787765503,
"learning_rate": 0.00014483193655943048,
"epoch": 0.28,
"step": 18375
},
{
"loss": 1.2155,
"grad_norm": 1.745481014251709,
"learning_rate": 0.00014475684118830917,
"epoch": 0.28,
"step": 18400
},
{
"loss": 1.1715,
"grad_norm": 1.1252262592315674,
"learning_rate": 0.00014468174581718784,
"epoch": 0.28,
"step": 18425
},
{
"loss": 1.1727,
"grad_norm": 2.9935803413391113,
"learning_rate": 0.0001446066504460665,
"epoch": 0.28,
"step": 18450
},
{
"loss": 1.1934,
"grad_norm": 3.0998411178588867,
"learning_rate": 0.00014453155507494517,
"epoch": 0.28,
"step": 18475
},
{
"loss": 1.19,
"grad_norm": 2.01745343208313,
"learning_rate": 0.00014445645970382386,
"epoch": 0.28,
"step": 18500
},
{
"loss": 1.1656,
"grad_norm": 1.6752148866653442,
"learning_rate": 0.00014438136433270253,
"epoch": 0.28,
"step": 18525
},
{
"loss": 1.1701,
"grad_norm": 1.126939058303833,
"learning_rate": 0.00014430626896158122,
"epoch": 0.28,
"step": 18550
},
{
"loss": 1.1228,
"grad_norm": 1.5768241882324219,
"learning_rate": 0.0001442311735904599,
"epoch": 0.28,
"step": 18575
},
{
"loss": 1.1935,
"grad_norm": 1.1016457080841064,
"learning_rate": 0.00014415607821933855,
"epoch": 0.28,
"step": 18600
},
{
"loss": 1.2472,
"grad_norm": 2.9630792140960693,
"learning_rate": 0.00014408098284821724,
"epoch": 0.28,
"step": 18625
},
{
"loss": 1.191,
"grad_norm": 1.2299975156784058,
"learning_rate": 0.0001440058874770959,
"epoch": 0.28,
"step": 18650
},
{
"loss": 1.1604,
"grad_norm": 1.3096675872802734,
"learning_rate": 0.0001439307921059746,
"epoch": 0.28,
"step": 18675
},
{
"loss": 1.1423,
"grad_norm": 2.186399459838867,
"learning_rate": 0.00014385569673485327,
"epoch": 0.28,
"step": 18700
},
{
"loss": 1.1783,
"grad_norm": 1.5450773239135742,
"learning_rate": 0.00014378060136373196,
"epoch": 0.28,
"step": 18725
},
{
"loss": 1.2721,
"grad_norm": 1.384564757347107,
"learning_rate": 0.00014370550599261062,
"epoch": 0.28,
"step": 18750
},
{
"loss": 1.2521,
"grad_norm": 2.277376174926758,
"learning_rate": 0.0001436304106214893,
"epoch": 0.28,
"step": 18775
},
{
"loss": 1.2283,
"grad_norm": 1.0917941331863403,
"learning_rate": 0.00014355531525036798,
"epoch": 0.28,
"step": 18800
},
{
"loss": 1.2139,
"grad_norm": 2.3607280254364014,
"learning_rate": 0.00014348021987924665,
"epoch": 0.28,
"step": 18825
},
{
"loss": 1.2017,
"grad_norm": 1.4834787845611572,
"learning_rate": 0.00014340512450812534,
"epoch": 0.28,
"step": 18850
},
{
"loss": 1.1556,
"grad_norm": 1.913205623626709,
"learning_rate": 0.000143330029137004,
"epoch": 0.28,
"step": 18875
},
{
"loss": 1.1796,
"grad_norm": 1.4506784677505493,
"learning_rate": 0.00014325493376588267,
"epoch": 0.28,
"step": 18900
},
{
"loss": 1.1792,
"grad_norm": 1.0843782424926758,
"learning_rate": 0.00014317983839476134,
"epoch": 0.28,
"step": 18925
},
{
"loss": 1.1894,
"grad_norm": 1.2553937435150146,
"learning_rate": 0.00014310474302364003,
"epoch": 0.28,
"step": 18950
},
{
"loss": 1.1944,
"grad_norm": 0.9680384397506714,
"learning_rate": 0.0001430296476525187,
"epoch": 0.28,
"step": 18975
},
{
"loss": 1.2441,
"grad_norm": 1.4088304042816162,
"learning_rate": 0.00014295455228139739,
"epoch": 0.29,
"step": 19000
},
{
"loss": 1.1978,
"grad_norm": 1.0669535398483276,
"learning_rate": 0.00014287945691027608,
"epoch": 0.29,
"step": 19025
},
{
"loss": 1.2014,
"grad_norm": 1.6889104843139648,
"learning_rate": 0.00014280436153915472,
"epoch": 0.29,
"step": 19050
},
{
"loss": 1.2006,
"grad_norm": 1.6797627210617065,
"learning_rate": 0.0001427292661680334,
"epoch": 0.29,
"step": 19075
},
{
"loss": 1.203,
"grad_norm": 1.4236091375350952,
"learning_rate": 0.00014265417079691207,
"epoch": 0.29,
"step": 19100
},
{
"loss": 1.1643,
"grad_norm": 1.0303690433502197,
"learning_rate": 0.00014257907542579077,
"epoch": 0.29,
"step": 19125
},
{
"loss": 1.1999,
"grad_norm": 1.8537395000457764,
"learning_rate": 0.00014250398005466943,
"epoch": 0.29,
"step": 19150
},
{
"loss": 1.123,
"grad_norm": 1.440233588218689,
"learning_rate": 0.00014242888468354812,
"epoch": 0.29,
"step": 19175
},
{
"loss": 1.1654,
"grad_norm": 2.0533230304718018,
"learning_rate": 0.0001423537893124268,
"epoch": 0.29,
"step": 19200
},
{
"loss": 1.1724,
"grad_norm": 1.7699745893478394,
"learning_rate": 0.00014227869394130546,
"epoch": 0.29,
"step": 19225
},
{
"loss": 1.1701,
"grad_norm": 1.248593807220459,
"learning_rate": 0.00014220359857018415,
"epoch": 0.29,
"step": 19250
},
{
"loss": 1.2097,
"grad_norm": 1.6481257677078247,
"learning_rate": 0.0001421285031990628,
"epoch": 0.29,
"step": 19275
},
{
"loss": 1.1776,
"grad_norm": 1.5135223865509033,
"learning_rate": 0.0001420534078279415,
"epoch": 0.29,
"step": 19300
},
{
"loss": 1.166,
"grad_norm": 1.790306568145752,
"learning_rate": 0.00014197831245682017,
"epoch": 0.29,
"step": 19325
},
{
"loss": 1.1318,
"grad_norm": 2.1356446743011475,
"learning_rate": 0.00014190321708569884,
"epoch": 0.29,
"step": 19350
},
{
"loss": 1.178,
"grad_norm": 1.4826107025146484,
"learning_rate": 0.0001418281217145775,
"epoch": 0.29,
"step": 19375
},
{
"loss": 1.1652,
"grad_norm": 1.3520580530166626,
"learning_rate": 0.0001417530263434562,
"epoch": 0.29,
"step": 19400
},
{
"loss": 1.2568,
"grad_norm": 1.3266022205352783,
"learning_rate": 0.0001416779309723349,
"epoch": 0.29,
"step": 19425
},
{
"loss": 1.1697,
"grad_norm": 1.5133330821990967,
"learning_rate": 0.00014160283560121355,
"epoch": 0.29,
"step": 19450
},
{
"loss": 1.1317,
"grad_norm": 1.729530692100525,
"learning_rate": 0.00014152774023009224,
"epoch": 0.29,
"step": 19475
},
{
"loss": 1.1676,
"grad_norm": 1.2013927698135376,
"learning_rate": 0.00014145264485897088,
"epoch": 0.29,
"step": 19500
},
{
"loss": 1.2311,
"grad_norm": 1.1489402055740356,
"learning_rate": 0.00014137754948784957,
"epoch": 0.29,
"step": 19525
},
{
"loss": 1.1642,
"grad_norm": 1.405923843383789,
"learning_rate": 0.00014130245411672824,
"epoch": 0.29,
"step": 19550
},
{
"loss": 1.1818,
"grad_norm": 1.4068244695663452,
"learning_rate": 0.00014122735874560693,
"epoch": 0.29,
"step": 19575
},
{
"loss": 1.228,
"grad_norm": 1.8172351121902466,
"learning_rate": 0.0001411522633744856,
"epoch": 0.29,
"step": 19600
},
{
"loss": 1.1981,
"grad_norm": 2.907489776611328,
"learning_rate": 0.0001410771680033643,
"epoch": 0.29,
"step": 19625
},
{
"loss": 1.1957,
"grad_norm": 2.162321090698242,
"learning_rate": 0.00014100207263224296,
"epoch": 0.3,
"step": 19650
},
{
"loss": 1.1492,
"grad_norm": 1.433248519897461,
"learning_rate": 0.00014092697726112162,
"epoch": 0.3,
"step": 19675
},
{
"loss": 1.16,
"grad_norm": 1.9054490327835083,
"learning_rate": 0.0001408518818900003,
"epoch": 0.3,
"step": 19700
},
{
"loss": 1.1988,
"grad_norm": 1.7673982381820679,
"learning_rate": 0.00014077678651887898,
"epoch": 0.3,
"step": 19725
},
{
"loss": 1.2049,
"grad_norm": 1.3216012716293335,
"learning_rate": 0.00014070169114775767,
"epoch": 0.3,
"step": 19750
},
{
"loss": 1.1345,
"grad_norm": 1.4515612125396729,
"learning_rate": 0.00014062659577663634,
"epoch": 0.3,
"step": 19775
},
{
"loss": 1.1776,
"grad_norm": 1.968056559562683,
"learning_rate": 0.000140551500405515,
"epoch": 0.3,
"step": 19800
},
{
"loss": 1.2182,
"grad_norm": 1.6644461154937744,
"learning_rate": 0.00014047640503439367,
"epoch": 0.3,
"step": 19825
},
{
"loss": 1.1897,
"grad_norm": 2.2730207443237305,
"learning_rate": 0.00014040130966327236,
"epoch": 0.3,
"step": 19850
},
{
"loss": 1.1552,
"grad_norm": 1.038794755935669,
"learning_rate": 0.00014032621429215105,
"epoch": 0.3,
"step": 19875
},
{
"loss": 1.1796,
"grad_norm": 1.4719074964523315,
"learning_rate": 0.00014025111892102972,
"epoch": 0.3,
"step": 19900
},
{
"loss": 1.2031,
"grad_norm": 1.8013041019439697,
"learning_rate": 0.0001401760235499084,
"epoch": 0.3,
"step": 19925
},
{
"loss": 1.1864,
"grad_norm": 2.0032236576080322,
"learning_rate": 0.00014010092817878705,
"epoch": 0.3,
"step": 19950
},
{
"loss": 1.225,
"grad_norm": 2.1414427757263184,
"learning_rate": 0.00014002583280766574,
"epoch": 0.3,
"step": 19975
},
{
"loss": 1.1585,
"grad_norm": 4.096096515655518,
"learning_rate": 0.0001399507374365444,
"epoch": 0.3,
"step": 20000
},
{
"loss": 1.2254,
"grad_norm": 1.5664288997650146,
"learning_rate": 0.0001398756420654231,
"epoch": 0.3,
"step": 20025
},
{
"loss": 1.0905,
"grad_norm": 1.7429243326187134,
"learning_rate": 0.00013980054669430176,
"epoch": 0.3,
"step": 20050
},
{
"loss": 1.1744,
"grad_norm": 1.551805019378662,
"learning_rate": 0.00013972545132318046,
"epoch": 0.3,
"step": 20075
},
{
"loss": 1.1998,
"grad_norm": 1.483031988143921,
"learning_rate": 0.00013965035595205912,
"epoch": 0.3,
"step": 20100
},
{
"loss": 1.1391,
"grad_norm": 1.2282016277313232,
"learning_rate": 0.00013957526058093779,
"epoch": 0.3,
"step": 20125
},
{
"loss": 1.0928,
"grad_norm": 1.4983934164047241,
"learning_rate": 0.00013950016520981648,
"epoch": 0.3,
"step": 20150
},
{
"loss": 1.2218,
"grad_norm": 1.7510052919387817,
"learning_rate": 0.00013942506983869514,
"epoch": 0.3,
"step": 20175
},
{
"loss": 1.2014,
"grad_norm": 1.6214317083358765,
"learning_rate": 0.00013934997446757384,
"epoch": 0.3,
"step": 20200
},
{
"loss": 1.2299,
"grad_norm": 1.8761943578720093,
"learning_rate": 0.0001392748790964525,
"epoch": 0.3,
"step": 20225
},
{
"loss": 1.2065,
"grad_norm": 2.8093338012695312,
"learning_rate": 0.00013919978372533117,
"epoch": 0.3,
"step": 20250
},
{
"loss": 1.145,
"grad_norm": 1.5288567543029785,
"learning_rate": 0.00013912468835420983,
"epoch": 0.3,
"step": 20275
},
{
"loss": 1.1886,
"grad_norm": 1.5765314102172852,
"learning_rate": 0.00013904959298308852,
"epoch": 0.3,
"step": 20300
},
{
"loss": 1.1761,
"grad_norm": 1.0417560338974,
"learning_rate": 0.00013897449761196722,
"epoch": 0.31,
"step": 20325
},
{
"loss": 1.2366,
"grad_norm": 1.2328884601593018,
"learning_rate": 0.00013889940224084588,
"epoch": 0.31,
"step": 20350
},
{
"loss": 1.1157,
"grad_norm": 1.6982795000076294,
"learning_rate": 0.00013882430686972458,
"epoch": 0.31,
"step": 20375
},
{
"loss": 1.2139,
"grad_norm": 1.3879860639572144,
"learning_rate": 0.0001387492114986032,
"epoch": 0.31,
"step": 20400
},
{
"loss": 1.1945,
"grad_norm": 1.8985368013381958,
"learning_rate": 0.0001386741161274819,
"epoch": 0.31,
"step": 20425
},
{
"loss": 1.1541,
"grad_norm": 1.1783545017242432,
"learning_rate": 0.00013859902075636057,
"epoch": 0.31,
"step": 20450
},
{
"loss": 1.1777,
"grad_norm": 1.639700174331665,
"learning_rate": 0.00013852392538523926,
"epoch": 0.31,
"step": 20475
},
{
"loss": 1.1743,
"grad_norm": 1.1630868911743164,
"learning_rate": 0.00013844883001411796,
"epoch": 0.31,
"step": 20500
},
{
"loss": 1.231,
"grad_norm": 1.5663248300552368,
"learning_rate": 0.00013837373464299662,
"epoch": 0.31,
"step": 20525
},
{
"loss": 1.2136,
"grad_norm": 1.1791601181030273,
"learning_rate": 0.0001382986392718753,
"epoch": 0.31,
"step": 20550
},
{
"loss": 1.1534,
"grad_norm": 1.1631137132644653,
"learning_rate": 0.00013822354390075395,
"epoch": 0.31,
"step": 20575
},
{
"loss": 1.2065,
"grad_norm": 3.0869953632354736,
"learning_rate": 0.00013814844852963264,
"epoch": 0.31,
"step": 20600
},
{
"loss": 1.1659,
"grad_norm": 1.5045863389968872,
"learning_rate": 0.0001380733531585113,
"epoch": 0.31,
"step": 20625
},
{
"loss": 1.2137,
"grad_norm": 1.555591344833374,
"learning_rate": 0.00013799825778739,
"epoch": 0.31,
"step": 20650
},
{
"loss": 1.1867,
"grad_norm": 1.1660338640213013,
"learning_rate": 0.00013792316241626867,
"epoch": 0.31,
"step": 20675
},
{
"loss": 1.1656,
"grad_norm": 1.3633331060409546,
"learning_rate": 0.00013784806704514733,
"epoch": 0.31,
"step": 20700
},
{
"loss": 1.2777,
"grad_norm": 1.714920163154602,
"learning_rate": 0.00013777297167402603,
"epoch": 0.31,
"step": 20725
},
{
"loss": 1.226,
"grad_norm": 2.6200525760650635,
"learning_rate": 0.0001376978763029047,
"epoch": 0.31,
"step": 20750
},
{
"loss": 1.2066,
"grad_norm": 1.176538109779358,
"learning_rate": 0.00013762278093178338,
"epoch": 0.31,
"step": 20775
},
{
"loss": 1.204,
"grad_norm": 1.6918548345565796,
"learning_rate": 0.00013754768556066205,
"epoch": 0.31,
"step": 20800
},
{
"loss": 1.1103,
"grad_norm": 1.2101995944976807,
"learning_rate": 0.00013747259018954074,
"epoch": 0.31,
"step": 20825
},
{
"loss": 1.2484,
"grad_norm": 2.0804872512817383,
"learning_rate": 0.00013739749481841938,
"epoch": 0.31,
"step": 20850
},
{
"loss": 1.2183,
"grad_norm": 2.115626573562622,
"learning_rate": 0.00013732239944729807,
"epoch": 0.31,
"step": 20875
},
{
"loss": 1.1542,
"grad_norm": 1.6519482135772705,
"learning_rate": 0.00013724730407617674,
"epoch": 0.31,
"step": 20900
},
{
"loss": 1.1894,
"grad_norm": 2.619948625564575,
"learning_rate": 0.00013717220870505543,
"epoch": 0.31,
"step": 20925
},
{
"loss": 1.2,
"grad_norm": 1.5296428203582764,
"learning_rate": 0.00013709711333393412,
"epoch": 0.31,
"step": 20950
},
{
"loss": 1.1985,
"grad_norm": 2.707340717315674,
"learning_rate": 0.0001370220179628128,
"epoch": 0.32,
"step": 20975
},
{
"loss": 1.179,
"grad_norm": 1.8074674606323242,
"learning_rate": 0.00013694692259169145,
"epoch": 0.32,
"step": 21000
},
{
"loss": 1.1687,
"grad_norm": 1.1176238059997559,
"learning_rate": 0.00013687182722057012,
"epoch": 0.32,
"step": 21025
},
{
"loss": 1.2645,
"grad_norm": 2.0191187858581543,
"learning_rate": 0.0001367967318494488,
"epoch": 0.32,
"step": 21050
},
{
"loss": 1.2807,
"grad_norm": 1.368486762046814,
"learning_rate": 0.00013672163647832747,
"epoch": 0.32,
"step": 21075
},
{
"loss": 1.1935,
"grad_norm": 1.582977294921875,
"learning_rate": 0.00013664654110720617,
"epoch": 0.32,
"step": 21100
},
{
"loss": 1.2249,
"grad_norm": 1.6462111473083496,
"learning_rate": 0.00013657144573608483,
"epoch": 0.32,
"step": 21125
},
{
"loss": 1.1481,
"grad_norm": 2.2449021339416504,
"learning_rate": 0.0001364963503649635,
"epoch": 0.32,
"step": 21150
},
{
"loss": 1.2925,
"grad_norm": 1.45096755027771,
"learning_rate": 0.0001364212549938422,
"epoch": 0.32,
"step": 21175
},
{
"loss": 1.2251,
"grad_norm": 1.5417848825454712,
"learning_rate": 0.00013634615962272086,
"epoch": 0.32,
"step": 21200
},
{
"loss": 1.1838,
"grad_norm": 1.4828438758850098,
"learning_rate": 0.00013627106425159955,
"epoch": 0.32,
"step": 21225
},
{
"loss": 1.2017,
"grad_norm": 1.9270501136779785,
"learning_rate": 0.0001361959688804782,
"epoch": 0.32,
"step": 21250
},
{
"loss": 1.167,
"grad_norm": 1.438550353050232,
"learning_rate": 0.0001361208735093569,
"epoch": 0.32,
"step": 21275
},
{
"loss": 1.1553,
"grad_norm": 1.860770344734192,
"learning_rate": 0.00013604577813823557,
"epoch": 0.32,
"step": 21300
},
{
"loss": 1.1355,
"grad_norm": 2.12158203125,
"learning_rate": 0.00013597068276711424,
"epoch": 0.32,
"step": 21325
},
{
"loss": 1.1958,
"grad_norm": 1.2415894269943237,
"learning_rate": 0.0001358955873959929,
"epoch": 0.32,
"step": 21350
},
{
"loss": 1.0986,
"grad_norm": 2.1204869747161865,
"learning_rate": 0.0001358204920248716,
"epoch": 0.32,
"step": 21375
},
{
"loss": 1.1916,
"grad_norm": 2.0683250427246094,
"learning_rate": 0.0001357453966537503,
"epoch": 0.32,
"step": 21400
},
{
"loss": 1.2799,
"grad_norm": 1.136094331741333,
"learning_rate": 0.00013567030128262895,
"epoch": 0.32,
"step": 21425
},
{
"loss": 1.1714,
"grad_norm": 2.614771842956543,
"learning_rate": 0.00013559520591150762,
"epoch": 0.32,
"step": 21450
},
{
"loss": 1.1808,
"grad_norm": 1.1263775825500488,
"learning_rate": 0.00013552011054038628,
"epoch": 0.32,
"step": 21475
},
{
"loss": 1.1903,
"grad_norm": 1.8330289125442505,
"learning_rate": 0.00013544501516926498,
"epoch": 0.32,
"step": 21500
},
{
"loss": 1.1863,
"grad_norm": 2.0172111988067627,
"learning_rate": 0.00013536991979814364,
"epoch": 0.32,
"step": 21525
},
{
"loss": 1.2356,
"grad_norm": 1.7615008354187012,
"learning_rate": 0.00013529482442702233,
"epoch": 0.32,
"step": 21550
},
{
"loss": 1.2365,
"grad_norm": 3.3480842113494873,
"learning_rate": 0.000135219729055901,
"epoch": 0.32,
"step": 21575
},
{
"loss": 1.0925,
"grad_norm": 1.5129296779632568,
"learning_rate": 0.00013514463368477966,
"epoch": 0.32,
"step": 21600
},
{
"loss": 1.1838,
"grad_norm": 1.1446235179901123,
"learning_rate": 0.00013506953831365836,
"epoch": 0.32,
"step": 21625
},
{
"loss": 1.2593,
"grad_norm": 1.2927684783935547,
"learning_rate": 0.00013499444294253702,
"epoch": 0.33,
"step": 21650
},
{
"loss": 1.1879,
"grad_norm": 1.6593775749206543,
"learning_rate": 0.00013491934757141571,
"epoch": 0.33,
"step": 21675
},
{
"loss": 1.169,
"grad_norm": 1.3151673078536987,
"learning_rate": 0.00013484425220029438,
"epoch": 0.33,
"step": 21700
},
{
"loss": 1.159,
"grad_norm": 1.4625322818756104,
"learning_rate": 0.00013476915682917307,
"epoch": 0.33,
"step": 21725
},
{
"loss": 1.1482,
"grad_norm": 1.4630295038223267,
"learning_rate": 0.00013469406145805174,
"epoch": 0.33,
"step": 21750
},
{
"loss": 1.1558,
"grad_norm": 1.914694905281067,
"learning_rate": 0.0001346189660869304,
"epoch": 0.33,
"step": 21775
},
{
"loss": 1.1895,
"grad_norm": 1.1685444116592407,
"learning_rate": 0.0001345438707158091,
"epoch": 0.33,
"step": 21800
},
{
"loss": 1.137,
"grad_norm": 1.8522282838821411,
"learning_rate": 0.00013446877534468776,
"epoch": 0.33,
"step": 21825
},
{
"loss": 1.2181,
"grad_norm": 2.1433138847351074,
"learning_rate": 0.00013439367997356645,
"epoch": 0.33,
"step": 21850
},
{
"loss": 1.1694,
"grad_norm": 1.1134564876556396,
"learning_rate": 0.00013431858460244512,
"epoch": 0.33,
"step": 21875
},
{
"loss": 1.1859,
"grad_norm": 0.9985026121139526,
"learning_rate": 0.00013424348923132378,
"epoch": 0.33,
"step": 21900
},
{
"loss": 1.1866,
"grad_norm": 1.732964038848877,
"learning_rate": 0.00013416839386020245,
"epoch": 0.33,
"step": 21925
},
{
"loss": 1.1623,
"grad_norm": 1.2273517847061157,
"learning_rate": 0.00013409329848908114,
"epoch": 0.33,
"step": 21950
},
{
"loss": 1.1336,
"grad_norm": 1.2174320220947266,
"learning_rate": 0.0001340182031179598,
"epoch": 0.33,
"step": 21975
},
{
"loss": 1.1903,
"grad_norm": 2.137214422225952,
"learning_rate": 0.0001339431077468385,
"epoch": 0.33,
"step": 22000
},
{
"loss": 1.214,
"grad_norm": 1.2529860734939575,
"learning_rate": 0.0001338680123757172,
"epoch": 0.33,
"step": 22025
},
{
"loss": 1.1389,
"grad_norm": 1.8254303932189941,
"learning_rate": 0.00013379291700459583,
"epoch": 0.33,
"step": 22050
},
{
"loss": 1.1559,
"grad_norm": 2.0765380859375,
"learning_rate": 0.00013371782163347452,
"epoch": 0.33,
"step": 22075
},
{
"loss": 1.1536,
"grad_norm": 1.504064917564392,
"learning_rate": 0.0001336427262623532,
"epoch": 0.33,
"step": 22100
},
{
"loss": 1.2144,
"grad_norm": 2.490610122680664,
"learning_rate": 0.00013356763089123188,
"epoch": 0.33,
"step": 22125
},
{
"loss": 1.1543,
"grad_norm": 1.6488279104232788,
"learning_rate": 0.00013349253552011054,
"epoch": 0.33,
"step": 22150
},
{
"loss": 1.1756,
"grad_norm": 2.970743417739868,
"learning_rate": 0.00013341744014898924,
"epoch": 0.33,
"step": 22175
},
{
"loss": 1.2094,
"grad_norm": 1.299083948135376,
"learning_rate": 0.0001333423447778679,
"epoch": 0.33,
"step": 22200
},
{
"loss": 1.0779,
"grad_norm": 1.3857295513153076,
"learning_rate": 0.00013326724940674657,
"epoch": 0.33,
"step": 22225
},
{
"loss": 1.2177,
"grad_norm": 1.7416950464248657,
"learning_rate": 0.00013319215403562526,
"epoch": 0.33,
"step": 22250
},
{
"loss": 1.179,
"grad_norm": 2.380249261856079,
"learning_rate": 0.00013311705866450393,
"epoch": 0.33,
"step": 22275
},
{
"loss": 1.2057,
"grad_norm": 1.3791347742080688,
"learning_rate": 0.00013304196329338262,
"epoch": 0.33,
"step": 22300
},
{
"loss": 1.1725,
"grad_norm": 1.0284641981124878,
"learning_rate": 0.00013296686792226128,
"epoch": 0.34,
"step": 22325
},
{
"loss": 1.1518,
"grad_norm": 2.1696279048919678,
"learning_rate": 0.00013289177255113995,
"epoch": 0.34,
"step": 22350
},
{
"loss": 1.2832,
"grad_norm": 1.2163208723068237,
"learning_rate": 0.0001328166771800186,
"epoch": 0.34,
"step": 22375
},
{
"loss": 1.1366,
"grad_norm": 1.724770426750183,
"learning_rate": 0.0001327415818088973,
"epoch": 0.34,
"step": 22400
},
{
"loss": 1.2067,
"grad_norm": 1.9105318784713745,
"learning_rate": 0.00013266648643777597,
"epoch": 0.34,
"step": 22425
},
{
"loss": 1.1917,
"grad_norm": 1.1520806550979614,
"learning_rate": 0.00013259139106665466,
"epoch": 0.34,
"step": 22450
},
{
"loss": 1.1637,
"grad_norm": 1.8389378786087036,
"learning_rate": 0.00013251629569553336,
"epoch": 0.34,
"step": 22475
},
{
"loss": 1.2151,
"grad_norm": 4.63606595993042,
"learning_rate": 0.000132441200324412,
"epoch": 0.34,
"step": 22500
},
{
"loss": 1.196,
"grad_norm": 2.179290771484375,
"learning_rate": 0.0001323661049532907,
"epoch": 0.34,
"step": 22525
},
{
"loss": 1.158,
"grad_norm": 1.1105175018310547,
"learning_rate": 0.00013229100958216935,
"epoch": 0.34,
"step": 22550
},
{
"loss": 1.1638,
"grad_norm": 1.1015607118606567,
"learning_rate": 0.00013221591421104804,
"epoch": 0.34,
"step": 22575
},
{
"loss": 1.1948,
"grad_norm": 1.314866304397583,
"learning_rate": 0.0001321408188399267,
"epoch": 0.34,
"step": 22600
},
{
"loss": 1.1234,
"grad_norm": 1.3410804271697998,
"learning_rate": 0.0001320657234688054,
"epoch": 0.34,
"step": 22625
},
{
"loss": 1.2106,
"grad_norm": 1.4340014457702637,
"learning_rate": 0.00013199062809768407,
"epoch": 0.34,
"step": 22650
},
{
"loss": 1.2023,
"grad_norm": 2.40155291557312,
"learning_rate": 0.00013191553272656273,
"epoch": 0.34,
"step": 22675
},
{
"loss": 1.1545,
"grad_norm": 1.752961277961731,
"learning_rate": 0.00013184043735544143,
"epoch": 0.34,
"step": 22700
},
{
"loss": 1.2083,
"grad_norm": 2.0551249980926514,
"learning_rate": 0.0001317653419843201,
"epoch": 0.34,
"step": 22725
},
{
"loss": 1.2815,
"grad_norm": 2.0029456615448,
"learning_rate": 0.00013169024661319878,
"epoch": 0.34,
"step": 22750
},
{
"loss": 1.1618,
"grad_norm": 1.6569886207580566,
"learning_rate": 0.00013161515124207745,
"epoch": 0.34,
"step": 22775
},
{
"loss": 1.1506,
"grad_norm": 1.0627089738845825,
"learning_rate": 0.00013154005587095611,
"epoch": 0.34,
"step": 22800
},
{
"loss": 1.178,
"grad_norm": 1.4119595289230347,
"learning_rate": 0.00013146496049983478,
"epoch": 0.34,
"step": 22825
},
{
"loss": 1.2293,
"grad_norm": 2.070948839187622,
"learning_rate": 0.00013138986512871347,
"epoch": 0.34,
"step": 22850
},
{
"loss": 1.2195,
"grad_norm": 3.2543933391571045,
"learning_rate": 0.00013131476975759214,
"epoch": 0.34,
"step": 22875
},
{
"loss": 1.196,
"grad_norm": 2.154444694519043,
"learning_rate": 0.00013123967438647083,
"epoch": 0.34,
"step": 22900
},
{
"loss": 1.1807,
"grad_norm": 1.9498579502105713,
"learning_rate": 0.00013116457901534952,
"epoch": 0.34,
"step": 22925
},
{
"loss": 1.1659,
"grad_norm": 1.2425457239151,
"learning_rate": 0.00013108948364422816,
"epoch": 0.34,
"step": 22950
},
{
"loss": 1.181,
"grad_norm": 1.0989060401916504,
"learning_rate": 0.00013101438827310685,
"epoch": 0.35,
"step": 22975
},
{
"loss": 1.1095,
"grad_norm": 1.509493350982666,
"learning_rate": 0.00013093929290198552,
"epoch": 0.35,
"step": 23000
},
{
"loss": 1.1686,
"grad_norm": 1.762772798538208,
"learning_rate": 0.0001308641975308642,
"epoch": 0.35,
"step": 23025
},
{
"loss": 1.1062,
"grad_norm": 2.1119191646575928,
"learning_rate": 0.00013078910215974288,
"epoch": 0.35,
"step": 23050
},
{
"loss": 1.1906,
"grad_norm": 1.1782546043395996,
"learning_rate": 0.00013071400678862157,
"epoch": 0.35,
"step": 23075
},
{
"loss": 1.2047,
"grad_norm": 1.2365734577178955,
"learning_rate": 0.00013063891141750023,
"epoch": 0.35,
"step": 23100
},
{
"loss": 1.1824,
"grad_norm": 1.0874519348144531,
"learning_rate": 0.0001305638160463789,
"epoch": 0.35,
"step": 23125
},
{
"loss": 1.127,
"grad_norm": 1.9339088201522827,
"learning_rate": 0.0001304887206752576,
"epoch": 0.35,
"step": 23150
},
{
"loss": 1.1529,
"grad_norm": 2.087249517440796,
"learning_rate": 0.00013041362530413626,
"epoch": 0.35,
"step": 23175
},
{
"loss": 1.1736,
"grad_norm": 1.0799955129623413,
"learning_rate": 0.00013033852993301495,
"epoch": 0.35,
"step": 23200
},
{
"loss": 1.1652,
"grad_norm": 4.290017127990723,
"learning_rate": 0.00013026343456189361,
"epoch": 0.35,
"step": 23225
},
{
"loss": 1.1845,
"grad_norm": 1.8332254886627197,
"learning_rate": 0.00013018833919077228,
"epoch": 0.35,
"step": 23250
},
{
"loss": 1.227,
"grad_norm": 2.3208718299865723,
"learning_rate": 0.00013011324381965094,
"epoch": 0.35,
"step": 23275
},
{
"loss": 1.0917,
"grad_norm": 1.9536670446395874,
"learning_rate": 0.00013003814844852964,
"epoch": 0.35,
"step": 23300
},
{
"loss": 1.1812,
"grad_norm": 1.225029468536377,
"learning_rate": 0.00012996305307740833,
"epoch": 0.35,
"step": 23325
},
{
"loss": 1.2249,
"grad_norm": 2.538161039352417,
"learning_rate": 0.000129887957706287,
"epoch": 0.35,
"step": 23350
},
{
"loss": 1.1578,
"grad_norm": 1.2378344535827637,
"learning_rate": 0.0001298128623351657,
"epoch": 0.35,
"step": 23375
},
{
"loss": 1.1544,
"grad_norm": 3.9860634803771973,
"learning_rate": 0.00012973776696404433,
"epoch": 0.35,
"step": 23400
},
{
"loss": 1.1704,
"grad_norm": 1.1592284440994263,
"learning_rate": 0.00012966267159292302,
"epoch": 0.35,
"step": 23425
},
{
"loss": 1.2261,
"grad_norm": 0.9641034603118896,
"learning_rate": 0.00012958757622180168,
"epoch": 0.35,
"step": 23450
},
{
"loss": 1.1879,
"grad_norm": 2.3419320583343506,
"learning_rate": 0.00012951248085068038,
"epoch": 0.35,
"step": 23475
},
{
"loss": 1.1237,
"grad_norm": 1.641772747039795,
"learning_rate": 0.00012943738547955904,
"epoch": 0.35,
"step": 23500
},
{
"loss": 1.1636,
"grad_norm": 1.8921740055084229,
"learning_rate": 0.00012936229010843773,
"epoch": 0.35,
"step": 23525
},
{
"loss": 1.1919,
"grad_norm": 1.5332955121994019,
"learning_rate": 0.0001292871947373164,
"epoch": 0.35,
"step": 23550
},
{
"loss": 1.1632,
"grad_norm": 1.6443663835525513,
"learning_rate": 0.00012921209936619506,
"epoch": 0.35,
"step": 23575
},
{
"loss": 1.1963,
"grad_norm": 2.044127941131592,
"learning_rate": 0.00012913700399507376,
"epoch": 0.35,
"step": 23600
},
{
"loss": 1.1971,
"grad_norm": 2.1552951335906982,
"learning_rate": 0.00012906190862395242,
"epoch": 0.35,
"step": 23625
},
{
"loss": 1.221,
"grad_norm": 1.7061282396316528,
"learning_rate": 0.00012898681325283111,
"epoch": 0.36,
"step": 23650
},
{
"loss": 1.1243,
"grad_norm": 1.581986904144287,
"learning_rate": 0.00012891171788170978,
"epoch": 0.36,
"step": 23675
},
{
"loss": 1.2158,
"grad_norm": 1.999489665031433,
"learning_rate": 0.00012883662251058844,
"epoch": 0.36,
"step": 23700
},
{
"loss": 1.1868,
"grad_norm": 1.5865546464920044,
"learning_rate": 0.0001287615271394671,
"epoch": 0.36,
"step": 23725
},
{
"loss": 1.1772,
"grad_norm": 1.1765635013580322,
"learning_rate": 0.0001286864317683458,
"epoch": 0.36,
"step": 23750
},
{
"loss": 1.1669,
"grad_norm": 2.248819589614868,
"learning_rate": 0.0001286113363972245,
"epoch": 0.36,
"step": 23775
},
{
"loss": 1.1574,
"grad_norm": 1.4647800922393799,
"learning_rate": 0.00012853624102610316,
"epoch": 0.36,
"step": 23800
},
{
"loss": 1.1986,
"grad_norm": 1.1818993091583252,
"learning_rate": 0.00012846114565498185,
"epoch": 0.36,
"step": 23825
},
{
"loss": 1.1631,
"grad_norm": 1.785582423210144,
"learning_rate": 0.0001283860502838605,
"epoch": 0.36,
"step": 23850
},
{
"loss": 1.2067,
"grad_norm": 1.7691236734390259,
"learning_rate": 0.00012831095491273918,
"epoch": 0.36,
"step": 23875
},
{
"loss": 1.0843,
"grad_norm": 1.4879204034805298,
"learning_rate": 0.00012823585954161785,
"epoch": 0.36,
"step": 23900
},
{
"loss": 1.1911,
"grad_norm": 1.4341880083084106,
"learning_rate": 0.00012816076417049654,
"epoch": 0.36,
"step": 23925
},
{
"loss": 1.162,
"grad_norm": 0.8942863345146179,
"learning_rate": 0.0001280856687993752,
"epoch": 0.36,
"step": 23950
},
{
"loss": 1.2,
"grad_norm": 1.329323172569275,
"learning_rate": 0.0001280105734282539,
"epoch": 0.36,
"step": 23975
},
{
"loss": 1.1484,
"grad_norm": 1.621002197265625,
"learning_rate": 0.00012793547805713256,
"epoch": 0.36,
"step": 24000
},
{
"loss": 1.181,
"grad_norm": 1.8257761001586914,
"learning_rate": 0.00012786038268601123,
"epoch": 0.36,
"step": 24025
},
{
"loss": 1.1984,
"grad_norm": 2.572247266769409,
"learning_rate": 0.00012778528731488992,
"epoch": 0.36,
"step": 24050
},
{
"loss": 1.1867,
"grad_norm": 1.7765648365020752,
"learning_rate": 0.0001277101919437686,
"epoch": 0.36,
"step": 24075
},
{
"loss": 1.1984,
"grad_norm": 1.3976967334747314,
"learning_rate": 0.00012763509657264728,
"epoch": 0.36,
"step": 24100
},
{
"loss": 1.1685,
"grad_norm": 1.6491625308990479,
"learning_rate": 0.00012756000120152594,
"epoch": 0.36,
"step": 24125
},
{
"loss": 1.1497,
"grad_norm": 1.698404312133789,
"learning_rate": 0.0001274849058304046,
"epoch": 0.36,
"step": 24150
},
{
"loss": 1.2076,
"grad_norm": 1.2471705675125122,
"learning_rate": 0.00012740981045928328,
"epoch": 0.36,
"step": 24175
},
{
"loss": 1.1596,
"grad_norm": 1.2114017009735107,
"learning_rate": 0.00012733471508816197,
"epoch": 0.36,
"step": 24200
},
{
"loss": 1.2032,
"grad_norm": 1.1424446105957031,
"learning_rate": 0.00012725961971704066,
"epoch": 0.36,
"step": 24225
},
{
"loss": 1.1548,
"grad_norm": 1.3526264429092407,
"learning_rate": 0.00012718452434591933,
"epoch": 0.36,
"step": 24250
},
{
"loss": 1.2415,
"grad_norm": 1.2714468240737915,
"learning_rate": 0.00012710942897479802,
"epoch": 0.36,
"step": 24275
},
{
"loss": 1.1264,
"grad_norm": 2.064203977584839,
"learning_rate": 0.00012703433360367668,
"epoch": 0.36,
"step": 24300
},
{
"loss": 1.1578,
"grad_norm": 1.4952439069747925,
"learning_rate": 0.00012695923823255535,
"epoch": 0.37,
"step": 24325
},
{
"loss": 1.1495,
"grad_norm": 1.4773337841033936,
"learning_rate": 0.00012688414286143401,
"epoch": 0.37,
"step": 24350
},
{
"loss": 1.1591,
"grad_norm": 1.1870368719100952,
"learning_rate": 0.0001268090474903127,
"epoch": 0.37,
"step": 24375
},
{
"loss": 1.1744,
"grad_norm": 1.824880838394165,
"learning_rate": 0.00012673395211919137,
"epoch": 0.37,
"step": 24400
},
{
"loss": 1.198,
"grad_norm": 1.18766188621521,
"learning_rate": 0.00012665885674807006,
"epoch": 0.37,
"step": 24425
},
{
"loss": 1.2227,
"grad_norm": 1.719905138015747,
"learning_rate": 0.00012658376137694873,
"epoch": 0.37,
"step": 24450
},
{
"loss": 1.294,
"grad_norm": 1.9146957397460938,
"learning_rate": 0.0001265086660058274,
"epoch": 0.37,
"step": 24475
},
{
"loss": 1.2087,
"grad_norm": 2.0763649940490723,
"learning_rate": 0.0001264335706347061,
"epoch": 0.37,
"step": 24500
},
{
"loss": 1.1887,
"grad_norm": 2.3640265464782715,
"learning_rate": 0.00012635847526358475,
"epoch": 0.37,
"step": 24525
},
{
"loss": 1.2053,
"grad_norm": 1.9339317083358765,
"learning_rate": 0.00012628337989246344,
"epoch": 0.37,
"step": 24550
},
{
"loss": 1.1123,
"grad_norm": 1.4369031190872192,
"learning_rate": 0.0001262082845213421,
"epoch": 0.37,
"step": 24575
},
{
"loss": 1.1309,
"grad_norm": 1.2952880859375,
"learning_rate": 0.00012613318915022078,
"epoch": 0.37,
"step": 24600
},
{
"loss": 1.1589,
"grad_norm": 2.8487777709960938,
"learning_rate": 0.00012605809377909947,
"epoch": 0.37,
"step": 24625
},
{
"loss": 1.1054,
"grad_norm": 1.1736781597137451,
"learning_rate": 0.00012598299840797813,
"epoch": 0.37,
"step": 24650
},
{
"loss": 1.1255,
"grad_norm": 1.5358980894088745,
"learning_rate": 0.00012590790303685683,
"epoch": 0.37,
"step": 24675
},
{
"loss": 1.2041,
"grad_norm": 2.0065975189208984,
"learning_rate": 0.0001258328076657355,
"epoch": 0.37,
"step": 24700
},
{
"loss": 1.15,
"grad_norm": 1.2211554050445557,
"learning_rate": 0.00012575771229461418,
"epoch": 0.37,
"step": 24725
},
{
"loss": 1.1834,
"grad_norm": 1.3376033306121826,
"learning_rate": 0.00012568261692349285,
"epoch": 0.37,
"step": 24750
},
{
"loss": 1.2355,
"grad_norm": 2.8535170555114746,
"learning_rate": 0.00012560752155237151,
"epoch": 0.37,
"step": 24775
},
{
"loss": 1.1949,
"grad_norm": 1.9856910705566406,
"learning_rate": 0.00012553242618125018,
"epoch": 0.37,
"step": 24800
},
{
"loss": 1.1887,
"grad_norm": 2.9144210815429688,
"learning_rate": 0.00012545733081012887,
"epoch": 0.37,
"step": 24825
},
{
"loss": 1.1893,
"grad_norm": 1.4913091659545898,
"learning_rate": 0.00012538223543900756,
"epoch": 0.37,
"step": 24850
},
{
"loss": 1.1173,
"grad_norm": 1.685804009437561,
"learning_rate": 0.00012530714006788623,
"epoch": 0.37,
"step": 24875
},
{
"loss": 1.1303,
"grad_norm": 1.3694686889648438,
"learning_rate": 0.00012523504851160973,
"epoch": 0.37,
"step": 24900
},
{
"loss": 1.2075,
"grad_norm": 1.3392975330352783,
"learning_rate": 0.00012515995314048842,
"epoch": 0.37,
"step": 24925
},
{
"loss": 1.1981,
"grad_norm": 1.352869987487793,
"learning_rate": 0.0001250848577693671,
"epoch": 0.37,
"step": 24950
},
{
"loss": 1.1808,
"grad_norm": 1.1106911897659302,
"learning_rate": 0.00012500976239824578,
"epoch": 0.38,
"step": 24975
},
{
"loss": 1.1819,
"grad_norm": 1.2609456777572632,
"learning_rate": 0.00012493466702712447,
"epoch": 0.38,
"step": 25000
},
{
"loss": 1.1571,
"grad_norm": 1.3581352233886719,
"learning_rate": 0.00012485957165600314,
"epoch": 0.38,
"step": 25025
},
{
"loss": 1.2111,
"grad_norm": 1.7891106605529785,
"learning_rate": 0.0001247844762848818,
"epoch": 0.38,
"step": 25050
},
{
"loss": 1.2029,
"grad_norm": 2.628241539001465,
"learning_rate": 0.00012470938091376047,
"epoch": 0.38,
"step": 25075
},
{
"loss": 1.1415,
"grad_norm": 1.5528656244277954,
"learning_rate": 0.00012463428554263916,
"epoch": 0.38,
"step": 25100
},
{
"loss": 1.0769,
"grad_norm": 2.0100932121276855,
"learning_rate": 0.00012455919017151783,
"epoch": 0.38,
"step": 25125
},
{
"loss": 1.171,
"grad_norm": 2.7479538917541504,
"learning_rate": 0.00012448409480039652,
"epoch": 0.38,
"step": 25150
},
{
"loss": 1.1868,
"grad_norm": 2.177091360092163,
"learning_rate": 0.00012440899942927518,
"epoch": 0.38,
"step": 25175
},
{
"loss": 1.1472,
"grad_norm": 1.9711464643478394,
"learning_rate": 0.00012433390405815385,
"epoch": 0.38,
"step": 25200
},
{
"loss": 1.1982,
"grad_norm": 1.4624091386795044,
"learning_rate": 0.00012425880868703254,
"epoch": 0.38,
"step": 25225
},
{
"loss": 1.1806,
"grad_norm": 1.7121859788894653,
"learning_rate": 0.0001241837133159112,
"epoch": 0.38,
"step": 25250
},
{
"loss": 1.1943,
"grad_norm": 2.1174204349517822,
"learning_rate": 0.0001241086179447899,
"epoch": 0.38,
"step": 25275
},
{
"loss": 1.1742,
"grad_norm": 1.2425144910812378,
"learning_rate": 0.00012403352257366857,
"epoch": 0.38,
"step": 25300
},
{
"loss": 1.1316,
"grad_norm": 2.102142572402954,
"learning_rate": 0.00012395842720254726,
"epoch": 0.38,
"step": 25325
},
{
"loss": 1.1717,
"grad_norm": 1.7592540979385376,
"learning_rate": 0.0001238833318314259,
"epoch": 0.38,
"step": 25350
},
{
"loss": 1.2086,
"grad_norm": 1.7676315307617188,
"learning_rate": 0.0001238082364603046,
"epoch": 0.38,
"step": 25375
},
{
"loss": 1.1386,
"grad_norm": 1.154153823852539,
"learning_rate": 0.00012373314108918325,
"epoch": 0.38,
"step": 25400
},
{
"loss": 1.1803,
"grad_norm": 2.522324800491333,
"learning_rate": 0.00012365804571806195,
"epoch": 0.38,
"step": 25425
},
{
"loss": 1.2331,
"grad_norm": 1.699385404586792,
"learning_rate": 0.00012358295034694064,
"epoch": 0.38,
"step": 25450
},
{
"loss": 1.2247,
"grad_norm": 1.836391568183899,
"learning_rate": 0.0001235078549758193,
"epoch": 0.38,
"step": 25475
},
{
"loss": 1.1509,
"grad_norm": 1.2097364664077759,
"learning_rate": 0.00012343275960469797,
"epoch": 0.38,
"step": 25500
},
{
"loss": 1.1488,
"grad_norm": 0.8426992893218994,
"learning_rate": 0.00012335766423357663,
"epoch": 0.38,
"step": 25525
},
{
"loss": 1.1434,
"grad_norm": 1.2710751295089722,
"learning_rate": 0.00012328256886245533,
"epoch": 0.38,
"step": 25550
},
{
"loss": 1.131,
"grad_norm": 1.567521095275879,
"learning_rate": 0.000123207473491334,
"epoch": 0.38,
"step": 25575
},
{
"loss": 1.2268,
"grad_norm": 1.6876307725906372,
"learning_rate": 0.00012313237812021268,
"epoch": 0.38,
"step": 25600
},
{
"loss": 1.178,
"grad_norm": 1.5570650100708008,
"learning_rate": 0.00012305728274909135,
"epoch": 0.38,
"step": 25625
},
{
"loss": 1.1128,
"grad_norm": 1.9181684255599976,
"learning_rate": 0.00012298218737797002,
"epoch": 0.39,
"step": 25650
},
{
"loss": 1.1662,
"grad_norm": 1.4703614711761475,
"learning_rate": 0.0001229070920068487,
"epoch": 0.39,
"step": 25675
},
{
"loss": 1.2166,
"grad_norm": 1.1674293279647827,
"learning_rate": 0.00012283199663572737,
"epoch": 0.39,
"step": 25700
},
{
"loss": 1.1962,
"grad_norm": 2.910494565963745,
"learning_rate": 0.00012275690126460607,
"epoch": 0.39,
"step": 25725
},
{
"loss": 1.1996,
"grad_norm": 1.249042272567749,
"learning_rate": 0.00012268180589348473,
"epoch": 0.39,
"step": 25750
},
{
"loss": 1.1962,
"grad_norm": 2.1757421493530273,
"learning_rate": 0.00012260671052236342,
"epoch": 0.39,
"step": 25775
},
{
"loss": 1.1302,
"grad_norm": 1.8201817274093628,
"learning_rate": 0.00012253161515124206,
"epoch": 0.39,
"step": 25800
},
{
"loss": 1.1242,
"grad_norm": 1.2587064504623413,
"learning_rate": 0.00012245651978012075,
"epoch": 0.39,
"step": 25825
},
{
"loss": 1.1353,
"grad_norm": 1.9519400596618652,
"learning_rate": 0.00012238142440899945,
"epoch": 0.39,
"step": 25850
},
{
"loss": 1.2128,
"grad_norm": 1.997555136680603,
"learning_rate": 0.0001223063290378781,
"epoch": 0.39,
"step": 25875
},
{
"loss": 1.1383,
"grad_norm": 1.9942442178726196,
"learning_rate": 0.0001222312336667568,
"epoch": 0.39,
"step": 25900
},
{
"loss": 1.1726,
"grad_norm": 2.1078426837921143,
"learning_rate": 0.00012215613829563547,
"epoch": 0.39,
"step": 25925
},
{
"loss": 1.1349,
"grad_norm": 2.8128950595855713,
"learning_rate": 0.00012208104292451413,
"epoch": 0.39,
"step": 25950
},
{
"loss": 1.1536,
"grad_norm": 1.986128330230713,
"learning_rate": 0.00012200594755339281,
"epoch": 0.39,
"step": 25975
},
{
"loss": 1.1194,
"grad_norm": 1.418022871017456,
"learning_rate": 0.00012193085218227149,
"epoch": 0.39,
"step": 26000
},
{
"loss": 1.1819,
"grad_norm": 1.2267699241638184,
"learning_rate": 0.00012185575681115016,
"epoch": 0.39,
"step": 26025
},
{
"loss": 1.1222,
"grad_norm": 1.4214072227478027,
"learning_rate": 0.00012178066144002884,
"epoch": 0.39,
"step": 26050
},
{
"loss": 1.2028,
"grad_norm": 3.486180543899536,
"learning_rate": 0.00012170556606890753,
"epoch": 0.39,
"step": 26075
},
{
"loss": 1.1714,
"grad_norm": 1.6389093399047852,
"learning_rate": 0.0001216304706977862,
"epoch": 0.39,
"step": 26100
},
{
"loss": 1.1689,
"grad_norm": 1.5613031387329102,
"learning_rate": 0.00012155537532666487,
"epoch": 0.39,
"step": 26125
},
{
"loss": 1.1821,
"grad_norm": 1.5050113201141357,
"learning_rate": 0.00012148027995554354,
"epoch": 0.39,
"step": 26150
},
{
"loss": 1.167,
"grad_norm": 1.2190027236938477,
"learning_rate": 0.00012140518458442223,
"epoch": 0.39,
"step": 26175
},
{
"loss": 1.2042,
"grad_norm": 1.0376909971237183,
"learning_rate": 0.0001213300892133009,
"epoch": 0.39,
"step": 26200
},
{
"loss": 1.1713,
"grad_norm": 1.036734938621521,
"learning_rate": 0.00012126100147186927,
"epoch": 0.39,
"step": 26225
},
{
"loss": 1.1867,
"grad_norm": 0.933276355266571,
"learning_rate": 0.00012118890991559282,
"epoch": 0.39,
"step": 26250
},
{
"loss": 1.1568,
"grad_norm": 1.8247997760772705,
"learning_rate": 0.00012111381454447148,
"epoch": 0.39,
"step": 26275
},
{
"loss": 1.1209,
"grad_norm": 1.7920253276824951,
"learning_rate": 0.00012103871917335017,
"epoch": 0.39,
"step": 26300
},
{
"loss": 1.1424,
"grad_norm": 1.558129906654358,
"learning_rate": 0.00012096362380222883,
"epoch": 0.4,
"step": 26325
},
{
"loss": 1.1207,
"grad_norm": 2.0236053466796875,
"learning_rate": 0.00012088852843110752,
"epoch": 0.4,
"step": 26350
},
{
"loss": 1.1367,
"grad_norm": 2.042004108428955,
"learning_rate": 0.00012081343305998618,
"epoch": 0.4,
"step": 26375
},
{
"loss": 1.1823,
"grad_norm": 1.694769024848938,
"learning_rate": 0.00012073833768886486,
"epoch": 0.4,
"step": 26400
},
{
"loss": 1.1536,
"grad_norm": 2.398012399673462,
"learning_rate": 0.00012066324231774353,
"epoch": 0.4,
"step": 26425
},
{
"loss": 1.2019,
"grad_norm": 3.8714237213134766,
"learning_rate": 0.00012058814694662222,
"epoch": 0.4,
"step": 26450
},
{
"loss": 1.2109,
"grad_norm": 2.893437147140503,
"learning_rate": 0.0001205130515755009,
"epoch": 0.4,
"step": 26475
},
{
"loss": 1.2099,
"grad_norm": 1.7134922742843628,
"learning_rate": 0.00012043795620437956,
"epoch": 0.4,
"step": 26500
},
{
"loss": 1.1989,
"grad_norm": 2.3126907348632812,
"learning_rate": 0.00012036286083325826,
"epoch": 0.4,
"step": 26525
},
{
"loss": 1.121,
"grad_norm": 2.5289969444274902,
"learning_rate": 0.00012028776546213691,
"epoch": 0.4,
"step": 26550
},
{
"loss": 1.21,
"grad_norm": 2.551736354827881,
"learning_rate": 0.0001202126700910156,
"epoch": 0.4,
"step": 26575
},
{
"loss": 1.1433,
"grad_norm": 1.8382607698440552,
"learning_rate": 0.00012013757471989427,
"epoch": 0.4,
"step": 26600
},
{
"loss": 1.1714,
"grad_norm": 1.9856308698654175,
"learning_rate": 0.00012006247934877295,
"epoch": 0.4,
"step": 26625
},
{
"loss": 1.1646,
"grad_norm": 1.3132210969924927,
"learning_rate": 0.00011998738397765161,
"epoch": 0.4,
"step": 26650
},
{
"loss": 1.1394,
"grad_norm": 1.8171156644821167,
"learning_rate": 0.0001199122886065303,
"epoch": 0.4,
"step": 26675
},
{
"loss": 1.1494,
"grad_norm": 1.6852163076400757,
"learning_rate": 0.00011983719323540898,
"epoch": 0.4,
"step": 26700
},
{
"loss": 1.1522,
"grad_norm": 1.7947680950164795,
"learning_rate": 0.00011976209786428765,
"epoch": 0.4,
"step": 26725
},
{
"loss": 1.1794,
"grad_norm": 2.0458626747131348,
"learning_rate": 0.00011968700249316634,
"epoch": 0.4,
"step": 26750
},
{
"loss": 1.2013,
"grad_norm": 1.6670138835906982,
"learning_rate": 0.00011961190712204499,
"epoch": 0.4,
"step": 26775
},
{
"loss": 1.2052,
"grad_norm": 1.9082565307617188,
"learning_rate": 0.00011953681175092368,
"epoch": 0.4,
"step": 26800
},
{
"loss": 1.1175,
"grad_norm": 1.3584920167922974,
"learning_rate": 0.00011946171637980235,
"epoch": 0.4,
"step": 26825
},
{
"loss": 1.1264,
"grad_norm": 2.0976293087005615,
"learning_rate": 0.00011938662100868103,
"epoch": 0.4,
"step": 26850
},
{
"loss": 1.2392,
"grad_norm": 2.034069776535034,
"learning_rate": 0.00011931152563755969,
"epoch": 0.4,
"step": 26875
},
{
"loss": 1.1871,
"grad_norm": 1.4389294385910034,
"learning_rate": 0.00011923643026643839,
"epoch": 0.4,
"step": 26900
},
{
"loss": 1.1544,
"grad_norm": 1.7886531352996826,
"learning_rate": 0.00011916133489531706,
"epoch": 0.4,
"step": 26925
},
{
"loss": 1.1557,
"grad_norm": 1.1227729320526123,
"learning_rate": 0.00011908623952419573,
"epoch": 0.4,
"step": 26950
},
{
"loss": 1.113,
"grad_norm": 1.5332506895065308,
"learning_rate": 0.00011901114415307442,
"epoch": 0.41,
"step": 26975
},
{
"loss": 1.2323,
"grad_norm": 1.5316015481948853,
"learning_rate": 0.00011893604878195307,
"epoch": 0.41,
"step": 27000
},
{
"loss": 1.1814,
"grad_norm": 1.7715721130371094,
"learning_rate": 0.00011886095341083177,
"epoch": 0.41,
"step": 27025
},
{
"loss": 1.117,
"grad_norm": 1.1491894721984863,
"learning_rate": 0.00011878585803971043,
"epoch": 0.41,
"step": 27050
},
{
"loss": 1.1541,
"grad_norm": 1.2926276922225952,
"learning_rate": 0.00011871076266858911,
"epoch": 0.41,
"step": 27075
},
{
"loss": 1.1933,
"grad_norm": 1.9231313467025757,
"learning_rate": 0.0001186356672974678,
"epoch": 0.41,
"step": 27100
},
{
"loss": 1.1409,
"grad_norm": 2.1319782733917236,
"learning_rate": 0.00011856057192634647,
"epoch": 0.41,
"step": 27125
},
{
"loss": 1.2189,
"grad_norm": 1.5915454626083374,
"learning_rate": 0.00011848547655522515,
"epoch": 0.41,
"step": 27150
},
{
"loss": 1.1335,
"grad_norm": 1.5728384256362915,
"learning_rate": 0.00011841038118410381,
"epoch": 0.41,
"step": 27175
},
{
"loss": 1.1534,
"grad_norm": 1.0829964876174927,
"learning_rate": 0.0001183352858129825,
"epoch": 0.41,
"step": 27200
},
{
"loss": 1.0959,
"grad_norm": 2.1620664596557617,
"learning_rate": 0.00011826019044186116,
"epoch": 0.41,
"step": 27225
},
{
"loss": 1.1316,
"grad_norm": 1.7385821342468262,
"learning_rate": 0.00011818509507073985,
"epoch": 0.41,
"step": 27250
},
{
"loss": 1.121,
"grad_norm": 2.2649617195129395,
"learning_rate": 0.00011810999969961851,
"epoch": 0.41,
"step": 27275
},
{
"loss": 1.1831,
"grad_norm": 1.98993980884552,
"learning_rate": 0.00011803490432849719,
"epoch": 0.41,
"step": 27300
},
{
"loss": 1.1661,
"grad_norm": 1.3478261232376099,
"learning_rate": 0.00011795980895737589,
"epoch": 0.41,
"step": 27325
},
{
"loss": 1.1912,
"grad_norm": 1.5697304010391235,
"learning_rate": 0.00011788471358625455,
"epoch": 0.41,
"step": 27350
},
{
"loss": 1.2364,
"grad_norm": 1.7027043104171753,
"learning_rate": 0.00011780961821513323,
"epoch": 0.41,
"step": 27375
},
{
"loss": 1.1422,
"grad_norm": 1.8551706075668335,
"learning_rate": 0.0001177345228440119,
"epoch": 0.41,
"step": 27400
},
{
"loss": 1.1839,
"grad_norm": 1.9152601957321167,
"learning_rate": 0.00011765942747289059,
"epoch": 0.41,
"step": 27425
},
{
"loss": 1.201,
"grad_norm": 2.2264368534088135,
"learning_rate": 0.00011758433210176924,
"epoch": 0.41,
"step": 27450
},
{
"loss": 1.1721,
"grad_norm": 1.2363280057907104,
"learning_rate": 0.00011750923673064793,
"epoch": 0.41,
"step": 27475
},
{
"loss": 1.1533,
"grad_norm": 1.1803810596466064,
"learning_rate": 0.0001174341413595266,
"epoch": 0.41,
"step": 27500
},
{
"loss": 1.1379,
"grad_norm": 1.3785597085952759,
"learning_rate": 0.00011735904598840528,
"epoch": 0.41,
"step": 27525
},
{
"loss": 1.1466,
"grad_norm": 1.222312331199646,
"learning_rate": 0.00011728395061728397,
"epoch": 0.41,
"step": 27550
},
{
"loss": 1.1886,
"grad_norm": 1.3862022161483765,
"learning_rate": 0.00011720885524616263,
"epoch": 0.41,
"step": 27575
},
{
"loss": 1.1591,
"grad_norm": 1.8599638938903809,
"learning_rate": 0.00011713375987504131,
"epoch": 0.41,
"step": 27600
},
{
"loss": 1.2353,
"grad_norm": 2.57729172706604,
"learning_rate": 0.00011705866450391998,
"epoch": 0.41,
"step": 27625
},
{
"loss": 1.1376,
"grad_norm": 1.734212040901184,
"learning_rate": 0.00011698356913279867,
"epoch": 0.42,
"step": 27650
},
{
"loss": 1.1869,
"grad_norm": 1.5688458681106567,
"learning_rate": 0.00011690847376167732,
"epoch": 0.42,
"step": 27675
},
{
"loss": 1.2175,
"grad_norm": 1.8952748775482178,
"learning_rate": 0.00011683337839055601,
"epoch": 0.42,
"step": 27700
},
{
"loss": 1.1552,
"grad_norm": 1.0788073539733887,
"learning_rate": 0.00011675828301943468,
"epoch": 0.42,
"step": 27725
},
{
"loss": 1.1604,
"grad_norm": 2.346510410308838,
"learning_rate": 0.00011668318764831336,
"epoch": 0.42,
"step": 27750
},
{
"loss": 1.1789,
"grad_norm": 1.775448203086853,
"learning_rate": 0.00011660809227719205,
"epoch": 0.42,
"step": 27775
},
{
"loss": 1.1386,
"grad_norm": 2.772287130355835,
"learning_rate": 0.00011653299690607072,
"epoch": 0.42,
"step": 27800
},
{
"loss": 1.1659,
"grad_norm": 1.017460584640503,
"learning_rate": 0.0001164579015349494,
"epoch": 0.42,
"step": 27825
},
{
"loss": 1.1647,
"grad_norm": 1.9251552820205688,
"learning_rate": 0.00011638280616382806,
"epoch": 0.42,
"step": 27850
},
{
"loss": 1.2273,
"grad_norm": 1.44833242893219,
"learning_rate": 0.00011630771079270675,
"epoch": 0.42,
"step": 27875
},
{
"loss": 1.2112,
"grad_norm": 1.3559473752975464,
"learning_rate": 0.0001162326154215854,
"epoch": 0.42,
"step": 27900
},
{
"loss": 1.219,
"grad_norm": 2.958477258682251,
"learning_rate": 0.0001161575200504641,
"epoch": 0.42,
"step": 27925
},
{
"loss": 1.142,
"grad_norm": 1.5322625637054443,
"learning_rate": 0.00011608242467934276,
"epoch": 0.42,
"step": 27950
},
{
"loss": 1.1681,
"grad_norm": 1.0819323062896729,
"learning_rate": 0.00011600732930822144,
"epoch": 0.42,
"step": 27975
},
{
"loss": 1.1307,
"grad_norm": 1.086040735244751,
"learning_rate": 0.00011593223393710013,
"epoch": 0.42,
"step": 28000
},
{
"loss": 1.1872,
"grad_norm": 2.5879430770874023,
"learning_rate": 0.0001158571385659788,
"epoch": 0.42,
"step": 28025
},
{
"loss": 1.1294,
"grad_norm": 1.5664997100830078,
"learning_rate": 0.00011578204319485748,
"epoch": 0.42,
"step": 28050
},
{
"loss": 1.1441,
"grad_norm": 1.5841997861862183,
"learning_rate": 0.00011570694782373614,
"epoch": 0.42,
"step": 28075
},
{
"loss": 1.1663,
"grad_norm": 1.0428881645202637,
"learning_rate": 0.00011563185245261484,
"epoch": 0.42,
"step": 28100
},
{
"loss": 1.1412,
"grad_norm": 1.4296401739120483,
"learning_rate": 0.00011555675708149349,
"epoch": 0.42,
"step": 28125
},
{
"loss": 1.1706,
"grad_norm": 1.3985766172409058,
"learning_rate": 0.00011548166171037218,
"epoch": 0.42,
"step": 28150
},
{
"loss": 1.1258,
"grad_norm": 1.4393442869186401,
"learning_rate": 0.00011540656633925085,
"epoch": 0.42,
"step": 28175
},
{
"loss": 1.1365,
"grad_norm": 1.9533663988113403,
"learning_rate": 0.00011533147096812952,
"epoch": 0.42,
"step": 28200
},
{
"loss": 1.1164,
"grad_norm": 1.4977903366088867,
"learning_rate": 0.00011525637559700822,
"epoch": 0.42,
"step": 28225
},
{
"loss": 1.2153,
"grad_norm": 1.857847809791565,
"learning_rate": 0.00011518128022588688,
"epoch": 0.42,
"step": 28250
},
{
"loss": 1.1949,
"grad_norm": 1.774740219116211,
"learning_rate": 0.00011510618485476556,
"epoch": 0.42,
"step": 28275
},
{
"loss": 1.1738,
"grad_norm": 1.1024271249771118,
"learning_rate": 0.00011503108948364423,
"epoch": 0.43,
"step": 28300
},
{
"loss": 1.1814,
"grad_norm": 3.308375358581543,
"learning_rate": 0.00011495599411252292,
"epoch": 0.43,
"step": 28325
},
{
"loss": 1.1684,
"grad_norm": 1.0142186880111694,
"learning_rate": 0.00011488089874140158,
"epoch": 0.43,
"step": 28350
},
{
"loss": 1.1109,
"grad_norm": 2.34968638420105,
"learning_rate": 0.00011480580337028026,
"epoch": 0.43,
"step": 28375
},
{
"loss": 1.1584,
"grad_norm": 1.1933414936065674,
"learning_rate": 0.00011473070799915895,
"epoch": 0.43,
"step": 28400
},
{
"loss": 1.1562,
"grad_norm": 1.6852394342422485,
"learning_rate": 0.0001146556126280376,
"epoch": 0.43,
"step": 28425
},
{
"loss": 1.0875,
"grad_norm": 1.276416301727295,
"learning_rate": 0.0001145805172569163,
"epoch": 0.43,
"step": 28450
},
{
"loss": 1.2261,
"grad_norm": 2.792825937271118,
"learning_rate": 0.00011450542188579496,
"epoch": 0.43,
"step": 28475
},
{
"loss": 1.166,
"grad_norm": 1.2241714000701904,
"learning_rate": 0.00011443032651467364,
"epoch": 0.43,
"step": 28500
},
{
"loss": 1.2041,
"grad_norm": 1.3080065250396729,
"learning_rate": 0.00011435523114355231,
"epoch": 0.43,
"step": 28525
},
{
"loss": 1.1149,
"grad_norm": 1.7479028701782227,
"learning_rate": 0.000114280135772431,
"epoch": 0.43,
"step": 28550
},
{
"loss": 1.1577,
"grad_norm": 2.0920069217681885,
"learning_rate": 0.00011420504040130967,
"epoch": 0.43,
"step": 28575
},
{
"loss": 1.1439,
"grad_norm": 1.1147267818450928,
"learning_rate": 0.00011412994503018835,
"epoch": 0.43,
"step": 28600
},
{
"loss": 1.1928,
"grad_norm": 1.3617130517959595,
"learning_rate": 0.00011405484965906704,
"epoch": 0.43,
"step": 28625
},
{
"loss": 1.1807,
"grad_norm": 1.8022890090942383,
"learning_rate": 0.00011397975428794569,
"epoch": 0.43,
"step": 28650
},
{
"loss": 1.1285,
"grad_norm": 1.4314754009246826,
"learning_rate": 0.00011390465891682438,
"epoch": 0.43,
"step": 28675
},
{
"loss": 1.2854,
"grad_norm": 1.2290889024734497,
"learning_rate": 0.00011382956354570305,
"epoch": 0.43,
"step": 28700
},
{
"loss": 1.1868,
"grad_norm": 1.2961443662643433,
"learning_rate": 0.00011375446817458173,
"epoch": 0.43,
"step": 28725
},
{
"loss": 1.1627,
"grad_norm": 1.629899501800537,
"learning_rate": 0.00011367937280346039,
"epoch": 0.43,
"step": 28750
},
{
"loss": 1.1232,
"grad_norm": 1.3125689029693604,
"learning_rate": 0.00011360427743233908,
"epoch": 0.43,
"step": 28775
},
{
"loss": 1.1396,
"grad_norm": 1.3124148845672607,
"learning_rate": 0.00011352918206121775,
"epoch": 0.43,
"step": 28800
},
{
"loss": 1.0702,
"grad_norm": 1.1580018997192383,
"learning_rate": 0.00011345408669009643,
"epoch": 0.43,
"step": 28825
},
{
"loss": 1.1338,
"grad_norm": 1.5117197036743164,
"learning_rate": 0.00011337899131897512,
"epoch": 0.43,
"step": 28850
},
{
"loss": 1.1742,
"grad_norm": 1.6845176219940186,
"learning_rate": 0.00011330389594785377,
"epoch": 0.43,
"step": 28875
},
{
"loss": 1.1828,
"grad_norm": 1.1892350912094116,
"learning_rate": 0.00011322880057673246,
"epoch": 0.43,
"step": 28900
},
{
"loss": 1.237,
"grad_norm": 1.562537431716919,
"learning_rate": 0.00011315370520561113,
"epoch": 0.43,
"step": 28925
},
{
"loss": 1.193,
"grad_norm": 1.7920253276824951,
"learning_rate": 0.00011307860983448981,
"epoch": 0.43,
"step": 28950
},
{
"loss": 1.1734,
"grad_norm": 1.7338802814483643,
"learning_rate": 0.00011300351446336847,
"epoch": 0.44,
"step": 28975
},
{
"loss": 1.1254,
"grad_norm": 1.6084978580474854,
"learning_rate": 0.00011292841909224717,
"epoch": 0.44,
"step": 29000
},
{
"loss": 1.1564,
"grad_norm": 2.1127138137817383,
"learning_rate": 0.00011285332372112583,
"epoch": 0.44,
"step": 29025
},
{
"loss": 1.1894,
"grad_norm": 2.373610019683838,
"learning_rate": 0.00011277822835000451,
"epoch": 0.44,
"step": 29050
},
{
"loss": 1.0381,
"grad_norm": 1.090454339981079,
"learning_rate": 0.0001127031329788832,
"epoch": 0.44,
"step": 29075
},
{
"loss": 1.1866,
"grad_norm": 1.2997491359710693,
"learning_rate": 0.00011262803760776185,
"epoch": 0.44,
"step": 29100
},
{
"loss": 1.2086,
"grad_norm": 1.9946448802947998,
"learning_rate": 0.00011255294223664055,
"epoch": 0.44,
"step": 29125
},
{
"loss": 1.1127,
"grad_norm": 1.276667594909668,
"learning_rate": 0.00011247784686551921,
"epoch": 0.44,
"step": 29150
},
{
"loss": 1.2735,
"grad_norm": 1.8735250234603882,
"learning_rate": 0.00011240275149439789,
"epoch": 0.44,
"step": 29175
},
{
"loss": 1.1269,
"grad_norm": 1.4805363416671753,
"learning_rate": 0.00011232765612327656,
"epoch": 0.44,
"step": 29200
},
{
"loss": 1.1203,
"grad_norm": 1.6462610960006714,
"learning_rate": 0.00011225256075215525,
"epoch": 0.44,
"step": 29225
},
{
"loss": 1.1661,
"grad_norm": 1.334406852722168,
"learning_rate": 0.00011217746538103391,
"epoch": 0.44,
"step": 29250
},
{
"loss": 1.1603,
"grad_norm": 1.3393394947052002,
"learning_rate": 0.0001121023700099126,
"epoch": 0.44,
"step": 29275
},
{
"loss": 1.1295,
"grad_norm": 2.316953420639038,
"learning_rate": 0.00011202727463879129,
"epoch": 0.44,
"step": 29300
},
{
"loss": 1.1566,
"grad_norm": 1.7229734659194946,
"learning_rate": 0.00011195217926766994,
"epoch": 0.44,
"step": 29325
},
{
"loss": 1.1669,
"grad_norm": 2.08143949508667,
"learning_rate": 0.00011187708389654863,
"epoch": 0.44,
"step": 29350
},
{
"loss": 1.1525,
"grad_norm": 2.7917256355285645,
"learning_rate": 0.0001118019885254273,
"epoch": 0.44,
"step": 29375
},
{
"loss": 1.1114,
"grad_norm": 1.8444219827651978,
"learning_rate": 0.00011172689315430597,
"epoch": 0.44,
"step": 29400
},
{
"loss": 1.1588,
"grad_norm": 1.2194463014602661,
"learning_rate": 0.00011165179778318464,
"epoch": 0.44,
"step": 29425
},
{
"loss": 1.2405,
"grad_norm": 1.1201077699661255,
"learning_rate": 0.00011157670241206333,
"epoch": 0.44,
"step": 29450
},
{
"loss": 1.2188,
"grad_norm": 2.771019220352173,
"learning_rate": 0.000111501607040942,
"epoch": 0.44,
"step": 29475
},
{
"loss": 1.1978,
"grad_norm": 2.0680384635925293,
"learning_rate": 0.00011142651166982068,
"epoch": 0.44,
"step": 29500
},
{
"loss": 1.1814,
"grad_norm": 1.2148905992507935,
"learning_rate": 0.00011135141629869937,
"epoch": 0.44,
"step": 29525
},
{
"loss": 1.1382,
"grad_norm": 1.3024623394012451,
"learning_rate": 0.00011127632092757802,
"epoch": 0.44,
"step": 29550
},
{
"loss": 1.173,
"grad_norm": 1.3196483850479126,
"learning_rate": 0.00011120122555645671,
"epoch": 0.44,
"step": 29575
},
{
"loss": 1.2608,
"grad_norm": 1.9761130809783936,
"learning_rate": 0.00011112613018533538,
"epoch": 0.44,
"step": 29600
},
{
"loss": 1.1977,
"grad_norm": 2.152472734451294,
"learning_rate": 0.00011105103481421406,
"epoch": 0.44,
"step": 29625
},
{
"loss": 1.2164,
"grad_norm": 1.2230114936828613,
"learning_rate": 0.00011097593944309272,
"epoch": 0.45,
"step": 29650
},
{
"loss": 1.1368,
"grad_norm": 1.2674063444137573,
"learning_rate": 0.00011090084407197141,
"epoch": 0.45,
"step": 29675
},
{
"loss": 1.1773,
"grad_norm": 1.7089192867279053,
"learning_rate": 0.00011082574870085008,
"epoch": 0.45,
"step": 29700
},
{
"loss": 1.2058,
"grad_norm": 1.6862412691116333,
"learning_rate": 0.00011075065332972876,
"epoch": 0.45,
"step": 29725
},
{
"loss": 1.1363,
"grad_norm": 1.8428794145584106,
"learning_rate": 0.00011067555795860745,
"epoch": 0.45,
"step": 29750
},
{
"loss": 1.1239,
"grad_norm": 1.7620809078216553,
"learning_rate": 0.0001106004625874861,
"epoch": 0.45,
"step": 29775
},
{
"loss": 1.1175,
"grad_norm": 2.246371269226074,
"learning_rate": 0.0001105253672163648,
"epoch": 0.45,
"step": 29800
},
{
"loss": 1.1432,
"grad_norm": 1.3259189128875732,
"learning_rate": 0.00011045027184524346,
"epoch": 0.45,
"step": 29825
},
{
"loss": 1.1352,
"grad_norm": 1.642720103263855,
"learning_rate": 0.00011037517647412214,
"epoch": 0.45,
"step": 29850
},
{
"loss": 1.1642,
"grad_norm": 1.3091384172439575,
"learning_rate": 0.0001103000811030008,
"epoch": 0.45,
"step": 29875
},
{
"loss": 1.155,
"grad_norm": 1.44764244556427,
"learning_rate": 0.0001102249857318795,
"epoch": 0.45,
"step": 29900
},
{
"loss": 1.1144,
"grad_norm": 3.290072441101074,
"learning_rate": 0.00011014989036075818,
"epoch": 0.45,
"step": 29925
},
{
"loss": 1.1852,
"grad_norm": 1.8344993591308594,
"learning_rate": 0.00011007479498963684,
"epoch": 0.45,
"step": 29950
},
{
"loss": 1.2341,
"grad_norm": 1.0677040815353394,
"learning_rate": 0.00010999969961851553,
"epoch": 0.45,
"step": 29975
},
{
"loss": 1.1522,
"grad_norm": 1.430322527885437,
"learning_rate": 0.00010992460424739419,
"epoch": 0.45,
"step": 30000
},
{
"loss": 1.1885,
"grad_norm": 2.407017230987549,
"learning_rate": 0.00010984950887627288,
"epoch": 0.45,
"step": 30025
},
{
"loss": 1.2033,
"grad_norm": 1.9406884908676147,
"learning_rate": 0.00010977441350515154,
"epoch": 0.45,
"step": 30050
},
{
"loss": 1.1372,
"grad_norm": 2.1446497440338135,
"learning_rate": 0.00010969931813403022,
"epoch": 0.45,
"step": 30075
},
{
"loss": 1.1924,
"grad_norm": 1.4735894203186035,
"learning_rate": 0.00010962422276290889,
"epoch": 0.45,
"step": 30100
},
{
"loss": 1.1923,
"grad_norm": 1.4889634847640991,
"learning_rate": 0.00010954912739178758,
"epoch": 0.45,
"step": 30125
},
{
"loss": 1.1588,
"grad_norm": 1.8243343830108643,
"learning_rate": 0.00010947403202066626,
"epoch": 0.45,
"step": 30150
},
{
"loss": 1.1383,
"grad_norm": 1.3423229455947876,
"learning_rate": 0.00010939893664954492,
"epoch": 0.45,
"step": 30175
},
{
"loss": 1.1461,
"grad_norm": 2.072646141052246,
"learning_rate": 0.00010932384127842362,
"epoch": 0.45,
"step": 30200
},
{
"loss": 1.2188,
"grad_norm": 2.139387845993042,
"learning_rate": 0.00010924874590730227,
"epoch": 0.45,
"step": 30225
},
{
"loss": 1.1639,
"grad_norm": 6.252641677856445,
"learning_rate": 0.00010917365053618096,
"epoch": 0.45,
"step": 30250
},
{
"loss": 1.1542,
"grad_norm": 1.225797176361084,
"learning_rate": 0.00010909855516505963,
"epoch": 0.45,
"step": 30275
},
{
"loss": 1.1593,
"grad_norm": 1.5084859132766724,
"learning_rate": 0.0001090234597939383,
"epoch": 0.46,
"step": 30300
},
{
"loss": 1.1765,
"grad_norm": 1.0552685260772705,
"learning_rate": 0.00010894836442281697,
"epoch": 0.46,
"step": 30325
},
{
"loss": 1.1944,
"grad_norm": 3.4387400150299072,
"learning_rate": 0.00010887326905169566,
"epoch": 0.46,
"step": 30350
},
{
"loss": 1.0752,
"grad_norm": 1.3896501064300537,
"learning_rate": 0.00010879817368057434,
"epoch": 0.46,
"step": 30375
},
{
"loss": 1.1576,
"grad_norm": 1.6324450969696045,
"learning_rate": 0.00010872307830945301,
"epoch": 0.46,
"step": 30400
},
{
"loss": 1.1854,
"grad_norm": 2.059718132019043,
"learning_rate": 0.0001086479829383317,
"epoch": 0.46,
"step": 30425
},
{
"loss": 1.1918,
"grad_norm": 1.7998640537261963,
"learning_rate": 0.00010857288756721035,
"epoch": 0.46,
"step": 30450
},
{
"loss": 1.186,
"grad_norm": 1.9032535552978516,
"learning_rate": 0.00010849779219608904,
"epoch": 0.46,
"step": 30475
},
{
"loss": 1.1738,
"grad_norm": 1.7081289291381836,
"learning_rate": 0.00010842269682496771,
"epoch": 0.46,
"step": 30500
},
{
"loss": 1.1147,
"grad_norm": 1.2194355726242065,
"learning_rate": 0.00010834760145384639,
"epoch": 0.46,
"step": 30525
},
{
"loss": 1.0827,
"grad_norm": 1.396530032157898,
"learning_rate": 0.00010827250608272505,
"epoch": 0.46,
"step": 30550
},
{
"loss": 1.1544,
"grad_norm": 2.1990020275115967,
"learning_rate": 0.00010819741071160375,
"epoch": 0.46,
"step": 30575
},
{
"loss": 1.1533,
"grad_norm": 1.4652187824249268,
"learning_rate": 0.00010812231534048242,
"epoch": 0.46,
"step": 30600
},
{
"loss": 1.1761,
"grad_norm": 1.4150506258010864,
"learning_rate": 0.00010804721996936109,
"epoch": 0.46,
"step": 30625
},
{
"loss": 1.2356,
"grad_norm": 1.5214896202087402,
"learning_rate": 0.00010797212459823978,
"epoch": 0.46,
"step": 30650
},
{
"loss": 1.1893,
"grad_norm": 1.470495581626892,
"learning_rate": 0.00010789702922711843,
"epoch": 0.46,
"step": 30675
},
{
"loss": 1.1708,
"grad_norm": 2.846820592880249,
"learning_rate": 0.00010782193385599713,
"epoch": 0.46,
"step": 30700
},
{
"loss": 1.1546,
"grad_norm": 1.4119728803634644,
"learning_rate": 0.00010774683848487579,
"epoch": 0.46,
"step": 30725
},
{
"loss": 1.1822,
"grad_norm": 1.4061907529830933,
"learning_rate": 0.00010767174311375447,
"epoch": 0.46,
"step": 30750
},
{
"loss": 1.1481,
"grad_norm": 1.3078978061676025,
"learning_rate": 0.00010759664774263314,
"epoch": 0.46,
"step": 30775
},
{
"loss": 1.1322,
"grad_norm": 2.0098421573638916,
"learning_rate": 0.00010752155237151183,
"epoch": 0.46,
"step": 30800
},
{
"loss": 1.1943,
"grad_norm": 2.3420894145965576,
"learning_rate": 0.00010744645700039051,
"epoch": 0.46,
"step": 30825
},
{
"loss": 1.1382,
"grad_norm": 2.183663845062256,
"learning_rate": 0.00010737136162926917,
"epoch": 0.46,
"step": 30850
},
{
"loss": 1.2107,
"grad_norm": 1.6581045389175415,
"learning_rate": 0.00010729626625814786,
"epoch": 0.46,
"step": 30875
},
{
"loss": 1.2586,
"grad_norm": 1.961310625076294,
"learning_rate": 0.00010722117088702652,
"epoch": 0.46,
"step": 30900
},
{
"loss": 1.1607,
"grad_norm": 1.231471061706543,
"learning_rate": 0.00010714607551590521,
"epoch": 0.46,
"step": 30925
},
{
"loss": 1.1467,
"grad_norm": 1.653730869293213,
"learning_rate": 0.00010707098014478387,
"epoch": 0.46,
"step": 30950
},
{
"loss": 1.2346,
"grad_norm": 1.830336332321167,
"learning_rate": 0.00010699588477366255,
"epoch": 0.47,
"step": 30975
},
{
"loss": 1.174,
"grad_norm": 1.4249459505081177,
"learning_rate": 0.00010692078940254122,
"epoch": 0.47,
"step": 31000
},
{
"loss": 1.1379,
"grad_norm": 1.7390903234481812,
"learning_rate": 0.00010684569403141991,
"epoch": 0.47,
"step": 31025
},
{
"loss": 1.2185,
"grad_norm": 1.3198795318603516,
"learning_rate": 0.00010677059866029859,
"epoch": 0.47,
"step": 31050
},
{
"loss": 1.1644,
"grad_norm": 1.7585688829421997,
"learning_rate": 0.00010669550328917725,
"epoch": 0.47,
"step": 31075
},
{
"loss": 1.2051,
"grad_norm": 1.4614295959472656,
"learning_rate": 0.00010662040791805595,
"epoch": 0.47,
"step": 31100
},
{
"loss": 1.0994,
"grad_norm": 2.1233184337615967,
"learning_rate": 0.0001065453125469346,
"epoch": 0.47,
"step": 31125
},
{
"loss": 1.1336,
"grad_norm": 2.0219411849975586,
"learning_rate": 0.00010647021717581329,
"epoch": 0.47,
"step": 31150
},
{
"loss": 1.1349,
"grad_norm": 1.2599328756332397,
"learning_rate": 0.00010639512180469196,
"epoch": 0.47,
"step": 31175
},
{
"loss": 1.2062,
"grad_norm": 1.209994912147522,
"learning_rate": 0.00010632002643357064,
"epoch": 0.47,
"step": 31200
},
{
"loss": 1.1555,
"grad_norm": 1.5804765224456787,
"learning_rate": 0.00010624493106244933,
"epoch": 0.47,
"step": 31225
},
{
"loss": 1.2051,
"grad_norm": 1.906879186630249,
"learning_rate": 0.000106169835691328,
"epoch": 0.47,
"step": 31250
},
{
"loss": 1.2132,
"grad_norm": 1.4314424991607666,
"learning_rate": 0.00010609474032020667,
"epoch": 0.47,
"step": 31275
},
{
"loss": 1.2105,
"grad_norm": 1.4528160095214844,
"learning_rate": 0.00010601964494908534,
"epoch": 0.47,
"step": 31300
},
{
"loss": 1.2308,
"grad_norm": 1.5849334001541138,
"learning_rate": 0.00010594454957796403,
"epoch": 0.47,
"step": 31325
},
{
"loss": 1.1983,
"grad_norm": 1.6990954875946045,
"learning_rate": 0.0001058694542068427,
"epoch": 0.47,
"step": 31350
},
{
"loss": 1.1251,
"grad_norm": 1.3091074228286743,
"learning_rate": 0.00010579435883572137,
"epoch": 0.47,
"step": 31375
},
{
"loss": 1.1784,
"grad_norm": 2.4281911849975586,
"learning_rate": 0.00010571926346460004,
"epoch": 0.47,
"step": 31400
},
{
"loss": 1.1874,
"grad_norm": 1.9910012483596802,
"learning_rate": 0.00010564416809347872,
"epoch": 0.47,
"step": 31425
},
{
"loss": 1.2311,
"grad_norm": 2.100861072540283,
"learning_rate": 0.00010556907272235741,
"epoch": 0.47,
"step": 31450
},
{
"loss": 1.1173,
"grad_norm": 1.6685750484466553,
"learning_rate": 0.00010549397735123608,
"epoch": 0.47,
"step": 31475
},
{
"loss": 1.1874,
"grad_norm": 3.5001275539398193,
"learning_rate": 0.00010541888198011476,
"epoch": 0.47,
"step": 31500
},
{
"loss": 1.1384,
"grad_norm": 1.6073639392852783,
"learning_rate": 0.00010534378660899342,
"epoch": 0.47,
"step": 31525
},
{
"loss": 1.2495,
"grad_norm": 1.9744518995285034,
"learning_rate": 0.00010526869123787211,
"epoch": 0.47,
"step": 31550
},
{
"loss": 1.1328,
"grad_norm": 1.4878309965133667,
"learning_rate": 0.00010519359586675078,
"epoch": 0.47,
"step": 31575
},
{
"loss": 1.2093,
"grad_norm": 3.258043050765991,
"learning_rate": 0.00010511850049562946,
"epoch": 0.47,
"step": 31600
},
{
"loss": 1.1802,
"grad_norm": 2.012786865234375,
"learning_rate": 0.00010504340512450812,
"epoch": 0.47,
"step": 31625
},
{
"loss": 1.096,
"grad_norm": 1.3581587076187134,
"learning_rate": 0.0001049683097533868,
"epoch": 0.48,
"step": 31650
},
{
"loss": 1.1548,
"grad_norm": 1.2571851015090942,
"learning_rate": 0.0001048932143822655,
"epoch": 0.48,
"step": 31675
},
{
"loss": 1.1615,
"grad_norm": 1.5408381223678589,
"learning_rate": 0.00010481811901114416,
"epoch": 0.48,
"step": 31700
},
{
"loss": 1.1491,
"grad_norm": 2.3489863872528076,
"learning_rate": 0.00010474302364002284,
"epoch": 0.48,
"step": 31725
},
{
"loss": 1.174,
"grad_norm": 1.5670727491378784,
"learning_rate": 0.0001046679282689015,
"epoch": 0.48,
"step": 31750
},
{
"loss": 1.1087,
"grad_norm": 1.6657809019088745,
"learning_rate": 0.0001045928328977802,
"epoch": 0.48,
"step": 31775
},
{
"loss": 1.1351,
"grad_norm": 2.1541805267333984,
"learning_rate": 0.00010451773752665886,
"epoch": 0.48,
"step": 31800
},
{
"loss": 1.0992,
"grad_norm": 1.6802806854248047,
"learning_rate": 0.00010444264215553754,
"epoch": 0.48,
"step": 31825
},
{
"loss": 1.2207,
"grad_norm": 1.513509750366211,
"learning_rate": 0.0001043675467844162,
"epoch": 0.48,
"step": 31850
},
{
"loss": 1.1775,
"grad_norm": 1.223694920539856,
"learning_rate": 0.00010429245141329488,
"epoch": 0.48,
"step": 31875
},
{
"loss": 1.1863,
"grad_norm": 1.8998793363571167,
"learning_rate": 0.00010421735604217358,
"epoch": 0.48,
"step": 31900
},
{
"loss": 1.1404,
"grad_norm": 2.1678850650787354,
"learning_rate": 0.00010414226067105224,
"epoch": 0.48,
"step": 31925
},
{
"loss": 1.1979,
"grad_norm": 1.7826672792434692,
"learning_rate": 0.00010406716529993092,
"epoch": 0.48,
"step": 31950
},
{
"loss": 1.2179,
"grad_norm": 2.165457248687744,
"learning_rate": 0.00010399206992880959,
"epoch": 0.48,
"step": 31975
},
{
"loss": 1.1489,
"grad_norm": 1.6185364723205566,
"learning_rate": 0.00010391697455768828,
"epoch": 0.48,
"step": 32000
},
{
"loss": 1.1699,
"grad_norm": 1.2954517602920532,
"learning_rate": 0.00010384187918656694,
"epoch": 0.48,
"step": 32025
},
{
"loss": 1.2003,
"grad_norm": 1.919216275215149,
"learning_rate": 0.00010376678381544562,
"epoch": 0.48,
"step": 32050
},
{
"loss": 1.1411,
"grad_norm": 1.045401692390442,
"learning_rate": 0.00010369168844432429,
"epoch": 0.48,
"step": 32075
},
{
"loss": 1.1166,
"grad_norm": 1.860318660736084,
"learning_rate": 0.00010361659307320297,
"epoch": 0.48,
"step": 32100
},
{
"loss": 1.2215,
"grad_norm": 0.9368788599967957,
"learning_rate": 0.00010354149770208166,
"epoch": 0.48,
"step": 32125
},
{
"loss": 1.1697,
"grad_norm": 1.7075835466384888,
"learning_rate": 0.00010346640233096032,
"epoch": 0.48,
"step": 32150
},
{
"loss": 1.1934,
"grad_norm": 1.1933406591415405,
"learning_rate": 0.000103391306959839,
"epoch": 0.48,
"step": 32175
},
{
"loss": 1.2051,
"grad_norm": 1.681666612625122,
"learning_rate": 0.00010331621158871767,
"epoch": 0.48,
"step": 32200
},
{
"loss": 1.1314,
"grad_norm": 2.0042386054992676,
"learning_rate": 0.00010324111621759636,
"epoch": 0.48,
"step": 32225
},
{
"loss": 1.1505,
"grad_norm": 1.976456880569458,
"learning_rate": 0.00010316602084647503,
"epoch": 0.48,
"step": 32250
},
{
"loss": 1.1234,
"grad_norm": 1.852589726448059,
"learning_rate": 0.0001030909254753537,
"epoch": 0.48,
"step": 32275
},
{
"loss": 1.1631,
"grad_norm": 1.812740445137024,
"learning_rate": 0.00010301583010423237,
"epoch": 0.49,
"step": 32300
},
{
"loss": 1.2414,
"grad_norm": 1.424230694770813,
"learning_rate": 0.00010294073473311105,
"epoch": 0.49,
"step": 32325
},
{
"loss": 1.1732,
"grad_norm": 1.4877756834030151,
"learning_rate": 0.00010286563936198974,
"epoch": 0.49,
"step": 32350
},
{
"loss": 1.0968,
"grad_norm": 0.8852760195732117,
"learning_rate": 0.00010279054399086841,
"epoch": 0.49,
"step": 32375
},
{
"loss": 1.1685,
"grad_norm": 1.217244029045105,
"learning_rate": 0.00010271544861974709,
"epoch": 0.49,
"step": 32400
},
{
"loss": 1.2013,
"grad_norm": 1.0668590068817139,
"learning_rate": 0.00010264035324862575,
"epoch": 0.49,
"step": 32425
},
{
"loss": 1.2111,
"grad_norm": 2.8531405925750732,
"learning_rate": 0.00010256525787750444,
"epoch": 0.49,
"step": 32450
},
{
"loss": 1.1358,
"grad_norm": 1.3973661661148071,
"learning_rate": 0.00010249016250638311,
"epoch": 0.49,
"step": 32475
},
{
"loss": 1.1817,
"grad_norm": 1.641974925994873,
"learning_rate": 0.00010241506713526179,
"epoch": 0.49,
"step": 32500
},
{
"loss": 1.162,
"grad_norm": 1.5248854160308838,
"learning_rate": 0.00010233997176414048,
"epoch": 0.49,
"step": 32525
},
{
"loss": 1.1503,
"grad_norm": 1.7267481088638306,
"learning_rate": 0.00010226487639301913,
"epoch": 0.49,
"step": 32550
},
{
"loss": 1.1459,
"grad_norm": 1.0616050958633423,
"learning_rate": 0.00010218978102189782,
"epoch": 0.49,
"step": 32575
},
{
"loss": 1.0668,
"grad_norm": 3.3019354343414307,
"learning_rate": 0.00010211468565077649,
"epoch": 0.49,
"step": 32600
},
{
"loss": 1.1959,
"grad_norm": 0.9270702004432678,
"learning_rate": 0.00010203959027965517,
"epoch": 0.49,
"step": 32625
},
{
"loss": 1.1643,
"grad_norm": 1.6093809604644775,
"learning_rate": 0.00010196449490853383,
"epoch": 0.49,
"step": 32650
},
{
"loss": 1.1398,
"grad_norm": 1.0636630058288574,
"learning_rate": 0.00010188939953741253,
"epoch": 0.49,
"step": 32675
},
{
"loss": 1.1828,
"grad_norm": 1.4841707944869995,
"learning_rate": 0.00010181430416629119,
"epoch": 0.49,
"step": 32700
},
{
"loss": 1.2011,
"grad_norm": 1.9186432361602783,
"learning_rate": 0.00010173920879516987,
"epoch": 0.49,
"step": 32725
},
{
"loss": 1.1309,
"grad_norm": 1.3214590549468994,
"learning_rate": 0.00010166411342404856,
"epoch": 0.49,
"step": 32750
},
{
"loss": 1.1553,
"grad_norm": 1.2666594982147217,
"learning_rate": 0.00010158901805292721,
"epoch": 0.49,
"step": 32775
},
{
"loss": 1.1451,
"grad_norm": 1.2383131980895996,
"learning_rate": 0.00010151392268180591,
"epoch": 0.49,
"step": 32800
},
{
"loss": 1.1737,
"grad_norm": 1.585282564163208,
"learning_rate": 0.00010143882731068457,
"epoch": 0.49,
"step": 32825
},
{
"loss": 1.189,
"grad_norm": 2.2569665908813477,
"learning_rate": 0.00010136373193956325,
"epoch": 0.49,
"step": 32850
},
{
"loss": 1.1316,
"grad_norm": 1.6479202508926392,
"learning_rate": 0.00010128863656844192,
"epoch": 0.49,
"step": 32875
},
{
"loss": 1.2062,
"grad_norm": 1.6843442916870117,
"learning_rate": 0.00010121354119732061,
"epoch": 0.49,
"step": 32900
},
{
"loss": 1.216,
"grad_norm": 1.4396450519561768,
"learning_rate": 0.00010113844582619927,
"epoch": 0.49,
"step": 32925
},
{
"loss": 1.1504,
"grad_norm": 2.33687162399292,
"learning_rate": 0.00010106335045507795,
"epoch": 0.49,
"step": 32950
},
{
"loss": 1.1324,
"grad_norm": 1.071869969367981,
"learning_rate": 0.00010098825508395665,
"epoch": 0.5,
"step": 32975
},
{
"loss": 1.1709,
"grad_norm": 1.5846800804138184,
"learning_rate": 0.0001009131597128353,
"epoch": 0.5,
"step": 33000
},
{
"loss": 1.1503,
"grad_norm": 2.1538047790527344,
"learning_rate": 0.00010083806434171399,
"epoch": 0.5,
"step": 33025
},
{
"loss": 1.1561,
"grad_norm": 1.5584303140640259,
"learning_rate": 0.00010076296897059266,
"epoch": 0.5,
"step": 33050
},
{
"loss": 1.156,
"grad_norm": 1.192090392112732,
"learning_rate": 0.00010068787359947133,
"epoch": 0.5,
"step": 33075
},
{
"loss": 1.1814,
"grad_norm": 1.8236268758773804,
"learning_rate": 0.00010061277822835,
"epoch": 0.5,
"step": 33100
},
{
"loss": 1.1463,
"grad_norm": 1.067664384841919,
"learning_rate": 0.00010053768285722869,
"epoch": 0.5,
"step": 33125
},
{
"loss": 1.151,
"grad_norm": 2.6256847381591797,
"learning_rate": 0.00010046258748610736,
"epoch": 0.5,
"step": 33150
},
{
"loss": 1.2417,
"grad_norm": 2.632324695587158,
"learning_rate": 0.00010038749211498604,
"epoch": 0.5,
"step": 33175
},
{
"loss": 1.2562,
"grad_norm": 1.5089225769042969,
"learning_rate": 0.00010031239674386473,
"epoch": 0.5,
"step": 33200
},
{
"loss": 1.1732,
"grad_norm": 2.253978967666626,
"learning_rate": 0.00010023730137274338,
"epoch": 0.5,
"step": 33225
},
{
"loss": 1.1754,
"grad_norm": 1.3430489301681519,
"learning_rate": 0.00010016220600162207,
"epoch": 0.5,
"step": 33250
},
{
"loss": 1.1398,
"grad_norm": 1.406375765800476,
"learning_rate": 0.00010008711063050074,
"epoch": 0.5,
"step": 33275
},
{
"loss": 1.1073,
"grad_norm": 1.3083038330078125,
"learning_rate": 0.00010001201525937942,
"epoch": 0.5,
"step": 33300
},
{
"loss": 1.1303,
"grad_norm": 1.0769158601760864,
"learning_rate": 9.99369198882581e-05,
"epoch": 0.5,
"step": 33325
},
{
"loss": 1.1311,
"grad_norm": 1.7924445867538452,
"learning_rate": 9.986182451713677e-05,
"epoch": 0.5,
"step": 33350
},
{
"loss": 1.1369,
"grad_norm": 2.062908411026001,
"learning_rate": 9.978672914601544e-05,
"epoch": 0.5,
"step": 33375
},
{
"loss": 1.0535,
"grad_norm": 1.7589771747589111,
"learning_rate": 9.971163377489412e-05,
"epoch": 0.5,
"step": 33400
},
{
"loss": 1.2129,
"grad_norm": 2.6144607067108154,
"learning_rate": 9.96365384037728e-05,
"epoch": 0.5,
"step": 33425
},
{
"loss": 1.1676,
"grad_norm": 1.4699029922485352,
"learning_rate": 9.956144303265146e-05,
"epoch": 0.5,
"step": 33450
},
{
"loss": 1.1679,
"grad_norm": 2.1169466972351074,
"learning_rate": 9.948634766153014e-05,
"epoch": 0.5,
"step": 33475
},
{
"loss": 1.1909,
"grad_norm": 2.1322262287139893,
"learning_rate": 9.941125229040883e-05,
"epoch": 0.5,
"step": 33500
},
{
"loss": 1.1367,
"grad_norm": 1.2691850662231445,
"learning_rate": 9.93361569192875e-05,
"epoch": 0.5,
"step": 33525
},
{
"loss": 1.1963,
"grad_norm": 1.6899739503860474,
"learning_rate": 9.926106154816618e-05,
"epoch": 0.5,
"step": 33550
},
{
"loss": 1.174,
"grad_norm": 2.2241880893707275,
"learning_rate": 9.918596617704486e-05,
"epoch": 0.5,
"step": 33575
},
{
"loss": 1.0904,
"grad_norm": 1.1701431274414062,
"learning_rate": 9.911087080592352e-05,
"epoch": 0.5,
"step": 33600
},
{
"loss": 1.1726,
"grad_norm": 2.3160314559936523,
"learning_rate": 9.90357754348022e-05,
"epoch": 0.51,
"step": 33625
},
{
"loss": 1.2542,
"grad_norm": 1.301832675933838,
"learning_rate": 9.896068006368088e-05,
"epoch": 0.51,
"step": 33650
},
{
"loss": 1.1653,
"grad_norm": 2.0493037700653076,
"learning_rate": 9.888558469255955e-05,
"epoch": 0.51,
"step": 33675
},
{
"loss": 1.1477,
"grad_norm": 1.5900102853775024,
"learning_rate": 9.881048932143822e-05,
"epoch": 0.51,
"step": 33700
},
{
"loss": 1.2375,
"grad_norm": 1.2943110466003418,
"learning_rate": 9.873539395031692e-05,
"epoch": 0.51,
"step": 33725
},
{
"loss": 1.1826,
"grad_norm": 1.2338217496871948,
"learning_rate": 9.866029857919558e-05,
"epoch": 0.51,
"step": 33750
},
{
"loss": 1.0829,
"grad_norm": 1.4232094287872314,
"learning_rate": 9.858520320807426e-05,
"epoch": 0.51,
"step": 33775
},
{
"loss": 1.1526,
"grad_norm": 1.8396953344345093,
"learning_rate": 9.851010783695294e-05,
"epoch": 0.51,
"step": 33800
},
{
"loss": 1.1136,
"grad_norm": 2.607694625854492,
"learning_rate": 9.84350124658316e-05,
"epoch": 0.51,
"step": 33825
},
{
"loss": 1.1214,
"grad_norm": 1.8624433279037476,
"learning_rate": 9.835991709471028e-05,
"epoch": 0.51,
"step": 33850
},
{
"loss": 1.1397,
"grad_norm": 0.9864051938056946,
"learning_rate": 9.828482172358896e-05,
"epoch": 0.51,
"step": 33875
},
{
"loss": 1.1547,
"grad_norm": 2.811202049255371,
"learning_rate": 9.820972635246763e-05,
"epoch": 0.51,
"step": 33900
},
{
"loss": 1.1279,
"grad_norm": 1.2450023889541626,
"learning_rate": 9.813463098134631e-05,
"epoch": 0.51,
"step": 33925
},
{
"loss": 1.1307,
"grad_norm": 1.43215811252594,
"learning_rate": 9.8059535610225e-05,
"epoch": 0.51,
"step": 33950
},
{
"loss": 1.2241,
"grad_norm": 2.1191306114196777,
"learning_rate": 9.798444023910366e-05,
"epoch": 0.51,
"step": 33975
},
{
"loss": 1.0813,
"grad_norm": 1.793713092803955,
"learning_rate": 9.790934486798234e-05,
"epoch": 0.51,
"step": 34000
},
{
"loss": 1.1351,
"grad_norm": 1.3615442514419556,
"learning_rate": 9.783424949686102e-05,
"epoch": 0.51,
"step": 34025
},
{
"loss": 1.1081,
"grad_norm": 1.8019038438796997,
"learning_rate": 9.775915412573969e-05,
"epoch": 0.51,
"step": 34050
},
{
"loss": 1.1063,
"grad_norm": 2.837644100189209,
"learning_rate": 9.768405875461837e-05,
"epoch": 0.51,
"step": 34075
},
{
"loss": 1.1894,
"grad_norm": 1.2097357511520386,
"learning_rate": 9.760896338349705e-05,
"epoch": 0.51,
"step": 34100
},
{
"loss": 1.1942,
"grad_norm": 1.1675305366516113,
"learning_rate": 9.753386801237571e-05,
"epoch": 0.51,
"step": 34125
},
{
"loss": 1.1185,
"grad_norm": 1.949704885482788,
"learning_rate": 9.745877264125439e-05,
"epoch": 0.51,
"step": 34150
},
{
"loss": 1.1904,
"grad_norm": 1.5967111587524414,
"learning_rate": 9.738367727013308e-05,
"epoch": 0.51,
"step": 34175
},
{
"loss": 1.0594,
"grad_norm": 0.8626694083213806,
"learning_rate": 9.730858189901175e-05,
"epoch": 0.51,
"step": 34200
},
{
"loss": 1.1637,
"grad_norm": 1.7055697441101074,
"learning_rate": 9.723348652789043e-05,
"epoch": 0.51,
"step": 34225
},
{
"loss": 1.1241,
"grad_norm": 1.559312105178833,
"learning_rate": 9.71583911567691e-05,
"epoch": 0.51,
"step": 34250
},
{
"loss": 1.1188,
"grad_norm": 1.095895767211914,
"learning_rate": 9.708329578564777e-05,
"epoch": 0.51,
"step": 34275
},
{
"loss": 1.1647,
"grad_norm": 1.85615074634552,
"learning_rate": 9.700820041452645e-05,
"epoch": 0.52,
"step": 34300
},
{
"loss": 1.2034,
"grad_norm": 1.3546233177185059,
"learning_rate": 9.693310504340513e-05,
"epoch": 0.52,
"step": 34325
},
{
"loss": 1.1603,
"grad_norm": 1.756169080734253,
"learning_rate": 9.68580096722838e-05,
"epoch": 0.52,
"step": 34350
},
{
"loss": 1.1961,
"grad_norm": 1.8548756837844849,
"learning_rate": 9.678291430116249e-05,
"epoch": 0.52,
"step": 34375
},
{
"loss": 1.1465,
"grad_norm": 1.8958889245986938,
"learning_rate": 9.670781893004116e-05,
"epoch": 0.52,
"step": 34400
},
{
"loss": 1.1634,
"grad_norm": 1.0698497295379639,
"learning_rate": 9.663272355891983e-05,
"epoch": 0.52,
"step": 34425
},
{
"loss": 1.1787,
"grad_norm": 1.7011641263961792,
"learning_rate": 9.655762818779851e-05,
"epoch": 0.52,
"step": 34450
},
{
"loss": 1.2188,
"grad_norm": 1.8526285886764526,
"learning_rate": 9.648253281667719e-05,
"epoch": 0.52,
"step": 34475
},
{
"loss": 1.1691,
"grad_norm": 1.3944551944732666,
"learning_rate": 9.640743744555585e-05,
"epoch": 0.52,
"step": 34500
},
{
"loss": 1.2114,
"grad_norm": 1.9814480543136597,
"learning_rate": 9.633234207443453e-05,
"epoch": 0.52,
"step": 34525
},
{
"loss": 1.2095,
"grad_norm": 2.6231672763824463,
"learning_rate": 9.625724670331321e-05,
"epoch": 0.52,
"step": 34550
},
{
"loss": 1.0963,
"grad_norm": 1.4759525060653687,
"learning_rate": 9.618215133219189e-05,
"epoch": 0.52,
"step": 34575
},
{
"loss": 1.1529,
"grad_norm": 1.029731273651123,
"learning_rate": 9.610705596107057e-05,
"epoch": 0.52,
"step": 34600
},
{
"loss": 1.1502,
"grad_norm": 1.3262224197387695,
"learning_rate": 9.603196058994925e-05,
"epoch": 0.52,
"step": 34625
},
{
"loss": 1.1467,
"grad_norm": 2.3982503414154053,
"learning_rate": 9.595686521882791e-05,
"epoch": 0.52,
"step": 34650
},
{
"loss": 1.1648,
"grad_norm": 1.6530815362930298,
"learning_rate": 9.588176984770659e-05,
"epoch": 0.52,
"step": 34675
},
{
"loss": 1.1066,
"grad_norm": 1.987338662147522,
"learning_rate": 9.580667447658527e-05,
"epoch": 0.52,
"step": 34700
},
{
"loss": 1.1234,
"grad_norm": 1.4502911567687988,
"learning_rate": 9.573157910546394e-05,
"epoch": 0.52,
"step": 34725
},
{
"loss": 1.1992,
"grad_norm": 2.197833776473999,
"learning_rate": 9.565648373434261e-05,
"epoch": 0.52,
"step": 34750
},
{
"loss": 1.1252,
"grad_norm": 1.55136239528656,
"learning_rate": 9.55813883632213e-05,
"epoch": 0.52,
"step": 34775
},
{
"loss": 1.2176,
"grad_norm": 3.1894094944000244,
"learning_rate": 9.550629299209997e-05,
"epoch": 0.52,
"step": 34800
},
{
"loss": 1.232,
"grad_norm": 1.971354603767395,
"learning_rate": 9.543119762097865e-05,
"epoch": 0.52,
"step": 34825
},
{
"loss": 1.1468,
"grad_norm": 2.7667553424835205,
"learning_rate": 9.535610224985733e-05,
"epoch": 0.52,
"step": 34850
},
{
"loss": 1.0825,
"grad_norm": 1.0196588039398193,
"learning_rate": 9.5281006878736e-05,
"epoch": 0.52,
"step": 34875
},
{
"loss": 1.1179,
"grad_norm": 1.2213141918182373,
"learning_rate": 9.520591150761467e-05,
"epoch": 0.52,
"step": 34900
},
{
"loss": 1.1399,
"grad_norm": 1.981288194656372,
"learning_rate": 9.513081613649335e-05,
"epoch": 0.52,
"step": 34925
},
{
"loss": 1.1343,
"grad_norm": 1.6537185907363892,
"learning_rate": 9.505572076537202e-05,
"epoch": 0.52,
"step": 34950
},
{
"loss": 1.0935,
"grad_norm": 1.4479026794433594,
"learning_rate": 9.49806253942507e-05,
"epoch": 0.53,
"step": 34975
},
{
"loss": 1.1032,
"grad_norm": 1.706716537475586,
"learning_rate": 9.490553002312938e-05,
"epoch": 0.53,
"step": 35000
},
{
"loss": 1.2447,
"grad_norm": 1.594125747680664,
"learning_rate": 9.483043465200806e-05,
"epoch": 0.53,
"step": 35025
},
{
"loss": 1.1596,
"grad_norm": 1.97038996219635,
"learning_rate": 9.475533928088673e-05,
"epoch": 0.53,
"step": 35050
},
{
"loss": 1.0628,
"grad_norm": 1.643943428993225,
"learning_rate": 9.468024390976541e-05,
"epoch": 0.53,
"step": 35075
},
{
"loss": 1.1548,
"grad_norm": 1.9907810688018799,
"learning_rate": 9.460514853864408e-05,
"epoch": 0.53,
"step": 35100
},
{
"loss": 1.1677,
"grad_norm": 1.1716595888137817,
"learning_rate": 9.453005316752276e-05,
"epoch": 0.53,
"step": 35125
},
{
"loss": 1.1908,
"grad_norm": 2.2323215007781982,
"learning_rate": 9.445495779640144e-05,
"epoch": 0.53,
"step": 35150
},
{
"loss": 1.1572,
"grad_norm": 1.4074227809906006,
"learning_rate": 9.43798624252801e-05,
"epoch": 0.53,
"step": 35175
},
{
"loss": 1.167,
"grad_norm": 4.070502281188965,
"learning_rate": 9.430476705415878e-05,
"epoch": 0.53,
"step": 35200
},
{
"loss": 1.1227,
"grad_norm": 1.3812352418899536,
"learning_rate": 9.422967168303746e-05,
"epoch": 0.53,
"step": 35225
},
{
"loss": 1.1987,
"grad_norm": 1.3674787282943726,
"learning_rate": 9.415457631191614e-05,
"epoch": 0.53,
"step": 35250
},
{
"loss": 1.1032,
"grad_norm": 1.9738848209381104,
"learning_rate": 9.407948094079482e-05,
"epoch": 0.53,
"step": 35275
},
{
"loss": 1.1589,
"grad_norm": 1.1826382875442505,
"learning_rate": 9.40043855696735e-05,
"epoch": 0.53,
"step": 35300
},
{
"loss": 1.1984,
"grad_norm": 2.014425277709961,
"learning_rate": 9.392929019855216e-05,
"epoch": 0.53,
"step": 35325
},
{
"loss": 1.1509,
"grad_norm": 1.1934667825698853,
"learning_rate": 9.385419482743084e-05,
"epoch": 0.53,
"step": 35350
},
{
"loss": 1.2054,
"grad_norm": 1.2793216705322266,
"learning_rate": 9.377909945630952e-05,
"epoch": 0.53,
"step": 35375
},
{
"loss": 1.1462,
"grad_norm": 1.46218740940094,
"learning_rate": 9.370400408518818e-05,
"epoch": 0.53,
"step": 35400
},
{
"loss": 1.09,
"grad_norm": 1.2986016273498535,
"learning_rate": 9.362890871406686e-05,
"epoch": 0.53,
"step": 35425
},
{
"loss": 1.1911,
"grad_norm": 1.3429057598114014,
"learning_rate": 9.355381334294554e-05,
"epoch": 0.53,
"step": 35450
},
{
"loss": 1.0894,
"grad_norm": 2.130441904067993,
"learning_rate": 9.347871797182422e-05,
"epoch": 0.53,
"step": 35475
},
{
"loss": 1.0808,
"grad_norm": 2.666227102279663,
"learning_rate": 9.34036226007029e-05,
"epoch": 0.53,
"step": 35500
},
{
"loss": 1.1766,
"grad_norm": 2.273437023162842,
"learning_rate": 9.332852722958158e-05,
"epoch": 0.53,
"step": 35525
},
{
"loss": 1.1705,
"grad_norm": 2.2733075618743896,
"learning_rate": 9.325343185846024e-05,
"epoch": 0.53,
"step": 35550
},
{
"loss": 1.1519,
"grad_norm": 3.6471107006073,
"learning_rate": 9.317833648733892e-05,
"epoch": 0.53,
"step": 35575
},
{
"loss": 1.1314,
"grad_norm": 1.2116317749023438,
"learning_rate": 9.31032411162176e-05,
"epoch": 0.53,
"step": 35600
},
{
"loss": 1.2151,
"grad_norm": 2.057880163192749,
"learning_rate": 9.302814574509627e-05,
"epoch": 0.54,
"step": 35625
},
{
"loss": 1.104,
"grad_norm": 1.8840137720108032,
"learning_rate": 9.295305037397495e-05,
"epoch": 0.54,
"step": 35650
},
{
"loss": 1.153,
"grad_norm": 1.324926733970642,
"learning_rate": 9.287795500285364e-05,
"epoch": 0.54,
"step": 35675
},
{
"loss": 1.1716,
"grad_norm": 1.6749731302261353,
"learning_rate": 9.28028596317323e-05,
"epoch": 0.54,
"step": 35700
},
{
"loss": 1.2032,
"grad_norm": 2.625720977783203,
"learning_rate": 9.272776426061098e-05,
"epoch": 0.54,
"step": 35725
},
{
"loss": 1.1532,
"grad_norm": 1.7075999975204468,
"learning_rate": 9.265266888948966e-05,
"epoch": 0.54,
"step": 35750
},
{
"loss": 1.099,
"grad_norm": 2.0305819511413574,
"learning_rate": 9.257757351836833e-05,
"epoch": 0.54,
"step": 35775
},
{
"loss": 1.1765,
"grad_norm": 1.1253600120544434,
"learning_rate": 9.2502478147247e-05,
"epoch": 0.54,
"step": 35800
},
{
"loss": 1.1198,
"grad_norm": 1.2533643245697021,
"learning_rate": 9.242738277612568e-05,
"epoch": 0.54,
"step": 35825
},
{
"loss": 1.1672,
"grad_norm": 2.331897735595703,
"learning_rate": 9.235228740500435e-05,
"epoch": 0.54,
"step": 35850
},
{
"loss": 1.1543,
"grad_norm": 1.556606411933899,
"learning_rate": 9.227719203388303e-05,
"epoch": 0.54,
"step": 35875
},
{
"loss": 1.143,
"grad_norm": 2.3850412368774414,
"learning_rate": 9.220209666276172e-05,
"epoch": 0.54,
"step": 35900
},
{
"loss": 1.1789,
"grad_norm": 2.364520788192749,
"learning_rate": 9.212700129164039e-05,
"epoch": 0.54,
"step": 35925
},
{
"loss": 1.1408,
"grad_norm": 1.6768343448638916,
"learning_rate": 9.205190592051907e-05,
"epoch": 0.54,
"step": 35950
},
{
"loss": 1.2382,
"grad_norm": 1.6469035148620605,
"learning_rate": 9.197681054939774e-05,
"epoch": 0.54,
"step": 35975
},
{
"loss": 1.1741,
"grad_norm": 1.1769710779190063,
"learning_rate": 9.190171517827641e-05,
"epoch": 0.54,
"step": 36000
},
{
"loss": 1.1858,
"grad_norm": 1.3249248266220093,
"learning_rate": 9.182661980715509e-05,
"epoch": 0.54,
"step": 36025
},
{
"loss": 1.1672,
"grad_norm": 1.8996431827545166,
"learning_rate": 9.175152443603377e-05,
"epoch": 0.54,
"step": 36050
},
{
"loss": 1.1392,
"grad_norm": 1.7952135801315308,
"learning_rate": 9.167642906491245e-05,
"epoch": 0.54,
"step": 36075
},
{
"loss": 1.1508,
"grad_norm": 1.642858862876892,
"learning_rate": 9.160133369379111e-05,
"epoch": 0.54,
"step": 36100
},
{
"loss": 1.1122,
"grad_norm": 1.287514090538025,
"learning_rate": 9.15262383226698e-05,
"epoch": 0.54,
"step": 36125
},
{
"loss": 1.0894,
"grad_norm": 1.7376103401184082,
"learning_rate": 9.145114295154847e-05,
"epoch": 0.54,
"step": 36150
},
{
"loss": 1.0989,
"grad_norm": 1.9539545774459839,
"learning_rate": 9.137604758042715e-05,
"epoch": 0.54,
"step": 36175
},
{
"loss": 1.2123,
"grad_norm": 1.42177414894104,
"learning_rate": 9.130095220930583e-05,
"epoch": 0.54,
"step": 36200
},
{
"loss": 1.178,
"grad_norm": 1.0218976736068726,
"learning_rate": 9.122585683818449e-05,
"epoch": 0.54,
"step": 36225
},
{
"loss": 1.1677,
"grad_norm": 2.1340959072113037,
"learning_rate": 9.115076146706317e-05,
"epoch": 0.54,
"step": 36250
},
{
"loss": 1.1543,
"grad_norm": 1.4234435558319092,
"learning_rate": 9.107866991078671e-05,
"epoch": 0.54,
"step": 36275
},
{
"loss": 1.1862,
"grad_norm": 2.7633371353149414,
"learning_rate": 9.100357453966538e-05,
"epoch": 0.55,
"step": 36300
},
{
"loss": 1.1253,
"grad_norm": 1.0972270965576172,
"learning_rate": 9.092847916854406e-05,
"epoch": 0.55,
"step": 36325
},
{
"loss": 1.1722,
"grad_norm": 1.9171634912490845,
"learning_rate": 9.085338379742274e-05,
"epoch": 0.55,
"step": 36350
},
{
"loss": 1.2198,
"grad_norm": 1.712023138999939,
"learning_rate": 9.07782884263014e-05,
"epoch": 0.55,
"step": 36375
},
{
"loss": 1.1141,
"grad_norm": 1.8943400382995605,
"learning_rate": 9.070319305518008e-05,
"epoch": 0.55,
"step": 36400
},
{
"loss": 1.14,
"grad_norm": 2.2785110473632812,
"learning_rate": 9.062809768405876e-05,
"epoch": 0.55,
"step": 36425
},
{
"loss": 1.1121,
"grad_norm": 1.8687163591384888,
"learning_rate": 9.055300231293742e-05,
"epoch": 0.55,
"step": 36450
},
{
"loss": 1.1212,
"grad_norm": 1.2206839323043823,
"learning_rate": 9.047790694181612e-05,
"epoch": 0.55,
"step": 36475
},
{
"loss": 1.1483,
"grad_norm": 1.451223611831665,
"learning_rate": 9.04028115706948e-05,
"epoch": 0.55,
"step": 36500
},
{
"loss": 1.1291,
"grad_norm": 1.4097929000854492,
"learning_rate": 9.032771619957346e-05,
"epoch": 0.55,
"step": 36525
},
{
"loss": 1.212,
"grad_norm": 1.2845067977905273,
"learning_rate": 9.025262082845214e-05,
"epoch": 0.55,
"step": 36550
},
{
"loss": 1.1501,
"grad_norm": 1.6853928565979004,
"learning_rate": 9.017752545733082e-05,
"epoch": 0.55,
"step": 36575
},
{
"loss": 1.1752,
"grad_norm": 1.6147632598876953,
"learning_rate": 9.010243008620948e-05,
"epoch": 0.55,
"step": 36600
},
{
"loss": 1.0684,
"grad_norm": 1.1251403093338013,
"learning_rate": 9.002733471508816e-05,
"epoch": 0.55,
"step": 36625
},
{
"loss": 1.182,
"grad_norm": 2.0793652534484863,
"learning_rate": 8.995223934396684e-05,
"epoch": 0.55,
"step": 36650
},
{
"loss": 1.1353,
"grad_norm": 1.7693026065826416,
"learning_rate": 8.987714397284552e-05,
"epoch": 0.55,
"step": 36675
},
{
"loss": 1.1888,
"grad_norm": 2.8078482151031494,
"learning_rate": 8.98020486017242e-05,
"epoch": 0.55,
"step": 36700
},
{
"loss": 1.1771,
"grad_norm": 1.857947587966919,
"learning_rate": 8.972695323060288e-05,
"epoch": 0.55,
"step": 36725
},
{
"loss": 1.1906,
"grad_norm": 1.8160065412521362,
"learning_rate": 8.965185785948154e-05,
"epoch": 0.55,
"step": 36750
},
{
"loss": 1.2188,
"grad_norm": 1.113638997077942,
"learning_rate": 8.957676248836022e-05,
"epoch": 0.55,
"step": 36775
},
{
"loss": 1.1625,
"grad_norm": 2.105451822280884,
"learning_rate": 8.95016671172389e-05,
"epoch": 0.55,
"step": 36800
},
{
"loss": 1.1613,
"grad_norm": 2.07865309715271,
"learning_rate": 8.942657174611757e-05,
"epoch": 0.55,
"step": 36825
},
{
"loss": 1.1396,
"grad_norm": 3.033181667327881,
"learning_rate": 8.935147637499625e-05,
"epoch": 0.55,
"step": 36850
},
{
"loss": 1.1236,
"grad_norm": 1.4927865266799927,
"learning_rate": 8.927638100387492e-05,
"epoch": 0.55,
"step": 36875
},
{
"loss": 1.218,
"grad_norm": 1.5946248769760132,
"learning_rate": 8.92012856327536e-05,
"epoch": 0.55,
"step": 36900
},
{
"loss": 1.1734,
"grad_norm": 2.349677562713623,
"learning_rate": 8.912619026163228e-05,
"epoch": 0.55,
"step": 36925
},
{
"loss": 1.1605,
"grad_norm": 2.3983821868896484,
"learning_rate": 8.905109489051096e-05,
"epoch": 0.55,
"step": 36950
},
{
"loss": 1.1976,
"grad_norm": 2.228635787963867,
"learning_rate": 8.897599951938963e-05,
"epoch": 0.56,
"step": 36975
},
{
"loss": 1.1316,
"grad_norm": 1.0619374513626099,
"learning_rate": 8.89009041482683e-05,
"epoch": 0.56,
"step": 37000
},
{
"loss": 1.0945,
"grad_norm": 2.490835428237915,
"learning_rate": 8.882580877714698e-05,
"epoch": 0.56,
"step": 37025
},
{
"loss": 1.1367,
"grad_norm": 1.3563780784606934,
"learning_rate": 8.875071340602565e-05,
"epoch": 0.56,
"step": 37050
},
{
"loss": 1.1431,
"grad_norm": 2.2259418964385986,
"learning_rate": 8.867561803490433e-05,
"epoch": 0.56,
"step": 37075
},
{
"loss": 1.1641,
"grad_norm": 1.6267977952957153,
"learning_rate": 8.860052266378301e-05,
"epoch": 0.56,
"step": 37100
},
{
"loss": 1.1515,
"grad_norm": 1.1950840950012207,
"learning_rate": 8.852542729266169e-05,
"epoch": 0.56,
"step": 37125
},
{
"loss": 1.1531,
"grad_norm": 2.104607343673706,
"learning_rate": 8.845033192154036e-05,
"epoch": 0.56,
"step": 37150
},
{
"loss": 1.1484,
"grad_norm": 1.70024573802948,
"learning_rate": 8.837523655041904e-05,
"epoch": 0.56,
"step": 37175
},
{
"loss": 1.1501,
"grad_norm": 1.2371578216552734,
"learning_rate": 8.830014117929771e-05,
"epoch": 0.56,
"step": 37200
},
{
"loss": 1.186,
"grad_norm": 0.9835503101348877,
"learning_rate": 8.822504580817639e-05,
"epoch": 0.56,
"step": 37225
},
{
"loss": 1.2074,
"grad_norm": 1.633745551109314,
"learning_rate": 8.814995043705507e-05,
"epoch": 0.56,
"step": 37250
},
{
"loss": 1.0625,
"grad_norm": 0.9619184732437134,
"learning_rate": 8.807485506593373e-05,
"epoch": 0.56,
"step": 37275
},
{
"loss": 1.1741,
"grad_norm": 2.740689516067505,
"learning_rate": 8.799975969481241e-05,
"epoch": 0.56,
"step": 37300
},
{
"loss": 1.1879,
"grad_norm": 1.7593574523925781,
"learning_rate": 8.79246643236911e-05,
"epoch": 0.56,
"step": 37325
},
{
"loss": 1.1428,
"grad_norm": 1.7317709922790527,
"learning_rate": 8.784956895256977e-05,
"epoch": 0.56,
"step": 37350
},
{
"loss": 1.178,
"grad_norm": 2.689879894256592,
"learning_rate": 8.777447358144845e-05,
"epoch": 0.56,
"step": 37375
},
{
"loss": 1.1386,
"grad_norm": 1.3778091669082642,
"learning_rate": 8.769937821032713e-05,
"epoch": 0.56,
"step": 37400
},
{
"loss": 1.1171,
"grad_norm": 1.4201562404632568,
"learning_rate": 8.762428283920579e-05,
"epoch": 0.56,
"step": 37425
},
{
"loss": 1.1575,
"grad_norm": 1.2890523672103882,
"learning_rate": 8.754918746808447e-05,
"epoch": 0.56,
"step": 37450
},
{
"loss": 1.1488,
"grad_norm": 1.5881069898605347,
"learning_rate": 8.747409209696315e-05,
"epoch": 0.56,
"step": 37475
},
{
"loss": 1.1466,
"grad_norm": 2.2226951122283936,
"learning_rate": 8.739899672584181e-05,
"epoch": 0.56,
"step": 37500
},
{
"loss": 1.2388,
"grad_norm": 1.865116000175476,
"learning_rate": 8.73239013547205e-05,
"epoch": 0.56,
"step": 37525
},
{
"loss": 1.1089,
"grad_norm": 1.7630786895751953,
"learning_rate": 8.724880598359919e-05,
"epoch": 0.56,
"step": 37550
},
{
"loss": 1.1414,
"grad_norm": 1.465029001235962,
"learning_rate": 8.717371061247785e-05,
"epoch": 0.56,
"step": 37575
},
{
"loss": 1.1533,
"grad_norm": 1.2078875303268433,
"learning_rate": 8.709861524135653e-05,
"epoch": 0.56,
"step": 37600
},
{
"loss": 1.1931,
"grad_norm": 1.6650409698486328,
"learning_rate": 8.702351987023521e-05,
"epoch": 0.57,
"step": 37625
},
{
"loss": 1.2402,
"grad_norm": 2.3124303817749023,
"learning_rate": 8.694842449911387e-05,
"epoch": 0.57,
"step": 37650
},
{
"loss": 1.1566,
"grad_norm": 1.6543500423431396,
"learning_rate": 8.687332912799255e-05,
"epoch": 0.57,
"step": 37675
},
{
"loss": 1.1466,
"grad_norm": 1.0098353624343872,
"learning_rate": 8.680123757171608e-05,
"epoch": 0.57,
"step": 37700
},
{
"loss": 1.1809,
"grad_norm": 4.323051929473877,
"learning_rate": 8.672614220059476e-05,
"epoch": 0.57,
"step": 37725
},
{
"loss": 1.1547,
"grad_norm": 1.610032081604004,
"learning_rate": 8.665104682947344e-05,
"epoch": 0.57,
"step": 37750
},
{
"loss": 1.1416,
"grad_norm": 2.1819934844970703,
"learning_rate": 8.65759514583521e-05,
"epoch": 0.57,
"step": 37775
},
{
"loss": 1.1733,
"grad_norm": 1.3869298696517944,
"learning_rate": 8.650085608723078e-05,
"epoch": 0.57,
"step": 37800
},
{
"loss": 1.1933,
"grad_norm": 4.251647472381592,
"learning_rate": 8.642576071610946e-05,
"epoch": 0.57,
"step": 37825
},
{
"loss": 1.1496,
"grad_norm": 1.59364914894104,
"learning_rate": 8.635066534498814e-05,
"epoch": 0.57,
"step": 37850
},
{
"loss": 1.1961,
"grad_norm": 1.4821110963821411,
"learning_rate": 8.62755699738668e-05,
"epoch": 0.57,
"step": 37875
},
{
"loss": 1.1613,
"grad_norm": 1.1281379461288452,
"learning_rate": 8.62004746027455e-05,
"epoch": 0.57,
"step": 37900
},
{
"loss": 1.1104,
"grad_norm": 1.1649848222732544,
"learning_rate": 8.612537923162416e-05,
"epoch": 0.57,
"step": 37925
},
{
"loss": 1.1502,
"grad_norm": 1.2780572175979614,
"learning_rate": 8.605028386050284e-05,
"epoch": 0.57,
"step": 37950
},
{
"loss": 1.1132,
"grad_norm": 0.8712659478187561,
"learning_rate": 8.597518848938152e-05,
"epoch": 0.57,
"step": 37975
},
{
"loss": 1.1839,
"grad_norm": 1.912044882774353,
"learning_rate": 8.59000931182602e-05,
"epoch": 0.57,
"step": 38000
},
{
"loss": 1.2002,
"grad_norm": 1.3458526134490967,
"learning_rate": 8.582499774713887e-05,
"epoch": 0.57,
"step": 38025
},
{
"loss": 1.0894,
"grad_norm": 0.9828363060951233,
"learning_rate": 8.574990237601755e-05,
"epoch": 0.57,
"step": 38050
},
{
"loss": 1.1063,
"grad_norm": 1.625246286392212,
"learning_rate": 8.567480700489622e-05,
"epoch": 0.57,
"step": 38075
},
{
"loss": 1.1812,
"grad_norm": 2.1737546920776367,
"learning_rate": 8.559971163377489e-05,
"epoch": 0.57,
"step": 38100
},
{
"loss": 1.2085,
"grad_norm": 2.304011583328247,
"learning_rate": 8.552461626265358e-05,
"epoch": 0.57,
"step": 38125
},
{
"loss": 1.2123,
"grad_norm": 2.7804811000823975,
"learning_rate": 8.544952089153225e-05,
"epoch": 0.57,
"step": 38150
},
{
"loss": 1.0834,
"grad_norm": 1.5996559858322144,
"learning_rate": 8.537442552041093e-05,
"epoch": 0.57,
"step": 38175
},
{
"loss": 1.1438,
"grad_norm": 2.293555736541748,
"learning_rate": 8.52993301492896e-05,
"epoch": 0.57,
"step": 38200
},
{
"loss": 1.1268,
"grad_norm": 3.2344138622283936,
"learning_rate": 8.522423477816828e-05,
"epoch": 0.57,
"step": 38225
},
{
"loss": 1.1241,
"grad_norm": 1.3770357370376587,
"learning_rate": 8.514913940704695e-05,
"epoch": 0.57,
"step": 38250
},
{
"loss": 1.1562,
"grad_norm": 1.8360143899917603,
"learning_rate": 8.507404403592563e-05,
"epoch": 0.57,
"step": 38275
},
{
"loss": 1.0981,
"grad_norm": 1.4072625637054443,
"learning_rate": 8.49989486648043e-05,
"epoch": 0.58,
"step": 38300
},
{
"loss": 1.1859,
"grad_norm": 1.3473397493362427,
"learning_rate": 8.492385329368299e-05,
"epoch": 0.58,
"step": 38325
},
{
"loss": 1.1482,
"grad_norm": 1.2026944160461426,
"learning_rate": 8.484875792256166e-05,
"epoch": 0.58,
"step": 38350
},
{
"loss": 1.1669,
"grad_norm": 1.541365146636963,
"learning_rate": 8.477366255144033e-05,
"epoch": 0.58,
"step": 38375
},
{
"loss": 1.1388,
"grad_norm": 1.4856247901916504,
"learning_rate": 8.469856718031901e-05,
"epoch": 0.58,
"step": 38400
},
{
"loss": 1.1062,
"grad_norm": 1.3066248893737793,
"learning_rate": 8.462347180919769e-05,
"epoch": 0.58,
"step": 38425
},
{
"loss": 1.2007,
"grad_norm": 1.4467949867248535,
"learning_rate": 8.454837643807637e-05,
"epoch": 0.58,
"step": 38450
},
{
"loss": 1.1274,
"grad_norm": 1.0385124683380127,
"learning_rate": 8.447328106695503e-05,
"epoch": 0.58,
"step": 38475
},
{
"loss": 1.1398,
"grad_norm": 1.994707465171814,
"learning_rate": 8.439818569583371e-05,
"epoch": 0.58,
"step": 38500
},
{
"loss": 1.1434,
"grad_norm": 1.9492372274398804,
"learning_rate": 8.432309032471239e-05,
"epoch": 0.58,
"step": 38525
},
{
"loss": 1.1526,
"grad_norm": 1.5071452856063843,
"learning_rate": 8.424799495359107e-05,
"epoch": 0.58,
"step": 38550
},
{
"loss": 1.2361,
"grad_norm": 1.9257084131240845,
"learning_rate": 8.417289958246975e-05,
"epoch": 0.58,
"step": 38575
},
{
"loss": 1.1397,
"grad_norm": 1.549302101135254,
"learning_rate": 8.409780421134841e-05,
"epoch": 0.58,
"step": 38600
},
{
"loss": 1.0881,
"grad_norm": 1.1353474855422974,
"learning_rate": 8.402270884022709e-05,
"epoch": 0.58,
"step": 38625
},
{
"loss": 1.1171,
"grad_norm": 1.092421293258667,
"learning_rate": 8.394761346910577e-05,
"epoch": 0.58,
"step": 38650
},
{
"loss": 1.1855,
"grad_norm": 2.2004194259643555,
"learning_rate": 8.387251809798445e-05,
"epoch": 0.58,
"step": 38675
},
{
"loss": 1.1005,
"grad_norm": 1.2563297748565674,
"learning_rate": 8.379742272686311e-05,
"epoch": 0.58,
"step": 38700
},
{
"loss": 1.1269,
"grad_norm": 1.0468568801879883,
"learning_rate": 8.37223273557418e-05,
"epoch": 0.58,
"step": 38725
},
{
"loss": 1.1281,
"grad_norm": 1.2064344882965088,
"learning_rate": 8.364723198462047e-05,
"epoch": 0.58,
"step": 38750
},
{
"loss": 1.1261,
"grad_norm": 1.8252434730529785,
"learning_rate": 8.357213661349915e-05,
"epoch": 0.58,
"step": 38775
},
{
"loss": 1.1461,
"grad_norm": 2.017496109008789,
"learning_rate": 8.349704124237783e-05,
"epoch": 0.58,
"step": 38800
},
{
"loss": 1.1649,
"grad_norm": 2.0913796424865723,
"learning_rate": 8.342494968610134e-05,
"epoch": 0.58,
"step": 38825
},
{
"loss": 1.109,
"grad_norm": 1.446608304977417,
"learning_rate": 8.334985431498002e-05,
"epoch": 0.58,
"step": 38850
},
{
"loss": 1.1402,
"grad_norm": 1.3379923105239868,
"learning_rate": 8.32747589438587e-05,
"epoch": 0.58,
"step": 38875
},
{
"loss": 1.151,
"grad_norm": 1.2311291694641113,
"learning_rate": 8.319966357273738e-05,
"epoch": 0.58,
"step": 38900
},
{
"loss": 1.0964,
"grad_norm": 1.5600236654281616,
"learning_rate": 8.312456820161606e-05,
"epoch": 0.58,
"step": 38925
},
{
"loss": 1.2035,
"grad_norm": 1.953704833984375,
"learning_rate": 8.304947283049474e-05,
"epoch": 0.58,
"step": 38950
},
{
"loss": 1.1308,
"grad_norm": 2.2543299198150635,
"learning_rate": 8.29743774593734e-05,
"epoch": 0.59,
"step": 38975
},
{
"loss": 1.1265,
"grad_norm": 1.1865688562393188,
"learning_rate": 8.289928208825208e-05,
"epoch": 0.59,
"step": 39000
},
{
"loss": 1.1292,
"grad_norm": 5.55220890045166,
"learning_rate": 8.282418671713076e-05,
"epoch": 0.59,
"step": 39025
},
{
"loss": 1.1661,
"grad_norm": 4.146172523498535,
"learning_rate": 8.274909134600943e-05,
"epoch": 0.59,
"step": 39050
},
{
"loss": 1.0739,
"grad_norm": 1.3395634889602661,
"learning_rate": 8.26739959748881e-05,
"epoch": 0.59,
"step": 39075
},
{
"loss": 1.1592,
"grad_norm": 1.3389477729797363,
"learning_rate": 8.259890060376679e-05,
"epoch": 0.59,
"step": 39100
},
{
"loss": 1.2078,
"grad_norm": 1.6884431838989258,
"learning_rate": 8.252380523264546e-05,
"epoch": 0.59,
"step": 39125
},
{
"loss": 1.1245,
"grad_norm": 2.1312379837036133,
"learning_rate": 8.244870986152414e-05,
"epoch": 0.59,
"step": 39150
},
{
"loss": 1.1295,
"grad_norm": 1.0998674631118774,
"learning_rate": 8.237361449040282e-05,
"epoch": 0.59,
"step": 39175
},
{
"loss": 1.0666,
"grad_norm": 1.3891421556472778,
"learning_rate": 8.229851911928149e-05,
"epoch": 0.59,
"step": 39200
},
{
"loss": 1.2052,
"grad_norm": 1.069043755531311,
"learning_rate": 8.222342374816017e-05,
"epoch": 0.59,
"step": 39225
},
{
"loss": 1.1251,
"grad_norm": 2.247673273086548,
"learning_rate": 8.214832837703884e-05,
"epoch": 0.59,
"step": 39250
},
{
"loss": 1.124,
"grad_norm": 1.6093597412109375,
"learning_rate": 8.207323300591751e-05,
"epoch": 0.59,
"step": 39275
},
{
"loss": 1.0996,
"grad_norm": 2.3855221271514893,
"learning_rate": 8.199813763479619e-05,
"epoch": 0.59,
"step": 39300
},
{
"loss": 1.1916,
"grad_norm": 2.479825258255005,
"learning_rate": 8.192304226367488e-05,
"epoch": 0.59,
"step": 39325
},
{
"loss": 1.1752,
"grad_norm": 1.302335262298584,
"learning_rate": 8.184794689255355e-05,
"epoch": 0.59,
"step": 39350
},
{
"loss": 1.0752,
"grad_norm": 1.7336974143981934,
"learning_rate": 8.177285152143223e-05,
"epoch": 0.59,
"step": 39375
},
{
"loss": 1.1491,
"grad_norm": 1.7334376573562622,
"learning_rate": 8.16977561503109e-05,
"epoch": 0.59,
"step": 39400
},
{
"loss": 1.1288,
"grad_norm": 1.5370564460754395,
"learning_rate": 8.162266077918957e-05,
"epoch": 0.59,
"step": 39425
},
{
"loss": 1.1544,
"grad_norm": 2.190082550048828,
"learning_rate": 8.154756540806825e-05,
"epoch": 0.59,
"step": 39450
},
{
"loss": 1.1265,
"grad_norm": 1.2925649881362915,
"learning_rate": 8.147247003694693e-05,
"epoch": 0.59,
"step": 39475
},
{
"loss": 1.1175,
"grad_norm": 1.5995088815689087,
"learning_rate": 8.139737466582559e-05,
"epoch": 0.59,
"step": 39500
},
{
"loss": 1.1401,
"grad_norm": 2.018927812576294,
"learning_rate": 8.132227929470427e-05,
"epoch": 0.59,
"step": 39525
},
{
"loss": 1.145,
"grad_norm": 2.101435661315918,
"learning_rate": 8.124718392358296e-05,
"epoch": 0.59,
"step": 39550
},
{
"loss": 1.2138,
"grad_norm": 1.0594968795776367,
"learning_rate": 8.117208855246163e-05,
"epoch": 0.59,
"step": 39575
},
{
"loss": 1.1428,
"grad_norm": 1.3600685596466064,
"learning_rate": 8.109699318134031e-05,
"epoch": 0.59,
"step": 39600
},
{
"loss": 1.1717,
"grad_norm": 1.4228684902191162,
"learning_rate": 8.102189781021899e-05,
"epoch": 0.6,
"step": 39625
},
{
"loss": 1.1585,
"grad_norm": 1.1526142358779907,
"learning_rate": 8.094680243909765e-05,
"epoch": 0.6,
"step": 39650
},
{
"loss": 1.182,
"grad_norm": 1.6106703281402588,
"learning_rate": 8.087170706797633e-05,
"epoch": 0.6,
"step": 39675
},
{
"loss": 1.0861,
"grad_norm": 1.4839822053909302,
"learning_rate": 8.079661169685501e-05,
"epoch": 0.6,
"step": 39700
},
{
"loss": 1.1069,
"grad_norm": 1.2879067659378052,
"learning_rate": 8.072151632573368e-05,
"epoch": 0.6,
"step": 39725
},
{
"loss": 1.0987,
"grad_norm": 1.0558810234069824,
"learning_rate": 8.064642095461235e-05,
"epoch": 0.6,
"step": 39750
},
{
"loss": 1.1328,
"grad_norm": 1.1778608560562134,
"learning_rate": 8.057132558349105e-05,
"epoch": 0.6,
"step": 39775
},
{
"loss": 1.1309,
"grad_norm": 6.927417755126953,
"learning_rate": 8.049623021236971e-05,
"epoch": 0.6,
"step": 39800
},
{
"loss": 1.0943,
"grad_norm": 2.03812837600708,
"learning_rate": 8.042113484124839e-05,
"epoch": 0.6,
"step": 39825
},
{
"loss": 1.1745,
"grad_norm": 1.8612788915634155,
"learning_rate": 8.034603947012707e-05,
"epoch": 0.6,
"step": 39850
},
{
"loss": 1.1406,
"grad_norm": 1.7891243696212769,
"learning_rate": 8.027094409900574e-05,
"epoch": 0.6,
"step": 39875
},
{
"loss": 1.1637,
"grad_norm": 1.3130542039871216,
"learning_rate": 8.019584872788441e-05,
"epoch": 0.6,
"step": 39900
},
{
"loss": 1.161,
"grad_norm": 1.4459270238876343,
"learning_rate": 8.012075335676309e-05,
"epoch": 0.6,
"step": 39925
},
{
"loss": 1.0771,
"grad_norm": 1.576799988746643,
"learning_rate": 8.004565798564176e-05,
"epoch": 0.6,
"step": 39950
},
{
"loss": 1.1488,
"grad_norm": 1.5947468280792236,
"learning_rate": 7.997056261452045e-05,
"epoch": 0.6,
"step": 39975
},
{
"loss": 1.082,
"grad_norm": 1.6659477949142456,
"learning_rate": 7.989546724339913e-05,
"epoch": 0.6,
"step": 40000
},
{
"loss": 1.1407,
"grad_norm": 5.243545055389404,
"learning_rate": 7.98203718722778e-05,
"epoch": 0.6,
"step": 40025
},
{
"loss": 1.0316,
"grad_norm": 1.1737557649612427,
"learning_rate": 7.974527650115647e-05,
"epoch": 0.6,
"step": 40050
},
{
"loss": 1.1271,
"grad_norm": 1.7314780950546265,
"learning_rate": 7.967018113003515e-05,
"epoch": 0.6,
"step": 40075
},
{
"loss": 1.1789,
"grad_norm": 2.0266127586364746,
"learning_rate": 7.959508575891382e-05,
"epoch": 0.6,
"step": 40100
},
{
"loss": 1.1694,
"grad_norm": 1.2344083786010742,
"learning_rate": 7.95199903877925e-05,
"epoch": 0.6,
"step": 40125
},
{
"loss": 1.2533,
"grad_norm": 1.9182096719741821,
"learning_rate": 7.944489501667118e-05,
"epoch": 0.6,
"step": 40150
},
{
"loss": 1.1746,
"grad_norm": 1.2988171577453613,
"learning_rate": 7.936979964554984e-05,
"epoch": 0.6,
"step": 40175
},
{
"loss": 1.1278,
"grad_norm": 1.847265601158142,
"learning_rate": 7.929470427442853e-05,
"epoch": 0.6,
"step": 40200
},
{
"loss": 1.1505,
"grad_norm": 1.2763010263442993,
"learning_rate": 7.921960890330721e-05,
"epoch": 0.6,
"step": 40225
},
{
"loss": 1.1054,
"grad_norm": 1.8603181838989258,
"learning_rate": 7.914451353218588e-05,
"epoch": 0.6,
"step": 40250
},
{
"loss": 1.141,
"grad_norm": 1.320823073387146,
"learning_rate": 7.906941816106456e-05,
"epoch": 0.6,
"step": 40275
},
{
"loss": 1.1459,
"grad_norm": 1.6374232769012451,
"learning_rate": 7.899432278994324e-05,
"epoch": 0.61,
"step": 40300
},
{
"loss": 1.1029,
"grad_norm": 2.58848237991333,
"learning_rate": 7.89192274188219e-05,
"epoch": 0.61,
"step": 40325
},
{
"loss": 1.0986,
"grad_norm": 2.0660908222198486,
"learning_rate": 7.884413204770058e-05,
"epoch": 0.61,
"step": 40350
},
{
"loss": 1.1133,
"grad_norm": 1.2920788526535034,
"learning_rate": 7.876903667657926e-05,
"epoch": 0.61,
"step": 40375
},
{
"loss": 1.1248,
"grad_norm": 1.9770618677139282,
"learning_rate": 7.869394130545792e-05,
"epoch": 0.61,
"step": 40400
},
{
"loss": 1.1426,
"grad_norm": 1.2287840843200684,
"learning_rate": 7.861884593433662e-05,
"epoch": 0.61,
"step": 40425
},
{
"loss": 1.13,
"grad_norm": 1.4473248720169067,
"learning_rate": 7.85437505632153e-05,
"epoch": 0.61,
"step": 40450
},
{
"loss": 1.1542,
"grad_norm": 1.8491151332855225,
"learning_rate": 7.846865519209396e-05,
"epoch": 0.61,
"step": 40475
},
{
"loss": 1.1202,
"grad_norm": 1.6479681730270386,
"learning_rate": 7.839355982097264e-05,
"epoch": 0.61,
"step": 40500
},
{
"loss": 1.1155,
"grad_norm": 1.355073094367981,
"learning_rate": 7.831846444985132e-05,
"epoch": 0.61,
"step": 40525
},
{
"loss": 1.0916,
"grad_norm": 2.459308385848999,
"learning_rate": 7.824336907872998e-05,
"epoch": 0.61,
"step": 40550
},
{
"loss": 1.0323,
"grad_norm": 2.234042167663574,
"learning_rate": 7.816827370760866e-05,
"epoch": 0.61,
"step": 40575
},
{
"loss": 1.082,
"grad_norm": 1.3024649620056152,
"learning_rate": 7.809317833648734e-05,
"epoch": 0.61,
"step": 40600
},
{
"loss": 1.1336,
"grad_norm": 1.6423088312149048,
"learning_rate": 7.801808296536602e-05,
"epoch": 0.61,
"step": 40625
},
{
"loss": 1.1655,
"grad_norm": 1.7982234954833984,
"learning_rate": 7.79429875942447e-05,
"epoch": 0.61,
"step": 40650
},
{
"loss": 1.1202,
"grad_norm": 1.7683314085006714,
"learning_rate": 7.786789222312338e-05,
"epoch": 0.61,
"step": 40675
},
{
"loss": 1.148,
"grad_norm": 1.1973870992660522,
"learning_rate": 7.779279685200204e-05,
"epoch": 0.61,
"step": 40700
},
{
"loss": 1.1323,
"grad_norm": 1.366228461265564,
"learning_rate": 7.771770148088072e-05,
"epoch": 0.61,
"step": 40725
},
{
"loss": 1.1777,
"grad_norm": 1.8791155815124512,
"learning_rate": 7.76426061097594e-05,
"epoch": 0.61,
"step": 40750
},
{
"loss": 1.2012,
"grad_norm": 1.5715335607528687,
"learning_rate": 7.756751073863807e-05,
"epoch": 0.61,
"step": 40775
},
{
"loss": 1.1607,
"grad_norm": 2.0917367935180664,
"learning_rate": 7.749241536751674e-05,
"epoch": 0.61,
"step": 40800
},
{
"loss": 1.1775,
"grad_norm": 2.049710988998413,
"learning_rate": 7.741731999639542e-05,
"epoch": 0.61,
"step": 40825
},
{
"loss": 1.171,
"grad_norm": 1.9413490295410156,
"learning_rate": 7.73422246252741e-05,
"epoch": 0.61,
"step": 40850
},
{
"loss": 1.147,
"grad_norm": 1.760611891746521,
"learning_rate": 7.726712925415278e-05,
"epoch": 0.61,
"step": 40875
},
{
"loss": 1.0593,
"grad_norm": 1.515251874923706,
"learning_rate": 7.719203388303146e-05,
"epoch": 0.61,
"step": 40900
},
{
"loss": 1.1917,
"grad_norm": 1.9760046005249023,
"learning_rate": 7.711693851191013e-05,
"epoch": 0.61,
"step": 40925
},
{
"loss": 1.1901,
"grad_norm": 2.0077996253967285,
"learning_rate": 7.70418431407888e-05,
"epoch": 0.62,
"step": 40950
},
{
"loss": 1.1352,
"grad_norm": 1.6817588806152344,
"learning_rate": 7.696674776966748e-05,
"epoch": 0.62,
"step": 40975
},
{
"loss": 1.1396,
"grad_norm": 1.1577296257019043,
"learning_rate": 7.689165239854615e-05,
"epoch": 0.62,
"step": 41000
},
{
"loss": 1.1017,
"grad_norm": 1.2814109325408936,
"learning_rate": 7.681655702742483e-05,
"epoch": 0.62,
"step": 41025
},
{
"loss": 1.1205,
"grad_norm": 2.1550331115722656,
"learning_rate": 7.67414616563035e-05,
"epoch": 0.62,
"step": 41050
},
{
"loss": 1.1016,
"grad_norm": 0.8784595131874084,
"learning_rate": 7.666636628518219e-05,
"epoch": 0.62,
"step": 41075
},
{
"loss": 1.2382,
"grad_norm": 1.159589171409607,
"learning_rate": 7.659127091406086e-05,
"epoch": 0.62,
"step": 41100
},
{
"loss": 1.2277,
"grad_norm": 2.137002468109131,
"learning_rate": 7.651617554293954e-05,
"epoch": 0.62,
"step": 41125
},
{
"loss": 1.2025,
"grad_norm": 1.5546560287475586,
"learning_rate": 7.644108017181821e-05,
"epoch": 0.62,
"step": 41150
},
{
"loss": 1.1419,
"grad_norm": 1.3389211893081665,
"learning_rate": 7.636598480069689e-05,
"epoch": 0.62,
"step": 41175
},
{
"loss": 1.1326,
"grad_norm": 1.7950804233551025,
"learning_rate": 7.629088942957557e-05,
"epoch": 0.62,
"step": 41200
},
{
"loss": 1.1444,
"grad_norm": 6.291619777679443,
"learning_rate": 7.621579405845423e-05,
"epoch": 0.62,
"step": 41225
},
{
"loss": 1.1504,
"grad_norm": 1.4547916650772095,
"learning_rate": 7.614069868733291e-05,
"epoch": 0.62,
"step": 41250
},
{
"loss": 1.2186,
"grad_norm": 1.098617672920227,
"learning_rate": 7.60656033162116e-05,
"epoch": 0.62,
"step": 41275
},
{
"loss": 1.1272,
"grad_norm": 2.3291661739349365,
"learning_rate": 7.599050794509027e-05,
"epoch": 0.62,
"step": 41300
},
{
"loss": 1.118,
"grad_norm": 2.5848639011383057,
"learning_rate": 7.591541257396895e-05,
"epoch": 0.62,
"step": 41325
},
{
"loss": 1.0754,
"grad_norm": 1.1806175708770752,
"learning_rate": 7.584031720284763e-05,
"epoch": 0.62,
"step": 41350
},
{
"loss": 1.1162,
"grad_norm": 2.0705809593200684,
"learning_rate": 7.576522183172629e-05,
"epoch": 0.62,
"step": 41375
},
{
"loss": 1.1894,
"grad_norm": 2.1457672119140625,
"learning_rate": 7.569012646060497e-05,
"epoch": 0.62,
"step": 41400
},
{
"loss": 1.1474,
"grad_norm": 2.5769121646881104,
"learning_rate": 7.561503108948365e-05,
"epoch": 0.62,
"step": 41425
},
{
"loss": 1.1491,
"grad_norm": 1.2455226182937622,
"learning_rate": 7.553993571836231e-05,
"epoch": 0.62,
"step": 41450
},
{
"loss": 1.1613,
"grad_norm": 1.5082957744598389,
"learning_rate": 7.546484034724099e-05,
"epoch": 0.62,
"step": 41475
},
{
"loss": 1.132,
"grad_norm": 1.4825623035430908,
"learning_rate": 7.538974497611969e-05,
"epoch": 0.62,
"step": 41500
},
{
"loss": 1.132,
"grad_norm": 1.490708589553833,
"learning_rate": 7.531464960499835e-05,
"epoch": 0.62,
"step": 41525
},
{
"loss": 1.1115,
"grad_norm": 1.2338794469833374,
"learning_rate": 7.523955423387703e-05,
"epoch": 0.62,
"step": 41550
},
{
"loss": 1.0703,
"grad_norm": 2.4002695083618164,
"learning_rate": 7.516445886275571e-05,
"epoch": 0.62,
"step": 41575
},
{
"loss": 1.1811,
"grad_norm": 1.6812978982925415,
"learning_rate": 7.508936349163437e-05,
"epoch": 0.62,
"step": 41600
},
{
"loss": 1.1221,
"grad_norm": 1.1364158391952515,
"learning_rate": 7.501426812051305e-05,
"epoch": 0.63,
"step": 41625
},
{
"loss": 1.1513,
"grad_norm": 1.2960102558135986,
"learning_rate": 7.493917274939173e-05,
"epoch": 0.63,
"step": 41650
},
{
"loss": 1.1192,
"grad_norm": 1.6246578693389893,
"learning_rate": 7.48640773782704e-05,
"epoch": 0.63,
"step": 41675
},
{
"loss": 1.1717,
"grad_norm": 3.2029411792755127,
"learning_rate": 7.478898200714908e-05,
"epoch": 0.63,
"step": 41700
},
{
"loss": 1.1804,
"grad_norm": 1.4046978950500488,
"learning_rate": 7.471388663602777e-05,
"epoch": 0.63,
"step": 41725
},
{
"loss": 1.075,
"grad_norm": 1.478977918624878,
"learning_rate": 7.463879126490643e-05,
"epoch": 0.63,
"step": 41750
},
{
"loss": 1.1016,
"grad_norm": 1.5317085981369019,
"learning_rate": 7.456369589378511e-05,
"epoch": 0.63,
"step": 41775
},
{
"loss": 1.124,
"grad_norm": 1.253780484199524,
"learning_rate": 7.448860052266379e-05,
"epoch": 0.63,
"step": 41800
},
{
"loss": 1.1664,
"grad_norm": 1.669776439666748,
"learning_rate": 7.441350515154246e-05,
"epoch": 0.63,
"step": 41825
},
{
"loss": 1.2216,
"grad_norm": 1.3725230693817139,
"learning_rate": 7.433840978042114e-05,
"epoch": 0.63,
"step": 41850
},
{
"loss": 1.1648,
"grad_norm": 1.3599203824996948,
"learning_rate": 7.426331440929981e-05,
"epoch": 0.63,
"step": 41875
},
{
"loss": 1.1791,
"grad_norm": 0.8592632412910461,
"learning_rate": 7.418821903817848e-05,
"epoch": 0.63,
"step": 41900
},
{
"loss": 1.0981,
"grad_norm": 2.0452094078063965,
"learning_rate": 7.411312366705717e-05,
"epoch": 0.63,
"step": 41925
},
{
"loss": 1.1431,
"grad_norm": 2.367089033126831,
"learning_rate": 7.403802829593585e-05,
"epoch": 0.63,
"step": 41950
},
{
"loss": 1.1168,
"grad_norm": 1.2674223184585571,
"learning_rate": 7.396293292481452e-05,
"epoch": 0.63,
"step": 41975
},
{
"loss": 1.1096,
"grad_norm": 1.589810848236084,
"learning_rate": 7.38878375536932e-05,
"epoch": 0.63,
"step": 42000
},
{
"loss": 1.1267,
"grad_norm": 1.9553534984588623,
"learning_rate": 7.381274218257187e-05,
"epoch": 0.63,
"step": 42025
},
{
"loss": 1.1687,
"grad_norm": 1.646224021911621,
"learning_rate": 7.373764681145054e-05,
"epoch": 0.63,
"step": 42050
},
{
"loss": 1.1368,
"grad_norm": 1.515450119972229,
"learning_rate": 7.366255144032922e-05,
"epoch": 0.63,
"step": 42075
},
{
"loss": 1.1289,
"grad_norm": 1.4205098152160645,
"learning_rate": 7.35874560692079e-05,
"epoch": 0.63,
"step": 42100
},
{
"loss": 1.0966,
"grad_norm": 1.0404484272003174,
"learning_rate": 7.351236069808656e-05,
"epoch": 0.63,
"step": 42125
},
{
"loss": 1.0713,
"grad_norm": 1.615555763244629,
"learning_rate": 7.343726532696525e-05,
"epoch": 0.63,
"step": 42150
},
{
"loss": 1.1928,
"grad_norm": 1.5871440172195435,
"learning_rate": 7.336216995584393e-05,
"epoch": 0.63,
"step": 42175
},
{
"loss": 1.1243,
"grad_norm": 1.4905834197998047,
"learning_rate": 7.32870745847226e-05,
"epoch": 0.63,
"step": 42200
},
{
"loss": 1.1623,
"grad_norm": 1.8793108463287354,
"learning_rate": 7.321197921360128e-05,
"epoch": 0.63,
"step": 42225
},
{
"loss": 1.1306,
"grad_norm": 1.7370017766952515,
"learning_rate": 7.313688384247996e-05,
"epoch": 0.63,
"step": 42250
},
{
"loss": 1.1043,
"grad_norm": 1.2322642803192139,
"learning_rate": 7.306178847135862e-05,
"epoch": 0.63,
"step": 42275
},
{
"loss": 1.1336,
"grad_norm": 1.9522937536239624,
"learning_rate": 7.29866931002373e-05,
"epoch": 0.64,
"step": 42300
},
{
"loss": 1.2031,
"grad_norm": 1.7146880626678467,
"learning_rate": 7.291159772911598e-05,
"epoch": 0.64,
"step": 42325
},
{
"loss": 1.0689,
"grad_norm": 1.353827953338623,
"learning_rate": 7.283650235799465e-05,
"epoch": 0.64,
"step": 42350
},
{
"loss": 1.046,
"grad_norm": 1.0300394296646118,
"learning_rate": 7.276140698687334e-05,
"epoch": 0.64,
"step": 42375
},
{
"loss": 1.1271,
"grad_norm": 1.7847486734390259,
"learning_rate": 7.268631161575202e-05,
"epoch": 0.64,
"step": 42400
},
{
"loss": 1.1355,
"grad_norm": 1.5269598960876465,
"learning_rate": 7.261121624463068e-05,
"epoch": 0.64,
"step": 42425
},
{
"loss": 1.1399,
"grad_norm": 1.863671898841858,
"learning_rate": 7.253612087350936e-05,
"epoch": 0.64,
"step": 42450
},
{
"loss": 1.1696,
"grad_norm": 1.3562769889831543,
"learning_rate": 7.246102550238804e-05,
"epoch": 0.64,
"step": 42475
},
{
"loss": 1.1442,
"grad_norm": 1.9278922080993652,
"learning_rate": 7.23859301312667e-05,
"epoch": 0.64,
"step": 42500
},
{
"loss": 1.1339,
"grad_norm": 1.8279402256011963,
"learning_rate": 7.231083476014538e-05,
"epoch": 0.64,
"step": 42525
},
{
"loss": 1.1319,
"grad_norm": 1.165343999862671,
"learning_rate": 7.223573938902406e-05,
"epoch": 0.64,
"step": 42550
},
{
"loss": 1.0568,
"grad_norm": 1.109803318977356,
"learning_rate": 7.216064401790274e-05,
"epoch": 0.64,
"step": 42575
},
{
"loss": 1.214,
"grad_norm": 1.3198269605636597,
"learning_rate": 7.208554864678142e-05,
"epoch": 0.64,
"step": 42600
},
{
"loss": 1.1511,
"grad_norm": 1.4255495071411133,
"learning_rate": 7.20104532756601e-05,
"epoch": 0.64,
"step": 42625
},
{
"loss": 1.2014,
"grad_norm": 2.5674383640289307,
"learning_rate": 7.193535790453876e-05,
"epoch": 0.64,
"step": 42650
},
{
"loss": 1.1177,
"grad_norm": 2.265868663787842,
"learning_rate": 7.186026253341744e-05,
"epoch": 0.64,
"step": 42675
},
{
"loss": 1.151,
"grad_norm": 1.6792558431625366,
"learning_rate": 7.178516716229612e-05,
"epoch": 0.64,
"step": 42700
},
{
"loss": 1.211,
"grad_norm": 1.5409029722213745,
"learning_rate": 7.171007179117479e-05,
"epoch": 0.64,
"step": 42725
},
{
"loss": 1.1475,
"grad_norm": 2.21852970123291,
"learning_rate": 7.163497642005347e-05,
"epoch": 0.64,
"step": 42750
},
{
"loss": 1.1559,
"grad_norm": 1.770150065422058,
"learning_rate": 7.155988104893215e-05,
"epoch": 0.64,
"step": 42775
},
{
"loss": 1.1443,
"grad_norm": 2.0208752155303955,
"learning_rate": 7.148478567781082e-05,
"epoch": 0.64,
"step": 42800
},
{
"loss": 1.1486,
"grad_norm": 1.3952795267105103,
"learning_rate": 7.14096903066895e-05,
"epoch": 0.64,
"step": 42825
},
{
"loss": 1.1166,
"grad_norm": 1.055246114730835,
"learning_rate": 7.133459493556818e-05,
"epoch": 0.64,
"step": 42850
},
{
"loss": 1.1424,
"grad_norm": 1.9727169275283813,
"learning_rate": 7.125949956444685e-05,
"epoch": 0.64,
"step": 42875
},
{
"loss": 1.1111,
"grad_norm": 1.2607585191726685,
"learning_rate": 7.118440419332553e-05,
"epoch": 0.64,
"step": 42900
},
{
"loss": 1.1684,
"grad_norm": 1.3263331651687622,
"learning_rate": 7.11093088222042e-05,
"epoch": 0.64,
"step": 42925
},
{
"loss": 1.1461,
"grad_norm": 1.7771466970443726,
"learning_rate": 7.103421345108287e-05,
"epoch": 0.65,
"step": 42950
},
{
"loss": 1.0846,
"grad_norm": 1.842961311340332,
"learning_rate": 7.095911807996155e-05,
"epoch": 0.65,
"step": 42975
},
{
"loss": 1.143,
"grad_norm": 1.8266829252243042,
"learning_rate": 7.088402270884023e-05,
"epoch": 0.65,
"step": 43000
},
{
"loss": 1.1259,
"grad_norm": 2.703138828277588,
"learning_rate": 7.08089273377189e-05,
"epoch": 0.65,
"step": 43025
},
{
"loss": 1.1592,
"grad_norm": 1.7669565677642822,
"learning_rate": 7.073383196659759e-05,
"epoch": 0.65,
"step": 43050
},
{
"loss": 1.157,
"grad_norm": 2.708080530166626,
"learning_rate": 7.065873659547626e-05,
"epoch": 0.65,
"step": 43075
},
{
"loss": 1.1409,
"grad_norm": 0.9941558241844177,
"learning_rate": 7.058364122435493e-05,
"epoch": 0.65,
"step": 43100
},
{
"loss": 1.09,
"grad_norm": 2.0420825481414795,
"learning_rate": 7.050854585323361e-05,
"epoch": 0.65,
"step": 43125
},
{
"loss": 1.1426,
"grad_norm": 2.205864667892456,
"learning_rate": 7.043345048211229e-05,
"epoch": 0.65,
"step": 43150
},
{
"loss": 1.0324,
"grad_norm": 1.426829218864441,
"learning_rate": 7.035835511099095e-05,
"epoch": 0.65,
"step": 43175
},
{
"loss": 1.0635,
"grad_norm": 2.733449935913086,
"learning_rate": 7.028325973986963e-05,
"epoch": 0.65,
"step": 43200
},
{
"loss": 1.0572,
"grad_norm": 1.3944034576416016,
"learning_rate": 7.020816436874832e-05,
"epoch": 0.65,
"step": 43225
},
{
"loss": 1.0696,
"grad_norm": 3.0517678260803223,
"learning_rate": 7.013306899762699e-05,
"epoch": 0.65,
"step": 43250
},
{
"loss": 1.1799,
"grad_norm": 1.6215708255767822,
"learning_rate": 7.005797362650567e-05,
"epoch": 0.65,
"step": 43275
},
{
"loss": 1.2173,
"grad_norm": 1.6113760471343994,
"learning_rate": 6.99858820702292e-05,
"epoch": 0.65,
"step": 43300
},
{
"loss": 1.1714,
"grad_norm": 2.9850752353668213,
"learning_rate": 6.991078669910786e-05,
"epoch": 0.65,
"step": 43325
},
{
"loss": 1.0623,
"grad_norm": 1.9807687997817993,
"learning_rate": 6.983569132798654e-05,
"epoch": 0.65,
"step": 43350
},
{
"loss": 1.1881,
"grad_norm": 1.4547510147094727,
"learning_rate": 6.976059595686523e-05,
"epoch": 0.65,
"step": 43375
},
{
"loss": 1.1096,
"grad_norm": 1.768142819404602,
"learning_rate": 6.96855005857439e-05,
"epoch": 0.65,
"step": 43400
},
{
"loss": 1.1513,
"grad_norm": 1.4340214729309082,
"learning_rate": 6.961040521462258e-05,
"epoch": 0.65,
"step": 43425
},
{
"loss": 1.1259,
"grad_norm": 2.260941743850708,
"learning_rate": 6.953530984350126e-05,
"epoch": 0.65,
"step": 43450
},
{
"loss": 1.0911,
"grad_norm": 1.3704345226287842,
"learning_rate": 6.946021447237992e-05,
"epoch": 0.65,
"step": 43475
},
{
"loss": 1.1814,
"grad_norm": 2.298049211502075,
"learning_rate": 6.93851191012586e-05,
"epoch": 0.65,
"step": 43500
},
{
"loss": 1.127,
"grad_norm": 1.4709994792938232,
"learning_rate": 6.931002373013728e-05,
"epoch": 0.65,
"step": 43525
},
{
"loss": 1.0889,
"grad_norm": 1.7260873317718506,
"learning_rate": 6.923492835901594e-05,
"epoch": 0.65,
"step": 43550
},
{
"loss": 1.1871,
"grad_norm": 1.912359595298767,
"learning_rate": 6.915983298789464e-05,
"epoch": 0.65,
"step": 43575
},
{
"loss": 1.1445,
"grad_norm": 1.4736065864562988,
"learning_rate": 6.908473761677332e-05,
"epoch": 0.65,
"step": 43600
},
{
"loss": 1.1363,
"grad_norm": 2.0081377029418945,
"learning_rate": 6.900964224565198e-05,
"epoch": 0.66,
"step": 43625
},
{
"loss": 1.1329,
"grad_norm": 1.3601313829421997,
"learning_rate": 6.893454687453066e-05,
"epoch": 0.66,
"step": 43650
},
{
"loss": 1.0892,
"grad_norm": 1.9667214155197144,
"learning_rate": 6.885945150340934e-05,
"epoch": 0.66,
"step": 43675
},
{
"loss": 1.1483,
"grad_norm": 1.3687251806259155,
"learning_rate": 6.878735994713285e-05,
"epoch": 0.66,
"step": 43700
},
{
"loss": 1.0915,
"grad_norm": 2.4071569442749023,
"learning_rate": 6.871226457601155e-05,
"epoch": 0.66,
"step": 43725
},
{
"loss": 1.1005,
"grad_norm": 1.577147126197815,
"learning_rate": 6.863716920489021e-05,
"epoch": 0.66,
"step": 43750
},
{
"loss": 1.1643,
"grad_norm": 4.050117015838623,
"learning_rate": 6.856207383376889e-05,
"epoch": 0.66,
"step": 43775
},
{
"loss": 1.1211,
"grad_norm": 1.6273080110549927,
"learning_rate": 6.848697846264757e-05,
"epoch": 0.66,
"step": 43800
},
{
"loss": 1.082,
"grad_norm": 1.4899144172668457,
"learning_rate": 6.841188309152623e-05,
"epoch": 0.66,
"step": 43825
},
{
"loss": 1.1292,
"grad_norm": 2.176234006881714,
"learning_rate": 6.833678772040491e-05,
"epoch": 0.66,
"step": 43850
},
{
"loss": 1.1189,
"grad_norm": 1.6825004816055298,
"learning_rate": 6.826169234928359e-05,
"epoch": 0.66,
"step": 43875
},
{
"loss": 1.0886,
"grad_norm": 1.342085361480713,
"learning_rate": 6.818659697816227e-05,
"epoch": 0.66,
"step": 43900
},
{
"loss": 1.146,
"grad_norm": 0.8940933346748352,
"learning_rate": 6.811150160704095e-05,
"epoch": 0.66,
"step": 43925
},
{
"loss": 1.0925,
"grad_norm": 1.4835485219955444,
"learning_rate": 6.803640623591963e-05,
"epoch": 0.66,
"step": 43950
},
{
"loss": 1.1992,
"grad_norm": 1.9649572372436523,
"learning_rate": 6.79613108647983e-05,
"epoch": 0.66,
"step": 43975
},
{
"loss": 1.1081,
"grad_norm": 1.4442362785339355,
"learning_rate": 6.788621549367697e-05,
"epoch": 0.66,
"step": 44000
},
{
"loss": 1.1475,
"grad_norm": 3.4296488761901855,
"learning_rate": 6.781112012255565e-05,
"epoch": 0.66,
"step": 44025
},
{
"loss": 1.1343,
"grad_norm": 2.3116354942321777,
"learning_rate": 6.773602475143432e-05,
"epoch": 0.66,
"step": 44050
},
{
"loss": 1.1686,
"grad_norm": 1.6859666109085083,
"learning_rate": 6.7660929380313e-05,
"epoch": 0.66,
"step": 44075
},
{
"loss": 1.1612,
"grad_norm": 1.4094436168670654,
"learning_rate": 6.758583400919168e-05,
"epoch": 0.66,
"step": 44100
},
{
"loss": 1.1254,
"grad_norm": 2.2246947288513184,
"learning_rate": 6.751073863807035e-05,
"epoch": 0.66,
"step": 44125
},
{
"loss": 1.16,
"grad_norm": 1.2146118879318237,
"learning_rate": 6.743564326694903e-05,
"epoch": 0.66,
"step": 44150
},
{
"loss": 1.1512,
"grad_norm": 1.7137471437454224,
"learning_rate": 6.736054789582771e-05,
"epoch": 0.66,
"step": 44175
},
{
"loss": 1.0637,
"grad_norm": 1.3975019454956055,
"learning_rate": 6.728545252470638e-05,
"epoch": 0.66,
"step": 44200
},
{
"loss": 1.1295,
"grad_norm": 1.0238609313964844,
"learning_rate": 6.721035715358506e-05,
"epoch": 0.66,
"step": 44225
},
{
"loss": 1.0978,
"grad_norm": 1.798096776008606,
"learning_rate": 6.713526178246373e-05,
"epoch": 0.66,
"step": 44250
},
{
"loss": 1.1513,
"grad_norm": 1.8276596069335938,
"learning_rate": 6.70601664113424e-05,
"epoch": 0.66,
"step": 44275
},
{
"loss": 1.1275,
"grad_norm": 2.5206449031829834,
"learning_rate": 6.698507104022108e-05,
"epoch": 0.67,
"step": 44300
},
{
"loss": 1.1837,
"grad_norm": 1.401331901550293,
"learning_rate": 6.690997566909976e-05,
"epoch": 0.67,
"step": 44325
},
{
"loss": 1.137,
"grad_norm": 1.3737742900848389,
"learning_rate": 6.683488029797844e-05,
"epoch": 0.67,
"step": 44350
},
{
"loss": 1.161,
"grad_norm": 1.461982011795044,
"learning_rate": 6.675978492685712e-05,
"epoch": 0.67,
"step": 44375
},
{
"loss": 1.1194,
"grad_norm": 1.4352518320083618,
"learning_rate": 6.66846895557358e-05,
"epoch": 0.67,
"step": 44400
},
{
"loss": 1.2318,
"grad_norm": 1.9560725688934326,
"learning_rate": 6.660959418461446e-05,
"epoch": 0.67,
"step": 44425
},
{
"loss": 1.1218,
"grad_norm": 1.0845290422439575,
"learning_rate": 6.653449881349314e-05,
"epoch": 0.67,
"step": 44450
},
{
"loss": 1.2239,
"grad_norm": 1.4423961639404297,
"learning_rate": 6.645940344237182e-05,
"epoch": 0.67,
"step": 44475
},
{
"loss": 1.18,
"grad_norm": 2.2228844165802,
"learning_rate": 6.638430807125048e-05,
"epoch": 0.67,
"step": 44500
},
{
"loss": 1.1529,
"grad_norm": 1.524857521057129,
"learning_rate": 6.630921270012916e-05,
"epoch": 0.67,
"step": 44525
},
{
"loss": 1.121,
"grad_norm": 1.349579930305481,
"learning_rate": 6.623411732900784e-05,
"epoch": 0.67,
"step": 44550
},
{
"loss": 1.1108,
"grad_norm": 1.5544127225875854,
"learning_rate": 6.615902195788652e-05,
"epoch": 0.67,
"step": 44575
},
{
"loss": 1.1202,
"grad_norm": 1.1992143392562866,
"learning_rate": 6.60839265867652e-05,
"epoch": 0.67,
"step": 44600
},
{
"loss": 1.1227,
"grad_norm": 1.020192265510559,
"learning_rate": 6.600883121564388e-05,
"epoch": 0.67,
"step": 44625
},
{
"loss": 1.0743,
"grad_norm": 0.9846924543380737,
"learning_rate": 6.593373584452254e-05,
"epoch": 0.67,
"step": 44650
},
{
"loss": 1.1423,
"grad_norm": 1.3693522214889526,
"learning_rate": 6.585864047340122e-05,
"epoch": 0.67,
"step": 44675
},
{
"loss": 1.1301,
"grad_norm": 1.9597666263580322,
"learning_rate": 6.57835451022799e-05,
"epoch": 0.67,
"step": 44700
},
{
"loss": 1.1554,
"grad_norm": 1.1841599941253662,
"learning_rate": 6.570844973115857e-05,
"epoch": 0.67,
"step": 44725
},
{
"loss": 1.1822,
"grad_norm": 1.071419596672058,
"learning_rate": 6.563335436003724e-05,
"epoch": 0.67,
"step": 44750
},
{
"loss": 1.1234,
"grad_norm": 1.3992092609405518,
"learning_rate": 6.555825898891592e-05,
"epoch": 0.67,
"step": 44775
},
{
"loss": 1.089,
"grad_norm": 1.3875787258148193,
"learning_rate": 6.54831636177946e-05,
"epoch": 0.67,
"step": 44800
},
{
"loss": 1.1601,
"grad_norm": 1.7100720405578613,
"learning_rate": 6.540806824667328e-05,
"epoch": 0.67,
"step": 44825
},
{
"loss": 1.1281,
"grad_norm": 1.3852653503417969,
"learning_rate": 6.533297287555196e-05,
"epoch": 0.67,
"step": 44850
},
{
"loss": 1.1651,
"grad_norm": 2.1557846069335938,
"learning_rate": 6.525787750443063e-05,
"epoch": 0.67,
"step": 44875
},
{
"loss": 1.1061,
"grad_norm": 1.711010217666626,
"learning_rate": 6.51827821333093e-05,
"epoch": 0.67,
"step": 44900
},
{
"loss": 1.1282,
"grad_norm": 1.7825603485107422,
"learning_rate": 6.510768676218798e-05,
"epoch": 0.67,
"step": 44925
},
{
"loss": 1.1178,
"grad_norm": 1.6146150827407837,
"learning_rate": 6.503259139106666e-05,
"epoch": 0.68,
"step": 44950
},
{
"loss": 1.1025,
"grad_norm": 2.3008651733398438,
"learning_rate": 6.495749601994533e-05,
"epoch": 0.68,
"step": 44975
},
{
"loss": 1.1944,
"grad_norm": 1.2394930124282837,
"learning_rate": 6.4882400648824e-05,
"epoch": 0.68,
"step": 45000
},
{
"loss": 1.1354,
"grad_norm": 1.3226121664047241,
"learning_rate": 6.480730527770268e-05,
"epoch": 0.68,
"step": 45025
},
{
"loss": 1.082,
"grad_norm": 1.0096830129623413,
"learning_rate": 6.473220990658136e-05,
"epoch": 0.68,
"step": 45050
},
{
"loss": 1.1808,
"grad_norm": 1.8499752283096313,
"learning_rate": 6.465711453546004e-05,
"epoch": 0.68,
"step": 45075
},
{
"loss": 1.1287,
"grad_norm": 1.8377041816711426,
"learning_rate": 6.458201916433871e-05,
"epoch": 0.68,
"step": 45100
},
{
"loss": 1.1541,
"grad_norm": 1.5449110269546509,
"learning_rate": 6.450692379321739e-05,
"epoch": 0.68,
"step": 45125
},
{
"loss": 1.054,
"grad_norm": 1.7001621723175049,
"learning_rate": 6.443182842209607e-05,
"epoch": 0.68,
"step": 45150
},
{
"loss": 1.0684,
"grad_norm": 1.0393651723861694,
"learning_rate": 6.435673305097474e-05,
"epoch": 0.68,
"step": 45175
},
{
"loss": 1.1805,
"grad_norm": 1.6634325981140137,
"learning_rate": 6.428163767985341e-05,
"epoch": 0.68,
"step": 45200
},
{
"loss": 1.1241,
"grad_norm": 1.3879557847976685,
"learning_rate": 6.42065423087321e-05,
"epoch": 0.68,
"step": 45225
},
{
"loss": 1.0822,
"grad_norm": 1.3733693361282349,
"learning_rate": 6.413144693761077e-05,
"epoch": 0.68,
"step": 45250
},
{
"loss": 1.1762,
"grad_norm": 1.540552020072937,
"learning_rate": 6.405635156648945e-05,
"epoch": 0.68,
"step": 45275
},
{
"loss": 1.1056,
"grad_norm": 1.7979633808135986,
"learning_rate": 6.398125619536813e-05,
"epoch": 0.68,
"step": 45300
},
{
"loss": 1.0967,
"grad_norm": 1.601650357246399,
"learning_rate": 6.390616082424679e-05,
"epoch": 0.68,
"step": 45325
},
{
"loss": 1.13,
"grad_norm": 1.2980515956878662,
"learning_rate": 6.383106545312547e-05,
"epoch": 0.68,
"step": 45350
},
{
"loss": 1.1626,
"grad_norm": 1.1858327388763428,
"learning_rate": 6.375597008200415e-05,
"epoch": 0.68,
"step": 45375
},
{
"loss": 1.097,
"grad_norm": 1.3785401582717896,
"learning_rate": 6.368087471088283e-05,
"epoch": 0.68,
"step": 45400
},
{
"loss": 1.1721,
"grad_norm": 0.9480896592140198,
"learning_rate": 6.360577933976149e-05,
"epoch": 0.68,
"step": 45425
},
{
"loss": 1.1218,
"grad_norm": 1.55319344997406,
"learning_rate": 6.353068396864019e-05,
"epoch": 0.68,
"step": 45450
},
{
"loss": 1.1096,
"grad_norm": 1.589424729347229,
"learning_rate": 6.345558859751885e-05,
"epoch": 0.68,
"step": 45475
},
{
"loss": 1.1252,
"grad_norm": 1.1488457918167114,
"learning_rate": 6.338049322639753e-05,
"epoch": 0.68,
"step": 45500
},
{
"loss": 1.1776,
"grad_norm": 2.43381667137146,
"learning_rate": 6.330539785527621e-05,
"epoch": 0.68,
"step": 45525
},
{
"loss": 1.1062,
"grad_norm": 1.4571008682250977,
"learning_rate": 6.323030248415487e-05,
"epoch": 0.68,
"step": 45550
},
{
"loss": 1.0569,
"grad_norm": 2.75005841255188,
"learning_rate": 6.315520711303355e-05,
"epoch": 0.68,
"step": 45575
},
{
"loss": 1.1457,
"grad_norm": 1.2686755657196045,
"learning_rate": 6.308011174191223e-05,
"epoch": 0.68,
"step": 45600
},
{
"loss": 1.1216,
"grad_norm": 1.4540945291519165,
"learning_rate": 6.300501637079091e-05,
"epoch": 0.69,
"step": 45625
},
{
"loss": 1.1018,
"grad_norm": 1.2058906555175781,
"learning_rate": 6.292992099966958e-05,
"epoch": 0.69,
"step": 45650
},
{
"loss": 1.1387,
"grad_norm": 2.0346646308898926,
"learning_rate": 6.285482562854827e-05,
"epoch": 0.69,
"step": 45675
},
{
"loss": 1.1894,
"grad_norm": 1.4020074605941772,
"learning_rate": 6.277973025742693e-05,
"epoch": 0.69,
"step": 45700
},
{
"loss": 1.1201,
"grad_norm": 1.4608796834945679,
"learning_rate": 6.270463488630561e-05,
"epoch": 0.69,
"step": 45725
},
{
"loss": 1.1399,
"grad_norm": 1.7157222032546997,
"learning_rate": 6.262953951518429e-05,
"epoch": 0.69,
"step": 45750
},
{
"loss": 1.1118,
"grad_norm": 1.1699299812316895,
"learning_rate": 6.255444414406296e-05,
"epoch": 0.69,
"step": 45775
},
{
"loss": 1.1491,
"grad_norm": 1.5801405906677246,
"learning_rate": 6.247934877294163e-05,
"epoch": 0.69,
"step": 45800
},
{
"loss": 1.1591,
"grad_norm": 2.6088712215423584,
"learning_rate": 6.240425340182031e-05,
"epoch": 0.69,
"step": 45825
},
{
"loss": 1.1241,
"grad_norm": 1.1088968515396118,
"learning_rate": 6.232915803069899e-05,
"epoch": 0.69,
"step": 45850
},
{
"loss": 1.1937,
"grad_norm": 1.9700263738632202,
"learning_rate": 6.225406265957767e-05,
"epoch": 0.69,
"step": 45875
},
{
"loss": 1.0887,
"grad_norm": 1.8540663719177246,
"learning_rate": 6.217896728845635e-05,
"epoch": 0.69,
"step": 45900
},
{
"loss": 1.1068,
"grad_norm": 1.1432939767837524,
"learning_rate": 6.210387191733502e-05,
"epoch": 0.69,
"step": 45925
},
{
"loss": 1.1209,
"grad_norm": 1.9326074123382568,
"learning_rate": 6.20287765462137e-05,
"epoch": 0.69,
"step": 45950
},
{
"loss": 1.1356,
"grad_norm": 1.928639531135559,
"learning_rate": 6.195368117509237e-05,
"epoch": 0.69,
"step": 45975
},
{
"loss": 1.1867,
"grad_norm": 1.4352381229400635,
"learning_rate": 6.187858580397104e-05,
"epoch": 0.69,
"step": 46000
},
{
"loss": 1.1867,
"grad_norm": 1.4124820232391357,
"learning_rate": 6.180349043284972e-05,
"epoch": 0.69,
"step": 46025
},
{
"loss": 1.1144,
"grad_norm": 2.557101249694824,
"learning_rate": 6.17283950617284e-05,
"epoch": 0.69,
"step": 46050
},
{
"loss": 1.0894,
"grad_norm": 1.6086410284042358,
"learning_rate": 6.165630350545192e-05,
"epoch": 0.69,
"step": 46075
},
{
"loss": 1.1666,
"grad_norm": 1.3408067226409912,
"learning_rate": 6.15812081343306e-05,
"epoch": 0.69,
"step": 46100
},
{
"loss": 1.0923,
"grad_norm": 1.303733229637146,
"learning_rate": 6.150611276320928e-05,
"epoch": 0.69,
"step": 46125
},
{
"loss": 1.1683,
"grad_norm": 2.2426815032958984,
"learning_rate": 6.143101739208795e-05,
"epoch": 0.69,
"step": 46150
},
{
"loss": 1.1749,
"grad_norm": 1.519041657447815,
"learning_rate": 6.135592202096663e-05,
"epoch": 0.69,
"step": 46175
},
{
"loss": 1.1571,
"grad_norm": 1.224007248878479,
"learning_rate": 6.12808266498453e-05,
"epoch": 0.69,
"step": 46200
},
{
"loss": 1.1916,
"grad_norm": 1.8132357597351074,
"learning_rate": 6.120573127872398e-05,
"epoch": 0.69,
"step": 46225
},
{
"loss": 1.122,
"grad_norm": 1.465853214263916,
"learning_rate": 6.113063590760266e-05,
"epoch": 0.69,
"step": 46250
},
{
"loss": 1.126,
"grad_norm": 1.07510507106781,
"learning_rate": 6.105554053648134e-05,
"epoch": 0.69,
"step": 46275
},
{
"loss": 1.1603,
"grad_norm": 1.3960785865783691,
"learning_rate": 6.0980445165360014e-05,
"epoch": 0.7,
"step": 46300
},
{
"loss": 1.1132,
"grad_norm": 1.2278742790222168,
"learning_rate": 6.0905349794238687e-05,
"epoch": 0.7,
"step": 46325
},
{
"loss": 1.159,
"grad_norm": 1.0870790481567383,
"learning_rate": 6.083025442311736e-05,
"epoch": 0.7,
"step": 46350
},
{
"loss": 1.1108,
"grad_norm": 1.3281099796295166,
"learning_rate": 6.075515905199604e-05,
"epoch": 0.7,
"step": 46375
},
{
"loss": 1.1345,
"grad_norm": 1.004966139793396,
"learning_rate": 6.068006368087471e-05,
"epoch": 0.7,
"step": 46400
},
{
"loss": 1.1449,
"grad_norm": 1.102554202079773,
"learning_rate": 6.060496830975338e-05,
"epoch": 0.7,
"step": 46425
},
{
"loss": 1.1252,
"grad_norm": 1.598632574081421,
"learning_rate": 6.052987293863207e-05,
"epoch": 0.7,
"step": 46450
},
{
"loss": 1.1236,
"grad_norm": 1.7185138463974,
"learning_rate": 6.0454777567510746e-05,
"epoch": 0.7,
"step": 46475
},
{
"loss": 1.0913,
"grad_norm": 1.3190034627914429,
"learning_rate": 6.037968219638942e-05,
"epoch": 0.7,
"step": 46500
},
{
"loss": 1.0544,
"grad_norm": 2.019202947616577,
"learning_rate": 6.03045868252681e-05,
"epoch": 0.7,
"step": 46525
},
{
"loss": 1.12,
"grad_norm": 1.3409463167190552,
"learning_rate": 6.022949145414677e-05,
"epoch": 0.7,
"step": 46550
},
{
"loss": 1.1206,
"grad_norm": 1.9381085634231567,
"learning_rate": 6.015439608302544e-05,
"epoch": 0.7,
"step": 46575
},
{
"loss": 1.0804,
"grad_norm": 1.1460068225860596,
"learning_rate": 6.007930071190412e-05,
"epoch": 0.7,
"step": 46600
},
{
"loss": 1.0772,
"grad_norm": 1.3288995027542114,
"learning_rate": 6.000420534078279e-05,
"epoch": 0.7,
"step": 46625
},
{
"loss": 1.1105,
"grad_norm": 1.3131263256072998,
"learning_rate": 5.9929109969661464e-05,
"epoch": 0.7,
"step": 46650
},
{
"loss": 1.1337,
"grad_norm": 4.209181308746338,
"learning_rate": 5.985401459854016e-05,
"epoch": 0.7,
"step": 46675
},
{
"loss": 1.1791,
"grad_norm": 1.737858772277832,
"learning_rate": 5.977891922741883e-05,
"epoch": 0.7,
"step": 46700
},
{
"loss": 1.1116,
"grad_norm": 1.5516709089279175,
"learning_rate": 5.97038238562975e-05,
"epoch": 0.7,
"step": 46725
},
{
"loss": 1.1036,
"grad_norm": 1.0340015888214111,
"learning_rate": 5.962872848517618e-05,
"epoch": 0.7,
"step": 46750
},
{
"loss": 1.1996,
"grad_norm": 2.427554130554199,
"learning_rate": 5.955363311405485e-05,
"epoch": 0.7,
"step": 46775
},
{
"loss": 1.195,
"grad_norm": 2.024531602859497,
"learning_rate": 5.9478537742933524e-05,
"epoch": 0.7,
"step": 46800
},
{
"loss": 1.2264,
"grad_norm": 1.2301228046417236,
"learning_rate": 5.94034423718122e-05,
"epoch": 0.7,
"step": 46825
},
{
"loss": 1.0799,
"grad_norm": 1.2450840473175049,
"learning_rate": 5.9328347000690875e-05,
"epoch": 0.7,
"step": 46850
},
{
"loss": 1.1088,
"grad_norm": 1.3164767026901245,
"learning_rate": 5.925325162956956e-05,
"epoch": 0.7,
"step": 46875
},
{
"loss": 1.1358,
"grad_norm": 1.6260111331939697,
"learning_rate": 5.917815625844824e-05,
"epoch": 0.7,
"step": 46900
},
{
"loss": 1.1555,
"grad_norm": 1.2355117797851562,
"learning_rate": 5.910306088732691e-05,
"epoch": 0.7,
"step": 46925
},
{
"loss": 1.1982,
"grad_norm": 1.639583706855774,
"learning_rate": 5.9027965516205584e-05,
"epoch": 0.71,
"step": 46950
},
{
"loss": 1.1229,
"grad_norm": 1.5127111673355103,
"learning_rate": 5.895287014508426e-05,
"epoch": 0.71,
"step": 46975
},
{
"loss": 1.0991,
"grad_norm": 1.5103808641433716,
"learning_rate": 5.8877774773962935e-05,
"epoch": 0.71,
"step": 47000
},
{
"loss": 1.1568,
"grad_norm": 1.9068769216537476,
"learning_rate": 5.880267940284161e-05,
"epoch": 0.71,
"step": 47025
},
{
"loss": 1.1621,
"grad_norm": 1.520690679550171,
"learning_rate": 5.8727584031720286e-05,
"epoch": 0.71,
"step": 47050
},
{
"loss": 1.148,
"grad_norm": 3.158663749694824,
"learning_rate": 5.865248866059896e-05,
"epoch": 0.71,
"step": 47075
},
{
"loss": 1.1379,
"grad_norm": 2.538459300994873,
"learning_rate": 5.857739328947764e-05,
"epoch": 0.71,
"step": 47100
},
{
"loss": 1.2194,
"grad_norm": 1.1677653789520264,
"learning_rate": 5.850229791835632e-05,
"epoch": 0.71,
"step": 47125
},
{
"loss": 1.1959,
"grad_norm": 1.7188001871109009,
"learning_rate": 5.8427202547234994e-05,
"epoch": 0.71,
"step": 47150
},
{
"loss": 1.1502,
"grad_norm": 1.4081776142120361,
"learning_rate": 5.8352107176113666e-05,
"epoch": 0.71,
"step": 47175
},
{
"loss": 1.182,
"grad_norm": 1.6037064790725708,
"learning_rate": 5.8277011804992345e-05,
"epoch": 0.71,
"step": 47200
},
{
"loss": 1.1552,
"grad_norm": 2.5029053688049316,
"learning_rate": 5.820191643387102e-05,
"epoch": 0.71,
"step": 47225
},
{
"loss": 1.0832,
"grad_norm": 1.6237151622772217,
"learning_rate": 5.812682106274969e-05,
"epoch": 0.71,
"step": 47250
},
{
"loss": 1.1307,
"grad_norm": 1.8060946464538574,
"learning_rate": 5.805172569162837e-05,
"epoch": 0.71,
"step": 47275
},
{
"loss": 1.0566,
"grad_norm": 1.7570223808288574,
"learning_rate": 5.797663032050704e-05,
"epoch": 0.71,
"step": 47300
},
{
"loss": 1.1227,
"grad_norm": 0.9945117831230164,
"learning_rate": 5.7901534949385726e-05,
"epoch": 0.71,
"step": 47325
},
{
"loss": 1.1882,
"grad_norm": 2.105391025543213,
"learning_rate": 5.7826439578264405e-05,
"epoch": 0.71,
"step": 47350
},
{
"loss": 1.1754,
"grad_norm": 2.504192352294922,
"learning_rate": 5.775134420714308e-05,
"epoch": 0.71,
"step": 47375
},
{
"loss": 1.0904,
"grad_norm": 1.456892967224121,
"learning_rate": 5.767624883602175e-05,
"epoch": 0.71,
"step": 47400
},
{
"loss": 1.1371,
"grad_norm": 1.5804184675216675,
"learning_rate": 5.760115346490043e-05,
"epoch": 0.71,
"step": 47425
},
{
"loss": 1.132,
"grad_norm": 2.141827344894409,
"learning_rate": 5.75260580937791e-05,
"epoch": 0.71,
"step": 47450
},
{
"loss": 1.142,
"grad_norm": 2.439383029937744,
"learning_rate": 5.745096272265777e-05,
"epoch": 0.71,
"step": 47475
},
{
"loss": 1.1004,
"grad_norm": 1.3394807577133179,
"learning_rate": 5.737586735153645e-05,
"epoch": 0.71,
"step": 47500
},
{
"loss": 1.0454,
"grad_norm": 1.1147385835647583,
"learning_rate": 5.730077198041514e-05,
"epoch": 0.71,
"step": 47525
},
{
"loss": 1.1216,
"grad_norm": 2.184941291809082,
"learning_rate": 5.722567660929381e-05,
"epoch": 0.71,
"step": 47550
},
{
"loss": 1.1123,
"grad_norm": 1.4739607572555542,
"learning_rate": 5.715058123817249e-05,
"epoch": 0.71,
"step": 47575
},
{
"loss": 1.0541,
"grad_norm": 1.1950966119766235,
"learning_rate": 5.707548586705116e-05,
"epoch": 0.71,
"step": 47600
},
{
"loss": 1.0943,
"grad_norm": 1.8516206741333008,
"learning_rate": 5.700039049592983e-05,
"epoch": 0.72,
"step": 47625
},
{
"loss": 1.1332,
"grad_norm": 1.2161145210266113,
"learning_rate": 5.692529512480851e-05,
"epoch": 0.72,
"step": 47650
},
{
"loss": 1.182,
"grad_norm": 1.776667833328247,
"learning_rate": 5.685019975368718e-05,
"epoch": 0.72,
"step": 47675
},
{
"loss": 1.1426,
"grad_norm": 1.0378066301345825,
"learning_rate": 5.6775104382565855e-05,
"epoch": 0.72,
"step": 47700
},
{
"loss": 1.1334,
"grad_norm": 1.2625921964645386,
"learning_rate": 5.6700009011444534e-05,
"epoch": 0.72,
"step": 47725
},
{
"loss": 1.0911,
"grad_norm": 1.5473499298095703,
"learning_rate": 5.662491364032322e-05,
"epoch": 0.72,
"step": 47750
},
{
"loss": 1.1503,
"grad_norm": 2.243377447128296,
"learning_rate": 5.654981826920189e-05,
"epoch": 0.72,
"step": 47775
},
{
"loss": 1.1469,
"grad_norm": 1.3182121515274048,
"learning_rate": 5.647472289808057e-05,
"epoch": 0.72,
"step": 47800
},
{
"loss": 1.1264,
"grad_norm": 1.2882803678512573,
"learning_rate": 5.639962752695924e-05,
"epoch": 0.72,
"step": 47825
},
{
"loss": 1.1511,
"grad_norm": 1.2225452661514282,
"learning_rate": 5.6324532155837915e-05,
"epoch": 0.72,
"step": 47850
},
{
"loss": 1.1468,
"grad_norm": 2.1035497188568115,
"learning_rate": 5.6249436784716593e-05,
"epoch": 0.72,
"step": 47875
},
{
"loss": 1.1549,
"grad_norm": 1.6698153018951416,
"learning_rate": 5.6174341413595265e-05,
"epoch": 0.72,
"step": 47900
},
{
"loss": 1.176,
"grad_norm": 1.939454436302185,
"learning_rate": 5.609924604247394e-05,
"epoch": 0.72,
"step": 47925
},
{
"loss": 1.1494,
"grad_norm": 1.100350260734558,
"learning_rate": 5.6024150671352616e-05,
"epoch": 0.72,
"step": 47950
},
{
"loss": 1.1401,
"grad_norm": 1.3846749067306519,
"learning_rate": 5.59490553002313e-05,
"epoch": 0.72,
"step": 47975
},
{
"loss": 1.1711,
"grad_norm": 1.5334635972976685,
"learning_rate": 5.5873959929109974e-05,
"epoch": 0.72,
"step": 48000
},
{
"loss": 1.1391,
"grad_norm": 1.1351529359817505,
"learning_rate": 5.579886455798865e-05,
"epoch": 0.72,
"step": 48025
},
{
"loss": 1.1284,
"grad_norm": 1.8126013278961182,
"learning_rate": 5.5723769186867325e-05,
"epoch": 0.72,
"step": 48050
},
{
"loss": 1.1845,
"grad_norm": 1.187787652015686,
"learning_rate": 5.5648673815746e-05,
"epoch": 0.72,
"step": 48075
},
{
"loss": 1.1659,
"grad_norm": 2.978299140930176,
"learning_rate": 5.5573578444624676e-05,
"epoch": 0.72,
"step": 48100
},
{
"loss": 1.1497,
"grad_norm": 1.6019436120986938,
"learning_rate": 5.549848307350335e-05,
"epoch": 0.72,
"step": 48125
},
{
"loss": 1.0611,
"grad_norm": 1.380462408065796,
"learning_rate": 5.542338770238202e-05,
"epoch": 0.72,
"step": 48150
},
{
"loss": 1.1723,
"grad_norm": 1.732176661491394,
"learning_rate": 5.53482923312607e-05,
"epoch": 0.72,
"step": 48175
},
{
"loss": 1.2292,
"grad_norm": 1.4990782737731934,
"learning_rate": 5.5273196960139385e-05,
"epoch": 0.72,
"step": 48200
},
{
"loss": 1.1319,
"grad_norm": 1.7996023893356323,
"learning_rate": 5.519810158901806e-05,
"epoch": 0.72,
"step": 48225
},
{
"loss": 1.1087,
"grad_norm": 1.8955588340759277,
"learning_rate": 5.5123006217896736e-05,
"epoch": 0.72,
"step": 48250
},
{
"loss": 1.1206,
"grad_norm": 1.337724208831787,
"learning_rate": 5.504791084677541e-05,
"epoch": 0.73,
"step": 48275
},
{
"loss": 1.1627,
"grad_norm": 1.515310525894165,
"learning_rate": 5.497281547565408e-05,
"epoch": 0.73,
"step": 48300
},
{
"loss": 1.1334,
"grad_norm": 1.9032014608383179,
"learning_rate": 5.489772010453276e-05,
"epoch": 0.73,
"step": 48325
},
{
"loss": 1.0996,
"grad_norm": 1.8782274723052979,
"learning_rate": 5.482262473341143e-05,
"epoch": 0.73,
"step": 48350
},
{
"loss": 1.1287,
"grad_norm": 1.1794263124465942,
"learning_rate": 5.474752936229011e-05,
"epoch": 0.73,
"step": 48375
},
{
"loss": 1.1001,
"grad_norm": 2.261495351791382,
"learning_rate": 5.4672433991168795e-05,
"epoch": 0.73,
"step": 48400
},
{
"loss": 1.0928,
"grad_norm": 1.380417823791504,
"learning_rate": 5.459733862004747e-05,
"epoch": 0.73,
"step": 48425
},
{
"loss": 1.0945,
"grad_norm": 1.0887725353240967,
"learning_rate": 5.452224324892614e-05,
"epoch": 0.73,
"step": 48450
},
{
"loss": 1.1247,
"grad_norm": 1.2714431285858154,
"learning_rate": 5.444714787780482e-05,
"epoch": 0.73,
"step": 48475
},
{
"loss": 1.0856,
"grad_norm": 1.475818157196045,
"learning_rate": 5.437205250668349e-05,
"epoch": 0.73,
"step": 48500
},
{
"loss": 1.0579,
"grad_norm": 1.0250446796417236,
"learning_rate": 5.429695713556216e-05,
"epoch": 0.73,
"step": 48525
},
{
"loss": 1.0451,
"grad_norm": 1.4754000902175903,
"learning_rate": 5.422186176444084e-05,
"epoch": 0.73,
"step": 48550
},
{
"loss": 1.1335,
"grad_norm": 1.6405061483383179,
"learning_rate": 5.4146766393319514e-05,
"epoch": 0.73,
"step": 48575
},
{
"loss": 1.1399,
"grad_norm": 1.1049416065216064,
"learning_rate": 5.407167102219819e-05,
"epoch": 0.73,
"step": 48600
},
{
"loss": 1.1112,
"grad_norm": 1.7172602415084839,
"learning_rate": 5.399657565107688e-05,
"epoch": 0.73,
"step": 48625
},
{
"loss": 1.1008,
"grad_norm": 1.833646297454834,
"learning_rate": 5.392148027995555e-05,
"epoch": 0.73,
"step": 48650
},
{
"loss": 1.1053,
"grad_norm": 1.4659416675567627,
"learning_rate": 5.384638490883422e-05,
"epoch": 0.73,
"step": 48675
},
{
"loss": 1.0825,
"grad_norm": 1.6295710802078247,
"learning_rate": 5.37712895377129e-05,
"epoch": 0.73,
"step": 48700
},
{
"loss": 1.1665,
"grad_norm": 1.4064006805419922,
"learning_rate": 5.369619416659157e-05,
"epoch": 0.73,
"step": 48725
},
{
"loss": 1.0461,
"grad_norm": 1.548251986503601,
"learning_rate": 5.3621098795470245e-05,
"epoch": 0.73,
"step": 48750
},
{
"loss": 1.1471,
"grad_norm": 1.6318676471710205,
"learning_rate": 5.3546003424348924e-05,
"epoch": 0.73,
"step": 48775
},
{
"loss": 1.1411,
"grad_norm": 1.321753978729248,
"learning_rate": 5.3470908053227596e-05,
"epoch": 0.73,
"step": 48800
},
{
"loss": 1.1102,
"grad_norm": 1.167902946472168,
"learning_rate": 5.3395812682106275e-05,
"epoch": 0.73,
"step": 48825
},
{
"loss": 1.0909,
"grad_norm": 1.3905353546142578,
"learning_rate": 5.332071731098496e-05,
"epoch": 0.73,
"step": 48850
},
{
"loss": 1.1796,
"grad_norm": 1.0988661050796509,
"learning_rate": 5.324562193986363e-05,
"epoch": 0.73,
"step": 48875
},
{
"loss": 1.2214,
"grad_norm": 1.348551630973816,
"learning_rate": 5.3170526568742305e-05,
"epoch": 0.73,
"step": 48900
},
{
"loss": 1.0649,
"grad_norm": 1.176352858543396,
"learning_rate": 5.3095431197620984e-05,
"epoch": 0.73,
"step": 48925
},
{
"loss": 1.1334,
"grad_norm": 1.448281168937683,
"learning_rate": 5.3020335826499656e-05,
"epoch": 0.74,
"step": 48950
},
{
"loss": 1.1764,
"grad_norm": 1.7525986433029175,
"learning_rate": 5.294524045537833e-05,
"epoch": 0.74,
"step": 48975
},
{
"loss": 1.1873,
"grad_norm": 1.6281651258468628,
"learning_rate": 5.287014508425701e-05,
"epoch": 0.74,
"step": 49000
},
{
"loss": 1.1259,
"grad_norm": 1.1979387998580933,
"learning_rate": 5.279504971313568e-05,
"epoch": 0.74,
"step": 49025
},
{
"loss": 1.084,
"grad_norm": 1.2359240055084229,
"learning_rate": 5.2719954342014365e-05,
"epoch": 0.74,
"step": 49050
},
{
"loss": 1.0872,
"grad_norm": 1.6398156881332397,
"learning_rate": 5.2644858970893044e-05,
"epoch": 0.74,
"step": 49075
},
{
"loss": 1.1564,
"grad_norm": 1.6317737102508545,
"learning_rate": 5.2569763599771716e-05,
"epoch": 0.74,
"step": 49100
},
{
"loss": 1.1798,
"grad_norm": 1.1270239353179932,
"learning_rate": 5.249466822865039e-05,
"epoch": 0.74,
"step": 49125
},
{
"loss": 1.1076,
"grad_norm": 1.504706621170044,
"learning_rate": 5.2419572857529067e-05,
"epoch": 0.74,
"step": 49150
},
{
"loss": 1.0761,
"grad_norm": 2.199673891067505,
"learning_rate": 5.234447748640774e-05,
"epoch": 0.74,
"step": 49175
},
{
"loss": 1.1395,
"grad_norm": 1.288529396057129,
"learning_rate": 5.226938211528641e-05,
"epoch": 0.74,
"step": 49200
},
{
"loss": 1.1776,
"grad_norm": 1.4537650346755981,
"learning_rate": 5.219428674416509e-05,
"epoch": 0.74,
"step": 49225
},
{
"loss": 1.1639,
"grad_norm": 2.028994560241699,
"learning_rate": 5.211919137304376e-05,
"epoch": 0.74,
"step": 49250
},
{
"loss": 1.0929,
"grad_norm": 1.8188201189041138,
"learning_rate": 5.204409600192245e-05,
"epoch": 0.74,
"step": 49275
},
{
"loss": 1.1054,
"grad_norm": 1.5506641864776611,
"learning_rate": 5.1969000630801126e-05,
"epoch": 0.74,
"step": 49300
},
{
"loss": 1.0699,
"grad_norm": 1.526999592781067,
"learning_rate": 5.18939052596798e-05,
"epoch": 0.74,
"step": 49325
},
{
"loss": 1.1666,
"grad_norm": 1.4270501136779785,
"learning_rate": 5.181880988855847e-05,
"epoch": 0.74,
"step": 49350
},
{
"loss": 1.11,
"grad_norm": 1.3699140548706055,
"learning_rate": 5.174371451743715e-05,
"epoch": 0.74,
"step": 49375
},
{
"loss": 1.1069,
"grad_norm": 1.468616008758545,
"learning_rate": 5.166861914631582e-05,
"epoch": 0.74,
"step": 49400
},
{
"loss": 1.0625,
"grad_norm": 2.158735513687134,
"learning_rate": 5.1593523775194493e-05,
"epoch": 0.74,
"step": 49425
},
{
"loss": 1.1034,
"grad_norm": 1.0673458576202393,
"learning_rate": 5.151842840407317e-05,
"epoch": 0.74,
"step": 49450
},
{
"loss": 1.1262,
"grad_norm": 1.5599156618118286,
"learning_rate": 5.1443333032951844e-05,
"epoch": 0.74,
"step": 49475
},
{
"loss": 1.1222,
"grad_norm": 1.7378026247024536,
"learning_rate": 5.136823766183053e-05,
"epoch": 0.74,
"step": 49500
},
{
"loss": 1.2029,
"grad_norm": 2.2808854579925537,
"learning_rate": 5.129314229070921e-05,
"epoch": 0.74,
"step": 49525
},
{
"loss": 1.1349,
"grad_norm": 1.3279706239700317,
"learning_rate": 5.121804691958788e-05,
"epoch": 0.74,
"step": 49550
},
{
"loss": 1.1291,
"grad_norm": 1.7178452014923096,
"learning_rate": 5.114295154846655e-05,
"epoch": 0.74,
"step": 49575
},
{
"loss": 1.1029,
"grad_norm": 1.7041009664535522,
"learning_rate": 5.106785617734523e-05,
"epoch": 0.74,
"step": 49600
},
{
"loss": 1.1401,
"grad_norm": 1.7986174821853638,
"learning_rate": 5.0992760806223904e-05,
"epoch": 0.75,
"step": 49625
},
{
"loss": 1.095,
"grad_norm": 1.9708353281021118,
"learning_rate": 5.0917665435102576e-05,
"epoch": 0.75,
"step": 49650
},
{
"loss": 1.0777,
"grad_norm": 1.675958275794983,
"learning_rate": 5.0842570063981255e-05,
"epoch": 0.75,
"step": 49675
},
{
"loss": 1.0473,
"grad_norm": 1.1292997598648071,
"learning_rate": 5.076747469285994e-05,
"epoch": 0.75,
"step": 49700
},
{
"loss": 1.1196,
"grad_norm": 1.3241393566131592,
"learning_rate": 5.069237932173861e-05,
"epoch": 0.75,
"step": 49725
},
{
"loss": 1.1062,
"grad_norm": 1.148818850517273,
"learning_rate": 5.061728395061729e-05,
"epoch": 0.75,
"step": 49750
},
{
"loss": 1.1366,
"grad_norm": 1.6072300672531128,
"learning_rate": 5.0542188579495964e-05,
"epoch": 0.75,
"step": 49775
},
{
"loss": 1.062,
"grad_norm": 2.6563072204589844,
"learning_rate": 5.0467093208374636e-05,
"epoch": 0.75,
"step": 49800
},
{
"loss": 1.1615,
"grad_norm": 1.5061039924621582,
"learning_rate": 5.0391997837253315e-05,
"epoch": 0.75,
"step": 49825
},
{
"loss": 1.1299,
"grad_norm": 1.6999189853668213,
"learning_rate": 5.031690246613199e-05,
"epoch": 0.75,
"step": 49850
},
{
"loss": 1.157,
"grad_norm": 1.8330657482147217,
"learning_rate": 5.0241807095010666e-05,
"epoch": 0.75,
"step": 49875
},
{
"loss": 1.1452,
"grad_norm": 1.6632230281829834,
"learning_rate": 5.016671172388934e-05,
"epoch": 0.75,
"step": 49900
},
{
"loss": 1.1767,
"grad_norm": 1.2450637817382812,
"learning_rate": 5.0091616352768023e-05,
"epoch": 0.75,
"step": 49925
},
{
"loss": 1.1398,
"grad_norm": 1.1371906995773315,
"learning_rate": 5.0016520981646695e-05,
"epoch": 0.75,
"step": 49950
},
{
"loss": 1.1173,
"grad_norm": 1.1480075120925903,
"learning_rate": 4.9941425610525374e-05,
"epoch": 0.75,
"step": 49975
},
{
"loss": 1.0499,
"grad_norm": 1.0953096151351929,
"learning_rate": 4.9866330239404046e-05,
"epoch": 0.75,
"step": 50000
},
{
"loss": 1.0824,
"grad_norm": 1.4675097465515137,
"learning_rate": 4.979123486828272e-05,
"epoch": 0.75,
"step": 50025
},
{
"loss": 1.1413,
"grad_norm": 1.9132678508758545,
"learning_rate": 4.97161394971614e-05,
"epoch": 0.75,
"step": 50050
},
{
"loss": 1.0562,
"grad_norm": 1.3277969360351562,
"learning_rate": 4.9641044126040076e-05,
"epoch": 0.75,
"step": 50075
},
{
"loss": 1.1071,
"grad_norm": 1.4362881183624268,
"learning_rate": 4.956594875491875e-05,
"epoch": 0.75,
"step": 50100
},
{
"loss": 1.1572,
"grad_norm": 0.8633365631103516,
"learning_rate": 4.949085338379743e-05,
"epoch": 0.75,
"step": 50125
},
{
"loss": 1.0881,
"grad_norm": 1.653272271156311,
"learning_rate": 4.94157580126761e-05,
"epoch": 0.75,
"step": 50150
},
{
"loss": 1.158,
"grad_norm": 2.0135273933410645,
"learning_rate": 4.934066264155477e-05,
"epoch": 0.75,
"step": 50175
},
{
"loss": 1.1024,
"grad_norm": 1.119586706161499,
"learning_rate": 4.926556727043346e-05,
"epoch": 0.75,
"step": 50200
},
{
"loss": 1.1258,
"grad_norm": 0.9510914087295532,
"learning_rate": 4.919047189931213e-05,
"epoch": 0.75,
"step": 50225
},
{
"loss": 1.2239,
"grad_norm": 1.510134220123291,
"learning_rate": 4.91153765281908e-05,
"epoch": 0.75,
"step": 50250
},
{
"loss": 1.0673,
"grad_norm": 1.0487359762191772,
"learning_rate": 4.904028115706948e-05,
"epoch": 0.76,
"step": 50275
},
{
"loss": 1.1268,
"grad_norm": 1.5456733703613281,
"learning_rate": 4.896518578594816e-05,
"epoch": 0.76,
"step": 50300
},
{
"loss": 1.2115,
"grad_norm": 1.761313557624817,
"learning_rate": 4.889009041482683e-05,
"epoch": 0.76,
"step": 50325
},
{
"loss": 1.1656,
"grad_norm": 1.7086901664733887,
"learning_rate": 4.881499504370551e-05,
"epoch": 0.76,
"step": 50350
},
{
"loss": 1.0727,
"grad_norm": 1.7276190519332886,
"learning_rate": 4.874290348742904e-05,
"epoch": 0.76,
"step": 50375
},
{
"loss": 1.0991,
"grad_norm": 1.3871339559555054,
"learning_rate": 4.866780811630771e-05,
"epoch": 0.76,
"step": 50400
},
{
"loss": 1.0951,
"grad_norm": 1.9214799404144287,
"learning_rate": 4.859271274518639e-05,
"epoch": 0.76,
"step": 50425
},
{
"loss": 1.1219,
"grad_norm": 1.5749818086624146,
"learning_rate": 4.851761737406507e-05,
"epoch": 0.76,
"step": 50450
},
{
"loss": 1.106,
"grad_norm": 4.110257625579834,
"learning_rate": 4.844252200294374e-05,
"epoch": 0.76,
"step": 50475
},
{
"loss": 1.0866,
"grad_norm": 1.6532156467437744,
"learning_rate": 4.836742663182241e-05,
"epoch": 0.76,
"step": 50500
},
{
"loss": 1.1183,
"grad_norm": 1.485727071762085,
"learning_rate": 4.829233126070109e-05,
"epoch": 0.76,
"step": 50525
},
{
"loss": 1.1405,
"grad_norm": 1.4788265228271484,
"learning_rate": 4.821723588957977e-05,
"epoch": 0.76,
"step": 50550
},
{
"loss": 1.1491,
"grad_norm": 2.762058973312378,
"learning_rate": 4.814214051845844e-05,
"epoch": 0.76,
"step": 50575
},
{
"loss": 1.1483,
"grad_norm": 1.7897312641143799,
"learning_rate": 4.806704514733712e-05,
"epoch": 0.76,
"step": 50600
},
{
"loss": 1.0947,
"grad_norm": 1.1446094512939453,
"learning_rate": 4.799194977621579e-05,
"epoch": 0.76,
"step": 50625
},
{
"loss": 1.1116,
"grad_norm": 0.997673749923706,
"learning_rate": 4.791685440509447e-05,
"epoch": 0.76,
"step": 50650
},
{
"loss": 1.1374,
"grad_norm": 1.243049144744873,
"learning_rate": 4.784175903397315e-05,
"epoch": 0.76,
"step": 50675
},
{
"loss": 1.1458,
"grad_norm": 1.5746535062789917,
"learning_rate": 4.776666366285182e-05,
"epoch": 0.76,
"step": 50700
},
{
"loss": 1.1191,
"grad_norm": 1.0133841037750244,
"learning_rate": 4.7691568291730495e-05,
"epoch": 0.76,
"step": 50725
},
{
"loss": 1.0886,
"grad_norm": 2.07098650932312,
"learning_rate": 4.761647292060918e-05,
"epoch": 0.76,
"step": 50750
},
{
"loss": 1.1666,
"grad_norm": 2.41629695892334,
"learning_rate": 4.754137754948785e-05,
"epoch": 0.76,
"step": 50775
},
{
"loss": 1.1094,
"grad_norm": 1.2683985233306885,
"learning_rate": 4.7466282178366525e-05,
"epoch": 0.76,
"step": 50800
},
{
"loss": 1.1979,
"grad_norm": 2.520268201828003,
"learning_rate": 4.7391186807245204e-05,
"epoch": 0.76,
"step": 50825
},
{
"loss": 1.124,
"grad_norm": 1.5505998134613037,
"learning_rate": 4.7316091436123876e-05,
"epoch": 0.76,
"step": 50850
},
{
"loss": 1.1447,
"grad_norm": 1.3630999326705933,
"learning_rate": 4.7240996065002555e-05,
"epoch": 0.76,
"step": 50875
},
{
"loss": 1.0995,
"grad_norm": 1.36336350440979,
"learning_rate": 4.7165900693881234e-05,
"epoch": 0.76,
"step": 50900
},
{
"loss": 1.0873,
"grad_norm": 1.852036952972412,
"learning_rate": 4.7090805322759906e-05,
"epoch": 0.76,
"step": 50925
},
{
"loss": 1.1525,
"grad_norm": 1.3926178216934204,
"learning_rate": 4.7015709951638585e-05,
"epoch": 0.77,
"step": 50950
},
{
"loss": 1.0773,
"grad_norm": 1.5449055433273315,
"learning_rate": 4.6940614580517263e-05,
"epoch": 0.77,
"step": 50975
},
{
"loss": 1.1175,
"grad_norm": 1.1615759134292603,
"learning_rate": 4.6865519209395936e-05,
"epoch": 0.77,
"step": 51000
},
{
"loss": 1.1354,
"grad_norm": 1.8657679557800293,
"learning_rate": 4.679042383827461e-05,
"epoch": 0.77,
"step": 51025
},
{
"loss": 1.0671,
"grad_norm": 1.0491373538970947,
"learning_rate": 4.6715328467153287e-05,
"epoch": 0.77,
"step": 51050
},
{
"loss": 1.0703,
"grad_norm": 1.1459797620773315,
"learning_rate": 4.6640233096031965e-05,
"epoch": 0.77,
"step": 51075
},
{
"loss": 1.0243,
"grad_norm": 2.31217098236084,
"learning_rate": 4.656513772491064e-05,
"epoch": 0.77,
"step": 51100
},
{
"loss": 1.1454,
"grad_norm": 1.3556299209594727,
"learning_rate": 4.6490042353789316e-05,
"epoch": 0.77,
"step": 51125
},
{
"loss": 1.0407,
"grad_norm": 1.560073971748352,
"learning_rate": 4.641494698266799e-05,
"epoch": 0.77,
"step": 51150
},
{
"loss": 1.1132,
"grad_norm": 2.1798226833343506,
"learning_rate": 4.633985161154667e-05,
"epoch": 0.77,
"step": 51175
},
{
"loss": 1.1708,
"grad_norm": 1.246620774269104,
"learning_rate": 4.6264756240425346e-05,
"epoch": 0.77,
"step": 51200
},
{
"loss": 1.1629,
"grad_norm": 1.2318778038024902,
"learning_rate": 4.618966086930402e-05,
"epoch": 0.77,
"step": 51225
},
{
"loss": 1.1289,
"grad_norm": 1.7169677019119263,
"learning_rate": 4.611456549818269e-05,
"epoch": 0.77,
"step": 51250
},
{
"loss": 1.1142,
"grad_norm": 1.4329315423965454,
"learning_rate": 4.603947012706137e-05,
"epoch": 0.77,
"step": 51275
},
{
"loss": 1.0928,
"grad_norm": 1.3384554386138916,
"learning_rate": 4.596437475594005e-05,
"epoch": 0.77,
"step": 51300
},
{
"loss": 1.0633,
"grad_norm": 1.2550382614135742,
"learning_rate": 4.588927938481872e-05,
"epoch": 0.77,
"step": 51325
},
{
"loss": 1.1229,
"grad_norm": 1.7870949506759644,
"learning_rate": 4.58141840136974e-05,
"epoch": 0.77,
"step": 51350
},
{
"loss": 1.0861,
"grad_norm": 1.3869023323059082,
"learning_rate": 4.573908864257607e-05,
"epoch": 0.77,
"step": 51375
},
{
"loss": 1.129,
"grad_norm": 1.477853536605835,
"learning_rate": 4.566399327145475e-05,
"epoch": 0.77,
"step": 51400
},
{
"loss": 1.1998,
"grad_norm": 1.8617823123931885,
"learning_rate": 4.558889790033343e-05,
"epoch": 0.77,
"step": 51425
},
{
"loss": 1.1041,
"grad_norm": 1.5415617227554321,
"learning_rate": 4.55138025292121e-05,
"epoch": 0.77,
"step": 51450
},
{
"loss": 1.0528,
"grad_norm": 1.7439303398132324,
"learning_rate": 4.543870715809077e-05,
"epoch": 0.77,
"step": 51475
},
{
"loss": 1.1482,
"grad_norm": 1.3494808673858643,
"learning_rate": 4.536361178696945e-05,
"epoch": 0.77,
"step": 51500
},
{
"loss": 1.1014,
"grad_norm": 2.855989456176758,
"learning_rate": 4.528851641584813e-05,
"epoch": 0.77,
"step": 51525
},
{
"loss": 1.1184,
"grad_norm": 1.9916918277740479,
"learning_rate": 4.52134210447268e-05,
"epoch": 0.77,
"step": 51550
},
{
"loss": 1.1653,
"grad_norm": 2.4080750942230225,
"learning_rate": 4.513832567360548e-05,
"epoch": 0.77,
"step": 51575
},
{
"loss": 1.0892,
"grad_norm": 2.9220423698425293,
"learning_rate": 4.5063230302484154e-05,
"epoch": 0.77,
"step": 51600
},
{
"loss": 1.1106,
"grad_norm": 1.840510368347168,
"learning_rate": 4.498813493136283e-05,
"epoch": 0.78,
"step": 51625
},
{
"loss": 1.1684,
"grad_norm": 1.8185040950775146,
"learning_rate": 4.491303956024151e-05,
"epoch": 0.78,
"step": 51650
},
{
"loss": 1.1476,
"grad_norm": 1.5126756429672241,
"learning_rate": 4.4837944189120184e-05,
"epoch": 0.78,
"step": 51675
},
{
"loss": 1.0972,
"grad_norm": 2.2125062942504883,
"learning_rate": 4.476284881799886e-05,
"epoch": 0.78,
"step": 51700
},
{
"loss": 1.1401,
"grad_norm": 2.3873279094696045,
"learning_rate": 4.4687753446877535e-05,
"epoch": 0.78,
"step": 51725
},
{
"loss": 1.0909,
"grad_norm": 1.7589930295944214,
"learning_rate": 4.4612658075756213e-05,
"epoch": 0.78,
"step": 51750
},
{
"loss": 1.1803,
"grad_norm": 1.4225094318389893,
"learning_rate": 4.4537562704634886e-05,
"epoch": 0.78,
"step": 51775
},
{
"loss": 1.1538,
"grad_norm": 1.1532394886016846,
"learning_rate": 4.4462467333513564e-05,
"epoch": 0.78,
"step": 51800
},
{
"loss": 1.1152,
"grad_norm": 1.6151081323623657,
"learning_rate": 4.4387371962392237e-05,
"epoch": 0.78,
"step": 51825
},
{
"loss": 1.1382,
"grad_norm": 2.63031005859375,
"learning_rate": 4.4312276591270915e-05,
"epoch": 0.78,
"step": 51850
},
{
"loss": 1.1601,
"grad_norm": 2.0375289916992188,
"learning_rate": 4.4237181220149594e-05,
"epoch": 0.78,
"step": 51875
},
{
"loss": 1.1179,
"grad_norm": 1.1906908750534058,
"learning_rate": 4.4162085849028266e-05,
"epoch": 0.78,
"step": 51900
},
{
"loss": 1.1114,
"grad_norm": 1.8762083053588867,
"learning_rate": 4.4086990477906945e-05,
"epoch": 0.78,
"step": 51925
},
{
"loss": 1.1274,
"grad_norm": 1.7392081022262573,
"learning_rate": 4.4011895106785624e-05,
"epoch": 0.78,
"step": 51950
},
{
"loss": 1.1117,
"grad_norm": 1.480962872505188,
"learning_rate": 4.3936799735664296e-05,
"epoch": 0.78,
"step": 51975
},
{
"loss": 1.1623,
"grad_norm": 1.1408778429031372,
"learning_rate": 4.386170436454297e-05,
"epoch": 0.78,
"step": 52000
},
{
"loss": 1.0937,
"grad_norm": 2.5744760036468506,
"learning_rate": 4.378660899342165e-05,
"epoch": 0.78,
"step": 52025
},
{
"loss": 1.1968,
"grad_norm": 1.890031337738037,
"learning_rate": 4.371151362230032e-05,
"epoch": 0.78,
"step": 52050
},
{
"loss": 1.1761,
"grad_norm": 1.9659225940704346,
"learning_rate": 4.3636418251179e-05,
"epoch": 0.78,
"step": 52075
},
{
"loss": 1.176,
"grad_norm": 1.4123088121414185,
"learning_rate": 4.356132288005768e-05,
"epoch": 0.78,
"step": 52100
},
{
"loss": 1.1493,
"grad_norm": 1.9861273765563965,
"learning_rate": 4.348622750893635e-05,
"epoch": 0.78,
"step": 52125
},
{
"loss": 1.1376,
"grad_norm": 1.3282297849655151,
"learning_rate": 4.341113213781503e-05,
"epoch": 0.78,
"step": 52150
},
{
"loss": 1.1337,
"grad_norm": 1.8567203283309937,
"learning_rate": 4.333603676669371e-05,
"epoch": 0.78,
"step": 52175
},
{
"loss": 1.1678,
"grad_norm": 1.387803077697754,
"learning_rate": 4.326094139557238e-05,
"epoch": 0.78,
"step": 52200
},
{
"loss": 1.1369,
"grad_norm": 1.4993411302566528,
"learning_rate": 4.318584602445105e-05,
"epoch": 0.78,
"step": 52225
},
{
"loss": 1.1072,
"grad_norm": 1.83002769947052,
"learning_rate": 4.311075065332973e-05,
"epoch": 0.78,
"step": 52250
},
{
"loss": 1.1275,
"grad_norm": 1.0453954935073853,
"learning_rate": 4.303565528220841e-05,
"epoch": 0.79,
"step": 52275
},
{
"loss": 1.0546,
"grad_norm": 1.6221436262130737,
"learning_rate": 4.296055991108708e-05,
"epoch": 0.79,
"step": 52300
},
{
"loss": 1.1474,
"grad_norm": 1.4290169477462769,
"learning_rate": 4.288546453996576e-05,
"epoch": 0.79,
"step": 52325
},
{
"loss": 1.0947,
"grad_norm": 2.136678695678711,
"learning_rate": 4.281036916884443e-05,
"epoch": 0.79,
"step": 52350
},
{
"loss": 1.1738,
"grad_norm": 1.6790881156921387,
"learning_rate": 4.273527379772311e-05,
"epoch": 0.79,
"step": 52375
},
{
"loss": 1.1474,
"grad_norm": 1.1431602239608765,
"learning_rate": 4.266017842660179e-05,
"epoch": 0.79,
"step": 52400
},
{
"loss": 1.1478,
"grad_norm": 1.6592998504638672,
"learning_rate": 4.258508305548046e-05,
"epoch": 0.79,
"step": 52425
},
{
"loss": 1.0866,
"grad_norm": 3.1507890224456787,
"learning_rate": 4.250998768435914e-05,
"epoch": 0.79,
"step": 52450
},
{
"loss": 1.1061,
"grad_norm": 2.271561861038208,
"learning_rate": 4.243489231323781e-05,
"epoch": 0.79,
"step": 52475
},
{
"loss": 1.1853,
"grad_norm": 1.3959341049194336,
"learning_rate": 4.235979694211649e-05,
"epoch": 0.79,
"step": 52500
},
{
"loss": 1.1685,
"grad_norm": 1.9828035831451416,
"learning_rate": 4.2284701570995164e-05,
"epoch": 0.79,
"step": 52525
},
{
"loss": 1.1353,
"grad_norm": 1.348754644393921,
"learning_rate": 4.220960619987384e-05,
"epoch": 0.79,
"step": 52550
},
{
"loss": 1.0879,
"grad_norm": 2.036592483520508,
"learning_rate": 4.2134510828752514e-05,
"epoch": 0.79,
"step": 52575
},
{
"loss": 1.1763,
"grad_norm": 2.1805171966552734,
"learning_rate": 4.205941545763119e-05,
"epoch": 0.79,
"step": 52600
},
{
"loss": 1.0777,
"grad_norm": 1.3825914859771729,
"learning_rate": 4.198432008650987e-05,
"epoch": 0.79,
"step": 52625
},
{
"loss": 1.1068,
"grad_norm": 1.282179832458496,
"learning_rate": 4.1909224715388544e-05,
"epoch": 0.79,
"step": 52650
},
{
"loss": 1.1387,
"grad_norm": 1.4758460521697998,
"learning_rate": 4.183412934426722e-05,
"epoch": 0.79,
"step": 52675
},
{
"loss": 1.1992,
"grad_norm": 1.5327672958374023,
"learning_rate": 4.1759033973145895e-05,
"epoch": 0.79,
"step": 52700
},
{
"loss": 1.1051,
"grad_norm": 1.300261378288269,
"learning_rate": 4.1683938602024574e-05,
"epoch": 0.79,
"step": 52725
},
{
"loss": 1.1114,
"grad_norm": 1.1575740575790405,
"learning_rate": 4.1608843230903246e-05,
"epoch": 0.79,
"step": 52750
},
{
"loss": 1.1132,
"grad_norm": 1.5386431217193604,
"learning_rate": 4.1533747859781925e-05,
"epoch": 0.79,
"step": 52775
},
{
"loss": 1.1619,
"grad_norm": 1.5786181688308716,
"learning_rate": 4.14586524886606e-05,
"epoch": 0.79,
"step": 52800
},
{
"loss": 1.1353,
"grad_norm": 2.4933083057403564,
"learning_rate": 4.1383557117539276e-05,
"epoch": 0.79,
"step": 52825
},
{
"loss": 1.1693,
"grad_norm": 2.640106439590454,
"learning_rate": 4.1308461746417955e-05,
"epoch": 0.79,
"step": 52850
},
{
"loss": 1.1121,
"grad_norm": 1.0569820404052734,
"learning_rate": 4.123336637529663e-05,
"epoch": 0.79,
"step": 52875
},
{
"loss": 1.0855,
"grad_norm": 1.2210026979446411,
"learning_rate": 4.1158271004175306e-05,
"epoch": 0.79,
"step": 52900
},
{
"loss": 1.1511,
"grad_norm": 1.5878472328186035,
"learning_rate": 4.1083175633053985e-05,
"epoch": 0.79,
"step": 52925
},
{
"loss": 1.1387,
"grad_norm": 1.8040430545806885,
"learning_rate": 4.100808026193266e-05,
"epoch": 0.8,
"step": 52950
},
{
"loss": 1.1351,
"grad_norm": 1.5732437372207642,
"learning_rate": 4.093298489081133e-05,
"epoch": 0.8,
"step": 52975
},
{
"loss": 1.1317,
"grad_norm": 1.9610670804977417,
"learning_rate": 4.085788951969001e-05,
"epoch": 0.8,
"step": 53000
},
{
"loss": 1.1201,
"grad_norm": 1.1342912912368774,
"learning_rate": 4.078279414856868e-05,
"epoch": 0.8,
"step": 53025
},
{
"loss": 1.1302,
"grad_norm": 1.7887520790100098,
"learning_rate": 4.070769877744736e-05,
"epoch": 0.8,
"step": 53050
},
{
"loss": 1.1894,
"grad_norm": 2.0694801807403564,
"learning_rate": 4.063260340632604e-05,
"epoch": 0.8,
"step": 53075
},
{
"loss": 1.106,
"grad_norm": 1.582124948501587,
"learning_rate": 4.055750803520471e-05,
"epoch": 0.8,
"step": 53100
},
{
"loss": 1.1535,
"grad_norm": 0.8474487066268921,
"learning_rate": 4.048241266408339e-05,
"epoch": 0.8,
"step": 53125
},
{
"loss": 1.1005,
"grad_norm": 1.3499822616577148,
"learning_rate": 4.041032110780692e-05,
"epoch": 0.8,
"step": 53150
},
{
"loss": 1.1252,
"grad_norm": 3.777379274368286,
"learning_rate": 4.0335225736685596e-05,
"epoch": 0.8,
"step": 53175
},
{
"loss": 1.0519,
"grad_norm": 1.1216979026794434,
"learning_rate": 4.026013036556427e-05,
"epoch": 0.8,
"step": 53200
},
{
"loss": 1.1814,
"grad_norm": 1.710165023803711,
"learning_rate": 4.018503499444295e-05,
"epoch": 0.8,
"step": 53225
},
{
"loss": 1.1011,
"grad_norm": 1.4310054779052734,
"learning_rate": 4.010993962332162e-05,
"epoch": 0.8,
"step": 53250
},
{
"loss": 1.1253,
"grad_norm": 1.3393102884292603,
"learning_rate": 4.00348442522003e-05,
"epoch": 0.8,
"step": 53275
},
{
"loss": 1.2024,
"grad_norm": 0.9497338533401489,
"learning_rate": 3.995974888107897e-05,
"epoch": 0.8,
"step": 53300
},
{
"loss": 1.1295,
"grad_norm": 1.777761697769165,
"learning_rate": 3.988465350995765e-05,
"epoch": 0.8,
"step": 53325
},
{
"loss": 1.0795,
"grad_norm": 1.3849236965179443,
"learning_rate": 3.980955813883632e-05,
"epoch": 0.8,
"step": 53350
},
{
"loss": 1.0983,
"grad_norm": 1.8323969841003418,
"learning_rate": 3.9734462767715e-05,
"epoch": 0.8,
"step": 53375
},
{
"loss": 1.1308,
"grad_norm": 1.5328776836395264,
"learning_rate": 3.965936739659368e-05,
"epoch": 0.8,
"step": 53400
},
{
"loss": 1.1389,
"grad_norm": 1.2152605056762695,
"learning_rate": 3.958427202547235e-05,
"epoch": 0.8,
"step": 53425
},
{
"loss": 1.1156,
"grad_norm": 1.0716402530670166,
"learning_rate": 3.950917665435103e-05,
"epoch": 0.8,
"step": 53450
},
{
"loss": 1.0923,
"grad_norm": 1.3493958711624146,
"learning_rate": 3.94340812832297e-05,
"epoch": 0.8,
"step": 53475
},
{
"loss": 1.0859,
"grad_norm": 1.8882994651794434,
"learning_rate": 3.935898591210838e-05,
"epoch": 0.8,
"step": 53500
},
{
"loss": 1.0888,
"grad_norm": 1.1161054372787476,
"learning_rate": 3.928389054098706e-05,
"epoch": 0.8,
"step": 53525
},
{
"loss": 1.1155,
"grad_norm": 1.3479957580566406,
"learning_rate": 3.920879516986573e-05,
"epoch": 0.8,
"step": 53550
},
{
"loss": 1.135,
"grad_norm": 1.2609208822250366,
"learning_rate": 3.9133699798744404e-05,
"epoch": 0.8,
"step": 53575
},
{
"loss": 1.0736,
"grad_norm": 1.8553820848464966,
"learning_rate": 3.905860442762308e-05,
"epoch": 0.81,
"step": 53600
},
{
"loss": 1.091,
"grad_norm": 1.7198560237884521,
"learning_rate": 3.898350905650176e-05,
"epoch": 0.81,
"step": 53625
},
{
"loss": 1.1926,
"grad_norm": 1.0929125547409058,
"learning_rate": 3.8908413685380433e-05,
"epoch": 0.81,
"step": 53650
},
{
"loss": 1.1161,
"grad_norm": 1.4651769399642944,
"learning_rate": 3.883331831425911e-05,
"epoch": 0.81,
"step": 53675
},
{
"loss": 1.1704,
"grad_norm": 2.1259841918945312,
"learning_rate": 3.8758222943137784e-05,
"epoch": 0.81,
"step": 53700
},
{
"loss": 1.1024,
"grad_norm": 1.6856151819229126,
"learning_rate": 3.868312757201646e-05,
"epoch": 0.81,
"step": 53725
},
{
"loss": 1.1725,
"grad_norm": 1.9457602500915527,
"learning_rate": 3.860803220089514e-05,
"epoch": 0.81,
"step": 53750
},
{
"loss": 1.1107,
"grad_norm": 1.1278740167617798,
"learning_rate": 3.8532936829773814e-05,
"epoch": 0.81,
"step": 53775
},
{
"loss": 1.0866,
"grad_norm": 1.989402174949646,
"learning_rate": 3.8457841458652486e-05,
"epoch": 0.81,
"step": 53800
},
{
"loss": 1.09,
"grad_norm": 2.686849355697632,
"learning_rate": 3.8382746087531165e-05,
"epoch": 0.81,
"step": 53825
},
{
"loss": 1.0897,
"grad_norm": 2.199162244796753,
"learning_rate": 3.8307650716409844e-05,
"epoch": 0.81,
"step": 53850
},
{
"loss": 1.1071,
"grad_norm": 0.9810658693313599,
"learning_rate": 3.8232555345288516e-05,
"epoch": 0.81,
"step": 53875
},
{
"loss": 1.1032,
"grad_norm": 1.1024478673934937,
"learning_rate": 3.8157459974167195e-05,
"epoch": 0.81,
"step": 53900
},
{
"loss": 1.0515,
"grad_norm": 1.8465054035186768,
"learning_rate": 3.8082364603045874e-05,
"epoch": 0.81,
"step": 53925
},
{
"loss": 1.0606,
"grad_norm": 0.9782311320304871,
"learning_rate": 3.8007269231924546e-05,
"epoch": 0.81,
"step": 53950
},
{
"loss": 1.1054,
"grad_norm": 1.4638195037841797,
"learning_rate": 3.7932173860803225e-05,
"epoch": 0.81,
"step": 53975
},
{
"loss": 1.1584,
"grad_norm": 2.066131114959717,
"learning_rate": 3.78570784896819e-05,
"epoch": 0.81,
"step": 54000
},
{
"loss": 1.1254,
"grad_norm": 1.6561390161514282,
"learning_rate": 3.778198311856057e-05,
"epoch": 0.81,
"step": 54025
},
{
"loss": 1.1213,
"grad_norm": 1.693764090538025,
"learning_rate": 3.770688774743925e-05,
"epoch": 0.81,
"step": 54050
},
{
"loss": 1.0754,
"grad_norm": 1.5490859746932983,
"learning_rate": 3.763179237631793e-05,
"epoch": 0.81,
"step": 54075
},
{
"loss": 1.1704,
"grad_norm": 1.7576946020126343,
"learning_rate": 3.75566970051966e-05,
"epoch": 0.81,
"step": 54100
},
{
"loss": 1.1473,
"grad_norm": 1.8954912424087524,
"learning_rate": 3.748160163407528e-05,
"epoch": 0.81,
"step": 54125
},
{
"loss": 1.1391,
"grad_norm": 2.0620269775390625,
"learning_rate": 3.7406506262953957e-05,
"epoch": 0.81,
"step": 54150
},
{
"loss": 1.1656,
"grad_norm": 1.3634029626846313,
"learning_rate": 3.733141089183263e-05,
"epoch": 0.81,
"step": 54175
},
{
"loss": 1.1246,
"grad_norm": 1.4298192262649536,
"learning_rate": 3.725631552071131e-05,
"epoch": 0.81,
"step": 54200
},
{
"loss": 1.1954,
"grad_norm": 1.760016679763794,
"learning_rate": 3.718122014958998e-05,
"epoch": 0.81,
"step": 54225
},
{
"loss": 1.1634,
"grad_norm": 1.460942268371582,
"learning_rate": 3.710612477846866e-05,
"epoch": 0.81,
"step": 54250
},
{
"loss": 1.0715,
"grad_norm": 0.9953238368034363,
"learning_rate": 3.703102940734734e-05,
"epoch": 0.82,
"step": 54275
},
{
"loss": 1.1588,
"grad_norm": 1.3567308187484741,
"learning_rate": 3.695593403622601e-05,
"epoch": 0.82,
"step": 54300
},
{
"loss": 1.1086,
"grad_norm": 1.4111878871917725,
"learning_rate": 3.688083866510468e-05,
"epoch": 0.82,
"step": 54325
},
{
"loss": 1.161,
"grad_norm": 1.7530951499938965,
"learning_rate": 3.680574329398336e-05,
"epoch": 0.82,
"step": 54350
},
{
"loss": 1.1003,
"grad_norm": 1.5563117265701294,
"learning_rate": 3.673064792286204e-05,
"epoch": 0.82,
"step": 54375
},
{
"loss": 1.163,
"grad_norm": 1.0254262685775757,
"learning_rate": 3.665555255174071e-05,
"epoch": 0.82,
"step": 54400
},
{
"loss": 1.1384,
"grad_norm": 2.547769784927368,
"learning_rate": 3.658045718061939e-05,
"epoch": 0.82,
"step": 54425
},
{
"loss": 1.107,
"grad_norm": 1.0468461513519287,
"learning_rate": 3.650536180949806e-05,
"epoch": 0.82,
"step": 54450
},
{
"loss": 1.1431,
"grad_norm": 1.1783130168914795,
"learning_rate": 3.643026643837674e-05,
"epoch": 0.82,
"step": 54475
},
{
"loss": 1.1398,
"grad_norm": 1.3592449426651,
"learning_rate": 3.635517106725542e-05,
"epoch": 0.82,
"step": 54500
},
{
"loss": 1.0942,
"grad_norm": 1.687246322631836,
"learning_rate": 3.628007569613409e-05,
"epoch": 0.82,
"step": 54525
},
{
"loss": 1.0334,
"grad_norm": 1.396044373512268,
"learning_rate": 3.6204980325012764e-05,
"epoch": 0.82,
"step": 54550
},
{
"loss": 1.0524,
"grad_norm": 2.7093379497528076,
"learning_rate": 3.612988495389144e-05,
"epoch": 0.82,
"step": 54575
},
{
"loss": 1.1404,
"grad_norm": 1.8118054866790771,
"learning_rate": 3.605478958277012e-05,
"epoch": 0.82,
"step": 54600
},
{
"loss": 1.0971,
"grad_norm": 1.241155982017517,
"learning_rate": 3.5979694211648794e-05,
"epoch": 0.82,
"step": 54625
},
{
"loss": 1.0957,
"grad_norm": 1.7652029991149902,
"learning_rate": 3.590459884052747e-05,
"epoch": 0.82,
"step": 54650
},
{
"loss": 1.1159,
"grad_norm": 2.1295764446258545,
"learning_rate": 3.5829503469406145e-05,
"epoch": 0.82,
"step": 54675
},
{
"loss": 1.1132,
"grad_norm": 1.4499530792236328,
"learning_rate": 3.5754408098284824e-05,
"epoch": 0.82,
"step": 54700
},
{
"loss": 1.1356,
"grad_norm": 1.3263312578201294,
"learning_rate": 3.56793127271635e-05,
"epoch": 0.82,
"step": 54725
},
{
"loss": 1.0752,
"grad_norm": 1.5875509977340698,
"learning_rate": 3.5604217356042175e-05,
"epoch": 0.82,
"step": 54750
},
{
"loss": 1.1495,
"grad_norm": 1.2747198343276978,
"learning_rate": 3.552912198492085e-05,
"epoch": 0.82,
"step": 54775
},
{
"loss": 1.0794,
"grad_norm": 3.039198637008667,
"learning_rate": 3.5454026613799526e-05,
"epoch": 0.82,
"step": 54800
},
{
"loss": 1.0468,
"grad_norm": 1.2731279134750366,
"learning_rate": 3.5378931242678205e-05,
"epoch": 0.82,
"step": 54825
},
{
"loss": 1.0554,
"grad_norm": 2.2691845893859863,
"learning_rate": 3.530383587155688e-05,
"epoch": 0.82,
"step": 54850
},
{
"loss": 1.1324,
"grad_norm": 1.3395280838012695,
"learning_rate": 3.5228740500435556e-05,
"epoch": 0.82,
"step": 54875
},
{
"loss": 1.036,
"grad_norm": 1.1065006256103516,
"learning_rate": 3.5153645129314235e-05,
"epoch": 0.82,
"step": 54900
},
{
"loss": 1.05,
"grad_norm": 1.7697545289993286,
"learning_rate": 3.507854975819291e-05,
"epoch": 0.82,
"step": 54925
},
{
"loss": 1.1362,
"grad_norm": 1.0653153657913208,
"learning_rate": 3.5003454387071585e-05,
"epoch": 0.83,
"step": 54950
},
{
"loss": 1.0546,
"grad_norm": 1.5754921436309814,
"learning_rate": 3.492835901595026e-05,
"epoch": 0.83,
"step": 54975
},
{
"loss": 0.9954,
"grad_norm": 1.166438341140747,
"learning_rate": 3.485326364482893e-05,
"epoch": 0.83,
"step": 55000
},
{
"loss": 1.0758,
"grad_norm": 1.2350513935089111,
"learning_rate": 3.4778168273707615e-05,
"epoch": 0.83,
"step": 55025
},
{
"loss": 1.0664,
"grad_norm": 1.1638765335083008,
"learning_rate": 3.470307290258629e-05,
"epoch": 0.83,
"step": 55050
},
{
"loss": 1.1558,
"grad_norm": 1.0206239223480225,
"learning_rate": 3.462797753146496e-05,
"epoch": 0.83,
"step": 55075
},
{
"loss": 1.1298,
"grad_norm": 1.1066503524780273,
"learning_rate": 3.455288216034364e-05,
"epoch": 0.83,
"step": 55100
},
{
"loss": 1.114,
"grad_norm": 1.242811918258667,
"learning_rate": 3.447778678922232e-05,
"epoch": 0.83,
"step": 55125
},
{
"loss": 1.0795,
"grad_norm": 0.8851810097694397,
"learning_rate": 3.440269141810099e-05,
"epoch": 0.83,
"step": 55150
},
{
"loss": 1.1065,
"grad_norm": 1.7904212474822998,
"learning_rate": 3.432759604697967e-05,
"epoch": 0.83,
"step": 55175
},
{
"loss": 1.013,
"grad_norm": 1.2688441276550293,
"learning_rate": 3.425250067585834e-05,
"epoch": 0.83,
"step": 55200
},
{
"loss": 1.1373,
"grad_norm": 1.1899800300598145,
"learning_rate": 3.417740530473702e-05,
"epoch": 0.83,
"step": 55225
},
{
"loss": 1.1135,
"grad_norm": 1.9331419467926025,
"learning_rate": 3.41023099336157e-05,
"epoch": 0.83,
"step": 55250
},
{
"loss": 1.0993,
"grad_norm": 1.6777852773666382,
"learning_rate": 3.402721456249437e-05,
"epoch": 0.83,
"step": 55275
},
{
"loss": 1.0528,
"grad_norm": 1.469103217124939,
"learning_rate": 3.395211919137304e-05,
"epoch": 0.83,
"step": 55300
},
{
"loss": 1.1432,
"grad_norm": 1.6784319877624512,
"learning_rate": 3.387702382025172e-05,
"epoch": 0.83,
"step": 55325
},
{
"loss": 1.0986,
"grad_norm": 1.4824069738388062,
"learning_rate": 3.38019284491304e-05,
"epoch": 0.83,
"step": 55350
},
{
"loss": 1.1599,
"grad_norm": 1.1543418169021606,
"learning_rate": 3.372683307800907e-05,
"epoch": 0.83,
"step": 55375
},
{
"loss": 1.0552,
"grad_norm": 2.3624870777130127,
"learning_rate": 3.365173770688775e-05,
"epoch": 0.83,
"step": 55400
},
{
"loss": 1.1395,
"grad_norm": 1.5518018007278442,
"learning_rate": 3.357664233576642e-05,
"epoch": 0.83,
"step": 55425
},
{
"loss": 1.1172,
"grad_norm": 1.820732593536377,
"learning_rate": 3.35015469646451e-05,
"epoch": 0.83,
"step": 55450
},
{
"loss": 1.1409,
"grad_norm": 1.5625290870666504,
"learning_rate": 3.342645159352378e-05,
"epoch": 0.83,
"step": 55475
},
{
"loss": 1.1088,
"grad_norm": 1.6013075113296509,
"learning_rate": 3.335135622240245e-05,
"epoch": 0.83,
"step": 55500
},
{
"loss": 1.1256,
"grad_norm": 1.2917579412460327,
"learning_rate": 3.3276260851281125e-05,
"epoch": 0.83,
"step": 55525
},
{
"loss": 1.1221,
"grad_norm": 2.3396828174591064,
"learning_rate": 3.3201165480159804e-05,
"epoch": 0.83,
"step": 55550
},
{
"loss": 1.1166,
"grad_norm": 0.9828691482543945,
"learning_rate": 3.312607010903848e-05,
"epoch": 0.83,
"step": 55575
},
{
"loss": 1.0945,
"grad_norm": 1.5887751579284668,
"learning_rate": 3.3050974737917155e-05,
"epoch": 0.84,
"step": 55600
},
{
"loss": 1.0711,
"grad_norm": 1.1289055347442627,
"learning_rate": 3.2975879366795834e-05,
"epoch": 0.84,
"step": 55625
},
{
"loss": 1.0644,
"grad_norm": 1.4812935590744019,
"learning_rate": 3.2900783995674506e-05,
"epoch": 0.84,
"step": 55650
},
{
"loss": 1.102,
"grad_norm": 1.3823920488357544,
"learning_rate": 3.2825688624553185e-05,
"epoch": 0.84,
"step": 55675
},
{
"loss": 1.1011,
"grad_norm": 1.7511022090911865,
"learning_rate": 3.2750593253431863e-05,
"epoch": 0.84,
"step": 55700
},
{
"loss": 1.183,
"grad_norm": 1.9509655237197876,
"learning_rate": 3.2675497882310536e-05,
"epoch": 0.84,
"step": 55725
},
{
"loss": 1.0795,
"grad_norm": 0.9376107454299927,
"learning_rate": 3.260040251118921e-05,
"epoch": 0.84,
"step": 55750
},
{
"loss": 1.1001,
"grad_norm": 1.123742938041687,
"learning_rate": 3.252530714006789e-05,
"epoch": 0.84,
"step": 55775
},
{
"loss": 1.1292,
"grad_norm": 2.6337194442749023,
"learning_rate": 3.2450211768946565e-05,
"epoch": 0.84,
"step": 55800
},
{
"loss": 1.0631,
"grad_norm": 1.1294831037521362,
"learning_rate": 3.237511639782524e-05,
"epoch": 0.84,
"step": 55825
},
{
"loss": 1.1375,
"grad_norm": 1.2995752096176147,
"learning_rate": 3.2300021026703916e-05,
"epoch": 0.84,
"step": 55850
},
{
"loss": 1.1308,
"grad_norm": 4.79863166809082,
"learning_rate": 3.2224925655582595e-05,
"epoch": 0.84,
"step": 55875
},
{
"loss": 1.1005,
"grad_norm": 1.031606674194336,
"learning_rate": 3.214983028446127e-05,
"epoch": 0.84,
"step": 55900
},
{
"loss": 1.011,
"grad_norm": 1.0204112529754639,
"learning_rate": 3.2074734913339946e-05,
"epoch": 0.84,
"step": 55925
},
{
"loss": 1.1464,
"grad_norm": 1.0727862119674683,
"learning_rate": 3.199963954221862e-05,
"epoch": 0.84,
"step": 55950
},
{
"loss": 1.1208,
"grad_norm": 1.1318399906158447,
"learning_rate": 3.192454417109729e-05,
"epoch": 0.84,
"step": 55975
},
{
"loss": 1.1243,
"grad_norm": 1.1461580991744995,
"learning_rate": 3.1849448799975976e-05,
"epoch": 0.84,
"step": 56000
},
{
"loss": 1.1367,
"grad_norm": 1.8615853786468506,
"learning_rate": 3.177435342885465e-05,
"epoch": 0.84,
"step": 56025
},
{
"loss": 1.1489,
"grad_norm": 1.4172084331512451,
"learning_rate": 3.169925805773332e-05,
"epoch": 0.84,
"step": 56050
},
{
"loss": 1.1035,
"grad_norm": 1.299654245376587,
"learning_rate": 3.1624162686612e-05,
"epoch": 0.84,
"step": 56075
},
{
"loss": 1.1718,
"grad_norm": 1.4679521322250366,
"learning_rate": 3.154906731549068e-05,
"epoch": 0.84,
"step": 56100
},
{
"loss": 1.11,
"grad_norm": 2.0875778198242188,
"learning_rate": 3.147397194436935e-05,
"epoch": 0.84,
"step": 56125
},
{
"loss": 1.1843,
"grad_norm": 0.9587807655334473,
"learning_rate": 3.139887657324803e-05,
"epoch": 0.84,
"step": 56150
},
{
"loss": 1.1362,
"grad_norm": 2.253598690032959,
"learning_rate": 3.13237812021267e-05,
"epoch": 0.84,
"step": 56175
},
{
"loss": 1.0859,
"grad_norm": 2.3193461894989014,
"learning_rate": 3.124868583100538e-05,
"epoch": 0.84,
"step": 56200
},
{
"loss": 1.1347,
"grad_norm": 1.5526835918426514,
"learning_rate": 3.117359045988406e-05,
"epoch": 0.84,
"step": 56225
},
{
"loss": 1.1155,
"grad_norm": 1.7318717241287231,
"learning_rate": 3.109849508876273e-05,
"epoch": 0.84,
"step": 56250
},
{
"loss": 1.1701,
"grad_norm": 0.9637216925621033,
"learning_rate": 3.10233997176414e-05,
"epoch": 0.85,
"step": 56275
},
{
"loss": 1.0931,
"grad_norm": 1.6898939609527588,
"learning_rate": 3.094830434652008e-05,
"epoch": 0.85,
"step": 56300
},
{
"loss": 1.1269,
"grad_norm": 1.4921387434005737,
"learning_rate": 3.087320897539876e-05,
"epoch": 0.85,
"step": 56325
},
{
"loss": 1.098,
"grad_norm": 1.4121395349502563,
"learning_rate": 3.079811360427743e-05,
"epoch": 0.85,
"step": 56350
},
{
"loss": 1.1518,
"grad_norm": 1.144964575767517,
"learning_rate": 3.072301823315611e-05,
"epoch": 0.85,
"step": 56375
},
{
"loss": 1.1257,
"grad_norm": 1.5664513111114502,
"learning_rate": 3.0647922862034784e-05,
"epoch": 0.85,
"step": 56400
},
{
"loss": 1.12,
"grad_norm": 1.3426709175109863,
"learning_rate": 3.057282749091346e-05,
"epoch": 0.85,
"step": 56425
},
{
"loss": 1.0914,
"grad_norm": 1.4727264642715454,
"learning_rate": 3.0497732119792138e-05,
"epoch": 0.85,
"step": 56450
},
{
"loss": 1.2281,
"grad_norm": 3.242955207824707,
"learning_rate": 3.0422636748670813e-05,
"epoch": 0.85,
"step": 56475
},
{
"loss": 1.1146,
"grad_norm": 1.4398702383041382,
"learning_rate": 3.034754137754949e-05,
"epoch": 0.85,
"step": 56500
},
{
"loss": 1.1887,
"grad_norm": 1.533019781112671,
"learning_rate": 3.0272446006428168e-05,
"epoch": 0.85,
"step": 56525
},
{
"loss": 1.0688,
"grad_norm": 1.3019578456878662,
"learning_rate": 3.0197350635306843e-05,
"epoch": 0.85,
"step": 56550
},
{
"loss": 1.0937,
"grad_norm": 3.774083375930786,
"learning_rate": 3.012225526418552e-05,
"epoch": 0.85,
"step": 56575
},
{
"loss": 1.0797,
"grad_norm": 1.535316824913025,
"learning_rate": 3.004715989306419e-05,
"epoch": 0.85,
"step": 56600
},
{
"loss": 1.1406,
"grad_norm": 3.624013662338257,
"learning_rate": 2.9972064521942866e-05,
"epoch": 0.85,
"step": 56625
},
{
"loss": 1.0986,
"grad_norm": 2.2039883136749268,
"learning_rate": 2.9896969150821545e-05,
"epoch": 0.85,
"step": 56650
},
{
"loss": 1.1318,
"grad_norm": 1.0096391439437866,
"learning_rate": 2.982187377970022e-05,
"epoch": 0.85,
"step": 56675
},
{
"loss": 1.1223,
"grad_norm": 1.8855111598968506,
"learning_rate": 2.9746778408578896e-05,
"epoch": 0.85,
"step": 56700
},
{
"loss": 1.0381,
"grad_norm": 1.7449959516525269,
"learning_rate": 2.967168303745757e-05,
"epoch": 0.85,
"step": 56725
},
{
"loss": 1.1104,
"grad_norm": 1.9731217622756958,
"learning_rate": 2.959658766633625e-05,
"epoch": 0.85,
"step": 56750
},
{
"loss": 1.1399,
"grad_norm": 1.2654249668121338,
"learning_rate": 2.9521492295214926e-05,
"epoch": 0.85,
"step": 56775
},
{
"loss": 1.0705,
"grad_norm": 2.2102811336517334,
"learning_rate": 2.94463969240936e-05,
"epoch": 0.85,
"step": 56800
},
{
"loss": 1.1073,
"grad_norm": 1.3911298513412476,
"learning_rate": 2.9371301552972274e-05,
"epoch": 0.85,
"step": 56825
},
{
"loss": 1.132,
"grad_norm": 1.2584389448165894,
"learning_rate": 2.9296206181850956e-05,
"epoch": 0.85,
"step": 56850
},
{
"loss": 1.0874,
"grad_norm": 2.038180351257324,
"learning_rate": 2.9221110810729628e-05,
"epoch": 0.85,
"step": 56875
},
{
"loss": 1.1324,
"grad_norm": 2.4636690616607666,
"learning_rate": 2.9146015439608303e-05,
"epoch": 0.85,
"step": 56900
},
{
"loss": 1.1063,
"grad_norm": 1.3092725276947021,
"learning_rate": 2.907092006848698e-05,
"epoch": 0.85,
"step": 56925
},
{
"loss": 1.0472,
"grad_norm": 1.3636903762817383,
"learning_rate": 2.8995824697365654e-05,
"epoch": 0.86,
"step": 56950
},
{
"loss": 1.1155,
"grad_norm": 1.915647029876709,
"learning_rate": 2.8920729326244333e-05,
"epoch": 0.86,
"step": 56975
},
{
"loss": 1.141,
"grad_norm": 1.1012380123138428,
"learning_rate": 2.884563395512301e-05,
"epoch": 0.86,
"step": 57000
},
{
"loss": 1.1004,
"grad_norm": 1.4259084463119507,
"learning_rate": 2.8770538584001684e-05,
"epoch": 0.86,
"step": 57025
},
{
"loss": 1.0786,
"grad_norm": 1.411359190940857,
"learning_rate": 2.8695443212880356e-05,
"epoch": 0.86,
"step": 57050
},
{
"loss": 1.1746,
"grad_norm": 1.6087229251861572,
"learning_rate": 2.862034784175904e-05,
"epoch": 0.86,
"step": 57075
},
{
"loss": 1.1506,
"grad_norm": 1.2207622528076172,
"learning_rate": 2.854525247063771e-05,
"epoch": 0.86,
"step": 57100
},
{
"loss": 1.177,
"grad_norm": 1.542277455329895,
"learning_rate": 2.8470157099516386e-05,
"epoch": 0.86,
"step": 57125
},
{
"loss": 1.1666,
"grad_norm": 1.7982580661773682,
"learning_rate": 2.839506172839506e-05,
"epoch": 0.86,
"step": 57150
},
{
"loss": 1.1294,
"grad_norm": 1.2220053672790527,
"learning_rate": 2.831996635727374e-05,
"epoch": 0.86,
"step": 57175
},
{
"loss": 1.1134,
"grad_norm": 2.173220157623291,
"learning_rate": 2.8244870986152416e-05,
"epoch": 0.86,
"step": 57200
},
{
"loss": 1.1039,
"grad_norm": 1.5563225746154785,
"learning_rate": 2.816977561503109e-05,
"epoch": 0.86,
"step": 57225
},
{
"loss": 1.1248,
"grad_norm": 1.477427363395691,
"learning_rate": 2.8094680243909767e-05,
"epoch": 0.86,
"step": 57250
},
{
"loss": 1.106,
"grad_norm": 1.374334692955017,
"learning_rate": 2.801958487278844e-05,
"epoch": 0.86,
"step": 57275
},
{
"loss": 1.1058,
"grad_norm": 1.437056541442871,
"learning_rate": 2.794448950166712e-05,
"epoch": 0.86,
"step": 57300
},
{
"loss": 1.1278,
"grad_norm": 1.2822084426879883,
"learning_rate": 2.7869394130545797e-05,
"epoch": 0.86,
"step": 57325
},
{
"loss": 1.1003,
"grad_norm": 0.9716039299964905,
"learning_rate": 2.779429875942447e-05,
"epoch": 0.86,
"step": 57350
},
{
"loss": 1.0853,
"grad_norm": 1.4873470067977905,
"learning_rate": 2.7719203388303144e-05,
"epoch": 0.86,
"step": 57375
},
{
"loss": 1.0961,
"grad_norm": 1.8672046661376953,
"learning_rate": 2.7644108017181823e-05,
"epoch": 0.86,
"step": 57400
},
{
"loss": 1.1141,
"grad_norm": 1.4299520254135132,
"learning_rate": 2.75690126460605e-05,
"epoch": 0.86,
"step": 57425
},
{
"loss": 1.1041,
"grad_norm": 2.402892827987671,
"learning_rate": 2.7493917274939174e-05,
"epoch": 0.86,
"step": 57450
},
{
"loss": 1.1231,
"grad_norm": 1.2294812202453613,
"learning_rate": 2.741882190381785e-05,
"epoch": 0.86,
"step": 57475
},
{
"loss": 1.127,
"grad_norm": 1.7184091806411743,
"learning_rate": 2.734372653269653e-05,
"epoch": 0.86,
"step": 57500
},
{
"loss": 1.1925,
"grad_norm": 1.3573827743530273,
"learning_rate": 2.7268631161575204e-05,
"epoch": 0.86,
"step": 57525
},
{
"loss": 1.1482,
"grad_norm": 1.7570611238479614,
"learning_rate": 2.719353579045388e-05,
"epoch": 0.86,
"step": 57550
},
{
"loss": 1.1096,
"grad_norm": 1.4989982843399048,
"learning_rate": 2.711844041933255e-05,
"epoch": 0.86,
"step": 57575
},
{
"loss": 1.0666,
"grad_norm": 1.59767746925354,
"learning_rate": 2.7043345048211227e-05,
"epoch": 0.87,
"step": 57600
},
{
"loss": 1.125,
"grad_norm": 1.3916709423065186,
"learning_rate": 2.6968249677089906e-05,
"epoch": 0.87,
"step": 57625
},
{
"loss": 1.038,
"grad_norm": 1.6316527128219604,
"learning_rate": 2.689315430596858e-05,
"epoch": 0.87,
"step": 57650
},
{
"loss": 1.0536,
"grad_norm": 1.4137283563613892,
"learning_rate": 2.6818058934847257e-05,
"epoch": 0.87,
"step": 57675
},
{
"loss": 1.0917,
"grad_norm": 1.9997875690460205,
"learning_rate": 2.6742963563725932e-05,
"epoch": 0.87,
"step": 57700
},
{
"loss": 1.0194,
"grad_norm": 1.3425413370132446,
"learning_rate": 2.666786819260461e-05,
"epoch": 0.87,
"step": 57725
},
{
"loss": 1.0669,
"grad_norm": 0.9919766783714294,
"learning_rate": 2.6592772821483287e-05,
"epoch": 0.87,
"step": 57750
},
{
"loss": 1.15,
"grad_norm": 1.5039972066879272,
"learning_rate": 2.6517677450361962e-05,
"epoch": 0.87,
"step": 57775
},
{
"loss": 1.0898,
"grad_norm": 2.429229974746704,
"learning_rate": 2.6442582079240634e-05,
"epoch": 0.87,
"step": 57800
},
{
"loss": 1.1132,
"grad_norm": 1.3402752876281738,
"learning_rate": 2.6367486708119316e-05,
"epoch": 0.87,
"step": 57825
},
{
"loss": 1.0645,
"grad_norm": 1.041297435760498,
"learning_rate": 2.629239133699799e-05,
"epoch": 0.87,
"step": 57850
},
{
"loss": 1.105,
"grad_norm": 1.0299885272979736,
"learning_rate": 2.6217295965876664e-05,
"epoch": 0.87,
"step": 57875
},
{
"loss": 1.1492,
"grad_norm": 1.3811683654785156,
"learning_rate": 2.614220059475534e-05,
"epoch": 0.87,
"step": 57900
},
{
"loss": 1.1381,
"grad_norm": 1.3449524641036987,
"learning_rate": 2.6067105223634015e-05,
"epoch": 0.87,
"step": 57925
},
{
"loss": 1.1077,
"grad_norm": 1.1133577823638916,
"learning_rate": 2.5995013667357543e-05,
"epoch": 0.87,
"step": 57950
},
{
"loss": 1.1155,
"grad_norm": 1.2379744052886963,
"learning_rate": 2.5919918296236222e-05,
"epoch": 0.87,
"step": 57975
},
{
"loss": 1.0956,
"grad_norm": 1.4516429901123047,
"learning_rate": 2.5844822925114898e-05,
"epoch": 0.87,
"step": 58000
},
{
"loss": 1.157,
"grad_norm": 2.1705074310302734,
"learning_rate": 2.5769727553993573e-05,
"epoch": 0.87,
"step": 58025
},
{
"loss": 1.1116,
"grad_norm": 1.8582936525344849,
"learning_rate": 2.5694632182872245e-05,
"epoch": 0.87,
"step": 58050
},
{
"loss": 1.0901,
"grad_norm": 1.2407045364379883,
"learning_rate": 2.5619536811750928e-05,
"epoch": 0.87,
"step": 58075
},
{
"loss": 1.0979,
"grad_norm": 1.4852651357650757,
"learning_rate": 2.5544441440629603e-05,
"epoch": 0.87,
"step": 58100
},
{
"loss": 1.1655,
"grad_norm": 1.1345808506011963,
"learning_rate": 2.5469346069508275e-05,
"epoch": 0.87,
"step": 58125
},
{
"loss": 1.1008,
"grad_norm": 1.741289734840393,
"learning_rate": 2.539425069838695e-05,
"epoch": 0.87,
"step": 58150
},
{
"loss": 1.1831,
"grad_norm": 1.26760995388031,
"learning_rate": 2.532215914211048e-05,
"epoch": 0.87,
"step": 58175
},
{
"loss": 1.0911,
"grad_norm": 1.9289544820785522,
"learning_rate": 2.524706377098916e-05,
"epoch": 0.87,
"step": 58200
},
{
"loss": 1.0442,
"grad_norm": 1.9321314096450806,
"learning_rate": 2.5171968399867834e-05,
"epoch": 0.87,
"step": 58225
},
{
"loss": 1.1097,
"grad_norm": 1.1289350986480713,
"learning_rate": 2.509687302874651e-05,
"epoch": 0.87,
"step": 58250
},
{
"loss": 1.0505,
"grad_norm": 1.3914735317230225,
"learning_rate": 2.5021777657625184e-05,
"epoch": 0.88,
"step": 58275
},
{
"loss": 1.0804,
"grad_norm": 1.2914477586746216,
"learning_rate": 2.494668228650386e-05,
"epoch": 0.88,
"step": 58300
},
{
"loss": 1.064,
"grad_norm": 1.1069772243499756,
"learning_rate": 2.4871586915382535e-05,
"epoch": 0.88,
"step": 58325
},
{
"loss": 1.0294,
"grad_norm": 1.7709311246871948,
"learning_rate": 2.4796491544261214e-05,
"epoch": 0.88,
"step": 58350
},
{
"loss": 1.0995,
"grad_norm": 1.3731812238693237,
"learning_rate": 2.4721396173139886e-05,
"epoch": 0.88,
"step": 58375
},
{
"loss": 1.0574,
"grad_norm": 1.3423503637313843,
"learning_rate": 2.4646300802018565e-05,
"epoch": 0.88,
"step": 58400
},
{
"loss": 1.1297,
"grad_norm": 1.5664671659469604,
"learning_rate": 2.457120543089724e-05,
"epoch": 0.88,
"step": 58425
},
{
"loss": 1.1686,
"grad_norm": 1.7989689111709595,
"learning_rate": 2.4496110059775916e-05,
"epoch": 0.88,
"step": 58450
},
{
"loss": 1.063,
"grad_norm": 2.50423526763916,
"learning_rate": 2.442101468865459e-05,
"epoch": 0.88,
"step": 58475
},
{
"loss": 1.1528,
"grad_norm": 2.081894636154175,
"learning_rate": 2.434591931753327e-05,
"epoch": 0.88,
"step": 58500
},
{
"loss": 1.0845,
"grad_norm": 1.7260534763336182,
"learning_rate": 2.4270823946411943e-05,
"epoch": 0.88,
"step": 58525
},
{
"loss": 1.1555,
"grad_norm": 1.6785259246826172,
"learning_rate": 2.419572857529062e-05,
"epoch": 0.88,
"step": 58550
},
{
"loss": 1.1315,
"grad_norm": 1.5150628089904785,
"learning_rate": 2.4120633204169297e-05,
"epoch": 0.88,
"step": 58575
},
{
"loss": 1.1247,
"grad_norm": 1.3232154846191406,
"learning_rate": 2.4045537833047972e-05,
"epoch": 0.88,
"step": 58600
},
{
"loss": 1.1638,
"grad_norm": 1.435685157775879,
"learning_rate": 2.3970442461926648e-05,
"epoch": 0.88,
"step": 58625
},
{
"loss": 1.1466,
"grad_norm": 1.562098741531372,
"learning_rate": 2.3895347090805323e-05,
"epoch": 0.88,
"step": 58650
},
{
"loss": 1.0823,
"grad_norm": 1.6774852275848389,
"learning_rate": 2.3820251719684e-05,
"epoch": 0.88,
"step": 58675
},
{
"loss": 1.1152,
"grad_norm": 2.8691372871398926,
"learning_rate": 2.3745156348562674e-05,
"epoch": 0.88,
"step": 58700
},
{
"loss": 1.0675,
"grad_norm": 1.2133371829986572,
"learning_rate": 2.3670060977441353e-05,
"epoch": 0.88,
"step": 58725
},
{
"loss": 1.1358,
"grad_norm": 1.280999779701233,
"learning_rate": 2.3594965606320025e-05,
"epoch": 0.88,
"step": 58750
},
{
"loss": 1.073,
"grad_norm": 2.144066333770752,
"learning_rate": 2.3519870235198704e-05,
"epoch": 0.88,
"step": 58775
},
{
"loss": 1.1203,
"grad_norm": 1.4125479459762573,
"learning_rate": 2.344477486407738e-05,
"epoch": 0.88,
"step": 58800
},
{
"loss": 1.1467,
"grad_norm": 1.402156949043274,
"learning_rate": 2.3369679492956055e-05,
"epoch": 0.88,
"step": 58825
},
{
"loss": 1.1691,
"grad_norm": 1.3000797033309937,
"learning_rate": 2.329458412183473e-05,
"epoch": 0.88,
"step": 58850
},
{
"loss": 1.1105,
"grad_norm": 1.9694422483444214,
"learning_rate": 2.321948875071341e-05,
"epoch": 0.88,
"step": 58875
},
{
"loss": 1.098,
"grad_norm": 1.4404619932174683,
"learning_rate": 2.314439337959208e-05,
"epoch": 0.88,
"step": 58900
},
{
"loss": 1.0979,
"grad_norm": 2.1054556369781494,
"learning_rate": 2.3069298008470757e-05,
"epoch": 0.88,
"step": 58925
},
{
"loss": 1.1015,
"grad_norm": 1.2658005952835083,
"learning_rate": 2.2994202637349436e-05,
"epoch": 0.89,
"step": 58950
},
{
"loss": 1.1349,
"grad_norm": 1.4039870500564575,
"learning_rate": 2.291910726622811e-05,
"epoch": 0.89,
"step": 58975
},
{
"loss": 1.0923,
"grad_norm": 1.5480154752731323,
"learning_rate": 2.2844011895106787e-05,
"epoch": 0.89,
"step": 59000
},
{
"loss": 1.1652,
"grad_norm": 1.9261832237243652,
"learning_rate": 2.2768916523985462e-05,
"epoch": 0.89,
"step": 59025
},
{
"loss": 1.0539,
"grad_norm": 1.2835638523101807,
"learning_rate": 2.2693821152864138e-05,
"epoch": 0.89,
"step": 59050
},
{
"loss": 1.0895,
"grad_norm": 1.7522798776626587,
"learning_rate": 2.2618725781742813e-05,
"epoch": 0.89,
"step": 59075
},
{
"loss": 1.0988,
"grad_norm": 1.2995007038116455,
"learning_rate": 2.2543630410621492e-05,
"epoch": 0.89,
"step": 59100
},
{
"loss": 1.0471,
"grad_norm": 1.5621485710144043,
"learning_rate": 2.2468535039500164e-05,
"epoch": 0.89,
"step": 59125
},
{
"loss": 1.1299,
"grad_norm": 3.184175968170166,
"learning_rate": 2.2393439668378843e-05,
"epoch": 0.89,
"step": 59150
},
{
"loss": 1.201,
"grad_norm": 1.7400543689727783,
"learning_rate": 2.231834429725752e-05,
"epoch": 0.89,
"step": 59175
},
{
"loss": 1.148,
"grad_norm": 1.880234956741333,
"learning_rate": 2.2243248926136194e-05,
"epoch": 0.89,
"step": 59200
},
{
"loss": 1.0385,
"grad_norm": 1.2461950778961182,
"learning_rate": 2.216815355501487e-05,
"epoch": 0.89,
"step": 59225
},
{
"loss": 1.1849,
"grad_norm": 2.8920862674713135,
"learning_rate": 2.2093058183893545e-05,
"epoch": 0.89,
"step": 59250
},
{
"loss": 1.0813,
"grad_norm": 1.3439332246780396,
"learning_rate": 2.201796281277222e-05,
"epoch": 0.89,
"step": 59275
},
{
"loss": 1.0912,
"grad_norm": 1.2441843748092651,
"learning_rate": 2.1942867441650896e-05,
"epoch": 0.89,
"step": 59300
},
{
"loss": 1.121,
"grad_norm": 1.5612194538116455,
"learning_rate": 2.1867772070529575e-05,
"epoch": 0.89,
"step": 59325
},
{
"loss": 1.1187,
"grad_norm": 2.292187213897705,
"learning_rate": 2.179267669940825e-05,
"epoch": 0.89,
"step": 59350
},
{
"loss": 1.098,
"grad_norm": 1.3217053413391113,
"learning_rate": 2.1717581328286926e-05,
"epoch": 0.89,
"step": 59375
},
{
"loss": 1.07,
"grad_norm": 2.108124017715454,
"learning_rate": 2.16424859571656e-05,
"epoch": 0.89,
"step": 59400
},
{
"loss": 1.1967,
"grad_norm": 1.461854100227356,
"learning_rate": 2.1567390586044277e-05,
"epoch": 0.89,
"step": 59425
},
{
"loss": 1.0829,
"grad_norm": 2.4140448570251465,
"learning_rate": 2.1492295214922952e-05,
"epoch": 0.89,
"step": 59450
},
{
"loss": 1.084,
"grad_norm": 1.3833210468292236,
"learning_rate": 2.141719984380163e-05,
"epoch": 0.89,
"step": 59475
},
{
"loss": 1.0312,
"grad_norm": 1.463707447052002,
"learning_rate": 2.1342104472680303e-05,
"epoch": 0.89,
"step": 59500
},
{
"loss": 1.0947,
"grad_norm": 1.0634888410568237,
"learning_rate": 2.1267009101558982e-05,
"epoch": 0.89,
"step": 59525
},
{
"loss": 1.1448,
"grad_norm": 1.6115715503692627,
"learning_rate": 2.1191913730437658e-05,
"epoch": 0.89,
"step": 59550
},
{
"loss": 1.1279,
"grad_norm": 2.023573398590088,
"learning_rate": 2.1116818359316333e-05,
"epoch": 0.89,
"step": 59575
},
{
"loss": 1.0725,
"grad_norm": 1.3353021144866943,
"learning_rate": 2.104172298819501e-05,
"epoch": 0.9,
"step": 59600
},
{
"loss": 1.1597,
"grad_norm": 2.067376136779785,
"learning_rate": 2.0966627617073684e-05,
"epoch": 0.9,
"step": 59625
},
{
"loss": 1.0665,
"grad_norm": 1.4394888877868652,
"learning_rate": 2.089153224595236e-05,
"epoch": 0.9,
"step": 59650
},
{
"loss": 1.1449,
"grad_norm": 1.1642546653747559,
"learning_rate": 2.0816436874831035e-05,
"epoch": 0.9,
"step": 59675
},
{
"loss": 1.0973,
"grad_norm": 1.6994637250900269,
"learning_rate": 2.0741341503709714e-05,
"epoch": 0.9,
"step": 59700
},
{
"loss": 1.1076,
"grad_norm": 2.0998518466949463,
"learning_rate": 2.066624613258839e-05,
"epoch": 0.9,
"step": 59725
},
{
"loss": 1.1332,
"grad_norm": 1.608519196510315,
"learning_rate": 2.0591150761467065e-05,
"epoch": 0.9,
"step": 59750
},
{
"loss": 1.0639,
"grad_norm": 5.120492935180664,
"learning_rate": 2.051605539034574e-05,
"epoch": 0.9,
"step": 59775
},
{
"loss": 1.1295,
"grad_norm": 1.2980087995529175,
"learning_rate": 2.0440960019224416e-05,
"epoch": 0.9,
"step": 59800
},
{
"loss": 1.0998,
"grad_norm": 1.518433928489685,
"learning_rate": 2.036586464810309e-05,
"epoch": 0.9,
"step": 59825
},
{
"loss": 1.0911,
"grad_norm": 1.1310094594955444,
"learning_rate": 2.0290769276981767e-05,
"epoch": 0.9,
"step": 59850
},
{
"loss": 1.1567,
"grad_norm": 0.9931915998458862,
"learning_rate": 2.0215673905860442e-05,
"epoch": 0.9,
"step": 59875
},
{
"loss": 1.1115,
"grad_norm": 2.011012077331543,
"learning_rate": 2.0140578534739118e-05,
"epoch": 0.9,
"step": 59900
},
{
"loss": 1.119,
"grad_norm": 1.6782035827636719,
"learning_rate": 2.0065483163617797e-05,
"epoch": 0.9,
"step": 59925
},
{
"loss": 1.0836,
"grad_norm": 1.6010968685150146,
"learning_rate": 1.9990387792496472e-05,
"epoch": 0.9,
"step": 59950
},
{
"loss": 1.0171,
"grad_norm": 1.8368406295776367,
"learning_rate": 1.9915292421375148e-05,
"epoch": 0.9,
"step": 59975
},
{
"loss": 1.1612,
"grad_norm": 1.6433417797088623,
"learning_rate": 1.9840197050253823e-05,
"epoch": 0.9,
"step": 60000
},
{
"loss": 1.1051,
"grad_norm": 1.0590778589248657,
"learning_rate": 1.97651016791325e-05,
"epoch": 0.9,
"step": 60025
},
{
"loss": 1.1467,
"grad_norm": 2.4711523056030273,
"learning_rate": 1.9690006308011174e-05,
"epoch": 0.9,
"step": 60050
},
{
"loss": 1.1249,
"grad_norm": 1.817872166633606,
"learning_rate": 1.9614910936889853e-05,
"epoch": 0.9,
"step": 60075
},
{
"loss": 1.1509,
"grad_norm": 1.9354240894317627,
"learning_rate": 1.953981556576853e-05,
"epoch": 0.9,
"step": 60100
},
{
"loss": 1.1021,
"grad_norm": 2.382876396179199,
"learning_rate": 1.9464720194647204e-05,
"epoch": 0.9,
"step": 60125
},
{
"loss": 1.0521,
"grad_norm": 1.6886651515960693,
"learning_rate": 1.938962482352588e-05,
"epoch": 0.9,
"step": 60150
},
{
"loss": 1.0828,
"grad_norm": 1.0179933309555054,
"learning_rate": 1.9314529452404555e-05,
"epoch": 0.9,
"step": 60175
},
{
"loss": 1.0767,
"grad_norm": 1.041438102722168,
"learning_rate": 1.923943408128323e-05,
"epoch": 0.9,
"step": 60200
},
{
"loss": 1.0739,
"grad_norm": 1.1048403978347778,
"learning_rate": 1.9164338710161906e-05,
"epoch": 0.9,
"step": 60225
},
{
"loss": 1.1025,
"grad_norm": 1.113214373588562,
"learning_rate": 1.908924333904058e-05,
"epoch": 0.9,
"step": 60250
},
{
"loss": 1.1523,
"grad_norm": 2.7546420097351074,
"learning_rate": 1.9014147967919257e-05,
"epoch": 0.91,
"step": 60275
},
{
"loss": 1.043,
"grad_norm": 1.3055835962295532,
"learning_rate": 1.8939052596797936e-05,
"epoch": 0.91,
"step": 60300
},
{
"loss": 1.0768,
"grad_norm": 1.9900767803192139,
"learning_rate": 1.8866961040521464e-05,
"epoch": 0.91,
"step": 60325
},
{
"loss": 1.1571,
"grad_norm": 1.605908751487732,
"learning_rate": 1.879186566940014e-05,
"epoch": 0.91,
"step": 60350
},
{
"loss": 1.1399,
"grad_norm": 1.9245578050613403,
"learning_rate": 1.8716770298278815e-05,
"epoch": 0.91,
"step": 60375
},
{
"loss": 1.0959,
"grad_norm": 1.897222638130188,
"learning_rate": 1.864167492715749e-05,
"epoch": 0.91,
"step": 60400
},
{
"loss": 1.1132,
"grad_norm": 2.4311060905456543,
"learning_rate": 1.856657955603617e-05,
"epoch": 0.91,
"step": 60425
},
{
"loss": 1.1102,
"grad_norm": 1.378459095954895,
"learning_rate": 1.849148418491484e-05,
"epoch": 0.91,
"step": 60450
},
{
"loss": 1.0765,
"grad_norm": 2.5490572452545166,
"learning_rate": 1.841638881379352e-05,
"epoch": 0.91,
"step": 60475
},
{
"loss": 1.1314,
"grad_norm": 1.2700508832931519,
"learning_rate": 1.8341293442672196e-05,
"epoch": 0.91,
"step": 60500
},
{
"loss": 1.1452,
"grad_norm": 1.636888027191162,
"learning_rate": 1.826619807155087e-05,
"epoch": 0.91,
"step": 60525
},
{
"loss": 1.0367,
"grad_norm": 1.4893200397491455,
"learning_rate": 1.8191102700429547e-05,
"epoch": 0.91,
"step": 60550
},
{
"loss": 1.0222,
"grad_norm": 0.9594138860702515,
"learning_rate": 1.8116007329308222e-05,
"epoch": 0.91,
"step": 60575
},
{
"loss": 1.115,
"grad_norm": 2.1128294467926025,
"learning_rate": 1.8040911958186898e-05,
"epoch": 0.91,
"step": 60600
},
{
"loss": 1.1439,
"grad_norm": 1.5294193029403687,
"learning_rate": 1.7965816587065573e-05,
"epoch": 0.91,
"step": 60625
},
{
"loss": 1.121,
"grad_norm": 1.5716066360473633,
"learning_rate": 1.7890721215944252e-05,
"epoch": 0.91,
"step": 60650
},
{
"loss": 1.0839,
"grad_norm": 1.2781248092651367,
"learning_rate": 1.7815625844822924e-05,
"epoch": 0.91,
"step": 60675
},
{
"loss": 1.1282,
"grad_norm": 1.1077611446380615,
"learning_rate": 1.7740530473701603e-05,
"epoch": 0.91,
"step": 60700
},
{
"loss": 1.1352,
"grad_norm": 1.1450996398925781,
"learning_rate": 1.766543510258028e-05,
"epoch": 0.91,
"step": 60725
},
{
"loss": 1.0536,
"grad_norm": 1.3349822759628296,
"learning_rate": 1.7590339731458954e-05,
"epoch": 0.91,
"step": 60750
},
{
"loss": 1.13,
"grad_norm": 1.6306883096694946,
"learning_rate": 1.751524436033763e-05,
"epoch": 0.91,
"step": 60775
},
{
"loss": 1.1103,
"grad_norm": 1.7724149227142334,
"learning_rate": 1.744014898921631e-05,
"epoch": 0.91,
"step": 60800
},
{
"loss": 1.0242,
"grad_norm": 1.5324548482894897,
"learning_rate": 1.736505361809498e-05,
"epoch": 0.91,
"step": 60825
},
{
"loss": 1.0809,
"grad_norm": 0.8965089917182922,
"learning_rate": 1.728995824697366e-05,
"epoch": 0.91,
"step": 60850
},
{
"loss": 1.1012,
"grad_norm": 1.551774263381958,
"learning_rate": 1.7214862875852335e-05,
"epoch": 0.91,
"step": 60875
},
{
"loss": 1.1264,
"grad_norm": 1.070957064628601,
"learning_rate": 1.7139767504731007e-05,
"epoch": 0.91,
"step": 60900
},
{
"loss": 1.1098,
"grad_norm": 1.5298128128051758,
"learning_rate": 1.7064672133609686e-05,
"epoch": 0.92,
"step": 60925
},
{
"loss": 1.089,
"grad_norm": 1.6738872528076172,
"learning_rate": 1.698957676248836e-05,
"epoch": 0.92,
"step": 60950
},
{
"loss": 1.023,
"grad_norm": 1.2859163284301758,
"learning_rate": 1.6914481391367037e-05,
"epoch": 0.92,
"step": 60975
},
{
"loss": 1.227,
"grad_norm": 1.181386113166809,
"learning_rate": 1.6839386020245712e-05,
"epoch": 0.92,
"step": 61000
},
{
"loss": 1.1462,
"grad_norm": 1.9334174394607544,
"learning_rate": 1.676429064912439e-05,
"epoch": 0.92,
"step": 61025
},
{
"loss": 1.0915,
"grad_norm": 1.1935040950775146,
"learning_rate": 1.6689195278003063e-05,
"epoch": 0.92,
"step": 61050
},
{
"loss": 1.029,
"grad_norm": 1.1765645742416382,
"learning_rate": 1.6614099906881742e-05,
"epoch": 0.92,
"step": 61075
},
{
"loss": 1.15,
"grad_norm": 2.0349085330963135,
"learning_rate": 1.6539004535760417e-05,
"epoch": 0.92,
"step": 61100
},
{
"loss": 1.1565,
"grad_norm": 2.330791711807251,
"learning_rate": 1.6463909164639093e-05,
"epoch": 0.92,
"step": 61125
},
{
"loss": 1.0929,
"grad_norm": 1.6865901947021484,
"learning_rate": 1.638881379351777e-05,
"epoch": 0.92,
"step": 61150
},
{
"loss": 1.199,
"grad_norm": 2.1335840225219727,
"learning_rate": 1.6313718422396444e-05,
"epoch": 0.92,
"step": 61175
},
{
"loss": 1.1875,
"grad_norm": 0.9578272104263306,
"learning_rate": 1.623862305127512e-05,
"epoch": 0.92,
"step": 61200
},
{
"loss": 1.117,
"grad_norm": 1.564257025718689,
"learning_rate": 1.6163527680153795e-05,
"epoch": 0.92,
"step": 61225
},
{
"loss": 1.1286,
"grad_norm": 2.076204538345337,
"learning_rate": 1.6088432309032474e-05,
"epoch": 0.92,
"step": 61250
},
{
"loss": 1.1491,
"grad_norm": 1.695163607597351,
"learning_rate": 1.6013336937911146e-05,
"epoch": 0.92,
"step": 61275
},
{
"loss": 1.1108,
"grad_norm": 1.0644354820251465,
"learning_rate": 1.5938241566789825e-05,
"epoch": 0.92,
"step": 61300
},
{
"loss": 1.0865,
"grad_norm": 1.13369619846344,
"learning_rate": 1.58631461956685e-05,
"epoch": 0.92,
"step": 61325
},
{
"loss": 1.09,
"grad_norm": 0.8873293995857239,
"learning_rate": 1.5788050824547176e-05,
"epoch": 0.92,
"step": 61350
},
{
"loss": 1.1836,
"grad_norm": 1.4285056591033936,
"learning_rate": 1.571295545342585e-05,
"epoch": 0.92,
"step": 61375
},
{
"loss": 1.1597,
"grad_norm": 0.8853715658187866,
"learning_rate": 1.563786008230453e-05,
"epoch": 0.92,
"step": 61400
},
{
"loss": 1.1089,
"grad_norm": 1.2858846187591553,
"learning_rate": 1.5562764711183202e-05,
"epoch": 0.92,
"step": 61425
},
{
"loss": 1.1041,
"grad_norm": 1.2523924112319946,
"learning_rate": 1.548766934006188e-05,
"epoch": 0.92,
"step": 61450
},
{
"loss": 1.1523,
"grad_norm": 1.9986999034881592,
"learning_rate": 1.5412573968940556e-05,
"epoch": 0.92,
"step": 61475
},
{
"loss": 1.0461,
"grad_norm": 2.06295108795166,
"learning_rate": 1.533747859781923e-05,
"epoch": 0.92,
"step": 61500
},
{
"loss": 1.0988,
"grad_norm": 0.9304774403572083,
"learning_rate": 1.5262383226697907e-05,
"epoch": 0.92,
"step": 61525
},
{
"loss": 1.1569,
"grad_norm": 1.6813061237335205,
"learning_rate": 1.5187287855576581e-05,
"epoch": 0.92,
"step": 61550
},
{
"loss": 1.1434,
"grad_norm": 1.320822834968567,
"learning_rate": 1.511219248445526e-05,
"epoch": 0.92,
"step": 61575
},
{
"loss": 1.0182,
"grad_norm": 1.2629307508468628,
"learning_rate": 1.5037097113333934e-05,
"epoch": 0.93,
"step": 61600
},
{
"loss": 1.0842,
"grad_norm": 2.044494390487671,
"learning_rate": 1.4962001742212611e-05,
"epoch": 0.93,
"step": 61625
},
{
"loss": 1.0935,
"grad_norm": 1.1815024614334106,
"learning_rate": 1.4886906371091286e-05,
"epoch": 0.93,
"step": 61650
},
{
"loss": 1.1862,
"grad_norm": 1.5776236057281494,
"learning_rate": 1.4811810999969964e-05,
"epoch": 0.93,
"step": 61675
},
{
"loss": 1.0616,
"grad_norm": 2.1838979721069336,
"learning_rate": 1.4736715628848637e-05,
"epoch": 0.93,
"step": 61700
},
{
"loss": 1.1325,
"grad_norm": 1.7168885469436646,
"learning_rate": 1.4661620257727315e-05,
"epoch": 0.93,
"step": 61725
},
{
"loss": 1.1015,
"grad_norm": 1.0847703218460083,
"learning_rate": 1.458652488660599e-05,
"epoch": 0.93,
"step": 61750
},
{
"loss": 1.0838,
"grad_norm": 1.3423173427581787,
"learning_rate": 1.4511429515484667e-05,
"epoch": 0.93,
"step": 61775
},
{
"loss": 1.1676,
"grad_norm": 1.405914306640625,
"learning_rate": 1.4436334144363343e-05,
"epoch": 0.93,
"step": 61800
},
{
"loss": 1.1124,
"grad_norm": 1.570953607559204,
"learning_rate": 1.4361238773242017e-05,
"epoch": 0.93,
"step": 61825
},
{
"loss": 1.1462,
"grad_norm": 2.7975118160247803,
"learning_rate": 1.4286143402120694e-05,
"epoch": 0.93,
"step": 61850
},
{
"loss": 1.117,
"grad_norm": 3.7135069370269775,
"learning_rate": 1.421104803099937e-05,
"epoch": 0.93,
"step": 61875
},
{
"loss": 1.1057,
"grad_norm": 1.8608477115631104,
"learning_rate": 1.4135952659878046e-05,
"epoch": 0.93,
"step": 61900
},
{
"loss": 1.0607,
"grad_norm": 1.1181379556655884,
"learning_rate": 1.406085728875672e-05,
"epoch": 0.93,
"step": 61925
},
{
"loss": 1.1118,
"grad_norm": 1.5385795831680298,
"learning_rate": 1.3985761917635399e-05,
"epoch": 0.93,
"step": 61950
},
{
"loss": 1.1192,
"grad_norm": 1.4369099140167236,
"learning_rate": 1.3910666546514073e-05,
"epoch": 0.93,
"step": 61975
},
{
"loss": 1.1036,
"grad_norm": 1.2244880199432373,
"learning_rate": 1.383557117539275e-05,
"epoch": 0.93,
"step": 62000
},
{
"loss": 1.0913,
"grad_norm": 1.3832460641860962,
"learning_rate": 1.3760475804271425e-05,
"epoch": 0.93,
"step": 62025
},
{
"loss": 1.1684,
"grad_norm": 0.9169008135795593,
"learning_rate": 1.3685380433150103e-05,
"epoch": 0.93,
"step": 62050
},
{
"loss": 1.1886,
"grad_norm": 2.110548973083496,
"learning_rate": 1.3610285062028776e-05,
"epoch": 0.93,
"step": 62075
},
{
"loss": 1.1511,
"grad_norm": 1.203637957572937,
"learning_rate": 1.3535189690907454e-05,
"epoch": 0.93,
"step": 62100
},
{
"loss": 1.1469,
"grad_norm": 1.3341647386550903,
"learning_rate": 1.3460094319786129e-05,
"epoch": 0.93,
"step": 62125
},
{
"loss": 1.1022,
"grad_norm": 1.5815610885620117,
"learning_rate": 1.3384998948664803e-05,
"epoch": 0.93,
"step": 62150
},
{
"loss": 1.0537,
"grad_norm": 1.7284424304962158,
"learning_rate": 1.3309903577543482e-05,
"epoch": 0.93,
"step": 62175
},
{
"loss": 1.0917,
"grad_norm": 1.2951127290725708,
"learning_rate": 1.3234808206422156e-05,
"epoch": 0.93,
"step": 62200
},
{
"loss": 1.0818,
"grad_norm": 1.735390305519104,
"learning_rate": 1.3159712835300833e-05,
"epoch": 0.93,
"step": 62225
},
{
"loss": 1.1024,
"grad_norm": 0.9933769702911377,
"learning_rate": 1.3084617464179508e-05,
"epoch": 0.93,
"step": 62250
},
{
"loss": 1.1004,
"grad_norm": 1.7689695358276367,
"learning_rate": 1.3009522093058185e-05,
"epoch": 0.94,
"step": 62275
},
{
"loss": 1.1127,
"grad_norm": 1.0094436407089233,
"learning_rate": 1.2934426721936859e-05,
"epoch": 0.94,
"step": 62300
},
{
"loss": 1.086,
"grad_norm": 1.3532946109771729,
"learning_rate": 1.2859331350815538e-05,
"epoch": 0.94,
"step": 62325
},
{
"loss": 1.1568,
"grad_norm": 1.7976974248886108,
"learning_rate": 1.2784235979694212e-05,
"epoch": 0.94,
"step": 62350
},
{
"loss": 1.1215,
"grad_norm": 1.748487114906311,
"learning_rate": 1.2709140608572889e-05,
"epoch": 0.94,
"step": 62375
},
{
"loss": 1.0682,
"grad_norm": 1.3425058126449585,
"learning_rate": 1.2634045237451564e-05,
"epoch": 0.94,
"step": 62400
},
{
"loss": 1.0849,
"grad_norm": 2.302241802215576,
"learning_rate": 1.2558949866330242e-05,
"epoch": 0.94,
"step": 62425
},
{
"loss": 1.0932,
"grad_norm": 1.4172135591506958,
"learning_rate": 1.2483854495208915e-05,
"epoch": 0.94,
"step": 62450
},
{
"loss": 1.1309,
"grad_norm": 1.8036898374557495,
"learning_rate": 1.2408759124087593e-05,
"epoch": 0.94,
"step": 62475
},
{
"loss": 1.1123,
"grad_norm": 2.0429811477661133,
"learning_rate": 1.2333663752966268e-05,
"epoch": 0.94,
"step": 62500
},
{
"loss": 1.1829,
"grad_norm": 1.2276302576065063,
"learning_rate": 1.2258568381844944e-05,
"epoch": 0.94,
"step": 62525
},
{
"loss": 1.1149,
"grad_norm": 1.4691849946975708,
"learning_rate": 1.218347301072362e-05,
"epoch": 0.94,
"step": 62550
},
{
"loss": 1.1291,
"grad_norm": 1.780098557472229,
"learning_rate": 1.2108377639602296e-05,
"epoch": 0.94,
"step": 62575
},
{
"loss": 1.0855,
"grad_norm": 1.4932245016098022,
"learning_rate": 1.2033282268480972e-05,
"epoch": 0.94,
"step": 62600
},
{
"loss": 1.0832,
"grad_norm": 1.277098536491394,
"learning_rate": 1.1958186897359647e-05,
"epoch": 0.94,
"step": 62625
},
{
"loss": 1.2369,
"grad_norm": 1.7345349788665771,
"learning_rate": 1.1883091526238323e-05,
"epoch": 0.94,
"step": 62650
},
{
"loss": 1.1604,
"grad_norm": 1.9038455486297607,
"learning_rate": 1.1807996155116998e-05,
"epoch": 0.94,
"step": 62675
},
{
"loss": 1.1065,
"grad_norm": 1.0243260860443115,
"learning_rate": 1.1732900783995675e-05,
"epoch": 0.94,
"step": 62700
},
{
"loss": 1.1043,
"grad_norm": 0.9342716336250305,
"learning_rate": 1.165780541287435e-05,
"epoch": 0.94,
"step": 62725
},
{
"loss": 1.1245,
"grad_norm": 1.554945707321167,
"learning_rate": 1.1582710041753026e-05,
"epoch": 0.94,
"step": 62750
},
{
"loss": 1.074,
"grad_norm": 1.1340545415878296,
"learning_rate": 1.1507614670631703e-05,
"epoch": 0.94,
"step": 62775
},
{
"loss": 1.1226,
"grad_norm": 2.2141757011413574,
"learning_rate": 1.1432519299510379e-05,
"epoch": 0.94,
"step": 62800
},
{
"loss": 1.1022,
"grad_norm": 1.2455902099609375,
"learning_rate": 1.1357423928389054e-05,
"epoch": 0.94,
"step": 62825
},
{
"loss": 1.135,
"grad_norm": 1.0841847658157349,
"learning_rate": 1.1282328557267732e-05,
"epoch": 0.94,
"step": 62850
},
{
"loss": 1.1242,
"grad_norm": 2.3354759216308594,
"learning_rate": 1.1207233186146407e-05,
"epoch": 0.94,
"step": 62875
},
{
"loss": 1.1036,
"grad_norm": 1.0070022344589233,
"learning_rate": 1.1132137815025083e-05,
"epoch": 0.94,
"step": 62900
},
{
"loss": 1.0723,
"grad_norm": 1.8489924669265747,
"learning_rate": 1.105704244390376e-05,
"epoch": 0.95,
"step": 62925
},
{
"loss": 1.0952,
"grad_norm": 1.4337140321731567,
"learning_rate": 1.0981947072782433e-05,
"epoch": 0.95,
"step": 62950
},
{
"loss": 1.092,
"grad_norm": 1.2222257852554321,
"learning_rate": 1.0906851701661109e-05,
"epoch": 0.95,
"step": 62975
},
{
"loss": 1.1448,
"grad_norm": 1.270473837852478,
"learning_rate": 1.0831756330539786e-05,
"epoch": 0.95,
"step": 63000
},
{
"loss": 1.186,
"grad_norm": 2.165717363357544,
"learning_rate": 1.0756660959418462e-05,
"epoch": 0.95,
"step": 63025
},
{
"loss": 1.0859,
"grad_norm": 1.2544116973876953,
"learning_rate": 1.0681565588297137e-05,
"epoch": 0.95,
"step": 63050
},
{
"loss": 1.1105,
"grad_norm": 1.404388666152954,
"learning_rate": 1.0606470217175814e-05,
"epoch": 0.95,
"step": 63075
},
{
"loss": 1.1805,
"grad_norm": 1.3540233373641968,
"learning_rate": 1.0534378660899343e-05,
"epoch": 0.95,
"step": 63100
},
{
"loss": 1.1223,
"grad_norm": 1.7471164464950562,
"learning_rate": 1.0459283289778018e-05,
"epoch": 0.95,
"step": 63125
},
{
"loss": 1.0884,
"grad_norm": 1.466888189315796,
"learning_rate": 1.0384187918656695e-05,
"epoch": 0.95,
"step": 63150
},
{
"loss": 1.1307,
"grad_norm": 1.0170552730560303,
"learning_rate": 1.0309092547535371e-05,
"epoch": 0.95,
"step": 63175
},
{
"loss": 1.0969,
"grad_norm": 1.400824785232544,
"learning_rate": 1.0233997176414046e-05,
"epoch": 0.95,
"step": 63200
},
{
"loss": 1.1182,
"grad_norm": 1.231128454208374,
"learning_rate": 1.0158901805292722e-05,
"epoch": 0.95,
"step": 63225
},
{
"loss": 1.1886,
"grad_norm": 1.5293277502059937,
"learning_rate": 1.0083806434171399e-05,
"epoch": 0.95,
"step": 63250
},
{
"loss": 1.1112,
"grad_norm": 1.315816879272461,
"learning_rate": 1.0008711063050074e-05,
"epoch": 0.95,
"step": 63275
},
{
"loss": 1.1224,
"grad_norm": 1.0503865480422974,
"learning_rate": 9.93361569192875e-06,
"epoch": 0.95,
"step": 63300
},
{
"loss": 1.1516,
"grad_norm": 1.5667177438735962,
"learning_rate": 9.858520320807425e-06,
"epoch": 0.95,
"step": 63325
},
{
"loss": 1.2137,
"grad_norm": 1.9724977016448975,
"learning_rate": 9.783424949686101e-06,
"epoch": 0.95,
"step": 63350
},
{
"loss": 1.1568,
"grad_norm": 1.0087287425994873,
"learning_rate": 9.708329578564778e-06,
"epoch": 0.95,
"step": 63375
},
{
"loss": 1.0902,
"grad_norm": 1.067909836769104,
"learning_rate": 9.633234207443454e-06,
"epoch": 0.95,
"step": 63400
},
{
"loss": 1.1043,
"grad_norm": 2.0196101665496826,
"learning_rate": 9.558138836322129e-06,
"epoch": 0.95,
"step": 63425
},
{
"loss": 1.0683,
"grad_norm": 1.6897556781768799,
"learning_rate": 9.483043465200806e-06,
"epoch": 0.95,
"step": 63450
},
{
"loss": 1.1969,
"grad_norm": 1.4092940092086792,
"learning_rate": 9.407948094079482e-06,
"epoch": 0.95,
"step": 63475
},
{
"loss": 1.1159,
"grad_norm": 1.5447856187820435,
"learning_rate": 9.332852722958157e-06,
"epoch": 0.95,
"step": 63500
},
{
"loss": 1.119,
"grad_norm": 1.5372124910354614,
"learning_rate": 9.257757351836834e-06,
"epoch": 0.95,
"step": 63525
},
{
"loss": 1.1478,
"grad_norm": 1.2936185598373413,
"learning_rate": 9.18266198071551e-06,
"epoch": 0.95,
"step": 63550
},
{
"loss": 1.1206,
"grad_norm": 0.9974470138549805,
"learning_rate": 9.107566609594185e-06,
"epoch": 0.95,
"step": 63575
},
{
"loss": 1.1306,
"grad_norm": 1.8973299264907837,
"learning_rate": 9.03247123847286e-06,
"epoch": 0.96,
"step": 63600
},
{
"loss": 1.1003,
"grad_norm": 1.2269550561904907,
"learning_rate": 8.957375867351536e-06,
"epoch": 0.96,
"step": 63625
},
{
"loss": 1.089,
"grad_norm": 0.9575774073600769,
"learning_rate": 8.882280496230212e-06,
"epoch": 0.96,
"step": 63650
},
{
"loss": 1.1122,
"grad_norm": 1.47458016872406,
"learning_rate": 8.807185125108889e-06,
"epoch": 0.96,
"step": 63675
},
{
"loss": 1.0881,
"grad_norm": 1.407483696937561,
"learning_rate": 8.732089753987564e-06,
"epoch": 0.96,
"step": 63700
},
{
"loss": 1.1247,
"grad_norm": 1.4554179906845093,
"learning_rate": 8.65699438286624e-06,
"epoch": 0.96,
"step": 63725
},
{
"loss": 1.1963,
"grad_norm": 1.2854880094528198,
"learning_rate": 8.581899011744917e-06,
"epoch": 0.96,
"step": 63750
},
{
"loss": 1.1419,
"grad_norm": 1.089011311531067,
"learning_rate": 8.506803640623593e-06,
"epoch": 0.96,
"step": 63775
},
{
"loss": 1.0494,
"grad_norm": 1.1109488010406494,
"learning_rate": 8.431708269502268e-06,
"epoch": 0.96,
"step": 63800
},
{
"loss": 1.084,
"grad_norm": 1.5390805006027222,
"learning_rate": 8.356612898380945e-06,
"epoch": 0.96,
"step": 63825
},
{
"loss": 1.0779,
"grad_norm": 1.3624422550201416,
"learning_rate": 8.28151752725962e-06,
"epoch": 0.96,
"step": 63850
},
{
"loss": 1.092,
"grad_norm": 1.3689720630645752,
"learning_rate": 8.206422156138296e-06,
"epoch": 0.96,
"step": 63875
},
{
"loss": 1.1746,
"grad_norm": 1.2376459836959839,
"learning_rate": 8.131326785016973e-06,
"epoch": 0.96,
"step": 63900
},
{
"loss": 1.147,
"grad_norm": 1.5905089378356934,
"learning_rate": 8.056231413895649e-06,
"epoch": 0.96,
"step": 63925
},
{
"loss": 1.1585,
"grad_norm": 2.2680752277374268,
"learning_rate": 7.981136042774323e-06,
"epoch": 0.96,
"step": 63950
},
{
"loss": 1.1892,
"grad_norm": 1.5471032857894897,
"learning_rate": 7.906040671653e-06,
"epoch": 0.96,
"step": 63975
},
{
"loss": 1.1173,
"grad_norm": 1.456756591796875,
"learning_rate": 7.830945300531675e-06,
"epoch": 0.96,
"step": 64000
},
{
"loss": 1.0896,
"grad_norm": 1.550498604774475,
"learning_rate": 7.75584992941035e-06,
"epoch": 0.96,
"step": 64025
},
{
"loss": 1.0944,
"grad_norm": 1.8201286792755127,
"learning_rate": 7.680754558289028e-06,
"epoch": 0.96,
"step": 64050
},
{
"loss": 1.1145,
"grad_norm": 1.392923355102539,
"learning_rate": 7.605659187167703e-06,
"epoch": 0.96,
"step": 64075
},
{
"loss": 1.103,
"grad_norm": 2.5812623500823975,
"learning_rate": 7.53056381604638e-06,
"epoch": 0.96,
"step": 64100
},
{
"loss": 1.1365,
"grad_norm": 1.7856642007827759,
"learning_rate": 7.455468444925055e-06,
"epoch": 0.96,
"step": 64125
},
{
"loss": 1.0761,
"grad_norm": 1.8361400365829468,
"learning_rate": 7.3803730738037315e-06,
"epoch": 0.96,
"step": 64150
},
{
"loss": 1.0984,
"grad_norm": 1.183370590209961,
"learning_rate": 7.305277702682407e-06,
"epoch": 0.96,
"step": 64175
},
{
"loss": 1.1064,
"grad_norm": 1.8606791496276855,
"learning_rate": 7.230182331561083e-06,
"epoch": 0.96,
"step": 64200
},
{
"loss": 1.1489,
"grad_norm": 1.3013999462127686,
"learning_rate": 7.15508696043976e-06,
"epoch": 0.96,
"step": 64225
},
{
"loss": 1.0736,
"grad_norm": 1.1197832822799683,
"learning_rate": 7.079991589318435e-06,
"epoch": 0.96,
"step": 64250
},
{
"loss": 1.05,
"grad_norm": 1.160477876663208,
"learning_rate": 7.00489621819711e-06,
"epoch": 0.97,
"step": 64275
},
{
"loss": 1.1556,
"grad_norm": 1.7113288640975952,
"learning_rate": 6.929800847075786e-06,
"epoch": 0.97,
"step": 64300
},
{
"loss": 1.154,
"grad_norm": 0.7315987348556519,
"learning_rate": 6.854705475954462e-06,
"epoch": 0.97,
"step": 64325
},
{
"loss": 1.086,
"grad_norm": 1.7214363813400269,
"learning_rate": 6.779610104833138e-06,
"epoch": 0.97,
"step": 64350
},
{
"loss": 1.0921,
"grad_norm": 0.8723170161247253,
"learning_rate": 6.704514733711814e-06,
"epoch": 0.97,
"step": 64375
},
{
"loss": 1.1255,
"grad_norm": 1.9772207736968994,
"learning_rate": 6.6294193625904905e-06,
"epoch": 0.97,
"step": 64400
},
{
"loss": 1.1423,
"grad_norm": 2.272956371307373,
"learning_rate": 6.554323991469166e-06,
"epoch": 0.97,
"step": 64425
},
{
"loss": 1.1113,
"grad_norm": 1.6277108192443848,
"learning_rate": 6.479228620347842e-06,
"epoch": 0.97,
"step": 64450
},
{
"loss": 1.0637,
"grad_norm": 1.5888078212738037,
"learning_rate": 6.404133249226519e-06,
"epoch": 0.97,
"step": 64475
},
{
"loss": 1.1832,
"grad_norm": 1.4354815483093262,
"learning_rate": 6.329037878105194e-06,
"epoch": 0.97,
"step": 64500
},
{
"loss": 1.0812,
"grad_norm": 1.2866464853286743,
"learning_rate": 6.2539425069838705e-06,
"epoch": 0.97,
"step": 64525
},
{
"loss": 1.0742,
"grad_norm": 1.206624984741211,
"learning_rate": 6.178847135862546e-06,
"epoch": 0.97,
"step": 64550
},
{
"loss": 1.1,
"grad_norm": 1.9013807773590088,
"learning_rate": 6.1037517647412214e-06,
"epoch": 0.97,
"step": 64575
},
{
"loss": 1.1164,
"grad_norm": 1.2918732166290283,
"learning_rate": 6.028656393619898e-06,
"epoch": 0.97,
"step": 64600
},
{
"loss": 1.126,
"grad_norm": 0.9611725211143494,
"learning_rate": 5.953561022498574e-06,
"epoch": 0.97,
"step": 64625
},
{
"loss": 1.1867,
"grad_norm": 1.8491181135177612,
"learning_rate": 5.8784656513772496e-06,
"epoch": 0.97,
"step": 64650
},
{
"loss": 1.1415,
"grad_norm": 1.3857682943344116,
"learning_rate": 5.803370280255925e-06,
"epoch": 0.97,
"step": 64675
},
{
"loss": 1.1016,
"grad_norm": 1.7419966459274292,
"learning_rate": 5.728274909134601e-06,
"epoch": 0.97,
"step": 64700
},
{
"loss": 1.1174,
"grad_norm": 2.3053975105285645,
"learning_rate": 5.653179538013277e-06,
"epoch": 0.97,
"step": 64725
},
{
"loss": 1.125,
"grad_norm": 1.3925187587738037,
"learning_rate": 5.578084166891953e-06,
"epoch": 0.97,
"step": 64750
},
{
"loss": 1.0828,
"grad_norm": 2.014289140701294,
"learning_rate": 5.5029887957706295e-06,
"epoch": 0.97,
"step": 64775
},
{
"loss": 1.1461,
"grad_norm": 2.213609457015991,
"learning_rate": 5.427893424649305e-06,
"epoch": 0.97,
"step": 64800
},
{
"loss": 1.0558,
"grad_norm": 1.0734851360321045,
"learning_rate": 5.3527980535279805e-06,
"epoch": 0.97,
"step": 64825
},
{
"loss": 1.1006,
"grad_norm": 1.362658977508545,
"learning_rate": 5.277702682406657e-06,
"epoch": 0.97,
"step": 64850
},
{
"loss": 1.1512,
"grad_norm": 1.9621925354003906,
"learning_rate": 5.202607311285332e-06,
"epoch": 0.97,
"step": 64875
},
{
"loss": 1.0506,
"grad_norm": 1.6093008518218994,
"learning_rate": 5.127511940164009e-06,
"epoch": 0.97,
"step": 64900
},
{
"loss": 1.174,
"grad_norm": 2.4825665950775146,
"learning_rate": 5.052416569042685e-06,
"epoch": 0.98,
"step": 64925
},
{
"loss": 1.0542,
"grad_norm": 1.142391562461853,
"learning_rate": 4.97732119792136e-06,
"epoch": 0.98,
"step": 64950
},
{
"loss": 1.158,
"grad_norm": 2.0994620323181152,
"learning_rate": 4.902225826800036e-06,
"epoch": 0.98,
"step": 64975
},
{
"loss": 1.1055,
"grad_norm": 1.4533177614212036,
"learning_rate": 4.827130455678712e-06,
"epoch": 0.98,
"step": 65000
},
{
"loss": 1.1457,
"grad_norm": 2.113051176071167,
"learning_rate": 4.752035084557388e-06,
"epoch": 0.98,
"step": 65025
},
{
"loss": 1.1301,
"grad_norm": 1.4814103841781616,
"learning_rate": 4.676939713436064e-06,
"epoch": 0.98,
"step": 65050
},
{
"loss": 1.1091,
"grad_norm": 1.3998606204986572,
"learning_rate": 4.60184434231474e-06,
"epoch": 0.98,
"step": 65075
},
{
"loss": 1.1468,
"grad_norm": 1.4728342294692993,
"learning_rate": 4.526748971193416e-06,
"epoch": 0.98,
"step": 65100
},
{
"loss": 1.1008,
"grad_norm": 1.29282808303833,
"learning_rate": 4.451653600072092e-06,
"epoch": 0.98,
"step": 65125
},
{
"loss": 1.0818,
"grad_norm": 0.9691277146339417,
"learning_rate": 4.376558228950768e-06,
"epoch": 0.98,
"step": 65150
},
{
"loss": 1.1651,
"grad_norm": 1.5705621242523193,
"learning_rate": 4.301462857829443e-06,
"epoch": 0.98,
"step": 65175
},
{
"loss": 1.0648,
"grad_norm": 1.7766458988189697,
"learning_rate": 4.2263674867081194e-06,
"epoch": 0.98,
"step": 65200
},
{
"loss": 1.0789,
"grad_norm": 1.3525621891021729,
"learning_rate": 4.151272115586796e-06,
"epoch": 0.98,
"step": 65225
},
{
"loss": 1.0551,
"grad_norm": 1.631650447845459,
"learning_rate": 4.076176744465471e-06,
"epoch": 0.98,
"step": 65250
},
{
"loss": 1.1308,
"grad_norm": 1.7099614143371582,
"learning_rate": 4.0010813733441476e-06,
"epoch": 0.98,
"step": 65275
},
{
"loss": 1.1203,
"grad_norm": 1.104038119316101,
"learning_rate": 3.925986002222823e-06,
"epoch": 0.98,
"step": 65300
},
{
"loss": 1.0968,
"grad_norm": 1.4031529426574707,
"learning_rate": 3.8508906311014985e-06,
"epoch": 0.98,
"step": 65325
},
{
"loss": 1.15,
"grad_norm": 2.0685653686523438,
"learning_rate": 3.775795259980175e-06,
"epoch": 0.98,
"step": 65350
},
{
"loss": 1.0778,
"grad_norm": 1.4602687358856201,
"learning_rate": 3.7006998888588508e-06,
"epoch": 0.98,
"step": 65375
},
{
"loss": 1.1119,
"grad_norm": 1.377066969871521,
"learning_rate": 3.625604517737527e-06,
"epoch": 0.98,
"step": 65400
},
{
"loss": 1.1806,
"grad_norm": 1.3793482780456543,
"learning_rate": 3.550509146616203e-06,
"epoch": 0.98,
"step": 65425
},
{
"loss": 1.0924,
"grad_norm": 1.323262095451355,
"learning_rate": 3.4754137754948785e-06,
"epoch": 0.98,
"step": 65450
},
{
"loss": 1.1011,
"grad_norm": 1.6005733013153076,
"learning_rate": 3.4003184043735544e-06,
"epoch": 0.98,
"step": 65475
},
{
"loss": 1.1188,
"grad_norm": 1.2906062602996826,
"learning_rate": 3.3252230332522303e-06,
"epoch": 0.98,
"step": 65500
},
{
"loss": 1.0887,
"grad_norm": 2.869511365890503,
"learning_rate": 3.250127662130906e-06,
"epoch": 0.98,
"step": 65525
},
{
"loss": 1.1348,
"grad_norm": 1.084037184715271,
"learning_rate": 3.1750322910095825e-06,
"epoch": 0.98,
"step": 65550
},
{
"loss": 1.1218,
"grad_norm": 1.7096983194351196,
"learning_rate": 3.099936919888258e-06,
"epoch": 0.98,
"step": 65575
},
{
"loss": 1.134,
"grad_norm": 2.19433856010437,
"learning_rate": 3.0278453636117873e-06,
"epoch": 0.99,
"step": 65600
},
{
"loss": 1.1709,
"grad_norm": 2.7771689891815186,
"learning_rate": 2.9527499924904628e-06,
"epoch": 0.99,
"step": 65625
},
{
"loss": 1.1184,
"grad_norm": 1.367202877998352,
"learning_rate": 2.877654621369139e-06,
"epoch": 0.99,
"step": 65650
},
{
"loss": 1.1393,
"grad_norm": 1.163167953491211,
"learning_rate": 2.802559250247815e-06,
"epoch": 0.99,
"step": 65675
},
{
"loss": 1.1185,
"grad_norm": 1.9196585416793823,
"learning_rate": 2.727463879126491e-06,
"epoch": 0.99,
"step": 65700
},
{
"loss": 1.0776,
"grad_norm": 1.1097601652145386,
"learning_rate": 2.652368508005167e-06,
"epoch": 0.99,
"step": 65725
},
{
"loss": 1.1115,
"grad_norm": 1.8407388925552368,
"learning_rate": 2.5772731368838427e-06,
"epoch": 0.99,
"step": 65750
},
{
"loss": 1.1397,
"grad_norm": 1.3508464097976685,
"learning_rate": 2.5021777657625186e-06,
"epoch": 0.99,
"step": 65775
},
{
"loss": 1.0408,
"grad_norm": 1.3656666278839111,
"learning_rate": 2.4270823946411945e-06,
"epoch": 0.99,
"step": 65800
},
{
"loss": 1.1232,
"grad_norm": 1.121551275253296,
"learning_rate": 2.3519870235198704e-06,
"epoch": 0.99,
"step": 65825
},
{
"loss": 1.1695,
"grad_norm": 3.1583876609802246,
"learning_rate": 2.2768916523985463e-06,
"epoch": 0.99,
"step": 65850
},
{
"loss": 1.114,
"grad_norm": 1.4626102447509766,
"learning_rate": 2.2017962812772222e-06,
"epoch": 0.99,
"step": 65875
},
{
"loss": 1.1404,
"grad_norm": 1.164562702178955,
"learning_rate": 2.126700910155898e-06,
"epoch": 0.99,
"step": 65900
},
{
"loss": 1.0749,
"grad_norm": 1.151390790939331,
"learning_rate": 2.051605539034574e-06,
"epoch": 0.99,
"step": 65925
},
{
"loss": 1.1223,
"grad_norm": 1.4878361225128174,
"learning_rate": 1.97651016791325e-06,
"epoch": 0.99,
"step": 65950
},
{
"loss": 1.0713,
"grad_norm": 0.9274216294288635,
"learning_rate": 1.9014147967919258e-06,
"epoch": 0.99,
"step": 65975
},
{
"loss": 1.0495,
"grad_norm": 1.1772902011871338,
"learning_rate": 1.8263194256706017e-06,
"epoch": 0.99,
"step": 66000
},
{
"loss": 1.1357,
"grad_norm": 1.2464003562927246,
"learning_rate": 1.7512240545492774e-06,
"epoch": 0.99,
"step": 66025
},
{
"loss": 1.0778,
"grad_norm": 1.813460350036621,
"learning_rate": 1.6761286834279536e-06,
"epoch": 0.99,
"step": 66050
},
{
"loss": 1.1034,
"grad_norm": 1.6727650165557861,
"learning_rate": 1.6010333123066297e-06,
"epoch": 0.99,
"step": 66075
},
{
"loss": 1.1252,
"grad_norm": 1.8909765481948853,
"learning_rate": 1.5259379411853054e-06,
"epoch": 0.99,
"step": 66100
},
{
"loss": 1.0249,
"grad_norm": 1.8321037292480469,
"learning_rate": 1.4508425700639813e-06,
"epoch": 0.99,
"step": 66125
},
{
"loss": 1.0836,
"grad_norm": 1.3860995769500732,
"learning_rate": 1.3757471989426574e-06,
"epoch": 0.99,
"step": 66150
},
{
"loss": 1.0984,
"grad_norm": 1.2683864831924438,
"learning_rate": 1.300651827821333e-06,
"epoch": 0.99,
"step": 66175
},
{
"loss": 1.0977,
"grad_norm": 2.86045503616333,
"learning_rate": 1.225556456700009e-06,
"epoch": 0.99,
"step": 66200
},
{
"loss": 1.138,
"grad_norm": 1.2112616300582886,
"learning_rate": 1.150461085578685e-06,
"epoch": 0.99,
"step": 66225
},
{
"loss": 1.1231,
"grad_norm": 1.550032615661621,
"learning_rate": 1.0753657144573608e-06,
"epoch": 0.99,
"step": 66250
},
{
"loss": 1.1238,
"grad_norm": 1.13444185256958,
"learning_rate": 1.0002703433360369e-06,
"epoch": 1.0,
"step": 66275
},
{
"loss": 1.1818,
"grad_norm": 2.8684732913970947,
"learning_rate": 9.251749722147127e-07,
"epoch": 1.0,
"step": 66300
},
{
"loss": 1.1398,
"grad_norm": 1.3792351484298706,
"learning_rate": 8.500796010933886e-07,
"epoch": 1.0,
"step": 66325
},
{
"loss": 1.1148,
"grad_norm": 1.5899792909622192,
"learning_rate": 7.749842299720645e-07,
"epoch": 1.0,
"step": 66350
},
{
"loss": 1.1708,
"grad_norm": 2.143692970275879,
"learning_rate": 6.998888588507405e-07,
"epoch": 1.0,
"step": 66375
},
{
"loss": 1.1092,
"grad_norm": 1.2674062252044678,
"learning_rate": 6.247934877294164e-07,
"epoch": 1.0,
"step": 66400
},
{
"loss": 1.0326,
"grad_norm": 1.1335889101028442,
"learning_rate": 5.496981166080923e-07,
"epoch": 1.0,
"step": 66425
},
{
"loss": 1.0948,
"grad_norm": 1.5896003246307373,
"learning_rate": 4.7460274548676816e-07,
"epoch": 1.0,
"step": 66450
},
{
"loss": 1.1036,
"grad_norm": 1.4150667190551758,
"learning_rate": 3.995073743654441e-07,
"epoch": 1.0,
"step": 66475
},
{
"loss": 1.14,
"grad_norm": 1.4912337064743042,
"learning_rate": 3.244120032441201e-07,
"epoch": 1.0,
"step": 66500
},
{
"loss": 1.0704,
"grad_norm": 1.5823650360107422,
"learning_rate": 2.493166321227959e-07,
"epoch": 1.0,
"step": 66525
},
{
"loss": 1.1301,
"grad_norm": 1.9806722402572632,
"learning_rate": 1.7422126100147188e-07,
"epoch": 1.0,
"step": 66550
},
{
"loss": 1.1368,
"grad_norm": 1.6522107124328613,
"learning_rate": 9.91258898801478e-08,
"epoch": 1.0,
"step": 66575
},
{
"train_runtime": 164326.412,
"train_samples_per_second": 0.81,
"train_steps_per_second": 0.405,
"total_flos": 7.363589651988972e+17,
"train_loss": 1.1616554066605727,
"epoch": 1.0,
"step": 66583
}
]