{"step": 50, "loss": 4.981022357940674, "lr": 5e-06, "grad_norm": 2.252105712890625, "wall_ms": 2927} {"step": 100, "loss": 5.7412309646606445, "lr": 1e-05, "grad_norm": 2.3073441982269287, "wall_ms": 4664} {"step": 150, "loss": 4.428994655609131, "lr": 1.5e-05, "grad_norm": 2.063769578933716, "wall_ms": 6400} {"step": 200, "loss": 5.589509010314941, "lr": 2e-05, "grad_norm": 2.293863296508789, "wall_ms": 8163} {"step": 250, "loss": 4.816532135009766, "lr": 2.5e-05, "grad_norm": 2.083081007003784, "wall_ms": 9894} {"step": 300, "loss": 4.9852728843688965, "lr": 3e-05, "grad_norm": 2.1867618560791016, "wall_ms": 11631} {"step": 350, "loss": 4.986634254455566, "lr": 3.5e-05, "grad_norm": 2.1337010860443115, "wall_ms": 13398} {"step": 400, "loss": 4.422274589538574, "lr": 4e-05, "grad_norm": 2.0704100131988525, "wall_ms": 15112} {"step": 450, "loss": 5.142458438873291, "lr": 4.5e-05, "grad_norm": 2.1837306022644043, "wall_ms": 16860} {"step": 500, "loss": 4.492672920227051, "lr": 5e-05, "grad_norm": 1.9964773654937744, "wall_ms": 18582} {"step": 550, "loss": 4.756455421447754, "lr": 5.500000000000001e-05, "grad_norm": 2.092428684234619, "wall_ms": 20304} {"step": 600, "loss": 4.151700019836426, "lr": 6e-05, "grad_norm": 1.966403841972351, "wall_ms": 22019} {"step": 650, "loss": 3.984065055847168, "lr": 6.500000000000001e-05, "grad_norm": 1.9158504009246826, "wall_ms": 23733} {"step": 700, "loss": 4.320199012756348, "lr": 7e-05, "grad_norm": 1.9205447435379028, "wall_ms": 25426} {"step": 750, "loss": 3.763932704925537, "lr": 7.500000000000001e-05, "grad_norm": 1.6902859210968018, "wall_ms": 27158} {"step": 800, "loss": 3.6340720653533936, "lr": 8e-05, "grad_norm": 1.6199060678482056, "wall_ms": 28889} {"step": 850, "loss": 3.7553584575653076, "lr": 8.5e-05, "grad_norm": 1.5846203565597534, "wall_ms": 30626} {"step": 900, "loss": 3.2709970474243164, "lr": 9e-05, "grad_norm": 1.3213638067245483, "wall_ms": 32373} {"step": 950, "loss": 3.0713982582092285, "lr": 9.5e-05, "grad_norm": 1.2353734970092773, "wall_ms": 34093} {"step": 1000, "loss": 3.0612316131591797, "lr": 0.0001, "grad_norm": 1.132875680923462, "wall_ms": 35797} {"step": 1050, "loss": 2.860645294189453, "lr": 9.999927386411504e-05, "grad_norm": 1.0716153383255005, "wall_ms": 48537} {"step": 1100, "loss": 2.7380363941192627, "lr": 9.999709547776411e-05, "grad_norm": 0.9688041806221008, "wall_ms": 50271} {"step": 1150, "loss": 3.2391562461853027, "lr": 9.99934649048585e-05, "grad_norm": 1.0119129419326782, "wall_ms": 52009} {"step": 1200, "loss": 2.917262077331543, "lr": 9.998838225191497e-05, "grad_norm": 0.9922711849212646, "wall_ms": 53739} {"step": 1250, "loss": 2.725057601928711, "lr": 9.998184766805256e-05, "grad_norm": 0.9564017653465271, "wall_ms": 55431} {"step": 1300, "loss": 3.0665206909179688, "lr": 9.997386134498827e-05, "grad_norm": 0.9862646460533142, "wall_ms": 57165} {"step": 1350, "loss": 2.772087574005127, "lr": 9.996442351703146e-05, "grad_norm": 0.9301272630691528, "wall_ms": 58904} {"step": 1400, "loss": 2.595355987548828, "lr": 9.995353446107685e-05, "grad_norm": 0.897271990776062, "wall_ms": 60641} {"step": 1450, "loss": 2.5398402214050293, "lr": 9.994119449659657e-05, "grad_norm": 0.8755579590797424, "wall_ms": 62366} {"step": 1500, "loss": 2.6841683387756348, "lr": 9.992740398563063e-05, "grad_norm": 0.8927384614944458, "wall_ms": 64073} {"step": 1550, "loss": 2.599381923675537, "lr": 9.991216333277644e-05, "grad_norm": 0.8667588829994202, "wall_ms": 65776} {"step": 1600, "loss": 2.3356454372406006, "lr": 9.989547298517679e-05, "grad_norm": 0.8550799489021301, "wall_ms": 67513} {"step": 1650, "loss": 2.718405246734619, "lr": 9.987733343250684e-05, "grad_norm": 0.8694663643836975, "wall_ms": 69241} {"step": 1700, "loss": 2.464646577835083, "lr": 9.985774520695977e-05, "grad_norm": 0.8989205956459045, "wall_ms": 70961} {"step": 1750, "loss": 2.5270137786865234, "lr": 9.983670888323103e-05, "grad_norm": 0.9128116965293884, "wall_ms": 72695} {"step": 1800, "loss": 2.5864880084991455, "lr": 9.981422507850163e-05, "grad_norm": 0.890068769454956, "wall_ms": 74418} {"step": 1850, "loss": 2.4129786491394043, "lr": 9.979029445241996e-05, "grad_norm": 0.8390555381774902, "wall_ms": 76124} {"step": 1900, "loss": 2.221975803375244, "lr": 9.976491770708243e-05, "grad_norm": 0.8150762915611267, "wall_ms": 77853} {"step": 1950, "loss": 2.3454837799072266, "lr": 9.973809558701289e-05, "grad_norm": 0.8517900705337524, "wall_ms": 79579} {"step": 2000, "loss": 2.434635877609253, "lr": 9.970982887914081e-05, "grad_norm": 0.8475390672683716, "wall_ms": 81316} {"step": 2050, "loss": 2.4815762042999268, "lr": 9.968011841277813e-05, "grad_norm": 0.8578412532806396, "wall_ms": 93207} {"step": 2100, "loss": 2.6064674854278564, "lr": 9.964896505959497e-05, "grad_norm": 0.8575811386108398, "wall_ms": 94929} {"step": 2150, "loss": 2.426909923553467, "lr": 9.961636973359409e-05, "grad_norm": 0.8410458564758301, "wall_ms": 96650} {"step": 2200, "loss": 2.3857474327087402, "lr": 9.9582333391084e-05, "grad_norm": 0.8360826969146729, "wall_ms": 98420} {"step": 2250, "loss": 2.2796945571899414, "lr": 9.954685703065094e-05, "grad_norm": 0.8515854477882385, "wall_ms": 100139} {"step": 2300, "loss": 2.2954154014587402, "lr": 9.950994169312961e-05, "grad_norm": 0.8667277097702026, "wall_ms": 101841} {"step": 2350, "loss": 2.25789213180542, "lr": 9.947158846157259e-05, "grad_norm": 0.8382138609886169, "wall_ms": 103557} {"step": 2400, "loss": 2.3312792778015137, "lr": 9.943179846121856e-05, "grad_norm": 0.8263556361198425, "wall_ms": 105283} {"step": 2450, "loss": 2.275775194168091, "lr": 9.939057285945933e-05, "grad_norm": 0.8361521363258362, "wall_ms": 106980} {"step": 2500, "loss": 2.1858339309692383, "lr": 9.934791286580555e-05, "grad_norm": 0.821962833404541, "wall_ms": 108682} {"step": 2550, "loss": 2.362473964691162, "lr": 9.930381973185127e-05, "grad_norm": 0.859581708908081, "wall_ms": 110395} {"step": 2600, "loss": 2.3368611335754395, "lr": 9.925829475123716e-05, "grad_norm": 0.8430662155151367, "wall_ms": 112129} {"step": 2650, "loss": 2.3162574768066406, "lr": 9.921133925961257e-05, "grad_norm": 0.8618436455726624, "wall_ms": 113839} {"step": 2700, "loss": 2.095130443572998, "lr": 9.916295463459645e-05, "grad_norm": 0.8041071891784668, "wall_ms": 115579} {"step": 2750, "loss": 2.0400781631469727, "lr": 9.911314229573673e-05, "grad_norm": 0.8217558860778809, "wall_ms": 117304} {"step": 2800, "loss": 2.3432958126068115, "lr": 9.906190370446883e-05, "grad_norm": 0.8449392318725586, "wall_ms": 119003} {"step": 2850, "loss": 2.232372283935547, "lr": 9.900924036407279e-05, "grad_norm": 0.8412013649940491, "wall_ms": 120735} {"step": 2900, "loss": 2.308964490890503, "lr": 9.895515381962903e-05, "grad_norm": 0.825849711894989, "wall_ms": 122489} {"step": 2950, "loss": 2.2970800399780273, "lr": 9.889964565797313e-05, "grad_norm": 0.833280086517334, "wall_ms": 124174} {"step": 3000, "loss": 2.1213788986206055, "lr": 9.88427175076493e-05, "grad_norm": 0.8449626564979553, "wall_ms": 125878} {"step": 3050, "loss": 2.0318360328674316, "lr": 9.878437103886246e-05, "grad_norm": 0.8188025951385498, "wall_ms": 137701} {"step": 3100, "loss": 2.0891270637512207, "lr": 9.872460796342937e-05, "grad_norm": 0.8086628913879395, "wall_ms": 139419} {"step": 3150, "loss": 2.181525230407715, "lr": 9.866343003472837e-05, "grad_norm": 0.8429564237594604, "wall_ms": 141145} {"step": 3200, "loss": 2.228914260864258, "lr": 9.860083904764793e-05, "grad_norm": 0.8539510369300842, "wall_ms": 142858} {"step": 3250, "loss": 2.1307148933410645, "lr": 9.853683683853397e-05, "grad_norm": 0.8099820017814636, "wall_ms": 144562} {"step": 3300, "loss": 2.2703657150268555, "lr": 9.847142528513602e-05, "grad_norm": 0.8939051032066345, "wall_ms": 146325} {"step": 3350, "loss": 1.980086088180542, "lr": 9.84046063065521e-05, "grad_norm": 0.8093340992927551, "wall_ms": 148074} {"step": 3400, "loss": 1.9251084327697754, "lr": 9.833638186317245e-05, "grad_norm": 0.8091058135032654, "wall_ms": 149768} {"step": 3450, "loss": 2.128087282180786, "lr": 9.826675395662196e-05, "grad_norm": 0.8179877996444702, "wall_ms": 151510} {"step": 3500, "loss": 2.1001152992248535, "lr": 9.819572462970153e-05, "grad_norm": 0.8214244246482849, "wall_ms": 153243} {"step": 3550, "loss": 2.1890668869018555, "lr": 9.812329596632806e-05, "grad_norm": 0.8434283137321472, "wall_ms": 154965} {"step": 3600, "loss": 2.001227855682373, "lr": 9.804947009147329e-05, "grad_norm": 0.831041157245636, "wall_ms": 156711} {"step": 3650, "loss": 2.110828399658203, "lr": 9.797424917110158e-05, "grad_norm": 0.844097375869751, "wall_ms": 158470} {"step": 3700, "loss": 1.8172613382339478, "lr": 9.78976354121062e-05, "grad_norm": 0.7621675729751587, "wall_ms": 160172} {"step": 3750, "loss": 1.8767883777618408, "lr": 9.781963106224478e-05, "grad_norm": 0.8152614235877991, "wall_ms": 161902} {"step": 3800, "loss": 1.9717506170272827, "lr": 9.774023841007313e-05, "grad_norm": 0.8197637796401978, "wall_ms": 163663} {"step": 3850, "loss": 2.258715867996216, "lr": 9.765945978487831e-05, "grad_norm": 0.865646243095398, "wall_ms": 165377} {"step": 3900, "loss": 1.99534273147583, "lr": 9.757729755661012e-05, "grad_norm": 0.8122466206550598, "wall_ms": 167116} {"step": 3950, "loss": 1.8075107336044312, "lr": 9.749375413581168e-05, "grad_norm": 0.7979199290275574, "wall_ms": 168840} {"step": 4000, "loss": 2.067093849182129, "lr": 9.740883197354873e-05, "grad_norm": 0.8324880599975586, "wall_ms": 170544} {"step": 4050, "loss": 1.9778571128845215, "lr": 9.732253356133757e-05, "grad_norm": 0.8299995064735413, "wall_ms": 182074} {"step": 4100, "loss": 2.0271129608154297, "lr": 9.72348614310721e-05, "grad_norm": 0.8216760754585266, "wall_ms": 183793} {"step": 4150, "loss": 2.2492616176605225, "lr": 9.71458181549495e-05, "grad_norm": 0.8732245564460754, "wall_ms": 185522} {"step": 4200, "loss": 1.8464481830596924, "lr": 9.705540634539474e-05, "grad_norm": 0.8248429894447327, "wall_ms": 187260} {"step": 4250, "loss": 2.018364191055298, "lr": 9.696362865498395e-05, "grad_norm": 0.8102579116821289, "wall_ms": 188964} {"step": 4300, "loss": 2.022132635116577, "lr": 9.68704877763666e-05, "grad_norm": 0.8345473408699036, "wall_ms": 190714} {"step": 4350, "loss": 2.084688186645508, "lr": 9.677598644218653e-05, "grad_norm": 0.8384360074996948, "wall_ms": 192452} {"step": 4400, "loss": 2.0086190700531006, "lr": 9.668012742500166e-05, "grad_norm": 0.8369913697242737, "wall_ms": 194211} {"step": 4450, "loss": 1.971854329109192, "lr": 9.658291353720285e-05, "grad_norm": 0.8395159244537354, "wall_ms": 195953} {"step": 4500, "loss": 1.6052772998809814, "lr": 9.648434763093118e-05, "grad_norm": 0.752248227596283, "wall_ms": 197652} {"step": 4550, "loss": 1.8128163814544678, "lr": 9.638443259799439e-05, "grad_norm": 0.846900224685669, "wall_ms": 199359} {"step": 4600, "loss": 1.8526911735534668, "lr": 9.628317136978201e-05, "grad_norm": 0.801855742931366, "wall_ms": 201087} {"step": 4650, "loss": 1.745560884475708, "lr": 9.618056691717934e-05, "grad_norm": 0.7948002815246582, "wall_ms": 202806} {"step": 4700, "loss": 1.6741844415664673, "lr": 9.607662225048037e-05, "grad_norm": 0.7691972255706787, "wall_ms": 204536} {"step": 4750, "loss": 1.7569997310638428, "lr": 9.597134041929928e-05, "grad_norm": 0.8564541339874268, "wall_ms": 206237} {"step": 4800, "loss": 1.9713107347488403, "lr": 9.58647245124812e-05, "grad_norm": 0.8349446654319763, "wall_ms": 207958} {"step": 4850, "loss": 1.965043544769287, "lr": 9.575677765801137e-05, "grad_norm": 0.8774456977844238, "wall_ms": 209685} {"step": 4900, "loss": 1.920728325843811, "lr": 9.564750302292357e-05, "grad_norm": 0.841619074344635, "wall_ms": 211420} {"step": 4950, "loss": 1.9048669338226318, "lr": 9.553690381320699e-05, "grad_norm": 0.8352078795433044, "wall_ms": 213103} {"step": 5000, "loss": 1.702864408493042, "lr": 9.542498327371238e-05, "grad_norm": 0.8108139634132385, "wall_ms": 214816} {"step": 5050, "loss": 1.5579715967178345, "lr": 9.531174468805669e-05, "grad_norm": 0.7652623057365417, "wall_ms": 226849} {"step": 5100, "loss": 1.7233974933624268, "lr": 9.519719137852681e-05, "grad_norm": 0.8120588660240173, "wall_ms": 228557} {"step": 5150, "loss": 1.850736141204834, "lr": 9.508132670598211e-05, "grad_norm": 0.8244868516921997, "wall_ms": 230274} {"step": 5200, "loss": 1.9520031213760376, "lr": 9.496415406975575e-05, "grad_norm": 0.8594580292701721, "wall_ms": 231986} {"step": 5250, "loss": 1.671244740486145, "lr": 9.484567690755506e-05, "grad_norm": 0.7666727900505066, "wall_ms": 233692} {"step": 5300, "loss": 1.850197434425354, "lr": 9.47258986953606e-05, "grad_norm": 0.8453735709190369, "wall_ms": 235424} {"step": 5350, "loss": 1.8554563522338867, "lr": 9.460482294732421e-05, "grad_norm": 0.8660063147544861, "wall_ms": 237152} {"step": 5400, "loss": 1.881805658340454, "lr": 9.448245321566592e-05, "grad_norm": 0.853525698184967, "wall_ms": 238874} {"step": 5450, "loss": 1.68351149559021, "lr": 9.435879309056969e-05, "grad_norm": 0.7786166667938232, "wall_ms": 240621} {"step": 5500, "loss": 1.819859266281128, "lr": 9.423384620007814e-05, "grad_norm": 0.8194922804832458, "wall_ms": 242302} {"step": 5550, "loss": 1.5244700908660889, "lr": 9.410761620998604e-05, "grad_norm": 0.7755674123764038, "wall_ms": 244028} {"step": 5600, "loss": 1.6860429048538208, "lr": 9.39801068237328e-05, "grad_norm": 0.8180768489837646, "wall_ms": 245746} {"step": 5650, "loss": 1.6363520622253418, "lr": 9.385132178229384e-05, "grad_norm": 0.8221612572669983, "wall_ms": 247455} {"step": 5700, "loss": 1.803638219833374, "lr": 9.372126486407073e-05, "grad_norm": 0.8446292281150818, "wall_ms": 249197} {"step": 5750, "loss": 1.5938254594802856, "lr": 9.358993988478052e-05, "grad_norm": 0.8051971197128296, "wall_ms": 250898} {"step": 5800, "loss": 1.6832702159881592, "lr": 9.345735069734357e-05, "grad_norm": 0.8405115008354187, "wall_ms": 252633} {"step": 5850, "loss": 1.9176671504974365, "lr": 9.332350119177069e-05, "grad_norm": 0.8510113954544067, "wall_ms": 254357} {"step": 5900, "loss": 1.5823216438293457, "lr": 9.318839529504891e-05, "grad_norm": 0.7735613584518433, "wall_ms": 256101} {"step": 5950, "loss": 1.7125014066696167, "lr": 9.305203697102627e-05, "grad_norm": 0.8223954439163208, "wall_ms": 257770} {"step": 6000, "loss": 1.7309885025024414, "lr": 9.291443022029567e-05, "grad_norm": 0.8021047711372375, "wall_ms": 259492} {"step": 6050, "loss": 1.6076606512069702, "lr": 9.277557908007723e-05, "grad_norm": 0.7435202598571777, "wall_ms": 276413} {"step": 6100, "loss": 1.53173828125, "lr": 9.26354876241001e-05, "grad_norm": 0.8360872268676758, "wall_ms": 278151} {"step": 6150, "loss": 1.7547814846038818, "lr": 9.249415996248278e-05, "grad_norm": 0.8933855295181274, "wall_ms": 279858} {"step": 6200, "loss": 1.690009593963623, "lr": 9.235160024161264e-05, "grad_norm": 0.860890805721283, "wall_ms": 281580} {"step": 6250, "loss": 1.8567678928375244, "lr": 9.220781264402413e-05, "grad_norm": 0.8851020336151123, "wall_ms": 283335} {"step": 6300, "loss": 1.5965412855148315, "lr": 9.206280138827628e-05, "grad_norm": 0.8255687952041626, "wall_ms": 285064} {"step": 6350, "loss": 1.7943873405456543, "lr": 9.19165707288287e-05, "grad_norm": 0.8414090275764465, "wall_ms": 286770} {"step": 6400, "loss": 1.8521709442138672, "lr": 9.176912495591695e-05, "grad_norm": 0.9002867937088013, "wall_ms": 288510} {"step": 6450, "loss": 1.7586954832077026, "lr": 9.162046839542654e-05, "grad_norm": 0.8267038464546204, "wall_ms": 290257} {"step": 6500, "loss": 1.5703048706054688, "lr": 9.147060540876609e-05, "grad_norm": 0.8453563451766968, "wall_ms": 291963} {"step": 6550, "loss": 1.4527912139892578, "lr": 9.131954039273932e-05, "grad_norm": 0.7429128885269165, "wall_ms": 293663} {"step": 6600, "loss": 1.6045023202896118, "lr": 9.116727777941612e-05, "grad_norm": 0.8460984826087952, "wall_ms": 295406} {"step": 6650, "loss": 1.6694273948669434, "lr": 9.101382203600241e-05, "grad_norm": 0.8415221571922302, "wall_ms": 297150} {"step": 6700, "loss": 1.512801170349121, "lr": 9.085917766470922e-05, "grad_norm": 0.8496189713478088, "wall_ms": 298855} {"step": 6750, "loss": 1.6897666454315186, "lr": 9.070334920262046e-05, "grad_norm": 0.8762431740760803, "wall_ms": 300605} {"step": 6800, "loss": 1.4998540878295898, "lr": 9.05463412215599e-05, "grad_norm": 0.7869887351989746, "wall_ms": 302352} {"step": 6850, "loss": 1.6579943895339966, "lr": 9.038815832795702e-05, "grad_norm": 0.8011910319328308, "wall_ms": 304045} {"step": 6900, "loss": 1.5785235166549683, "lr": 9.022880516271183e-05, "grad_norm": 0.8164384365081787, "wall_ms": 305797} {"step": 6950, "loss": 1.6391525268554688, "lr": 9.006828640105872e-05, "grad_norm": 0.839449405670166, "wall_ms": 307532} {"step": 7000, "loss": 1.5363757610321045, "lr": 8.990660675242936e-05, "grad_norm": 0.8037105202674866, "wall_ms": 309250} {"step": 7050, "loss": 1.5864999294281006, "lr": 8.974377096031444e-05, "grad_norm": 0.8749929070472717, "wall_ms": 320735} {"step": 7100, "loss": 1.369107961654663, "lr": 8.957978380212454e-05, "grad_norm": 0.7782756090164185, "wall_ms": 322437} {"step": 7150, "loss": 1.6449577808380127, "lr": 8.941465008904998e-05, "grad_norm": 0.8410767912864685, "wall_ms": 324182} {"step": 7200, "loss": 1.5831081867218018, "lr": 8.924837466591961e-05, "grad_norm": 0.8471114039421082, "wall_ms": 325885} {"step": 7250, "loss": 1.6519050598144531, "lr": 8.908096241105879e-05, "grad_norm": 0.8668118715286255, "wall_ms": 327578} {"step": 7300, "loss": 1.604102373123169, "lr": 8.89124182361461e-05, "grad_norm": 0.8457621335983276, "wall_ms": 329273} {"step": 7350, "loss": 1.4868223667144775, "lr": 8.874274708606943e-05, "grad_norm": 0.8246743083000183, "wall_ms": 330970} {"step": 7400, "loss": 1.6033059358596802, "lr": 8.857195393878063e-05, "grad_norm": 0.8149073719978333, "wall_ms": 332679} {"step": 7450, "loss": 1.4913185834884644, "lr": 8.840004380514981e-05, "grad_norm": 0.8061311841011047, "wall_ms": 334339} {"step": 7500, "loss": 1.571063756942749, "lr": 8.8227021728818e-05, "grad_norm": 0.8377233743667603, "wall_ms": 336027} {"step": 7550, "loss": 1.6114795207977295, "lr": 8.805289278604944e-05, "grad_norm": 0.86509770154953, "wall_ms": 337762} {"step": 7600, "loss": 1.6472111940383911, "lr": 8.787766208558243e-05, "grad_norm": 0.8547335863113403, "wall_ms": 339495} {"step": 7650, "loss": 1.494725227355957, "lr": 8.77013347684796e-05, "grad_norm": 0.7957534790039062, "wall_ms": 341247} {"step": 7700, "loss": 1.3077962398529053, "lr": 8.7523916007977e-05, "grad_norm": 0.758158266544342, "wall_ms": 342988} {"step": 7750, "loss": 1.4685697555541992, "lr": 8.734541100933239e-05, "grad_norm": 0.8582317233085632, "wall_ms": 344707} {"step": 7800, "loss": 1.338484764099121, "lr": 8.716582500967239e-05, "grad_norm": 0.7701882123947144, "wall_ms": 346479} {"step": 7850, "loss": 1.4787976741790771, "lr": 8.6985163277839e-05, "grad_norm": 0.8533888459205627, "wall_ms": 348206} {"step": 7900, "loss": 1.519551396369934, "lr": 8.680343111423492e-05, "grad_norm": 0.8808371424674988, "wall_ms": 349929} {"step": 7950, "loss": 1.6874083280563354, "lr": 8.6620633850668e-05, "grad_norm": 0.9264625310897827, "wall_ms": 351663} {"step": 8000, "loss": 1.537314772605896, "lr": 8.643677685019498e-05, "grad_norm": 0.838169276714325, "wall_ms": 353400} {"step": 8050, "loss": 1.6105272769927979, "lr": 8.625186550696393e-05, "grad_norm": 0.8879337310791016, "wall_ms": 365744} {"step": 8100, "loss": 1.3617974519729614, "lr": 8.606590524605614e-05, "grad_norm": 0.8258153200149536, "wall_ms": 367440} {"step": 8150, "loss": 1.5144351720809937, "lr": 8.587890152332692e-05, "grad_norm": 0.8906125426292419, "wall_ms": 369193} {"step": 8200, "loss": 1.3649393320083618, "lr": 8.569085982524551e-05, "grad_norm": 0.8047677874565125, "wall_ms": 370952} {"step": 8250, "loss": 1.5343724489212036, "lr": 8.55017856687341e-05, "grad_norm": 0.8776378631591797, "wall_ms": 372654} {"step": 8300, "loss": 1.5436773300170898, "lr": 8.531168460100608e-05, "grad_norm": 0.8368867039680481, "wall_ms": 374395} {"step": 8350, "loss": 1.4783859252929688, "lr": 8.512056219940306e-05, "grad_norm": 0.8244237899780273, "wall_ms": 376118} {"step": 8400, "loss": 1.3440756797790527, "lr": 8.492842407123156e-05, "grad_norm": 0.7829580903053284, "wall_ms": 377837} {"step": 8450, "loss": 1.4184643030166626, "lr": 8.473527585359818e-05, "grad_norm": 0.8419895172119141, "wall_ms": 379580} {"step": 8500, "loss": 1.4920107126235962, "lr": 8.454112321324446e-05, "grad_norm": 0.842334508895874, "wall_ms": 381262} {"step": 8550, "loss": 1.3251382112503052, "lr": 8.434597184638042e-05, "grad_norm": 0.7784276008605957, "wall_ms": 382973} {"step": 8600, "loss": 1.5268762111663818, "lr": 8.414982747851764e-05, "grad_norm": 0.8427223563194275, "wall_ms": 384670} {"step": 8650, "loss": 1.410663366317749, "lr": 8.395269586430112e-05, "grad_norm": 0.813461184501648, "wall_ms": 386413} {"step": 8700, "loss": 1.5312049388885498, "lr": 8.37545827873405e-05, "grad_norm": 0.8394680619239807, "wall_ms": 388117} {"step": 8750, "loss": 1.1280266046524048, "lr": 8.355549406004043e-05, "grad_norm": 0.7387124300003052, "wall_ms": 389872} {"step": 8800, "loss": 1.2595707178115845, "lr": 8.335543552342997e-05, "grad_norm": 0.8145673274993896, "wall_ms": 391575} {"step": 8850, "loss": 1.4885333776474, "lr": 8.315441304699125e-05, "grad_norm": 0.9074957370758057, "wall_ms": 393322} {"step": 8900, "loss": 1.2718966007232666, "lr": 8.295243252848722e-05, "grad_norm": 0.8060879707336426, "wall_ms": 395025} {"step": 8950, "loss": 1.387513279914856, "lr": 8.27494998937887e-05, "grad_norm": 0.8491438627243042, "wall_ms": 396767} {"step": 9000, "loss": 1.4901556968688965, "lr": 8.254562109670048e-05, "grad_norm": 0.8222256898880005, "wall_ms": 398449} {"step": 9050, "loss": 1.3749970197677612, "lr": 8.234080211878663e-05, "grad_norm": 0.7649692893028259, "wall_ms": 411839} {"step": 9100, "loss": 1.3517208099365234, "lr": 8.213504896919502e-05, "grad_norm": 0.8276722431182861, "wall_ms": 413581} {"step": 9150, "loss": 1.2928478717803955, "lr": 8.1928367684481e-05, "grad_norm": 0.8359339237213135, "wall_ms": 415313} {"step": 9200, "loss": 1.5046391487121582, "lr": 8.172076432843038e-05, "grad_norm": 0.8909878134727478, "wall_ms": 417044} {"step": 9250, "loss": 1.252079725265503, "lr": 8.151224499188134e-05, "grad_norm": 0.80852210521698, "wall_ms": 418769} {"step": 9300, "loss": 1.4965425729751587, "lr": 8.130281579254597e-05, "grad_norm": 0.8924341797828674, "wall_ms": 420508} {"step": 9350, "loss": 1.358128309249878, "lr": 8.10924828748306e-05, "grad_norm": 0.8521621823310852, "wall_ms": 422257} {"step": 9400, "loss": 1.4754784107208252, "lr": 8.088125240965562e-05, "grad_norm": 0.8612163662910461, "wall_ms": 423956} {"step": 9450, "loss": 1.4372576475143433, "lr": 8.066913059427436e-05, "grad_norm": 0.8494561314582825, "wall_ms": 425671} {"step": 9500, "loss": 1.38088059425354, "lr": 8.045612365209139e-05, "grad_norm": 0.8517435193061829, "wall_ms": 427395} {"step": 9550, "loss": 1.23662269115448, "lr": 8.024223783247978e-05, "grad_norm": 0.8071579337120056, "wall_ms": 429129} {"step": 9600, "loss": 1.3284133672714233, "lr": 8.00274794105979e-05, "grad_norm": 0.8217832446098328, "wall_ms": 430838} {"step": 9650, "loss": 1.2756754159927368, "lr": 7.981185468720522e-05, "grad_norm": 0.8096398711204529, "wall_ms": 432590} {"step": 9700, "loss": 1.3750932216644287, "lr": 7.959536998847742e-05, "grad_norm": 0.8602913618087769, "wall_ms": 434312} {"step": 9750, "loss": 1.3048489093780518, "lr": 7.937803166582096e-05, "grad_norm": 0.8028217554092407, "wall_ms": 436038} {"step": 9800, "loss": 1.300126314163208, "lr": 7.915984609568652e-05, "grad_norm": 0.78887939453125, "wall_ms": 437790} {"step": 9850, "loss": 1.3139381408691406, "lr": 7.894081967938207e-05, "grad_norm": 0.7995789051055908, "wall_ms": 439492} {"step": 9900, "loss": 1.3823362588882446, "lr": 7.872095884288499e-05, "grad_norm": 0.8315764665603638, "wall_ms": 441191} {"step": 9950, "loss": 1.3642982244491577, "lr": 7.850027003665364e-05, "grad_norm": 0.8326452374458313, "wall_ms": 442881} {"step": 10000, "loss": 1.3240914344787598, "lr": 7.827875973543793e-05, "grad_norm": 0.8761479258537292, "wall_ms": 444605} {"step": 10050, "loss": 1.3421809673309326, "lr": 7.805643443808956e-05, "grad_norm": 0.8619383573532104, "wall_ms": 455871} {"step": 10100, "loss": 1.5003067255020142, "lr": 7.783330066737117e-05, "grad_norm": 0.8941632509231567, "wall_ms": 457582} {"step": 10150, "loss": 1.410672903060913, "lr": 7.760936496976515e-05, "grad_norm": 0.8553872108459473, "wall_ms": 459347} {"step": 10200, "loss": 1.3067631721496582, "lr": 7.738463391528142e-05, "grad_norm": 0.843177855014801, "wall_ms": 461101} {"step": 10250, "loss": 1.5024006366729736, "lr": 7.715911409726476e-05, "grad_norm": 0.9571904540061951, "wall_ms": 462810} {"step": 10300, "loss": 1.2899487018585205, "lr": 7.693281213220135e-05, "grad_norm": 0.8346960544586182, "wall_ms": 464530} {"step": 10350, "loss": 1.3370059728622437, "lr": 7.67057346595246e-05, "grad_norm": 0.832564115524292, "wall_ms": 466226} {"step": 10400, "loss": 1.2906372547149658, "lr": 7.647788834142044e-05, "grad_norm": 0.8558885455131531, "wall_ms": 467935} {"step": 10450, "loss": 1.2203096151351929, "lr": 7.624927986263182e-05, "grad_norm": 0.8198251724243164, "wall_ms": 469663} {"step": 10500, "loss": 1.2420717477798462, "lr": 7.601991593026258e-05, "grad_norm": 0.8418638706207275, "wall_ms": 471390} {"step": 10550, "loss": 1.11480712890625, "lr": 7.578980327358068e-05, "grad_norm": 0.8306732773780823, "wall_ms": 473136} {"step": 10600, "loss": 1.3344184160232544, "lr": 7.555894864382078e-05, "grad_norm": 0.8826112151145935, "wall_ms": 474866} {"step": 10650, "loss": 1.317108392715454, "lr": 7.532735881398612e-05, "grad_norm": 0.8380564451217651, "wall_ms": 476595} {"step": 10700, "loss": 1.3113672733306885, "lr": 7.509504057864992e-05, "grad_norm": 0.8753242492675781, "wall_ms": 478260} {"step": 10750, "loss": 1.489659309387207, "lr": 7.486200075375585e-05, "grad_norm": 0.8985211849212646, "wall_ms": 479962} {"step": 10800, "loss": 1.2629674673080444, "lr": 7.46282461764183e-05, "grad_norm": 0.8080044984817505, "wall_ms": 481699} {"step": 10850, "loss": 1.340064525604248, "lr": 7.439378370472155e-05, "grad_norm": 0.9930605292320251, "wall_ms": 483395} {"step": 10900, "loss": 1.3676881790161133, "lr": 7.41586202175187e-05, "grad_norm": 0.8749220967292786, "wall_ms": 485154} {"step": 10950, "loss": 1.3557944297790527, "lr": 7.392276261422985e-05, "grad_norm": 0.8043420314788818, "wall_ms": 486882} {"step": 11000, "loss": 1.2265461683273315, "lr": 7.368621781463961e-05, "grad_norm": 0.8190564513206482, "wall_ms": 488610} {"step": 11050, "loss": 1.2214077711105347, "lr": 7.344899275869415e-05, "grad_norm": 0.8588452339172363, "wall_ms": 500561} {"step": 11100, "loss": 1.3136324882507324, "lr": 7.321109440629755e-05, "grad_norm": 0.7996401190757751, "wall_ms": 502282} {"step": 11150, "loss": 1.282321810722351, "lr": 7.297252973710757e-05, "grad_norm": 0.8383637070655823, "wall_ms": 503997} {"step": 11200, "loss": 1.2052867412567139, "lr": 7.273330575033103e-05, "grad_norm": 0.8522917628288269, "wall_ms": 505746} {"step": 11250, "loss": 1.3049485683441162, "lr": 7.249342946451824e-05, "grad_norm": 0.9361868500709534, "wall_ms": 507467} {"step": 11300, "loss": 1.1886632442474365, "lr": 7.225290791735725e-05, "grad_norm": 0.8596731424331665, "wall_ms": 509170} {"step": 11350, "loss": 1.220069169998169, "lr": 7.201174816546734e-05, "grad_norm": 0.8289191126823425, "wall_ms": 510864} {"step": 11400, "loss": 1.1266655921936035, "lr": 7.176995728419192e-05, "grad_norm": 0.8435049653053284, "wall_ms": 512605} {"step": 11450, "loss": 1.4654338359832764, "lr": 7.152754236739101e-05, "grad_norm": 0.931662917137146, "wall_ms": 514332} {"step": 11500, "loss": 1.0776749849319458, "lr": 7.128451052723311e-05, "grad_norm": 0.8250371813774109, "wall_ms": 516060} {"step": 11550, "loss": 1.2843378782272339, "lr": 7.104086889398649e-05, "grad_norm": 0.8430884480476379, "wall_ms": 517793} {"step": 11600, "loss": 1.18546462059021, "lr": 7.07966246158101e-05, "grad_norm": 0.8195156455039978, "wall_ms": 519486} {"step": 11650, "loss": 1.16171395778656, "lr": 7.055178485854371e-05, "grad_norm": 0.8501570820808411, "wall_ms": 521190} {"step": 11700, "loss": 1.2512246370315552, "lr": 7.030635680549779e-05, "grad_norm": 0.849337637424469, "wall_ms": 522901} {"step": 11750, "loss": 1.1213500499725342, "lr": 7.006034765724269e-05, "grad_norm": 0.8445180058479309, "wall_ms": 524602} {"step": 11800, "loss": 1.2376741170883179, "lr": 6.981376463139745e-05, "grad_norm": 0.8497915267944336, "wall_ms": 526295} {"step": 11850, "loss": 1.293068289756775, "lr": 6.956661496241795e-05, "grad_norm": 0.9009845852851868, "wall_ms": 528033} {"step": 11900, "loss": 1.1829347610473633, "lr": 6.931890590138475e-05, "grad_norm": 0.8331034779548645, "wall_ms": 529749} {"step": 11950, "loss": 1.0784697532653809, "lr": 6.907064471579032e-05, "grad_norm": 0.7947927713394165, "wall_ms": 531461} {"step": 12000, "loss": 0.996482789516449, "lr": 6.882183868932578e-05, "grad_norm": 0.778404951095581, "wall_ms": 533214} {"step": 12050, "loss": 1.2302013635635376, "lr": 6.857249512166725e-05, "grad_norm": 0.8443679809570312, "wall_ms": 544959} {"step": 12100, "loss": 1.300133228302002, "lr": 6.832262132826171e-05, "grad_norm": 0.8940951824188232, "wall_ms": 546696} {"step": 12150, "loss": 1.0288845300674438, "lr": 6.807222464011228e-05, "grad_norm": 0.771960973739624, "wall_ms": 548402} {"step": 12200, "loss": 1.116161823272705, "lr": 6.782131240356329e-05, "grad_norm": 0.8237959146499634, "wall_ms": 550120} {"step": 12250, "loss": 1.058748722076416, "lr": 6.756989198008454e-05, "grad_norm": 0.7707725763320923, "wall_ms": 551813} {"step": 12300, "loss": 1.187689185142517, "lr": 6.731797074605553e-05, "grad_norm": 0.8500635027885437, "wall_ms": 553514} {"step": 12350, "loss": 1.2747702598571777, "lr": 6.706555609254889e-05, "grad_norm": 0.8614800572395325, "wall_ms": 555256} {"step": 12400, "loss": 1.1681053638458252, "lr": 6.681265542511366e-05, "grad_norm": 0.8286347985267639, "wall_ms": 556967} {"step": 12450, "loss": 1.1450082063674927, "lr": 6.655927616355787e-05, "grad_norm": 0.8957437872886658, "wall_ms": 558666} {"step": 12500, "loss": 1.0977509021759033, "lr": 6.630542574173101e-05, "grad_norm": 0.8480830192565918, "wall_ms": 560368} {"step": 12550, "loss": 1.2448737621307373, "lr": 6.605111160730584e-05, "grad_norm": 0.8294087648391724, "wall_ms": 562111} {"step": 12600, "loss": 1.3551443815231323, "lr": 6.57963412215599e-05, "grad_norm": 0.8839945197105408, "wall_ms": 563823} {"step": 12650, "loss": 1.1198320388793945, "lr": 6.55411220591566e-05, "grad_norm": 0.8773590922355652, "wall_ms": 565552} {"step": 12700, "loss": 1.2205414772033691, "lr": 6.528546160792592e-05, "grad_norm": 0.8720853328704834, "wall_ms": 567277} {"step": 12750, "loss": 1.1470165252685547, "lr": 6.502936736864477e-05, "grad_norm": 0.817909836769104, "wall_ms": 569007} {"step": 12800, "loss": 1.0773601531982422, "lr": 6.477284685481687e-05, "grad_norm": 0.843097984790802, "wall_ms": 570725} {"step": 12850, "loss": 1.1868212223052979, "lr": 6.451590759245231e-05, "grad_norm": 0.8176910281181335, "wall_ms": 572428} {"step": 12900, "loss": 1.120443344116211, "lr": 6.425855711984682e-05, "grad_norm": 0.8551414608955383, "wall_ms": 574146} {"step": 12950, "loss": 1.2869683504104614, "lr": 6.400080298736052e-05, "grad_norm": 0.9839915633201599, "wall_ms": 575869} {"step": 13000, "loss": 1.2896708250045776, "lr": 6.374265275719645e-05, "grad_norm": 0.8778011798858643, "wall_ms": 577604} {"step": 13050, "loss": 1.0625834465026855, "lr": 6.348411400317865e-05, "grad_norm": 0.8064904808998108, "wall_ms": 589080} {"step": 13100, "loss": 1.0802499055862427, "lr": 6.322519431053006e-05, "grad_norm": 0.8312416076660156, "wall_ms": 590823} {"step": 13150, "loss": 1.0432072877883911, "lr": 6.296590127564989e-05, "grad_norm": 0.838089644908905, "wall_ms": 592529} {"step": 13200, "loss": 1.1150588989257812, "lr": 6.270624250589073e-05, "grad_norm": 0.8720580339431763, "wall_ms": 594248} {"step": 13250, "loss": 1.1227056980133057, "lr": 6.244622561933543e-05, "grad_norm": 0.8269084692001343, "wall_ms": 595947} {"step": 13300, "loss": 1.3046036958694458, "lr": 6.21858582445736e-05, "grad_norm": 0.8997644186019897, "wall_ms": 597635} {"step": 13350, "loss": 1.0315501689910889, "lr": 6.19251480204777e-05, "grad_norm": 0.8063523173332214, "wall_ms": 599365} {"step": 13400, "loss": 1.1844943761825562, "lr": 6.166410259597902e-05, "grad_norm": 0.9368064999580383, "wall_ms": 601059} {"step": 13450, "loss": 1.0434523820877075, "lr": 6.140272962984323e-05, "grad_norm": 0.8528125286102295, "wall_ms": 602767} {"step": 13500, "loss": 1.2159578800201416, "lr": 6.11410367904457e-05, "grad_norm": 0.8766202330589294, "wall_ms": 604461} {"step": 13550, "loss": 1.219970941543579, "lr": 6.0879031755546466e-05, "grad_norm": 0.8764426112174988, "wall_ms": 606210} {"step": 13600, "loss": 1.0643775463104248, "lr": 6.061672221206507e-05, "grad_norm": 0.8096029758453369, "wall_ms": 607918} {"step": 13650, "loss": 1.1121946573257446, "lr": 6.035411585585492e-05, "grad_norm": 0.8781654238700867, "wall_ms": 609636} {"step": 13700, "loss": 1.3689978122711182, "lr": 6.00912203914776e-05, "grad_norm": 0.940263032913208, "wall_ms": 611387} {"step": 13750, "loss": 1.1343265771865845, "lr": 5.982804353197676e-05, "grad_norm": 0.8792983293533325, "wall_ms": 613131} {"step": 13800, "loss": 1.0741926431655884, "lr": 5.956459299865188e-05, "grad_norm": 0.8380674719810486, "wall_ms": 614845} {"step": 13850, "loss": 1.2967398166656494, "lr": 5.93008765208317e-05, "grad_norm": 0.960627019405365, "wall_ms": 616569} {"step": 13900, "loss": 1.0509577989578247, "lr": 5.903690183564743e-05, "grad_norm": 0.8608508706092834, "wall_ms": 618296} {"step": 13950, "loss": 1.1173510551452637, "lr": 5.8772676687805805e-05, "grad_norm": 0.8836365938186646, "wall_ms": 620010} {"step": 14000, "loss": 1.0804780721664429, "lr": 5.8508208829361867e-05, "grad_norm": 0.8634904026985168, "wall_ms": 621724} {"step": 14050, "loss": 0.9832167029380798, "lr": 5.8243506019491436e-05, "grad_norm": 0.839834451675415, "wall_ms": 633519} {"step": 14100, "loss": 1.1871933937072754, "lr": 5.7978576024263586e-05, "grad_norm": 0.8580115437507629, "wall_ms": 635221} {"step": 14150, "loss": 1.1967799663543701, "lr": 5.7713426616412716e-05, "grad_norm": 0.8360282778739929, "wall_ms": 636985} {"step": 14200, "loss": 1.139590859413147, "lr": 5.744806557511056e-05, "grad_norm": 0.9242633581161499, "wall_ms": 638698} {"step": 14250, "loss": 0.9801345467567444, "lr": 5.718250068573786e-05, "grad_norm": 0.8630415201187134, "wall_ms": 640405} {"step": 14300, "loss": 1.02158522605896, "lr": 5.691673973965612e-05, "grad_norm": 0.8394274115562439, "wall_ms": 642119} {"step": 14350, "loss": 1.0457258224487305, "lr": 5.665079053397885e-05, "grad_norm": 0.8200191259384155, "wall_ms": 643830} {"step": 14400, "loss": 1.1458840370178223, "lr": 5.6384660871342874e-05, "grad_norm": 0.8569996953010559, "wall_ms": 645570} {"step": 14450, "loss": 1.046402931213379, "lr": 5.611835855967945e-05, "grad_norm": 0.8748022317886353, "wall_ms": 647279} {"step": 14500, "loss": 1.1350315809249878, "lr": 5.5851891411985125e-05, "grad_norm": 0.8747215270996094, "wall_ms": 649010} {"step": 14550, "loss": 1.0764318704605103, "lr": 5.558526724609254e-05, "grad_norm": 0.847220778465271, "wall_ms": 650756} {"step": 14600, "loss": 1.0863155126571655, "lr": 5.531849388444112e-05, "grad_norm": 0.8194402456283569, "wall_ms": 652460} {"step": 14650, "loss": 1.054734706878662, "lr": 5.505157915384739e-05, "grad_norm": 0.8760975003242493, "wall_ms": 654173} {"step": 14700, "loss": 1.0966160297393799, "lr": 5.478453088527563e-05, "grad_norm": 0.8915668725967407, "wall_ms": 655911} {"step": 14750, "loss": 0.9777820110321045, "lr": 5.4517356913607854e-05, "grad_norm": 0.8776732087135315, "wall_ms": 657651} {"step": 14800, "loss": 1.2455387115478516, "lr": 5.4250065077414124e-05, "grad_norm": 0.9114141464233398, "wall_ms": 659375} {"step": 14850, "loss": 1.0622403621673584, "lr": 5.398266321872245e-05, "grad_norm": 0.8811877965927124, "wall_ms": 661089} {"step": 14900, "loss": 1.099771499633789, "lr": 5.3715159182788834e-05, "grad_norm": 0.8775047063827515, "wall_ms": 662779} {"step": 14950, "loss": 1.0822341442108154, "lr": 5.344756081786699e-05, "grad_norm": 0.8492531180381775, "wall_ms": 664517} {"step": 15000, "loss": 1.0261255502700806, "lr": 5.3179875974978176e-05, "grad_norm": 0.8332518935203552, "wall_ms": 666240} {"step": 15050, "loss": 0.9999603629112244, "lr": 5.2912112507680755e-05, "grad_norm": 0.8243043422698975, "wall_ms": 683242} {"step": 15100, "loss": 1.1575226783752441, "lr": 5.264427827183989e-05, "grad_norm": 0.8933400511741638, "wall_ms": 684960} {"step": 15150, "loss": 1.117187261581421, "lr": 5.237638112539697e-05, "grad_norm": 0.8506838083267212, "wall_ms": 686674} {"step": 15200, "loss": 1.100053071975708, "lr": 5.2108428928139144e-05, "grad_norm": 0.8691291809082031, "wall_ms": 688399} {"step": 15250, "loss": 0.9638893604278564, "lr": 5.184042954146866e-05, "grad_norm": 0.8282660841941833, "wall_ms": 690149} {"step": 15300, "loss": 1.0748566389083862, "lr": 5.157239082817228e-05, "grad_norm": 0.8942558169364929, "wall_ms": 691849} {"step": 15350, "loss": 0.8609564304351807, "lr": 5.1304320652190505e-05, "grad_norm": 0.7505937814712524, "wall_ms": 693551} {"step": 15400, "loss": 1.0526268482208252, "lr": 5.103622687838697e-05, "grad_norm": 0.8421809673309326, "wall_ms": 695262} {"step": 15450, "loss": 1.016676664352417, "lr": 5.0768117372317634e-05, "grad_norm": 0.8124997019767761, "wall_ms": 696997} {"step": 15500, "loss": 1.159654140472412, "lr": 5.05e-05, "grad_norm": 0.8937890529632568, "wall_ms": 698716} {"step": 15550, "loss": 0.9398347735404968, "lr": 5.023188262768237e-05, "grad_norm": 0.8053609132766724, "wall_ms": 700467} {"step": 15600, "loss": 1.063999891281128, "lr": 4.996377312161303e-05, "grad_norm": 0.8964652419090271, "wall_ms": 702212} {"step": 15650, "loss": 1.061097264289856, "lr": 4.969567934780952e-05, "grad_norm": 0.8562669157981873, "wall_ms": 703903} {"step": 15700, "loss": 1.0884406566619873, "lr": 4.9427609171827734e-05, "grad_norm": 0.8834716081619263, "wall_ms": 705639} {"step": 15750, "loss": 1.0300354957580566, "lr": 4.915957045853135e-05, "grad_norm": 0.8499013185501099, "wall_ms": 707385} {"step": 15800, "loss": 1.1702936887741089, "lr": 4.889157107186088e-05, "grad_norm": 0.9339880347251892, "wall_ms": 709090} {"step": 15850, "loss": 1.1058207750320435, "lr": 4.862361887460305e-05, "grad_norm": 0.948033332824707, "wall_ms": 710817} {"step": 15900, "loss": 1.1161434650421143, "lr": 4.8355721728160126e-05, "grad_norm": 0.8597437143325806, "wall_ms": 712549} {"step": 15950, "loss": 1.0634154081344604, "lr": 4.808788749231927e-05, "grad_norm": 0.850639283657074, "wall_ms": 714267} {"step": 16000, "loss": 1.1766784191131592, "lr": 4.782012402502184e-05, "grad_norm": 0.9072657823562622, "wall_ms": 716013} {"step": 16050, "loss": 1.0085502862930298, "lr": 4.755243918213301e-05, "grad_norm": 0.8807640075683594, "wall_ms": 728057} {"step": 16100, "loss": 1.0399119853973389, "lr": 4.728484081721116e-05, "grad_norm": 0.8621131777763367, "wall_ms": 729818} {"step": 16150, "loss": 1.083953857421875, "lr": 4.7017336781277556e-05, "grad_norm": 0.8917695879936218, "wall_ms": 731533} {"step": 16200, "loss": 1.0265339612960815, "lr": 4.674993492258589e-05, "grad_norm": 0.7954411506652832, "wall_ms": 733263} {"step": 16250, "loss": 1.1125080585479736, "lr": 4.648264308639216e-05, "grad_norm": 0.9282450079917908, "wall_ms": 734976} {"step": 16300, "loss": 0.9538511037826538, "lr": 4.621546911472438e-05, "grad_norm": 0.8151458501815796, "wall_ms": 736703} {"step": 16350, "loss": 1.1594266891479492, "lr": 4.594842084615262e-05, "grad_norm": 0.946561872959137, "wall_ms": 738460} {"step": 16400, "loss": 1.0108965635299683, "lr": 4.568150611555892e-05, "grad_norm": 0.8478177785873413, "wall_ms": 740192} {"step": 16450, "loss": 0.9316054582595825, "lr": 4.541473275390746e-05, "grad_norm": 0.8544329404830933, "wall_ms": 741922} {"step": 16500, "loss": 1.0879098176956177, "lr": 4.51481085880149e-05, "grad_norm": 0.8834003806114197, "wall_ms": 743634} {"step": 16550, "loss": 0.9831568002700806, "lr": 4.4881641440320556e-05, "grad_norm": 0.9172267317771912, "wall_ms": 745338} {"step": 16600, "loss": 1.0807466506958008, "lr": 4.4615339128657135e-05, "grad_norm": 0.8237285017967224, "wall_ms": 747052} {"step": 16650, "loss": 0.8746027946472168, "lr": 4.4349209466021166e-05, "grad_norm": 0.8443201780319214, "wall_ms": 748791} {"step": 16700, "loss": 1.0062931776046753, "lr": 4.408326026034389e-05, "grad_norm": 0.8546826243400574, "wall_ms": 750520} {"step": 16750, "loss": 0.8437516689300537, "lr": 4.381749931426214e-05, "grad_norm": 0.834909200668335, "wall_ms": 752230} {"step": 16800, "loss": 1.0636985301971436, "lr": 4.3551934424889465e-05, "grad_norm": 0.8636642098426819, "wall_ms": 753949} {"step": 16850, "loss": 1.0302345752716064, "lr": 4.3286573383587286e-05, "grad_norm": 0.8633254766464233, "wall_ms": 755636} {"step": 16900, "loss": 1.0642852783203125, "lr": 4.302142397573643e-05, "grad_norm": 0.9197191596031189, "wall_ms": 757368} {"step": 16950, "loss": 0.9303790330886841, "lr": 4.2756493980508566e-05, "grad_norm": 0.8675852417945862, "wall_ms": 759068} {"step": 17000, "loss": 1.0527265071868896, "lr": 4.249179117063815e-05, "grad_norm": 0.8915224075317383, "wall_ms": 760776} {"step": 17050, "loss": 0.8927605748176575, "lr": 4.22273233121942e-05, "grad_norm": 0.7717829942703247, "wall_ms": 773435} {"step": 17100, "loss": 1.1028199195861816, "lr": 4.1963098164352594e-05, "grad_norm": 0.8811274170875549, "wall_ms": 775152} {"step": 17150, "loss": 1.1078433990478516, "lr": 4.1699123479168325e-05, "grad_norm": 0.8906840085983276, "wall_ms": 776890} {"step": 17200, "loss": 0.9846584796905518, "lr": 4.143540700134812e-05, "grad_norm": 0.9068949222564697, "wall_ms": 778601} {"step": 17250, "loss": 0.928356409072876, "lr": 4.117195646802326e-05, "grad_norm": 0.8232488036155701, "wall_ms": 780345} {"step": 17300, "loss": 0.9822089672088623, "lr": 4.0908779608522414e-05, "grad_norm": 0.8535576462745667, "wall_ms": 782060} {"step": 17350, "loss": 0.9306653738021851, "lr": 4.06458841441451e-05, "grad_norm": 0.8349252343177795, "wall_ms": 783818} {"step": 17400, "loss": 1.1000527143478394, "lr": 4.038327778793495e-05, "grad_norm": 0.9537215828895569, "wall_ms": 785529} {"step": 17450, "loss": 0.7251179218292236, "lr": 4.012096824445355e-05, "grad_norm": 0.7684425115585327, "wall_ms": 787296} {"step": 17500, "loss": 1.0074583292007446, "lr": 3.985896320955431e-05, "grad_norm": 0.8811207413673401, "wall_ms": 789021} {"step": 17550, "loss": 0.9257945418357849, "lr": 3.959727037015678e-05, "grad_norm": 0.970649242401123, "wall_ms": 790726} {"step": 17600, "loss": 0.8375262022018433, "lr": 3.9335897404020996e-05, "grad_norm": 0.8391942381858826, "wall_ms": 792440} {"step": 17650, "loss": 0.8168361186981201, "lr": 3.9074851979522324e-05, "grad_norm": 0.8728376030921936, "wall_ms": 794148} {"step": 17700, "loss": 0.9262288808822632, "lr": 3.8814141755426406e-05, "grad_norm": 0.8415954113006592, "wall_ms": 795856} {"step": 17750, "loss": 0.8563557863235474, "lr": 3.855377438066458e-05, "grad_norm": 0.8021153807640076, "wall_ms": 797582} {"step": 17800, "loss": 1.1233117580413818, "lr": 3.829375749410929e-05, "grad_norm": 0.9171063303947449, "wall_ms": 799301} {"step": 17850, "loss": 0.8165103197097778, "lr": 3.803409872435013e-05, "grad_norm": 0.8047669529914856, "wall_ms": 801017} {"step": 17900, "loss": 1.0407259464263916, "lr": 3.7774805689469935e-05, "grad_norm": 0.8833931684494019, "wall_ms": 802747} {"step": 17950, "loss": 0.9519691467285156, "lr": 3.751588599682136e-05, "grad_norm": 0.8331700563430786, "wall_ms": 804454} {"step": 18000, "loss": 1.0916705131530762, "lr": 3.725734724280358e-05, "grad_norm": 0.9678692817687988, "wall_ms": 806164} {"step": 18050, "loss": 0.8614157438278198, "lr": 3.6999197012639495e-05, "grad_norm": 0.8552488684654236, "wall_ms": 826451} {"step": 18100, "loss": 1.0676188468933105, "lr": 3.674144288015319e-05, "grad_norm": 0.8736070990562439, "wall_ms": 828181} {"step": 18150, "loss": 1.0648181438446045, "lr": 3.648409240754769e-05, "grad_norm": 0.9139515161514282, "wall_ms": 829852} {"step": 18200, "loss": 0.9852918386459351, "lr": 3.622715314518314e-05, "grad_norm": 0.8594771027565002, "wall_ms": 831553} {"step": 18250, "loss": 0.9953182935714722, "lr": 3.5970632631355237e-05, "grad_norm": 0.8491611480712891, "wall_ms": 833286} {"step": 18300, "loss": 0.9987272024154663, "lr": 3.5714538392074084e-05, "grad_norm": 0.8608967661857605, "wall_ms": 835007} {"step": 18350, "loss": 1.0570321083068848, "lr": 3.545887794084341e-05, "grad_norm": 0.8955971002578735, "wall_ms": 836732} {"step": 18400, "loss": 0.9548524022102356, "lr": 3.5203658778440106e-05, "grad_norm": 0.8819900155067444, "wall_ms": 838486} {"step": 18450, "loss": 0.9150660037994385, "lr": 3.4948888392694165e-05, "grad_norm": 0.8194774389266968, "wall_ms": 840180} {"step": 18500, "loss": 0.8421554565429688, "lr": 3.4694574258269e-05, "grad_norm": 0.8531363606452942, "wall_ms": 841906} {"step": 18550, "loss": 0.9207584261894226, "lr": 3.4440723836442135e-05, "grad_norm": 0.847586989402771, "wall_ms": 843582} {"step": 18600, "loss": 0.8606449961662292, "lr": 3.4187344574886354e-05, "grad_norm": 0.8400171995162964, "wall_ms": 845274} {"step": 18650, "loss": 0.8832734823226929, "lr": 3.3934443907451105e-05, "grad_norm": 0.8113038539886475, "wall_ms": 846995} {"step": 18700, "loss": 1.0616661310195923, "lr": 3.368202925394449e-05, "grad_norm": 0.932694137096405, "wall_ms": 848725} {"step": 18750, "loss": 1.0810670852661133, "lr": 3.343010801991547e-05, "grad_norm": 0.9158487319946289, "wall_ms": 850459} {"step": 18800, "loss": 0.8771670460700989, "lr": 3.3178687596436724e-05, "grad_norm": 0.8788829445838928, "wall_ms": 852175} {"step": 18850, "loss": 0.9155687093734741, "lr": 3.292777535988773e-05, "grad_norm": 0.8124942183494568, "wall_ms": 853872} {"step": 18900, "loss": 0.8976818323135376, "lr": 3.267737867173832e-05, "grad_norm": 0.8372101783752441, "wall_ms": 855616} {"step": 18950, "loss": 0.875629723072052, "lr": 3.242750487833278e-05, "grad_norm": 0.7971845269203186, "wall_ms": 857330} {"step": 19000, "loss": 1.0124332904815674, "lr": 3.217816131067424e-05, "grad_norm": 0.9292346835136414, "wall_ms": 859052} {"step": 19050, "loss": 0.8656603097915649, "lr": 3.1929355284209705e-05, "grad_norm": 0.7993571758270264, "wall_ms": 871563} {"step": 19100, "loss": 0.9409209489822388, "lr": 3.168109409861525e-05, "grad_norm": 0.8581641316413879, "wall_ms": 873306} {"step": 19150, "loss": 1.018110752105713, "lr": 3.143338503758206e-05, "grad_norm": 0.8884034752845764, "wall_ms": 875030} {"step": 19200, "loss": 0.9033374190330505, "lr": 3.1186235368602554e-05, "grad_norm": 0.8525997996330261, "wall_ms": 876756} {"step": 19250, "loss": 0.9682834148406982, "lr": 3.0939652342757317e-05, "grad_norm": 0.8440095782279968, "wall_ms": 878501} {"step": 19300, "loss": 0.8990552425384521, "lr": 3.0693643194502215e-05, "grad_norm": 0.8873471617698669, "wall_ms": 880219} {"step": 19350, "loss": 1.01651930809021, "lr": 3.04482151414563e-05, "grad_norm": 1.0462133884429932, "wall_ms": 881942} {"step": 19400, "loss": 0.9199861884117126, "lr": 3.0203375384189904e-05, "grad_norm": 0.8981838226318359, "wall_ms": 883647} {"step": 19450, "loss": 0.8702982664108276, "lr": 2.9959131106013523e-05, "grad_norm": 0.858237624168396, "wall_ms": 885398} {"step": 19500, "loss": 0.9785926342010498, "lr": 2.9715489472766906e-05, "grad_norm": 0.8964686393737793, "wall_ms": 887132} {"step": 19550, "loss": 0.9101927876472473, "lr": 2.9472457632609e-05, "grad_norm": 0.8456073999404907, "wall_ms": 888855} {"step": 19600, "loss": 0.8363006711006165, "lr": 2.9230042715808097e-05, "grad_norm": 0.8225622773170471, "wall_ms": 890589} {"step": 19650, "loss": 0.8951641321182251, "lr": 2.8988251834532666e-05, "grad_norm": 0.8785790205001831, "wall_ms": 892288} {"step": 19700, "loss": 1.0224478244781494, "lr": 2.8747092082642758e-05, "grad_norm": 0.8971164226531982, "wall_ms": 894042} {"step": 19750, "loss": 0.8289114236831665, "lr": 2.8506570535481785e-05, "grad_norm": 0.7883049845695496, "wall_ms": 895775} {"step": 19800, "loss": 0.7951846122741699, "lr": 2.8266694249669008e-05, "grad_norm": 0.8102098107337952, "wall_ms": 897498} {"step": 19850, "loss": 0.9012982249259949, "lr": 2.8027470262892437e-05, "grad_norm": 0.8591442108154297, "wall_ms": 899207} {"step": 19900, "loss": 0.8093454837799072, "lr": 2.7788905593702487e-05, "grad_norm": 0.7769754528999329, "wall_ms": 900934} {"step": 19950, "loss": 0.9650265574455261, "lr": 2.7551007241305853e-05, "grad_norm": 0.8530928492546082, "wall_ms": 902618} {"step": 20000, "loss": 0.9357821941375732, "lr": 2.7313782185360394e-05, "grad_norm": 0.9140329957008362, "wall_ms": 904325} {"step": 20050, "loss": 0.9763294458389282, "lr": 2.7077237385770144e-05, "grad_norm": 0.9679602980613708, "wall_ms": 917486} {"step": 20100, "loss": 0.9385108351707458, "lr": 2.6841379782481298e-05, "grad_norm": 0.8523024320602417, "wall_ms": 919173} {"step": 20150, "loss": 0.8232578039169312, "lr": 2.6606216295278464e-05, "grad_norm": 0.8398555517196655, "wall_ms": 920899} {"step": 20200, "loss": 0.9659301042556763, "lr": 2.6371753823581707e-05, "grad_norm": 0.9167727828025818, "wall_ms": 922627} {"step": 20250, "loss": 0.9619600772857666, "lr": 2.613799924624414e-05, "grad_norm": 0.9375002384185791, "wall_ms": 924358} {"step": 20300, "loss": 0.8954550623893738, "lr": 2.5904959421350093e-05, "grad_norm": 0.8773032426834106, "wall_ms": 926081} {"step": 20350, "loss": 1.041293978691101, "lr": 2.5672641186013884e-05, "grad_norm": 0.9526877999305725, "wall_ms": 927819} {"step": 20400, "loss": 0.8817823529243469, "lr": 2.544105135617923e-05, "grad_norm": 0.8185725212097168, "wall_ms": 929517} {"step": 20450, "loss": 0.9118589162826538, "lr": 2.521019672641933e-05, "grad_norm": 0.817489504814148, "wall_ms": 931265} {"step": 20500, "loss": 0.9400991201400757, "lr": 2.498008406973742e-05, "grad_norm": 0.8932892084121704, "wall_ms": 932988} {"step": 20550, "loss": 0.9242672920227051, "lr": 2.475072013736819e-05, "grad_norm": 0.908050000667572, "wall_ms": 934730} {"step": 20600, "loss": 0.8346642255783081, "lr": 2.452211165857958e-05, "grad_norm": 0.8050029277801514, "wall_ms": 936454} {"step": 20650, "loss": 0.9278695583343506, "lr": 2.429426534047543e-05, "grad_norm": 0.8720080852508545, "wall_ms": 938148} {"step": 20700, "loss": 0.8844668865203857, "lr": 2.406718786779866e-05, "grad_norm": 0.8438585996627808, "wall_ms": 939905} {"step": 20750, "loss": 0.8564820289611816, "lr": 2.3840885902735246e-05, "grad_norm": 0.8302301168441772, "wall_ms": 941627} {"step": 20800, "loss": 1.0061407089233398, "lr": 2.3615366084718573e-05, "grad_norm": 0.9668790698051453, "wall_ms": 943348} {"step": 20850, "loss": 0.8632994890213013, "lr": 2.339063503023485e-05, "grad_norm": 0.9127997756004333, "wall_ms": 945088} {"step": 20900, "loss": 0.8356308937072754, "lr": 2.316669933262882e-05, "grad_norm": 0.866096019744873, "wall_ms": 946806} {"step": 20950, "loss": 0.8333299160003662, "lr": 2.2943565561910472e-05, "grad_norm": 0.8730009198188782, "wall_ms": 948526} {"step": 21000, "loss": 0.8189157247543335, "lr": 2.272124026456208e-05, "grad_norm": 0.8412661552429199, "wall_ms": 950258} {"step": 21050, "loss": 0.9076573848724365, "lr": 2.249972996334638e-05, "grad_norm": 0.8590894937515259, "wall_ms": 962231} {"step": 21100, "loss": 0.8703791499137878, "lr": 2.2279041157115003e-05, "grad_norm": 0.858089029788971, "wall_ms": 963941} {"step": 21150, "loss": 0.9643392562866211, "lr": 2.2059180320617945e-05, "grad_norm": 0.8362814784049988, "wall_ms": 965651} {"step": 21200, "loss": 1.0685594081878662, "lr": 2.1840153904313502e-05, "grad_norm": 1.0029982328414917, "wall_ms": 967355} {"step": 21250, "loss": 0.859221875667572, "lr": 2.1621968334179048e-05, "grad_norm": 0.8319142460823059, "wall_ms": 969087} {"step": 21300, "loss": 0.8290194272994995, "lr": 2.1404630011522586e-05, "grad_norm": 0.8104252219200134, "wall_ms": 970794} {"step": 21350, "loss": 0.8288483619689941, "lr": 2.1188145312794808e-05, "grad_norm": 0.8457422852516174, "wall_ms": 972524} {"step": 21400, "loss": 0.8399669528007507, "lr": 2.0972520589402124e-05, "grad_norm": 0.9046602845191956, "wall_ms": 974243} {"step": 21450, "loss": 0.7548955082893372, "lr": 2.075776216752023e-05, "grad_norm": 0.8347029685974121, "wall_ms": 975954} {"step": 21500, "loss": 0.9515485167503357, "lr": 2.0543876347908638e-05, "grad_norm": 0.8904523849487305, "wall_ms": 977729} {"step": 21550, "loss": 0.8430825471878052, "lr": 2.0330869405725645e-05, "grad_norm": 0.8251240253448486, "wall_ms": 979426} {"step": 21600, "loss": 0.9202942848205566, "lr": 2.0118747590344396e-05, "grad_norm": 0.8643820285797119, "wall_ms": 981135} {"step": 21650, "loss": 1.009737253189087, "lr": 1.990751712516939e-05, "grad_norm": 0.9063624143600464, "wall_ms": 982824} {"step": 21700, "loss": 0.9040194749832153, "lr": 1.9697184207454027e-05, "grad_norm": 0.873333215713501, "wall_ms": 984538} {"step": 21750, "loss": 1.028117060661316, "lr": 1.948775500811867e-05, "grad_norm": 0.9130874276161194, "wall_ms": 986257} {"step": 21800, "loss": 0.9174608588218689, "lr": 1.9279235671569648e-05, "grad_norm": 0.8988444805145264, "wall_ms": 988034} {"step": 21850, "loss": 0.7713562250137329, "lr": 1.9071632315519e-05, "grad_norm": 0.7598730325698853, "wall_ms": 989748} {"step": 21900, "loss": 0.9092968106269836, "lr": 1.8864951030805002e-05, "grad_norm": 0.8814606666564941, "wall_ms": 991493} {"step": 21950, "loss": 0.9279510974884033, "lr": 1.8659197881213394e-05, "grad_norm": 0.873656690120697, "wall_ms": 993224} {"step": 22000, "loss": 0.8542144298553467, "lr": 1.8454378903299534e-05, "grad_norm": 0.8701207637786865, "wall_ms": 994920} {"step": 22050, "loss": 0.8821436762809753, "lr": 1.8250500106211322e-05, "grad_norm": 0.9216045141220093, "wall_ms": 1006997} {"step": 22100, "loss": 0.8767116665840149, "lr": 1.80475674715128e-05, "grad_norm": 0.8224970698356628, "wall_ms": 1008705} {"step": 22150, "loss": 0.853573203086853, "lr": 1.7845586953008772e-05, "grad_norm": 0.8947562575340271, "wall_ms": 1010434} {"step": 22200, "loss": 0.8528493642807007, "lr": 1.7644564476570042e-05, "grad_norm": 0.8433706760406494, "wall_ms": 1012134} {"step": 22250, "loss": 0.9502527713775635, "lr": 1.7444505939959583e-05, "grad_norm": 0.8932315707206726, "wall_ms": 1013893} {"step": 22300, "loss": 0.771693766117096, "lr": 1.7245417212659503e-05, "grad_norm": 0.8292216062545776, "wall_ms": 1015609} {"step": 22350, "loss": 0.853343665599823, "lr": 1.70473041356989e-05, "grad_norm": 0.8760968446731567, "wall_ms": 1017335} {"step": 22400, "loss": 0.8384867906570435, "lr": 1.685017252148236e-05, "grad_norm": 0.8473435044288635, "wall_ms": 1019020} {"step": 22450, "loss": 0.8621209263801575, "lr": 1.6654028153619576e-05, "grad_norm": 0.9388167858123779, "wall_ms": 1020732} {"step": 22500, "loss": 0.9024003744125366, "lr": 1.6458876786755542e-05, "grad_norm": 0.8314564228057861, "wall_ms": 1022423} {"step": 22550, "loss": 0.9346795678138733, "lr": 1.626472414640181e-05, "grad_norm": 0.9147424697875977, "wall_ms": 1024134} {"step": 22600, "loss": 0.9646110534667969, "lr": 1.6071575928768447e-05, "grad_norm": 0.8899040818214417, "wall_ms": 1025879} {"step": 22650, "loss": 0.8312832117080688, "lr": 1.5879437800596945e-05, "grad_norm": 0.9000478386878967, "wall_ms": 1027578} {"step": 22700, "loss": 0.8321069478988647, "lr": 1.5688315398993943e-05, "grad_norm": 0.869304358959198, "wall_ms": 1029330} {"step": 22750, "loss": 0.7931217551231384, "lr": 1.5498214331265904e-05, "grad_norm": 0.8160282373428345, "wall_ms": 1031044} {"step": 22800, "loss": 0.8300249576568604, "lr": 1.5309140174754513e-05, "grad_norm": 0.8487364649772644, "wall_ms": 1032772} {"step": 22850, "loss": 0.9364687204360962, "lr": 1.5121098476673084e-05, "grad_norm": 0.9242866039276123, "wall_ms": 1034535} {"step": 22900, "loss": 0.832299530506134, "lr": 1.4934094753943867e-05, "grad_norm": 0.8984871506690979, "wall_ms": 1036256} {"step": 22950, "loss": 0.8357889652252197, "lr": 1.4748134493036086e-05, "grad_norm": 0.8381741046905518, "wall_ms": 1037999} {"step": 23000, "loss": 0.7973881363868713, "lr": 1.4563223149805035e-05, "grad_norm": 0.8152109384536743, "wall_ms": 1039679} {"step": 23050, "loss": 0.84178626537323, "lr": 1.4379366149331999e-05, "grad_norm": 0.803633451461792, "wall_ms": 1051698} {"step": 23100, "loss": 0.9121197462081909, "lr": 1.4196568885765102e-05, "grad_norm": 0.8725898861885071, "wall_ms": 1053393} {"step": 23150, "loss": 0.8992476463317871, "lr": 1.4014836722160999e-05, "grad_norm": 0.8826303482055664, "wall_ms": 1055111} {"step": 23200, "loss": 0.876204788684845, "lr": 1.383417499032762e-05, "grad_norm": 0.8668400645256042, "wall_ms": 1056839} {"step": 23250, "loss": 0.9761193990707397, "lr": 1.365458899066762e-05, "grad_norm": 0.912093460559845, "wall_ms": 1058548} {"step": 23300, "loss": 0.920052170753479, "lr": 1.3476083992022995e-05, "grad_norm": 0.8988898992538452, "wall_ms": 1060265} {"step": 23350, "loss": 0.8359586000442505, "lr": 1.3298665231520408e-05, "grad_norm": 0.8546157479286194, "wall_ms": 1061955} {"step": 23400, "loss": 0.8620153665542603, "lr": 1.3122337914417582e-05, "grad_norm": 0.8116017580032349, "wall_ms": 1063675} {"step": 23450, "loss": 1.032891035079956, "lr": 1.2947107213950566e-05, "grad_norm": 0.9410510063171387, "wall_ms": 1065395} {"step": 23500, "loss": 0.8506320714950562, "lr": 1.2772978271182002e-05, "grad_norm": 0.8200762271881104, "wall_ms": 1067109} {"step": 23550, "loss": 0.9837802648544312, "lr": 1.2599956194850201e-05, "grad_norm": 0.9866050481796265, "wall_ms": 1068828} {"step": 23600, "loss": 0.9347388744354248, "lr": 1.2428046061219368e-05, "grad_norm": 0.8758438229560852, "wall_ms": 1070551} {"step": 23650, "loss": 0.8469772338867188, "lr": 1.2257252913930599e-05, "grad_norm": 0.8676921725273132, "wall_ms": 1072287} {"step": 23700, "loss": 0.8344777822494507, "lr": 1.2087581763853884e-05, "grad_norm": 0.8591567277908325, "wall_ms": 1074015} {"step": 23750, "loss": 0.8341230750083923, "lr": 1.1919037588941216e-05, "grad_norm": 0.8635292649269104, "wall_ms": 1075763} {"step": 23800, "loss": 0.9302524924278259, "lr": 1.1751625334080399e-05, "grad_norm": 0.8690425157546997, "wall_ms": 1077482} {"step": 23850, "loss": 0.8998254537582397, "lr": 1.1585349910950046e-05, "grad_norm": 0.8245886564254761, "wall_ms": 1079218} {"step": 23900, "loss": 0.8796104192733765, "lr": 1.1420216197875468e-05, "grad_norm": 0.8935802578926086, "wall_ms": 1080923} {"step": 23950, "loss": 0.8331429958343506, "lr": 1.1256229039685564e-05, "grad_norm": 0.8287945985794067, "wall_ms": 1082653} {"step": 24000, "loss": 1.0277016162872314, "lr": 1.1093393247570634e-05, "grad_norm": 0.9794235825538635, "wall_ms": 1084377} {"step": 24050, "loss": 0.891592800617218, "lr": 1.0931713598941281e-05, "grad_norm": 0.8880817294120789, "wall_ms": 1101270} {"step": 24100, "loss": 0.9156147241592407, "lr": 1.0771194837288172e-05, "grad_norm": 0.9501076936721802, "wall_ms": 1103017} {"step": 24150, "loss": 0.8949502110481262, "lr": 1.0611841672042985e-05, "grad_norm": 0.8870260715484619, "wall_ms": 1104729} {"step": 24200, "loss": 0.9432644248008728, "lr": 1.0453658778440107e-05, "grad_norm": 0.9648500680923462, "wall_ms": 1106423} {"step": 24250, "loss": 0.8379695415496826, "lr": 1.0296650797379555e-05, "grad_norm": 0.8338484764099121, "wall_ms": 1108128} {"step": 24300, "loss": 0.8662632703781128, "lr": 1.014082233529079e-05, "grad_norm": 0.8390796184539795, "wall_ms": 1109826} {"step": 24350, "loss": 0.8456107974052429, "lr": 9.986177963997596e-06, "grad_norm": 0.8867987394332886, "wall_ms": 1111541} {"step": 24400, "loss": 0.9766210317611694, "lr": 9.8327222205839e-06, "grad_norm": 0.9346963167190552, "wall_ms": 1113295} {"step": 24450, "loss": 0.7894440293312073, "lr": 9.680459607260682e-06, "grad_norm": 0.858227550983429, "wall_ms": 1115020} {"step": 24500, "loss": 0.9510351419448853, "lr": 9.529394591233923e-06, "grad_norm": 0.9256224632263184, "wall_ms": 1116731} {"step": 24550, "loss": 0.8613480925559998, "lr": 9.379531604573461e-06, "grad_norm": 0.9122938513755798, "wall_ms": 1118486} {"step": 24600, "loss": 0.8514374494552612, "lr": 9.230875044083066e-06, "grad_norm": 0.8688961863517761, "wall_ms": 1120192} {"step": 24650, "loss": 0.8874189853668213, "lr": 9.083429271171306e-06, "grad_norm": 0.907918393611908, "wall_ms": 1121925} {"step": 24700, "loss": 1.0067864656448364, "lr": 8.937198611723741e-06, "grad_norm": 0.906015932559967, "wall_ms": 1123666} {"step": 24750, "loss": 0.8770780563354492, "lr": 8.792187355975872e-06, "grad_norm": 0.9123240113258362, "wall_ms": 1125398} {"step": 24800, "loss": 0.9121580719947815, "lr": 8.648399758387379e-06, "grad_norm": 0.9098241329193115, "wall_ms": 1127135} {"step": 24850, "loss": 1.0781652927398682, "lr": 8.505840037517217e-06, "grad_norm": 0.9890038967132568, "wall_ms": 1128862} {"step": 24900, "loss": 0.8184528350830078, "lr": 8.364512375899902e-06, "grad_norm": 0.8455098271369934, "wall_ms": 1130595} {"step": 24950, "loss": 0.769512414932251, "lr": 8.224420919922768e-06, "grad_norm": 0.7987461686134338, "wall_ms": 1132307} {"step": 25000, "loss": 0.9131845235824585, "lr": 8.08556977970434e-06, "grad_norm": 0.9336896538734436, "wall_ms": 1134047} {"step": 25050, "loss": 0.9086676239967346, "lr": 7.947963028973717e-06, "grad_norm": 0.9177435040473938, "wall_ms": 1146007} {"step": 25100, "loss": 0.820991039276123, "lr": 7.811604704951106e-06, "grad_norm": 0.8283772468566895, "wall_ms": 1147744} {"step": 25150, "loss": 0.9893881678581238, "lr": 7.67649880822931e-06, "grad_norm": 0.9824809432029724, "wall_ms": 1149464} {"step": 25200, "loss": 0.8847253918647766, "lr": 7.542649302656429e-06, "grad_norm": 0.8823778033256531, "wall_ms": 1151170} {"step": 25250, "loss": 0.7431401610374451, "lr": 7.4100601152194856e-06, "grad_norm": 0.8639444708824158, "wall_ms": 1152916} {"step": 25300, "loss": 0.8755166530609131, "lr": 7.2787351359292624e-06, "grad_norm": 0.8950601816177368, "wall_ms": 1154625} {"step": 25350, "loss": 0.7971059679985046, "lr": 7.148678217706177e-06, "grad_norm": 0.8030542731285095, "wall_ms": 1156368} {"step": 25400, "loss": 0.8856620788574219, "lr": 7.019893176267212e-06, "grad_norm": 0.8934270739555359, "wall_ms": 1158081} {"step": 25450, "loss": 0.8757296204566956, "lr": 6.892383790013977e-06, "grad_norm": 0.8870618939399719, "wall_ms": 1159795} {"step": 25500, "loss": 0.8609622120857239, "lr": 6.76615379992187e-06, "grad_norm": 0.8816867470741272, "wall_ms": 1161497} {"step": 25550, "loss": 0.6766996383666992, "lr": 6.641206909430314e-06, "grad_norm": 0.7736501097679138, "wall_ms": 1163238} {"step": 25600, "loss": 1.0432275533676147, "lr": 6.5175467843340835e-06, "grad_norm": 0.9793668985366821, "wall_ms": 1164919} {"step": 25650, "loss": 0.9869239330291748, "lr": 6.3951770526757955e-06, "grad_norm": 0.9895700812339783, "wall_ms": 1166611} {"step": 25700, "loss": 0.863646388053894, "lr": 6.2741013046394e-06, "grad_norm": 0.8660830855369568, "wall_ms": 1168319} {"step": 25750, "loss": 0.7747340202331543, "lr": 6.1543230924449456e-06, "grad_norm": 0.7953796982765198, "wall_ms": 1170021} {"step": 25800, "loss": 0.8740692138671875, "lr": 6.035845930244261e-06, "grad_norm": 0.872544527053833, "wall_ms": 1171733} {"step": 25850, "loss": 0.9061554074287415, "lr": 5.9186732940179085e-06, "grad_norm": 0.9027559161186218, "wall_ms": 1173461} {"step": 25900, "loss": 0.9005404114723206, "lr": 5.8028086214731894e-06, "grad_norm": 0.9121727347373962, "wall_ms": 1175180} {"step": 25950, "loss": 0.8221328258514404, "lr": 5.688255311943317e-06, "grad_norm": 0.8713704347610474, "wall_ms": 1176906} {"step": 26000, "loss": 0.8818365335464478, "lr": 5.575016726287634e-06, "grad_norm": 0.9536114931106567, "wall_ms": 1178591} {"step": 26050, "loss": 0.8983368873596191, "lr": 5.463096186793012e-06, "grad_norm": 0.8690343499183655, "wall_ms": 1190428} {"step": 26100, "loss": 0.8721312284469604, "lr": 5.352496977076448e-06, "grad_norm": 0.8683496713638306, "wall_ms": 1192150} {"step": 26150, "loss": 0.8682249784469604, "lr": 5.243222341988627e-06, "grad_norm": 0.8597843050956726, "wall_ms": 1193908} {"step": 26200, "loss": 0.8631032705307007, "lr": 5.135275487518813e-06, "grad_norm": 0.8572007417678833, "wall_ms": 1195613} {"step": 26250, "loss": 0.723900318145752, "lr": 5.028659580700724e-06, "grad_norm": 0.8615127205848694, "wall_ms": 1197334} {"step": 26300, "loss": 0.7976235151290894, "lr": 4.92337774951965e-06, "grad_norm": 0.8316731452941895, "wall_ms": 1199015} {"step": 26350, "loss": 0.8843433856964111, "lr": 4.81943308282066e-06, "grad_norm": 0.8611040115356445, "wall_ms": 1200729} {"step": 26400, "loss": 0.930512011051178, "lr": 4.716828630218002e-06, "grad_norm": 0.9153528213500977, "wall_ms": 1202455} {"step": 26450, "loss": 0.9234251976013184, "lr": 4.615567402005619e-06, "grad_norm": 0.9505035877227783, "wall_ms": 1204192} {"step": 26500, "loss": 0.9038941860198975, "lr": 4.515652369068825e-06, "grad_norm": 0.9150354266166687, "wall_ms": 1205920} {"step": 26550, "loss": 0.9678453207015991, "lr": 4.41708646279715e-06, "grad_norm": 0.8609015345573425, "wall_ms": 1207608} {"step": 26600, "loss": 0.8607543706893921, "lr": 4.319872574998335e-06, "grad_norm": 0.8383751511573792, "wall_ms": 1209353} {"step": 26650, "loss": 0.7097011804580688, "lr": 4.2240135578134765e-06, "grad_norm": 0.763478696346283, "wall_ms": 1211062} {"step": 26700, "loss": 0.9740942716598511, "lr": 4.1295122236333985e-06, "grad_norm": 0.9253493547439575, "wall_ms": 1212782} {"step": 26750, "loss": 0.7666107416152954, "lr": 4.03637134501605e-06, "grad_norm": 0.8267912268638611, "wall_ms": 1214524} {"step": 26800, "loss": 0.7878971099853516, "lr": 3.944593654605264e-06, "grad_norm": 0.8662969470024109, "wall_ms": 1216272} {"step": 26850, "loss": 0.8265634179115295, "lr": 3.854181845050505e-06, "grad_norm": 0.8844826817512512, "wall_ms": 1218006} {"step": 26900, "loss": 0.872107982635498, "lr": 3.765138568927902e-06, "grad_norm": 0.8956784009933472, "wall_ms": 1219748} {"step": 26950, "loss": 0.8158397674560547, "lr": 3.677466438662441e-06, "grad_norm": 0.9160043597221375, "wall_ms": 1221448} {"step": 27000, "loss": 0.7509598731994629, "lr": 3.5911680264512838e-06, "grad_norm": 0.8434655666351318, "wall_ms": 1223192} {"step": 27050, "loss": 0.9980931878089905, "lr": 3.5062458641883246e-06, "grad_norm": 0.9410495758056641, "wall_ms": 1235157} {"step": 27100, "loss": 0.8116971254348755, "lr": 3.4227024433899e-06, "grad_norm": 0.8376370668411255, "wall_ms": 1236865} {"step": 27150, "loss": 0.7444368600845337, "lr": 3.340540215121711e-06, "grad_norm": 0.7965226769447327, "wall_ms": 1238592} {"step": 27200, "loss": 0.834304928779602, "lr": 3.2597615899268684e-06, "grad_norm": 0.8402142524719238, "wall_ms": 1240338} {"step": 27250, "loss": 0.860331118106842, "lr": 3.180368937755228e-06, "grad_norm": 0.8783428072929382, "wall_ms": 1242100} {"step": 27300, "loss": 0.8633811473846436, "lr": 3.1023645878937876e-06, "grad_norm": 0.8544955849647522, "wall_ms": 1243833} {"step": 27350, "loss": 0.9163490533828735, "lr": 3.025750828898435e-06, "grad_norm": 0.8683464527130127, "wall_ms": 1245583} {"step": 27400, "loss": 0.8604222536087036, "lr": 2.9505299085267165e-06, "grad_norm": 0.8676515817642212, "wall_ms": 1247302} {"step": 27450, "loss": 0.8994333148002625, "lr": 2.876704033671956e-06, "grad_norm": 0.9336061477661133, "wall_ms": 1249033} {"step": 27500, "loss": 0.8964388966560364, "lr": 2.8042753702984662e-06, "grad_norm": 0.9106435179710388, "wall_ms": 1250722} {"step": 27550, "loss": 0.7797161340713501, "lr": 2.7332460433780365e-06, "grad_norm": 0.8359201550483704, "wall_ms": 1252434} {"step": 27600, "loss": 0.9298245906829834, "lr": 2.6636181368275634e-06, "grad_norm": 0.8676599860191345, "wall_ms": 1254166} {"step": 27650, "loss": 0.8745954036712646, "lr": 2.5953936934479073e-06, "grad_norm": 0.8803846836090088, "wall_ms": 1255835} {"step": 27700, "loss": 0.9495563507080078, "lr": 2.5285747148639924e-06, "grad_norm": 0.9138960242271423, "wall_ms": 1257530} {"step": 27750, "loss": 0.7507927417755127, "lr": 2.4631631614660317e-06, "grad_norm": 0.8110131621360779, "wall_ms": 1259239} {"step": 27800, "loss": 0.8202790021896362, "lr": 2.3991609523520744e-06, "grad_norm": 0.8325169086456299, "wall_ms": 1260981} {"step": 27850, "loss": 0.8691180944442749, "lr": 2.3365699652716333e-06, "grad_norm": 0.8450722694396973, "wall_ms": 1262683} {"step": 27900, "loss": 0.8306569457054138, "lr": 2.275392036570641e-06, "grad_norm": 0.8902499675750732, "wall_ms": 1264395} {"step": 27950, "loss": 0.8598312735557556, "lr": 2.215628961137552e-06, "grad_norm": 0.8673657178878784, "wall_ms": 1266133} {"step": 28000, "loss": 0.9279443621635437, "lr": 2.1572824923507103e-06, "grad_norm": 0.9012027978897095, "wall_ms": 1267865} {"step": 28050, "loss": 1.0180460214614868, "lr": 2.100354342026861e-06, "grad_norm": 0.8994989395141602, "wall_ms": 1280297} {"step": 28100, "loss": 0.9184448719024658, "lr": 2.044846180370977e-06, "grad_norm": 0.9125210046768188, "wall_ms": 1282011} {"step": 28150, "loss": 0.7677313685417175, "lr": 1.990759635927207e-06, "grad_norm": 0.866466224193573, "wall_ms": 1283741} {"step": 28200, "loss": 0.9967849254608154, "lr": 1.938096295531161e-06, "grad_norm": 0.9125879406929016, "wall_ms": 1285490} {"step": 28250, "loss": 0.9128661155700684, "lr": 1.8868577042632798e-06, "grad_norm": 0.9288386702537537, "wall_ms": 1287191} {"step": 28300, "loss": 0.7252013683319092, "lr": 1.837045365403559e-06, "grad_norm": 0.766315758228302, "wall_ms": 1288928} {"step": 28350, "loss": 0.9164884686470032, "lr": 1.7886607403874219e-06, "grad_norm": 0.9724982380867004, "wall_ms": 1290653} {"step": 28400, "loss": 1.0190832614898682, "lr": 1.7417052487628546e-06, "grad_norm": 0.9689489603042603, "wall_ms": 1292374} {"step": 28450, "loss": 0.861492395401001, "lr": 1.696180268148741e-06, "grad_norm": 0.8416327834129333, "wall_ms": 1294094} {"step": 28500, "loss": 0.887925386428833, "lr": 1.652087134194456e-06, "grad_norm": 0.8842689394950867, "wall_ms": 1295819} {"step": 28550, "loss": 0.8391668796539307, "lr": 1.6094271405406859e-06, "grad_norm": 0.9109037518501282, "wall_ms": 1297522} {"step": 28600, "loss": 0.8185270428657532, "lr": 1.5682015387814545e-06, "grad_norm": 0.8562387228012085, "wall_ms": 1299256} {"step": 28650, "loss": 0.868694543838501, "lr": 1.5284115384274238e-06, "grad_norm": 0.8481931090354919, "wall_ms": 1300993} {"step": 28700, "loss": 0.8323656320571899, "lr": 1.4900583068703912e-06, "grad_norm": 0.9382968544960022, "wall_ms": 1302726} {"step": 28750, "loss": 0.7769579291343689, "lr": 1.4531429693490646e-06, "grad_norm": 0.8474683165550232, "wall_ms": 1304450} {"step": 28800, "loss": 0.9949464201927185, "lr": 1.4176666089160147e-06, "grad_norm": 0.9538387060165405, "wall_ms": 1306189} {"step": 28850, "loss": 0.9064397215843201, "lr": 1.3836302664059243e-06, "grad_norm": 0.8856461048126221, "wall_ms": 1307892} {"step": 28900, "loss": 1.0824071168899536, "lr": 1.351034940405037e-06, "grad_norm": 0.936811625957489, "wall_ms": 1309612} {"step": 28950, "loss": 0.8862952589988708, "lr": 1.3198815872218867e-06, "grad_norm": 0.8658307194709778, "wall_ms": 1311356} {"step": 29000, "loss": 0.8625856637954712, "lr": 1.2901711208591997e-06, "grad_norm": 0.8604753017425537, "wall_ms": 1313082} {"step": 29050, "loss": 0.8675184845924377, "lr": 1.2619044129871098e-06, "grad_norm": 0.8789829015731812, "wall_ms": 1325297} {"step": 29100, "loss": 0.728142261505127, "lr": 1.235082292917574e-06, "grad_norm": 0.8269463777542114, "wall_ms": 1327022} {"step": 29150, "loss": 0.8348161578178406, "lr": 1.2097055475800403e-06, "grad_norm": 0.8870102763175964, "wall_ms": 1328752} {"step": 29200, "loss": 0.8564329147338867, "lr": 1.1857749214983697e-06, "grad_norm": 0.8767995238304138, "wall_ms": 1330461} {"step": 29250, "loss": 1.0927298069000244, "lr": 1.1632911167689769e-06, "grad_norm": 0.9742985367774963, "wall_ms": 1332195} {"step": 29300, "loss": 0.928369402885437, "lr": 1.1422547930402411e-06, "grad_norm": 0.8560556173324585, "wall_ms": 1333891} {"step": 29350, "loss": 0.8408288359642029, "lr": 1.122666567493159e-06, "grad_norm": 0.8529053330421448, "wall_ms": 1335611} {"step": 29400, "loss": 0.7911362648010254, "lr": 1.1045270148232226e-06, "grad_norm": 0.7952395677566528, "wall_ms": 1337360} {"step": 29450, "loss": 0.8065153360366821, "lr": 1.087836667223572e-06, "grad_norm": 0.8458303213119507, "wall_ms": 1339062} {"step": 29500, "loss": 0.9675232768058777, "lr": 1.0725960143693708e-06, "grad_norm": 0.9907279014587402, "wall_ms": 1340780} {"step": 29550, "loss": 0.9719711542129517, "lr": 1.0588055034034404e-06, "grad_norm": 0.959941565990448, "wall_ms": 1342516} {"step": 29600, "loss": 1.0176799297332764, "lr": 1.0464655389231526e-06, "grad_norm": 0.9226427674293518, "wall_ms": 1344246} {"step": 29650, "loss": 0.8476913571357727, "lr": 1.035576482968549e-06, "grad_norm": 0.8446980714797974, "wall_ms": 1345978} {"step": 29700, "loss": 0.8505201935768127, "lr": 1.026138655011723e-06, "grad_norm": 0.8545879125595093, "wall_ms": 1347715} {"step": 29750, "loss": 0.9899210333824158, "lr": 1.018152331947444e-06, "grad_norm": 0.9345703721046448, "wall_ms": 1349470} {"step": 29800, "loss": 0.726345419883728, "lr": 1.0116177480850355e-06, "grad_norm": 0.8169661164283752, "wall_ms": 1351179} {"step": 29850, "loss": 0.9296042919158936, "lr": 1.0065350951415e-06, "grad_norm": 0.9300240278244019, "wall_ms": 1352915} {"step": 29900, "loss": 0.8035856485366821, "lr": 1.0029045222359017e-06, "grad_norm": 0.8468882441520691, "wall_ms": 1354638} {"step": 29950, "loss": 0.8234740495681763, "lr": 1.0007261358849657e-06, "grad_norm": 0.8150079846382141, "wall_ms": 1356331} {"step": 30000, "loss": 0.8510042428970337, "lr": 1e-06, "grad_norm": 0.8761374950408936, "wall_ms": 1358059}