jsanzolac's picture
Upload rank_512/train_log.jsonl with huggingface_hub
d5f9fc0 verified
{"step": 50, "loss": 0.045709993690252304, "loss_nce": 0.0011574977543205023, "loss_density": 0.4455249607563019, "lr": 5e-05, "grad_norm": 0.010513417422771454, "wall_ms": 48534}
{"step": 100, "loss": 0.03634428232908249, "loss_nce": 0.0022035781294107437, "loss_density": 0.34140706062316895, "lr": 0.0001, "grad_norm": 0.011891142465174198, "wall_ms": 95233}
{"step": 150, "loss": 0.03087129257619381, "loss_nce": 0.0016556349582970142, "loss_density": 0.2921565771102905, "lr": 0.00015, "grad_norm": 0.007981224916875362, "wall_ms": 141980}
{"step": 200, "loss": 0.027698222547769547, "loss_nce": 0.003042680211365223, "loss_density": 0.24655543267726898, "lr": 0.0002, "grad_norm": 0.02314840257167816, "wall_ms": 188839}
{"step": 250, "loss": 0.025634344667196274, "loss_nce": 0.001862902077846229, "loss_density": 0.23771442472934723, "lr": 0.00025, "grad_norm": 0.010692260228097439, "wall_ms": 235725}
{"step": 300, "loss": 0.023244261741638184, "loss_nce": 0.0028993948362767696, "loss_density": 0.20344865322113037, "lr": 0.0003, "grad_norm": 0.04207959771156311, "wall_ms": 282512}
{"step": 350, "loss": 0.02541101910173893, "loss_nce": 0.004856443498283625, "loss_density": 0.20554575324058533, "lr": 0.00035, "grad_norm": 0.13197611272335052, "wall_ms": 329346}
{"step": 400, "loss": 0.03890610486268997, "loss_nce": 0.021125324070453644, "loss_density": 0.1778077930212021, "lr": 0.0004, "grad_norm": 0.8804298043251038, "wall_ms": 376240}
{"step": 450, "loss": 0.01441185362637043, "loss_nce": 0.0032177320681512356, "loss_density": 0.11194121092557907, "lr": 0.00045000000000000004, "grad_norm": 0.059425245970487595, "wall_ms": 423065}
{"step": 500, "loss": 0.018649857491254807, "loss_nce": 0.0055419160053133965, "loss_density": 0.13107940554618835, "lr": 0.0005, "grad_norm": 0.07450757920742035, "wall_ms": 469858}
{"step": 550, "loss": 0.037244804203510284, "loss_nce": 0.019524525851011276, "loss_density": 0.17720277607440948, "lr": 0.000499996526792064, "grad_norm": 0.7320436239242554, "wall_ms": 516706}
{"step": 600, "loss": 0.011968838050961494, "loss_nce": 0.0017626157496124506, "loss_density": 0.10206222534179688, "lr": 0.0004999861072667309, "grad_norm": 0.04926066845655441, "wall_ms": 563651}
{"step": 650, "loss": 0.012583630159497261, "loss_nce": 0.0026841717772185802, "loss_density": 0.09899459034204483, "lr": 0.0004999687417194223, "grad_norm": 0.0781131312251091, "wall_ms": 610446}
{"step": 700, "loss": 0.010605952702462673, "loss_nce": 0.0019459591712802649, "loss_density": 0.0865999311208725, "lr": 0.000499944430642499, "grad_norm": 0.04889582842588425, "wall_ms": 657266}
{"step": 750, "loss": 0.016011711210012436, "loss_nce": 0.005723984912037849, "loss_density": 0.10287725180387497, "lr": 0.0004999131747252459, "grad_norm": 0.18515890836715698, "wall_ms": 704120}
{"step": 800, "loss": 0.011195853352546692, "loss_nce": 0.002506819786503911, "loss_density": 0.0868903398513794, "lr": 0.0004998749748538533, "grad_norm": 0.1662425547838211, "wall_ms": 750864}
{"step": 850, "loss": 0.01645912602543831, "loss_nce": 0.005164763890206814, "loss_density": 0.1129436269402504, "lr": 0.0004998298321113913, "grad_norm": 0.07034236937761307, "wall_ms": 797804}
{"step": 900, "loss": 0.01406706590205431, "loss_nce": 0.0013995320769026875, "loss_density": 0.1266753375530243, "lr": 0.0004997777477777794, "grad_norm": 0.05042058601975441, "wall_ms": 844749}
{"step": 950, "loss": 0.013195419684052467, "loss_nce": 0.0024839069228619337, "loss_density": 0.10711511969566345, "lr": 0.0004997187233297501, "grad_norm": 0.05295176059007645, "wall_ms": 891596}
{"step": 1000, "loss": 0.013317004777491093, "loss_nce": 0.004267278127372265, "loss_density": 0.09049726277589798, "lr": 0.0004996527604408066, "grad_norm": 0.19750279188156128, "wall_ms": 938444}
{"step": 1050, "loss": 0.017766516655683517, "loss_nce": 0.005013466812670231, "loss_density": 0.12753050029277802, "lr": 0.0004995798609811765, "grad_norm": 0.16494250297546387, "wall_ms": 985277}
{"step": 1100, "loss": 0.010176170617341995, "loss_nce": 0.0011015650816261768, "loss_density": 0.09074606001377106, "lr": 0.0004995000270177573, "grad_norm": 0.025249801576137543, "wall_ms": 1032108}
{"step": 1150, "loss": 0.011654548346996307, "loss_nce": 0.0017783124931156635, "loss_density": 0.09876234829425812, "lr": 0.000499413260814059, "grad_norm": 0.04064905270934105, "wall_ms": 1078943}
{"step": 1200, "loss": 0.00923355482518673, "loss_nce": 0.0015523864421993494, "loss_density": 0.07681168615818024, "lr": 0.0004993195648301393, "grad_norm": 0.04071284085512161, "wall_ms": 1125733}
{"step": 1250, "loss": 0.010505842044949532, "loss_nce": 0.0010845959186553955, "loss_density": 0.09421245753765106, "lr": 0.0004992189417225338, "grad_norm": 0.024433016777038574, "wall_ms": 1172547}
{"step": 1300, "loss": 0.010483683086931705, "loss_nce": 0.002656347816810012, "loss_density": 0.07827334851026535, "lr": 0.0004991113943441812, "grad_norm": 0.11166027188301086, "wall_ms": 1219337}
{"step": 1350, "loss": 0.009758377447724342, "loss_nce": 0.001052729319781065, "loss_density": 0.0870564877986908, "lr": 0.0004989969257443421, "grad_norm": 0.023822084069252014, "wall_ms": 1266133}
{"step": 1400, "loss": 0.011436698958277702, "loss_nce": 0.0018755816854536533, "loss_density": 0.09561116248369217, "lr": 0.0004988755391685123, "grad_norm": 0.047456905245780945, "wall_ms": 1313015}
{"step": 1450, "loss": 0.011115237139165401, "loss_nce": 0.0024340013042092323, "loss_density": 0.08681235462427139, "lr": 0.0004987472380583311, "grad_norm": 0.053387828171253204, "wall_ms": 1359787}
{"step": 1500, "loss": 0.011826476082205772, "loss_nce": 0.003626574296504259, "loss_density": 0.08199901133775711, "lr": 0.0004986120260514836, "grad_norm": 0.053968947380781174, "wall_ms": 1406530}
{"step": 1550, "loss": 0.009899723343551159, "loss_nce": 0.000844616093672812, "loss_density": 0.09055106341838837, "lr": 0.0004984699069815979, "grad_norm": 0.021279770880937576, "wall_ms": 1453362}
{"step": 1600, "loss": 0.01455264538526535, "loss_nce": 0.0037353024818003178, "loss_density": 0.1081734299659729, "lr": 0.0004983208848781357, "grad_norm": 0.13442547619342804, "wall_ms": 1500151}
{"step": 1650, "loss": 0.01030087098479271, "loss_nce": 0.001533812959678471, "loss_density": 0.08767057955265045, "lr": 0.0004981649639662787, "grad_norm": 0.02268199250102043, "wall_ms": 1547046}
{"step": 1700, "loss": 0.03326389938592911, "loss_nce": 0.021633723750710487, "loss_density": 0.11630174517631531, "lr": 0.0004980021486668087, "grad_norm": 0.31646186113357544, "wall_ms": 1593834}
{"step": 1750, "loss": 0.011438930407166481, "loss_nce": 0.002737933536991477, "loss_density": 0.0870099663734436, "lr": 0.0004978324435959818, "grad_norm": 0.043739743530750275, "wall_ms": 1640778}
{"step": 1800, "loss": 0.009904059581458569, "loss_nce": 0.0015941576566547155, "loss_density": 0.08309901505708694, "lr": 0.0004976558535653979, "grad_norm": 0.02996126189827919, "wall_ms": 1687599}
{"step": 1850, "loss": 0.0101852398365736, "loss_nce": 0.002122095786035061, "loss_density": 0.08063143491744995, "lr": 0.0004974723835818644, "grad_norm": 0.06489592790603638, "wall_ms": 1734451}
{"step": 1900, "loss": 0.010395411401987076, "loss_nce": 0.0014597894623875618, "loss_density": 0.08935621380805969, "lr": 0.000497282038847254, "grad_norm": 0.0393998846411705, "wall_ms": 1781252}
{"step": 1950, "loss": 0.010844022035598755, "loss_nce": 0.0022232572082430124, "loss_density": 0.08620765060186386, "lr": 0.0004970848247583572, "grad_norm": 0.044061776250600815, "wall_ms": 1827993}
{"step": 2000, "loss": 0.013384843245148659, "loss_nce": 0.0035993792116642, "loss_density": 0.09785463660955429, "lr": 0.0004968807469067294, "grad_norm": 0.07348860800266266, "wall_ms": 1874940}
{"step": 2050, "loss": 0.011723527684807777, "loss_nce": 0.003479731036350131, "loss_density": 0.08243796229362488, "lr": 0.0004966698110785325, "grad_norm": 0.06697510927915573, "wall_ms": 1921729}
{"step": 2100, "loss": 0.008690652437508106, "loss_nce": 0.001022546784952283, "loss_density": 0.07668105512857437, "lr": 0.0004964520232543703, "grad_norm": 0.026709286496043205, "wall_ms": 1968503}
{"step": 2150, "loss": 0.011144440621137619, "loss_nce": 0.002855521161109209, "loss_density": 0.08288918435573578, "lr": 0.0004962273896091197, "grad_norm": 0.0659460723400116, "wall_ms": 2015285}
{"step": 2200, "loss": 0.009447486139833927, "loss_nce": 0.0018852531211450696, "loss_density": 0.0756223276257515, "lr": 0.0004959959165117551, "grad_norm": 0.032548557966947556, "wall_ms": 2062037}
{"step": 2250, "loss": 0.013695849105715752, "loss_nce": 0.00326494500041008, "loss_density": 0.10430903732776642, "lr": 0.0004957576105251676, "grad_norm": 0.19121763110160828, "wall_ms": 2108867}
{"step": 2300, "loss": 0.009866869077086449, "loss_nce": 0.0016741259023547173, "loss_density": 0.08192743360996246, "lr": 0.00049551247840598, "grad_norm": 0.037151578813791275, "wall_ms": 2155608}
{"step": 2350, "loss": 0.009749576449394226, "loss_nce": 0.0016359197907149792, "loss_density": 0.08113656938076019, "lr": 0.0004952605271043538, "grad_norm": 0.041566286236047745, "wall_ms": 2202406}
{"step": 2400, "loss": 0.013274440541863441, "loss_nce": 0.004653426352888346, "loss_density": 0.08621013164520264, "lr": 0.0004950017637637934, "grad_norm": 0.06826890259981155, "wall_ms": 2249185}
{"step": 2450, "loss": 0.013336420059204102, "loss_nce": 0.005121695343405008, "loss_density": 0.08214724063873291, "lr": 0.0004947361957209426, "grad_norm": 0.09166550636291504, "wall_ms": 2295955}
{"step": 2500, "loss": 0.010005718097090721, "loss_nce": 0.0010521174408495426, "loss_density": 0.08953601121902466, "lr": 0.0004944638305053774, "grad_norm": 0.025566304102540016, "wall_ms": 2342750}
{"step": 2550, "loss": 0.010551566258072853, "loss_nce": 0.002019538776949048, "loss_density": 0.08532027900218964, "lr": 0.0004941846758393917, "grad_norm": 0.0640556737780571, "wall_ms": 2389527}
{"step": 2600, "loss": 0.011768292635679245, "loss_nce": 0.002485188189893961, "loss_density": 0.09283104538917542, "lr": 0.000493898739637779, "grad_norm": 0.04647933319211006, "wall_ms": 2436292}
{"step": 2650, "loss": 0.010419112630188465, "loss_nce": 0.0019825499039143324, "loss_density": 0.08436562120914459, "lr": 0.0004936060300076078, "grad_norm": 0.0332794226706028, "wall_ms": 2483123}
{"step": 2700, "loss": 0.010453636758029461, "loss_nce": 0.0018870586063712835, "loss_density": 0.08566577732563019, "lr": 0.0004933065552479917, "grad_norm": 0.03370196744799614, "wall_ms": 2530110}
{"step": 2750, "loss": 0.010116683319211006, "loss_nce": 0.001595525536686182, "loss_density": 0.08521157503128052, "lr": 0.0004930003238498539, "grad_norm": 0.017244206741452217, "wall_ms": 2576851}
{"step": 2800, "loss": 0.01083705760538578, "loss_nce": 0.002786492696031928, "loss_density": 0.08050564676523209, "lr": 0.0004926873444956872, "grad_norm": 0.06486423313617706, "wall_ms": 2623783}
{"step": 2850, "loss": 0.01060484442859888, "loss_nce": 0.0019222514238208532, "loss_density": 0.08682592213153839, "lr": 0.0004923676260593065, "grad_norm": 0.024950195103883743, "wall_ms": 2670662}
{"step": 2900, "loss": 0.009139180183410645, "loss_nce": 0.0011040017707273364, "loss_density": 0.08035178482532501, "lr": 0.0004920411776055984, "grad_norm": 0.025582056492567062, "wall_ms": 2717457}
{"step": 2950, "loss": 0.011383062228560448, "loss_nce": 0.0029263810720294714, "loss_density": 0.08456680923700333, "lr": 0.0004917080083902642, "grad_norm": 0.03876994177699089, "wall_ms": 2764278}
{"step": 3000, "loss": 0.010166572406888008, "loss_nce": 0.0015141658950597048, "loss_density": 0.08652406930923462, "lr": 0.0004913681278595564, "grad_norm": 0.02295353077352047, "wall_ms": 2811092}
{"step": 3050, "loss": 0.009988449513912201, "loss_nce": 0.0016585015691816807, "loss_density": 0.08329947292804718, "lr": 0.0004910215456500118, "grad_norm": 0.02380312606692314, "wall_ms": 2857905}
{"step": 3100, "loss": 0.010196727700531483, "loss_nce": 0.0021436584647744894, "loss_density": 0.08053068816661835, "lr": 0.000490668271588178, "grad_norm": 0.04794042557477951, "wall_ms": 2904723}
{"step": 3150, "loss": 0.009196978062391281, "loss_nce": 0.0013398616574704647, "loss_density": 0.07857117056846619, "lr": 0.0004903083156903349, "grad_norm": 0.018420470878481865, "wall_ms": 2951562}
{"step": 3200, "loss": 0.010183267295360565, "loss_nce": 0.0020160230342298746, "loss_density": 0.08167244493961334, "lr": 0.0004899416881622104, "grad_norm": 0.0343974307179451, "wall_ms": 2998319}
{"step": 3250, "loss": 0.00857553817331791, "loss_nce": 0.0009260033839382231, "loss_density": 0.07649534940719604, "lr": 0.0004895683993986913, "grad_norm": 0.015435702167451382, "wall_ms": 3045197}
{"step": 3300, "loss": 0.01720554754137993, "loss_nce": 0.009116473607718945, "loss_density": 0.0808907300233841, "lr": 0.0004891884599835288, "grad_norm": 0.05999911203980446, "wall_ms": 3092068}
{"step": 3350, "loss": 0.009800290688872337, "loss_nce": 0.001711646094918251, "loss_density": 0.08088644593954086, "lr": 0.0004888018806890377, "grad_norm": 0.021499978378415108, "wall_ms": 3139031}
{"step": 3400, "loss": 0.01047299150377512, "loss_nce": 0.002083966275677085, "loss_density": 0.08389025181531906, "lr": 0.0004884086724757918, "grad_norm": 0.028322771191596985, "wall_ms": 3185952}
{"step": 3450, "loss": 0.01015239767730236, "loss_nce": 0.0027045186143368483, "loss_density": 0.07447879016399384, "lr": 0.00048800884649231267, "grad_norm": 0.14232338964939117, "wall_ms": 3232747}
{"step": 3500, "loss": 0.013112563639879227, "loss_nce": 0.003651081118732691, "loss_density": 0.09461481869220734, "lr": 0.00048760241407475345, "grad_norm": 0.05103244632482529, "wall_ms": 3279532}
{"step": 3550, "loss": 0.010289198718965054, "loss_nce": 0.0021751902531832457, "loss_density": 0.08114007860422134, "lr": 0.00048718938674657786, "grad_norm": 0.021313901990652084, "wall_ms": 3326250}
{"step": 3600, "loss": 0.00942971557378769, "loss_nce": 0.0010858428431674838, "loss_density": 0.08343872427940369, "lr": 0.00048676977621823326, "grad_norm": 0.014173733070492744, "wall_ms": 3373142}
{"step": 3650, "loss": 0.009229038842022419, "loss_nce": 0.0013304577441886067, "loss_density": 0.07898581027984619, "lr": 0.00048634359438681855, "grad_norm": 0.019168466329574585, "wall_ms": 3420018}
{"step": 3700, "loss": 0.009719211608171463, "loss_nce": 0.0015164706856012344, "loss_density": 0.08202740550041199, "lr": 0.0004859108533357468, "grad_norm": 0.017530513927340508, "wall_ms": 3466935}
{"step": 3750, "loss": 0.008899322710931301, "loss_nce": 0.0012310795718804002, "loss_density": 0.0766824334859848, "lr": 0.00048547156533440317, "grad_norm": 0.01882850006222725, "wall_ms": 3514309}
{"step": 3800, "loss": 0.01128544844686985, "loss_nce": 0.0021864171139895916, "loss_density": 0.09099031984806061, "lr": 0.0004850257428377962, "grad_norm": 0.03126751631498337, "wall_ms": 3561219}
{"step": 3850, "loss": 0.010361790657043457, "loss_nce": 0.0010427838424220681, "loss_density": 0.09319005906581879, "lr": 0.00048457339848620554, "grad_norm": 0.026902908459305763, "wall_ms": 3608140}
{"step": 3900, "loss": 0.009052329696714878, "loss_nce": 0.0013414185959845781, "loss_density": 0.07710910588502884, "lr": 0.0004841145451048228, "grad_norm": 0.024360358715057373, "wall_ms": 3654907}
{"step": 3950, "loss": 0.008627058006823063, "loss_nce": 0.001242583035491407, "loss_density": 0.07384474575519562, "lr": 0.00048364919570338856, "grad_norm": 0.014755507931113243, "wall_ms": 3701687}
{"step": 4000, "loss": 0.013546546921133995, "loss_nce": 0.0057080332189798355, "loss_density": 0.0783851370215416, "lr": 0.00048317736347582295, "grad_norm": 0.12437047809362411, "wall_ms": 3748485}
{"step": 4050, "loss": 0.009595970623195171, "loss_nce": 0.0013134542386978865, "loss_density": 0.08282516151666641, "lr": 0.0004826990617998521, "grad_norm": 0.017995042726397514, "wall_ms": 3795266}
{"step": 4100, "loss": 0.00985666736960411, "loss_nce": 0.0018420041305944324, "loss_density": 0.08014662563800812, "lr": 0.0004822143042366284, "grad_norm": 0.03135840594768524, "wall_ms": 3842121}
{"step": 4150, "loss": 0.009008144028484821, "loss_nce": 0.0015922993188723922, "loss_density": 0.0741584450006485, "lr": 0.00048172310453034633, "grad_norm": 0.020212914794683456, "wall_ms": 3888923}
{"step": 4200, "loss": 0.012420380488038063, "loss_nce": 0.0032991901971399784, "loss_density": 0.09121190756559372, "lr": 0.0004812254766078526, "grad_norm": 0.06590721011161804, "wall_ms": 3935716}
{"step": 4250, "loss": 0.014452386647462845, "loss_nce": 0.006454587448388338, "loss_density": 0.07997799664735794, "lr": 0.0004807214345782512, "grad_norm": 0.17813840508460999, "wall_ms": 3982606}
{"step": 4300, "loss": 0.00925131794065237, "loss_nce": 0.0011183003662154078, "loss_density": 0.08133017271757126, "lr": 0.0004802109927325036, "grad_norm": 0.021792862564325333, "wall_ms": 4029376}
{"step": 4350, "loss": 0.008758749812841415, "loss_nce": 0.0013916220050305128, "loss_density": 0.07367127388715744, "lr": 0.0004796941655430234, "grad_norm": 0.018143732100725174, "wall_ms": 4076188}
{"step": 4400, "loss": 0.009850362315773964, "loss_nce": 0.0030473596416413784, "loss_density": 0.06803002208471298, "lr": 0.00047917096766326626, "grad_norm": 0.03092840313911438, "wall_ms": 4123008}
{"step": 4450, "loss": 0.010688879527151585, "loss_nce": 0.0027055609971284866, "loss_density": 0.07983317971229553, "lr": 0.00047864141392731373, "grad_norm": 0.029992276802659035, "wall_ms": 4169776}
{"step": 4500, "loss": 0.009050642140209675, "loss_nce": 0.0016481011407449841, "loss_density": 0.07402540743350983, "lr": 0.0004781055193494538, "grad_norm": 0.02485295571386814, "wall_ms": 4216773}
{"step": 4550, "loss": 0.009334821254014969, "loss_nce": 0.0015121044125407934, "loss_density": 0.07822716236114502, "lr": 0.0004775632991237539, "grad_norm": 0.018075568601489067, "wall_ms": 4263569}
{"step": 4600, "loss": 0.008294763043522835, "loss_nce": 0.0008659077575430274, "loss_density": 0.07428855448961258, "lr": 0.00047701476862363143, "grad_norm": 0.011038800701498985, "wall_ms": 4310317}
{"step": 4650, "loss": 0.008701975457370281, "loss_nce": 0.0011410563020035625, "loss_density": 0.07560919225215912, "lr": 0.0004764599434014166, "grad_norm": 0.019231019541621208, "wall_ms": 4357085}
{"step": 4700, "loss": 0.008302737958729267, "loss_nce": 0.0013086439575999975, "loss_density": 0.06994093954563141, "lr": 0.00047589883918791256, "grad_norm": 0.011344624683260918, "wall_ms": 4403878}
{"step": 4750, "loss": 0.008894791826605797, "loss_nce": 0.0012842118740081787, "loss_density": 0.07610580325126648, "lr": 0.00047533147189194875, "grad_norm": 0.02002754621207714, "wall_ms": 4450663}
{"step": 4800, "loss": 0.009468978270888329, "loss_nce": 0.0021738677751272917, "loss_density": 0.07295110821723938, "lr": 0.00047475785759992995, "grad_norm": 0.028071701526641846, "wall_ms": 4497560}
{"step": 4850, "loss": 0.012743594124913216, "loss_nce": 0.005077137146145105, "loss_density": 0.07666456699371338, "lr": 0.00047417801257538037, "grad_norm": 0.09547697007656097, "wall_ms": 4544311}
{"step": 4900, "loss": 0.011549418792128563, "loss_nce": 0.0029589314945042133, "loss_density": 0.08590486645698547, "lr": 0.00047359195325848244, "grad_norm": 0.045041315257549286, "wall_ms": 4591105}
{"step": 4950, "loss": 0.007986516691744328, "loss_nce": 0.0008923688437789679, "loss_density": 0.07094147801399231, "lr": 0.0004729996962656105, "grad_norm": 0.012418813072144985, "wall_ms": 4637895}
{"step": 5000, "loss": 0.01048984657973051, "loss_nce": 0.002719187643378973, "loss_density": 0.07770659029483795, "lr": 0.00047240125838886006, "grad_norm": 0.10434124618768692, "wall_ms": 4684700}
{"step": 5050, "loss": 0.008294794708490372, "loss_nce": 0.0010832311818376184, "loss_density": 0.07211563736200333, "lr": 0.00047179665659557134, "grad_norm": 0.017174987122416496, "wall_ms": 4824944}
{"step": 5100, "loss": 0.019571900367736816, "loss_nce": 0.0017014918848872185, "loss_density": 0.17870408296585083, "lr": 0.0004711859080278484, "grad_norm": 0.0236714668571949, "wall_ms": 4871643}
{"step": 5150, "loss": 0.00907044392079115, "loss_nce": 0.001673708320595324, "loss_density": 0.0739673525094986, "lr": 0.0004705690300020731, "grad_norm": 0.023412320762872696, "wall_ms": 4918365}
{"step": 5200, "loss": 0.009766699746251106, "loss_nce": 0.0017804119270294905, "loss_density": 0.07986287027597427, "lr": 0.0004699460400084139, "grad_norm": 0.02998770773410797, "wall_ms": 4965071}
{"step": 5250, "loss": 0.018609456717967987, "loss_nce": 0.001753518357872963, "loss_density": 0.16855937242507935, "lr": 0.0004693169557103305, "grad_norm": 0.015887953341007233, "wall_ms": 5011915}
{"step": 5300, "loss": 0.012193402275443077, "loss_nce": 0.004700938239693642, "loss_density": 0.07492464035749435, "lr": 0.0004686817949440724, "grad_norm": 0.033967241644859314, "wall_ms": 5058754}
{"step": 5350, "loss": 0.009574980475008488, "loss_nce": 0.0017051680479198694, "loss_density": 0.07869812101125717, "lr": 0.0004680405757181737, "grad_norm": 0.034646544605493546, "wall_ms": 5105648}
{"step": 5400, "loss": 0.009810619056224823, "loss_nce": 0.0015349843306466937, "loss_density": 0.08275634795427322, "lr": 0.00046739331621294204, "grad_norm": 0.01617116667330265, "wall_ms": 5152507}
{"step": 5450, "loss": 0.01168018113821745, "loss_nce": 0.003833659226074815, "loss_density": 0.07846522331237793, "lr": 0.00046674003477994357, "grad_norm": 0.03202882036566734, "wall_ms": 5199302}
{"step": 5500, "loss": 0.008628379553556442, "loss_nce": 0.0011429901933297515, "loss_density": 0.07485388964414597, "lr": 0.00046608074994148246, "grad_norm": 0.011725619435310364, "wall_ms": 5246066}
{"step": 5550, "loss": 0.009166380390524864, "loss_nce": 0.0015620585763826966, "loss_density": 0.07604321837425232, "lr": 0.0004654154803900758, "grad_norm": 0.015616989694535732, "wall_ms": 5293161}
{"step": 5600, "loss": 0.008569921366870403, "loss_nce": 0.0012457978446036577, "loss_density": 0.0732412338256836, "lr": 0.0004647442449879234, "grad_norm": 0.014262094162404537, "wall_ms": 5340075}
{"step": 5650, "loss": 0.008383307605981827, "loss_nce": 0.0010497013572603464, "loss_density": 0.07333606481552124, "lr": 0.0004640670627663732, "grad_norm": 0.009952005930244923, "wall_ms": 5386987}
{"step": 5700, "loss": 0.029366157948970795, "loss_nce": 0.022628670558333397, "loss_density": 0.06737487763166428, "lr": 0.0004633839529253817, "grad_norm": 0.6836894750595093, "wall_ms": 5433759}
{"step": 5750, "loss": 0.008448325097560883, "loss_nce": 0.0012538386508822441, "loss_density": 0.07194486260414124, "lr": 0.0004626949348329694, "grad_norm": 0.022581998258829117, "wall_ms": 5480541}
{"step": 5800, "loss": 0.009303348138928413, "loss_nce": 0.0018143304623663425, "loss_density": 0.07489018142223358, "lr": 0.000462000028024672, "grad_norm": 0.027118420228362083, "wall_ms": 5527290}
{"step": 5850, "loss": 0.010448089800775051, "loss_nce": 0.0037482399493455887, "loss_density": 0.06699849665164948, "lr": 0.000461299252202986, "grad_norm": 0.04959241300821304, "wall_ms": 5574166}
{"step": 5900, "loss": 0.008394870907068253, "loss_nce": 0.0010676386300474405, "loss_density": 0.07327232509851456, "lr": 0.0004605926272368106, "grad_norm": 0.013272121548652649, "wall_ms": 5621005}
{"step": 5950, "loss": 0.00828579906374216, "loss_nce": 0.000997425289824605, "loss_density": 0.072883740067482, "lr": 0.00045988017316088425, "grad_norm": 0.008583346381783485, "wall_ms": 5667750}
{"step": 6000, "loss": 0.008103417232632637, "loss_nce": 0.0012684634421020746, "loss_density": 0.06834953278303146, "lr": 0.0004591619101752162, "grad_norm": 0.015901219099760056, "wall_ms": 5714578}
{"step": 6050, "loss": 0.00805114395916462, "loss_nce": 0.0011686425423249602, "loss_density": 0.06882501393556595, "lr": 0.0004584378586445142, "grad_norm": 0.011721141636371613, "wall_ms": 5761347}
{"step": 6100, "loss": 0.007534731179475784, "loss_nce": 0.0008344750385731459, "loss_density": 0.0670025572180748, "lr": 0.00045770803909760707, "grad_norm": 0.008889979682862759, "wall_ms": 5808171}
{"step": 6150, "loss": 0.009459597989916801, "loss_nce": 0.0025075357407331467, "loss_density": 0.06952062249183655, "lr": 0.00045697247222686237, "grad_norm": 0.055455442517995834, "wall_ms": 5854902}
{"step": 6200, "loss": 0.010143432766199112, "loss_nce": 0.0025881975889205933, "loss_density": 0.07555234432220459, "lr": 0.00045623117888760024, "grad_norm": 0.028520088642835617, "wall_ms": 5901712}
{"step": 6250, "loss": 0.007794383447617292, "loss_nce": 0.0009493664838373661, "loss_density": 0.06845016777515411, "lr": 0.00045548418009750134, "grad_norm": 0.008694680407643318, "wall_ms": 5948550}
{"step": 6300, "loss": 0.008750214241445065, "loss_nce": 0.0021119732409715652, "loss_density": 0.06638240814208984, "lr": 0.0004547314970360119, "grad_norm": 0.02798551321029663, "wall_ms": 5995380}
{"step": 6350, "loss": 0.007956515066325665, "loss_nce": 0.0010108568239957094, "loss_density": 0.06945657730102539, "lr": 0.00045397315104374217, "grad_norm": 0.00991846714168787, "wall_ms": 6042196}
{"step": 6400, "loss": 0.010210643522441387, "loss_nce": 0.0022199146915227175, "loss_density": 0.07990729063749313, "lr": 0.00045320916362186214, "grad_norm": 0.02603738382458687, "wall_ms": 6089038}
{"step": 6450, "loss": 0.009546949528157711, "loss_nce": 0.0014900058740749955, "loss_density": 0.08056943118572235, "lr": 0.0004524395564314916, "grad_norm": 0.012970666401088238, "wall_ms": 6135873}
{"step": 6500, "loss": 0.008623423054814339, "loss_nce": 0.0014824623940512538, "loss_density": 0.07140960544347763, "lr": 0.0004516643512930861, "grad_norm": 0.019668035209178925, "wall_ms": 6182577}
{"step": 6550, "loss": 0.009863889776170254, "loss_nce": 0.0015021282015368342, "loss_density": 0.08361761271953583, "lr": 0.0004508835701858182, "grad_norm": 0.012641573324799538, "wall_ms": 6229404}
{"step": 6600, "loss": 0.008396654389798641, "loss_nce": 0.0014108794275671244, "loss_density": 0.06985774636268616, "lr": 0.00045009723524695435, "grad_norm": 0.011116772890090942, "wall_ms": 6276120}
{"step": 6650, "loss": 0.008019606582820415, "loss_nce": 0.0011196485720574856, "loss_density": 0.06899958103895187, "lr": 0.0004493053687712272, "grad_norm": 0.011459670960903168, "wall_ms": 6322951}
{"step": 6700, "loss": 0.00842378567904234, "loss_nce": 0.0009702572133392096, "loss_density": 0.07453528791666031, "lr": 0.00044850799321020343, "grad_norm": 0.00944980327039957, "wall_ms": 6369786}
{"step": 6750, "loss": 0.008023351430892944, "loss_nce": 0.001171325333416462, "loss_density": 0.06852025538682938, "lr": 0.00044770513117164745, "grad_norm": 0.00980115681886673, "wall_ms": 6416756}
{"step": 6800, "loss": 0.009754138998687267, "loss_nce": 0.0026979425456374884, "loss_density": 0.0705619603395462, "lr": 0.00044689680541888, "grad_norm": 0.018978575244545937, "wall_ms": 6463551}
{"step": 6850, "loss": 0.009330508299171925, "loss_nce": 0.0017295540310442448, "loss_density": 0.07600954174995422, "lr": 0.00044608303887013314, "grad_norm": 0.017292676493525505, "wall_ms": 6510358}
{"step": 6900, "loss": 0.00888801459223032, "loss_nce": 0.0010948741110041738, "loss_density": 0.07793140411376953, "lr": 0.00044526385459790025, "grad_norm": 0.011244899593293667, "wall_ms": 6557090}
{"step": 6950, "loss": 0.008436608128249645, "loss_nce": 0.0010549272410571575, "loss_density": 0.07381680607795715, "lr": 0.00044443927582828185, "grad_norm": 0.00925364624708891, "wall_ms": 6603891}
{"step": 7000, "loss": 0.008741163648664951, "loss_nce": 0.0011195569531992078, "loss_density": 0.07621606439352036, "lr": 0.00044360932594032706, "grad_norm": 0.014686217531561852, "wall_ms": 6650714}
{"step": 7050, "loss": 0.008587857708334923, "loss_nce": 0.001513088122010231, "loss_density": 0.07074769586324692, "lr": 0.0004427740284653709, "grad_norm": 0.01707345061004162, "wall_ms": 6697648}
{"step": 7100, "loss": 0.010762238875031471, "loss_nce": 0.003530783113092184, "loss_density": 0.0723145604133606, "lr": 0.0004419334070863671, "grad_norm": 0.15223874151706696, "wall_ms": 6744459}
{"step": 7150, "loss": 0.008131624199450016, "loss_nce": 0.001031694351695478, "loss_density": 0.07099929451942444, "lr": 0.0004410874856372163, "grad_norm": 0.007504594512283802, "wall_ms": 6791268}
{"step": 7200, "loss": 0.00929073803126812, "loss_nce": 0.002592463046312332, "loss_density": 0.06698274612426758, "lr": 0.00044023628810209073, "grad_norm": 0.030017614364624023, "wall_ms": 6838046}
{"step": 7250, "loss": 0.00880240648984909, "loss_nce": 0.0020456123165786266, "loss_density": 0.06756794452667236, "lr": 0.0004393798386147539, "grad_norm": 0.03131018579006195, "wall_ms": 6884793}
{"step": 7300, "loss": 0.008417785167694092, "loss_nce": 0.0012120186584070325, "loss_density": 0.07205766439437866, "lr": 0.0004385181614578766, "grad_norm": 0.009255434386432171, "wall_ms": 6931575}
{"step": 7350, "loss": 0.00918539334088564, "loss_nce": 0.002486314857378602, "loss_density": 0.06699078530073166, "lr": 0.000437651281062348, "grad_norm": 0.020243730396032333, "wall_ms": 6978350}
{"step": 7400, "loss": 0.011586057022213936, "loss_nce": 0.004538760986179113, "loss_density": 0.07047295570373535, "lr": 0.0004367792220065833, "grad_norm": 0.034436892718076706, "wall_ms": 7025281}
{"step": 7450, "loss": 0.010945928283035755, "loss_nce": 0.0025124249514192343, "loss_density": 0.08433502912521362, "lr": 0.000435902009015827, "grad_norm": 0.037579137831926346, "wall_ms": 7072233}
{"step": 7500, "loss": 0.0077763693407177925, "loss_nce": 0.0009140107431448996, "loss_density": 0.06862358748912811, "lr": 0.00043501966696145143, "grad_norm": 0.008868057280778885, "wall_ms": 7119551}
{"step": 7550, "loss": 0.00919189304113388, "loss_nce": 0.0024632057175040245, "loss_density": 0.06728687882423401, "lr": 0.00043413222086025207, "grad_norm": 0.029087131842970848, "wall_ms": 7166330}
{"step": 7600, "loss": 0.00973791815340519, "loss_nce": 0.002686684485524893, "loss_density": 0.07051233947277069, "lr": 0.00043323969587373757, "grad_norm": 0.029543625190854073, "wall_ms": 7213100}
{"step": 7650, "loss": 0.009424903430044651, "loss_nce": 0.001965847099199891, "loss_density": 0.07459056377410889, "lr": 0.0004323421173074172, "grad_norm": 0.01244751550257206, "wall_ms": 7259976}
{"step": 7700, "loss": 0.007998468354344368, "loss_nce": 0.001661340007558465, "loss_density": 0.06337128579616547, "lr": 0.0004314395106100827, "grad_norm": 0.01176379807293415, "wall_ms": 7306735}
{"step": 7750, "loss": 0.008806997910141945, "loss_nce": 0.0025078896433115005, "loss_density": 0.06299107521772385, "lr": 0.00043053190137308723, "grad_norm": 0.05932730808854103, "wall_ms": 7353527}
{"step": 7800, "loss": 0.00847361609339714, "loss_nce": 0.0013263174332678318, "loss_density": 0.07147298008203506, "lr": 0.0004296193153296193, "grad_norm": 0.014996582642197609, "wall_ms": 7400438}
{"step": 7850, "loss": 0.00994872860610485, "loss_nce": 0.003106208983808756, "loss_density": 0.06842519342899323, "lr": 0.0004287017783539734, "grad_norm": 0.06174614280462265, "wall_ms": 7447322}
{"step": 7900, "loss": 0.011845655739307404, "loss_nce": 0.0027040867134928703, "loss_density": 0.09141568839550018, "lr": 0.00042777931646081685, "grad_norm": 0.02911268174648285, "wall_ms": 7494227}
{"step": 7950, "loss": 0.00853466521948576, "loss_nce": 0.0015790604520589113, "loss_density": 0.06955604255199432, "lr": 0.00042685195580445106, "grad_norm": 0.014994620345532894, "wall_ms": 7541244}
{"step": 8000, "loss": 0.010056622326374054, "loss_nce": 0.0032549151219427586, "loss_density": 0.06801706552505493, "lr": 0.0004259197226780713, "grad_norm": 0.032359179109334946, "wall_ms": 7588073}
{"step": 8050, "loss": 0.008718806318938732, "loss_nce": 0.0015145278302952647, "loss_density": 0.0720427855849266, "lr": 0.00042498264351302023, "grad_norm": 0.012032192200422287, "wall_ms": 7634926}
{"step": 8100, "loss": 0.008612636476755142, "loss_nce": 0.0012868299381807446, "loss_density": 0.07325806468725204, "lr": 0.00042404074487803906, "grad_norm": 0.019689548760652542, "wall_ms": 7681686}
{"step": 8150, "loss": 0.00766865536570549, "loss_nce": 0.0010572706814855337, "loss_density": 0.06611384451389313, "lr": 0.00042309405347851404, "grad_norm": 0.015459930524230003, "wall_ms": 7728521}
{"step": 8200, "loss": 0.007543360814452171, "loss_nce": 0.00117209879681468, "loss_density": 0.06371261924505234, "lr": 0.0004221425961557194, "grad_norm": 0.011372184380888939, "wall_ms": 7775388}
{"step": 8250, "loss": 0.007500474341213703, "loss_nce": 0.0009580713231116533, "loss_density": 0.06542403250932693, "lr": 0.00042118639988605595, "grad_norm": 0.008712024427950382, "wall_ms": 7822286}
{"step": 8300, "loss": 0.008021178655326366, "loss_nce": 0.001170994364656508, "loss_density": 0.06850184500217438, "lr": 0.00042022549178028683, "grad_norm": 0.012919061817228794, "wall_ms": 7869142}
{"step": 8350, "loss": 0.009348686784505844, "loss_nce": 0.0020306138321757317, "loss_density": 0.07318073511123657, "lr": 0.0004192598990827685, "grad_norm": 0.015455729328095913, "wall_ms": 7916053}
{"step": 8400, "loss": 0.01015767827630043, "loss_nce": 0.0010797946015372872, "loss_density": 0.09077882766723633, "lr": 0.0004182896491706782, "grad_norm": 0.030085252597928047, "wall_ms": 7962937}
{"step": 8450, "loss": 0.008109725080430508, "loss_nce": 0.0013823261251673102, "loss_density": 0.06727398931980133, "lr": 0.00041731476955323793, "grad_norm": 0.013618579134345055, "wall_ms": 8009698}
{"step": 8500, "loss": 0.007224695757031441, "loss_nce": 0.0007993612671270967, "loss_density": 0.06425334513187408, "lr": 0.0004163352878709345, "grad_norm": 0.006101786158978939, "wall_ms": 8056638}
{"step": 8550, "loss": 0.008679443970322609, "loss_nce": 0.0013510854914784431, "loss_density": 0.07328358292579651, "lr": 0.00041535123189473557, "grad_norm": 0.015485938638448715, "wall_ms": 8103316}
{"step": 8600, "loss": 0.008222963660955429, "loss_nce": 0.0016242511337623, "loss_density": 0.06598712503910065, "lr": 0.00041436262952530253, "grad_norm": 0.022509051486849785, "wall_ms": 8150134}
{"step": 8650, "loss": 0.007807508111000061, "loss_nce": 0.001128191826865077, "loss_density": 0.06679315865039825, "lr": 0.0004133695087921996, "grad_norm": 0.008096239529550076, "wall_ms": 8196925}
{"step": 8700, "loss": 0.007886895909905434, "loss_nce": 0.0014769458211958408, "loss_density": 0.0640995055437088, "lr": 0.00041237189785309854, "grad_norm": 0.01702473685145378, "wall_ms": 8243777}
{"step": 8750, "loss": 0.008695351891219616, "loss_nce": 0.0013159271329641342, "loss_density": 0.07379424571990967, "lr": 0.0004113698249929809, "grad_norm": 0.011120584793388844, "wall_ms": 8290793}
{"step": 8800, "loss": 0.02164408564567566, "loss_nce": 0.0025489195249974728, "loss_density": 0.1909516453742981, "lr": 0.00041036331862333585, "grad_norm": 0.026799984276294708, "wall_ms": 8337726}
{"step": 8850, "loss": 0.013067073188722134, "loss_nce": 0.0018926847260445356, "loss_density": 0.11174388229846954, "lr": 0.0004093524072813543, "grad_norm": 0.009876121766865253, "wall_ms": 8384510}
{"step": 8900, "loss": 0.008491115644574165, "loss_nce": 0.0013957966584712267, "loss_density": 0.07095319032669067, "lr": 0.0004083371196291204, "grad_norm": 0.013507459312677383, "wall_ms": 8431343}
{"step": 8950, "loss": 0.008583977818489075, "loss_nce": 0.0016072215512394905, "loss_density": 0.06976756453514099, "lr": 0.00040731748445279855, "grad_norm": 0.012352993711829185, "wall_ms": 8478285}
{"step": 9000, "loss": 0.007285036146640778, "loss_nce": 0.001113814883865416, "loss_density": 0.06171221286058426, "lr": 0.000406293530661817, "grad_norm": 0.008883127011358738, "wall_ms": 8525144}
{"step": 9050, "loss": 0.007535838522017002, "loss_nce": 0.0013625167775899172, "loss_density": 0.06173321604728699, "lr": 0.00040526528728804896, "grad_norm": 0.013894111849367619, "wall_ms": 8571994}
{"step": 9100, "loss": 0.007567795924842358, "loss_nce": 0.0013627347070723772, "loss_density": 0.06205061078071594, "lr": 0.00040423278348498845, "grad_norm": 0.015452410094439983, "wall_ms": 8618807}
{"step": 9150, "loss": 0.008333005011081696, "loss_nce": 0.002103882608935237, "loss_density": 0.0622912272810936, "lr": 0.00040319604852692454, "grad_norm": 0.036073826253414154, "wall_ms": 8665614}
{"step": 9200, "loss": 0.008397985249757767, "loss_nce": 0.0014147049514576793, "loss_density": 0.06983280181884766, "lr": 0.00040215511180811096, "grad_norm": 0.0123893478885293, "wall_ms": 8712425}
{"step": 9250, "loss": 0.009949734434485435, "loss_nce": 0.003257777774706483, "loss_density": 0.06691956520080566, "lr": 0.0004011100028419329, "grad_norm": 0.046180568635463715, "wall_ms": 8759404}
{"step": 9300, "loss": 0.009013975039124489, "loss_nce": 0.002164047211408615, "loss_density": 0.06849928200244904, "lr": 0.0004000607512600699, "grad_norm": 0.01593768782913685, "wall_ms": 8806207}
{"step": 9350, "loss": 0.010801296681165695, "loss_nce": 0.0026073709595948458, "loss_density": 0.08193925768136978, "lr": 0.00039900738681165594, "grad_norm": 0.056933872401714325, "wall_ms": 8853021}
{"step": 9400, "loss": 0.008063455112278461, "loss_nce": 0.0012801203411072493, "loss_density": 0.06783334910869598, "lr": 0.0003979499393624361, "grad_norm": 0.014901788905262947, "wall_ms": 8899787}
{"step": 9450, "loss": 0.013333214446902275, "loss_nce": 0.005518671125173569, "loss_density": 0.07814542949199677, "lr": 0.0003968884388939196, "grad_norm": 0.04857771471142769, "wall_ms": 8946553}
{"step": 9500, "loss": 0.012352341786026955, "loss_nce": 0.006079029757529497, "loss_density": 0.06273311376571655, "lr": 0.00039582291550252974, "grad_norm": 0.13778778910636902, "wall_ms": 8993419}
{"step": 9550, "loss": 0.007989764213562012, "loss_nce": 0.0013003206113353372, "loss_density": 0.06689443439245224, "lr": 0.00039475339939875037, "grad_norm": 0.010049551725387573, "wall_ms": 9040180}
{"step": 9600, "loss": 0.007931913249194622, "loss_nce": 0.001357823028229177, "loss_density": 0.06574089825153351, "lr": 0.00039367992090627005, "grad_norm": 0.015885919332504272, "wall_ms": 9087085}
{"step": 9650, "loss": 0.00987241044640541, "loss_nce": 0.0023339493200182915, "loss_density": 0.07538460940122604, "lr": 0.0003926025104611215, "grad_norm": 0.02614171802997589, "wall_ms": 9133870}
{"step": 9700, "loss": 0.007082848809659481, "loss_nce": 0.0012019197456538677, "loss_density": 0.05880929157137871, "lr": 0.0003915211986108189, "grad_norm": 0.0075454493053257465, "wall_ms": 9180786}
{"step": 9750, "loss": 0.007984294556081295, "loss_nce": 0.0010466014500707388, "loss_density": 0.06937693059444427, "lr": 0.00039043601601349237, "grad_norm": 0.00812365673482418, "wall_ms": 9227584}
{"step": 9800, "loss": 0.007844600826501846, "loss_nce": 0.0015327412402257323, "loss_density": 0.0631185993552208, "lr": 0.0003893469934370177, "grad_norm": 0.014825818128883839, "wall_ms": 9274420}
{"step": 9850, "loss": 0.008004814386367798, "loss_nce": 0.0015225345268845558, "loss_density": 0.06482279300689697, "lr": 0.00038825416175814484, "grad_norm": 0.020918410271406174, "wall_ms": 9321327}
{"step": 9900, "loss": 0.007261082995682955, "loss_nce": 0.0008361163781955838, "loss_density": 0.0642496645450592, "lr": 0.00038715755196162187, "grad_norm": 0.006529574748128653, "wall_ms": 9368132}
{"step": 9950, "loss": 0.0097297802567482, "loss_nce": 0.0026171717327088118, "loss_density": 0.07112608850002289, "lr": 0.00038605719513931707, "grad_norm": 0.022496147081255913, "wall_ms": 9414977}
{"step": 10000, "loss": 0.007759352680295706, "loss_nce": 0.0010599461384117603, "loss_density": 0.0669940635561943, "lr": 0.00038495312248933687, "grad_norm": 0.009710493497550488, "wall_ms": 9461803}
{"step": 10050, "loss": 0.007414680439978838, "loss_nce": 0.0007353656110353768, "loss_density": 0.06679314374923706, "lr": 0.0003838453653151417, "grad_norm": 0.00820914562791586, "wall_ms": 9598797}
{"step": 10100, "loss": 0.009402388706803322, "loss_nce": 0.00219186139293015, "loss_density": 0.072105273604393, "lr": 0.0003827339550246581, "grad_norm": 0.01807025447487831, "wall_ms": 9645528}
{"step": 10150, "loss": 0.00708276592195034, "loss_nce": 0.0011187896598130465, "loss_density": 0.05963975936174393, "lr": 0.00038161892312938843, "grad_norm": 0.01013970747590065, "wall_ms": 9692416}
{"step": 10200, "loss": 0.007974062114953995, "loss_nce": 0.0012159929610788822, "loss_density": 0.0675806850194931, "lr": 0.0003805003012435174, "grad_norm": 0.008069794625043869, "wall_ms": 9739220}
{"step": 10250, "loss": 0.007099870126694441, "loss_nce": 0.0011141513241454959, "loss_density": 0.05985718593001366, "lr": 0.0003793781210830158, "grad_norm": 0.009527339600026608, "wall_ms": 9786232}
{"step": 10300, "loss": 0.008403982035815716, "loss_nce": 0.0020482062827795744, "loss_density": 0.06355775892734528, "lr": 0.000378252414464741, "grad_norm": 0.029964782297611237, "wall_ms": 9833288}
{"step": 10350, "loss": 0.00893267709761858, "loss_nce": 0.00237097917124629, "loss_density": 0.06561698019504547, "lr": 0.00037712321330553517, "grad_norm": 0.021210353821516037, "wall_ms": 9880260}
{"step": 10400, "loss": 0.008813911117613316, "loss_nce": 0.00225038081407547, "loss_density": 0.0656353011727333, "lr": 0.00037599054962132006, "grad_norm": 0.023521680384874344, "wall_ms": 9927196}
{"step": 10450, "loss": 0.00933932140469551, "loss_nce": 0.002426760271191597, "loss_density": 0.06912560760974884, "lr": 0.00037485445552618967, "grad_norm": 0.023808419704437256, "wall_ms": 9974074}
{"step": 10500, "loss": 0.007915210910141468, "loss_nce": 0.001743439701385796, "loss_density": 0.06171771138906479, "lr": 0.0003737149632314993, "grad_norm": 0.02035406418144703, "wall_ms": 10021003}
{"step": 10550, "loss": 0.00918390043079853, "loss_nce": 0.002694014459848404, "loss_density": 0.06489886343479156, "lr": 0.00037257210504495244, "grad_norm": 0.03367117419838905, "wall_ms": 10067972}
{"step": 10600, "loss": 0.007676695939153433, "loss_nce": 0.001201657229103148, "loss_density": 0.06475038826465607, "lr": 0.0003714259133696847, "grad_norm": 0.006770050153136253, "wall_ms": 10114811}
{"step": 10650, "loss": 0.00749125424772501, "loss_nce": 0.0007746613118797541, "loss_density": 0.06716592609882355, "lr": 0.0003702764207033454, "grad_norm": 0.007513816002756357, "wall_ms": 10161823}
{"step": 10700, "loss": 0.007045177277177572, "loss_nce": 0.0008168236236087978, "loss_density": 0.06228353828191757, "lr": 0.0003691236596371757, "grad_norm": 0.009118988178670406, "wall_ms": 10208912}
{"step": 10750, "loss": 0.006718013901263475, "loss_nce": 0.0006959150778129697, "loss_density": 0.06022098660469055, "lr": 0.00036796766285508486, "grad_norm": 0.005775738973170519, "wall_ms": 10255835}
{"step": 10800, "loss": 0.010020853020250797, "loss_nce": 0.0011968056205660105, "loss_density": 0.08824047446250916, "lr": 0.00036680846313272346, "grad_norm": 0.012874288484454155, "wall_ms": 10302748}
{"step": 10850, "loss": 0.008049322292208672, "loss_nce": 0.0012496705166995525, "loss_density": 0.06799651682376862, "lr": 0.0003656460933365541, "grad_norm": 0.010921304114162922, "wall_ms": 10349724}
{"step": 10900, "loss": 0.00745898112654686, "loss_nce": 0.0014205366605892777, "loss_density": 0.060384444892406464, "lr": 0.00036448058642291975, "grad_norm": 0.0081467991694808, "wall_ms": 10396587}
{"step": 10950, "loss": 0.007432871963828802, "loss_nce": 0.0011428488651290536, "loss_density": 0.06290023028850555, "lr": 0.0003633119754371091, "grad_norm": 0.008196484297513962, "wall_ms": 10443441}
{"step": 11000, "loss": 0.008317830041050911, "loss_nce": 0.0019346929620951414, "loss_density": 0.0638313740491867, "lr": 0.0003621402935124196, "grad_norm": 0.011823073029518127, "wall_ms": 10490336}
{"step": 11050, "loss": 0.006927101872861385, "loss_nce": 0.0007349355146288872, "loss_density": 0.06192166358232498, "lr": 0.00036096557386921824, "grad_norm": 0.006726534571498632, "wall_ms": 10537350}
{"step": 11100, "loss": 0.0070330677554011345, "loss_nce": 0.0007078390917740762, "loss_density": 0.0632522851228714, "lr": 0.00035978784981399975, "grad_norm": 0.00468719657510519, "wall_ms": 10584270}
{"step": 11150, "loss": 0.007218235172331333, "loss_nce": 0.0007728708442300558, "loss_density": 0.06445364654064178, "lr": 0.0003586071547384419, "grad_norm": 0.006640553008764982, "wall_ms": 10631165}
{"step": 11200, "loss": 0.007286434061825275, "loss_nce": 0.0012057279236614704, "loss_density": 0.060807060450315475, "lr": 0.0003574235221184589, "grad_norm": 0.007714405190199614, "wall_ms": 10677971}
{"step": 11250, "loss": 0.009114162996411324, "loss_nce": 0.0018650072161108255, "loss_density": 0.07249155640602112, "lr": 0.00035623698551325267, "grad_norm": 0.018135124817490578, "wall_ms": 10724965}
{"step": 11300, "loss": 0.008190976455807686, "loss_nce": 0.0018086358904838562, "loss_density": 0.0638234093785286, "lr": 0.00035504757856436064, "grad_norm": 0.013957105576992035, "wall_ms": 10771924}
{"step": 11350, "loss": 0.007055314257740974, "loss_nce": 0.0008878929074853659, "loss_density": 0.06167421117424965, "lr": 0.0003538553349947026, "grad_norm": 0.005332445725798607, "wall_ms": 10819198}
{"step": 11400, "loss": 0.006733217742294073, "loss_nce": 0.00104323064442724, "loss_density": 0.056899867951869965, "lr": 0.00035266028860762383, "grad_norm": 0.008456813171505928, "wall_ms": 10865993}
{"step": 11450, "loss": 0.007634845096617937, "loss_nce": 0.0013352151727303863, "loss_density": 0.06299629807472229, "lr": 0.0003514624732859376, "grad_norm": 0.009205739013850689, "wall_ms": 10912841}
{"step": 11500, "loss": 0.007754520978778601, "loss_nce": 0.0012423209846019745, "loss_density": 0.06512200087308884, "lr": 0.0003502619229909636, "grad_norm": 0.00979198981076479, "wall_ms": 10959746}
{"step": 11550, "loss": 0.007492034696042538, "loss_nce": 0.001250299857929349, "loss_density": 0.062417346984148026, "lr": 0.00034905867176156566, "grad_norm": 0.01457197219133377, "wall_ms": 11006839}
{"step": 11600, "loss": 0.0074729169718921185, "loss_nce": 0.0015023844316601753, "loss_density": 0.05970532447099686, "lr": 0.0003478527537131863, "grad_norm": 0.013911381363868713, "wall_ms": 11053634}
{"step": 11650, "loss": 0.008882432244718075, "loss_nce": 0.0024588769301772118, "loss_density": 0.06423555314540863, "lr": 0.0003466442030368799, "grad_norm": 0.011866135522723198, "wall_ms": 11100507}
{"step": 11700, "loss": 0.007750251796096563, "loss_nce": 0.0010324536124244332, "loss_density": 0.06717798113822937, "lr": 0.0003454330539983426, "grad_norm": 0.006926270201802254, "wall_ms": 11147426}
{"step": 11750, "loss": 0.010279124602675438, "loss_nce": 0.0010580087546259165, "loss_density": 0.09221115708351135, "lr": 0.0003442193409369415, "grad_norm": 0.032965537160634995, "wall_ms": 11194342}
{"step": 11800, "loss": 0.009037245064973831, "loss_nce": 0.0023010398726910353, "loss_density": 0.06736205518245697, "lr": 0.00034300309826474035, "grad_norm": 0.01761108823120594, "wall_ms": 11241250}
{"step": 11850, "loss": 0.00985789392143488, "loss_nce": 0.002341985236853361, "loss_density": 0.07515908777713776, "lr": 0.0003417843604655247, "grad_norm": 0.022021068260073662, "wall_ms": 11288219}
{"step": 11900, "loss": 0.007619425188750029, "loss_nce": 0.0012874625390395522, "loss_density": 0.0633196234703064, "lr": 0.0003405631620938234, "grad_norm": 0.010771148838102818, "wall_ms": 11335031}
{"step": 11950, "loss": 0.007322512101382017, "loss_nce": 0.0006901542656123638, "loss_density": 0.06632357835769653, "lr": 0.00033933953777392925, "grad_norm": 0.003992415498942137, "wall_ms": 11381841}
{"step": 12000, "loss": 0.007365812547504902, "loss_nce": 0.0009459683205932379, "loss_density": 0.06419844180345535, "lr": 0.0003381135221989174, "grad_norm": 0.009403660893440247, "wall_ms": 11428752}
{"step": 12050, "loss": 0.007446758449077606, "loss_nce": 0.0010049550328403711, "loss_density": 0.06441803276538849, "lr": 0.00033688515012966156, "grad_norm": 0.006245248019695282, "wall_ms": 11475762}
{"step": 12100, "loss": 0.007078022230416536, "loss_nce": 0.0008096834644675255, "loss_density": 0.06268338859081268, "lr": 0.00033565445639384845, "grad_norm": 0.007418728433549404, "wall_ms": 11522672}
{"step": 12150, "loss": 0.006415185052901506, "loss_nce": 0.000386240950319916, "loss_density": 0.06028943881392479, "lr": 0.0003344214758849902, "grad_norm": 0.0034123824443668127, "wall_ms": 11569842}
{"step": 12200, "loss": 0.00870572030544281, "loss_nce": 0.002653256058692932, "loss_density": 0.06052464246749878, "lr": 0.0003331862435614354, "grad_norm": 0.010128860361874104, "wall_ms": 11616873}
{"step": 12250, "loss": 0.008289351128041744, "loss_nce": 0.0014833904569968581, "loss_density": 0.06805960834026337, "lr": 0.0003319487944453775, "grad_norm": 0.018885385245084763, "wall_ms": 11663855}
{"step": 12300, "loss": 0.007047195918858051, "loss_nce": 0.0006936131976544857, "loss_density": 0.06353582441806793, "lr": 0.00033070916362186215, "grad_norm": 0.005354910157620907, "wall_ms": 11710826}
{"step": 12350, "loss": 0.009228024631738663, "loss_nce": 0.002694595605134964, "loss_density": 0.06533429026603699, "lr": 0.0003294673862377923, "grad_norm": 0.025486310943961143, "wall_ms": 11757880}
{"step": 12400, "loss": 0.012848181650042534, "loss_nce": 0.006197728682309389, "loss_density": 0.06650452315807343, "lr": 0.0003282234975009318, "grad_norm": 0.10131096094846725, "wall_ms": 11804932}
{"step": 12450, "loss": 0.007316424511373043, "loss_nce": 0.0011566632892936468, "loss_density": 0.06159760802984238, "lr": 0.00032697753267890717, "grad_norm": 0.0070203556679189205, "wall_ms": 11851930}
{"step": 12500, "loss": 0.006529409904032946, "loss_nce": 0.0005142021691426635, "loss_density": 0.0601520761847496, "lr": 0.0003257295270982071, "grad_norm": 0.00495985196903348, "wall_ms": 11898883}
{"step": 12550, "loss": 0.008298425935208797, "loss_nce": 0.0014404153916984797, "loss_density": 0.06858010590076447, "lr": 0.00032447951614318204, "grad_norm": 0.020277736708521843, "wall_ms": 11945898}
{"step": 12600, "loss": 0.006707079242914915, "loss_nce": 0.0005559880519285798, "loss_density": 0.06151091307401657, "lr": 0.0003232275352550397, "grad_norm": 0.007767274975776672, "wall_ms": 11992877}
{"step": 12650, "loss": 0.008396781980991364, "loss_nce": 0.0017056816723197699, "loss_density": 0.0669110044836998, "lr": 0.00032197361993084107, "grad_norm": 0.020349809899926186, "wall_ms": 12039841}
{"step": 12700, "loss": 0.007909346371889114, "loss_nce": 0.0019205461721867323, "loss_density": 0.05988800525665283, "lr": 0.00032071780572249343, "grad_norm": 0.018771400675177574, "wall_ms": 12086862}
{"step": 12750, "loss": 0.007688253186643124, "loss_nce": 0.0013460831250995398, "loss_density": 0.06342169642448425, "lr": 0.00031946012823574264, "grad_norm": 0.009047162719070911, "wall_ms": 12133886}
{"step": 12800, "loss": 0.0077948495745658875, "loss_nce": 0.0010143949184566736, "loss_density": 0.06780454516410828, "lr": 0.0003182006231291639, "grad_norm": 0.006979775615036488, "wall_ms": 12180818}
{"step": 12850, "loss": 0.008155341260135174, "loss_nce": 0.0013681699056178331, "loss_density": 0.06787171214818954, "lr": 0.00031693932611314985, "grad_norm": 0.014992395415902138, "wall_ms": 12227607}
{"step": 12900, "loss": 0.007412962149828672, "loss_nce": 0.0007383099873550236, "loss_density": 0.06674651801586151, "lr": 0.0003156762729488991, "grad_norm": 0.006231334060430527, "wall_ms": 12274475}
{"step": 12950, "loss": 0.006879730150103569, "loss_nce": 0.000697054376360029, "loss_density": 0.06182675436139107, "lr": 0.00031441149944740136, "grad_norm": 0.004801168106496334, "wall_ms": 12321271}
{"step": 13000, "loss": 0.008002003654837608, "loss_nce": 0.0012251774314790964, "loss_density": 0.06776826083660126, "lr": 0.00031314504146842323, "grad_norm": 0.010426657274365425, "wall_ms": 12368167}
{"step": 13050, "loss": 0.008015478029847145, "loss_nce": 0.0014166554901748896, "loss_density": 0.06598822772502899, "lr": 0.00031187693491949007, "grad_norm": 0.01724999025464058, "wall_ms": 12415029}
{"step": 13100, "loss": 0.007319031748920679, "loss_nce": 0.0008176947594620287, "loss_density": 0.06501337140798569, "lr": 0.0003106072157548693, "grad_norm": 0.006170624401420355, "wall_ms": 12461861}
{"step": 13150, "loss": 0.008081517182290554, "loss_nce": 0.001946585951372981, "loss_density": 0.06134930998086929, "lr": 0.0003093359199745501, "grad_norm": 0.02772309072315693, "wall_ms": 12508693}
{"step": 13200, "loss": 0.007567894645035267, "loss_nce": 0.0015892342198640108, "loss_density": 0.0597866028547287, "lr": 0.00030806308362322306, "grad_norm": 0.009451431222259998, "wall_ms": 12555549}
{"step": 13250, "loss": 0.00912158191204071, "loss_nce": 0.0029741311445832253, "loss_density": 0.061474502086639404, "lr": 0.000306788742789258, "grad_norm": 0.023634448647499084, "wall_ms": 12602516}
{"step": 13300, "loss": 0.009618997573852539, "loss_nce": 0.004318751394748688, "loss_density": 0.05300246179103851, "lr": 0.00030551293360368104, "grad_norm": 0.033722490072250366, "wall_ms": 12649406}
{"step": 13350, "loss": 0.007490481249988079, "loss_nce": 0.0013183706905692816, "loss_density": 0.06172110512852669, "lr": 0.00030423569223915015, "grad_norm": 0.015158251859247684, "wall_ms": 12696319}
{"step": 13400, "loss": 0.007520761340856552, "loss_nce": 0.0016117847990244627, "loss_density": 0.05908976495265961, "lr": 0.0003029570549089293, "grad_norm": 0.014566672965884209, "wall_ms": 12743211}
{"step": 13450, "loss": 0.007534385658800602, "loss_nce": 0.0011321415659040213, "loss_density": 0.06402243673801422, "lr": 0.00030167705786586193, "grad_norm": 0.007793193217366934, "wall_ms": 12790047}
{"step": 13500, "loss": 0.00897055771201849, "loss_nce": 0.0015200148336589336, "loss_density": 0.07450542598962784, "lr": 0.000300395737401343, "grad_norm": 0.023280059918761253, "wall_ms": 12836810}
{"step": 13550, "loss": 0.007223211228847504, "loss_nce": 0.0005987759213894606, "loss_density": 0.06624435633420944, "lr": 0.0002991131298442902, "grad_norm": 0.003587424522265792, "wall_ms": 12883597}
{"step": 13600, "loss": 0.006720908917486668, "loss_nce": 0.000800524721853435, "loss_density": 0.05920384079217911, "lr": 0.00029782927156011364, "grad_norm": 0.004669310990720987, "wall_ms": 12930337}
{"step": 13650, "loss": 0.007554636336863041, "loss_nce": 0.0009842641884461045, "loss_density": 0.06570371985435486, "lr": 0.0002965441989496851, "grad_norm": 0.006366374436765909, "wall_ms": 12977170}
{"step": 13700, "loss": 0.007737367879599333, "loss_nce": 0.0016081231879070401, "loss_density": 0.06129244714975357, "lr": 0.00029525794844830557, "grad_norm": 0.014295167289674282, "wall_ms": 13024024}
{"step": 13750, "loss": 0.006986602209508419, "loss_nce": 0.0009450900834053755, "loss_density": 0.0604151226580143, "lr": 0.00029397055652467265, "grad_norm": 0.008372346870601177, "wall_ms": 13071052}
{"step": 13800, "loss": 0.006910902913659811, "loss_nce": 0.0007346438942477107, "loss_density": 0.06176258623600006, "lr": 0.0002926820596798464, "grad_norm": 0.00372882978990674, "wall_ms": 13117985}
{"step": 13850, "loss": 0.007123084738850594, "loss_nce": 0.0008688403759151697, "loss_density": 0.06254243850708008, "lr": 0.00029139249444621413, "grad_norm": 0.00716423848643899, "wall_ms": 13164966}
{"step": 13900, "loss": 0.006717057898640633, "loss_nce": 0.0006093665142543614, "loss_density": 0.06107690930366516, "lr": 0.00029010189738645515, "grad_norm": 0.004078308120369911, "wall_ms": 13211946}
{"step": 13950, "loss": 0.00878076907247305, "loss_nce": 0.0013158235233277082, "loss_density": 0.07464945316314697, "lr": 0.00028881030509250385, "grad_norm": 0.02009444311261177, "wall_ms": 13258827}
{"step": 14000, "loss": 0.007608933374285698, "loss_nce": 0.0007824136409908533, "loss_density": 0.06826519966125488, "lr": 0.0002875177541845119, "grad_norm": 0.0051925708539783955, "wall_ms": 13305801}
{"step": 14050, "loss": 0.008561274036765099, "loss_nce": 0.0018322644755244255, "loss_density": 0.06729009747505188, "lr": 0.00028622428130981064, "grad_norm": 0.02593783661723137, "wall_ms": 13352746}
{"step": 14100, "loss": 0.020190326496958733, "loss_nce": 0.014559602364897728, "loss_density": 0.05630723759531975, "lr": 0.00028492992314187144, "grad_norm": 0.09475620090961456, "wall_ms": 13399922}
{"step": 14150, "loss": 0.007198111154139042, "loss_nce": 0.0007820760365575552, "loss_density": 0.06416035443544388, "lr": 0.0002836347163792664, "grad_norm": 0.006883352063596249, "wall_ms": 13446921}
{"step": 14200, "loss": 0.00741235725581646, "loss_nce": 0.000985340098850429, "loss_density": 0.06427016854286194, "lr": 0.0002823386977446272, "grad_norm": 0.015015574172139168, "wall_ms": 13493967}
{"step": 14250, "loss": 0.006503273267298937, "loss_nce": 0.000613272306509316, "loss_density": 0.05890000984072685, "lr": 0.0002810419039836045, "grad_norm": 0.004758382681757212, "wall_ms": 13540946}
{"step": 14300, "loss": 0.007064460776746273, "loss_nce": 0.0016720588319003582, "loss_density": 0.053924016654491425, "lr": 0.0002797443718638261, "grad_norm": 0.015691718086600304, "wall_ms": 13587927}
{"step": 14350, "loss": 0.007409960497170687, "loss_nce": 0.001356832217425108, "loss_density": 0.06053128093481064, "lr": 0.00027844613817385404, "grad_norm": 0.008872460573911667, "wall_ms": 13634959}
{"step": 14400, "loss": 0.009819635190069675, "loss_nce": 0.0029844825621694326, "loss_density": 0.06835152953863144, "lr": 0.00027714723972214165, "grad_norm": 0.06971217691898346, "wall_ms": 13681946}
{"step": 14450, "loss": 0.0062648882158100605, "loss_nce": 0.0006187516264617443, "loss_density": 0.05646136403083801, "lr": 0.0002758477133359905, "grad_norm": 0.004467188846319914, "wall_ms": 13728888}
{"step": 14500, "loss": 0.00646404130384326, "loss_nce": 0.0005905661382712424, "loss_density": 0.05873475223779678, "lr": 0.0002745475958605054, "grad_norm": 0.006521414499729872, "wall_ms": 13775830}
{"step": 14550, "loss": 0.007108312100172043, "loss_nce": 0.00075869623105973, "loss_density": 0.06349615752696991, "lr": 0.0002732469241575503, "grad_norm": 0.008584563620388508, "wall_ms": 13822733}
{"step": 14600, "loss": 0.007038915529847145, "loss_nce": 0.0011707268422469497, "loss_density": 0.05868188291788101, "lr": 0.000271945735104703, "grad_norm": 0.015419614501297474, "wall_ms": 13869705}
{"step": 14650, "loss": 0.0062636807560920715, "loss_nce": 0.0007678109686821699, "loss_density": 0.05495870113372803, "lr": 0.00027064406559420954, "grad_norm": 0.00671094236895442, "wall_ms": 13916677}
{"step": 14700, "loss": 0.007404545787721872, "loss_nce": 0.0014103158609941602, "loss_density": 0.059942297637462616, "lr": 0.0002693419525319384, "grad_norm": 0.013087085448205471, "wall_ms": 13963875}
{"step": 14750, "loss": 0.00720629096031189, "loss_nce": 0.0008901871042326093, "loss_density": 0.06316103786230087, "lr": 0.00026803943283633375, "grad_norm": 0.005638707894831896, "wall_ms": 14010838}
{"step": 14800, "loss": 0.0075379651971161366, "loss_nce": 0.0012131045805290341, "loss_density": 0.06324860453605652, "lr": 0.0002667365434373691, "grad_norm": 0.013083391822874546, "wall_ms": 14057812}
{"step": 14850, "loss": 0.00720192352309823, "loss_nce": 0.00084686279296875, "loss_density": 0.06355060636997223, "lr": 0.00026543332127549983, "grad_norm": 0.0065791551023721695, "wall_ms": 14104828}
{"step": 14900, "loss": 0.007100917398929596, "loss_nce": 0.0011376196052879095, "loss_density": 0.059632979333400726, "lr": 0.00026412980330061623, "grad_norm": 0.008624306879937649, "wall_ms": 14151856}
{"step": 14950, "loss": 0.007490992546081543, "loss_nce": 0.0009048456558957696, "loss_density": 0.06586147099733353, "lr": 0.00026282602647099577, "grad_norm": 0.008364209905266762, "wall_ms": 14198820}
{"step": 15000, "loss": 0.007748741656541824, "loss_nce": 0.0018281979719176888, "loss_density": 0.05920543521642685, "lr": 0.00026152202775225485, "grad_norm": 0.011376447044312954, "wall_ms": 14246015}
{"step": 15050, "loss": 0.0072108106687664986, "loss_nce": 0.0010863938368856907, "loss_density": 0.061244167387485504, "lr": 0.0002602178441163013, "grad_norm": 0.006806767545640469, "wall_ms": 14352451}
{"step": 15100, "loss": 0.006638520397245884, "loss_nce": 0.0010009908583015203, "loss_density": 0.05637529864907265, "lr": 0.0002589135125402857, "grad_norm": 0.012095332145690918, "wall_ms": 14400194}
{"step": 15150, "loss": 0.006764788180589676, "loss_nce": 0.0007569312583655119, "loss_density": 0.06007856875658035, "lr": 0.0002576090700055534, "grad_norm": 0.005386087577790022, "wall_ms": 14447292}
{"step": 15200, "loss": 0.0066042873077094555, "loss_nce": 0.0005228685331530869, "loss_density": 0.060814183205366135, "lr": 0.00025630455349659524, "grad_norm": 0.003463583532720804, "wall_ms": 14494190}
{"step": 15250, "loss": 0.007203132379800081, "loss_nce": 0.0005799491191282868, "loss_density": 0.06623183190822601, "lr": 0.000255, "grad_norm": 0.003966668155044317, "wall_ms": 14541066}
{"step": 15300, "loss": 0.006536382716149092, "loss_nce": 0.0004510485741775483, "loss_density": 0.060853343456983566, "lr": 0.00025369544650340474, "grad_norm": 0.003405922092497349, "wall_ms": 14588025}
{"step": 15350, "loss": 0.0102540897205472, "loss_nce": 0.001156139886006713, "loss_density": 0.09097949415445328, "lr": 0.00025239092999444676, "grad_norm": 0.0314309261739254, "wall_ms": 14634938}
{"step": 15400, "loss": 0.008902139030396938, "loss_nce": 0.0015458047855645418, "loss_density": 0.0735633373260498, "lr": 0.0002510864874597142, "grad_norm": 0.014233353547751904, "wall_ms": 14681842}
{"step": 15450, "loss": 0.007931931875646114, "loss_nce": 0.0017013901378959417, "loss_density": 0.06230541318655014, "lr": 0.0002497821558836988, "grad_norm": 0.010416798293590546, "wall_ms": 14728749}
{"step": 15500, "loss": 0.007947463542222977, "loss_nce": 0.001961597241461277, "loss_density": 0.05985866114497185, "lr": 0.0002484779722477452, "grad_norm": 0.00823596678674221, "wall_ms": 14775788}
{"step": 15550, "loss": 0.0071996538899838924, "loss_nce": 0.0013721957802772522, "loss_density": 0.05827457830309868, "lr": 0.0002471739735290043, "grad_norm": 0.006879420485347509, "wall_ms": 14822861}
{"step": 15600, "loss": 0.0061125075444579124, "loss_nce": 0.0009159564506262541, "loss_density": 0.051965512335300446, "lr": 0.0002458701966993837, "grad_norm": 0.007176238112151623, "wall_ms": 14869813}
{"step": 15650, "loss": 0.006478224880993366, "loss_nce": 0.0007308201165869832, "loss_density": 0.0574740469455719, "lr": 0.0002445666787245002, "grad_norm": 0.0071257115341722965, "wall_ms": 14916799}
{"step": 15700, "loss": 0.007143127266317606, "loss_nce": 0.001466957968659699, "loss_density": 0.056761693209409714, "lr": 0.00024326345656263098, "grad_norm": 0.014252755790948868, "wall_ms": 14963758}
{"step": 15750, "loss": 0.006702870596200228, "loss_nce": 0.000617264595348388, "loss_density": 0.060856059193611145, "lr": 0.00024196056716366629, "grad_norm": 0.0051324861124157906, "wall_ms": 15010718}
{"step": 15800, "loss": 0.007134759332984686, "loss_nce": 0.0009805639274418354, "loss_density": 0.061541952192783356, "lr": 0.0002406580474680616, "grad_norm": 0.00682688457891345, "wall_ms": 15057765}
{"step": 15850, "loss": 0.007052330765873194, "loss_nce": 0.001008194056339562, "loss_density": 0.06044136732816696, "lr": 0.00023935593440579052, "grad_norm": 0.010028026066720486, "wall_ms": 15104711}
{"step": 15900, "loss": 0.006859111599624157, "loss_nce": 0.0006336853839457035, "loss_density": 0.06225426122546196, "lr": 0.0002380542648952971, "grad_norm": 0.011439495719969273, "wall_ms": 15151659}
{"step": 15950, "loss": 0.007834070362150669, "loss_nce": 0.0014731341507285833, "loss_density": 0.06360936164855957, "lr": 0.0002367530758424498, "grad_norm": 0.012995795346796513, "wall_ms": 15198649}
{"step": 16000, "loss": 0.00628368416801095, "loss_nce": 0.0007854165742173791, "loss_density": 0.05498267710208893, "lr": 0.0002354524041394946, "grad_norm": 0.005320900585502386, "wall_ms": 15245685}
{"step": 16050, "loss": 0.006984698120504618, "loss_nce": 0.0009139009052887559, "loss_density": 0.06070797145366669, "lr": 0.00023415228666400954, "grad_norm": 0.004272045101970434, "wall_ms": 15292673}
{"step": 16100, "loss": 0.006711078807711601, "loss_nce": 0.001294593559578061, "loss_density": 0.05416485294699669, "lr": 0.00023285276027785836, "grad_norm": 0.006268744822591543, "wall_ms": 15339716}
{"step": 16150, "loss": 0.007098553702235222, "loss_nce": 0.0009346547303721309, "loss_density": 0.06163898855447769, "lr": 0.00023155386182614608, "grad_norm": 0.006979817990213633, "wall_ms": 15386596}
{"step": 16200, "loss": 0.005963470786809921, "loss_nce": 0.0006420728168450296, "loss_density": 0.05321398004889488, "lr": 0.0002302556281361739, "grad_norm": 0.005924359429627657, "wall_ms": 15433557}
{"step": 16250, "loss": 0.008870339952409267, "loss_nce": 0.002863031579181552, "loss_density": 0.06007308512926102, "lr": 0.0002289580960163955, "grad_norm": 0.05403449758887291, "wall_ms": 15480380}
{"step": 16300, "loss": 0.007936036214232445, "loss_nce": 0.0016310580540448427, "loss_density": 0.06304977834224701, "lr": 0.00022766130225537287, "grad_norm": 0.014045044779777527, "wall_ms": 15527306}
{"step": 16350, "loss": 0.00670943409204483, "loss_nce": 0.0015909748617559671, "loss_density": 0.05118458718061447, "lr": 0.00022636528362073372, "grad_norm": 0.014117484912276268, "wall_ms": 15574300}
{"step": 16400, "loss": 0.008126368746161461, "loss_nce": 0.0019036736339330673, "loss_density": 0.062226951122283936, "lr": 0.0002250700768581285, "grad_norm": 0.013550632633268833, "wall_ms": 15621240}
{"step": 16450, "loss": 0.008549542166292667, "loss_nce": 0.0010827170917764306, "loss_density": 0.07466825097799301, "lr": 0.0002237757186901894, "grad_norm": 0.009747457690536976, "wall_ms": 15668202}
{"step": 16500, "loss": 0.00876871682703495, "loss_nce": 0.0018759124213829637, "loss_density": 0.06892804056406021, "lr": 0.00022248224581548814, "grad_norm": 0.011363611556589603, "wall_ms": 15715101}
{"step": 16550, "loss": 0.006843441165983677, "loss_nce": 0.0006605758680962026, "loss_density": 0.061828650534152985, "lr": 0.00022118969490749626, "grad_norm": 0.004318533930927515, "wall_ms": 15762022}
{"step": 16600, "loss": 0.00636496813967824, "loss_nce": 0.0006420505233108997, "loss_density": 0.0572291761636734, "lr": 0.00021989810261354486, "grad_norm": 0.004091790411621332, "wall_ms": 15809071}
{"step": 16650, "loss": 0.006753780413419008, "loss_nce": 0.0009610900306142867, "loss_density": 0.057926904410123825, "lr": 0.00021860750555378593, "grad_norm": 0.006490187719464302, "wall_ms": 15856111}
{"step": 16700, "loss": 0.007091626990586519, "loss_nce": 0.0009705892880447209, "loss_density": 0.061210375279188156, "lr": 0.00021731794032015366, "grad_norm": 0.0060225315392017365, "wall_ms": 15903030}
{"step": 16750, "loss": 0.009922096505761147, "loss_nce": 0.003127611242234707, "loss_density": 0.06794485449790955, "lr": 0.0002160294434753274, "grad_norm": 0.028315896168351173, "wall_ms": 15950188}
{"step": 16800, "loss": 0.006993676535785198, "loss_nce": 0.0010340602602809668, "loss_density": 0.05959615856409073, "lr": 0.00021474205155169452, "grad_norm": 0.008039397187530994, "wall_ms": 15997191}
{"step": 16850, "loss": 0.007377899717539549, "loss_nce": 0.0006665074615739286, "loss_density": 0.06711392104625702, "lr": 0.00021345580105031493, "grad_norm": 0.009711962193250656, "wall_ms": 16044197}
{"step": 16900, "loss": 0.0066872029565274715, "loss_nce": 0.0005494182696565986, "loss_density": 0.06137784570455551, "lr": 0.00021217072843988637, "grad_norm": 0.004610911943018436, "wall_ms": 16091074}
{"step": 16950, "loss": 0.007090642116963863, "loss_nce": 0.001258129021152854, "loss_density": 0.058325134217739105, "lr": 0.00021088687015570988, "grad_norm": 0.010415498167276382, "wall_ms": 16138038}
{"step": 17000, "loss": 0.007101280614733696, "loss_nce": 0.0014619886642321944, "loss_density": 0.056392915546894073, "lr": 0.00020960426259865702, "grad_norm": 0.010728671215474606, "wall_ms": 16185177}
{"step": 17050, "loss": 0.006914140656590462, "loss_nce": 0.0011158926645293832, "loss_density": 0.05798247829079628, "lr": 0.0002083229421341381, "grad_norm": 0.00728565314784646, "wall_ms": 16232649}
{"step": 17100, "loss": 0.007059849798679352, "loss_nce": 0.001317809335887432, "loss_density": 0.05742040276527405, "lr": 0.00020704294509107073, "grad_norm": 0.010445594787597656, "wall_ms": 16279653}
{"step": 17150, "loss": 0.005847645457834005, "loss_nce": 0.0004833713755942881, "loss_density": 0.053642742335796356, "lr": 0.0002057643077608499, "grad_norm": 0.0037246462889015675, "wall_ms": 16326566}
{"step": 17200, "loss": 0.006606005132198334, "loss_nce": 0.0006075297715142369, "loss_density": 0.059984754770994186, "lr": 0.000204487066396319, "grad_norm": 0.004122299142181873, "wall_ms": 16373441}
{"step": 17250, "loss": 0.006451596040278673, "loss_nce": 0.0005842213868163526, "loss_density": 0.05867374688386917, "lr": 0.00020321125721074203, "grad_norm": 0.0036252555437386036, "wall_ms": 16420381}
{"step": 17300, "loss": 0.008609703741967678, "loss_nce": 0.0023707521613687277, "loss_density": 0.06238951534032822, "lr": 0.00020193691637677703, "grad_norm": 0.023633871227502823, "wall_ms": 16467299}
{"step": 17350, "loss": 0.006758606992661953, "loss_nce": 0.0009949670638889074, "loss_density": 0.05763639509677887, "lr": 0.00020066408002544995, "grad_norm": 0.0071398853324353695, "wall_ms": 16514309}
{"step": 17400, "loss": 0.007994454354047775, "loss_nce": 0.002007911680266261, "loss_density": 0.059865426272153854, "lr": 0.00019939278424513075, "grad_norm": 0.017800347879529, "wall_ms": 16561331}
{"step": 17450, "loss": 0.010395096614956856, "loss_nce": 0.002119040582329035, "loss_density": 0.08276055008172989, "lr": 0.00019812306508051, "grad_norm": 0.018655454739928246, "wall_ms": 16608196}
{"step": 17500, "loss": 0.007751891855150461, "loss_nce": 0.0015924520557746291, "loss_density": 0.0615943968296051, "lr": 0.00019685495853157686, "grad_norm": 0.015085970982909203, "wall_ms": 16655134}
{"step": 17550, "loss": 0.0064188516698777676, "loss_nce": 0.0006776313530281186, "loss_density": 0.05741220340132713, "lr": 0.00019558850055259865, "grad_norm": 0.005431736819446087, "wall_ms": 16702049}
{"step": 17600, "loss": 0.007605631835758686, "loss_nce": 0.001227054512128234, "loss_density": 0.06378576904535294, "lr": 0.00019432372705110103, "grad_norm": 0.010940482839941978, "wall_ms": 16749032}
{"step": 17650, "loss": 0.00657217763364315, "loss_nce": 0.0012490367516875267, "loss_density": 0.05323140695691109, "lr": 0.00019306067388685013, "grad_norm": 0.008900723420083523, "wall_ms": 16796043}
{"step": 17700, "loss": 0.006091210059821606, "loss_nce": 0.0005253624403849244, "loss_density": 0.05565847456455231, "lr": 0.00019179937687083612, "grad_norm": 0.0032985908910632133, "wall_ms": 16843003}
{"step": 17750, "loss": 0.006845490075647831, "loss_nce": 0.0012674155877903104, "loss_density": 0.055780746042728424, "lr": 0.00019053987176425734, "grad_norm": 0.01444664690643549, "wall_ms": 16889923}
{"step": 17800, "loss": 0.006979185156524181, "loss_nce": 0.0009109487291425467, "loss_density": 0.06068236753344536, "lr": 0.00018928219427750666, "grad_norm": 0.00472467252984643, "wall_ms": 16936881}
{"step": 17850, "loss": 0.007000013720244169, "loss_nce": 0.0005857179639860988, "loss_density": 0.06414295732975006, "lr": 0.00018802638006915897, "grad_norm": 0.0036510436329990625, "wall_ms": 16984004}
{"step": 17900, "loss": 0.007230808027088642, "loss_nce": 0.000770293059758842, "loss_density": 0.06460514664649963, "lr": 0.00018677246474496027, "grad_norm": 0.004495722241699696, "wall_ms": 17030900}
{"step": 17950, "loss": 0.006211164873093367, "loss_nce": 0.00045271962881088257, "loss_density": 0.057584453374147415, "lr": 0.000185520483856818, "grad_norm": 0.004018337000161409, "wall_ms": 17077850}
{"step": 18000, "loss": 0.006310931406915188, "loss_nce": 0.0007379876915365458, "loss_density": 0.05572943389415741, "lr": 0.00018427047290179292, "grad_norm": 0.020094603300094604, "wall_ms": 17124772}
{"step": 18050, "loss": 0.007187731098383665, "loss_nce": 0.0009319917298853397, "loss_density": 0.0625573918223381, "lr": 0.00018302246732109298, "grad_norm": 0.008271140977740288, "wall_ms": 17171699}
{"step": 18100, "loss": 0.009112192317843437, "loss_nce": 0.003871740074828267, "loss_density": 0.05240451917052269, "lr": 0.00018177650249906816, "grad_norm": 0.04689745604991913, "wall_ms": 17218597}
{"step": 18150, "loss": 0.006147227715700865, "loss_nce": 0.000492190767545253, "loss_density": 0.056550368666648865, "lr": 0.00018053261376220774, "grad_norm": 0.0031331651844084263, "wall_ms": 17265630}
{"step": 18200, "loss": 0.007103693671524525, "loss_nce": 0.0011291871778666973, "loss_density": 0.05974506586790085, "lr": 0.00017929083637813792, "grad_norm": 0.009897410869598389, "wall_ms": 17312625}
{"step": 18250, "loss": 0.0062137809582054615, "loss_nce": 0.00051103625446558, "loss_density": 0.05702744424343109, "lr": 0.00017805120555462262, "grad_norm": 0.005287615582346916, "wall_ms": 17359605}
{"step": 18300, "loss": 0.0067392876371741295, "loss_nce": 0.0006214631139300764, "loss_density": 0.06117824465036392, "lr": 0.00017681375643856462, "grad_norm": 0.005261401645839214, "wall_ms": 17406619}
{"step": 18350, "loss": 0.007982578128576279, "loss_nce": 0.002543430542573333, "loss_density": 0.05439147353172302, "lr": 0.0001755785241150098, "grad_norm": 0.011956935748457909, "wall_ms": 17453577}
{"step": 18400, "loss": 0.0070215854793787, "loss_nce": 0.00075306068174541, "loss_density": 0.06268524378538132, "lr": 0.0001743455436061516, "grad_norm": 0.005228675436228514, "wall_ms": 17500553}
{"step": 18450, "loss": 0.006815400440245867, "loss_nce": 0.0009088543010875583, "loss_density": 0.05906546115875244, "lr": 0.00017311484987033845, "grad_norm": 0.010570665821433067, "wall_ms": 17547498}
{"step": 18500, "loss": 0.0062789819203317165, "loss_nce": 0.0006845881580375135, "loss_density": 0.05594393610954285, "lr": 0.00017188647780108258, "grad_norm": 0.004310059826821089, "wall_ms": 17594423}
{"step": 18550, "loss": 0.0069127813912928104, "loss_nce": 0.0009242696105502546, "loss_density": 0.059885114431381226, "lr": 0.0001706604622260708, "grad_norm": 0.008576016873121262, "wall_ms": 17641360}
{"step": 18600, "loss": 0.006617108825594187, "loss_nce": 0.0011279397876933217, "loss_density": 0.054891690611839294, "lr": 0.0001694368379061767, "grad_norm": 0.00902070663869381, "wall_ms": 17688274}
{"step": 18650, "loss": 0.006696867756545544, "loss_nce": 0.0007222624844871461, "loss_density": 0.05974604934453964, "lr": 0.0001682156395344754, "grad_norm": 0.004618598148226738, "wall_ms": 17735124}
{"step": 18700, "loss": 0.0067971982061862946, "loss_nce": 0.0008915565558709204, "loss_density": 0.059056416153907776, "lr": 0.0001669969017352596, "grad_norm": 0.006161034572869539, "wall_ms": 17782016}
{"step": 18750, "loss": 0.006371753755956888, "loss_nce": 0.0006951218820177019, "loss_density": 0.056766316294670105, "lr": 0.00016578065906305855, "grad_norm": 0.003912963904440403, "wall_ms": 17829042}
{"step": 18800, "loss": 0.006856996100395918, "loss_nce": 0.0008520004339516163, "loss_density": 0.06004995480179787, "lr": 0.00016456694600165746, "grad_norm": 0.006655808072537184, "wall_ms": 17875990}
{"step": 18850, "loss": 0.006107232067734003, "loss_nce": 0.000487475743284449, "loss_density": 0.05619756132364273, "lr": 0.00016335579696312016, "grad_norm": 0.004594695754349232, "wall_ms": 17922877}
{"step": 18900, "loss": 0.006857701577246189, "loss_nce": 0.0010161336977034807, "loss_density": 0.058415673673152924, "lr": 0.00016214724628681366, "grad_norm": 0.0056403204798698425, "wall_ms": 17970277}
{"step": 18950, "loss": 0.006651442497968674, "loss_nce": 0.0007395861903205514, "loss_density": 0.05911856144666672, "lr": 0.0001609413282384344, "grad_norm": 0.006628761999309063, "wall_ms": 18017273}
{"step": 19000, "loss": 0.007368803024291992, "loss_nce": 0.0009122232440859079, "loss_density": 0.06456579267978668, "lr": 0.00015973807700903647, "grad_norm": 0.00543864956125617, "wall_ms": 18064250}
{"step": 19050, "loss": 0.006888789124786854, "loss_nce": 0.0007066380348987877, "loss_density": 0.06182150915265083, "lr": 0.00015853752671406252, "grad_norm": 0.005641872063279152, "wall_ms": 18111233}
{"step": 19100, "loss": 0.006679086945950985, "loss_nce": 0.0006741684628650546, "loss_density": 0.060049183666706085, "lr": 0.00015733971139237615, "grad_norm": 0.00526820495724678, "wall_ms": 18158069}
{"step": 19150, "loss": 0.007392293773591518, "loss_nce": 0.0010960877407342196, "loss_density": 0.06296205520629883, "lr": 0.00015614466500529748, "grad_norm": 0.007782908622175455, "wall_ms": 18205104}
{"step": 19200, "loss": 0.0063201868906617165, "loss_nce": 0.0005218122969381511, "loss_density": 0.05798374488949776, "lr": 0.0001549524214356394, "grad_norm": 0.0034992380533367395, "wall_ms": 18252044}
{"step": 19250, "loss": 0.007008508779108524, "loss_nce": 0.0018881495343521237, "loss_density": 0.051203593611717224, "lr": 0.00015376301448674743, "grad_norm": 0.010736431926488876, "wall_ms": 18298945}
{"step": 19300, "loss": 0.006584686227142811, "loss_nce": 0.000657483353279531, "loss_density": 0.059272028505802155, "lr": 0.0001525764778815412, "grad_norm": 0.0036715620663017035, "wall_ms": 18345855}
{"step": 19350, "loss": 0.006729534827172756, "loss_nce": 0.0012687622802332044, "loss_density": 0.05460772663354874, "lr": 0.0001513928452615582, "grad_norm": 0.008031820878386497, "wall_ms": 18392804}
{"step": 19400, "loss": 0.006794034503400326, "loss_nce": 0.0007508014095947146, "loss_density": 0.06043232977390289, "lr": 0.00015021215018600032, "grad_norm": 0.005708271637558937, "wall_ms": 18439894}
{"step": 19450, "loss": 0.005978460423648357, "loss_nce": 0.0008563300943933427, "loss_density": 0.05122130364179611, "lr": 0.00014903442613078182, "grad_norm": 0.0049312482587993145, "wall_ms": 18486762}
{"step": 19500, "loss": 0.007722364738583565, "loss_nce": 0.0017687364015728235, "loss_density": 0.05953628197312355, "lr": 0.00014785970648758045, "grad_norm": 0.0283415038138628, "wall_ms": 18533658}
{"step": 19550, "loss": 0.007334552239626646, "loss_nce": 0.0015847726026549935, "loss_density": 0.057497795671224594, "lr": 0.00014668802456289096, "grad_norm": 0.015043336898088455, "wall_ms": 18580771}
{"step": 19600, "loss": 0.006163605023175478, "loss_nce": 0.0005681904149241745, "loss_density": 0.05595414340496063, "lr": 0.00014551941357708018, "grad_norm": 0.0041482532396912575, "wall_ms": 18627671}
{"step": 19650, "loss": 0.007707681506872177, "loss_nce": 0.0018575958674773574, "loss_density": 0.058500856161117554, "lr": 0.00014435390666344588, "grad_norm": 0.008866675198078156, "wall_ms": 18674581}
{"step": 19700, "loss": 0.009154438972473145, "loss_nce": 0.00346420519053936, "loss_density": 0.056902334094047546, "lr": 0.00014319153686727663, "grad_norm": 0.023494591936469078, "wall_ms": 18721564}
{"step": 19750, "loss": 0.006624611560255289, "loss_nce": 0.0006633137236349285, "loss_density": 0.059612978249788284, "lr": 0.00014203233714491515, "grad_norm": 0.0039849355816841125, "wall_ms": 18768511}
{"step": 19800, "loss": 0.007202767767012119, "loss_nce": 0.0016068948898464441, "loss_density": 0.05595872923731804, "lr": 0.0001408763403628243, "grad_norm": 0.02761632390320301, "wall_ms": 18815500}
{"step": 19850, "loss": 0.00652981037274003, "loss_nce": 0.001164612709544599, "loss_density": 0.053651973605155945, "lr": 0.00013972357929665464, "grad_norm": 0.00791474524885416, "wall_ms": 18862486}
{"step": 19900, "loss": 0.006248202174901962, "loss_nce": 0.0007163963746279478, "loss_density": 0.05531805381178856, "lr": 0.00013857408663031534, "grad_norm": 0.005431325174868107, "wall_ms": 18909456}
{"step": 19950, "loss": 0.010920053347945213, "loss_nce": 0.0038157720118761063, "loss_density": 0.07104280591011047, "lr": 0.0001374278949550476, "grad_norm": 0.034225426614284515, "wall_ms": 18956411}
{"step": 20000, "loss": 0.00768383638933301, "loss_nce": 0.0014343730872496963, "loss_density": 0.062494631856679916, "lr": 0.00013628503676850075, "grad_norm": 0.008080494590103626, "wall_ms": 19003455}
{"step": 20050, "loss": 0.00709037808701396, "loss_nce": 0.000660267542116344, "loss_density": 0.06430110335350037, "lr": 0.00013514554447381037, "grad_norm": 0.01440053153783083, "wall_ms": 19141019}
{"step": 20100, "loss": 0.006980221718549728, "loss_nce": 0.0011453503975644708, "loss_density": 0.05834871530532837, "lr": 0.00013400945037868, "grad_norm": 0.00828948151320219, "wall_ms": 19187828}
{"step": 20150, "loss": 0.006820219103246927, "loss_nce": 0.0010224109282717109, "loss_density": 0.057978078722953796, "lr": 0.00013287678669446487, "grad_norm": 0.004929295741021633, "wall_ms": 19234693}
{"step": 20200, "loss": 0.006416718941181898, "loss_nce": 0.0007535183103755116, "loss_density": 0.05663200840353966, "lr": 0.00013174758553525907, "grad_norm": 0.004852146375924349, "wall_ms": 19281681}
{"step": 20250, "loss": 0.006008281372487545, "loss_nce": 0.000572253717109561, "loss_density": 0.0543602779507637, "lr": 0.00013062187891698429, "grad_norm": 0.0029157239478081465, "wall_ms": 19328628}
{"step": 20300, "loss": 0.005998489912599325, "loss_nce": 0.0005625641206279397, "loss_density": 0.054359257221221924, "lr": 0.0001294996987564826, "grad_norm": 0.0041414774022996426, "wall_ms": 19375602}
{"step": 20350, "loss": 0.007226848043501377, "loss_nce": 0.000991232693195343, "loss_density": 0.06235615164041519, "lr": 0.0001283810768706116, "grad_norm": 0.00842616893351078, "wall_ms": 19422587}
{"step": 20400, "loss": 0.00602108146995306, "loss_nce": 0.0006873211241327226, "loss_density": 0.05333760380744934, "lr": 0.00012726604497534185, "grad_norm": 0.003825446357950568, "wall_ms": 19469594}
{"step": 20450, "loss": 0.007939870469272137, "loss_nce": 0.0016470237169414759, "loss_density": 0.0629284679889679, "lr": 0.00012615463468485828, "grad_norm": 0.008395187556743622, "wall_ms": 19516531}
{"step": 20500, "loss": 0.006801958195865154, "loss_nce": 0.0007480935892090201, "loss_density": 0.0605386458337307, "lr": 0.00012504687751066311, "grad_norm": 0.004233636427670717, "wall_ms": 19563503}
{"step": 20550, "loss": 0.00939013622701168, "loss_nce": 0.003848749678581953, "loss_density": 0.0554138645529747, "lr": 0.0001239428048606829, "grad_norm": 0.06704546511173248, "wall_ms": 19610453}
{"step": 20600, "loss": 0.006330915726721287, "loss_nce": 0.0006829749327152967, "loss_density": 0.05647940933704376, "lr": 0.0001228424480383781, "grad_norm": 0.005155540537089109, "wall_ms": 19657392}
{"step": 20650, "loss": 0.0061920322477817535, "loss_nce": 0.0011568076442927122, "loss_density": 0.050352245569229126, "lr": 0.00012174583824185521, "grad_norm": 0.008460639044642448, "wall_ms": 19704339}
{"step": 20700, "loss": 0.006421769503504038, "loss_nce": 0.000665004423353821, "loss_density": 0.05756764858961105, "lr": 0.0001206530065629823, "grad_norm": 0.004339013714343309, "wall_ms": 19751435}
{"step": 20750, "loss": 0.0073219770565629005, "loss_nce": 0.0009071975364349782, "loss_density": 0.06414779275655746, "lr": 0.00011956398398650769, "grad_norm": 0.00571768032386899, "wall_ms": 19798441}
{"step": 20800, "loss": 0.005966292694211006, "loss_nce": 0.0006504731718450785, "loss_density": 0.05315819010138512, "lr": 0.00011847880138918105, "grad_norm": 0.003931056708097458, "wall_ms": 19845417}
{"step": 20850, "loss": 0.005940873175859451, "loss_nce": 0.0004488630802370608, "loss_density": 0.054920099675655365, "lr": 0.0001173974895388786, "grad_norm": 0.0029208874329924583, "wall_ms": 19892397}
{"step": 20900, "loss": 0.010526279918849468, "loss_nce": 0.004446729086339474, "loss_density": 0.060795508325099945, "lr": 0.00011632007909373003, "grad_norm": 0.11454721540212631, "wall_ms": 19939483}
{"step": 20950, "loss": 0.006337761878967285, "loss_nce": 0.0013338204007595778, "loss_density": 0.05003941059112549, "lr": 0.00011524660060124972, "grad_norm": 0.006422633305191994, "wall_ms": 19986357}
{"step": 21000, "loss": 0.006494168192148209, "loss_nce": 0.000889073999132961, "loss_density": 0.05605094134807587, "lr": 0.00011417708449747033, "grad_norm": 0.009113800711929798, "wall_ms": 20033281}
{"step": 21050, "loss": 0.00675317645072937, "loss_nce": 0.0007662205025553703, "loss_density": 0.05986955761909485, "lr": 0.0001131115611060804, "grad_norm": 0.006659817881882191, "wall_ms": 20080184}
{"step": 21100, "loss": 0.006331936456263065, "loss_nce": 0.0008664796478115022, "loss_density": 0.0546545647084713, "lr": 0.00011205006063756384, "grad_norm": 0.005796113051474094, "wall_ms": 20127134}
{"step": 21150, "loss": 0.0077196815982460976, "loss_nce": 0.0023351036943495274, "loss_density": 0.05384577810764313, "lr": 0.0001109926131883441, "grad_norm": 0.02041998878121376, "wall_ms": 20174131}
{"step": 21200, "loss": 0.006373976822942495, "loss_nce": 0.0004878183826804161, "loss_density": 0.05886158347129822, "lr": 0.00010993924873993015, "grad_norm": 0.0031665200367569923, "wall_ms": 20221200}
{"step": 21250, "loss": 0.006776287220418453, "loss_nce": 0.0007635157089680433, "loss_density": 0.06012771278619766, "lr": 0.00010888999715806712, "grad_norm": 0.007771816570311785, "wall_ms": 20268105}
{"step": 21300, "loss": 0.006920497864484787, "loss_nce": 0.0011191812809556723, "loss_density": 0.05801316723227501, "lr": 0.00010784488819188905, "grad_norm": 0.007009773049503565, "wall_ms": 20315088}
{"step": 21350, "loss": 0.00721000786870718, "loss_nce": 0.001352209015749395, "loss_density": 0.05857798457145691, "lr": 0.00010680395147307554, "grad_norm": 0.010731725953519344, "wall_ms": 20362047}
{"step": 21400, "loss": 0.006414399947971106, "loss_nce": 0.0007586244610138237, "loss_density": 0.056557752192020416, "lr": 0.00010576721651501157, "grad_norm": 0.007479887455701828, "wall_ms": 20408934}
{"step": 21450, "loss": 0.006501419469714165, "loss_nce": 0.0007530783186666667, "loss_density": 0.05748341232538223, "lr": 0.0001047347127119511, "grad_norm": 0.004886420909315348, "wall_ms": 20455930}
{"step": 21500, "loss": 0.006107730325311422, "loss_nce": 0.0003885794139932841, "loss_density": 0.057191506028175354, "lr": 0.000103706469338183, "grad_norm": 0.002795459935441613, "wall_ms": 20502870}
{"step": 21550, "loss": 0.006769115570932627, "loss_nce": 0.0006200540810823441, "loss_density": 0.06149061396718025, "lr": 0.00010268251554720157, "grad_norm": 0.005693882238119841, "wall_ms": 20549926}
{"step": 21600, "loss": 0.007714797742664814, "loss_nce": 0.0015523696783930063, "loss_density": 0.061624281108379364, "lr": 0.00010166288037087958, "grad_norm": 0.007684016600251198, "wall_ms": 20596816}
{"step": 21650, "loss": 0.006368126720190048, "loss_nce": 0.0006872869562357664, "loss_density": 0.05680840089917183, "lr": 0.00010064759271864573, "grad_norm": 0.006036403588950634, "wall_ms": 20643697}
{"step": 21700, "loss": 0.007236837409436703, "loss_nce": 0.0016427282243967056, "loss_density": 0.05594108998775482, "lr": 9.963668137666424e-05, "grad_norm": 0.02109336666762829, "wall_ms": 20690571}
{"step": 21750, "loss": 0.005813266150653362, "loss_nce": 0.0005354822496883571, "loss_density": 0.052777841687202454, "lr": 9.863017500701914e-05, "grad_norm": 0.004413963295519352, "wall_ms": 20737492}
{"step": 21800, "loss": 0.0068160127848386765, "loss_nce": 0.0013289470225572586, "loss_density": 0.05487065762281418, "lr": 9.762810214690147e-05, "grad_norm": 0.010212553665041924, "wall_ms": 20784363}
{"step": 21850, "loss": 0.006095064803957939, "loss_nce": 0.000574188306927681, "loss_density": 0.05520876497030258, "lr": 9.663049120780048e-05, "grad_norm": 0.005133197642862797, "wall_ms": 20831244}
{"step": 21900, "loss": 0.006259178277105093, "loss_nce": 0.0009277106146328151, "loss_density": 0.053314678370952606, "lr": 9.563737047469745e-05, "grad_norm": 0.006387969013303518, "wall_ms": 20878222}
{"step": 21950, "loss": 0.0076219988986849785, "loss_nce": 0.001759199658408761, "loss_density": 0.05862799286842346, "lr": 9.464876810526448e-05, "grad_norm": 0.008707442320883274, "wall_ms": 20925275}
{"step": 22000, "loss": 0.0071866437792778015, "loss_nce": 0.0009816070087254047, "loss_density": 0.062050364911556244, "lr": 9.366471212906554e-05, "grad_norm": 0.006244616582989693, "wall_ms": 20972224}
{"step": 22050, "loss": 0.0067846630699932575, "loss_nce": 0.0010909056290984154, "loss_density": 0.056937575340270996, "lr": 9.2685230446762e-05, "grad_norm": 0.01057687122374773, "wall_ms": 21019174}
{"step": 22100, "loss": 0.0065194061025977135, "loss_nce": 0.0008437051437795162, "loss_density": 0.05675701051950455, "lr": 9.171035082932176e-05, "grad_norm": 0.0071340445429086685, "wall_ms": 21066188}
{"step": 22150, "loss": 0.006534646265208721, "loss_nce": 0.0008453469490632415, "loss_density": 0.056892991065979004, "lr": 9.074010091723151e-05, "grad_norm": 0.007481150794774294, "wall_ms": 21113135}
{"step": 22200, "loss": 0.006948172114789486, "loss_nce": 0.0010715320240706205, "loss_density": 0.05876639857888222, "lr": 8.977450821971316e-05, "grad_norm": 0.006452243775129318, "wall_ms": 21160219}
{"step": 22250, "loss": 0.006156211718916893, "loss_nce": 0.0004383557243272662, "loss_density": 0.0571785569190979, "lr": 8.881360011394404e-05, "grad_norm": 0.0032746056094765663, "wall_ms": 21207222}
{"step": 22300, "loss": 0.007065990008413792, "loss_nce": 0.0009286762215197086, "loss_density": 0.061373136937618256, "lr": 8.785740384428066e-05, "grad_norm": 0.008256956934928894, "wall_ms": 21254200}
{"step": 22350, "loss": 0.006294493097811937, "loss_nce": 0.0007932921871542931, "loss_density": 0.05501201003789902, "lr": 8.690594652148597e-05, "grad_norm": 0.0072516086511313915, "wall_ms": 21301261}
{"step": 22400, "loss": 0.006394101306796074, "loss_nce": 0.0005778863560408354, "loss_density": 0.058162152767181396, "lr": 8.5959255121961e-05, "grad_norm": 0.009291511960327625, "wall_ms": 21348269}
{"step": 22450, "loss": 0.008027876727283001, "loss_nce": 0.0024281474761664867, "loss_density": 0.05599729344248772, "lr": 8.501735648697979e-05, "grad_norm": 0.036759454756975174, "wall_ms": 21395257}
{"step": 22500, "loss": 0.0056660789996385574, "loss_nce": 0.0004867161042056978, "loss_density": 0.05179362744092941, "lr": 8.408027732192878e-05, "grad_norm": 0.0024433243088424206, "wall_ms": 21442264}
{"step": 22550, "loss": 0.006839660461992025, "loss_nce": 0.0010856169974431396, "loss_density": 0.05754043161869049, "lr": 8.3148044195549e-05, "grad_norm": 0.006786811631172895, "wall_ms": 21489186}
{"step": 22600, "loss": 0.0066774506121873856, "loss_nce": 0.0008460332173854113, "loss_density": 0.05831417068839073, "lr": 8.22206835391832e-05, "grad_norm": 0.007732451893389225, "wall_ms": 21536064}
{"step": 22650, "loss": 0.007282985839992762, "loss_nce": 0.0015503973700106144, "loss_density": 0.05732588469982147, "lr": 8.129822164602656e-05, "grad_norm": 0.016251148656010628, "wall_ms": 21583080}
{"step": 22700, "loss": 0.005962011869996786, "loss_nce": 0.00044733285903930664, "loss_density": 0.05514679104089737, "lr": 8.038068467038072e-05, "grad_norm": 0.0025882197078317404, "wall_ms": 21630472}
{"step": 22750, "loss": 0.006266463547945023, "loss_nce": 0.0006404739106073976, "loss_density": 0.056259896606206894, "lr": 7.946809862691277e-05, "grad_norm": 0.005936034023761749, "wall_ms": 21677309}
{"step": 22800, "loss": 0.007227722555398941, "loss_nce": 0.0011684461496770382, "loss_density": 0.060592763125896454, "lr": 7.856048938991731e-05, "grad_norm": 0.008087950758635998, "wall_ms": 21724305}
{"step": 22850, "loss": 0.006724216043949127, "loss_nce": 0.0010534944012761116, "loss_density": 0.05670721456408501, "lr": 7.76578826925828e-05, "grad_norm": 0.008505855686962605, "wall_ms": 21771366}
{"step": 22900, "loss": 0.03205405920743942, "loss_nce": 0.026399044319987297, "loss_density": 0.05655013769865036, "lr": 7.676030412626244e-05, "grad_norm": 0.2132941633462906, "wall_ms": 21818249}
{"step": 22950, "loss": 0.006371106021106243, "loss_nce": 0.0004338782746344805, "loss_density": 0.05937227979302406, "lr": 7.5867779139748e-05, "grad_norm": 0.003453923622146249, "wall_ms": 21865069}
{"step": 23000, "loss": 0.00628998689353466, "loss_nce": 0.0006502105388790369, "loss_density": 0.05639776214957237, "lr": 7.498033303854857e-05, "grad_norm": 0.005262316670268774, "wall_ms": 21911965}
{"step": 23050, "loss": 0.0065062278881669044, "loss_nce": 0.0008613744284957647, "loss_density": 0.05644853785634041, "lr": 7.409799098417297e-05, "grad_norm": 0.00532006798312068, "wall_ms": 21958909}
{"step": 23100, "loss": 0.006412321235984564, "loss_nce": 0.0005080483970232308, "loss_density": 0.05904272571206093, "lr": 7.322077799341671e-05, "grad_norm": 0.005469319876283407, "wall_ms": 22005821}
{"step": 23150, "loss": 0.006837925873696804, "loss_nce": 0.000801039335783571, "loss_density": 0.060368865728378296, "lr": 7.234871893765208e-05, "grad_norm": 0.005664667580276728, "wall_ms": 22052723}
{"step": 23200, "loss": 0.006101008038967848, "loss_nce": 0.0009224617970176041, "loss_density": 0.051785461604595184, "lr": 7.148183854212346e-05, "grad_norm": 0.006339833606034517, "wall_ms": 22099626}
{"step": 23250, "loss": 0.0059376978315413, "loss_nce": 0.0004706084728240967, "loss_density": 0.05467089265584946, "lr": 7.062016138524607e-05, "grad_norm": 0.003358614630997181, "wall_ms": 22146541}
{"step": 23300, "loss": 0.006405569612979889, "loss_nce": 0.0007043743971735239, "loss_density": 0.05701194703578949, "lr": 6.976371189790929e-05, "grad_norm": 0.00438335444778204, "wall_ms": 22193422}
{"step": 23350, "loss": 0.0062721408903598785, "loss_nce": 0.0007950304425321519, "loss_density": 0.05477110296487808, "lr": 6.891251436278378e-05, "grad_norm": 0.006099947262555361, "wall_ms": 22240224}
{"step": 23400, "loss": 0.007197768427431583, "loss_nce": 0.0007838085293769836, "loss_density": 0.06413959711790085, "lr": 6.806659291363291e-05, "grad_norm": 0.005765701178461313, "wall_ms": 22287158}
{"step": 23450, "loss": 0.005413845647126436, "loss_nce": 0.00045159313594922423, "loss_density": 0.04962252452969551, "lr": 6.722597153462912e-05, "grad_norm": 0.003655970562249422, "wall_ms": 22334058}
{"step": 23500, "loss": 0.008133882656693459, "loss_nce": 0.0026219116989523172, "loss_density": 0.05511970818042755, "lr": 6.639067405967293e-05, "grad_norm": 0.0245906300842762, "wall_ms": 22381012}
{"step": 23550, "loss": 0.006836704909801483, "loss_nce": 0.0008823308162391186, "loss_density": 0.05954374000430107, "lr": 6.556072417171819e-05, "grad_norm": 0.01113450713455677, "wall_ms": 22427948}
{"step": 23600, "loss": 0.007624995894730091, "loss_nce": 0.0012680903309956193, "loss_density": 0.06356905400753021, "lr": 6.473614540209977e-05, "grad_norm": 0.011653144843876362, "wall_ms": 22474777}
{"step": 23650, "loss": 0.005841940175741911, "loss_nce": 0.00048712448915466666, "loss_density": 0.05354815721511841, "lr": 6.391696112986686e-05, "grad_norm": 0.004006264731287956, "wall_ms": 22521849}
{"step": 23700, "loss": 0.00621818657964468, "loss_nce": 0.0006330477190203965, "loss_density": 0.05585138499736786, "lr": 6.310319458112e-05, "grad_norm": 0.005189455579966307, "wall_ms": 22568754}
{"step": 23750, "loss": 0.007216814439743757, "loss_nce": 0.0008690343238413334, "loss_density": 0.06347779929637909, "lr": 6.22948688283526e-05, "grad_norm": 0.00974116288125515, "wall_ms": 22615724}
{"step": 23800, "loss": 0.006364001892507076, "loss_nce": 0.0006954402197152376, "loss_density": 0.056685611605644226, "lr": 6.14920067897966e-05, "grad_norm": 0.004460692871361971, "wall_ms": 22662618}
{"step": 23850, "loss": 0.006470919586718082, "loss_nce": 0.0007033679285086691, "loss_density": 0.05767551809549332, "lr": 6.0694631228772884e-05, "grad_norm": 0.005558391101658344, "wall_ms": 22709542}
{"step": 23900, "loss": 0.006092695519328117, "loss_nce": 0.0005510489572770894, "loss_density": 0.05541646480560303, "lr": 5.990276475304566e-05, "grad_norm": 0.0034864041954278946, "wall_ms": 22756421}
{"step": 23950, "loss": 0.005423305090516806, "loss_nce": 0.000418199400883168, "loss_density": 0.05005105584859848, "lr": 5.911642981418183e-05, "grad_norm": 0.0026263047475367785, "wall_ms": 22803432}
{"step": 24000, "loss": 0.006038932129740715, "loss_nce": 0.0003821108548436314, "loss_density": 0.0565682128071785, "lr": 5.8335648706913954e-05, "grad_norm": 0.002418599557131529, "wall_ms": 22850352}
{"step": 24050, "loss": 0.0055016446858644485, "loss_nce": 0.0006408269982784986, "loss_density": 0.04860817641019821, "lr": 5.756044356850848e-05, "grad_norm": 0.0036536992993205786, "wall_ms": 22897286}
{"step": 24100, "loss": 0.005890808068215847, "loss_nce": 0.00036187609657645226, "loss_density": 0.055289316922426224, "lr": 5.67908363781379e-05, "grad_norm": 0.0023838195484131575, "wall_ms": 22944154}
{"step": 24150, "loss": 0.006071016658097506, "loss_nce": 0.0006446865736506879, "loss_density": 0.05426330119371414, "lr": 5.602684895625786e-05, "grad_norm": 0.0038456495385617018, "wall_ms": 22990997}
{"step": 24200, "loss": 0.00821033213287592, "loss_nce": 0.0020487322472035885, "loss_density": 0.06161599978804588, "lr": 5.526850296398808e-05, "grad_norm": 0.025571053847670555, "wall_ms": 23037894}
{"step": 24250, "loss": 0.0063215624541044235, "loss_nce": 0.0004477432812564075, "loss_density": 0.058738190680742264, "lr": 5.451581990249859e-05, "grad_norm": 0.0028186729177832603, "wall_ms": 23085130}
{"step": 24300, "loss": 0.006823626346886158, "loss_nce": 0.0011759669287130237, "loss_density": 0.056476593017578125, "lr": 5.376882111239975e-05, "grad_norm": 0.013809463009238243, "wall_ms": 23132029}
{"step": 24350, "loss": 0.00645598815754056, "loss_nce": 0.0005904246354475617, "loss_density": 0.05865563452243805, "lr": 5.3027527773137604e-05, "grad_norm": 0.0039002641569823027, "wall_ms": 23178904}
{"step": 24400, "loss": 0.006212607026100159, "loss_nce": 0.0007718394044786692, "loss_density": 0.05440767854452133, "lr": 5.229196090239301e-05, "grad_norm": 0.005529473535716534, "wall_ms": 23225848}
{"step": 24450, "loss": 0.0073875607922673225, "loss_nce": 0.0013940338976681232, "loss_density": 0.05993526801466942, "lr": 5.156214135548585e-05, "grad_norm": 0.013861830346286297, "wall_ms": 23272700}
{"step": 24500, "loss": 0.008139865472912788, "loss_nce": 0.0023998874239623547, "loss_density": 0.05739977955818176, "lr": 5.083808982478382e-05, "grad_norm": 0.03765329718589783, "wall_ms": 23319581}
{"step": 24550, "loss": 0.006337213329970837, "loss_nce": 0.000784296658821404, "loss_density": 0.055529166013002396, "lr": 5.0119826839115736e-05, "grad_norm": 0.006301156710833311, "wall_ms": 23366440}
{"step": 24600, "loss": 0.006632194388657808, "loss_nce": 0.00062124605756253, "loss_density": 0.06010948121547699, "lr": 4.940737276318935e-05, "grad_norm": 0.004484266508370638, "wall_ms": 23413373}
{"step": 24650, "loss": 0.0069103059358894825, "loss_nce": 0.0011617772979661822, "loss_density": 0.05748528242111206, "lr": 4.870074779701407e-05, "grad_norm": 0.007629831787198782, "wall_ms": 23460318}
{"step": 24700, "loss": 0.006280449219048023, "loss_nce": 0.0014146529138088226, "loss_density": 0.04865796118974686, "lr": 4.799997197532813e-05, "grad_norm": 0.008796327747404575, "wall_ms": 23507206}
{"step": 24750, "loss": 0.006616602651774883, "loss_nce": 0.0005507979076355696, "loss_density": 0.060658048838377, "lr": 4.7305065167030635e-05, "grad_norm": 0.0032714270055294037, "wall_ms": 23554129}
{"step": 24800, "loss": 0.007723968476057053, "loss_nce": 0.0014708886155858636, "loss_density": 0.06253080070018768, "lr": 4.661604707461838e-05, "grad_norm": 0.013504082337021828, "wall_ms": 23601112}
{"step": 24850, "loss": 0.007562657818198204, "loss_nce": 0.0008868507575243711, "loss_density": 0.06675807386636734, "lr": 4.593293723362686e-05, "grad_norm": 0.006571303587406874, "wall_ms": 23648000}
{"step": 24900, "loss": 0.00691720237955451, "loss_nce": 0.0007257937104441226, "loss_density": 0.06191408634185791, "lr": 4.525575501207662e-05, "grad_norm": 0.004248281940817833, "wall_ms": 23694823}
{"step": 24950, "loss": 0.006338770501315594, "loss_nce": 0.0011751506244763732, "loss_density": 0.05163619667291641, "lr": 4.4584519609924226e-05, "grad_norm": 0.020449072122573853, "wall_ms": 23741663}
{"step": 25000, "loss": 0.006350125651806593, "loss_nce": 0.0006202341755852103, "loss_density": 0.05729891359806061, "lr": 4.391925005851749e-05, "grad_norm": 0.004709034226834774, "wall_ms": 23788504}
{"step": 25050, "loss": 0.006035073660314083, "loss_nce": 0.0007332066306844354, "loss_density": 0.05301867052912712, "lr": 4.325996522005645e-05, "grad_norm": 0.006259765475988388, "wall_ms": 23908748}
{"step": 25100, "loss": 0.0059931217692792416, "loss_nce": 0.0006408671615645289, "loss_density": 0.053522542119026184, "lr": 4.260668378705803e-05, "grad_norm": 0.006953043397516012, "wall_ms": 23955551}
{"step": 25150, "loss": 0.006856861058622599, "loss_nce": 0.0009427600307390094, "loss_density": 0.05914100632071495, "lr": 4.195942428182631e-05, "grad_norm": 0.007573932874947786, "wall_ms": 24002452}
{"step": 25200, "loss": 0.006605486385524273, "loss_nce": 0.0010268368059769273, "loss_density": 0.05578649789094925, "lr": 4.1318205055927574e-05, "grad_norm": 0.00961252860724926, "wall_ms": 24049230}
{"step": 25250, "loss": 0.006004382856190205, "loss_nce": 0.0005426392308436334, "loss_density": 0.05461743474006653, "lr": 4.068304428966948e-05, "grad_norm": 0.006797300186008215, "wall_ms": 24096251}
{"step": 25300, "loss": 0.007812189403921366, "loss_nce": 0.002120816148817539, "loss_density": 0.05691373348236084, "lr": 4.00539599915861e-05, "grad_norm": 0.03228509798645973, "wall_ms": 24143105}
{"step": 25350, "loss": 0.00668724812567234, "loss_nce": 0.0004951902665197849, "loss_density": 0.06192057952284813, "lr": 3.943096999792693e-05, "grad_norm": 0.003156198188662529, "wall_ms": 24190041}
{"step": 25400, "loss": 0.006458156742155552, "loss_nce": 0.0006801873096264899, "loss_density": 0.057779695838689804, "lr": 3.881409197215163e-05, "grad_norm": 0.0058455560356378555, "wall_ms": 24236941}
{"step": 25450, "loss": 0.006517811678349972, "loss_nce": 0.0006698983488604426, "loss_density": 0.058479130268096924, "lr": 3.82033434044287e-05, "grad_norm": 0.0051679452881217, "wall_ms": 24283803}
{"step": 25500, "loss": 0.005386806093156338, "loss_nce": 0.00039780649240128696, "loss_density": 0.04988999664783478, "lr": 3.759874161114e-05, "grad_norm": 0.002981061115860939, "wall_ms": 24330771}
{"step": 25550, "loss": 0.0062150429002940655, "loss_nce": 0.0005657393485307693, "loss_density": 0.056493036448955536, "lr": 3.700030373438951e-05, "grad_norm": 0.0033944363240152597, "wall_ms": 24377741}
{"step": 25600, "loss": 0.00592020945623517, "loss_nce": 0.0005542057333514094, "loss_density": 0.053660035133361816, "lr": 3.6408046741517626e-05, "grad_norm": 0.0036776885390281677, "wall_ms": 24424639}
{"step": 25650, "loss": 0.0075206151232123375, "loss_nce": 0.001859094831161201, "loss_density": 0.056615199893713, "lr": 3.582198742461967e-05, "grad_norm": 0.028680117800831795, "wall_ms": 24471687}
{"step": 25700, "loss": 0.006022260524332523, "loss_nce": 0.0009384781587868929, "loss_density": 0.05083782598376274, "lr": 3.5242142400070074e-05, "grad_norm": 0.010501752607524395, "wall_ms": 24518581}
{"step": 25750, "loss": 0.006317559164017439, "loss_nce": 0.0008261502953246236, "loss_density": 0.05491408705711365, "lr": 3.466852810805131e-05, "grad_norm": 0.010560303926467896, "wall_ms": 24565410}
{"step": 25800, "loss": 0.007090797647833824, "loss_nce": 0.0011281550396233797, "loss_density": 0.05962642282247543, "lr": 3.410116081208744e-05, "grad_norm": 0.015295940451323986, "wall_ms": 24612371}
{"step": 25850, "loss": 0.007321303244680166, "loss_nce": 0.00127743324264884, "loss_density": 0.06043870002031326, "lr": 3.3540056598583426e-05, "grad_norm": 0.0168415829539299, "wall_ms": 24659172}
{"step": 25900, "loss": 0.006852158345282078, "loss_nce": 0.0014298169407993555, "loss_density": 0.05422341451048851, "lr": 3.298523137636864e-05, "grad_norm": 0.0279566440731287, "wall_ms": 24706106}
{"step": 25950, "loss": 0.006186847575008869, "loss_nce": 0.0005691373953595757, "loss_density": 0.05617710202932358, "lr": 3.243670087624607e-05, "grad_norm": 0.00607132026925683, "wall_ms": 24752976}
{"step": 26000, "loss": 0.006812126375734806, "loss_nce": 0.0005849165027029812, "loss_density": 0.0622720941901207, "lr": 3.189448065054626e-05, "grad_norm": 0.0039723534137010574, "wall_ms": 24799775}
{"step": 26050, "loss": 0.006569509394466877, "loss_nce": 0.0006835199310444295, "loss_density": 0.058859892189502716, "lr": 3.1358586072686245e-05, "grad_norm": 0.005297433584928513, "wall_ms": 24846670}
{"step": 26100, "loss": 0.007269435096532106, "loss_nce": 0.0017098878743126988, "loss_density": 0.05559547245502472, "lr": 3.082903233673382e-05, "grad_norm": 0.011362873949110508, "wall_ms": 24893556}
{"step": 26150, "loss": 0.006293687038123608, "loss_nce": 0.0005863758269697428, "loss_density": 0.05707310885190964, "lr": 3.0305834456976593e-05, "grad_norm": 0.003719005500897765, "wall_ms": 24940543}
{"step": 26200, "loss": 0.006227030418813229, "loss_nce": 0.0004712793161161244, "loss_density": 0.05755750834941864, "lr": 2.9789007267496415e-05, "grad_norm": 0.003339580725878477, "wall_ms": 24987420}
{"step": 26250, "loss": 0.00686721783131361, "loss_nce": 0.0011385588441044092, "loss_density": 0.057286590337753296, "lr": 2.9278565421748823e-05, "grad_norm": 0.009792376309633255, "wall_ms": 25034534}
{"step": 26300, "loss": 0.007857928983867168, "loss_nce": 0.002263725735247135, "loss_density": 0.05594203248620033, "lr": 2.877452339214745e-05, "grad_norm": 0.039130549877882004, "wall_ms": 25081453}
{"step": 26350, "loss": 0.005907584447413683, "loss_nce": 0.0006800749688409269, "loss_density": 0.05227509140968323, "lr": 2.8276895469653687e-05, "grad_norm": 0.007771602366119623, "wall_ms": 25128392}
{"step": 26400, "loss": 0.006378096994012594, "loss_nce": 0.0006629059789702296, "loss_density": 0.057151909917593, "lr": 2.7785695763371615e-05, "grad_norm": 0.004828184377402067, "wall_ms": 25175318}
{"step": 26450, "loss": 0.006207307800650597, "loss_nce": 0.00037078862078487873, "loss_density": 0.05836518853902817, "lr": 2.7300938200147965e-05, "grad_norm": 0.0035220249556005, "wall_ms": 25222250}
{"step": 26500, "loss": 0.006239233072847128, "loss_nce": 0.0005443115951493382, "loss_density": 0.05694921314716339, "lr": 2.6822636524177088e-05, "grad_norm": 0.003925703931599855, "wall_ms": 25269752}
{"step": 26550, "loss": 0.006789609789848328, "loss_nce": 0.0008022113470360637, "loss_density": 0.05987398326396942, "lr": 2.635080429661152e-05, "grad_norm": 0.004537977743893862, "wall_ms": 25316647}
{"step": 26600, "loss": 0.008517177775502205, "loss_nce": 0.0029544481076300144, "loss_density": 0.05562729388475418, "lr": 2.5885454895177184e-05, "grad_norm": 0.017469538375735283, "wall_ms": 25363500}
{"step": 26650, "loss": 0.005699735134840012, "loss_nce": 0.0006433976232074201, "loss_density": 0.050563372671604156, "lr": 2.5426601513794476e-05, "grad_norm": 0.004530340898782015, "wall_ms": 25410397}
{"step": 26700, "loss": 0.007013377267867327, "loss_nce": 0.0013709845952689648, "loss_density": 0.05642392486333847, "lr": 2.497425716220377e-05, "grad_norm": 0.00962207280099392, "wall_ms": 25457207}
{"step": 26750, "loss": 0.006259081419557333, "loss_nce": 0.0007102068630047143, "loss_density": 0.055488742887973785, "lr": 2.4528434665596857e-05, "grad_norm": 0.006809397134929895, "wall_ms": 25504059}
{"step": 26800, "loss": 0.005687997676432133, "loss_nce": 0.00041581824189051986, "loss_density": 0.052721790969371796, "lr": 2.4089146664253166e-05, "grad_norm": 0.0034056096337735653, "wall_ms": 25550992}
{"step": 26850, "loss": 0.006239835172891617, "loss_nce": 0.000724192475900054, "loss_density": 0.05515642464160919, "lr": 2.3656405613181506e-05, "grad_norm": 0.006391784641891718, "wall_ms": 25598211}
{"step": 26900, "loss": 0.005899925250560045, "loss_nce": 0.0004897183971479535, "loss_density": 0.05410207062959671, "lr": 2.3230223781766764e-05, "grad_norm": 0.0033024486619979143, "wall_ms": 25645213}
{"step": 26950, "loss": 0.006406135391443968, "loss_nce": 0.0007634123903699219, "loss_density": 0.056427229195833206, "lr": 2.281061325342217e-05, "grad_norm": 0.005711990874260664, "wall_ms": 25692092}
{"step": 27000, "loss": 0.006285138428211212, "loss_nce": 0.0007446803501807153, "loss_density": 0.055404581129550934, "lr": 2.2397585925246587e-05, "grad_norm": 0.0039893039502203465, "wall_ms": 25739125}
{"step": 27050, "loss": 0.0065219225361943245, "loss_nce": 0.0005322371143847704, "loss_density": 0.05989684909582138, "lr": 2.1991153507687386e-05, "grad_norm": 0.0034476066939532757, "wall_ms": 25786138}
{"step": 27100, "loss": 0.006234966684132814, "loss_nce": 0.0007863747305236757, "loss_density": 0.05448591709136963, "lr": 2.1591327524208184e-05, "grad_norm": 0.0077614616602659225, "wall_ms": 25833062}
{"step": 27150, "loss": 0.00570044107735157, "loss_nce": 0.00046749389730393887, "loss_density": 0.052329473197460175, "lr": 2.119811931096232e-05, "grad_norm": 0.003795885480940342, "wall_ms": 25879992}
{"step": 27200, "loss": 0.006949670612812042, "loss_nce": 0.0015023485757410526, "loss_density": 0.05447322130203247, "lr": 2.0811540016471218e-05, "grad_norm": 0.011546770110726357, "wall_ms": 25926890}
{"step": 27250, "loss": 0.006116870325058699, "loss_nce": 0.0005428103031590581, "loss_density": 0.0557405985891819, "lr": 2.0431600601308665e-05, "grad_norm": 0.0033482906874269247, "wall_ms": 25973838}
{"step": 27300, "loss": 0.006667289882898331, "loss_nce": 0.0008291740668937564, "loss_density": 0.058381155133247375, "lr": 2.0058311837789623e-05, "grad_norm": 0.005942446645349264, "wall_ms": 26020844}
{"step": 27350, "loss": 0.0067707872949540615, "loss_nce": 0.000971663452219218, "loss_density": 0.05799124017357826, "lr": 1.9691684309665104e-05, "grad_norm": 0.008724605664610863, "wall_ms": 26068059}
{"step": 27400, "loss": 0.006094375159591436, "loss_nce": 0.0009032095549628139, "loss_density": 0.05191165208816528, "lr": 1.9331728411821957e-05, "grad_norm": 0.008548257872462273, "wall_ms": 26114901}
{"step": 27450, "loss": 0.006198795046657324, "loss_nce": 0.0007784701883792877, "loss_density": 0.054203249514102936, "lr": 1.8978454349988175e-05, "grad_norm": 0.004175092093646526, "wall_ms": 26161757}
{"step": 27500, "loss": 0.006299125030636787, "loss_nce": 0.001126012997701764, "loss_density": 0.05173111706972122, "lr": 1.86318721404436e-05, "grad_norm": 0.010596704669296741, "wall_ms": 26208629}
{"step": 27550, "loss": 0.005837615579366684, "loss_nce": 0.0006913883262313902, "loss_density": 0.05146227031946182, "lr": 1.8291991609735785e-05, "grad_norm": 0.0034169540740549564, "wall_ms": 26255560}
{"step": 27600, "loss": 0.006743425969034433, "loss_nce": 0.00143095210660249, "loss_density": 0.05312473699450493, "lr": 1.7958822394401554e-05, "grad_norm": 0.01738341897726059, "wall_ms": 26302628}
{"step": 27650, "loss": 0.006620537955313921, "loss_nce": 0.0008745626546442509, "loss_density": 0.0574597530066967, "lr": 1.7632373940693616e-05, "grad_norm": 0.006224347278475761, "wall_ms": 26349752}
{"step": 27700, "loss": 0.006118271965533495, "loss_nce": 0.0006143407663330436, "loss_density": 0.055039308965206146, "lr": 1.7312655504312922e-05, "grad_norm": 0.0038604331202805042, "wall_ms": 26396587}
{"step": 27750, "loss": 0.006385525688529015, "loss_nce": 0.0006848957855254412, "loss_density": 0.05700629577040672, "lr": 1.6999676150146084e-05, "grad_norm": 0.006292062345892191, "wall_ms": 26443410}
{"step": 27800, "loss": 0.02233259752392769, "loss_nce": 0.016465792432427406, "loss_density": 0.058668047189712524, "lr": 1.669344475200838e-05, "grad_norm": 0.23944632709026337, "wall_ms": 26490257}
{"step": 27850, "loss": 0.006308079697191715, "loss_nce": 0.0007244920707307756, "loss_density": 0.055835872888565063, "lr": 1.6393969992392252e-05, "grad_norm": 0.005986457224935293, "wall_ms": 26537042}
{"step": 27900, "loss": 0.005869849119335413, "loss_nce": 0.0003900756419170648, "loss_density": 0.054797735065221786, "lr": 1.6101260362221082e-05, "grad_norm": 0.002477882895618677, "wall_ms": 26583953}
{"step": 27950, "loss": 0.006499065086245537, "loss_nce": 0.0010094710160046816, "loss_density": 0.05489593744277954, "lr": 1.5815324160608417e-05, "grad_norm": 0.007652169559150934, "wall_ms": 26630824}
{"step": 28000, "loss": 0.006619682069867849, "loss_nce": 0.0012114965356886387, "loss_density": 0.05408185347914696, "lr": 1.5536169494622664e-05, "grad_norm": 0.02373824454843998, "wall_ms": 26677790}
{"step": 28050, "loss": 0.006812365725636482, "loss_nce": 0.0008480304386466742, "loss_density": 0.059643350541591644, "lr": 1.5263804279057375e-05, "grad_norm": 0.0045542968437075615, "wall_ms": 26724750}
{"step": 28100, "loss": 0.006989513989537954, "loss_nce": 0.000803902861662209, "loss_density": 0.061856113374233246, "lr": 1.4998236236206608e-05, "grad_norm": 0.004695939365774393, "wall_ms": 26771629}
{"step": 28150, "loss": 0.006343699060380459, "loss_nce": 0.00036832288606092334, "loss_density": 0.05975376069545746, "lr": 1.4739472895646162e-05, "grad_norm": 0.0024973799008876085, "wall_ms": 26818472}
{"step": 28200, "loss": 0.007261536084115505, "loss_nce": 0.0015309869777411222, "loss_density": 0.05730548873543739, "lr": 1.4487521594020037e-05, "grad_norm": 0.021549200639128685, "wall_ms": 26865442}
{"step": 28250, "loss": 0.006983479484915733, "loss_nce": 0.0009530282113701105, "loss_density": 0.060304515063762665, "lr": 1.4242389474832363e-05, "grad_norm": 0.006153210066258907, "wall_ms": 26912323}
{"step": 28300, "loss": 0.0067367698065936565, "loss_nce": 0.0008809001301415265, "loss_density": 0.05855869501829147, "lr": 1.4004083488244975e-05, "grad_norm": 0.010290965437889099, "wall_ms": 26959182}
{"step": 28350, "loss": 0.007401762530207634, "loss_nce": 0.0010418344754725695, "loss_density": 0.06359928101301193, "lr": 1.3772610390880274e-05, "grad_norm": 0.011849055998027325, "wall_ms": 27006067}
{"step": 28400, "loss": 0.00616123341023922, "loss_nce": 0.000962350401096046, "loss_density": 0.05198882892727852, "lr": 1.3547976745629686e-05, "grad_norm": 0.007022891193628311, "wall_ms": 27052976}
{"step": 28450, "loss": 0.006300416775047779, "loss_nce": 0.0010109294671565294, "loss_density": 0.05289487540721893, "lr": 1.333018892146754e-05, "grad_norm": 0.010424169711768627, "wall_ms": 27099801}
{"step": 28500, "loss": 0.007009359076619148, "loss_nce": 0.0006201440701261163, "loss_density": 0.06389214843511581, "lr": 1.3119253093270585e-05, "grad_norm": 0.01379472203552723, "wall_ms": 27146728}
{"step": 28550, "loss": 0.005753371398895979, "loss_nce": 0.00043487129732966423, "loss_density": 0.05318500101566315, "lr": 1.2915175241642836e-05, "grad_norm": 0.002734618028625846, "wall_ms": 27193633}
{"step": 28600, "loss": 0.006544017232954502, "loss_nce": 0.0009413135121576488, "loss_density": 0.056027039885520935, "lr": 1.2717961152746062e-05, "grad_norm": 0.006847742013633251, "wall_ms": 27240514}
{"step": 28650, "loss": 0.00645503168925643, "loss_nce": 0.0005470294854603708, "loss_density": 0.05908001959323883, "lr": 1.252761641813563e-05, "grad_norm": 0.0033056086394935846, "wall_ms": 27287425}
{"step": 28700, "loss": 0.005767285358160734, "loss_nce": 0.00035236982512287796, "loss_density": 0.05414915829896927, "lr": 1.2344146434602146e-05, "grad_norm": 0.0028908755630254745, "wall_ms": 27334311}
{"step": 28750, "loss": 0.006676977034658194, "loss_nce": 0.0005587565829046071, "loss_density": 0.06118220090866089, "lr": 1.2167556404018265e-05, "grad_norm": 0.0036081895232200623, "wall_ms": 27381265}
{"step": 28800, "loss": 0.006004132330417633, "loss_nce": 0.000655846786685288, "loss_density": 0.05348285660147667, "lr": 1.1997851333191282e-05, "grad_norm": 0.004221049137413502, "wall_ms": 27428236}
{"step": 28850, "loss": 0.006040005013346672, "loss_nce": 0.0006656883051618934, "loss_density": 0.053743164986371994, "lr": 1.183503603372121e-05, "grad_norm": 0.005687990691512823, "wall_ms": 27475190}
{"step": 28900, "loss": 0.00716153159737587, "loss_nce": 0.0010288723278790712, "loss_density": 0.06132659316062927, "lr": 1.1679115121864286e-05, "grad_norm": 0.0058783600106835365, "wall_ms": 27522074}
{"step": 28950, "loss": 0.007232952862977982, "loss_nce": 0.0018328321166336536, "loss_density": 0.054001208394765854, "lr": 1.1530093018402129e-05, "grad_norm": 0.010528255254030228, "wall_ms": 27568952}
{"step": 29000, "loss": 0.006644365377724171, "loss_nce": 0.0007388860103674233, "loss_density": 0.059054791927337646, "lr": 1.1387973948516413e-05, "grad_norm": 0.007411428727209568, "wall_ms": 27615939}
{"step": 29050, "loss": 0.008201144635677338, "loss_nce": 0.002817789325490594, "loss_density": 0.053833551704883575, "lr": 1.125276194166898e-05, "grad_norm": 0.025691736489534378, "wall_ms": 27662840}
{"step": 29100, "loss": 0.005754509009420872, "loss_nce": 0.0007389385136775672, "loss_density": 0.05015570670366287, "lr": 1.1124460831487752e-05, "grad_norm": 0.004145738668739796, "wall_ms": 27709680}
{"step": 29150, "loss": 0.005387268494814634, "loss_nce": 0.0004375826974865049, "loss_density": 0.0494968555867672, "lr": 1.1003074255657908e-05, "grad_norm": 0.0029811025597155094, "wall_ms": 27756661}
{"step": 29200, "loss": 0.00597479147836566, "loss_nce": 0.0005439231172204018, "loss_density": 0.054308682680130005, "lr": 1.0888605655818757e-05, "grad_norm": 0.006674340460449457, "wall_ms": 27803581}
{"step": 29250, "loss": 0.0067938766442239285, "loss_nce": 0.0010369722731411457, "loss_density": 0.05756904184818268, "lr": 1.078105827746622e-05, "grad_norm": 0.005418897606432438, "wall_ms": 27850382}
{"step": 29300, "loss": 0.00638198247179389, "loss_nce": 0.0005903207929804921, "loss_density": 0.05791661515831947, "lr": 1.0680435169860776e-05, "grad_norm": 0.003486177185550332, "wall_ms": 27897321}
{"step": 29350, "loss": 0.006277420558035374, "loss_nce": 0.0009228975977748632, "loss_density": 0.05354522913694382, "lr": 1.0586739185940974e-05, "grad_norm": 0.008347016759216785, "wall_ms": 27944309}
{"step": 29400, "loss": 0.0068495613522827625, "loss_nce": 0.00103433383628726, "loss_density": 0.058152273297309875, "lr": 1.0499972982242673e-05, "grad_norm": 0.010205763392150402, "wall_ms": 27991220}
{"step": 29450, "loss": 0.007998845539987087, "loss_nce": 0.003002300625666976, "loss_density": 0.049965448677539825, "lr": 1.0420139018823495e-05, "grad_norm": 0.12089824676513672, "wall_ms": 28038123}
{"step": 29500, "loss": 0.005919528193771839, "loss_nce": 0.0007277363329194486, "loss_density": 0.051917918026447296, "lr": 1.0347239559193323e-05, "grad_norm": 0.006082749459892511, "wall_ms": 28084984}
{"step": 29550, "loss": 0.005880477372556925, "loss_nce": 0.0006298840744420886, "loss_density": 0.05250593274831772, "lr": 1.0281276670249951e-05, "grad_norm": 0.004774052649736404, "wall_ms": 28131937}
{"step": 29600, "loss": 0.005502342712134123, "loss_nce": 0.00048444262938573956, "loss_density": 0.050178997218608856, "lr": 1.022225222222056e-05, "grad_norm": 0.002902636304497719, "wall_ms": 28178854}
{"step": 29650, "loss": 0.005942163988947868, "loss_nce": 0.0004298293497413397, "loss_density": 0.05512334406375885, "lr": 1.0170167888608693e-05, "grad_norm": 0.003726336406543851, "wall_ms": 28225821}
{"step": 29700, "loss": 0.006085713393986225, "loss_nce": 0.0008568849880248308, "loss_density": 0.05228827893733978, "lr": 1.0125025146146728e-05, "grad_norm": 0.007095999550074339, "wall_ms": 28272758}
{"step": 29750, "loss": 0.005899408832192421, "loss_nce": 0.0006217900663614273, "loss_density": 0.052776187658309937, "lr": 1.0086825274754108e-05, "grad_norm": 0.00941284466534853, "wall_ms": 28319696}
{"step": 29800, "loss": 0.00611557811498642, "loss_nce": 0.0006424849852919579, "loss_density": 0.05473092943429947, "lr": 1.0055569357501058e-05, "grad_norm": 0.005190265364944935, "wall_ms": 28366539}
{"step": 29850, "loss": 0.006907371338456869, "loss_nce": 0.0008599002030678093, "loss_density": 0.060474708676338196, "lr": 1.0031258280577722e-05, "grad_norm": 0.00799440685659647, "wall_ms": 28413465}
{"step": 29900, "loss": 0.006278254557400942, "loss_nce": 0.0008191785891540349, "loss_density": 0.05459075793623924, "lr": 1.0013892733269211e-05, "grad_norm": 0.00619782879948616, "wall_ms": 28460478}
{"step": 29950, "loss": 0.0064282286912202835, "loss_nce": 0.0009245107648894191, "loss_density": 0.055037178099155426, "lr": 1.0003473207936022e-05, "grad_norm": 0.015668801963329315, "wall_ms": 28507338}
{"step": 30000, "loss": 0.00672431755810976, "loss_nce": 0.001277339644730091, "loss_density": 0.05446977913379669, "lr": 1e-05, "grad_norm": 0.014501402154564857, "wall_ms": 28554274}