jsanzolac commited on
Commit
d5f9fc0
·
verified ·
1 Parent(s): 7dcd98b

Upload rank_512/train_log.jsonl with huggingface_hub

Browse files
Files changed (1) hide show
  1. rank_512/train_log.jsonl +100 -0
rank_512/train_log.jsonl CHANGED
@@ -498,3 +498,103 @@
498
  {"step": 24900, "loss": 0.00691720237955451, "loss_nce": 0.0007257937104441226, "loss_density": 0.06191408634185791, "lr": 4.525575501207662e-05, "grad_norm": 0.004248281940817833, "wall_ms": 23694823}
499
  {"step": 24950, "loss": 0.006338770501315594, "loss_nce": 0.0011751506244763732, "loss_density": 0.05163619667291641, "lr": 4.4584519609924226e-05, "grad_norm": 0.020449072122573853, "wall_ms": 23741663}
500
  {"step": 25000, "loss": 0.006350125651806593, "loss_nce": 0.0006202341755852103, "loss_density": 0.05729891359806061, "lr": 4.391925005851749e-05, "grad_norm": 0.004709034226834774, "wall_ms": 23788504}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
498
  {"step": 24900, "loss": 0.00691720237955451, "loss_nce": 0.0007257937104441226, "loss_density": 0.06191408634185791, "lr": 4.525575501207662e-05, "grad_norm": 0.004248281940817833, "wall_ms": 23694823}
499
  {"step": 24950, "loss": 0.006338770501315594, "loss_nce": 0.0011751506244763732, "loss_density": 0.05163619667291641, "lr": 4.4584519609924226e-05, "grad_norm": 0.020449072122573853, "wall_ms": 23741663}
500
  {"step": 25000, "loss": 0.006350125651806593, "loss_nce": 0.0006202341755852103, "loss_density": 0.05729891359806061, "lr": 4.391925005851749e-05, "grad_norm": 0.004709034226834774, "wall_ms": 23788504}
501
+ {"step": 25050, "loss": 0.006035073660314083, "loss_nce": 0.0007332066306844354, "loss_density": 0.05301867052912712, "lr": 4.325996522005645e-05, "grad_norm": 0.006259765475988388, "wall_ms": 23908748}
502
+ {"step": 25100, "loss": 0.0059931217692792416, "loss_nce": 0.0006408671615645289, "loss_density": 0.053522542119026184, "lr": 4.260668378705803e-05, "grad_norm": 0.006953043397516012, "wall_ms": 23955551}
503
+ {"step": 25150, "loss": 0.006856861058622599, "loss_nce": 0.0009427600307390094, "loss_density": 0.05914100632071495, "lr": 4.195942428182631e-05, "grad_norm": 0.007573932874947786, "wall_ms": 24002452}
504
+ {"step": 25200, "loss": 0.006605486385524273, "loss_nce": 0.0010268368059769273, "loss_density": 0.05578649789094925, "lr": 4.1318205055927574e-05, "grad_norm": 0.00961252860724926, "wall_ms": 24049230}
505
+ {"step": 25250, "loss": 0.006004382856190205, "loss_nce": 0.0005426392308436334, "loss_density": 0.05461743474006653, "lr": 4.068304428966948e-05, "grad_norm": 0.006797300186008215, "wall_ms": 24096251}
506
+ {"step": 25300, "loss": 0.007812189403921366, "loss_nce": 0.002120816148817539, "loss_density": 0.05691373348236084, "lr": 4.00539599915861e-05, "grad_norm": 0.03228509798645973, "wall_ms": 24143105}
507
+ {"step": 25350, "loss": 0.00668724812567234, "loss_nce": 0.0004951902665197849, "loss_density": 0.06192057952284813, "lr": 3.943096999792693e-05, "grad_norm": 0.003156198188662529, "wall_ms": 24190041}
508
+ {"step": 25400, "loss": 0.006458156742155552, "loss_nce": 0.0006801873096264899, "loss_density": 0.057779695838689804, "lr": 3.881409197215163e-05, "grad_norm": 0.0058455560356378555, "wall_ms": 24236941}
509
+ {"step": 25450, "loss": 0.006517811678349972, "loss_nce": 0.0006698983488604426, "loss_density": 0.058479130268096924, "lr": 3.82033434044287e-05, "grad_norm": 0.0051679452881217, "wall_ms": 24283803}
510
+ {"step": 25500, "loss": 0.005386806093156338, "loss_nce": 0.00039780649240128696, "loss_density": 0.04988999664783478, "lr": 3.759874161114e-05, "grad_norm": 0.002981061115860939, "wall_ms": 24330771}
511
+ {"step": 25550, "loss": 0.0062150429002940655, "loss_nce": 0.0005657393485307693, "loss_density": 0.056493036448955536, "lr": 3.700030373438951e-05, "grad_norm": 0.0033944363240152597, "wall_ms": 24377741}
512
+ {"step": 25600, "loss": 0.00592020945623517, "loss_nce": 0.0005542057333514094, "loss_density": 0.053660035133361816, "lr": 3.6408046741517626e-05, "grad_norm": 0.0036776885390281677, "wall_ms": 24424639}
513
+ {"step": 25650, "loss": 0.0075206151232123375, "loss_nce": 0.001859094831161201, "loss_density": 0.056615199893713, "lr": 3.582198742461967e-05, "grad_norm": 0.028680117800831795, "wall_ms": 24471687}
514
+ {"step": 25700, "loss": 0.006022260524332523, "loss_nce": 0.0009384781587868929, "loss_density": 0.05083782598376274, "lr": 3.5242142400070074e-05, "grad_norm": 0.010501752607524395, "wall_ms": 24518581}
515
+ {"step": 25750, "loss": 0.006317559164017439, "loss_nce": 0.0008261502953246236, "loss_density": 0.05491408705711365, "lr": 3.466852810805131e-05, "grad_norm": 0.010560303926467896, "wall_ms": 24565410}
516
+ {"step": 25800, "loss": 0.007090797647833824, "loss_nce": 0.0011281550396233797, "loss_density": 0.05962642282247543, "lr": 3.410116081208744e-05, "grad_norm": 0.015295940451323986, "wall_ms": 24612371}
517
+ {"step": 25850, "loss": 0.007321303244680166, "loss_nce": 0.00127743324264884, "loss_density": 0.06043870002031326, "lr": 3.3540056598583426e-05, "grad_norm": 0.0168415829539299, "wall_ms": 24659172}
518
+ {"step": 25900, "loss": 0.006852158345282078, "loss_nce": 0.0014298169407993555, "loss_density": 0.05422341451048851, "lr": 3.298523137636864e-05, "grad_norm": 0.0279566440731287, "wall_ms": 24706106}
519
+ {"step": 25950, "loss": 0.006186847575008869, "loss_nce": 0.0005691373953595757, "loss_density": 0.05617710202932358, "lr": 3.243670087624607e-05, "grad_norm": 0.00607132026925683, "wall_ms": 24752976}
520
+ {"step": 26000, "loss": 0.006812126375734806, "loss_nce": 0.0005849165027029812, "loss_density": 0.0622720941901207, "lr": 3.189448065054626e-05, "grad_norm": 0.0039723534137010574, "wall_ms": 24799775}
521
+ {"step": 26050, "loss": 0.006569509394466877, "loss_nce": 0.0006835199310444295, "loss_density": 0.058859892189502716, "lr": 3.1358586072686245e-05, "grad_norm": 0.005297433584928513, "wall_ms": 24846670}
522
+ {"step": 26100, "loss": 0.007269435096532106, "loss_nce": 0.0017098878743126988, "loss_density": 0.05559547245502472, "lr": 3.082903233673382e-05, "grad_norm": 0.011362873949110508, "wall_ms": 24893556}
523
+ {"step": 26150, "loss": 0.006293687038123608, "loss_nce": 0.0005863758269697428, "loss_density": 0.05707310885190964, "lr": 3.0305834456976593e-05, "grad_norm": 0.003719005500897765, "wall_ms": 24940543}
524
+ {"step": 26200, "loss": 0.006227030418813229, "loss_nce": 0.0004712793161161244, "loss_density": 0.05755750834941864, "lr": 2.9789007267496415e-05, "grad_norm": 0.003339580725878477, "wall_ms": 24987420}
525
+ {"step": 26250, "loss": 0.00686721783131361, "loss_nce": 0.0011385588441044092, "loss_density": 0.057286590337753296, "lr": 2.9278565421748823e-05, "grad_norm": 0.009792376309633255, "wall_ms": 25034534}
526
+ {"step": 26300, "loss": 0.007857928983867168, "loss_nce": 0.002263725735247135, "loss_density": 0.05594203248620033, "lr": 2.877452339214745e-05, "grad_norm": 0.039130549877882004, "wall_ms": 25081453}
527
+ {"step": 26350, "loss": 0.005907584447413683, "loss_nce": 0.0006800749688409269, "loss_density": 0.05227509140968323, "lr": 2.8276895469653687e-05, "grad_norm": 0.007771602366119623, "wall_ms": 25128392}
528
+ {"step": 26400, "loss": 0.006378096994012594, "loss_nce": 0.0006629059789702296, "loss_density": 0.057151909917593, "lr": 2.7785695763371615e-05, "grad_norm": 0.004828184377402067, "wall_ms": 25175318}
529
+ {"step": 26450, "loss": 0.006207307800650597, "loss_nce": 0.00037078862078487873, "loss_density": 0.05836518853902817, "lr": 2.7300938200147965e-05, "grad_norm": 0.0035220249556005, "wall_ms": 25222250}
530
+ {"step": 26500, "loss": 0.006239233072847128, "loss_nce": 0.0005443115951493382, "loss_density": 0.05694921314716339, "lr": 2.6822636524177088e-05, "grad_norm": 0.003925703931599855, "wall_ms": 25269752}
531
+ {"step": 26550, "loss": 0.006789609789848328, "loss_nce": 0.0008022113470360637, "loss_density": 0.05987398326396942, "lr": 2.635080429661152e-05, "grad_norm": 0.004537977743893862, "wall_ms": 25316647}
532
+ {"step": 26600, "loss": 0.008517177775502205, "loss_nce": 0.0029544481076300144, "loss_density": 0.05562729388475418, "lr": 2.5885454895177184e-05, "grad_norm": 0.017469538375735283, "wall_ms": 25363500}
533
+ {"step": 26650, "loss": 0.005699735134840012, "loss_nce": 0.0006433976232074201, "loss_density": 0.050563372671604156, "lr": 2.5426601513794476e-05, "grad_norm": 0.004530340898782015, "wall_ms": 25410397}
534
+ {"step": 26700, "loss": 0.007013377267867327, "loss_nce": 0.0013709845952689648, "loss_density": 0.05642392486333847, "lr": 2.497425716220377e-05, "grad_norm": 0.00962207280099392, "wall_ms": 25457207}
535
+ {"step": 26750, "loss": 0.006259081419557333, "loss_nce": 0.0007102068630047143, "loss_density": 0.055488742887973785, "lr": 2.4528434665596857e-05, "grad_norm": 0.006809397134929895, "wall_ms": 25504059}
536
+ {"step": 26800, "loss": 0.005687997676432133, "loss_nce": 0.00041581824189051986, "loss_density": 0.052721790969371796, "lr": 2.4089146664253166e-05, "grad_norm": 0.0034056096337735653, "wall_ms": 25550992}
537
+ {"step": 26850, "loss": 0.006239835172891617, "loss_nce": 0.000724192475900054, "loss_density": 0.05515642464160919, "lr": 2.3656405613181506e-05, "grad_norm": 0.006391784641891718, "wall_ms": 25598211}
538
+ {"step": 26900, "loss": 0.005899925250560045, "loss_nce": 0.0004897183971479535, "loss_density": 0.05410207062959671, "lr": 2.3230223781766764e-05, "grad_norm": 0.0033024486619979143, "wall_ms": 25645213}
539
+ {"step": 26950, "loss": 0.006406135391443968, "loss_nce": 0.0007634123903699219, "loss_density": 0.056427229195833206, "lr": 2.281061325342217e-05, "grad_norm": 0.005711990874260664, "wall_ms": 25692092}
540
+ {"step": 27000, "loss": 0.006285138428211212, "loss_nce": 0.0007446803501807153, "loss_density": 0.055404581129550934, "lr": 2.2397585925246587e-05, "grad_norm": 0.0039893039502203465, "wall_ms": 25739125}
541
+ {"step": 27050, "loss": 0.0065219225361943245, "loss_nce": 0.0005322371143847704, "loss_density": 0.05989684909582138, "lr": 2.1991153507687386e-05, "grad_norm": 0.0034476066939532757, "wall_ms": 25786138}
542
+ {"step": 27100, "loss": 0.006234966684132814, "loss_nce": 0.0007863747305236757, "loss_density": 0.05448591709136963, "lr": 2.1591327524208184e-05, "grad_norm": 0.0077614616602659225, "wall_ms": 25833062}
543
+ {"step": 27150, "loss": 0.00570044107735157, "loss_nce": 0.00046749389730393887, "loss_density": 0.052329473197460175, "lr": 2.119811931096232e-05, "grad_norm": 0.003795885480940342, "wall_ms": 25879992}
544
+ {"step": 27200, "loss": 0.006949670612812042, "loss_nce": 0.0015023485757410526, "loss_density": 0.05447322130203247, "lr": 2.0811540016471218e-05, "grad_norm": 0.011546770110726357, "wall_ms": 25926890}
545
+ {"step": 27250, "loss": 0.006116870325058699, "loss_nce": 0.0005428103031590581, "loss_density": 0.0557405985891819, "lr": 2.0431600601308665e-05, "grad_norm": 0.0033482906874269247, "wall_ms": 25973838}
546
+ {"step": 27300, "loss": 0.006667289882898331, "loss_nce": 0.0008291740668937564, "loss_density": 0.058381155133247375, "lr": 2.0058311837789623e-05, "grad_norm": 0.005942446645349264, "wall_ms": 26020844}
547
+ {"step": 27350, "loss": 0.0067707872949540615, "loss_nce": 0.000971663452219218, "loss_density": 0.05799124017357826, "lr": 1.9691684309665104e-05, "grad_norm": 0.008724605664610863, "wall_ms": 26068059}
548
+ {"step": 27400, "loss": 0.006094375159591436, "loss_nce": 0.0009032095549628139, "loss_density": 0.05191165208816528, "lr": 1.9331728411821957e-05, "grad_norm": 0.008548257872462273, "wall_ms": 26114901}
549
+ {"step": 27450, "loss": 0.006198795046657324, "loss_nce": 0.0007784701883792877, "loss_density": 0.054203249514102936, "lr": 1.8978454349988175e-05, "grad_norm": 0.004175092093646526, "wall_ms": 26161757}
550
+ {"step": 27500, "loss": 0.006299125030636787, "loss_nce": 0.001126012997701764, "loss_density": 0.05173111706972122, "lr": 1.86318721404436e-05, "grad_norm": 0.010596704669296741, "wall_ms": 26208629}
551
+ {"step": 27550, "loss": 0.005837615579366684, "loss_nce": 0.0006913883262313902, "loss_density": 0.05146227031946182, "lr": 1.8291991609735785e-05, "grad_norm": 0.0034169540740549564, "wall_ms": 26255560}
552
+ {"step": 27600, "loss": 0.006743425969034433, "loss_nce": 0.00143095210660249, "loss_density": 0.05312473699450493, "lr": 1.7958822394401554e-05, "grad_norm": 0.01738341897726059, "wall_ms": 26302628}
553
+ {"step": 27650, "loss": 0.006620537955313921, "loss_nce": 0.0008745626546442509, "loss_density": 0.0574597530066967, "lr": 1.7632373940693616e-05, "grad_norm": 0.006224347278475761, "wall_ms": 26349752}
554
+ {"step": 27700, "loss": 0.006118271965533495, "loss_nce": 0.0006143407663330436, "loss_density": 0.055039308965206146, "lr": 1.7312655504312922e-05, "grad_norm": 0.0038604331202805042, "wall_ms": 26396587}
555
+ {"step": 27750, "loss": 0.006385525688529015, "loss_nce": 0.0006848957855254412, "loss_density": 0.05700629577040672, "lr": 1.6999676150146084e-05, "grad_norm": 0.006292062345892191, "wall_ms": 26443410}
556
+ {"step": 27800, "loss": 0.02233259752392769, "loss_nce": 0.016465792432427406, "loss_density": 0.058668047189712524, "lr": 1.669344475200838e-05, "grad_norm": 0.23944632709026337, "wall_ms": 26490257}
557
+ {"step": 27850, "loss": 0.006308079697191715, "loss_nce": 0.0007244920707307756, "loss_density": 0.055835872888565063, "lr": 1.6393969992392252e-05, "grad_norm": 0.005986457224935293, "wall_ms": 26537042}
558
+ {"step": 27900, "loss": 0.005869849119335413, "loss_nce": 0.0003900756419170648, "loss_density": 0.054797735065221786, "lr": 1.6101260362221082e-05, "grad_norm": 0.002477882895618677, "wall_ms": 26583953}
559
+ {"step": 27950, "loss": 0.006499065086245537, "loss_nce": 0.0010094710160046816, "loss_density": 0.05489593744277954, "lr": 1.5815324160608417e-05, "grad_norm": 0.007652169559150934, "wall_ms": 26630824}
560
+ {"step": 28000, "loss": 0.006619682069867849, "loss_nce": 0.0012114965356886387, "loss_density": 0.05408185347914696, "lr": 1.5536169494622664e-05, "grad_norm": 0.02373824454843998, "wall_ms": 26677790}
561
+ {"step": 28050, "loss": 0.006812365725636482, "loss_nce": 0.0008480304386466742, "loss_density": 0.059643350541591644, "lr": 1.5263804279057375e-05, "grad_norm": 0.0045542968437075615, "wall_ms": 26724750}
562
+ {"step": 28100, "loss": 0.006989513989537954, "loss_nce": 0.000803902861662209, "loss_density": 0.061856113374233246, "lr": 1.4998236236206608e-05, "grad_norm": 0.004695939365774393, "wall_ms": 26771629}
563
+ {"step": 28150, "loss": 0.006343699060380459, "loss_nce": 0.00036832288606092334, "loss_density": 0.05975376069545746, "lr": 1.4739472895646162e-05, "grad_norm": 0.0024973799008876085, "wall_ms": 26818472}
564
+ {"step": 28200, "loss": 0.007261536084115505, "loss_nce": 0.0015309869777411222, "loss_density": 0.05730548873543739, "lr": 1.4487521594020037e-05, "grad_norm": 0.021549200639128685, "wall_ms": 26865442}
565
+ {"step": 28250, "loss": 0.006983479484915733, "loss_nce": 0.0009530282113701105, "loss_density": 0.060304515063762665, "lr": 1.4242389474832363e-05, "grad_norm": 0.006153210066258907, "wall_ms": 26912323}
566
+ {"step": 28300, "loss": 0.0067367698065936565, "loss_nce": 0.0008809001301415265, "loss_density": 0.05855869501829147, "lr": 1.4004083488244975e-05, "grad_norm": 0.010290965437889099, "wall_ms": 26959182}
567
+ {"step": 28350, "loss": 0.007401762530207634, "loss_nce": 0.0010418344754725695, "loss_density": 0.06359928101301193, "lr": 1.3772610390880274e-05, "grad_norm": 0.011849055998027325, "wall_ms": 27006067}
568
+ {"step": 28400, "loss": 0.00616123341023922, "loss_nce": 0.000962350401096046, "loss_density": 0.05198882892727852, "lr": 1.3547976745629686e-05, "grad_norm": 0.007022891193628311, "wall_ms": 27052976}
569
+ {"step": 28450, "loss": 0.006300416775047779, "loss_nce": 0.0010109294671565294, "loss_density": 0.05289487540721893, "lr": 1.333018892146754e-05, "grad_norm": 0.010424169711768627, "wall_ms": 27099801}
570
+ {"step": 28500, "loss": 0.007009359076619148, "loss_nce": 0.0006201440701261163, "loss_density": 0.06389214843511581, "lr": 1.3119253093270585e-05, "grad_norm": 0.01379472203552723, "wall_ms": 27146728}
571
+ {"step": 28550, "loss": 0.005753371398895979, "loss_nce": 0.00043487129732966423, "loss_density": 0.05318500101566315, "lr": 1.2915175241642836e-05, "grad_norm": 0.002734618028625846, "wall_ms": 27193633}
572
+ {"step": 28600, "loss": 0.006544017232954502, "loss_nce": 0.0009413135121576488, "loss_density": 0.056027039885520935, "lr": 1.2717961152746062e-05, "grad_norm": 0.006847742013633251, "wall_ms": 27240514}
573
+ {"step": 28650, "loss": 0.00645503168925643, "loss_nce": 0.0005470294854603708, "loss_density": 0.05908001959323883, "lr": 1.252761641813563e-05, "grad_norm": 0.0033056086394935846, "wall_ms": 27287425}
574
+ {"step": 28700, "loss": 0.005767285358160734, "loss_nce": 0.00035236982512287796, "loss_density": 0.05414915829896927, "lr": 1.2344146434602146e-05, "grad_norm": 0.0028908755630254745, "wall_ms": 27334311}
575
+ {"step": 28750, "loss": 0.006676977034658194, "loss_nce": 0.0005587565829046071, "loss_density": 0.06118220090866089, "lr": 1.2167556404018265e-05, "grad_norm": 0.0036081895232200623, "wall_ms": 27381265}
576
+ {"step": 28800, "loss": 0.006004132330417633, "loss_nce": 0.000655846786685288, "loss_density": 0.05348285660147667, "lr": 1.1997851333191282e-05, "grad_norm": 0.004221049137413502, "wall_ms": 27428236}
577
+ {"step": 28850, "loss": 0.006040005013346672, "loss_nce": 0.0006656883051618934, "loss_density": 0.053743164986371994, "lr": 1.183503603372121e-05, "grad_norm": 0.005687990691512823, "wall_ms": 27475190}
578
+ {"step": 28900, "loss": 0.00716153159737587, "loss_nce": 0.0010288723278790712, "loss_density": 0.06132659316062927, "lr": 1.1679115121864286e-05, "grad_norm": 0.0058783600106835365, "wall_ms": 27522074}
579
+ {"step": 28950, "loss": 0.007232952862977982, "loss_nce": 0.0018328321166336536, "loss_density": 0.054001208394765854, "lr": 1.1530093018402129e-05, "grad_norm": 0.010528255254030228, "wall_ms": 27568952}
580
+ {"step": 29000, "loss": 0.006644365377724171, "loss_nce": 0.0007388860103674233, "loss_density": 0.059054791927337646, "lr": 1.1387973948516413e-05, "grad_norm": 0.007411428727209568, "wall_ms": 27615939}
581
+ {"step": 29050, "loss": 0.008201144635677338, "loss_nce": 0.002817789325490594, "loss_density": 0.053833551704883575, "lr": 1.125276194166898e-05, "grad_norm": 0.025691736489534378, "wall_ms": 27662840}
582
+ {"step": 29100, "loss": 0.005754509009420872, "loss_nce": 0.0007389385136775672, "loss_density": 0.05015570670366287, "lr": 1.1124460831487752e-05, "grad_norm": 0.004145738668739796, "wall_ms": 27709680}
583
+ {"step": 29150, "loss": 0.005387268494814634, "loss_nce": 0.0004375826974865049, "loss_density": 0.0494968555867672, "lr": 1.1003074255657908e-05, "grad_norm": 0.0029811025597155094, "wall_ms": 27756661}
584
+ {"step": 29200, "loss": 0.00597479147836566, "loss_nce": 0.0005439231172204018, "loss_density": 0.054308682680130005, "lr": 1.0888605655818757e-05, "grad_norm": 0.006674340460449457, "wall_ms": 27803581}
585
+ {"step": 29250, "loss": 0.0067938766442239285, "loss_nce": 0.0010369722731411457, "loss_density": 0.05756904184818268, "lr": 1.078105827746622e-05, "grad_norm": 0.005418897606432438, "wall_ms": 27850382}
586
+ {"step": 29300, "loss": 0.00638198247179389, "loss_nce": 0.0005903207929804921, "loss_density": 0.05791661515831947, "lr": 1.0680435169860776e-05, "grad_norm": 0.003486177185550332, "wall_ms": 27897321}
587
+ {"step": 29350, "loss": 0.006277420558035374, "loss_nce": 0.0009228975977748632, "loss_density": 0.05354522913694382, "lr": 1.0586739185940974e-05, "grad_norm": 0.008347016759216785, "wall_ms": 27944309}
588
+ {"step": 29400, "loss": 0.0068495613522827625, "loss_nce": 0.00103433383628726, "loss_density": 0.058152273297309875, "lr": 1.0499972982242673e-05, "grad_norm": 0.010205763392150402, "wall_ms": 27991220}
589
+ {"step": 29450, "loss": 0.007998845539987087, "loss_nce": 0.003002300625666976, "loss_density": 0.049965448677539825, "lr": 1.0420139018823495e-05, "grad_norm": 0.12089824676513672, "wall_ms": 28038123}
590
+ {"step": 29500, "loss": 0.005919528193771839, "loss_nce": 0.0007277363329194486, "loss_density": 0.051917918026447296, "lr": 1.0347239559193323e-05, "grad_norm": 0.006082749459892511, "wall_ms": 28084984}
591
+ {"step": 29550, "loss": 0.005880477372556925, "loss_nce": 0.0006298840744420886, "loss_density": 0.05250593274831772, "lr": 1.0281276670249951e-05, "grad_norm": 0.004774052649736404, "wall_ms": 28131937}
592
+ {"step": 29600, "loss": 0.005502342712134123, "loss_nce": 0.00048444262938573956, "loss_density": 0.050178997218608856, "lr": 1.022225222222056e-05, "grad_norm": 0.002902636304497719, "wall_ms": 28178854}
593
+ {"step": 29650, "loss": 0.005942163988947868, "loss_nce": 0.0004298293497413397, "loss_density": 0.05512334406375885, "lr": 1.0170167888608693e-05, "grad_norm": 0.003726336406543851, "wall_ms": 28225821}
594
+ {"step": 29700, "loss": 0.006085713393986225, "loss_nce": 0.0008568849880248308, "loss_density": 0.05228827893733978, "lr": 1.0125025146146728e-05, "grad_norm": 0.007095999550074339, "wall_ms": 28272758}
595
+ {"step": 29750, "loss": 0.005899408832192421, "loss_nce": 0.0006217900663614273, "loss_density": 0.052776187658309937, "lr": 1.0086825274754108e-05, "grad_norm": 0.00941284466534853, "wall_ms": 28319696}
596
+ {"step": 29800, "loss": 0.00611557811498642, "loss_nce": 0.0006424849852919579, "loss_density": 0.05473092943429947, "lr": 1.0055569357501058e-05, "grad_norm": 0.005190265364944935, "wall_ms": 28366539}
597
+ {"step": 29850, "loss": 0.006907371338456869, "loss_nce": 0.0008599002030678093, "loss_density": 0.060474708676338196, "lr": 1.0031258280577722e-05, "grad_norm": 0.00799440685659647, "wall_ms": 28413465}
598
+ {"step": 29900, "loss": 0.006278254557400942, "loss_nce": 0.0008191785891540349, "loss_density": 0.05459075793623924, "lr": 1.0013892733269211e-05, "grad_norm": 0.00619782879948616, "wall_ms": 28460478}
599
+ {"step": 29950, "loss": 0.0064282286912202835, "loss_nce": 0.0009245107648894191, "loss_density": 0.055037178099155426, "lr": 1.0003473207936022e-05, "grad_norm": 0.015668801963329315, "wall_ms": 28507338}
600
+ {"step": 30000, "loss": 0.00672431755810976, "loss_nce": 0.001277339644730091, "loss_density": 0.05446977913379669, "lr": 1e-05, "grad_norm": 0.014501402154564857, "wall_ms": 28554274}