English
glove
lora
distillation
bpe
cl100k_base
jsanzolac commited on
Commit
ef681a6
·
verified ·
1 Parent(s): bc4dc73

Upload rank_512/train_log.jsonl with huggingface_hub

Browse files
Files changed (1) hide show
  1. rank_512/train_log.jsonl +100 -0
rank_512/train_log.jsonl CHANGED
@@ -498,3 +498,103 @@
498
  {"step": 24900, "loss": 0.01772192120552063, "loss_nce": 0.00612602848559618, "loss_density": 0.11595892906188965, "lr": 4.525575501207662e-05, "grad_norm": 0.003699392778798938, "wall_ms": 157549814}
499
  {"step": 24950, "loss": 0.017222072929143906, "loss_nce": 0.005175208672881126, "loss_density": 0.1204686388373375, "lr": 4.4584519609924226e-05, "grad_norm": 0.002767683705314994, "wall_ms": 157865845}
500
  {"step": 25000, "loss": 0.01700553297996521, "loss_nce": 0.005132824182510376, "loss_density": 0.11872707307338715, "lr": 4.391925005851749e-05, "grad_norm": 0.0024957035202533007, "wall_ms": 158182193}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
498
  {"step": 24900, "loss": 0.01772192120552063, "loss_nce": 0.00612602848559618, "loss_density": 0.11595892906188965, "lr": 4.525575501207662e-05, "grad_norm": 0.003699392778798938, "wall_ms": 157549814}
499
  {"step": 24950, "loss": 0.017222072929143906, "loss_nce": 0.005175208672881126, "loss_density": 0.1204686388373375, "lr": 4.4584519609924226e-05, "grad_norm": 0.002767683705314994, "wall_ms": 157865845}
500
  {"step": 25000, "loss": 0.01700553297996521, "loss_nce": 0.005132824182510376, "loss_density": 0.11872707307338715, "lr": 4.391925005851749e-05, "grad_norm": 0.0024957035202533007, "wall_ms": 158182193}
501
+ {"step": 25050, "loss": 0.018509160727262497, "loss_nce": 0.006624283269047737, "loss_density": 0.1188487708568573, "lr": 4.325996522005645e-05, "grad_norm": 0.007082335185259581, "wall_ms": 158506356}
502
+ {"step": 25100, "loss": 0.019545018672943115, "loss_nce": 0.007415838539600372, "loss_density": 0.12129180133342743, "lr": 4.260668378705803e-05, "grad_norm": 0.00678681768476963, "wall_ms": 158822966}
503
+ {"step": 25150, "loss": 0.018135955557227135, "loss_nce": 0.0058902231976389885, "loss_density": 0.12245732545852661, "lr": 4.195942428182631e-05, "grad_norm": 0.0030277001205831766, "wall_ms": 159140188}
504
+ {"step": 25200, "loss": 0.017417244613170624, "loss_nce": 0.0053653717041015625, "loss_density": 0.12051871418952942, "lr": 4.1318205055927574e-05, "grad_norm": 0.0025765597820281982, "wall_ms": 159454761}
505
+ {"step": 25250, "loss": 0.01754085347056389, "loss_nce": 0.0055549293756484985, "loss_density": 0.1198592334985733, "lr": 4.068304428966948e-05, "grad_norm": 0.002958947326987982, "wall_ms": 159771931}
506
+ {"step": 25300, "loss": 0.017589299008250237, "loss_nce": 0.006003560498356819, "loss_density": 0.11585738509893417, "lr": 4.00539599915861e-05, "grad_norm": 0.0035093470942229033, "wall_ms": 160088344}
507
+ {"step": 25350, "loss": 0.019560344517230988, "loss_nce": 0.007894822396337986, "loss_density": 0.11665521562099457, "lr": 3.943096999792693e-05, "grad_norm": 0.007369628641754389, "wall_ms": 160404062}
508
+ {"step": 25400, "loss": 0.01817457005381584, "loss_nce": 0.0060422057285904884, "loss_density": 0.12132363021373749, "lr": 3.881409197215163e-05, "grad_norm": 0.0035299924202263355, "wall_ms": 160719245}
509
+ {"step": 25450, "loss": 0.01708057150244713, "loss_nce": 0.005368825048208237, "loss_density": 0.11711744964122772, "lr": 3.82033434044287e-05, "grad_norm": 0.003087342018261552, "wall_ms": 161034866}
510
+ {"step": 25500, "loss": 0.01666295900940895, "loss_nce": 0.005277114454656839, "loss_density": 0.11385843902826309, "lr": 3.759874161114e-05, "grad_norm": 0.0027145645581185818, "wall_ms": 161350716}
511
+ {"step": 25550, "loss": 0.01718416064977646, "loss_nce": 0.0056306771002709866, "loss_density": 0.11553484201431274, "lr": 3.700030373438951e-05, "grad_norm": 0.0028065056540071964, "wall_ms": 161666802}
512
+ {"step": 25600, "loss": 0.017077013850212097, "loss_nce": 0.005543601233512163, "loss_density": 0.11533412337303162, "lr": 3.6408046741517626e-05, "grad_norm": 0.0037241592071950436, "wall_ms": 161982809}
513
+ {"step": 25650, "loss": 0.017310436815023422, "loss_nce": 0.005079355556517839, "loss_density": 0.12231080234050751, "lr": 3.582198742461967e-05, "grad_norm": 0.0024118334986269474, "wall_ms": 162297338}
514
+ {"step": 25700, "loss": 0.016126297414302826, "loss_nce": 0.004644573200494051, "loss_density": 0.11481723934412003, "lr": 3.5242142400070074e-05, "grad_norm": 0.0022675239015370607, "wall_ms": 162611893}
515
+ {"step": 25750, "loss": 0.017056427896022797, "loss_nce": 0.005675465799868107, "loss_density": 0.11380960792303085, "lr": 3.466852810805131e-05, "grad_norm": 0.002873815596103668, "wall_ms": 162928201}
516
+ {"step": 25800, "loss": 0.015960153192281723, "loss_nce": 0.0043726107105612755, "loss_density": 0.11587541550397873, "lr": 3.410116081208744e-05, "grad_norm": 0.002604549052193761, "wall_ms": 163245623}
517
+ {"step": 25850, "loss": 0.01649445854127407, "loss_nce": 0.00490080751478672, "loss_density": 0.1159365102648735, "lr": 3.3540056598583426e-05, "grad_norm": 0.0028461567126214504, "wall_ms": 163559613}
518
+ {"step": 25900, "loss": 0.017009973526000977, "loss_nce": 0.0053370241075754166, "loss_density": 0.1167294830083847, "lr": 3.298523137636864e-05, "grad_norm": 0.002911926945671439, "wall_ms": 163874928}
519
+ {"step": 25950, "loss": 0.019072245806455612, "loss_nce": 0.006869981065392494, "loss_density": 0.12202265113592148, "lr": 3.243670087624607e-05, "grad_norm": 0.002966414438560605, "wall_ms": 164191310}
520
+ {"step": 26000, "loss": 0.01736976020038128, "loss_nce": 0.005843375343829393, "loss_density": 0.11526384949684143, "lr": 3.189448065054626e-05, "grad_norm": 0.003307138569653034, "wall_ms": 164508812}
521
+ {"step": 26050, "loss": 0.01670476794242859, "loss_nce": 0.005044546909630299, "loss_density": 0.11660219728946686, "lr": 3.1358586072686245e-05, "grad_norm": 0.0024160512257367373, "wall_ms": 164825551}
522
+ {"step": 26100, "loss": 0.017610475420951843, "loss_nce": 0.005720407702028751, "loss_density": 0.11890066415071487, "lr": 3.082903233673382e-05, "grad_norm": 0.0033136752899736166, "wall_ms": 165141639}
523
+ {"step": 26150, "loss": 0.018145261332392693, "loss_nce": 0.006012025289237499, "loss_density": 0.12133235484361649, "lr": 3.0305834456976593e-05, "grad_norm": 0.0034690971951931715, "wall_ms": 165455765}
524
+ {"step": 26200, "loss": 0.017519596964120865, "loss_nce": 0.0054631708189845085, "loss_density": 0.12056425958871841, "lr": 2.9789007267496415e-05, "grad_norm": 0.0025105425156652927, "wall_ms": 165773151}
525
+ {"step": 26250, "loss": 0.017823534086346626, "loss_nce": 0.006589470896869898, "loss_density": 0.11234062910079956, "lr": 2.9278565421748823e-05, "grad_norm": 0.003363188821822405, "wall_ms": 166090127}
526
+ {"step": 26300, "loss": 0.016565026715397835, "loss_nce": 0.005254236049950123, "loss_density": 0.11310790479183197, "lr": 2.877452339214745e-05, "grad_norm": 0.0027174584101885557, "wall_ms": 166407729}
527
+ {"step": 26350, "loss": 0.017274200916290283, "loss_nce": 0.005497585982084274, "loss_density": 0.11776615679264069, "lr": 2.8276895469653687e-05, "grad_norm": 0.0025844015181064606, "wall_ms": 166722846}
528
+ {"step": 26400, "loss": 0.01787237636744976, "loss_nce": 0.006262066308408976, "loss_density": 0.11610309779644012, "lr": 2.7785695763371615e-05, "grad_norm": 0.006054919213056564, "wall_ms": 167039074}
529
+ {"step": 26450, "loss": 0.018227679654955864, "loss_nce": 0.006085767410695553, "loss_density": 0.12141911685466766, "lr": 2.7300938200147965e-05, "grad_norm": 0.002918472047895193, "wall_ms": 167353142}
530
+ {"step": 26500, "loss": 0.016786396503448486, "loss_nce": 0.005016263108700514, "loss_density": 0.11770133674144745, "lr": 2.6822636524177088e-05, "grad_norm": 0.0022671192418783903, "wall_ms": 167669559}
531
+ {"step": 26550, "loss": 0.017985664308071136, "loss_nce": 0.006029629148542881, "loss_density": 0.1195603460073471, "lr": 2.635080429661152e-05, "grad_norm": 0.0026432587765157223, "wall_ms": 167986198}
532
+ {"step": 26600, "loss": 0.01790567673742771, "loss_nce": 0.006131387315690517, "loss_density": 0.11774289608001709, "lr": 2.5885454895177184e-05, "grad_norm": 0.0034228661097586155, "wall_ms": 168304264}
533
+ {"step": 26650, "loss": 0.0158634502440691, "loss_nce": 0.004572712816298008, "loss_density": 0.11290737241506577, "lr": 2.5426601513794476e-05, "grad_norm": 0.0018440543208271265, "wall_ms": 168619391}
534
+ {"step": 26700, "loss": 0.017128512263298035, "loss_nce": 0.005403105169534683, "loss_density": 0.11725407838821411, "lr": 2.497425716220377e-05, "grad_norm": 0.0029072873294353485, "wall_ms": 168935318}
535
+ {"step": 26750, "loss": 0.01587863452732563, "loss_nce": 0.0043291691690683365, "loss_density": 0.11549465358257294, "lr": 2.4528434665596857e-05, "grad_norm": 0.0020867465063929558, "wall_ms": 169251579}
536
+ {"step": 26800, "loss": 0.018316572532057762, "loss_nce": 0.006711930967867374, "loss_density": 0.11604641377925873, "lr": 2.4089146664253166e-05, "grad_norm": 0.0036262364592403173, "wall_ms": 169569185}
537
+ {"step": 26850, "loss": 0.019308872520923615, "loss_nce": 0.007648735772818327, "loss_density": 0.11660136282444, "lr": 2.3656405613181506e-05, "grad_norm": 0.0036386875435709953, "wall_ms": 169886105}
538
+ {"step": 26900, "loss": 0.017766933888196945, "loss_nce": 0.00645026657730341, "loss_density": 0.1131666749715805, "lr": 2.3230223781766764e-05, "grad_norm": 0.0035950420424342155, "wall_ms": 170203698}
539
+ {"step": 26950, "loss": 0.03874893859028816, "loss_nce": 0.02678784169256687, "loss_density": 0.1196109727025032, "lr": 2.281061325342217e-05, "grad_norm": 0.009756588377058506, "wall_ms": 170519633}
540
+ {"step": 27000, "loss": 0.020133022218942642, "loss_nce": 0.008583446964621544, "loss_density": 0.11549574881792068, "lr": 2.2397585925246587e-05, "grad_norm": 0.008569532074034214, "wall_ms": 170836459}
541
+ {"step": 27050, "loss": 0.01720932126045227, "loss_nce": 0.005462393630295992, "loss_density": 0.11746926605701447, "lr": 2.1991153507687386e-05, "grad_norm": 0.003220104845240712, "wall_ms": 171152346}
542
+ {"step": 27100, "loss": 0.01843755878508091, "loss_nce": 0.006126098334789276, "loss_density": 0.12311460077762604, "lr": 2.1591327524208184e-05, "grad_norm": 0.0031248328741639853, "wall_ms": 171469499}
543
+ {"step": 27150, "loss": 0.018353933468461037, "loss_nce": 0.00586699740961194, "loss_density": 0.12486935406923294, "lr": 2.119811931096232e-05, "grad_norm": 0.003107589902356267, "wall_ms": 171785366}
544
+ {"step": 27200, "loss": 0.01739620417356491, "loss_nce": 0.005402000620961189, "loss_density": 0.11994203925132751, "lr": 2.0811540016471218e-05, "grad_norm": 0.0037125598173588514, "wall_ms": 172101211}
545
+ {"step": 27250, "loss": 0.018887311220169067, "loss_nce": 0.006838006898760796, "loss_density": 0.12049303948879242, "lr": 2.0431600601308665e-05, "grad_norm": 0.0131526542827487, "wall_ms": 172418850}
546
+ {"step": 27300, "loss": 0.01796184480190277, "loss_nce": 0.006049156188964844, "loss_density": 0.11912688612937927, "lr": 2.0058311837789623e-05, "grad_norm": 0.0036440289113670588, "wall_ms": 172735325}
547
+ {"step": 27350, "loss": 0.018261361867189407, "loss_nce": 0.0060907211154699326, "loss_density": 0.12170639634132385, "lr": 1.9691684309665104e-05, "grad_norm": 0.0028494936414062977, "wall_ms": 173051940}
548
+ {"step": 27400, "loss": 0.017575785517692566, "loss_nce": 0.006096815690398216, "loss_density": 0.1147896945476532, "lr": 1.9331728411821957e-05, "grad_norm": 0.0039583612233400345, "wall_ms": 173366984}
549
+ {"step": 27450, "loss": 0.018337782472372055, "loss_nce": 0.006434938870370388, "loss_density": 0.11902843415737152, "lr": 1.8978454349988175e-05, "grad_norm": 0.007638565264642239, "wall_ms": 173681287}
550
+ {"step": 27500, "loss": 0.018890701234340668, "loss_nce": 0.006756600923836231, "loss_density": 0.12134099751710892, "lr": 1.86318721404436e-05, "grad_norm": 0.0035578273236751556, "wall_ms": 173998141}
551
+ {"step": 27550, "loss": 0.016777776181697845, "loss_nce": 0.005056263878941536, "loss_density": 0.1172151267528534, "lr": 1.8291991609735785e-05, "grad_norm": 0.00240815500728786, "wall_ms": 174315942}
552
+ {"step": 27600, "loss": 0.019121520221233368, "loss_nce": 0.007453048601746559, "loss_density": 0.11668471246957779, "lr": 1.7958822394401554e-05, "grad_norm": 0.006023565772920847, "wall_ms": 174631118}
553
+ {"step": 27650, "loss": 0.016610100865364075, "loss_nce": 0.004874760285019875, "loss_density": 0.1173533946275711, "lr": 1.7632373940693616e-05, "grad_norm": 0.002251360798254609, "wall_ms": 174948196}
554
+ {"step": 27700, "loss": 0.01621972769498825, "loss_nce": 0.00449762586504221, "loss_density": 0.11722101271152496, "lr": 1.7312655504312922e-05, "grad_norm": 0.0021976332645863295, "wall_ms": 175264960}
555
+ {"step": 27750, "loss": 0.017739970237016678, "loss_nce": 0.005807277746498585, "loss_density": 0.11932691931724548, "lr": 1.6999676150146084e-05, "grad_norm": 0.003353915875777602, "wall_ms": 175580364}
556
+ {"step": 27800, "loss": 0.01694890670478344, "loss_nce": 0.00526037160307169, "loss_density": 0.11688534915447235, "lr": 1.669344475200838e-05, "grad_norm": 0.0028417916037142277, "wall_ms": 175894836}
557
+ {"step": 27850, "loss": 0.0191365797072649, "loss_nce": 0.007247226312756538, "loss_density": 0.11889353394508362, "lr": 1.6393969992392252e-05, "grad_norm": 0.0040230462327599525, "wall_ms": 176209330}
558
+ {"step": 27900, "loss": 0.017897676676511765, "loss_nce": 0.0057954988442361355, "loss_density": 0.12102176994085312, "lr": 1.6101260362221082e-05, "grad_norm": 0.004413286689668894, "wall_ms": 176525454}
559
+ {"step": 27950, "loss": 0.01830127090215683, "loss_nce": 0.006251335609704256, "loss_density": 0.12049934267997742, "lr": 1.5815324160608417e-05, "grad_norm": 0.00344660272821784, "wall_ms": 176840628}
560
+ {"step": 28000, "loss": 0.015942173078656197, "loss_nce": 0.0043595763854682446, "loss_density": 0.11582596600055695, "lr": 1.5536169494622664e-05, "grad_norm": 0.001885465462692082, "wall_ms": 177156736}
561
+ {"step": 28050, "loss": 0.01724330708384514, "loss_nce": 0.005634889006614685, "loss_density": 0.11608417332172394, "lr": 1.5263804279057375e-05, "grad_norm": 0.003363116178661585, "wall_ms": 177474503}
562
+ {"step": 28100, "loss": 0.018116150051355362, "loss_nce": 0.0063707176595926285, "loss_density": 0.11745431274175644, "lr": 1.4998236236206608e-05, "grad_norm": 0.003759870771318674, "wall_ms": 177790717}
563
+ {"step": 28150, "loss": 0.018650149926543236, "loss_nce": 0.007428634446114302, "loss_density": 0.11221515387296677, "lr": 1.4739472895646162e-05, "grad_norm": 0.006681277882307768, "wall_ms": 178109706}
564
+ {"step": 28200, "loss": 0.01779598370194435, "loss_nce": 0.006105022504925728, "loss_density": 0.11690961569547653, "lr": 1.4487521594020037e-05, "grad_norm": 0.003156348131597042, "wall_ms": 178425466}
565
+ {"step": 28250, "loss": 0.016456328332424164, "loss_nce": 0.004776713438332081, "loss_density": 0.11679613590240479, "lr": 1.4242389474832363e-05, "grad_norm": 0.003326233709231019, "wall_ms": 178740451}
566
+ {"step": 28300, "loss": 0.019864194095134735, "loss_nce": 0.007990706712007523, "loss_density": 0.11873485893011093, "lr": 1.4004083488244975e-05, "grad_norm": 0.004817835986614227, "wall_ms": 179057194}
567
+ {"step": 28350, "loss": 0.017003320157527924, "loss_nce": 0.0053107477724552155, "loss_density": 0.11692573130130768, "lr": 1.3772610390880274e-05, "grad_norm": 0.002532408107072115, "wall_ms": 179372771}
568
+ {"step": 28400, "loss": 0.017529236152768135, "loss_nce": 0.005729249678552151, "loss_density": 0.11799986660480499, "lr": 1.3547976745629686e-05, "grad_norm": 0.0030579851008951664, "wall_ms": 179688927}
569
+ {"step": 28450, "loss": 0.01815546676516533, "loss_nce": 0.006362259853631258, "loss_density": 0.11793206632137299, "lr": 1.333018892146754e-05, "grad_norm": 0.013587593100965023, "wall_ms": 180005125}
570
+ {"step": 28500, "loss": 0.017414093017578125, "loss_nce": 0.00609315000474453, "loss_density": 0.11320942640304565, "lr": 1.3119253093270585e-05, "grad_norm": 0.003794890595600009, "wall_ms": 180322835}
571
+ {"step": 28550, "loss": 0.016614237800240517, "loss_nce": 0.004582545720040798, "loss_density": 0.12031692266464233, "lr": 1.2915175241642836e-05, "grad_norm": 0.002478918991982937, "wall_ms": 180638660}
572
+ {"step": 28600, "loss": 0.017593130469322205, "loss_nce": 0.006150895729660988, "loss_density": 0.11442233622074127, "lr": 1.2717961152746062e-05, "grad_norm": 0.003322032978758216, "wall_ms": 180953779}
573
+ {"step": 28650, "loss": 0.01714944839477539, "loss_nce": 0.005555209703743458, "loss_density": 0.11594239622354507, "lr": 1.252761641813563e-05, "grad_norm": 0.0034605541732162237, "wall_ms": 181269106}
574
+ {"step": 28700, "loss": 0.02135428413748741, "loss_nce": 0.009554216638207436, "loss_density": 0.11800067126750946, "lr": 1.2344146434602146e-05, "grad_norm": 0.007185269612818956, "wall_ms": 181586619}
575
+ {"step": 28750, "loss": 0.019148750230669975, "loss_nce": 0.007269718684256077, "loss_density": 0.11879031360149384, "lr": 1.2167556404018265e-05, "grad_norm": 0.0035046336706727743, "wall_ms": 181902964}
576
+ {"step": 28800, "loss": 0.01768924482166767, "loss_nce": 0.006722564343363047, "loss_density": 0.10966679453849792, "lr": 1.1997851333191282e-05, "grad_norm": 0.005734813865274191, "wall_ms": 182218183}
577
+ {"step": 28850, "loss": 0.017813928425312042, "loss_nce": 0.006183309946209192, "loss_density": 0.11630618572235107, "lr": 1.183503603372121e-05, "grad_norm": 0.0032874636817723513, "wall_ms": 182532740}
578
+ {"step": 28900, "loss": 0.017358820885419846, "loss_nce": 0.00564364530146122, "loss_density": 0.11715176701545715, "lr": 1.1679115121864286e-05, "grad_norm": 0.0027250228449702263, "wall_ms": 182847296}
579
+ {"step": 28950, "loss": 0.02288355492055416, "loss_nce": 0.011149165220558643, "loss_density": 0.11734389513731003, "lr": 1.1530093018402129e-05, "grad_norm": 0.013385726138949394, "wall_ms": 183163823}
580
+ {"step": 29000, "loss": 0.02041490003466606, "loss_nce": 0.009230348281562328, "loss_density": 0.11184552311897278, "lr": 1.1387973948516413e-05, "grad_norm": 0.01848837174475193, "wall_ms": 183478841}
581
+ {"step": 29050, "loss": 0.018947362899780273, "loss_nce": 0.007029147353023291, "loss_density": 0.11918215453624725, "lr": 1.125276194166898e-05, "grad_norm": 0.0050741443410515785, "wall_ms": 183793494}
582
+ {"step": 29100, "loss": 0.01777314767241478, "loss_nce": 0.005888114683330059, "loss_density": 0.11885033547878265, "lr": 1.1124460831487752e-05, "grad_norm": 0.003996982239186764, "wall_ms": 184109847}
583
+ {"step": 29150, "loss": 0.018470771610736847, "loss_nce": 0.006345358211547136, "loss_density": 0.12125412374734879, "lr": 1.1003074255657908e-05, "grad_norm": 0.003372128354385495, "wall_ms": 184425711}
584
+ {"step": 29200, "loss": 0.019854433834552765, "loss_nce": 0.008328781463205814, "loss_density": 0.11525651812553406, "lr": 1.0888605655818757e-05, "grad_norm": 0.009042256511747837, "wall_ms": 184740313}
585
+ {"step": 29250, "loss": 0.017834285274147987, "loss_nce": 0.00622315239161253, "loss_density": 0.11611133068799973, "lr": 1.078105827746622e-05, "grad_norm": 0.0038007546681910753, "wall_ms": 185056808}
586
+ {"step": 29300, "loss": 0.017974182963371277, "loss_nce": 0.006338238716125488, "loss_density": 0.11635942757129669, "lr": 1.0680435169860776e-05, "grad_norm": 0.0030981486197561026, "wall_ms": 185371776}
587
+ {"step": 29350, "loss": 0.017663590610027313, "loss_nce": 0.006210772320628166, "loss_density": 0.11452818661928177, "lr": 1.0586739185940974e-05, "grad_norm": 0.003177017206326127, "wall_ms": 185687829}
588
+ {"step": 29400, "loss": 0.016077810898423195, "loss_nce": 0.00453729834407568, "loss_density": 0.1154051274061203, "lr": 1.0499972982242673e-05, "grad_norm": 0.0023984916042536497, "wall_ms": 186003541}
589
+ {"step": 29450, "loss": 0.017905734479427338, "loss_nce": 0.006503623444586992, "loss_density": 0.11402110755443573, "lr": 1.0420139018823495e-05, "grad_norm": 0.003794933669269085, "wall_ms": 186320933}
590
+ {"step": 29500, "loss": 0.01762351207435131, "loss_nce": 0.006045406684279442, "loss_density": 0.11578105390071869, "lr": 1.0347239559193323e-05, "grad_norm": 0.006021488923579454, "wall_ms": 186636564}
591
+ {"step": 29550, "loss": 0.01722542569041252, "loss_nce": 0.005899193696677685, "loss_density": 0.11326231062412262, "lr": 1.0281276670249951e-05, "grad_norm": 0.0033303124364465475, "wall_ms": 186954084}
592
+ {"step": 29600, "loss": 0.016825594007968903, "loss_nce": 0.004800451919436455, "loss_density": 0.12025142461061478, "lr": 1.022225222222056e-05, "grad_norm": 0.0022449807729572058, "wall_ms": 187268193}
593
+ {"step": 29650, "loss": 0.016705866903066635, "loss_nce": 0.005314390640705824, "loss_density": 0.11391476541757584, "lr": 1.0170167888608693e-05, "grad_norm": 0.0024514049291610718, "wall_ms": 187585205}
594
+ {"step": 29700, "loss": 0.016164371743798256, "loss_nce": 0.004893495701253414, "loss_density": 0.11270876228809357, "lr": 1.0125025146146728e-05, "grad_norm": 0.0025471074040979147, "wall_ms": 187899634}
595
+ {"step": 29750, "loss": 0.018673088401556015, "loss_nce": 0.006966623477637768, "loss_density": 0.11706465482711792, "lr": 1.0086825274754108e-05, "grad_norm": 0.007535192649811506, "wall_ms": 188214328}
596
+ {"step": 29800, "loss": 0.017384059727191925, "loss_nce": 0.004983941093087196, "loss_density": 0.12400119006633759, "lr": 1.0055569357501058e-05, "grad_norm": 0.0026758068706840277, "wall_ms": 188529966}
597
+ {"step": 29850, "loss": 0.01970292627811432, "loss_nce": 0.008054208010435104, "loss_density": 0.11648717522621155, "lr": 1.0031258280577722e-05, "grad_norm": 0.006574121303856373, "wall_ms": 188844328}
598
+ {"step": 29900, "loss": 0.015129048377275467, "loss_nce": 0.003752360353246331, "loss_density": 0.1137668788433075, "lr": 1.0013892733269211e-05, "grad_norm": 0.0015500987647101283, "wall_ms": 189158465}
599
+ {"step": 29950, "loss": 0.01626775972545147, "loss_nce": 0.004790944512933493, "loss_density": 0.11476815491914749, "lr": 1.0003473207936022e-05, "grad_norm": 0.00263501750305295, "wall_ms": 189473360}
600
+ {"step": 30000, "loss": 0.017737263813614845, "loss_nce": 0.00605910737067461, "loss_density": 0.1167815625667572, "lr": 1e-05, "grad_norm": 0.003232294926419854, "wall_ms": 189790433}