Upload rank_300/train_log.jsonl with huggingface_hub
Browse files- rank_300/train_log.jsonl +100 -0
rank_300/train_log.jsonl
CHANGED
|
@@ -698,3 +698,103 @@
|
|
| 698 |
{"step": 69800, "loss": 0.23580867052078247, "loss_nce": 0.2332986742258072, "loss_mse": 0.002509994897991419, "lr": 0.0002843736945442768, "grad_norm": 0.11558451503515244, "wall_ms": 6268403}
|
| 699 |
{"step": 69900, "loss": 0.26374825835227966, "loss_nce": 0.26125049591064453, "loss_mse": 0.002497777109965682, "lr": 0.00028386078512288303, "grad_norm": 0.1257464587688446, "wall_ms": 6277352}
|
| 700 |
{"step": 70000, "loss": 0.24297796189785004, "loss_nce": 0.24049930274486542, "loss_mse": 0.002478661946952343, "lr": 0.00028334774739896854, "grad_norm": 0.12397409975528717, "wall_ms": 6286316}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 698 |
{"step": 69800, "loss": 0.23580867052078247, "loss_nce": 0.2332986742258072, "loss_mse": 0.002509994897991419, "lr": 0.0002843736945442768, "grad_norm": 0.11558451503515244, "wall_ms": 6268403}
|
| 699 |
{"step": 69900, "loss": 0.26374825835227966, "loss_nce": 0.26125049591064453, "loss_mse": 0.002497777109965682, "lr": 0.00028386078512288303, "grad_norm": 0.1257464587688446, "wall_ms": 6277352}
|
| 700 |
{"step": 70000, "loss": 0.24297796189785004, "loss_nce": 0.24049930274486542, "loss_mse": 0.002478661946952343, "lr": 0.00028334774739896854, "grad_norm": 0.12397409975528717, "wall_ms": 6286316}
|
| 701 |
+
{"step": 70100, "loss": 0.23948293924331665, "loss_nce": 0.23701757192611694, "loss_mse": 0.0024653691798448563, "lr": 0.00028283458365327625, "grad_norm": 0.1120358482003212, "wall_ms": 6302041}
|
| 702 |
+
{"step": 70200, "loss": 0.24504323303699493, "loss_nce": 0.2425747513771057, "loss_mse": 0.002468486549332738, "lr": 0.00028232129616710943, "grad_norm": 0.12249377369880676, "wall_ms": 6311015}
|
| 703 |
+
{"step": 70300, "loss": 0.25513067841529846, "loss_nce": 0.2526742219924927, "loss_mse": 0.0024564422201365232, "lr": 0.0002818078872223213, "grad_norm": 0.12850438058376312, "wall_ms": 6319991}
|
| 704 |
+
{"step": 70400, "loss": 0.24545255303382874, "loss_nce": 0.24297282099723816, "loss_mse": 0.002479734132066369, "lr": 0.0002812943591013053, "grad_norm": 0.12263116985559464, "wall_ms": 6330037}
|
| 705 |
+
{"step": 70500, "loss": 0.26158761978149414, "loss_nce": 0.25907543301582336, "loss_mse": 0.002512198407202959, "lr": 0.00028078071408698443, "grad_norm": 0.11421786993741989, "wall_ms": 6338988}
|
| 706 |
+
{"step": 70600, "loss": 0.27211064100265503, "loss_nce": 0.26958611607551575, "loss_mse": 0.0025245381984859705, "lr": 0.0002802669544628014, "grad_norm": 0.1256054788827896, "wall_ms": 6347951}
|
| 707 |
+
{"step": 70700, "loss": 0.26685822010040283, "loss_nce": 0.2643740177154541, "loss_mse": 0.0024842096026986837, "lr": 0.0002797530825127083, "grad_norm": 0.13716726005077362, "wall_ms": 6356919}
|
| 708 |
+
{"step": 70800, "loss": 0.23620356619358063, "loss_nce": 0.2336978167295456, "loss_mse": 0.0025057552848011255, "lr": 0.00027923910052115707, "grad_norm": 0.11611805856227875, "wall_ms": 6365903}
|
| 709 |
+
{"step": 70900, "loss": 0.22370021045207977, "loss_nce": 0.2212701439857483, "loss_mse": 0.0024300608783960342, "lr": 0.00027872501077308827, "grad_norm": 0.11123614013195038, "wall_ms": 6374865}
|
| 710 |
+
{"step": 71000, "loss": 0.24167150259017944, "loss_nce": 0.23920778930187225, "loss_mse": 0.002463711891323328, "lr": 0.00027821081555392174, "grad_norm": 0.1146172434091568, "wall_ms": 6383814}
|
| 711 |
+
{"step": 71100, "loss": 0.2260751873254776, "loss_nce": 0.22362692654132843, "loss_mse": 0.0024482551962137222, "lr": 0.0002776965171495463, "grad_norm": 0.10704449564218521, "wall_ms": 6392700}
|
| 712 |
+
{"step": 71200, "loss": 0.21414418518543243, "loss_nce": 0.21168223023414612, "loss_mse": 0.002461952157318592, "lr": 0.0002771821178463092, "grad_norm": 0.11100133508443832, "wall_ms": 6401591}
|
| 713 |
+
{"step": 71300, "loss": 0.21772164106369019, "loss_nce": 0.21520169079303741, "loss_mse": 0.0025199572555720806, "lr": 0.0002766676199310065, "grad_norm": 0.11136674880981445, "wall_ms": 6410516}
|
| 714 |
+
{"step": 71400, "loss": 0.21311631798744202, "loss_nce": 0.21066296100616455, "loss_mse": 0.0024533600080758333, "lr": 0.00027615302569087267, "grad_norm": 0.11843819916248322, "wall_ms": 6419465}
|
| 715 |
+
{"step": 71500, "loss": 0.22519981861114502, "loss_nce": 0.22274325788021088, "loss_mse": 0.0024565572384744883, "lr": 0.0002756383374135702, "grad_norm": 0.11375945061445236, "wall_ms": 6428436}
|
| 716 |
+
{"step": 71600, "loss": 0.24152159690856934, "loss_nce": 0.23902639746665955, "loss_mse": 0.0024952031672000885, "lr": 0.00027512355738717965, "grad_norm": 0.12178342044353485, "wall_ms": 6437408}
|
| 717 |
+
{"step": 71700, "loss": 0.2342597395181656, "loss_nce": 0.2318057268857956, "loss_mse": 0.0024540103040635586, "lr": 0.00027460868790018963, "grad_norm": 0.12102220207452774, "wall_ms": 6446380}
|
| 718 |
+
{"step": 71800, "loss": 0.2291390746831894, "loss_nce": 0.22668343782424927, "loss_mse": 0.0024556389544159174, "lr": 0.0002740937312414862, "grad_norm": 0.1152460053563118, "wall_ms": 6455349}
|
| 719 |
+
{"step": 71900, "loss": 0.2510983347892761, "loss_nce": 0.24863199889659882, "loss_mse": 0.0024663396179676056, "lr": 0.0002735786897003433, "grad_norm": 0.11887852847576141, "wall_ms": 6464309}
|
| 720 |
+
{"step": 72000, "loss": 0.2366245836019516, "loss_nce": 0.23413287103176117, "loss_mse": 0.002491716993972659, "lr": 0.0002730635655664119, "grad_norm": 0.11491606384515762, "wall_ms": 6473268}
|
| 721 |
+
{"step": 72100, "loss": 0.2615783214569092, "loss_nce": 0.25907325744628906, "loss_mse": 0.00250506354495883, "lr": 0.0002725483611297103, "grad_norm": 0.12313222140073776, "wall_ms": 6482232}
|
| 722 |
+
{"step": 72200, "loss": 0.2361956238746643, "loss_nce": 0.23371386528015137, "loss_mse": 0.0024817653466016054, "lr": 0.00027203307868061365, "grad_norm": 0.11496517062187195, "wall_ms": 6491217}
|
| 723 |
+
{"step": 72300, "loss": 0.24533480405807495, "loss_nce": 0.24284833669662476, "loss_mse": 0.0024864638689905405, "lr": 0.0002715177205098442, "grad_norm": 0.11622884124517441, "wall_ms": 6500218}
|
| 724 |
+
{"step": 72400, "loss": 0.22517454624176025, "loss_nce": 0.22268228232860565, "loss_mse": 0.0024922662414610386, "lr": 0.0002710022889084605, "grad_norm": 0.12656307220458984, "wall_ms": 6509221}
|
| 725 |
+
{"step": 72500, "loss": 0.26323914527893066, "loss_nce": 0.26071321964263916, "loss_mse": 0.0025259358808398247, "lr": 0.00027048678616784763, "grad_norm": 0.11349409073591232, "wall_ms": 6518205}
|
| 726 |
+
{"step": 72600, "loss": 0.20871229469776154, "loss_nce": 0.20623525977134705, "loss_mse": 0.002477039583027363, "lr": 0.00026997121457970696, "grad_norm": 0.11126075685024261, "wall_ms": 6527171}
|
| 727 |
+
{"step": 72700, "loss": 0.26014137268066406, "loss_nce": 0.2576313614845276, "loss_mse": 0.0025100228376686573, "lr": 0.0002694555764360462, "grad_norm": 0.12585844099521637, "wall_ms": 6536157}
|
| 728 |
+
{"step": 72800, "loss": 0.22997520864009857, "loss_nce": 0.22747349739074707, "loss_mse": 0.0025017145089805126, "lr": 0.0002689398740291684, "grad_norm": 0.11358380317687988, "wall_ms": 6545126}
|
| 729 |
+
{"step": 72900, "loss": 0.2171812206506729, "loss_nce": 0.21467901766300201, "loss_mse": 0.0025022022891789675, "lr": 0.0002684241096516627, "grad_norm": 0.113593690097332, "wall_ms": 6554061}
|
| 730 |
+
{"step": 73000, "loss": 0.24595296382904053, "loss_nce": 0.2435026466846466, "loss_mse": 0.0024503180757164955, "lr": 0.0002679082855963935, "grad_norm": 0.12652528285980225, "wall_ms": 6562940}
|
| 731 |
+
{"step": 73100, "loss": 0.2726942300796509, "loss_nce": 0.2702072560787201, "loss_mse": 0.002486963290721178, "lr": 0.0002673924041564907, "grad_norm": 0.1284068375825882, "wall_ms": 6571824}
|
| 732 |
+
{"step": 73200, "loss": 0.2655949592590332, "loss_nce": 0.26310819387435913, "loss_mse": 0.0024867679458111525, "lr": 0.0002668764676253392, "grad_norm": 0.1230059266090393, "wall_ms": 6580743}
|
| 733 |
+
{"step": 73300, "loss": 0.237030029296875, "loss_nce": 0.23457074165344238, "loss_mse": 0.0024592846166342497, "lr": 0.0002663604782965688, "grad_norm": 0.12657053768634796, "wall_ms": 6589694}
|
| 734 |
+
{"step": 73400, "loss": 0.2542479336261749, "loss_nce": 0.25177517533302307, "loss_mse": 0.002472762716934085, "lr": 0.00026584443846404393, "grad_norm": 0.13717229664325714, "wall_ms": 6598649}
|
| 735 |
+
{"step": 73500, "loss": 0.2423740029335022, "loss_nce": 0.23987874388694763, "loss_mse": 0.0024952555540949106, "lr": 0.0002653283504218538, "grad_norm": 0.12283863872289658, "wall_ms": 6607626}
|
| 736 |
+
{"step": 73600, "loss": 0.21589592099189758, "loss_nce": 0.21342454850673676, "loss_mse": 0.0024713738821446896, "lr": 0.0002648122164643018, "grad_norm": 0.1132458820939064, "wall_ms": 6616603}
|
| 737 |
+
{"step": 73700, "loss": 0.22950226068496704, "loss_nce": 0.2270384579896927, "loss_mse": 0.0024638071190565825, "lr": 0.0002642960388858954, "grad_norm": 0.11532966047525406, "wall_ms": 6625590}
|
| 738 |
+
{"step": 73800, "loss": 0.22591537237167358, "loss_nce": 0.22341418266296387, "loss_mse": 0.00250119436532259, "lr": 0.00026377981998133593, "grad_norm": 0.11976612359285355, "wall_ms": 6634564}
|
| 739 |
+
{"step": 73900, "loss": 0.27511167526245117, "loss_nce": 0.27261096239089966, "loss_mse": 0.0025007121730595827, "lr": 0.0002632635620455085, "grad_norm": 0.12091807276010513, "wall_ms": 6643530}
|
| 740 |
+
{"step": 74000, "loss": 0.22920864820480347, "loss_nce": 0.2267097681760788, "loss_mse": 0.0024988786317408085, "lr": 0.0002627472673734719, "grad_norm": 0.11966951936483383, "wall_ms": 6652493}
|
| 741 |
+
{"step": 74100, "loss": 0.2493577003479004, "loss_nce": 0.24694989621639252, "loss_mse": 0.0024078087881207466, "lr": 0.0002622309382604481, "grad_norm": 0.1255464255809784, "wall_ms": 6661465}
|
| 742 |
+
{"step": 74200, "loss": 0.2364577353000641, "loss_nce": 0.23398245871067047, "loss_mse": 0.0024752796161919832, "lr": 0.00026171457700181197, "grad_norm": 0.12357527762651443, "wall_ms": 6670428}
|
| 743 |
+
{"step": 74300, "loss": 0.20924997329711914, "loss_nce": 0.20678530633449554, "loss_mse": 0.002464665099978447, "lr": 0.00026119818589308177, "grad_norm": 0.11283524334430695, "wall_ms": 6679411}
|
| 744 |
+
{"step": 74400, "loss": 0.2542198598384857, "loss_nce": 0.25172290205955505, "loss_mse": 0.0024969526566565037, "lr": 0.0002606817672299079, "grad_norm": 0.1159786507487297, "wall_ms": 6688380}
|
| 745 |
+
{"step": 74500, "loss": 0.22415728867053986, "loss_nce": 0.22171923518180847, "loss_mse": 0.0024380465038120747, "lr": 0.0002601653233080637, "grad_norm": 0.11250044405460358, "wall_ms": 6697369}
|
| 746 |
+
{"step": 74600, "loss": 0.25380823016166687, "loss_nce": 0.25126761198043823, "loss_mse": 0.0025406298227608204, "lr": 0.0002596488564234347, "grad_norm": 0.12566128373146057, "wall_ms": 6706338}
|
| 747 |
+
{"step": 74700, "loss": 0.23715689778327942, "loss_nce": 0.23467928171157837, "loss_mse": 0.002477614674717188, "lr": 0.0002591323688720082, "grad_norm": 0.11521201580762863, "wall_ms": 6715298}
|
| 748 |
+
{"step": 74800, "loss": 0.20605921745300293, "loss_nce": 0.20360681414604187, "loss_mse": 0.002452404471114278, "lr": 0.0002586158629498638, "grad_norm": 0.10898151993751526, "wall_ms": 6724213}
|
| 749 |
+
{"step": 74900, "loss": 0.20380371809005737, "loss_nce": 0.20135334134101868, "loss_mse": 0.0024503821041435003, "lr": 0.0002580993409531626, "grad_norm": 0.10693718492984772, "wall_ms": 6733093}
|
| 750 |
+
{"step": 75000, "loss": 0.23894977569580078, "loss_nce": 0.2364240437746048, "loss_mse": 0.0025257256347686052, "lr": 0.000257582805178137, "grad_norm": 0.12051822990179062, "wall_ms": 6741995}
|
| 751 |
+
{"step": 75100, "loss": 0.2504984736442566, "loss_nce": 0.2480228990316391, "loss_mse": 0.002475581830367446, "lr": 0.0002570662579210809, "grad_norm": 0.11376997083425522, "wall_ms": 6750927}
|
| 752 |
+
{"step": 75200, "loss": 0.2472149133682251, "loss_nce": 0.2446754425764084, "loss_mse": 0.0025394747499376535, "lr": 0.0002565497014783393, "grad_norm": 0.12249352782964706, "wall_ms": 6759884}
|
| 753 |
+
{"step": 75300, "loss": 0.253159761428833, "loss_nce": 0.2506623864173889, "loss_mse": 0.002497386420145631, "lr": 0.0002560331381462976, "grad_norm": 0.1361510008573532, "wall_ms": 6768831}
|
| 754 |
+
{"step": 75400, "loss": 0.22497205436229706, "loss_nce": 0.22248294949531555, "loss_mse": 0.0024891074281185865, "lr": 0.00025551657022137225, "grad_norm": 0.10588448494672775, "wall_ms": 6777778}
|
| 755 |
+
{"step": 75500, "loss": 0.2466069608926773, "loss_nce": 0.24417223036289215, "loss_mse": 0.0024347356520593166, "lr": 0.000255, "grad_norm": 0.1122569739818573, "wall_ms": 6786711}
|
| 756 |
+
{"step": 75600, "loss": 0.2232009917497635, "loss_nce": 0.2206888049840927, "loss_mse": 0.002512182341888547, "lr": 0.00025448342977862773, "grad_norm": 0.11561908572912216, "wall_ms": 6795670}
|
| 757 |
+
{"step": 75700, "loss": 0.21881981194019318, "loss_nce": 0.21636401116847992, "loss_mse": 0.0024558028671890497, "lr": 0.00025396686185370245, "grad_norm": 0.11016306281089783, "wall_ms": 6804643}
|
| 758 |
+
{"step": 75800, "loss": 0.25791358947753906, "loss_nce": 0.255399227142334, "loss_mse": 0.0025143756065517664, "lr": 0.0002534502985216608, "grad_norm": 0.12253497540950775, "wall_ms": 6813620}
|
| 759 |
+
{"step": 75900, "loss": 0.24210518598556519, "loss_nce": 0.23958903551101685, "loss_mse": 0.0025161446537822485, "lr": 0.0002529337420789191, "grad_norm": 0.11098743975162506, "wall_ms": 6822617}
|
| 760 |
+
{"step": 76000, "loss": 0.22712737321853638, "loss_nce": 0.22465816140174866, "loss_mse": 0.0024692106526345015, "lr": 0.00025241719482186297, "grad_norm": 0.10752365738153458, "wall_ms": 6831579}
|
| 761 |
+
{"step": 76100, "loss": 0.28436440229415894, "loss_nce": 0.2818523645401001, "loss_mse": 0.0025120240170508623, "lr": 0.00025190065904683745, "grad_norm": 0.13781419396400452, "wall_ms": 6840537}
|
| 762 |
+
{"step": 76200, "loss": 0.2552701532840729, "loss_nce": 0.2527783513069153, "loss_mse": 0.002491812454536557, "lr": 0.0002513841370501362, "grad_norm": 0.1258939504623413, "wall_ms": 6849500}
|
| 763 |
+
{"step": 76300, "loss": 0.2313784509897232, "loss_nce": 0.2288752794265747, "loss_mse": 0.0025031643453985453, "lr": 0.0002508676311279918, "grad_norm": 0.117430679500103, "wall_ms": 6858457}
|
| 764 |
+
{"step": 76400, "loss": 0.23519349098205566, "loss_nce": 0.23269443213939667, "loss_mse": 0.0024990567471832037, "lr": 0.0002503511435765654, "grad_norm": 0.1267726868391037, "wall_ms": 6867418}
|
| 765 |
+
{"step": 76500, "loss": 0.26798003911972046, "loss_nce": 0.26544639468193054, "loss_mse": 0.002533659106120467, "lr": 0.00024983467669193637, "grad_norm": 0.12447496503591537, "wall_ms": 6876373}
|
| 766 |
+
{"step": 76600, "loss": 0.23766517639160156, "loss_nce": 0.23518779873847961, "loss_mse": 0.002477378584444523, "lr": 0.00024931823277009216, "grad_norm": 0.11025623977184296, "wall_ms": 6885295}
|
| 767 |
+
{"step": 76700, "loss": 0.25038787722587585, "loss_nce": 0.24787800014019012, "loss_mse": 0.0025098901242017746, "lr": 0.0002488018141069183, "grad_norm": 0.126640185713768, "wall_ms": 6894179}
|
| 768 |
+
{"step": 76800, "loss": 0.2774796485900879, "loss_nce": 0.27494820952415466, "loss_mse": 0.0025314476806670427, "lr": 0.000248285422998188, "grad_norm": 0.12395117431879044, "wall_ms": 6903061}
|
| 769 |
+
{"step": 76900, "loss": 0.2422029823064804, "loss_nce": 0.23971952497959137, "loss_mse": 0.0024834524374455214, "lr": 0.00024776906173955195, "grad_norm": 0.11429134756326675, "wall_ms": 6911964}
|
| 770 |
+
{"step": 77000, "loss": 0.26834312081336975, "loss_nce": 0.26585137844085693, "loss_mse": 0.0024917491246014833, "lr": 0.00024725273262652807, "grad_norm": 0.1365536004304886, "wall_ms": 6920913}
|
| 771 |
+
{"step": 77100, "loss": 0.24831974506378174, "loss_nce": 0.24581289291381836, "loss_mse": 0.002506852149963379, "lr": 0.00024673643795449147, "grad_norm": 0.1293649524450302, "wall_ms": 6929882}
|
| 772 |
+
{"step": 77200, "loss": 0.22018633782863617, "loss_nce": 0.21771138906478882, "loss_mse": 0.0024749552831053734, "lr": 0.00024622018001866416, "grad_norm": 0.11789287626743317, "wall_ms": 6938855}
|
| 773 |
+
{"step": 77300, "loss": 0.24016624689102173, "loss_nce": 0.23767206072807312, "loss_mse": 0.0024941919837146997, "lr": 0.0002457039611141047, "grad_norm": 0.1220453679561615, "wall_ms": 6947816}
|
| 774 |
+
{"step": 77400, "loss": 0.25620588660240173, "loss_nce": 0.2536787688732147, "loss_mse": 0.0025271165650337934, "lr": 0.00024518778353569825, "grad_norm": 0.1254071146249771, "wall_ms": 6956759}
|
| 775 |
+
{"step": 77500, "loss": 0.2195693999528885, "loss_nce": 0.21708041429519653, "loss_mse": 0.0024889791384339333, "lr": 0.0002446716495781462, "grad_norm": 0.11811619251966476, "wall_ms": 6965703}
|
| 776 |
+
{"step": 77600, "loss": 0.245944082736969, "loss_nce": 0.24348796904087067, "loss_mse": 0.00245611066929996, "lr": 0.0002441555615359561, "grad_norm": 0.12704159319400787, "wall_ms": 6974662}
|
| 777 |
+
{"step": 77700, "loss": 0.25460249185562134, "loss_nce": 0.2520943582057953, "loss_mse": 0.0025081313215196133, "lr": 0.00024363952170343135, "grad_norm": 0.13076092302799225, "wall_ms": 6983619}
|
| 778 |
+
{"step": 77800, "loss": 0.25252363085746765, "loss_nce": 0.2500026822090149, "loss_mse": 0.002520951209589839, "lr": 0.0002431235323746608, "grad_norm": 0.13024243712425232, "wall_ms": 6992591}
|
| 779 |
+
{"step": 77900, "loss": 0.23977185785770416, "loss_nce": 0.23725579679012299, "loss_mse": 0.0025160673540085554, "lr": 0.00024260759584350931, "grad_norm": 0.11494655162096024, "wall_ms": 7001572}
|
| 780 |
+
{"step": 78000, "loss": 0.23387114703655243, "loss_nce": 0.23136265575885773, "loss_mse": 0.0025084870867431164, "lr": 0.00024209171440360653, "grad_norm": 0.12935397028923035, "wall_ms": 7010539}
|
| 781 |
+
{"step": 78100, "loss": 0.27730417251586914, "loss_nce": 0.2748183310031891, "loss_mse": 0.0024858491960912943, "lr": 0.00024157589034833735, "grad_norm": 0.13524504005908966, "wall_ms": 7019494}
|
| 782 |
+
{"step": 78200, "loss": 0.2471490502357483, "loss_nce": 0.2446904331445694, "loss_mse": 0.002458623144775629, "lr": 0.00024106012597083164, "grad_norm": 0.12801054120063782, "wall_ms": 7028455}
|
| 783 |
+
{"step": 78300, "loss": 0.257608026266098, "loss_nce": 0.2550826072692871, "loss_mse": 0.002525421790778637, "lr": 0.00024054442356395386, "grad_norm": 0.12323608994483948, "wall_ms": 7037427}
|
| 784 |
+
{"step": 78400, "loss": 0.2607113718986511, "loss_nce": 0.258199006319046, "loss_mse": 0.00251235393807292, "lr": 0.00024002878542029305, "grad_norm": 0.12315646559000015, "wall_ms": 7046401}
|
| 785 |
+
{"step": 78500, "loss": 0.23459112644195557, "loss_nce": 0.23206886649131775, "loss_mse": 0.0025222657714039087, "lr": 0.00023951321383215246, "grad_norm": 0.12745016813278198, "wall_ms": 7055319}
|
| 786 |
+
{"step": 78600, "loss": 0.23772789537906647, "loss_nce": 0.23520570993423462, "loss_mse": 0.0025221880059689283, "lr": 0.00023899771109153964, "grad_norm": 0.11680945008993149, "wall_ms": 7064197}
|
| 787 |
+
{"step": 78700, "loss": 0.263224720954895, "loss_nce": 0.26068800687789917, "loss_mse": 0.002536701736971736, "lr": 0.00023848227949015594, "grad_norm": 0.1337016522884369, "wall_ms": 7073103}
|
| 788 |
+
{"step": 78800, "loss": 0.24369116127490997, "loss_nce": 0.2412409782409668, "loss_mse": 0.002450178610160947, "lr": 0.00023796692131938633, "grad_norm": 0.12699709832668304, "wall_ms": 7082117}
|
| 789 |
+
{"step": 78900, "loss": 0.2583531439304352, "loss_nce": 0.25584936141967773, "loss_mse": 0.0025037750601768494, "lr": 0.00023745163887028975, "grad_norm": 0.12736506760120392, "wall_ms": 7091136}
|
| 790 |
+
{"step": 79000, "loss": 0.23586413264274597, "loss_nce": 0.23340043425559998, "loss_mse": 0.002463702578097582, "lr": 0.00023693643443358815, "grad_norm": 0.11907332390546799, "wall_ms": 7100182}
|
| 791 |
+
{"step": 79100, "loss": 0.24625778198242188, "loss_nce": 0.24376419186592102, "loss_mse": 0.002493589883670211, "lr": 0.00023642131029965677, "grad_norm": 0.11392756551504135, "wall_ms": 7109154}
|
| 792 |
+
{"step": 79200, "loss": 0.24598944187164307, "loss_nce": 0.2435241937637329, "loss_mse": 0.002465242985635996, "lr": 0.00023590626875851384, "grad_norm": 0.12001735717058182, "wall_ms": 7118158}
|
| 793 |
+
{"step": 79300, "loss": 0.21298304200172424, "loss_nce": 0.21048961579799652, "loss_mse": 0.0024934227112680674, "lr": 0.00023539131209981048, "grad_norm": 0.11052750051021576, "wall_ms": 7127158}
|
| 794 |
+
{"step": 79400, "loss": 0.22809116542339325, "loss_nce": 0.2256009429693222, "loss_mse": 0.0024902252480387688, "lr": 0.00023487644261282046, "grad_norm": 0.11706709116697311, "wall_ms": 7136129}
|
| 795 |
+
{"step": 79500, "loss": 0.24317407608032227, "loss_nce": 0.24071826040744781, "loss_mse": 0.002455814043059945, "lr": 0.0002343616625864299, "grad_norm": 0.13800302147865295, "wall_ms": 7145081}
|
| 796 |
+
{"step": 79600, "loss": 0.24604621529579163, "loss_nce": 0.2435862123966217, "loss_mse": 0.0024600066244602203, "lr": 0.00023384697430912742, "grad_norm": 0.12179362028837204, "wall_ms": 7154080}
|
| 797 |
+
{"step": 79700, "loss": 0.20584708452224731, "loss_nce": 0.20339472591876984, "loss_mse": 0.002452359301969409, "lr": 0.00023333238006899344, "grad_norm": 0.10853485763072968, "wall_ms": 7163104}
|
| 798 |
+
{"step": 79800, "loss": 0.23509517312049866, "loss_nce": 0.23258383572101593, "loss_mse": 0.00251132994890213, "lr": 0.00023281788215369082, "grad_norm": 0.1305357962846756, "wall_ms": 7172119}
|
| 799 |
+
{"step": 79900, "loss": 0.25213736295700073, "loss_nce": 0.24962766468524933, "loss_mse": 0.0025096845347434282, "lr": 0.00023230348285045377, "grad_norm": 0.12180022150278091, "wall_ms": 7181118}
|
| 800 |
+
{"step": 80000, "loss": 0.21336138248443604, "loss_nce": 0.2109503448009491, "loss_mse": 0.002411032561212778, "lr": 0.00023178918444607827, "grad_norm": 0.11037831753492355, "wall_ms": 7190118}
|