Upload rank_300/train_log.jsonl with huggingface_hub
Browse files- rank_300/train_log.jsonl +100 -0
rank_300/train_log.jsonl
CHANGED
|
@@ -598,3 +598,103 @@
|
|
| 598 |
{"step": 59800, "loss": 0.25111469626426697, "loss_nce": 0.24863629043102264, "loss_mse": 0.0024784065317362547, "lr": 0.00033462850997437177, "grad_norm": 0.12471190094947815, "wall_ms": 5370979}
|
| 599 |
{"step": 59900, "loss": 0.28567489981651306, "loss_nce": 0.28315240144729614, "loss_mse": 0.002522500930353999, "lr": 0.0003341398078222182, "grad_norm": 0.1375349760055542, "wall_ms": 5379865}
|
| 600 |
{"step": 60000, "loss": 0.21717432141304016, "loss_nce": 0.214727982878685, "loss_mse": 0.0024463359732180834, "lr": 0.00033365075384883763, "grad_norm": 0.11287357658147812, "wall_ms": 5388763}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 598 |
{"step": 59800, "loss": 0.25111469626426697, "loss_nce": 0.24863629043102264, "loss_mse": 0.0024784065317362547, "lr": 0.00033462850997437177, "grad_norm": 0.12471190094947815, "wall_ms": 5370979}
|
| 599 |
{"step": 59900, "loss": 0.28567489981651306, "loss_nce": 0.28315240144729614, "loss_mse": 0.002522500930353999, "lr": 0.0003341398078222182, "grad_norm": 0.1375349760055542, "wall_ms": 5379865}
|
| 600 |
{"step": 60000, "loss": 0.21717432141304016, "loss_nce": 0.214727982878685, "loss_mse": 0.0024463359732180834, "lr": 0.00033365075384883763, "grad_norm": 0.11287357658147812, "wall_ms": 5388763}
|
| 601 |
+
{"step": 60100, "loss": 0.24558709561824799, "loss_nce": 0.24315685033798218, "loss_mse": 0.0024302464444190264, "lr": 0.0003331613502283515, "grad_norm": 0.13340556621551514, "wall_ms": 5405111}
|
| 602 |
+
{"step": 60200, "loss": 0.24853332340717316, "loss_nce": 0.24602539837360382, "loss_mse": 0.0025079238694161177, "lr": 0.0003326715991364361, "grad_norm": 0.12424098700284958, "wall_ms": 5414000}
|
| 603 |
+
{"step": 60300, "loss": 0.25565576553344727, "loss_nce": 0.25313106179237366, "loss_mse": 0.002524702111259103, "lr": 0.000332181502750312, "grad_norm": 0.13468694686889648, "wall_ms": 5422854}
|
| 604 |
+
{"step": 60400, "loss": 0.2775002419948578, "loss_nce": 0.27492639422416687, "loss_mse": 0.0025738393887877464, "lr": 0.000331691063248735, "grad_norm": 0.13495589792728424, "wall_ms": 5431710}
|
| 605 |
+
{"step": 60500, "loss": 0.2328193187713623, "loss_nce": 0.23035050928592682, "loss_mse": 0.0024688085541129112, "lr": 0.0003312002828119861, "grad_norm": 0.11197277903556824, "wall_ms": 5440570}
|
| 606 |
+
{"step": 60600, "loss": 0.24667827785015106, "loss_nce": 0.24419613182544708, "loss_mse": 0.0024821460247039795, "lr": 0.00033070916362186215, "grad_norm": 0.11425039172172546, "wall_ms": 5449433}
|
| 607 |
+
{"step": 60700, "loss": 0.21365295350551605, "loss_nce": 0.21119321882724762, "loss_mse": 0.0024597414303570986, "lr": 0.0003302177078616658, "grad_norm": 0.11956824362277985, "wall_ms": 5458300}
|
| 608 |
+
{"step": 60800, "loss": 0.26369237899780273, "loss_nce": 0.26121678948402405, "loss_mse": 0.0024756011553108692, "lr": 0.00032972591771619606, "grad_norm": 0.12970054149627686, "wall_ms": 5467178}
|
| 609 |
+
{"step": 60900, "loss": 0.22695167362689972, "loss_nce": 0.22450658679008484, "loss_mse": 0.0024450900964438915, "lr": 0.0003292337953717385, "grad_norm": 0.11264882981777191, "wall_ms": 5476054}
|
| 610 |
+
{"step": 61000, "loss": 0.23720823228359222, "loss_nce": 0.2347196489572525, "loss_mse": 0.002488587750121951, "lr": 0.00032874134301605514, "grad_norm": 0.11494530737400055, "wall_ms": 5484917}
|
| 611 |
+
{"step": 61100, "loss": 0.2534351348876953, "loss_nce": 0.25093376636505127, "loss_mse": 0.0025013608392328024, "lr": 0.00032824856283837547, "grad_norm": 0.13041885197162628, "wall_ms": 5493788}
|
| 612 |
+
{"step": 61200, "loss": 0.25926345586776733, "loss_nce": 0.25674378871917725, "loss_mse": 0.0025196813512593508, "lr": 0.0003277554570293863, "grad_norm": 0.12252362817525864, "wall_ms": 5502664}
|
| 613 |
+
{"step": 61300, "loss": 0.2806133031845093, "loss_nce": 0.27811098098754883, "loss_mse": 0.002502333838492632, "lr": 0.0003272620277812219, "grad_norm": 0.1392044723033905, "wall_ms": 5511543}
|
| 614 |
+
{"step": 61400, "loss": 0.2349795550107956, "loss_nce": 0.23253944516181946, "loss_mse": 0.0024401152040809393, "lr": 0.0003267682772874546, "grad_norm": 0.11423011124134064, "wall_ms": 5520415}
|
| 615 |
+
{"step": 61500, "loss": 0.2824997305870056, "loss_nce": 0.2799960970878601, "loss_mse": 0.0025036209262907505, "lr": 0.00032627420774308455, "grad_norm": 0.12121531367301941, "wall_ms": 5529280}
|
| 616 |
+
{"step": 61600, "loss": 0.27644792199134827, "loss_nce": 0.27397507429122925, "loss_mse": 0.002472846070304513, "lr": 0.0003257798213445304, "grad_norm": 0.1186135932803154, "wall_ms": 5538150}
|
| 617 |
+
{"step": 61700, "loss": 0.2934373617172241, "loss_nce": 0.2909088134765625, "loss_mse": 0.002528550336137414, "lr": 0.00032528512028961976, "grad_norm": 0.1466088891029358, "wall_ms": 5547033}
|
| 618 |
+
{"step": 61800, "loss": 0.2260964959859848, "loss_nce": 0.22363334894180298, "loss_mse": 0.002463146112859249, "lr": 0.00032479010677757857, "grad_norm": 0.1135377436876297, "wall_ms": 5555941}
|
| 619 |
+
{"step": 61900, "loss": 0.2866390347480774, "loss_nce": 0.28409069776535034, "loss_mse": 0.002548347460106015, "lr": 0.0003242947830090219, "grad_norm": 0.13865824043750763, "wall_ms": 5564854}
|
| 620 |
+
{"step": 62000, "loss": 0.2818508744239807, "loss_nce": 0.2793028950691223, "loss_mse": 0.0025479760952293873, "lr": 0.0003237991511859443, "grad_norm": 0.1291116327047348, "wall_ms": 5573759}
|
| 621 |
+
{"step": 62100, "loss": 0.2604336738586426, "loss_nce": 0.2579452395439148, "loss_mse": 0.002488440601155162, "lr": 0.00032330321351170966, "grad_norm": 0.12349564582109451, "wall_ms": 5582663}
|
| 622 |
+
{"step": 62200, "loss": 0.24064363539218903, "loss_nce": 0.23822112381458282, "loss_mse": 0.0024225059896707535, "lr": 0.00032280697219104156, "grad_norm": 0.12135300785303116, "wall_ms": 5591538}
|
| 623 |
+
{"step": 62300, "loss": 0.267173707485199, "loss_nce": 0.26469671726226807, "loss_mse": 0.0024769811425358057, "lr": 0.0003223104294300135, "grad_norm": 0.13605599105358124, "wall_ms": 5600414}
|
| 624 |
+
{"step": 62400, "loss": 0.2520592212677002, "loss_nce": 0.2495483160018921, "loss_mse": 0.002510908991098404, "lr": 0.0003218135874360388, "grad_norm": 0.12272293120622635, "wall_ms": 5609293}
|
| 625 |
+
{"step": 62500, "loss": 0.22229529917240143, "loss_nce": 0.21985696256160736, "loss_mse": 0.0024383363779634237, "lr": 0.0003213164484178616, "grad_norm": 0.1190819963812828, "wall_ms": 5618197}
|
| 626 |
+
{"step": 62600, "loss": 0.25644880533218384, "loss_nce": 0.2540351152420044, "loss_mse": 0.0024136833380907774, "lr": 0.0003208190145855459, "grad_norm": 0.11328353732824326, "wall_ms": 5627097}
|
| 627 |
+
{"step": 62700, "loss": 0.21647414565086365, "loss_nce": 0.21396887302398682, "loss_mse": 0.002505265874788165, "lr": 0.00032032128815046674, "grad_norm": 0.11136633157730103, "wall_ms": 5636019}
|
| 628 |
+
{"step": 62800, "loss": 0.28573906421661377, "loss_nce": 0.2832369804382324, "loss_mse": 0.002502071438357234, "lr": 0.0003198232713252996, "grad_norm": 0.14905929565429688, "wall_ms": 5644922}
|
| 629 |
+
{"step": 62900, "loss": 0.20728395879268646, "loss_nce": 0.20482809841632843, "loss_mse": 0.002455861307680607, "lr": 0.0003193249663240111, "grad_norm": 0.11449076980352402, "wall_ms": 5653843}
|
| 630 |
+
{"step": 63000, "loss": 0.24015513062477112, "loss_nce": 0.23768864572048187, "loss_mse": 0.002466486766934395, "lr": 0.00031882637536184905, "grad_norm": 0.11356458067893982, "wall_ms": 5662726}
|
| 631 |
+
{"step": 63100, "loss": 0.24951598048210144, "loss_nce": 0.24710379540920258, "loss_mse": 0.002412187634035945, "lr": 0.00031832750065533227, "grad_norm": 0.12143789976835251, "wall_ms": 5671612}
|
| 632 |
+
{"step": 63200, "loss": 0.25542256236076355, "loss_nce": 0.25295379757881165, "loss_mse": 0.0024687747936695814, "lr": 0.00031782834442224125, "grad_norm": 0.13258381187915802, "wall_ms": 5680499}
|
| 633 |
+
{"step": 63300, "loss": 0.25527143478393555, "loss_nce": 0.252811461687088, "loss_mse": 0.0024599707685410976, "lr": 0.00031732890888160775, "grad_norm": 0.12830433249473572, "wall_ms": 5689397}
|
| 634 |
+
{"step": 63400, "loss": 0.24921754002571106, "loss_nce": 0.24667730927467346, "loss_mse": 0.0025402368046343327, "lr": 0.0003168291962537054, "grad_norm": 0.11994313448667526, "wall_ms": 5698303}
|
| 635 |
+
{"step": 63500, "loss": 0.25260528922080994, "loss_nce": 0.25013595819473267, "loss_mse": 0.002469327999278903, "lr": 0.00031632920876003976, "grad_norm": 0.13265010714530945, "wall_ms": 5707211}
|
| 636 |
+
{"step": 63600, "loss": 0.2814013659954071, "loss_nce": 0.2789105176925659, "loss_mse": 0.002490854822099209, "lr": 0.0003158289486233379, "grad_norm": 0.13605259358882904, "wall_ms": 5716121}
|
| 637 |
+
{"step": 63700, "loss": 0.22283461689949036, "loss_nce": 0.22035855054855347, "loss_mse": 0.0024760703090578318, "lr": 0.0003153284180675394, "grad_norm": 0.11121460795402527, "wall_ms": 5725020}
|
| 638 |
+
{"step": 63800, "loss": 0.2352452278137207, "loss_nce": 0.23274491727352142, "loss_mse": 0.0025003121700137854, "lr": 0.00031482761931778564, "grad_norm": 0.11327195167541504, "wall_ms": 5733915}
|
| 639 |
+
{"step": 63900, "loss": 0.2559795677661896, "loss_nce": 0.2535235583782196, "loss_mse": 0.0024560177698731422, "lr": 0.00031432655460041054, "grad_norm": 0.1279122531414032, "wall_ms": 5742800}
|
| 640 |
+
{"step": 64000, "loss": 0.2379511296749115, "loss_nce": 0.2354150414466858, "loss_mse": 0.0025360812433063984, "lr": 0.0003138252261429303, "grad_norm": 0.1196322813630104, "wall_ms": 5751689}
|
| 641 |
+
{"step": 64100, "loss": 0.22108425199985504, "loss_nce": 0.21864917874336243, "loss_mse": 0.002435071161016822, "lr": 0.00031332363617403355, "grad_norm": 0.11296342313289642, "wall_ms": 5760584}
|
| 642 |
+
{"step": 64200, "loss": 0.26085615158081055, "loss_nce": 0.2583538889884949, "loss_mse": 0.0025022588670253754, "lr": 0.00031282178692357157, "grad_norm": 0.12131813913583755, "wall_ms": 5769470}
|
| 643 |
+
{"step": 64300, "loss": 0.25727030634880066, "loss_nce": 0.2547365128993988, "loss_mse": 0.0025338006671518087, "lr": 0.00031231968062254815, "grad_norm": 0.12981942296028137, "wall_ms": 5778375}
|
| 644 |
+
{"step": 64400, "loss": 0.2520886957645416, "loss_nce": 0.24961484968662262, "loss_mse": 0.0024738565552979708, "lr": 0.00031181731950311004, "grad_norm": 0.1231703907251358, "wall_ms": 5787266}
|
| 645 |
+
{"step": 64500, "loss": 0.26420989632606506, "loss_nce": 0.2617073059082031, "loss_mse": 0.002502598101273179, "lr": 0.00031131470579853673, "grad_norm": 0.12564454972743988, "wall_ms": 5796145}
|
| 646 |
+
{"step": 64600, "loss": 0.2764851152896881, "loss_nce": 0.2740030884742737, "loss_mse": 0.002482026582583785, "lr": 0.00031081184174323027, "grad_norm": 0.14714263379573822, "wall_ms": 5805043}
|
| 647 |
+
{"step": 64700, "loss": 0.2440696507692337, "loss_nce": 0.24159272015094757, "loss_mse": 0.002476937137544155, "lr": 0.0003103087295727062, "grad_norm": 0.12501965463161469, "wall_ms": 5813940}
|
| 648 |
+
{"step": 64800, "loss": 0.2409229874610901, "loss_nce": 0.2384396195411682, "loss_mse": 0.00248337397351861, "lr": 0.0003098053715235827, "grad_norm": 0.11749096214771271, "wall_ms": 5822826}
|
| 649 |
+
{"step": 64900, "loss": 0.2549411356449127, "loss_nce": 0.25245893001556396, "loss_mse": 0.0024822161067277193, "lr": 0.0003093017698335712, "grad_norm": 0.13044188916683197, "wall_ms": 5831710}
|
| 650 |
+
{"step": 65000, "loss": 0.25748229026794434, "loss_nce": 0.2549978196620941, "loss_mse": 0.00248446105979383, "lr": 0.0003087979267414659, "grad_norm": 0.1294815093278885, "wall_ms": 5840600}
|
| 651 |
+
{"step": 65100, "loss": 0.2500176727771759, "loss_nce": 0.24749961495399475, "loss_mse": 0.0025180538650602102, "lr": 0.0003082938444871348, "grad_norm": 0.13060958683490753, "wall_ms": 5849486}
|
| 652 |
+
{"step": 65200, "loss": 0.2654229700565338, "loss_nce": 0.26294100284576416, "loss_mse": 0.0024819723330438137, "lr": 0.00030778952531150864, "grad_norm": 0.12440643459558487, "wall_ms": 5858374}
|
| 653 |
+
{"step": 65300, "loss": 0.26465871930122375, "loss_nce": 0.26214802265167236, "loss_mse": 0.0025106894318014383, "lr": 0.00030728497145657134, "grad_norm": 0.12611287832260132, "wall_ms": 5867279}
|
| 654 |
+
{"step": 65400, "loss": 0.26317209005355835, "loss_nce": 0.2606284022331238, "loss_mse": 0.0025436892174184322, "lr": 0.00030678018516535054, "grad_norm": 0.13589614629745483, "wall_ms": 5876168}
|
| 655 |
+
{"step": 65500, "loss": 0.2672985792160034, "loss_nce": 0.2648458182811737, "loss_mse": 0.0024527728091925383, "lr": 0.0003062751686819069, "grad_norm": 0.1296466886997223, "wall_ms": 5885051}
|
| 656 |
+
{"step": 65600, "loss": 0.2487020194530487, "loss_nce": 0.24621006846427917, "loss_mse": 0.0024919547140598297, "lr": 0.0003057699242513243, "grad_norm": 0.12698106467723846, "wall_ms": 5893935}
|
| 657 |
+
{"step": 65700, "loss": 0.2798651158809662, "loss_nce": 0.27735933661460876, "loss_mse": 0.002505788579583168, "lr": 0.00030526445411970015, "grad_norm": 0.13182978332042694, "wall_ms": 5902813}
|
| 658 |
+
{"step": 65800, "loss": 0.2583588659763336, "loss_nce": 0.25589388608932495, "loss_mse": 0.002464965684339404, "lr": 0.0003047587605341353, "grad_norm": 0.12614752352237701, "wall_ms": 5911704}
|
| 659 |
+
{"step": 65900, "loss": 0.22026023268699646, "loss_nce": 0.21781547367572784, "loss_mse": 0.002444752724841237, "lr": 0.00030425284574272393, "grad_norm": 0.11034902185201645, "wall_ms": 5920587}
|
| 660 |
+
{"step": 66000, "loss": 0.2651234567165375, "loss_nce": 0.2626189887523651, "loss_mse": 0.002504474250599742, "lr": 0.00030374671199454343, "grad_norm": 0.12980782985687256, "wall_ms": 5929472}
|
| 661 |
+
{"step": 66100, "loss": 0.2573617994785309, "loss_nce": 0.2548367977142334, "loss_mse": 0.0025250082835555077, "lr": 0.00030324036153964477, "grad_norm": 0.12487063556909561, "wall_ms": 5938358}
|
| 662 |
+
{"step": 66200, "loss": 0.26738256216049194, "loss_nce": 0.26488032937049866, "loss_mse": 0.002502226736396551, "lr": 0.00030273379662904226, "grad_norm": 0.13736681640148163, "wall_ms": 5947244}
|
| 663 |
+
{"step": 66300, "loss": 0.25670620799064636, "loss_nce": 0.2542005479335785, "loss_mse": 0.0025056598242372274, "lr": 0.00030222701951470357, "grad_norm": 0.11780697852373123, "wall_ms": 5956141}
|
| 664 |
+
{"step": 66400, "loss": 0.2609589695930481, "loss_nce": 0.25846755504608154, "loss_mse": 0.002491412917152047, "lr": 0.00030172003244953964, "grad_norm": 0.12420804798603058, "wall_ms": 5965020}
|
| 665 |
+
{"step": 66500, "loss": 0.22394579648971558, "loss_nce": 0.2214791625738144, "loss_mse": 0.002466632751747966, "lr": 0.000301212837687395, "grad_norm": 0.1172209084033966, "wall_ms": 5973912}
|
| 666 |
+
{"step": 66600, "loss": 0.24293598532676697, "loss_nce": 0.24044524133205414, "loss_mse": 0.0024907393380999565, "lr": 0.0003007054374830371, "grad_norm": 0.12436547130346298, "wall_ms": 5982807}
|
| 667 |
+
{"step": 66700, "loss": 0.2621209919452667, "loss_nce": 0.25960609316825867, "loss_mse": 0.002514899242669344, "lr": 0.0003001978340921472, "grad_norm": 0.13283632695674896, "wall_ms": 5991711}
|
| 668 |
+
{"step": 66800, "loss": 0.2880057692527771, "loss_nce": 0.2854645550251007, "loss_mse": 0.002541210735216737, "lr": 0.0002996900297713097, "grad_norm": 0.1499718576669693, "wall_ms": 6000629}
|
| 669 |
+
{"step": 66900, "loss": 0.21879956126213074, "loss_nce": 0.21633557975292206, "loss_mse": 0.0024639740586280823, "lr": 0.0002991820267780019, "grad_norm": 0.11328241229057312, "wall_ms": 6009540}
|
| 670 |
+
{"step": 67000, "loss": 0.2431573122739792, "loss_nce": 0.2406700849533081, "loss_mse": 0.002487221732735634, "lr": 0.0002986738273705846, "grad_norm": 0.12817995250225067, "wall_ms": 6018446}
|
| 671 |
+
{"step": 67100, "loss": 0.24143823981285095, "loss_nce": 0.2389465868473053, "loss_mse": 0.0024916590191423893, "lr": 0.0002981654338082918, "grad_norm": 0.12257543951272964, "wall_ms": 6027359}
|
| 672 |
+
{"step": 67200, "loss": 0.25364193320274353, "loss_nce": 0.2510957419872284, "loss_mse": 0.002546182833611965, "lr": 0.0002976568483512206, "grad_norm": 0.12341001629829407, "wall_ms": 6036276}
|
| 673 |
+
{"step": 67300, "loss": 0.25427213311195374, "loss_nce": 0.25176650285720825, "loss_mse": 0.0025056377053260803, "lr": 0.000297148073260321, "grad_norm": 0.12941177189350128, "wall_ms": 6045193}
|
| 674 |
+
{"step": 67400, "loss": 0.2569858431816101, "loss_nce": 0.2544700503349304, "loss_mse": 0.002515797270461917, "lr": 0.00029663911079738626, "grad_norm": 0.1391107738018036, "wall_ms": 6054110}
|
| 675 |
+
{"step": 67500, "loss": 0.2347715049982071, "loss_nce": 0.2323303371667862, "loss_mse": 0.002441170159727335, "lr": 0.00029612996322504254, "grad_norm": 0.11334304511547089, "wall_ms": 6063016}
|
| 676 |
+
{"step": 67600, "loss": 0.2800554037094116, "loss_nce": 0.2775402069091797, "loss_mse": 0.0025152056477963924, "lr": 0.00029562063280673883, "grad_norm": 0.14359918236732483, "wall_ms": 6071952}
|
| 677 |
+
{"step": 67700, "loss": 0.2522546350955963, "loss_nce": 0.24971917271614075, "loss_mse": 0.002535460749641061, "lr": 0.00029511112180673714, "grad_norm": 0.12927845120429993, "wall_ms": 6080905}
|
| 678 |
+
{"step": 67800, "loss": 0.2310393750667572, "loss_nce": 0.2285470962524414, "loss_mse": 0.002492284867912531, "lr": 0.00029460143249010213, "grad_norm": 0.12258365750312805, "wall_ms": 6089850}
|
| 679 |
+
{"step": 67900, "loss": 0.30447518825531006, "loss_nce": 0.3019634783267975, "loss_mse": 0.002511711558327079, "lr": 0.0002940915671226912, "grad_norm": 0.13406120240688324, "wall_ms": 6098791}
|
| 680 |
+
{"step": 68000, "loss": 0.24047425389289856, "loss_nce": 0.23795907199382782, "loss_mse": 0.0025151870213449, "lr": 0.0002935815279711444, "grad_norm": 0.13264892995357513, "wall_ms": 6107727}
|
| 681 |
+
{"step": 68100, "loss": 0.24636231362819672, "loss_nce": 0.2438991516828537, "loss_mse": 0.0024631600826978683, "lr": 0.0002930713173028744, "grad_norm": 0.13344606757164001, "wall_ms": 6116668}
|
| 682 |
+
{"step": 68200, "loss": 0.2179877609014511, "loss_nce": 0.21551644802093506, "loss_mse": 0.002471317071467638, "lr": 0.00029256093738605634, "grad_norm": 0.11878366023302078, "wall_ms": 6125601}
|
| 683 |
+
{"step": 68300, "loss": 0.2574642598628998, "loss_nce": 0.2549676299095154, "loss_mse": 0.002496624831110239, "lr": 0.0002920503904896176, "grad_norm": 0.13698269426822662, "wall_ms": 6134527}
|
| 684 |
+
{"step": 68400, "loss": 0.2744201421737671, "loss_nce": 0.271944522857666, "loss_mse": 0.0024756104685366154, "lr": 0.0002915396788832282, "grad_norm": 0.129328653216362, "wall_ms": 6143470}
|
| 685 |
+
{"step": 68500, "loss": 0.24047371745109558, "loss_nce": 0.23796799778938293, "loss_mse": 0.002505721990019083, "lr": 0.00029102880483729027, "grad_norm": 0.125127375125885, "wall_ms": 6152401}
|
| 686 |
+
{"step": 68600, "loss": 0.25251832604408264, "loss_nce": 0.2500169277191162, "loss_mse": 0.0025013897102326155, "lr": 0.0002905177706229279, "grad_norm": 0.13457433879375458, "wall_ms": 6161337}
|
| 687 |
+
{"step": 68700, "loss": 0.277326762676239, "loss_nce": 0.274818480014801, "loss_mse": 0.002508295001462102, "lr": 0.00029000657851197735, "grad_norm": 0.12902796268463135, "wall_ms": 6170285}
|
| 688 |
+
{"step": 68800, "loss": 0.25648385286331177, "loss_nce": 0.25397419929504395, "loss_mse": 0.0025096505414694548, "lr": 0.00028949523077697676, "grad_norm": 0.11936090886592865, "wall_ms": 6179224}
|
| 689 |
+
{"step": 68900, "loss": 0.25161072611808777, "loss_nce": 0.24917030334472656, "loss_mse": 0.0024404339492321014, "lr": 0.0002889837296911563, "grad_norm": 0.1336245983839035, "wall_ms": 6188152}
|
| 690 |
+
{"step": 69000, "loss": 0.25301826000213623, "loss_nce": 0.25051233172416687, "loss_mse": 0.0025059303734451532, "lr": 0.0002884720775284276, "grad_norm": 0.11409962922334671, "wall_ms": 6197045}
|
| 691 |
+
{"step": 69100, "loss": 0.25348252058029175, "loss_nce": 0.25104403495788574, "loss_mse": 0.002438488882035017, "lr": 0.00028796027656337417, "grad_norm": 0.12050490826368332, "wall_ms": 6205940}
|
| 692 |
+
{"step": 69200, "loss": 0.2547222375869751, "loss_nce": 0.25229477882385254, "loss_mse": 0.002427453175187111, "lr": 0.0002874483290712406, "grad_norm": 0.1284312754869461, "wall_ms": 6214850}
|
| 693 |
+
{"step": 69300, "loss": 0.27366703748703003, "loss_nce": 0.2711630165576935, "loss_mse": 0.0025040097534656525, "lr": 0.00028693623732792354, "grad_norm": 0.1445707380771637, "wall_ms": 6223776}
|
| 694 |
+
{"step": 69400, "loss": 0.2788941264152527, "loss_nce": 0.27639442682266235, "loss_mse": 0.0024997100699692965, "lr": 0.0002864240036099605, "grad_norm": 0.13751421868801117, "wall_ms": 6232676}
|
| 695 |
+
{"step": 69500, "loss": 0.2753647267818451, "loss_nce": 0.2728379964828491, "loss_mse": 0.0025267296005040407, "lr": 0.0002859116301945201, "grad_norm": 0.1335645467042923, "wall_ms": 6241583}
|
| 696 |
+
{"step": 69600, "loss": 0.2683931887149811, "loss_nce": 0.2658712565898895, "loss_mse": 0.0025219249073415995, "lr": 0.0002853991193593921, "grad_norm": 0.1339402198791504, "wall_ms": 6250508}
|
| 697 |
+
{"step": 69700, "loss": 0.22771453857421875, "loss_nce": 0.22523608803749084, "loss_mse": 0.0024784556590020657, "lr": 0.0002848864733829772, "grad_norm": 0.11189991235733032, "wall_ms": 6259459}
|
| 698 |
+
{"step": 69800, "loss": 0.23580867052078247, "loss_nce": 0.2332986742258072, "loss_mse": 0.002509994897991419, "lr": 0.0002843736945442768, "grad_norm": 0.11558451503515244, "wall_ms": 6268403}
|
| 699 |
+
{"step": 69900, "loss": 0.26374825835227966, "loss_nce": 0.26125049591064453, "loss_mse": 0.002497777109965682, "lr": 0.00028386078512288303, "grad_norm": 0.1257464587688446, "wall_ms": 6277352}
|
| 700 |
+
{"step": 70000, "loss": 0.24297796189785004, "loss_nce": 0.24049930274486542, "loss_mse": 0.002478661946952343, "lr": 0.00028334774739896854, "grad_norm": 0.12397409975528717, "wall_ms": 6286316}
|