Upload rank_300/train_log.jsonl with huggingface_hub
Browse files- rank_300/train_log.jsonl +100 -0
rank_300/train_log.jsonl
CHANGED
|
@@ -398,3 +398,103 @@
|
|
| 398 |
{"step": 39800, "loss": 0.0062874965369701385, "lr": 0.00042248796854912776, "grad_norm": 0.05316545441746712, "wall_ms": 797533}
|
| 399 |
{"step": 39900, "loss": 0.0188736692070961, "lr": 0.0004221105857258146, "grad_norm": 0.18801380693912506, "wall_ms": 799446}
|
| 400 |
{"step": 40000, "loss": 0.002954079071059823, "lr": 0.00042173246000138824, "grad_norm": 0.028334325179457664, "wall_ms": 801360}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 398 |
{"step": 39800, "loss": 0.0062874965369701385, "lr": 0.00042248796854912776, "grad_norm": 0.05316545441746712, "wall_ms": 797533}
|
| 399 |
{"step": 39900, "loss": 0.0188736692070961, "lr": 0.0004221105857258146, "grad_norm": 0.18801380693912506, "wall_ms": 799446}
|
| 400 |
{"step": 40000, "loss": 0.002954079071059823, "lr": 0.00042173246000138824, "grad_norm": 0.028334325179457664, "wall_ms": 801360}
|
| 401 |
+
{"step": 40100, "loss": 0.010397459380328655, "lr": 0.0004213535930568317, "grad_norm": 0.13505253195762634, "wall_ms": 815719}
|
| 402 |
+
{"step": 40200, "loss": 0.00856927689164877, "lr": 0.0004209739865764226, "grad_norm": 0.12606368958950043, "wall_ms": 817632}
|
| 403 |
+
{"step": 40300, "loss": 0.012866489589214325, "lr": 0.0004205936422477267, "grad_norm": 0.13808761537075043, "wall_ms": 819546}
|
| 404 |
+
{"step": 40400, "loss": 0.007574545219540596, "lr": 0.00042021256176158977, "grad_norm": 0.1353817582130432, "wall_ms": 821460}
|
| 405 |
+
{"step": 40500, "loss": 0.01743222586810589, "lr": 0.0004198307468121303, "grad_norm": 0.15875573456287384, "wall_ms": 823374}
|
| 406 |
+
{"step": 40600, "loss": 0.06191498786211014, "lr": 0.0004194481990967316, "grad_norm": 0.22612588107585907, "wall_ms": 825286}
|
| 407 |
+
{"step": 40700, "loss": 0.016912980005145073, "lr": 0.0004190649203160348, "grad_norm": 0.1314598023891449, "wall_ms": 827199}
|
| 408 |
+
{"step": 40800, "loss": 0.005686279386281967, "lr": 0.00041868091217393095, "grad_norm": 0.05669712275266647, "wall_ms": 829112}
|
| 409 |
+
{"step": 40900, "loss": 0.004974420182406902, "lr": 0.00041829617637755364, "grad_norm": 0.061953701078891754, "wall_ms": 831027}
|
| 410 |
+
{"step": 41000, "loss": 0.007359669543802738, "lr": 0.0004179107146372711, "grad_norm": 0.08005005866289139, "wall_ms": 832941}
|
| 411 |
+
{"step": 41100, "loss": 0.014179435558617115, "lr": 0.0004175245286666788, "grad_norm": 0.210542231798172, "wall_ms": 834857}
|
| 412 |
+
{"step": 41200, "loss": 0.01584380678832531, "lr": 0.00041713762018259206, "grad_norm": 0.13738597929477692, "wall_ms": 836772}
|
| 413 |
+
{"step": 41300, "loss": 0.008527670986950397, "lr": 0.0004167499909050379, "grad_norm": 0.07592284679412842, "wall_ms": 838689}
|
| 414 |
+
{"step": 41400, "loss": 0.007427709177136421, "lr": 0.0004163616425572479, "grad_norm": 0.11130291223526001, "wall_ms": 840605}
|
| 415 |
+
{"step": 41500, "loss": 0.004690546076744795, "lr": 0.0004159725768656501, "grad_norm": 0.044877201318740845, "wall_ms": 842523}
|
| 416 |
+
{"step": 41600, "loss": 0.008775198832154274, "lr": 0.00041558279555986176, "grad_norm": 0.1672581434249878, "wall_ms": 844440}
|
| 417 |
+
{"step": 41700, "loss": 0.007823841646313667, "lr": 0.0004151923003726813, "grad_norm": 0.08183766156435013, "wall_ms": 846357}
|
| 418 |
+
{"step": 41800, "loss": 0.01984889805316925, "lr": 0.00041480109304008075, "grad_norm": 0.23827452957630157, "wall_ms": 848276}
|
| 419 |
+
{"step": 41900, "loss": 0.01575811207294464, "lr": 0.00041440917530119817, "grad_norm": 0.10787743330001831, "wall_ms": 850194}
|
| 420 |
+
{"step": 42000, "loss": 0.008131194859743118, "lr": 0.0004140165488983296, "grad_norm": 0.10251801460981369, "wall_ms": 852113}
|
| 421 |
+
{"step": 42100, "loss": 0.009560279548168182, "lr": 0.00041362321557692145, "grad_norm": 0.12116432934999466, "wall_ms": 854032}
|
| 422 |
+
{"step": 42200, "loss": 0.014396963641047478, "lr": 0.0004132291770855631, "grad_norm": 0.10045375674962997, "wall_ms": 855951}
|
| 423 |
+
{"step": 42300, "loss": 0.008995674550533295, "lr": 0.00041283443517597864, "grad_norm": 0.09740407019853592, "wall_ms": 857870}
|
| 424 |
+
{"step": 42400, "loss": 0.0053513916209340096, "lr": 0.00041243899160301893, "grad_norm": 0.0671568512916565, "wall_ms": 859789}
|
| 425 |
+
{"step": 42500, "loss": 0.013690905645489693, "lr": 0.0004120428481246548, "grad_norm": 0.12558838725090027, "wall_ms": 861706}
|
| 426 |
+
{"step": 42600, "loss": 0.0087239108979702, "lr": 0.00041164600650196796, "grad_norm": 0.11401980370283127, "wall_ms": 863623}
|
| 427 |
+
{"step": 42700, "loss": 0.01819322630763054, "lr": 0.0004112484684991439, "grad_norm": 0.14392758905887604, "wall_ms": 865540}
|
| 428 |
+
{"step": 42800, "loss": 0.02430196851491928, "lr": 0.00041085023588346417, "grad_norm": 0.1930292695760727, "wall_ms": 867456}
|
| 429 |
+
{"step": 42900, "loss": 0.010126589797437191, "lr": 0.00041045131042529787, "grad_norm": 0.10495217144489288, "wall_ms": 869373}
|
| 430 |
+
{"step": 43000, "loss": 0.008282248862087727, "lr": 0.00041005169389809467, "grad_norm": 0.07858297228813171, "wall_ms": 871290}
|
| 431 |
+
{"step": 43100, "loss": 0.008661068975925446, "lr": 0.0004096513880783759, "grad_norm": 0.17419970035552979, "wall_ms": 873207}
|
| 432 |
+
{"step": 43200, "loss": 0.014065489172935486, "lr": 0.00040925039474572764, "grad_norm": 0.10001479089260101, "wall_ms": 875124}
|
| 433 |
+
{"step": 43300, "loss": 0.005010524298995733, "lr": 0.00040884871568279196, "grad_norm": 0.05056199058890343, "wall_ms": 877041}
|
| 434 |
+
{"step": 43400, "loss": 0.013975264504551888, "lr": 0.0004084463526752598, "grad_norm": 0.11907429993152618, "wall_ms": 878959}
|
| 435 |
+
{"step": 43500, "loss": 0.016739320009946823, "lr": 0.00040804330751186224, "grad_norm": 0.1513577103614807, "wall_ms": 880876}
|
| 436 |
+
{"step": 43600, "loss": 0.013648326508700848, "lr": 0.00040763958198436316, "grad_norm": 0.1299302875995636, "wall_ms": 882794}
|
| 437 |
+
{"step": 43700, "loss": 0.025616688653826714, "lr": 0.00040723517788755103, "grad_norm": 0.17827709019184113, "wall_ms": 884712}
|
| 438 |
+
{"step": 43800, "loss": 0.008630544878542423, "lr": 0.00040683009701923076, "grad_norm": 0.09416767954826355, "wall_ms": 886630}
|
| 439 |
+
{"step": 43900, "loss": 0.015451314859092236, "lr": 0.00040642434118021614, "grad_norm": 0.14151597023010254, "wall_ms": 888548}
|
| 440 |
+
{"step": 44000, "loss": 0.028586314991116524, "lr": 0.0004060179121743214, "grad_norm": 0.24595649540424347, "wall_ms": 890466}
|
| 441 |
+
{"step": 44100, "loss": 0.004711111541837454, "lr": 0.00040561081180835344, "grad_norm": 0.04618645831942558, "wall_ms": 892385}
|
| 442 |
+
{"step": 44200, "loss": 0.010285566560924053, "lr": 0.0004052030418921038, "grad_norm": 0.09462043642997742, "wall_ms": 894304}
|
| 443 |
+
{"step": 44300, "loss": 0.04746420681476593, "lr": 0.0004047946042383406, "grad_norm": 0.2871762812137604, "wall_ms": 896222}
|
| 444 |
+
{"step": 44400, "loss": 0.004222167190164328, "lr": 0.00040438550066280017, "grad_norm": 0.06833445280790329, "wall_ms": 898140}
|
| 445 |
+
{"step": 44500, "loss": 0.011668341234326363, "lr": 0.0004039757329841793, "grad_norm": 0.11328490823507309, "wall_ms": 900058}
|
| 446 |
+
{"step": 44600, "loss": 0.006833279971033335, "lr": 0.0004035653030241274, "grad_norm": 0.07876694202423096, "wall_ms": 901976}
|
| 447 |
+
{"step": 44700, "loss": 0.009702710434794426, "lr": 0.00040315421260723783, "grad_norm": 0.10172110050916672, "wall_ms": 903895}
|
| 448 |
+
{"step": 44800, "loss": 0.004836805630475283, "lr": 0.00040274246356104007, "grad_norm": 0.10262440145015717, "wall_ms": 905814}
|
| 449 |
+
{"step": 44900, "loss": 0.01815442554652691, "lr": 0.0004023300577159916, "grad_norm": 0.16835591197013855, "wall_ms": 907733}
|
| 450 |
+
{"step": 45000, "loss": 0.016567885875701904, "lr": 0.0004019169969054698, "grad_norm": 0.1921650469303131, "wall_ms": 909650}
|
| 451 |
+
{"step": 45100, "loss": 0.01325782760977745, "lr": 0.00040150328296576366, "grad_norm": 0.20094230771064758, "wall_ms": 911569}
|
| 452 |
+
{"step": 45200, "loss": 0.02153337560594082, "lr": 0.00040108891773606587, "grad_norm": 0.18486449122428894, "wall_ms": 913486}
|
| 453 |
+
{"step": 45300, "loss": 0.021984897553920746, "lr": 0.0004006739030584642, "grad_norm": 0.17313362658023834, "wall_ms": 915402}
|
| 454 |
+
{"step": 45400, "loss": 0.014210928231477737, "lr": 0.0004002582407779338, "grad_norm": 0.12400618195533752, "wall_ms": 917319}
|
| 455 |
+
{"step": 45500, "loss": 0.008711465634405613, "lr": 0.0003998419327423286, "grad_norm": 0.08794626593589783, "wall_ms": 919236}
|
| 456 |
+
{"step": 45600, "loss": 0.015320626087486744, "lr": 0.0003994249808023736, "grad_norm": 0.19725404679775238, "wall_ms": 921155}
|
| 457 |
+
{"step": 45700, "loss": 0.007001353893429041, "lr": 0.00039900738681165594, "grad_norm": 0.0652400404214859, "wall_ms": 923072}
|
| 458 |
+
{"step": 45800, "loss": 0.018646374344825745, "lr": 0.0003985891526266172, "grad_norm": 0.24613328278064728, "wall_ms": 924990}
|
| 459 |
+
{"step": 45900, "loss": 0.00914381630718708, "lr": 0.00039817028010654505, "grad_norm": 0.07750286906957626, "wall_ms": 926904}
|
| 460 |
+
{"step": 46000, "loss": 0.001603137468919158, "lr": 0.00039775077111356487, "grad_norm": 0.022243423387408257, "wall_ms": 928817}
|
| 461 |
+
{"step": 46100, "loss": 0.010899793356657028, "lr": 0.0003973306275126315, "grad_norm": 0.14299717545509338, "wall_ms": 930732}
|
| 462 |
+
{"step": 46200, "loss": 0.009920804761350155, "lr": 0.000396909851171521, "grad_norm": 0.08866684883832932, "wall_ms": 932645}
|
| 463 |
+
{"step": 46300, "loss": 0.020827054977416992, "lr": 0.0003964884439608222, "grad_norm": 0.2065972089767456, "wall_ms": 934559}
|
| 464 |
+
{"step": 46400, "loss": 0.005253588315099478, "lr": 0.00039606640775392875, "grad_norm": 0.07971103489398956, "wall_ms": 936472}
|
| 465 |
+
{"step": 46500, "loss": 0.007696210872381926, "lr": 0.0003956437444270303, "grad_norm": 0.0759945809841156, "wall_ms": 938392}
|
| 466 |
+
{"step": 46600, "loss": 0.031049618497490883, "lr": 0.00039522045585910453, "grad_norm": 0.2933987081050873, "wall_ms": 940312}
|
| 467 |
+
{"step": 46700, "loss": 0.010841799899935722, "lr": 0.00039479654393190873, "grad_norm": 0.12424678355455399, "wall_ms": 942232}
|
| 468 |
+
{"step": 46800, "loss": 0.01071688998490572, "lr": 0.00039437201052997114, "grad_norm": 0.10467074066400528, "wall_ms": 944148}
|
| 469 |
+
{"step": 46900, "loss": 0.002265302464365959, "lr": 0.00039394685754058303, "grad_norm": 0.0234000813215971, "wall_ms": 946403}
|
| 470 |
+
{"step": 47000, "loss": 0.01592894271016121, "lr": 0.00039352108685379, "grad_norm": 0.12414422631263733, "wall_ms": 948325}
|
| 471 |
+
{"step": 47100, "loss": 0.004369989037513733, "lr": 0.0003930947003623837, "grad_norm": 0.08298582583665848, "wall_ms": 950248}
|
| 472 |
+
{"step": 47200, "loss": 0.0029629385098814964, "lr": 0.0003926676999618932, "grad_norm": 0.035060085356235504, "wall_ms": 952172}
|
| 473 |
+
{"step": 47300, "loss": 0.007202439941465855, "lr": 0.0003922400875505771, "grad_norm": 0.10406927019357681, "wall_ms": 954096}
|
| 474 |
+
{"step": 47400, "loss": 0.0041802069172263145, "lr": 0.0003918118650294145, "grad_norm": 0.09203027933835983, "wall_ms": 956012}
|
| 475 |
+
{"step": 47500, "loss": 0.011513378471136093, "lr": 0.00039138303430209673, "grad_norm": 0.10743104666471481, "wall_ms": 957926}
|
| 476 |
+
{"step": 47600, "loss": 0.006759416777640581, "lr": 0.00039095359727501914, "grad_norm": 0.13766951858997345, "wall_ms": 959839}
|
| 477 |
+
{"step": 47700, "loss": 0.0024468405172228813, "lr": 0.0003905235558572722, "grad_norm": 0.032276708632707596, "wall_ms": 961753}
|
| 478 |
+
{"step": 47800, "loss": 0.0016580959782004356, "lr": 0.0003900929119606334, "grad_norm": 0.016229595988988876, "wall_ms": 963668}
|
| 479 |
+
{"step": 47900, "loss": 0.003776548895984888, "lr": 0.0003896616674995586, "grad_norm": 0.043889258056879044, "wall_ms": 965584}
|
| 480 |
+
{"step": 48000, "loss": 0.009515472687780857, "lr": 0.0003892298243911733, "grad_norm": 0.08251439779996872, "wall_ms": 967501}
|
| 481 |
+
{"step": 48100, "loss": 0.013699901290237904, "lr": 0.00038879738455526466, "grad_norm": 0.0969061478972435, "wall_ms": 969418}
|
| 482 |
+
{"step": 48200, "loss": 0.003610521089285612, "lr": 0.00038836434991427224, "grad_norm": 0.03731502592563629, "wall_ms": 971334}
|
| 483 |
+
{"step": 48300, "loss": 0.0030777459032833576, "lr": 0.00038793072239328016, "grad_norm": 0.037631772458553314, "wall_ms": 973251}
|
| 484 |
+
{"step": 48400, "loss": 0.0019643139094114304, "lr": 0.00038749650392000824, "grad_norm": 0.022836295887827873, "wall_ms": 975168}
|
| 485 |
+
{"step": 48500, "loss": 0.007064108736813068, "lr": 0.00038706169642480315, "grad_norm": 0.10022050142288208, "wall_ms": 977085}
|
| 486 |
+
{"step": 48600, "loss": 0.008586812764406204, "lr": 0.0003866263018406302, "grad_norm": 0.06404700130224228, "wall_ms": 979001}
|
| 487 |
+
{"step": 48700, "loss": 0.004586075432598591, "lr": 0.00038619032210306483, "grad_norm": 0.03774949163198471, "wall_ms": 980919}
|
| 488 |
+
{"step": 48800, "loss": 0.003081690054386854, "lr": 0.00038575375915028356, "grad_norm": 0.03238743916153908, "wall_ms": 982835}
|
| 489 |
+
{"step": 48900, "loss": 0.007040832191705704, "lr": 0.0003853166149230557, "grad_norm": 0.0736752450466156, "wall_ms": 984752}
|
| 490 |
+
{"step": 49000, "loss": 0.0034203380346298218, "lr": 0.00038487889136473496, "grad_norm": 0.029735539108514786, "wall_ms": 986669}
|
| 491 |
+
{"step": 49100, "loss": 0.013385034166276455, "lr": 0.00038444059042125, "grad_norm": 0.15407967567443848, "wall_ms": 988586}
|
| 492 |
+
{"step": 49200, "loss": 0.013819307088851929, "lr": 0.0003840017140410965, "grad_norm": 0.10832081735134125, "wall_ms": 990502}
|
| 493 |
+
{"step": 49300, "loss": 0.004450330976396799, "lr": 0.00038356226417532853, "grad_norm": 0.04762798547744751, "wall_ms": 992419}
|
| 494 |
+
{"step": 49400, "loss": 0.009436571039259434, "lr": 0.00038312224277754916, "grad_norm": 0.13158932328224182, "wall_ms": 994336}
|
| 495 |
+
{"step": 49500, "loss": 0.005434469319880009, "lr": 0.0003826816518039027, "grad_norm": 0.054796766489744186, "wall_ms": 996433}
|
| 496 |
+
{"step": 49600, "loss": 0.007598974741995335, "lr": 0.00038224049321306517, "grad_norm": 0.1066659539937973, "wall_ms": 998350}
|
| 497 |
+
{"step": 49700, "loss": 0.00960001815110445, "lr": 0.0003817987689662362, "grad_norm": 0.2015618234872818, "wall_ms": 1000269}
|
| 498 |
+
{"step": 49800, "loss": 0.006109618581831455, "lr": 0.00038135648102713003, "grad_norm": 0.06518024206161499, "wall_ms": 1002186}
|
| 499 |
+
{"step": 49900, "loss": 0.0016483115032315254, "lr": 0.00038091363136196686, "grad_norm": 0.021807311102747917, "wall_ms": 1004103}
|
| 500 |
+
{"step": 50000, "loss": 0.013999374583363533, "lr": 0.000380470221939464, "grad_norm": 0.11076150834560394, "wall_ms": 1006020}
|