Upload rank_300/train_log.jsonl with huggingface_hub
Browse files- rank_300/train_log.jsonl +100 -0
rank_300/train_log.jsonl
CHANGED
|
@@ -1398,3 +1398,103 @@
|
|
| 1398 |
{"step": 139800, "loss": 0.2330114096403122, "loss_nce": 0.23276430368423462, "loss_mse": 0.0024710700381547213, "lr": 1.5644027106517515e-05, "grad_norm": 0.12382189184427261, "wall_ms": 1929585}
|
| 1399 |
{"step": 139900, "loss": 0.21208447217941284, "loss_nce": 0.21183769404888153, "loss_mse": 0.0024677743203938007, "lr": 1.5534318900928905e-05, "grad_norm": 0.11637057363986969, "wall_ms": 1930868}
|
| 1400 |
{"step": 140000, "loss": 0.24567827582359314, "loss_nce": 0.2454293817281723, "loss_mse": 0.0024889518972486258, "lr": 1.5425675255794584e-05, "grad_norm": 0.13403667509555817, "wall_ms": 1932117}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1398 |
{"step": 139800, "loss": 0.2330114096403122, "loss_nce": 0.23276430368423462, "loss_mse": 0.0024710700381547213, "lr": 1.5644027106517515e-05, "grad_norm": 0.12382189184427261, "wall_ms": 1929585}
|
| 1399 |
{"step": 139900, "loss": 0.21208447217941284, "loss_nce": 0.21183769404888153, "loss_mse": 0.0024677743203938007, "lr": 1.5534318900928905e-05, "grad_norm": 0.11637057363986969, "wall_ms": 1930868}
|
| 1400 |
{"step": 140000, "loss": 0.24567827582359314, "loss_nce": 0.2454293817281723, "loss_mse": 0.0024889518972486258, "lr": 1.5425675255794584e-05, "grad_norm": 0.13403667509555817, "wall_ms": 1932117}
|
| 1401 |
+
{"step": 140100, "loss": 0.22269222140312195, "loss_nce": 0.22244420647621155, "loss_mse": 0.002480089431628585, "lr": 1.5318096654096976e-05, "grad_norm": 0.1226760745048523, "wall_ms": 1944372}
|
| 1402 |
+
{"step": 140200, "loss": 0.21625323593616486, "loss_nce": 0.21600741147994995, "loss_mse": 0.002458172384649515, "lr": 1.5211583574083892e-05, "grad_norm": 0.11729899048805237, "wall_ms": 1945618}
|
| 1403 |
+
{"step": 140300, "loss": 0.2663312554359436, "loss_nce": 0.26607969403266907, "loss_mse": 0.002515618223696947, "lr": 1.5106136489266233e-05, "grad_norm": 0.12592019140720367, "wall_ms": 1946866}
|
| 1404 |
+
{"step": 140400, "loss": 0.25933143496513367, "loss_nce": 0.2590779662132263, "loss_mse": 0.002534609753638506, "lr": 1.50017558684159e-05, "grad_norm": 0.13398374617099762, "wall_ms": 1948110}
|
| 1405 |
+
{"step": 140500, "loss": 0.26208582520484924, "loss_nce": 0.2618369460105896, "loss_mse": 0.0024887025356292725, "lr": 1.4898442175563888e-05, "grad_norm": 0.13503843545913696, "wall_ms": 1949347}
|
| 1406 |
+
{"step": 140600, "loss": 0.2400626689195633, "loss_nce": 0.23981434106826782, "loss_mse": 0.002483209129422903, "lr": 1.4796195869998034e-05, "grad_norm": 0.1344202607870102, "wall_ms": 1950591}
|
| 1407 |
+
{"step": 140700, "loss": 0.24664832651615143, "loss_nce": 0.24639716744422913, "loss_mse": 0.0025116351898759604, "lr": 1.4695017406260963e-05, "grad_norm": 0.13583223521709442, "wall_ms": 1952175}
|
| 1408 |
+
{"step": 140800, "loss": 0.23106485605239868, "loss_nce": 0.2308165729045868, "loss_mse": 0.0024827804882079363, "lr": 1.4594907234148174e-05, "grad_norm": 0.12462904304265976, "wall_ms": 1953421}
|
| 1409 |
+
{"step": 140900, "loss": 0.207895427942276, "loss_nce": 0.2076457142829895, "loss_mse": 0.002497077686712146, "lr": 1.449586579870613e-05, "grad_norm": 0.11265287548303604, "wall_ms": 1954676}
|
| 1410 |
+
{"step": 141000, "loss": 0.2553400695323944, "loss_nce": 0.2550877332687378, "loss_mse": 0.0025234476197510958, "lr": 1.4397893540230015e-05, "grad_norm": 0.13581621646881104, "wall_ms": 1955918}
|
| 1411 |
+
{"step": 141100, "loss": 0.20879536867141724, "loss_nce": 0.2085493803024292, "loss_mse": 0.002459830604493618, "lr": 1.4300990894261907e-05, "grad_norm": 0.1204482838511467, "wall_ms": 1957159}
|
| 1412 |
+
{"step": 141200, "loss": 0.22138020396232605, "loss_nce": 0.22112959623336792, "loss_mse": 0.002506042830646038, "lr": 1.4205158291588964e-05, "grad_norm": 0.12893767654895782, "wall_ms": 1958401}
|
| 1413 |
+
{"step": 141300, "loss": 0.23647288978099823, "loss_nce": 0.23622457683086395, "loss_mse": 0.0024830906186252832, "lr": 1.4110396158241286e-05, "grad_norm": 0.1284753680229187, "wall_ms": 1959642}
|
| 1414 |
+
{"step": 141400, "loss": 0.22841787338256836, "loss_nce": 0.22816815972328186, "loss_mse": 0.0024970858357846737, "lr": 1.4016704915490192e-05, "grad_norm": 0.12634111940860748, "wall_ms": 1960885}
|
| 1415 |
+
{"step": 141500, "loss": 0.2603078782558441, "loss_nce": 0.2600569427013397, "loss_mse": 0.0025093231815844774, "lr": 1.3924084979846274e-05, "grad_norm": 0.13156482577323914, "wall_ms": 1962129}
|
| 1416 |
+
{"step": 141600, "loss": 0.22182397544384003, "loss_nce": 0.22157667577266693, "loss_mse": 0.0024729808792471886, "lr": 1.3832536763057559e-05, "grad_norm": 0.12174489349126816, "wall_ms": 1963372}
|
| 1417 |
+
{"step": 141700, "loss": 0.23290537297725677, "loss_nce": 0.2326551079750061, "loss_mse": 0.002502627205103636, "lr": 1.3742060672107678e-05, "grad_norm": 0.11423052102327347, "wall_ms": 1964630}
|
| 1418 |
+
{"step": 141800, "loss": 0.22370968759059906, "loss_nce": 0.22346389293670654, "loss_mse": 0.0024578759912401438, "lr": 1.365265710921405e-05, "grad_norm": 0.11490073055028915, "wall_ms": 1965913}
|
| 1419 |
+
{"step": 141900, "loss": 0.21887105703353882, "loss_nce": 0.21862506866455078, "loss_mse": 0.0024599090684205294, "lr": 1.356432647182608e-05, "grad_norm": 0.11958448588848114, "wall_ms": 1967195}
|
| 1420 |
+
{"step": 142000, "loss": 0.21741987764835358, "loss_nce": 0.21717415750026703, "loss_mse": 0.0024572296533733606, "lr": 1.347706915262343e-05, "grad_norm": 0.1243332028388977, "wall_ms": 1968468}
|
| 1421 |
+
{"step": 142100, "loss": 0.22714513540267944, "loss_nce": 0.22689811885356903, "loss_mse": 0.0024702271912246943, "lr": 1.3390885539514283e-05, "grad_norm": 0.13189247250556946, "wall_ms": 1969735}
|
| 1422 |
+
{"step": 142200, "loss": 0.2731291651725769, "loss_nce": 0.27287620306015015, "loss_mse": 0.002529755001887679, "lr": 1.3305776015633551e-05, "grad_norm": 0.1380726397037506, "wall_ms": 1970998}
|
| 1423 |
+
{"step": 142300, "loss": 0.23452328145503998, "loss_nce": 0.23427343368530273, "loss_mse": 0.002498420886695385, "lr": 1.3221740959341242e-05, "grad_norm": 0.1251005083322525, "wall_ms": 1972251}
|
| 1424 |
+
{"step": 142400, "loss": 0.23414160311222076, "loss_nce": 0.23389360308647156, "loss_mse": 0.0024800070095807314, "lr": 1.3138780744220746e-05, "grad_norm": 0.12976250052452087, "wall_ms": 1973494}
|
| 1425 |
+
{"step": 142500, "loss": 0.24702201783657074, "loss_nce": 0.24677535891532898, "loss_mse": 0.0024666429962962866, "lr": 1.3056895739077109e-05, "grad_norm": 0.12575654685497284, "wall_ms": 1974736}
|
| 1426 |
+
{"step": 142600, "loss": 0.22722437977790833, "loss_nce": 0.22697824239730835, "loss_mse": 0.002461363561451435, "lr": 1.2976086307935565e-05, "grad_norm": 0.11825746297836304, "wall_ms": 1975985}
|
| 1427 |
+
{"step": 142700, "loss": 0.23501679301261902, "loss_nce": 0.23476819694042206, "loss_mse": 0.0024860044941306114, "lr": 1.2896352810039735e-05, "grad_norm": 0.1269513964653015, "wall_ms": 1977233}
|
| 1428 |
+
{"step": 142800, "loss": 0.24003541469573975, "loss_nce": 0.23978739976882935, "loss_mse": 0.0024801043327897787, "lr": 1.2817695599850131e-05, "grad_norm": 0.11670839041471481, "wall_ms": 1978482}
|
| 1429 |
+
{"step": 142900, "loss": 0.23804843425750732, "loss_nce": 0.23780159652233124, "loss_mse": 0.0024683892261236906, "lr": 1.2740115027042594e-05, "grad_norm": 0.13980528712272644, "wall_ms": 1979723}
|
| 1430 |
+
{"step": 143000, "loss": 0.23104062676429749, "loss_nce": 0.23079490661621094, "loss_mse": 0.00245716143399477, "lr": 1.2663611436506632e-05, "grad_norm": 0.12598097324371338, "wall_ms": 1980962}
|
| 1431 |
+
{"step": 143100, "loss": 0.2455485612154007, "loss_nce": 0.245292529463768, "loss_mse": 0.0025603335816413164, "lr": 1.2588185168344003e-05, "grad_norm": 0.13091741502285004, "wall_ms": 1982205}
|
| 1432 |
+
{"step": 143200, "loss": 0.21376760303974152, "loss_nce": 0.21352702379226685, "loss_mse": 0.0024057866539806128, "lr": 1.2513836557867131e-05, "grad_norm": 0.12468662112951279, "wall_ms": 1983450}
|
| 1433 |
+
{"step": 143300, "loss": 0.19546130299568176, "loss_nce": 0.1952154040336609, "loss_mse": 0.0024589707609266043, "lr": 1.244056593559772e-05, "grad_norm": 0.1040220856666565, "wall_ms": 1984691}
|
| 1434 |
+
{"step": 143400, "loss": 0.2280956655740738, "loss_nce": 0.2278471738100052, "loss_mse": 0.0024849707260727882, "lr": 1.2368373627265107e-05, "grad_norm": 0.12031874060630798, "wall_ms": 1985932}
|
| 1435 |
+
{"step": 143500, "loss": 0.23261761665344238, "loss_nce": 0.23236846923828125, "loss_mse": 0.0024915477260947227, "lr": 1.2297259953804956e-05, "grad_norm": 0.12389998137950897, "wall_ms": 1987180}
|
| 1436 |
+
{"step": 143600, "loss": 0.21652300655841827, "loss_nce": 0.2162771224975586, "loss_mse": 0.0024589041713625193, "lr": 1.2227225231357765e-05, "grad_norm": 0.10928698629140854, "wall_ms": 1988446}
|
| 1437 |
+
{"step": 143700, "loss": 0.2502976953983307, "loss_nce": 0.25004842877388, "loss_mse": 0.0024925379548221827, "lr": 1.215826977126755e-05, "grad_norm": 0.1292482614517212, "wall_ms": 1989697}
|
| 1438 |
+
{"step": 143800, "loss": 0.21255305409431458, "loss_nce": 0.21230006217956543, "loss_mse": 0.00252998317591846, "lr": 1.2090393880080306e-05, "grad_norm": 0.11934549361467361, "wall_ms": 1990939}
|
| 1439 |
+
{"step": 143900, "loss": 0.23774197697639465, "loss_nce": 0.23749570548534393, "loss_mse": 0.00246264785528183, "lr": 1.2023597859542778e-05, "grad_norm": 0.1371724009513855, "wall_ms": 1992180}
|
| 1440 |
+
{"step": 144000, "loss": 0.205282062292099, "loss_nce": 0.2050272673368454, "loss_mse": 0.002547919051721692, "lr": 1.1957882006601099e-05, "grad_norm": 0.1151367574930191, "wall_ms": 1993420}
|
| 1441 |
+
{"step": 144100, "loss": 0.24529199302196503, "loss_nce": 0.24504053592681885, "loss_mse": 0.0025146387051790953, "lr": 1.1893246613399433e-05, "grad_norm": 0.12776696681976318, "wall_ms": 1994663}
|
| 1442 |
+
{"step": 144200, "loss": 0.21452246606349945, "loss_nce": 0.21426689624786377, "loss_mse": 0.002555709332227707, "lr": 1.1829691967278613e-05, "grad_norm": 0.11633110791444778, "wall_ms": 1995916}
|
| 1443 |
+
{"step": 144300, "loss": 0.24163123965263367, "loss_nce": 0.24137867987155914, "loss_mse": 0.0025256178341805935, "lr": 1.1767218350775084e-05, "grad_norm": 0.1395653486251831, "wall_ms": 1997166}
|
| 1444 |
+
{"step": 144400, "loss": 0.23066522181034088, "loss_nce": 0.23041877150535583, "loss_mse": 0.002464457880705595, "lr": 1.1705826041619372e-05, "grad_norm": 0.12512138485908508, "wall_ms": 1998428}
|
| 1445 |
+
{"step": 144500, "loss": 0.1956569403409958, "loss_nce": 0.19541296362876892, "loss_mse": 0.002439767587929964, "lr": 1.1645515312735035e-05, "grad_norm": 0.11209730803966522, "wall_ms": 1999689}
|
| 1446 |
+
{"step": 144600, "loss": 0.21273142099380493, "loss_nce": 0.2124851644039154, "loss_mse": 0.0024625305086374283, "lr": 1.1586286432237424e-05, "grad_norm": 0.11400073021650314, "wall_ms": 2000936}
|
| 1447 |
+
{"step": 144700, "loss": 0.23203159868717194, "loss_nce": 0.23177845776081085, "loss_mse": 0.0025314041413366795, "lr": 1.1528139663432368e-05, "grad_norm": 0.1262088119983673, "wall_ms": 2002202}
|
| 1448 |
+
{"step": 144800, "loss": 0.22705376148223877, "loss_nce": 0.22680653631687164, "loss_mse": 0.0024722320958971977, "lr": 1.1471075264815204e-05, "grad_norm": 0.11873861402273178, "wall_ms": 2003447}
|
| 1449 |
+
{"step": 144900, "loss": 0.22967202961444855, "loss_nce": 0.22942432761192322, "loss_mse": 0.002477068454027176, "lr": 1.1415093490069456e-05, "grad_norm": 0.131086528301239, "wall_ms": 2004703}
|
| 1450 |
+
{"step": 145000, "loss": 0.2258954793214798, "loss_nce": 0.2256489247083664, "loss_mse": 0.00246558990329504, "lr": 1.1360194588065799e-05, "grad_norm": 0.12227047979831696, "wall_ms": 2005965}
|
| 1451 |
+
{"step": 145100, "loss": 0.20710648596286774, "loss_nce": 0.20685680210590363, "loss_mse": 0.0024968667421489954, "lr": 1.130637880286091e-05, "grad_norm": 0.10659901797771454, "wall_ms": 2007216}
|
| 1452 |
+
{"step": 145200, "loss": 0.21773891150951385, "loss_nce": 0.21749337017536163, "loss_mse": 0.002455452224239707, "lr": 1.1253646373696394e-05, "grad_norm": 0.12072473764419556, "wall_ms": 2008475}
|
| 1453 |
+
{"step": 145300, "loss": 0.24378575384616852, "loss_nce": 0.24353551864624023, "loss_mse": 0.002502372721210122, "lr": 1.1201997534997762e-05, "grad_norm": 0.12994515895843506, "wall_ms": 2009720}
|
| 1454 |
+
{"step": 145400, "loss": 0.21622486412525177, "loss_nce": 0.2159787118434906, "loss_mse": 0.002461487427353859, "lr": 1.1151432516373298e-05, "grad_norm": 0.1222911924123764, "wall_ms": 2010985}
|
| 1455 |
+
{"step": 145500, "loss": 0.2384205311536789, "loss_nce": 0.23817209899425507, "loss_mse": 0.002484358148649335, "lr": 1.1101951542613162e-05, "grad_norm": 0.11630015820264816, "wall_ms": 2012250}
|
| 1456 |
+
{"step": 145600, "loss": 0.2503690719604492, "loss_nce": 0.2501164674758911, "loss_mse": 0.0025259172543883324, "lr": 1.1053554833688237e-05, "grad_norm": 0.131388857960701, "wall_ms": 2013505}
|
| 1457 |
+
{"step": 145700, "loss": 0.2239970862865448, "loss_nce": 0.22374993562698364, "loss_mse": 0.0024715117178857327, "lr": 1.1006242604749301e-05, "grad_norm": 0.11782564967870712, "wall_ms": 2014773}
|
| 1458 |
+
{"step": 145800, "loss": 0.2156093567609787, "loss_nce": 0.21535897254943848, "loss_mse": 0.002503858646377921, "lr": 1.0960015066126008e-05, "grad_norm": 0.12348375469446182, "wall_ms": 2016039}
|
| 1459 |
+
{"step": 145900, "loss": 0.2465624064207077, "loss_nce": 0.24631617963314056, "loss_mse": 0.002462268341332674, "lr": 1.091487242332589e-05, "grad_norm": 0.12954631447792053, "wall_ms": 2017303}
|
| 1460 |
+
{"step": 146000, "loss": 0.2327023148536682, "loss_nce": 0.23245425522327423, "loss_mse": 0.0024805464781820774, "lr": 1.0870814877033537e-05, "grad_norm": 0.11864183098077774, "wall_ms": 2018566}
|
| 1461 |
+
{"step": 146100, "loss": 0.2242731750011444, "loss_nce": 0.22402286529541016, "loss_mse": 0.0025030835531651974, "lr": 1.0827842623109696e-05, "grad_norm": 0.1200995221734047, "wall_ms": 2019827}
|
| 1462 |
+
{"step": 146200, "loss": 0.22511914372444153, "loss_nce": 0.22486907243728638, "loss_mse": 0.0025006665382534266, "lr": 1.0785955852590329e-05, "grad_norm": 0.1207338199019432, "wall_ms": 2021092}
|
| 1463 |
+
{"step": 146300, "loss": 0.20672820508480072, "loss_nce": 0.20648011565208435, "loss_mse": 0.002480926690623164, "lr": 1.0745154751685839e-05, "grad_norm": 0.11875727027654648, "wall_ms": 2022363}
|
| 1464 |
+
{"step": 146400, "loss": 0.24303878843784332, "loss_nce": 0.24278876185417175, "loss_mse": 0.002500328700989485, "lr": 1.0705439501780237e-05, "grad_norm": 0.12533506751060486, "wall_ms": 2023635}
|
| 1465 |
+
{"step": 146500, "loss": 0.24535119533538818, "loss_nce": 0.2450999766588211, "loss_mse": 0.0025122559163719416, "lr": 1.066681027943024e-05, "grad_norm": 0.12788110971450806, "wall_ms": 2024911}
|
| 1466 |
+
{"step": 146600, "loss": 0.21476982533931732, "loss_nce": 0.21452537178993225, "loss_mse": 0.0024445930030196905, "lr": 1.0629267256364616e-05, "grad_norm": 0.11499352008104324, "wall_ms": 2026189}
|
| 1467 |
+
{"step": 146700, "loss": 0.22948743402957916, "loss_nce": 0.2292376011610031, "loss_mse": 0.002498346148058772, "lr": 1.059281059948335e-05, "grad_norm": 0.12508335709571838, "wall_ms": 2027456}
|
| 1468 |
+
{"step": 146800, "loss": 0.2377084195613861, "loss_nce": 0.2374579757452011, "loss_mse": 0.002504480304196477, "lr": 1.0557440470856894e-05, "grad_norm": 0.1281760036945343, "wall_ms": 2028702}
|
| 1469 |
+
{"step": 146900, "loss": 0.24010393023490906, "loss_nce": 0.2398526817560196, "loss_mse": 0.0025124563835561275, "lr": 1.0523157027725503e-05, "grad_norm": 0.13565482199192047, "wall_ms": 2029956}
|
| 1470 |
+
{"step": 147000, "loss": 0.24382126331329346, "loss_nce": 0.24356985092163086, "loss_mse": 0.0025140719953924417, "lr": 1.0489960422498462e-05, "grad_norm": 0.12533855438232422, "wall_ms": 2031219}
|
| 1471 |
+
{"step": 147100, "loss": 0.22467631101608276, "loss_nce": 0.22442564368247986, "loss_mse": 0.002506664954125881, "lr": 1.0457850802753495e-05, "grad_norm": 0.126227468252182, "wall_ms": 2032480}
|
| 1472 |
+
{"step": 147200, "loss": 0.24448621273040771, "loss_nce": 0.24423277378082275, "loss_mse": 0.002534456318244338, "lr": 1.0426828311236027e-05, "grad_norm": 0.13216720521450043, "wall_ms": 2033738}
|
| 1473 |
+
{"step": 147300, "loss": 0.22558313608169556, "loss_nce": 0.2253330945968628, "loss_mse": 0.002500434871762991, "lr": 1.0396893085858557e-05, "grad_norm": 0.11895480751991272, "wall_ms": 2034997}
|
| 1474 |
+
{"step": 147400, "loss": 0.20862285792827606, "loss_nce": 0.20837348699569702, "loss_mse": 0.0024937642738223076, "lr": 1.0368045259700091e-05, "grad_norm": 0.116340272128582, "wall_ms": 2036259}
|
| 1475 |
+
{"step": 147500, "loss": 0.1977211982011795, "loss_nce": 0.19747787714004517, "loss_mse": 0.002433160552754998, "lr": 1.0340284961005595e-05, "grad_norm": 0.11327332258224487, "wall_ms": 2037507}
|
| 1476 |
+
{"step": 147600, "loss": 0.23517656326293945, "loss_nce": 0.23492717742919922, "loss_mse": 0.0024939225986599922, "lr": 1.0313612313185286e-05, "grad_norm": 0.13022306561470032, "wall_ms": 2038771}
|
| 1477 |
+
{"step": 147700, "loss": 0.22446981072425842, "loss_nce": 0.22422054409980774, "loss_mse": 0.002492631087079644, "lr": 1.028802743481415e-05, "grad_norm": 0.13733325898647308, "wall_ms": 2040044}
|
| 1478 |
+
{"step": 147800, "loss": 0.22619645297527313, "loss_nce": 0.2259499728679657, "loss_mse": 0.00246481504291296, "lr": 1.0263530439631468e-05, "grad_norm": 0.1179613545536995, "wall_ms": 2041319}
|
| 1479 |
+
{"step": 147900, "loss": 0.21777579188346863, "loss_nce": 0.21752715110778809, "loss_mse": 0.002486392157152295, "lr": 1.024012143654026e-05, "grad_norm": 0.11184456199407578, "wall_ms": 2042585}
|
| 1480 |
+
{"step": 148000, "loss": 0.21164648234844208, "loss_nce": 0.2113991677761078, "loss_mse": 0.002473160857334733, "lr": 1.0217800529606751e-05, "grad_norm": 0.11802244186401367, "wall_ms": 2043856}
|
| 1481 |
+
{"step": 148100, "loss": 0.24248768389225006, "loss_nce": 0.2422335147857666, "loss_mse": 0.002541732508689165, "lr": 1.0196567818060029e-05, "grad_norm": 0.11787931621074677, "wall_ms": 2045126}
|
| 1482 |
+
{"step": 148200, "loss": 0.23774594068527222, "loss_nce": 0.23749804496765137, "loss_mse": 0.002478939015418291, "lr": 1.0176423396291502e-05, "grad_norm": 0.12778763473033905, "wall_ms": 2046402}
|
| 1483 |
+
{"step": 148300, "loss": 0.22988922894001007, "loss_nce": 0.22964197397232056, "loss_mse": 0.0024724809918552637, "lr": 1.0157367353854531e-05, "grad_norm": 0.13523074984550476, "wall_ms": 2047687}
|
| 1484 |
+
{"step": 148400, "loss": 0.19948220252990723, "loss_nce": 0.19923600554466248, "loss_mse": 0.0024620164185762405, "lr": 1.0139399775464014e-05, "grad_norm": 0.1108655035495758, "wall_ms": 2048962}
|
| 1485 |
+
{"step": 148500, "loss": 0.2326967418193817, "loss_nce": 0.23244546353816986, "loss_mse": 0.0025127993430942297, "lr": 1.0122520740995986e-05, "grad_norm": 0.12606281042099, "wall_ms": 2050227}
|
| 1486 |
+
{"step": 148600, "loss": 0.23978565633296967, "loss_nce": 0.23953601717948914, "loss_mse": 0.002496371977031231, "lr": 1.0106730325487338e-05, "grad_norm": 0.13003847002983093, "wall_ms": 2051486}
|
| 1487 |
+
{"step": 148700, "loss": 0.21825183928012848, "loss_nce": 0.21800190210342407, "loss_mse": 0.0024993596598505974, "lr": 1.0092028599135339e-05, "grad_norm": 0.11861986666917801, "wall_ms": 2052750}
|
| 1488 |
+
{"step": 148800, "loss": 0.22292622923851013, "loss_nce": 0.22267812490463257, "loss_mse": 0.002481090137735009, "lr": 1.0078415627297567e-05, "grad_norm": 0.12601128220558167, "wall_ms": 2054024}
|
| 1489 |
+
{"step": 148900, "loss": 0.19816111028194427, "loss_nce": 0.19791463017463684, "loss_mse": 0.002464830409735441, "lr": 1.0065891470491296e-05, "grad_norm": 0.11937720328569412, "wall_ms": 2055285}
|
| 1490 |
+
{"step": 149000, "loss": 0.24295459687709808, "loss_nce": 0.24270185828208923, "loss_mse": 0.0025273922365158796, "lr": 1.0054456184393565e-05, "grad_norm": 0.128482386469841, "wall_ms": 2056538}
|
| 1491 |
+
{"step": 149100, "loss": 0.21177460253238678, "loss_nce": 0.21152889728546143, "loss_mse": 0.002457062480971217, "lr": 1.0044109819840656e-05, "grad_norm": 0.12478934973478317, "wall_ms": 2057809}
|
| 1492 |
+
{"step": 149200, "loss": 0.22744649648666382, "loss_nce": 0.22719857096672058, "loss_mse": 0.0024793108459562063, "lr": 1.003485242282801e-05, "grad_norm": 0.1287311464548111, "wall_ms": 2059077}
|
| 1493 |
+
{"step": 149300, "loss": 0.2237858921289444, "loss_nce": 0.22353583574295044, "loss_mse": 0.0025005945935845375, "lr": 1.002668403450998e-05, "grad_norm": 0.12042998522520065, "wall_ms": 2060349}
|
| 1494 |
+
{"step": 149400, "loss": 0.22772358357906342, "loss_nce": 0.227473184466362, "loss_mse": 0.0025039720349013805, "lr": 1.0019604691199735e-05, "grad_norm": 0.11767154186964035, "wall_ms": 2061621}
|
| 1495 |
+
{"step": 149500, "loss": 0.23845434188842773, "loss_nce": 0.23820644617080688, "loss_mse": 0.0024789804592728615, "lr": 1.0013614424368916e-05, "grad_norm": 0.12415837496519089, "wall_ms": 2062888}
|
| 1496 |
+
{"step": 149600, "loss": 0.2305118292570114, "loss_nce": 0.23026657104492188, "loss_mse": 0.002452544402331114, "lr": 1.0008713260647647e-05, "grad_norm": 0.12466101348400116, "wall_ms": 2064158}
|
| 1497 |
+
{"step": 149700, "loss": 0.23565326631069183, "loss_nce": 0.23540055751800537, "loss_mse": 0.002527112141251564, "lr": 1.0004901221824372e-05, "grad_norm": 0.1246560588479042, "wall_ms": 2065424}
|
| 1498 |
+
{"step": 149800, "loss": 0.22576335072517395, "loss_nce": 0.22551582753658295, "loss_mse": 0.0024751615710556507, "lr": 1.0002178324845794e-05, "grad_norm": 0.11215802282094955, "wall_ms": 2066699}
|
| 1499 |
+
{"step": 149900, "loss": 0.24257613718509674, "loss_nce": 0.24232470989227295, "loss_mse": 0.002514241263270378, "lr": 1.0000544581816693e-05, "grad_norm": 0.12875066697597504, "wall_ms": 2067976}
|
| 1500 |
+
{"step": 150000, "loss": 0.2361295372247696, "loss_nce": 0.23587825894355774, "loss_mse": 0.002512819366529584, "lr": 1e-05, "grad_norm": 0.13920067250728607, "wall_ms": 2069248}
|