Upload rank_300/train_log.jsonl with huggingface_hub
Browse files- rank_300/train_log.jsonl +100 -0
rank_300/train_log.jsonl
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"step": 100, "loss": 0.501089334487915, "loss_nce": 0.4996451437473297, "loss_mse": 0.0014441837556660175, "lr": 5e-05, "grad_norm": 0.18229807913303375, "wall_ms": 11774}
|
| 2 |
+
{"step": 200, "loss": 0.484137624502182, "loss_nce": 0.4825816750526428, "loss_mse": 0.0015559502644464374, "lr": 0.0001, "grad_norm": 0.16905367374420166, "wall_ms": 20610}
|
| 3 |
+
{"step": 300, "loss": 0.48664137721061707, "loss_nce": 0.4850168526172638, "loss_mse": 0.0016245124861598015, "lr": 0.00015, "grad_norm": 0.19781707227230072, "wall_ms": 29423}
|
| 4 |
+
{"step": 400, "loss": 0.4704655408859253, "loss_nce": 0.4688371419906616, "loss_mse": 0.0016284098383039236, "lr": 0.0002, "grad_norm": 0.17539408802986145, "wall_ms": 38294}
|
| 5 |
+
{"step": 500, "loss": 0.39263051748275757, "loss_nce": 0.39090198278427124, "loss_mse": 0.0017285386566072702, "lr": 0.00025, "grad_norm": 0.15555043518543243, "wall_ms": 47170}
|
| 6 |
+
{"step": 600, "loss": 0.40355363488197327, "loss_nce": 0.4018084406852722, "loss_mse": 0.0017451856983825564, "lr": 0.0003, "grad_norm": 0.17472311854362488, "wall_ms": 56054}
|
| 7 |
+
{"step": 700, "loss": 0.39552927017211914, "loss_nce": 0.3936971426010132, "loss_mse": 0.0018321232637390494, "lr": 0.00035, "grad_norm": 0.17999093234539032, "wall_ms": 64946}
|
| 8 |
+
{"step": 800, "loss": 0.3570287823677063, "loss_nce": 0.35523855686187744, "loss_mse": 0.0017902200343087316, "lr": 0.0004, "grad_norm": 0.15141135454177856, "wall_ms": 73823}
|
| 9 |
+
{"step": 900, "loss": 0.40242981910705566, "loss_nce": 0.400546133518219, "loss_mse": 0.0018836923409253359, "lr": 0.00045000000000000004, "grad_norm": 0.1794588714838028, "wall_ms": 82683}
|
| 10 |
+
{"step": 1000, "loss": 0.43191301822662354, "loss_nce": 0.42999595403671265, "loss_mse": 0.0019170587183907628, "lr": 0.0005, "grad_norm": 0.17408491671085358, "wall_ms": 91544}
|
| 11 |
+
{"step": 1100, "loss": 0.359700471162796, "loss_nce": 0.35774821043014526, "loss_mse": 0.0019522578222677112, "lr": 0.0004999994554181833, "grad_norm": 0.1641308218240738, "wall_ms": 100412}
|
| 12 |
+
{"step": 1200, "loss": 0.3165666162967682, "loss_nce": 0.3145831227302551, "loss_mse": 0.0019834854174405336, "lr": 0.0004999978216751542, "grad_norm": 0.1632140427827835, "wall_ms": 109271}
|
| 13 |
+
{"step": 1300, "loss": 0.3017593324184418, "loss_nce": 0.2997354567050934, "loss_mse": 0.002023872919380665, "lr": 0.0004999950987781756, "grad_norm": 0.1413581520318985, "wall_ms": 118150}
|
| 14 |
+
{"step": 1400, "loss": 0.3471393883228302, "loss_nce": 0.34503689408302307, "loss_mse": 0.002102494239807129, "lr": 0.0004999912867393524, "grad_norm": 0.15548814833164215, "wall_ms": 127032}
|
| 15 |
+
{"step": 1500, "loss": 0.3171663284301758, "loss_nce": 0.31514063477516174, "loss_mse": 0.0020256845746189356, "lr": 0.0004999863855756311, "grad_norm": 0.1406836062669754, "wall_ms": 135920}
|
| 16 |
+
{"step": 1600, "loss": 0.3501623570919037, "loss_nce": 0.3480682373046875, "loss_mse": 0.002094127470627427, "lr": 0.0004999803953088003, "grad_norm": 0.16040804982185364, "wall_ms": 144814}
|
| 17 |
+
{"step": 1700, "loss": 0.3274966776371002, "loss_nce": 0.3253595232963562, "loss_mse": 0.002137151313945651, "lr": 0.00049997331596549, "grad_norm": 0.15181374549865723, "wall_ms": 153705}
|
| 18 |
+
{"step": 1800, "loss": 0.3372095227241516, "loss_nce": 0.33506420254707336, "loss_mse": 0.0021453341469168663, "lr": 0.000499965147577172, "grad_norm": 0.1453600972890854, "wall_ms": 162600}
|
| 19 |
+
{"step": 1900, "loss": 0.34425097703933716, "loss_nce": 0.34209567308425903, "loss_mse": 0.0021553144324570894, "lr": 0.0004999558901801592, "grad_norm": 0.15982747077941895, "wall_ms": 171499}
|
| 20 |
+
{"step": 2000, "loss": 0.2859135866165161, "loss_nce": 0.2837199866771698, "loss_mse": 0.0021936087869107723, "lr": 0.0004999455438156064, "grad_norm": 0.14673078060150146, "wall_ms": 180409}
|
| 21 |
+
{"step": 2100, "loss": 0.315708190202713, "loss_nce": 0.3135884404182434, "loss_mse": 0.002119738608598709, "lr": 0.0004999341085295087, "grad_norm": 0.1467268168926239, "wall_ms": 189321}
|
| 22 |
+
{"step": 2200, "loss": 0.29786819219589233, "loss_nce": 0.2956554889678955, "loss_mse": 0.0022126883268356323, "lr": 0.0004999215843727024, "grad_norm": 0.13532409071922302, "wall_ms": 198231}
|
| 23 |
+
{"step": 2300, "loss": 0.3103567063808441, "loss_nce": 0.30816522240638733, "loss_mse": 0.0021914849057793617, "lr": 0.0004999079714008647, "grad_norm": 0.1568823754787445, "wall_ms": 207135}
|
| 24 |
+
{"step": 2400, "loss": 0.32511648535728455, "loss_nce": 0.3229205012321472, "loss_mse": 0.002195971552282572, "lr": 0.0004998932696745126, "grad_norm": 0.14981402456760406, "wall_ms": 216058}
|
| 25 |
+
{"step": 2500, "loss": 0.3114720582962036, "loss_nce": 0.30925604701042175, "loss_mse": 0.002216022927314043, "lr": 0.000499877479259004, "grad_norm": 0.1512657254934311, "wall_ms": 224967}
|
| 26 |
+
{"step": 2600, "loss": 0.28301307559013367, "loss_nce": 0.28085240721702576, "loss_mse": 0.002160655800253153, "lr": 0.000499860600224536, "grad_norm": 0.13976918160915375, "wall_ms": 233879}
|
| 27 |
+
{"step": 2700, "loss": 0.30275171995162964, "loss_nce": 0.300527960062027, "loss_mse": 0.002223747316747904, "lr": 0.0004998426326461454, "grad_norm": 0.14642079174518585, "wall_ms": 242780}
|
| 28 |
+
{"step": 2800, "loss": 0.3025912046432495, "loss_nce": 0.30031895637512207, "loss_mse": 0.0022722387220710516, "lr": 0.0004998235766037085, "grad_norm": 0.15302853286266327, "wall_ms": 251683}
|
| 29 |
+
{"step": 2900, "loss": 0.3400624692440033, "loss_nce": 0.3377612829208374, "loss_mse": 0.0023011895827949047, "lr": 0.00049980343218194, "grad_norm": 0.15293024480342865, "wall_ms": 260573}
|
| 30 |
+
{"step": 3000, "loss": 0.2964795231819153, "loss_nce": 0.29424798488616943, "loss_mse": 0.002231535967439413, "lr": 0.0004997821994703933, "grad_norm": 0.14758272469043732, "wall_ms": 269467}
|
| 31 |
+
{"step": 3100, "loss": 0.26985475420951843, "loss_nce": 0.2676219642162323, "loss_mse": 0.002232786500826478, "lr": 0.0004997598785634597, "grad_norm": 0.12887035310268402, "wall_ms": 278364}
|
| 32 |
+
{"step": 3200, "loss": 0.3123099207878113, "loss_nce": 0.3101000189781189, "loss_mse": 0.0022098987828940153, "lr": 0.0004997364695603685, "grad_norm": 0.14686059951782227, "wall_ms": 287265}
|
| 33 |
+
{"step": 3300, "loss": 0.28545060753822327, "loss_nce": 0.2832489311695099, "loss_mse": 0.0022016644943505526, "lr": 0.0004997119725651858, "grad_norm": 0.13913528621196747, "wall_ms": 296160}
|
| 34 |
+
{"step": 3400, "loss": 0.26727449893951416, "loss_nce": 0.26503369212150574, "loss_mse": 0.0022407949436455965, "lr": 0.0004996863876868148, "grad_norm": 0.12627901136875153, "wall_ms": 305051}
|
| 35 |
+
{"step": 3500, "loss": 0.3241628408432007, "loss_nce": 0.3218540847301483, "loss_mse": 0.0023087619338184595, "lr": 0.0004996597150389944, "grad_norm": 0.16261722147464752, "wall_ms": 313943}
|
| 36 |
+
{"step": 3600, "loss": 0.32576003670692444, "loss_nce": 0.3234596252441406, "loss_mse": 0.0023004214745014906, "lr": 0.0004996319547402999, "grad_norm": 0.1622830033302307, "wall_ms": 322838}
|
| 37 |
+
{"step": 3700, "loss": 0.31639134883880615, "loss_nce": 0.3141058385372162, "loss_mse": 0.002285504713654518, "lr": 0.0004996031069141414, "grad_norm": 0.160132497549057, "wall_ms": 331734}
|
| 38 |
+
{"step": 3800, "loss": 0.2731849253177643, "loss_nce": 0.2708495855331421, "loss_mse": 0.0023353509604930878, "lr": 0.000499573171688764, "grad_norm": 0.13890595734119415, "wall_ms": 340634}
|
| 39 |
+
{"step": 3900, "loss": 0.3135264813899994, "loss_nce": 0.311246782541275, "loss_mse": 0.0022797039709985256, "lr": 0.0004995421491972465, "grad_norm": 0.13511784374713898, "wall_ms": 349517}
|
| 40 |
+
{"step": 4000, "loss": 0.32762280106544495, "loss_nce": 0.3252915143966675, "loss_mse": 0.0023312850389629602, "lr": 0.0004995100395775015, "grad_norm": 0.16005538403987885, "wall_ms": 358404}
|
| 41 |
+
{"step": 4100, "loss": 0.3300316333770752, "loss_nce": 0.32772496342658997, "loss_mse": 0.0023066711146384478, "lr": 0.0004994768429722744, "grad_norm": 0.1550389528274536, "wall_ms": 367304}
|
| 42 |
+
{"step": 4200, "loss": 0.2944837808609009, "loss_nce": 0.2921549379825592, "loss_mse": 0.0023288519587367773, "lr": 0.0004994425595291432, "grad_norm": 0.14857468008995056, "wall_ms": 376211}
|
| 43 |
+
{"step": 4300, "loss": 0.311707079410553, "loss_nce": 0.30939149856567383, "loss_mse": 0.0023155692033469677, "lr": 0.0004994071894005168, "grad_norm": 0.15044018626213074, "wall_ms": 385102}
|
| 44 |
+
{"step": 4400, "loss": 0.3000284433364868, "loss_nce": 0.2977370619773865, "loss_mse": 0.002291391370818019, "lr": 0.0004993707327436355, "grad_norm": 0.14315077662467957, "wall_ms": 394004}
|
| 45 |
+
{"step": 4500, "loss": 0.32898834347724915, "loss_nce": 0.32665640115737915, "loss_mse": 0.0023319313768297434, "lr": 0.0004993331897205698, "grad_norm": 0.14607210457324982, "wall_ms": 402894}
|
| 46 |
+
{"step": 4600, "loss": 0.28629907965660095, "loss_nce": 0.28398939967155457, "loss_mse": 0.002309668343514204, "lr": 0.0004992945604982198, "grad_norm": 0.14565175771713257, "wall_ms": 411793}
|
| 47 |
+
{"step": 4700, "loss": 0.32131484150886536, "loss_nce": 0.31899070739746094, "loss_mse": 0.0023241310846060514, "lr": 0.0004992548452483141, "grad_norm": 0.14076337218284607, "wall_ms": 420689}
|
| 48 |
+
{"step": 4800, "loss": 0.27338123321533203, "loss_nce": 0.27108296751976013, "loss_mse": 0.002298270585015416, "lr": 0.0004992140441474097, "grad_norm": 0.15309779345989227, "wall_ms": 429578}
|
| 49 |
+
{"step": 4900, "loss": 0.30472123622894287, "loss_nce": 0.30241915583610535, "loss_mse": 0.0023020796943455935, "lr": 0.0004991721573768904, "grad_norm": 0.15478618443012238, "wall_ms": 438477}
|
| 50 |
+
{"step": 5000, "loss": 0.28676265478134155, "loss_nce": 0.2844533920288086, "loss_mse": 0.0023092497140169144, "lr": 0.0004991291851229665, "grad_norm": 0.1442706137895584, "wall_ms": 447369}
|
| 51 |
+
{"step": 5100, "loss": 0.2795954644680023, "loss_nce": 0.27724888920783997, "loss_mse": 0.00234658713452518, "lr": 0.0004990851275766741, "grad_norm": 0.13814984261989594, "wall_ms": 456265}
|
| 52 |
+
{"step": 5200, "loss": 0.2962324619293213, "loss_nce": 0.2938932180404663, "loss_mse": 0.0023392403963953257, "lr": 0.000499039984933874, "grad_norm": 0.14499756693840027, "wall_ms": 465175}
|
| 53 |
+
{"step": 5300, "loss": 0.3187069296836853, "loss_nce": 0.3163717985153198, "loss_mse": 0.0023351332638412714, "lr": 0.0004989937573952507, "grad_norm": 0.14654351770877838, "wall_ms": 474081}
|
| 54 |
+
{"step": 5400, "loss": 0.282958984375, "loss_nce": 0.2805725038051605, "loss_mse": 0.0023864840622991323, "lr": 0.0004989464451663118, "grad_norm": 0.14625544846057892, "wall_ms": 482980}
|
| 55 |
+
{"step": 5500, "loss": 0.277175635099411, "loss_nce": 0.2748992145061493, "loss_mse": 0.002276418264955282, "lr": 0.0004988980484573869, "grad_norm": 0.1449975073337555, "wall_ms": 491880}
|
| 56 |
+
{"step": 5600, "loss": 0.33982449769973755, "loss_nce": 0.3374886214733124, "loss_mse": 0.00233587552793324, "lr": 0.0004988485674836267, "grad_norm": 0.16533127427101135, "wall_ms": 500777}
|
| 57 |
+
{"step": 5700, "loss": 0.3041359484195709, "loss_nce": 0.3017471134662628, "loss_mse": 0.00238884543068707, "lr": 0.0004987980024650023, "grad_norm": 0.15808981657028198, "wall_ms": 509675}
|
| 58 |
+
{"step": 5800, "loss": 0.2817055583000183, "loss_nce": 0.27930617332458496, "loss_mse": 0.002399372635409236, "lr": 0.0004987463536263036, "grad_norm": 0.13438743352890015, "wall_ms": 518617}
|
| 59 |
+
{"step": 5900, "loss": 0.3027260899543762, "loss_nce": 0.30034348368644714, "loss_mse": 0.002382599748671055, "lr": 0.0004986936211971391, "grad_norm": 0.15564176440238953, "wall_ms": 527573}
|
| 60 |
+
{"step": 6000, "loss": 0.2899472415447235, "loss_nce": 0.2875589430332184, "loss_mse": 0.0023883057292550802, "lr": 0.0004986398054119342, "grad_norm": 0.135450541973114, "wall_ms": 536492}
|
| 61 |
+
{"step": 6100, "loss": 0.2933296859264374, "loss_nce": 0.2909911274909973, "loss_mse": 0.0023385595995932817, "lr": 0.0004985849065099305, "grad_norm": 0.14363089203834534, "wall_ms": 545398}
|
| 62 |
+
{"step": 6200, "loss": 0.2618044316768646, "loss_nce": 0.25945180654525757, "loss_mse": 0.0023526379372924566, "lr": 0.0004985289247351848, "grad_norm": 0.13597869873046875, "wall_ms": 554376}
|
| 63 |
+
{"step": 6300, "loss": 0.2766004800796509, "loss_nce": 0.27425825595855713, "loss_mse": 0.002342228079214692, "lr": 0.0004984718603365676, "grad_norm": 0.1413934826850891, "wall_ms": 563300}
|
| 64 |
+
{"step": 6400, "loss": 0.33024299144744873, "loss_nce": 0.32787370681762695, "loss_mse": 0.002369272755458951, "lr": 0.0004984137135677626, "grad_norm": 0.15368963778018951, "wall_ms": 572188}
|
| 65 |
+
{"step": 6500, "loss": 0.27903610467910767, "loss_nce": 0.27669405937194824, "loss_mse": 0.002342045772820711, "lr": 0.0004983544846872649, "grad_norm": 0.14195112884044647, "wall_ms": 581140}
|
| 66 |
+
{"step": 6600, "loss": 0.2470056265592575, "loss_nce": 0.2446708232164383, "loss_mse": 0.0023348089307546616, "lr": 0.0004982941739583807, "grad_norm": 0.13261021673679352, "wall_ms": 590074}
|
| 67 |
+
{"step": 6700, "loss": 0.30008870363235474, "loss_nce": 0.2977401912212372, "loss_mse": 0.0023485005367547274, "lr": 0.0004982327816492249, "grad_norm": 0.15386654436588287, "wall_ms": 598979}
|
| 68 |
+
{"step": 6800, "loss": 0.2986982762813568, "loss_nce": 0.296360582113266, "loss_mse": 0.0023376839235424995, "lr": 0.0004981703080327214, "grad_norm": 0.13616767525672913, "wall_ms": 607922}
|
| 69 |
+
{"step": 6900, "loss": 0.3028205335140228, "loss_nce": 0.3004269301891327, "loss_mse": 0.0023935986682772636, "lr": 0.0004981067533866005, "grad_norm": 0.14138557016849518, "wall_ms": 616875}
|
| 70 |
+
{"step": 7000, "loss": 0.2929159998893738, "loss_nce": 0.290575236082077, "loss_mse": 0.002340755658224225, "lr": 0.000498042117993399, "grad_norm": 0.14678609371185303, "wall_ms": 625779}
|
| 71 |
+
{"step": 7100, "loss": 0.3150222897529602, "loss_nce": 0.31263434886932373, "loss_mse": 0.002387931337580085, "lr": 0.0004979764021404572, "grad_norm": 0.16109561920166016, "wall_ms": 634669}
|
| 72 |
+
{"step": 7200, "loss": 0.2907381057739258, "loss_nce": 0.2883453667163849, "loss_mse": 0.0023927316069602966, "lr": 0.0004979096061199197, "grad_norm": 0.15536561608314514, "wall_ms": 643553}
|
| 73 |
+
{"step": 7300, "loss": 0.291935533285141, "loss_nce": 0.2895764410495758, "loss_mse": 0.002359085250645876, "lr": 0.0004978417302287325, "grad_norm": 0.14708688855171204, "wall_ms": 652468}
|
| 74 |
+
{"step": 7400, "loss": 0.2875458598136902, "loss_nce": 0.28514352440834045, "loss_mse": 0.002402340294793248, "lr": 0.0004977727747686422, "grad_norm": 0.13904523849487305, "wall_ms": 661373}
|
| 75 |
+
{"step": 7500, "loss": 0.2769772410392761, "loss_nce": 0.274631142616272, "loss_mse": 0.002346100052818656, "lr": 0.000497702740046195, "grad_norm": 0.13755367696285248, "wall_ms": 670338}
|
| 76 |
+
{"step": 7600, "loss": 0.23821690678596497, "loss_nce": 0.23588444292545319, "loss_mse": 0.0023324626963585615, "lr": 0.0004976316263727349, "grad_norm": 0.13306009769439697, "wall_ms": 679315}
|
| 77 |
+
{"step": 7700, "loss": 0.3137253522872925, "loss_nce": 0.31133702397346497, "loss_mse": 0.0023883283138275146, "lr": 0.0004975594340644023, "grad_norm": 0.14241021871566772, "wall_ms": 688262}
|
| 78 |
+
{"step": 7800, "loss": 0.2868463397026062, "loss_nce": 0.28448089957237244, "loss_mse": 0.0023654322139918804, "lr": 0.0004974861634421329, "grad_norm": 0.13670934736728668, "wall_ms": 697172}
|
| 79 |
+
{"step": 7900, "loss": 0.2954387664794922, "loss_nce": 0.2930690348148346, "loss_mse": 0.0023697202559560537, "lr": 0.000497411814831656, "grad_norm": 0.1406220942735672, "wall_ms": 706126}
|
| 80 |
+
{"step": 8000, "loss": 0.30804750323295593, "loss_nce": 0.3056589663028717, "loss_mse": 0.0023885401897132397, "lr": 0.0004973363885634934, "grad_norm": 0.13683579862117767, "wall_ms": 715071}
|
| 81 |
+
{"step": 8100, "loss": 0.2699294686317444, "loss_nce": 0.267539918422699, "loss_mse": 0.0023895606864243746, "lr": 0.0004972598849729574, "grad_norm": 0.1498822718858719, "wall_ms": 724014}
|
| 82 |
+
{"step": 8200, "loss": 0.2983795404434204, "loss_nce": 0.295993834733963, "loss_mse": 0.002385704545304179, "lr": 0.0004971823044001499, "grad_norm": 0.1425805240869522, "wall_ms": 732948}
|
| 83 |
+
{"step": 8300, "loss": 0.26138100028038025, "loss_nce": 0.25899243354797363, "loss_mse": 0.0023885625414550304, "lr": 0.0004971036471899603, "grad_norm": 0.12923358380794525, "wall_ms": 741874}
|
| 84 |
+
{"step": 8400, "loss": 0.27928414940834045, "loss_nce": 0.2768334448337555, "loss_mse": 0.0024506933987140656, "lr": 0.0004970239136920645, "grad_norm": 0.13942788541316986, "wall_ms": 750795}
|
| 85 |
+
{"step": 8500, "loss": 0.2535550594329834, "loss_nce": 0.2511581778526306, "loss_mse": 0.0023968745954334736, "lr": 0.0004969431042609229, "grad_norm": 0.12643538415431976, "wall_ms": 759726}
|
| 86 |
+
{"step": 8600, "loss": 0.3016558885574341, "loss_nce": 0.2992773652076721, "loss_mse": 0.002378531266003847, "lr": 0.0004968612192557794, "grad_norm": 0.13780879974365234, "wall_ms": 768656}
|
| 87 |
+
{"step": 8700, "loss": 0.29901447892189026, "loss_nce": 0.2965580224990845, "loss_mse": 0.0024564480409026146, "lr": 0.0004967782590406587, "grad_norm": 0.13612505793571472, "wall_ms": 777595}
|
| 88 |
+
{"step": 8800, "loss": 0.3248012661933899, "loss_nce": 0.32244211435317993, "loss_mse": 0.0023591502103954554, "lr": 0.0004966942239843664, "grad_norm": 0.17274892330169678, "wall_ms": 786550}
|
| 89 |
+
{"step": 8900, "loss": 0.2664557695388794, "loss_nce": 0.2639874517917633, "loss_mse": 0.00246830633841455, "lr": 0.0004966091144604858, "grad_norm": 0.14397358894348145, "wall_ms": 795505}
|
| 90 |
+
{"step": 9000, "loss": 0.27923882007598877, "loss_nce": 0.27681684494018555, "loss_mse": 0.002421971643343568, "lr": 0.0004965229308473765, "grad_norm": 0.14763304591178894, "wall_ms": 804455}
|
| 91 |
+
{"step": 9100, "loss": 0.28671935200691223, "loss_nce": 0.28428810834884644, "loss_mse": 0.002431256929412484, "lr": 0.000496435673528174, "grad_norm": 0.1562277227640152, "wall_ms": 813402}
|
| 92 |
+
{"step": 9200, "loss": 0.29257261753082275, "loss_nce": 0.29014822840690613, "loss_mse": 0.0024243968073278666, "lr": 0.000496347342890786, "grad_norm": 0.14588390290737152, "wall_ms": 822357}
|
| 93 |
+
{"step": 9300, "loss": 0.30074113607406616, "loss_nce": 0.2982674539089203, "loss_mse": 0.0024736777413636446, "lr": 0.0004962579393278923, "grad_norm": 0.15842895209789276, "wall_ms": 831288}
|
| 94 |
+
{"step": 9400, "loss": 0.30799102783203125, "loss_nce": 0.305568665266037, "loss_mse": 0.0024223693180829287, "lr": 0.0004961674632369425, "grad_norm": 0.1455927938222885, "wall_ms": 840222}
|
| 95 |
+
{"step": 9500, "loss": 0.2983650863170624, "loss_nce": 0.29595527052879333, "loss_mse": 0.0024098146241158247, "lr": 0.0004960759150201537, "grad_norm": 0.15369388461112976, "wall_ms": 849157}
|
| 96 |
+
{"step": 9600, "loss": 0.2496955394744873, "loss_nce": 0.24726752936840057, "loss_mse": 0.0024280084762722254, "lr": 0.0004959832950845099, "grad_norm": 0.13617092370986938, "wall_ms": 858078}
|
| 97 |
+
{"step": 9700, "loss": 0.26342254877090454, "loss_nce": 0.26097801327705383, "loss_mse": 0.0024445420131087303, "lr": 0.0004958896038417587, "grad_norm": 0.139978289604187, "wall_ms": 867017}
|
| 98 |
+
{"step": 9800, "loss": 0.26342520117759705, "loss_nce": 0.26104646921157837, "loss_mse": 0.0023787240497767925, "lr": 0.000495794841708411, "grad_norm": 0.1436578631401062, "wall_ms": 875935}
|
| 99 |
+
{"step": 9900, "loss": 0.2950769364833832, "loss_nce": 0.2926183342933655, "loss_mse": 0.0024586159270256758, "lr": 0.0004956990091057381, "grad_norm": 0.1351795792579651, "wall_ms": 884878}
|
| 100 |
+
{"step": 10000, "loss": 0.2549520432949066, "loss_nce": 0.25253555178642273, "loss_mse": 0.0024164915084838867, "lr": 0.00049560210645977, "grad_norm": 0.14753563702106476, "wall_ms": 893808}
|