Upload rank_300/train_log.jsonl with huggingface_hub
Browse files- rank_300/train_log.jsonl +100 -0
rank_300/train_log.jsonl
CHANGED
|
@@ -498,3 +498,103 @@
|
|
| 498 |
{"step": 49800, "loss": 0.22766044735908508, "loss_nce": 0.2252032458782196, "loss_mse": 0.002457203809171915, "lr": 0.00038135648102713003, "grad_norm": 0.11789606511592865, "wall_ms": 4472465}
|
| 499 |
{"step": 49900, "loss": 0.26244989037513733, "loss_nce": 0.2599841356277466, "loss_mse": 0.0024657452013343573, "lr": 0.00038091363136196686, "grad_norm": 0.13658109307289124, "wall_ms": 4481348}
|
| 500 |
{"step": 50000, "loss": 0.21818453073501587, "loss_nce": 0.21570155024528503, "loss_mse": 0.0024829870089888573, "lr": 0.000380470221939464, "grad_norm": 0.10834434628486633, "wall_ms": 4490248}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 498 |
{"step": 49800, "loss": 0.22766044735908508, "loss_nce": 0.2252032458782196, "loss_mse": 0.002457203809171915, "lr": 0.00038135648102713003, "grad_norm": 0.11789606511592865, "wall_ms": 4472465}
|
| 499 |
{"step": 49900, "loss": 0.26244989037513733, "loss_nce": 0.2599841356277466, "loss_mse": 0.0024657452013343573, "lr": 0.00038091363136196686, "grad_norm": 0.13658109307289124, "wall_ms": 4481348}
|
| 500 |
{"step": 50000, "loss": 0.21818453073501587, "loss_nce": 0.21570155024528503, "loss_mse": 0.0024829870089888573, "lr": 0.000380470221939464, "grad_norm": 0.10834434628486633, "wall_ms": 4490248}
|
| 501 |
+
{"step": 50100, "loss": 0.2588280141353607, "loss_nce": 0.2563406229019165, "loss_mse": 0.0024873947259038687, "lr": 0.0003800262547308272, "grad_norm": 0.12122497707605362, "wall_ms": 4505550}
|
| 502 |
+
{"step": 50200, "loss": 0.26446419954299927, "loss_nce": 0.26200515031814575, "loss_mse": 0.0024590552784502506, "lr": 0.00037958173170974193, "grad_norm": 0.13131308555603027, "wall_ms": 4514437}
|
| 503 |
+
{"step": 50300, "loss": 0.2503693699836731, "loss_nce": 0.2478683739900589, "loss_mse": 0.0025010022800415754, "lr": 0.0003791366548523646, "grad_norm": 0.12237710505723953, "wall_ms": 4523321}
|
| 504 |
+
{"step": 50400, "loss": 0.2650926113128662, "loss_nce": 0.26260823011398315, "loss_mse": 0.002484389115124941, "lr": 0.0003786910261373137, "grad_norm": 0.1389566957950592, "wall_ms": 4532211}
|
| 505 |
+
{"step": 50500, "loss": 0.2565482258796692, "loss_nce": 0.25407546758651733, "loss_mse": 0.002472757361829281, "lr": 0.0003782448475456609, "grad_norm": 0.11639617383480072, "wall_ms": 4541098}
|
| 506 |
+
{"step": 50600, "loss": 0.26149576902389526, "loss_nce": 0.2589530646800995, "loss_mse": 0.0025426920037716627, "lr": 0.0003777981210609226, "grad_norm": 0.13424089550971985, "wall_ms": 4549994}
|
| 507 |
+
{"step": 50700, "loss": 0.27244120836257935, "loss_nce": 0.27001041173934937, "loss_mse": 0.002430807799100876, "lr": 0.0003773508486690508, "grad_norm": 0.12027856707572937, "wall_ms": 4558902}
|
| 508 |
+
{"step": 50800, "loss": 0.24488535523414612, "loss_nce": 0.2424120306968689, "loss_mse": 0.002473325002938509, "lr": 0.0003769030323584244, "grad_norm": 0.12119985371828079, "wall_ms": 4567806}
|
| 509 |
+
{"step": 50900, "loss": 0.2647389769554138, "loss_nce": 0.2622148096561432, "loss_mse": 0.0025241554249078035, "lr": 0.0003764546741198404, "grad_norm": 0.12686727941036224, "wall_ms": 4576735}
|
| 510 |
+
{"step": 51000, "loss": 0.23630961775779724, "loss_nce": 0.23380933701992035, "loss_mse": 0.0025002737529575825, "lr": 0.00037600577594650453, "grad_norm": 0.12018672376871109, "wall_ms": 4585680}
|
| 511 |
+
{"step": 51100, "loss": 0.29229220747947693, "loss_nce": 0.28976455330848694, "loss_mse": 0.002527655800804496, "lr": 0.00037555633983402353, "grad_norm": 0.13241973519325256, "wall_ms": 4594626}
|
| 512 |
+
{"step": 51200, "loss": 0.2619052231311798, "loss_nce": 0.2594256103038788, "loss_mse": 0.002479603746905923, "lr": 0.00037510636778039516, "grad_norm": 0.12381605803966522, "wall_ms": 4603536}
|
| 513 |
+
{"step": 51300, "loss": 0.25782427191734314, "loss_nce": 0.2553485333919525, "loss_mse": 0.0024757462088018656, "lr": 0.00037465586178599965, "grad_norm": 0.12657253444194794, "wall_ms": 4612445}
|
| 514 |
+
{"step": 51400, "loss": 0.27055463194847107, "loss_nce": 0.26807069778442383, "loss_mse": 0.002483946271240711, "lr": 0.00037420482385359114, "grad_norm": 0.13202719390392303, "wall_ms": 4621383}
|
| 515 |
+
{"step": 51500, "loss": 0.2936129570007324, "loss_nce": 0.29110604524612427, "loss_mse": 0.0025068980176001787, "lr": 0.00037375325598828845, "grad_norm": 0.13253968954086304, "wall_ms": 4630307}
|
| 516 |
+
{"step": 51600, "loss": 0.2613441050052643, "loss_nce": 0.2589186429977417, "loss_mse": 0.002425476675853133, "lr": 0.00037330116019756646, "grad_norm": 0.12410365790128708, "wall_ms": 4639219}
|
| 517 |
+
{"step": 51700, "loss": 0.2779342234134674, "loss_nce": 0.2754327356815338, "loss_mse": 0.0025014847051352262, "lr": 0.0003728485384912465, "grad_norm": 0.12434500455856323, "wall_ms": 4648144}
|
| 518 |
+
{"step": 51800, "loss": 0.25363093614578247, "loss_nce": 0.2511070668697357, "loss_mse": 0.0025238674134016037, "lr": 0.00037239539288148833, "grad_norm": 0.12417462468147278, "wall_ms": 4657080}
|
| 519 |
+
{"step": 51900, "loss": 0.22339442372322083, "loss_nce": 0.22091811895370483, "loss_mse": 0.0024763087276369333, "lr": 0.0003719417253827805, "grad_norm": 0.10706812888383865, "wall_ms": 4665990}
|
| 520 |
+
{"step": 52000, "loss": 0.24531662464141846, "loss_nce": 0.2428949922323227, "loss_mse": 0.0024216370657086372, "lr": 0.00037148753801193187, "grad_norm": 0.12317614257335663, "wall_ms": 4674929}
|
| 521 |
+
{"step": 52100, "loss": 0.2585437297821045, "loss_nce": 0.25607481598854065, "loss_mse": 0.002468923106789589, "lr": 0.0003710328327880623, "grad_norm": 0.11365240067243576, "wall_ms": 4683849}
|
| 522 |
+
{"step": 52200, "loss": 0.2628491222858429, "loss_nce": 0.26038968563079834, "loss_mse": 0.0024594224523752928, "lr": 0.0003705776117325936, "grad_norm": 0.12835575640201569, "wall_ms": 4692771}
|
| 523 |
+
{"step": 52300, "loss": 0.26771748065948486, "loss_nce": 0.265218049287796, "loss_mse": 0.002499440684914589, "lr": 0.00037012187686924123, "grad_norm": 0.12863732874393463, "wall_ms": 4701707}
|
| 524 |
+
{"step": 52400, "loss": 0.24979327619075775, "loss_nce": 0.24732008576393127, "loss_mse": 0.0024731969460844994, "lr": 0.00036966563022400426, "grad_norm": 0.12493344396352768, "wall_ms": 4710622}
|
| 525 |
+
{"step": 52500, "loss": 0.24235881865024567, "loss_nce": 0.23984912037849426, "loss_mse": 0.0025096938479691744, "lr": 0.00036920887382515746, "grad_norm": 0.12302655726671219, "wall_ms": 4719530}
|
| 526 |
+
{"step": 52600, "loss": 0.2906341254711151, "loss_nce": 0.28803983330726624, "loss_mse": 0.0025942877400666475, "lr": 0.00036875160970324135, "grad_norm": 0.1349363923072815, "wall_ms": 4728455}
|
| 527 |
+
{"step": 52700, "loss": 0.23376183211803436, "loss_nce": 0.23124822974205017, "loss_mse": 0.0025136000476777554, "lr": 0.00036829383989105363, "grad_norm": 0.11547146737575531, "wall_ms": 4737365}
|
| 528 |
+
{"step": 52800, "loss": 0.2739992141723633, "loss_nce": 0.27145037055015564, "loss_mse": 0.0025488350074738264, "lr": 0.00036783556642364025, "grad_norm": 0.12623512744903564, "wall_ms": 4746299}
|
| 529 |
+
{"step": 52900, "loss": 0.2620737850666046, "loss_nce": 0.2595300078392029, "loss_mse": 0.0025437648873776197, "lr": 0.00036737679133828606, "grad_norm": 0.12761802971363068, "wall_ms": 4755225}
|
| 530 |
+
{"step": 53000, "loss": 0.2748960554599762, "loss_nce": 0.2723260819911957, "loss_mse": 0.0025699795223772526, "lr": 0.0003669175166745059, "grad_norm": 0.13513332605361938, "wall_ms": 4764163}
|
| 531 |
+
{"step": 53100, "loss": 0.18691986799240112, "loss_nce": 0.18451213836669922, "loss_mse": 0.0024077300913631916, "lr": 0.0003664577444740355, "grad_norm": 0.10034302622079849, "wall_ms": 4773103}
|
| 532 |
+
{"step": 53200, "loss": 0.2772520184516907, "loss_nce": 0.27476146817207336, "loss_mse": 0.002490554703399539, "lr": 0.0003659974767808225, "grad_norm": 0.12403669953346252, "wall_ms": 4782040}
|
| 533 |
+
{"step": 53300, "loss": 0.28344494104385376, "loss_nce": 0.2809922695159912, "loss_mse": 0.0024526636116206646, "lr": 0.0003655367156410173, "grad_norm": 0.13697926700115204, "wall_ms": 4790967}
|
| 534 |
+
{"step": 53400, "loss": 0.24214807152748108, "loss_nce": 0.23961198329925537, "loss_mse": 0.002536080777645111, "lr": 0.00036507546310296377, "grad_norm": 0.12843957543373108, "wall_ms": 4799897}
|
| 535 |
+
{"step": 53500, "loss": 0.24815894663333893, "loss_nce": 0.24571676552295685, "loss_mse": 0.002442179247736931, "lr": 0.0003646137212171905, "grad_norm": 0.1185799315571785, "wall_ms": 4808833}
|
| 536 |
+
{"step": 53600, "loss": 0.2336384505033493, "loss_nce": 0.2311573326587677, "loss_mse": 0.0024811176117509604, "lr": 0.0003641514920364015, "grad_norm": 0.11033083498477936, "wall_ms": 4817759}
|
| 537 |
+
{"step": 53700, "loss": 0.23284760117530823, "loss_nce": 0.23036059737205505, "loss_mse": 0.0024870026390999556, "lr": 0.00036368877761546673, "grad_norm": 0.11893846839666367, "wall_ms": 4826698}
|
| 538 |
+
{"step": 53800, "loss": 0.2545614540576935, "loss_nce": 0.2520405352115631, "loss_mse": 0.0025209139566868544, "lr": 0.00036322558001141407, "grad_norm": 0.12232551723718643, "wall_ms": 4835631}
|
| 539 |
+
{"step": 53900, "loss": 0.26819729804992676, "loss_nce": 0.2656865119934082, "loss_mse": 0.0025107767432928085, "lr": 0.0003627619012834188, "grad_norm": 0.12047264724969864, "wall_ms": 4844584}
|
| 540 |
+
{"step": 54000, "loss": 0.2632346749305725, "loss_nce": 0.26079222559928894, "loss_mse": 0.002442453056573868, "lr": 0.00036229774349279503, "grad_norm": 0.11876800656318665, "wall_ms": 4853525}
|
| 541 |
+
{"step": 54100, "loss": 0.2599849998950958, "loss_nce": 0.25746551156044006, "loss_mse": 0.0025194883346557617, "lr": 0.00036183310870298703, "grad_norm": 0.13223963975906372, "wall_ms": 4862463}
|
| 542 |
+
{"step": 54200, "loss": 0.2680528461933136, "loss_nce": 0.26556098461151123, "loss_mse": 0.002491866471245885, "lr": 0.00036136799897955926, "grad_norm": 0.13102136552333832, "wall_ms": 4871417}
|
| 543 |
+
{"step": 54300, "loss": 0.23863448202610016, "loss_nce": 0.23611731827259064, "loss_mse": 0.002517168875783682, "lr": 0.00036090241639018765, "grad_norm": 0.11751389503479004, "wall_ms": 4880349}
|
| 544 |
+
{"step": 54400, "loss": 0.266784131526947, "loss_nce": 0.26418787240982056, "loss_mse": 0.0025962651707232, "lr": 0.00036043636300465027, "grad_norm": 0.12609028816223145, "wall_ms": 4889307}
|
| 545 |
+
{"step": 54500, "loss": 0.26578083634376526, "loss_nce": 0.2632896900177002, "loss_mse": 0.0024911449290812016, "lr": 0.00035996984089481805, "grad_norm": 0.12723298370838165, "wall_ms": 4898254}
|
| 546 |
+
{"step": 54600, "loss": 0.23704050481319427, "loss_nce": 0.23456957936286926, "loss_mse": 0.002470920095220208, "lr": 0.0003595028521346458, "grad_norm": 0.11546822637319565, "wall_ms": 4907197}
|
| 547 |
+
{"step": 54700, "loss": 0.2155076116323471, "loss_nce": 0.21298961341381073, "loss_mse": 0.0025180040393024683, "lr": 0.0003590353988001629, "grad_norm": 0.11316386610269547, "wall_ms": 4916143}
|
| 548 |
+
{"step": 54800, "loss": 0.26004812121391296, "loss_nce": 0.25754809379577637, "loss_mse": 0.0025000383611768484, "lr": 0.0003585674829694638, "grad_norm": 0.1293029636144638, "wall_ms": 4925076}
|
| 549 |
+
{"step": 54900, "loss": 0.24328182637691498, "loss_nce": 0.24076801538467407, "loss_mse": 0.002513810293748975, "lr": 0.00035809910672269916, "grad_norm": 0.11954088509082794, "wall_ms": 4933981}
|
| 550 |
+
{"step": 55000, "loss": 0.27719929814338684, "loss_nce": 0.2746574878692627, "loss_mse": 0.0025418060831725597, "lr": 0.00035763027214206653, "grad_norm": 0.11540858447551727, "wall_ms": 4942906}
|
| 551 |
+
{"step": 55100, "loss": 0.24741096794605255, "loss_nce": 0.2449219524860382, "loss_mse": 0.0024890180211514235, "lr": 0.0003571609813118008, "grad_norm": 0.13679717481136322, "wall_ms": 4951837}
|
| 552 |
+
{"step": 55200, "loss": 0.27331680059432983, "loss_nce": 0.27084022760391235, "loss_mse": 0.0024765857961028814, "lr": 0.00035669123631816525, "grad_norm": 0.12663006782531738, "wall_ms": 4960743}
|
| 553 |
+
{"step": 55300, "loss": 0.22289974987506866, "loss_nce": 0.22041234374046326, "loss_mse": 0.0024874110240489244, "lr": 0.00035622103924944234, "grad_norm": 0.12257695943117142, "wall_ms": 4969673}
|
| 554 |
+
{"step": 55400, "loss": 0.2276637852191925, "loss_nce": 0.2251497358083725, "loss_mse": 0.0025140447542071342, "lr": 0.0003557503921959239, "grad_norm": 0.11346252262592316, "wall_ms": 4978596}
|
| 555 |
+
{"step": 55500, "loss": 0.2556811571121216, "loss_nce": 0.2531607151031494, "loss_mse": 0.002520442707464099, "lr": 0.0003552792972499025, "grad_norm": 0.11171253025531769, "wall_ms": 4987520}
|
| 556 |
+
{"step": 55600, "loss": 0.2519446611404419, "loss_nce": 0.2493879348039627, "loss_mse": 0.0025567219126969576, "lr": 0.0003548077565056618, "grad_norm": 0.1143852174282074, "wall_ms": 4996430}
|
| 557 |
+
{"step": 55700, "loss": 0.22466246783733368, "loss_nce": 0.2222154587507248, "loss_mse": 0.0024470083881169558, "lr": 0.000354335772059467, "grad_norm": 0.11682698130607605, "wall_ms": 5005324}
|
| 558 |
+
{"step": 55800, "loss": 0.24228665232658386, "loss_nce": 0.23983988165855408, "loss_mse": 0.0024467746261507273, "lr": 0.0003538633460095563, "grad_norm": 0.10954629629850388, "wall_ms": 5014227}
|
| 559 |
+
{"step": 55900, "loss": 0.28999677300453186, "loss_nce": 0.28749608993530273, "loss_mse": 0.002500690519809723, "lr": 0.0003533904804561307, "grad_norm": 0.1318160742521286, "wall_ms": 5023166}
|
| 560 |
+
{"step": 56000, "loss": 0.26515838503837585, "loss_nce": 0.26268988847732544, "loss_mse": 0.0024684863165020943, "lr": 0.0003529171775013451, "grad_norm": 0.12652522325515747, "wall_ms": 5032090}
|
| 561 |
+
{"step": 56100, "loss": 0.2570634186267853, "loss_nce": 0.2545682489871979, "loss_mse": 0.002495179418474436, "lr": 0.0003524434392492992, "grad_norm": 0.12637484073638916, "wall_ms": 5040977}
|
| 562 |
+
{"step": 56200, "loss": 0.2698066830635071, "loss_nce": 0.267285019159317, "loss_mse": 0.0025216685608029366, "lr": 0.0003519692678060273, "grad_norm": 0.12789353728294373, "wall_ms": 5049874}
|
| 563 |
+
{"step": 56300, "loss": 0.22539347410202026, "loss_nce": 0.22296454012393951, "loss_mse": 0.0024289379362016916, "lr": 0.0003514946652794899, "grad_norm": 0.11830209940671921, "wall_ms": 5058811}
|
| 564 |
+
{"step": 56400, "loss": 0.23456686735153198, "loss_nce": 0.23205728828907013, "loss_mse": 0.0025095809251070023, "lr": 0.0003510196337795637, "grad_norm": 0.12161102145910263, "wall_ms": 5067757}
|
| 565 |
+
{"step": 56500, "loss": 0.2555481195449829, "loss_nce": 0.25307410955429077, "loss_mse": 0.002474000211805105, "lr": 0.0003505441754180327, "grad_norm": 0.13207650184631348, "wall_ms": 5076719}
|
| 566 |
+
{"step": 56600, "loss": 0.2703496217727661, "loss_nce": 0.26784467697143555, "loss_mse": 0.0025049555115401745, "lr": 0.00035006829230857815, "grad_norm": 0.13160373270511627, "wall_ms": 5085667}
|
| 567 |
+
{"step": 56700, "loss": 0.31709903478622437, "loss_nce": 0.3145983815193176, "loss_mse": 0.0025006404612213373, "lr": 0.00034959198656676996, "grad_norm": 0.13345512747764587, "wall_ms": 5094632}
|
| 568 |
+
{"step": 56800, "loss": 0.22012288868427277, "loss_nce": 0.2177126705646515, "loss_mse": 0.002410219982266426, "lr": 0.0003491152603100565, "grad_norm": 0.11137008666992188, "wall_ms": 5103587}
|
| 569 |
+
{"step": 56900, "loss": 0.24803709983825684, "loss_nce": 0.24548761546611786, "loss_mse": 0.0025494801811873913, "lr": 0.0003486381156577558, "grad_norm": 0.12436376512050629, "wall_ms": 5112575}
|
| 570 |
+
{"step": 57000, "loss": 0.2537578344345093, "loss_nce": 0.2512805461883545, "loss_mse": 0.0024772975593805313, "lr": 0.00034816055473104574, "grad_norm": 0.12304897606372833, "wall_ms": 5121538}
|
| 571 |
+
{"step": 57100, "loss": 0.27253708243370056, "loss_nce": 0.2700161039829254, "loss_mse": 0.0025209931191056967, "lr": 0.00034768257965295507, "grad_norm": 0.12923523783683777, "wall_ms": 5130530}
|
| 572 |
+
{"step": 57200, "loss": 0.2433241307735443, "loss_nce": 0.24085955321788788, "loss_mse": 0.0024645705707371235, "lr": 0.00034720419254835334, "grad_norm": 0.12569370865821838, "wall_ms": 5139480}
|
| 573 |
+
{"step": 57300, "loss": 0.2776809632778168, "loss_nce": 0.27519112825393677, "loss_mse": 0.0024898387491703033, "lr": 0.0003467253955439418, "grad_norm": 0.13674628734588623, "wall_ms": 5148490}
|
| 574 |
+
{"step": 57400, "loss": 0.22460786998271942, "loss_nce": 0.22213061153888702, "loss_mse": 0.0024772624019533396, "lr": 0.00034624619076824433, "grad_norm": 0.11141205579042435, "wall_ms": 5157487}
|
| 575 |
+
{"step": 57500, "loss": 0.252841591835022, "loss_nce": 0.25034621357917786, "loss_mse": 0.0024953861720860004, "lr": 0.0003457665803515972, "grad_norm": 0.13097621500492096, "wall_ms": 5166392}
|
| 576 |
+
{"step": 57600, "loss": 0.23909839987754822, "loss_nce": 0.23661580681800842, "loss_mse": 0.0024825958535075188, "lr": 0.00034528656642614003, "grad_norm": 0.119733065366745, "wall_ms": 5175278}
|
| 577 |
+
{"step": 57700, "loss": 0.21064820885658264, "loss_nce": 0.20818662643432617, "loss_mse": 0.002461586846038699, "lr": 0.0003448061511258065, "grad_norm": 0.10903365164995193, "wall_ms": 5184145}
|
| 578 |
+
{"step": 57800, "loss": 0.24058200418949127, "loss_nce": 0.23811985552310944, "loss_mse": 0.0024621491320431232, "lr": 0.0003443253365863142, "grad_norm": 0.12348281592130661, "wall_ms": 5193041}
|
| 579 |
+
{"step": 57900, "loss": 0.21938851475715637, "loss_nce": 0.21690122783184052, "loss_mse": 0.002487294375896454, "lr": 0.0003438441249451561, "grad_norm": 0.10999690741300583, "wall_ms": 5201922}
|
| 580 |
+
{"step": 58000, "loss": 0.25016647577285767, "loss_nce": 0.2476935088634491, "loss_mse": 0.002472969237715006, "lr": 0.00034336251834159013, "grad_norm": 0.12484649568796158, "wall_ms": 5210809}
|
| 581 |
+
{"step": 58100, "loss": 0.2505309581756592, "loss_nce": 0.24804890155792236, "loss_mse": 0.0024820517282932997, "lr": 0.00034288051891663015, "grad_norm": 0.12504306435585022, "wall_ms": 5219713}
|
| 582 |
+
{"step": 58200, "loss": 0.2643982172012329, "loss_nce": 0.2618914544582367, "loss_mse": 0.002506766701117158, "lr": 0.0003423981288130363, "grad_norm": 0.12454070150852203, "wall_ms": 5228607}
|
| 583 |
+
{"step": 58300, "loss": 0.2956334352493286, "loss_nce": 0.29307132959365845, "loss_mse": 0.0025621100794523954, "lr": 0.0003419153501753055, "grad_norm": 0.139480322599411, "wall_ms": 5237522}
|
| 584 |
+
{"step": 58400, "loss": 0.2668497860431671, "loss_nce": 0.26433655619621277, "loss_mse": 0.0025132354348897934, "lr": 0.0003414321851496622, "grad_norm": 0.13223344087600708, "wall_ms": 5246446}
|
| 585 |
+
{"step": 58500, "loss": 0.2177545577287674, "loss_nce": 0.21527346968650818, "loss_mse": 0.0024810906033962965, "lr": 0.00034094863588404826, "grad_norm": 0.10781250894069672, "wall_ms": 5255368}
|
| 586 |
+
{"step": 58600, "loss": 0.22056330740451813, "loss_nce": 0.21809476613998413, "loss_mse": 0.0024685398675501347, "lr": 0.0003404647045281137, "grad_norm": 0.11320233345031738, "wall_ms": 5264284}
|
| 587 |
+
{"step": 58700, "loss": 0.2621335983276367, "loss_nce": 0.2596128582954407, "loss_mse": 0.002520749345421791, "lr": 0.0003399803932332072, "grad_norm": 0.1195669174194336, "wall_ms": 5273205}
|
| 588 |
+
{"step": 58800, "loss": 0.2343791127204895, "loss_nce": 0.23186680674552917, "loss_mse": 0.0025123076047748327, "lr": 0.00033949570415236656, "grad_norm": 0.12076704949140549, "wall_ms": 5282120}
|
| 589 |
+
{"step": 58900, "loss": 0.25941598415374756, "loss_nce": 0.25691986083984375, "loss_mse": 0.002496137982234359, "lr": 0.00033901063944030913, "grad_norm": 0.1178438812494278, "wall_ms": 5291037}
|
| 590 |
+
{"step": 59000, "loss": 0.296156644821167, "loss_nce": 0.2936748266220093, "loss_mse": 0.00248181470669806, "lr": 0.00033852520125342186, "grad_norm": 0.14023876190185547, "wall_ms": 5299949}
|
| 591 |
+
{"step": 59100, "loss": 0.23361939191818237, "loss_nce": 0.23115013539791107, "loss_mse": 0.0024692509323358536, "lr": 0.0003380393917497523, "grad_norm": 0.1234418973326683, "wall_ms": 5308839}
|
| 592 |
+
{"step": 59200, "loss": 0.21732820570468903, "loss_nce": 0.21489568054676056, "loss_mse": 0.002432530978694558, "lr": 0.00033755321308899836, "grad_norm": 0.11074765026569366, "wall_ms": 5317743}
|
| 593 |
+
{"step": 59300, "loss": 0.2441577911376953, "loss_nce": 0.24164341390132904, "loss_mse": 0.0025143823586404324, "lr": 0.00033706666743249964, "grad_norm": 0.12644372880458832, "wall_ms": 5326633}
|
| 594 |
+
{"step": 59400, "loss": 0.21515430510044098, "loss_nce": 0.21265290677547455, "loss_mse": 0.002501405542716384, "lr": 0.0003365797569432267, "grad_norm": 0.1167834997177124, "wall_ms": 5335499}
|
| 595 |
+
{"step": 59500, "loss": 0.2691107988357544, "loss_nce": 0.2666638195514679, "loss_mse": 0.0024469897616654634, "lr": 0.00033609248378577215, "grad_norm": 0.13474515080451965, "wall_ms": 5344365}
|
| 596 |
+
{"step": 59600, "loss": 0.22607465088367462, "loss_nce": 0.22357235848903656, "loss_mse": 0.0025022889021784067, "lr": 0.000335604850126341, "grad_norm": 0.11684080958366394, "wall_ms": 5353235}
|
| 597 |
+
{"step": 59700, "loss": 0.23584416508674622, "loss_nce": 0.23341825604438782, "loss_mse": 0.002425912069156766, "lr": 0.00033511685813274064, "grad_norm": 0.11371912062168121, "wall_ms": 5362109}
|
| 598 |
+
{"step": 59800, "loss": 0.25111469626426697, "loss_nce": 0.24863629043102264, "loss_mse": 0.0024784065317362547, "lr": 0.00033462850997437177, "grad_norm": 0.12471190094947815, "wall_ms": 5370979}
|
| 599 |
+
{"step": 59900, "loss": 0.28567489981651306, "loss_nce": 0.28315240144729614, "loss_mse": 0.002522500930353999, "lr": 0.0003341398078222182, "grad_norm": 0.1375349760055542, "wall_ms": 5379865}
|
| 600 |
+
{"step": 60000, "loss": 0.21717432141304016, "loss_nce": 0.214727982878685, "loss_mse": 0.0024463359732180834, "lr": 0.00033365075384883763, "grad_norm": 0.11287357658147812, "wall_ms": 5388763}
|