diff --git "a/trainer_state.json" "b/trainer_state.json" deleted file mode 100644--- "a/trainer_state.json" +++ /dev/null @@ -1,14797 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 24.0, - "eval_steps": 500, - "global_step": 21096, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.01138627953316254, - "grad_norm": 2.734767198562622, - "learning_rate": 0.0001999512432959532, - "loss": 30.333529663085937, - "step": 10 - }, - { - "epoch": 0.02277255906632508, - "grad_norm": 2.7044386863708496, - "learning_rate": 0.00019989706918034563, - "loss": 25.792864990234374, - "step": 20 - }, - { - "epoch": 0.034158838599487616, - "grad_norm": 2.4714598655700684, - "learning_rate": 0.00019984289506473808, - "loss": 24.416429138183595, - "step": 30 - }, - { - "epoch": 0.04554511813265016, - "grad_norm": 2.6829721927642822, - "learning_rate": 0.00019978872094913053, - "loss": 23.82067413330078, - "step": 40 - }, - { - "epoch": 0.0569313976658127, - "grad_norm": 3.0441884994506836, - "learning_rate": 0.00019973454683352296, - "loss": 23.323060607910158, - "step": 50 - }, - { - "epoch": 0.06831767719897523, - "grad_norm": 3.367356538772583, - "learning_rate": 0.0001996803727179154, - "loss": 22.732952880859376, - "step": 60 - }, - { - "epoch": 0.07970395673213777, - "grad_norm": 3.5883660316467285, - "learning_rate": 0.00019962619860230783, - "loss": 22.522509765625, - "step": 70 - }, - { - "epoch": 0.09109023626530031, - "grad_norm": 3.8543548583984375, - "learning_rate": 0.00019957202448670026, - "loss": 22.270034790039062, - "step": 80 - }, - { - "epoch": 0.10247651579846286, - "grad_norm": 4.374216079711914, - "learning_rate": 0.0001995178503710927, - "loss": 22.013529968261718, - "step": 90 - }, - { - "epoch": 0.1138627953316254, - "grad_norm": 4.15579891204834, - "learning_rate": 0.00019946367625548513, - "loss": 22.13349304199219, - "step": 100 - }, - { - "epoch": 0.12524907486478792, - "grad_norm": 4.63075065612793, - "learning_rate": 0.00019940950213987756, - "loss": 21.13990173339844, - "step": 110 - }, - { - "epoch": 0.13663535439795046, - "grad_norm": 5.391815662384033, - "learning_rate": 0.00019935532802427, - "loss": 21.209239196777343, - "step": 120 - }, - { - "epoch": 0.148021633931113, - "grad_norm": 5.1299896240234375, - "learning_rate": 0.00019930115390866243, - "loss": 21.18792266845703, - "step": 130 - }, - { - "epoch": 0.15940791346427555, - "grad_norm": 5.028836727142334, - "learning_rate": 0.00019924697979305489, - "loss": 20.73717346191406, - "step": 140 - }, - { - "epoch": 0.1707941929974381, - "grad_norm": 5.911370754241943, - "learning_rate": 0.00019919280567744734, - "loss": 20.920809936523437, - "step": 150 - }, - { - "epoch": 0.18218047253060063, - "grad_norm": 6.1007914543151855, - "learning_rate": 0.00019913863156183976, - "loss": 20.673001098632813, - "step": 160 - }, - { - "epoch": 0.19356675206376317, - "grad_norm": 7.011501312255859, - "learning_rate": 0.0001990844574462322, - "loss": 20.6161376953125, - "step": 170 - }, - { - "epoch": 0.2049530315969257, - "grad_norm": 6.00868558883667, - "learning_rate": 0.00019903028333062464, - "loss": 20.028971862792968, - "step": 180 - }, - { - "epoch": 0.21633931113008825, - "grad_norm": 6.089074611663818, - "learning_rate": 0.0001989761092150171, - "loss": 20.252516174316405, - "step": 190 - }, - { - "epoch": 0.2277255906632508, - "grad_norm": 6.774249076843262, - "learning_rate": 0.00019892193509940951, - "loss": 20.02190399169922, - "step": 200 - }, - { - "epoch": 0.23911187019641333, - "grad_norm": 5.999351501464844, - "learning_rate": 0.00019886776098380197, - "loss": 19.793946838378908, - "step": 210 - }, - { - "epoch": 0.25049814972957585, - "grad_norm": 6.9030256271362305, - "learning_rate": 0.0001988135868681944, - "loss": 19.749664306640625, - "step": 220 - }, - { - "epoch": 0.2618844292627384, - "grad_norm": 6.463044166564941, - "learning_rate": 0.00019875941275258681, - "loss": 19.66212615966797, - "step": 230 - }, - { - "epoch": 0.27327070879590093, - "grad_norm": 6.855940818786621, - "learning_rate": 0.00019870523863697924, - "loss": 19.732081604003906, - "step": 240 - }, - { - "epoch": 0.2846569883290635, - "grad_norm": 5.966732025146484, - "learning_rate": 0.0001986510645213717, - "loss": 19.346263122558593, - "step": 250 - }, - { - "epoch": 0.296043267862226, - "grad_norm": 7.0768632888793945, - "learning_rate": 0.00019859689040576412, - "loss": 19.514622497558594, - "step": 260 - }, - { - "epoch": 0.3074295473953886, - "grad_norm": 6.5192694664001465, - "learning_rate": 0.00019854271629015657, - "loss": 19.53545379638672, - "step": 270 - }, - { - "epoch": 0.3188158269285511, - "grad_norm": 6.5199103355407715, - "learning_rate": 0.00019848854217454902, - "loss": 18.95964813232422, - "step": 280 - }, - { - "epoch": 0.33020210646171366, - "grad_norm": 6.990855693817139, - "learning_rate": 0.00019843436805894144, - "loss": 18.899111938476562, - "step": 290 - }, - { - "epoch": 0.3415883859948762, - "grad_norm": 7.77662992477417, - "learning_rate": 0.0001983801939433339, - "loss": 18.688560485839844, - "step": 300 - }, - { - "epoch": 0.3529746655280387, - "grad_norm": 7.311860084533691, - "learning_rate": 0.00019832601982772632, - "loss": 18.872056579589845, - "step": 310 - }, - { - "epoch": 0.36436094506120126, - "grad_norm": 8.168448448181152, - "learning_rate": 0.00019827184571211877, - "loss": 18.846502685546874, - "step": 320 - }, - { - "epoch": 0.37574722459436377, - "grad_norm": 7.2531046867370605, - "learning_rate": 0.0001982176715965112, - "loss": 18.281404113769533, - "step": 330 - }, - { - "epoch": 0.38713350412752634, - "grad_norm": 7.322739601135254, - "learning_rate": 0.00019816349748090365, - "loss": 18.37748565673828, - "step": 340 - }, - { - "epoch": 0.39851978366068885, - "grad_norm": 7.208354949951172, - "learning_rate": 0.00019810932336529607, - "loss": 18.53016815185547, - "step": 350 - }, - { - "epoch": 0.4099060631938514, - "grad_norm": 8.217365264892578, - "learning_rate": 0.00019805514924968852, - "loss": 18.07104949951172, - "step": 360 - }, - { - "epoch": 0.42129234272701394, - "grad_norm": 6.938902854919434, - "learning_rate": 0.00019800097513408095, - "loss": 18.317633056640624, - "step": 370 - }, - { - "epoch": 0.4326786222601765, - "grad_norm": 7.835426330566406, - "learning_rate": 0.00019794680101847337, - "loss": 18.164633178710936, - "step": 380 - }, - { - "epoch": 0.444064901793339, - "grad_norm": 6.851658821105957, - "learning_rate": 0.00019789262690286582, - "loss": 18.049453735351562, - "step": 390 - }, - { - "epoch": 0.4554511813265016, - "grad_norm": 6.942612648010254, - "learning_rate": 0.00019783845278725825, - "loss": 18.057662963867188, - "step": 400 - }, - { - "epoch": 0.4668374608596641, - "grad_norm": 7.211177349090576, - "learning_rate": 0.0001977842786716507, - "loss": 17.956227111816407, - "step": 410 - }, - { - "epoch": 0.47822374039282667, - "grad_norm": 7.398573398590088, - "learning_rate": 0.00019773010455604312, - "loss": 17.89140167236328, - "step": 420 - }, - { - "epoch": 0.4896100199259892, - "grad_norm": 7.789492607116699, - "learning_rate": 0.00019767593044043558, - "loss": 17.603182983398437, - "step": 430 - }, - { - "epoch": 0.5009962994591517, - "grad_norm": 6.86952018737793, - "learning_rate": 0.000197621756324828, - "loss": 17.51262969970703, - "step": 440 - }, - { - "epoch": 0.5123825789923142, - "grad_norm": 6.798729419708252, - "learning_rate": 0.00019756758220922045, - "loss": 17.721162414550783, - "step": 450 - }, - { - "epoch": 0.5237688585254768, - "grad_norm": 7.071723461151123, - "learning_rate": 0.00019751340809361288, - "loss": 17.830340576171874, - "step": 460 - }, - { - "epoch": 0.5351551380586393, - "grad_norm": 7.081210136413574, - "learning_rate": 0.00019745923397800533, - "loss": 17.28885498046875, - "step": 470 - }, - { - "epoch": 0.5465414175918019, - "grad_norm": 7.516231536865234, - "learning_rate": 0.00019740505986239775, - "loss": 17.6906005859375, - "step": 480 - }, - { - "epoch": 0.5579276971249644, - "grad_norm": 6.515166759490967, - "learning_rate": 0.0001973508857467902, - "loss": 17.18426971435547, - "step": 490 - }, - { - "epoch": 0.569313976658127, - "grad_norm": 7.4100189208984375, - "learning_rate": 0.00019729671163118263, - "loss": 17.34864501953125, - "step": 500 - }, - { - "epoch": 0.5807002561912895, - "grad_norm": 6.820520401000977, - "learning_rate": 0.00019724253751557505, - "loss": 17.613563537597656, - "step": 510 - }, - { - "epoch": 0.592086535724452, - "grad_norm": 7.629634857177734, - "learning_rate": 0.0001971883633999675, - "loss": 17.18858947753906, - "step": 520 - }, - { - "epoch": 0.6034728152576145, - "grad_norm": 7.522180080413818, - "learning_rate": 0.00019713418928435993, - "loss": 17.541119384765626, - "step": 530 - }, - { - "epoch": 0.6148590947907772, - "grad_norm": 7.64506196975708, - "learning_rate": 0.00019708001516875238, - "loss": 17.047702026367187, - "step": 540 - }, - { - "epoch": 0.6262453743239397, - "grad_norm": 6.857038497924805, - "learning_rate": 0.0001970258410531448, - "loss": 17.394444274902344, - "step": 550 - }, - { - "epoch": 0.6376316538571022, - "grad_norm": 6.667849063873291, - "learning_rate": 0.00019697166693753726, - "loss": 17.071144104003906, - "step": 560 - }, - { - "epoch": 0.6490179333902647, - "grad_norm": 7.24165153503418, - "learning_rate": 0.00019691749282192968, - "loss": 16.311009216308594, - "step": 570 - }, - { - "epoch": 0.6604042129234273, - "grad_norm": 7.296228408813477, - "learning_rate": 0.00019686331870632213, - "loss": 16.9224609375, - "step": 580 - }, - { - "epoch": 0.6717904924565898, - "grad_norm": 6.7600016593933105, - "learning_rate": 0.00019680914459071456, - "loss": 17.025440979003907, - "step": 590 - }, - { - "epoch": 0.6831767719897524, - "grad_norm": 7.559263229370117, - "learning_rate": 0.000196754970475107, - "loss": 16.395526123046874, - "step": 600 - }, - { - "epoch": 0.6945630515229149, - "grad_norm": 7.639865398406982, - "learning_rate": 0.00019670079635949946, - "loss": 17.01804962158203, - "step": 610 - }, - { - "epoch": 0.7059493310560774, - "grad_norm": 7.71447229385376, - "learning_rate": 0.00019664662224389188, - "loss": 16.441880798339845, - "step": 620 - }, - { - "epoch": 0.71733561058924, - "grad_norm": 6.845354080200195, - "learning_rate": 0.00019659244812828434, - "loss": 16.84131622314453, - "step": 630 - }, - { - "epoch": 0.7287218901224025, - "grad_norm": 6.601531505584717, - "learning_rate": 0.00019653827401267676, - "loss": 16.1950439453125, - "step": 640 - }, - { - "epoch": 0.740108169655565, - "grad_norm": 6.828949928283691, - "learning_rate": 0.00019648409989706918, - "loss": 16.982223510742188, - "step": 650 - }, - { - "epoch": 0.7514944491887275, - "grad_norm": 7.29005765914917, - "learning_rate": 0.0001964299257814616, - "loss": 16.249069213867188, - "step": 660 - }, - { - "epoch": 0.7628807287218902, - "grad_norm": 7.450255870819092, - "learning_rate": 0.00019637575166585406, - "loss": 16.10637664794922, - "step": 670 - }, - { - "epoch": 0.7742670082550527, - "grad_norm": 7.040278434753418, - "learning_rate": 0.00019632157755024649, - "loss": 16.148355102539064, - "step": 680 - }, - { - "epoch": 0.7856532877882152, - "grad_norm": 7.316576957702637, - "learning_rate": 0.00019626740343463894, - "loss": 16.328166198730468, - "step": 690 - }, - { - "epoch": 0.7970395673213777, - "grad_norm": 6.502153396606445, - "learning_rate": 0.00019621322931903136, - "loss": 15.988864135742187, - "step": 700 - }, - { - "epoch": 0.8084258468545403, - "grad_norm": 7.165337085723877, - "learning_rate": 0.0001961590552034238, - "loss": 16.477142333984375, - "step": 710 - }, - { - "epoch": 0.8198121263877028, - "grad_norm": 7.06484317779541, - "learning_rate": 0.00019610488108781626, - "loss": 16.042941284179687, - "step": 720 - }, - { - "epoch": 0.8311984059208654, - "grad_norm": 7.751636505126953, - "learning_rate": 0.0001960507069722087, - "loss": 16.278134155273438, - "step": 730 - }, - { - "epoch": 0.8425846854540279, - "grad_norm": 6.873840808868408, - "learning_rate": 0.00019599653285660114, - "loss": 15.732162475585938, - "step": 740 - }, - { - "epoch": 0.8539709649871904, - "grad_norm": 6.263609886169434, - "learning_rate": 0.00019594235874099356, - "loss": 16.18165283203125, - "step": 750 - }, - { - "epoch": 0.865357244520353, - "grad_norm": 6.69379186630249, - "learning_rate": 0.00019588818462538602, - "loss": 16.150958251953124, - "step": 760 - }, - { - "epoch": 0.8767435240535155, - "grad_norm": 6.563896656036377, - "learning_rate": 0.00019583401050977844, - "loss": 15.676901245117188, - "step": 770 - }, - { - "epoch": 0.888129803586678, - "grad_norm": 6.714715003967285, - "learning_rate": 0.0001957798363941709, - "loss": 15.763505554199218, - "step": 780 - }, - { - "epoch": 0.8995160831198405, - "grad_norm": 6.818701267242432, - "learning_rate": 0.00019572566227856332, - "loss": 16.16502685546875, - "step": 790 - }, - { - "epoch": 0.9109023626530032, - "grad_norm": 7.140949249267578, - "learning_rate": 0.00019567148816295574, - "loss": 15.515142822265625, - "step": 800 - }, - { - "epoch": 0.9222886421861657, - "grad_norm": 6.7389020919799805, - "learning_rate": 0.00019561731404734817, - "loss": 15.634983825683594, - "step": 810 - }, - { - "epoch": 0.9336749217193282, - "grad_norm": 6.847448825836182, - "learning_rate": 0.00019556313993174062, - "loss": 15.689366149902344, - "step": 820 - }, - { - "epoch": 0.9450612012524907, - "grad_norm": 7.26194429397583, - "learning_rate": 0.00019550896581613307, - "loss": 15.806666564941406, - "step": 830 - }, - { - "epoch": 0.9564474807856533, - "grad_norm": 6.119577407836914, - "learning_rate": 0.0001954547917005255, - "loss": 15.300843811035156, - "step": 840 - }, - { - "epoch": 0.9678337603188158, - "grad_norm": 6.605442047119141, - "learning_rate": 0.00019540061758491794, - "loss": 15.186727905273438, - "step": 850 - }, - { - "epoch": 0.9792200398519784, - "grad_norm": 6.060615062713623, - "learning_rate": 0.00019534644346931037, - "loss": 15.392152404785156, - "step": 860 - }, - { - "epoch": 0.9906063193851409, - "grad_norm": 6.676738262176514, - "learning_rate": 0.00019529226935370282, - "loss": 15.1415283203125, - "step": 870 - }, - { - "epoch": 1.0011386279533163, - "grad_norm": 6.309657096862793, - "learning_rate": 0.00019523809523809525, - "loss": 14.388438415527343, - "step": 880 - }, - { - "epoch": 1.0125249074864788, - "grad_norm": 6.854212760925293, - "learning_rate": 0.0001951839211224877, - "loss": 14.986418151855469, - "step": 890 - }, - { - "epoch": 1.0239111870196413, - "grad_norm": 6.602004051208496, - "learning_rate": 0.00019512974700688012, - "loss": 14.524969482421875, - "step": 900 - }, - { - "epoch": 1.0352974665528039, - "grad_norm": 7.170373439788818, - "learning_rate": 0.00019507557289127257, - "loss": 15.004403686523437, - "step": 910 - }, - { - "epoch": 1.0466837460859664, - "grad_norm": 6.895066738128662, - "learning_rate": 0.000195021398775665, - "loss": 14.763569641113282, - "step": 920 - }, - { - "epoch": 1.0580700256191289, - "grad_norm": 6.917144775390625, - "learning_rate": 0.00019496722466005742, - "loss": 14.740924072265624, - "step": 930 - }, - { - "epoch": 1.0694563051522914, - "grad_norm": 6.31508731842041, - "learning_rate": 0.00019491305054444987, - "loss": 14.591043090820312, - "step": 940 - }, - { - "epoch": 1.080842584685454, - "grad_norm": 6.562560558319092, - "learning_rate": 0.0001948588764288423, - "loss": 14.91121826171875, - "step": 950 - }, - { - "epoch": 1.0922288642186166, - "grad_norm": 6.635254383087158, - "learning_rate": 0.00019480470231323475, - "loss": 15.324360656738282, - "step": 960 - }, - { - "epoch": 1.1036151437517792, - "grad_norm": 6.704561233520508, - "learning_rate": 0.00019475052819762717, - "loss": 15.346974182128907, - "step": 970 - }, - { - "epoch": 1.1150014232849417, - "grad_norm": 6.2377190589904785, - "learning_rate": 0.00019469635408201963, - "loss": 15.083432006835938, - "step": 980 - }, - { - "epoch": 1.1263877028181042, - "grad_norm": 6.517440319061279, - "learning_rate": 0.00019464217996641205, - "loss": 14.828807067871093, - "step": 990 - }, - { - "epoch": 1.1377739823512667, - "grad_norm": 6.731694221496582, - "learning_rate": 0.0001945880058508045, - "loss": 14.4470703125, - "step": 1000 - }, - { - "epoch": 1.1491602618844292, - "grad_norm": 6.918997287750244, - "learning_rate": 0.00019453383173519693, - "loss": 14.706845092773438, - "step": 1010 - }, - { - "epoch": 1.1605465414175917, - "grad_norm": 6.228609561920166, - "learning_rate": 0.00019447965761958938, - "loss": 14.568064880371093, - "step": 1020 - }, - { - "epoch": 1.1719328209507545, - "grad_norm": 7.079123497009277, - "learning_rate": 0.0001944254835039818, - "loss": 14.400852966308594, - "step": 1030 - }, - { - "epoch": 1.183319100483917, - "grad_norm": 6.567544937133789, - "learning_rate": 0.00019437130938837425, - "loss": 14.280006408691406, - "step": 1040 - }, - { - "epoch": 1.1947053800170795, - "grad_norm": 7.218766689300537, - "learning_rate": 0.00019431713527276668, - "loss": 14.390548706054688, - "step": 1050 - }, - { - "epoch": 1.206091659550242, - "grad_norm": 6.728633880615234, - "learning_rate": 0.00019426296115715913, - "loss": 14.768504333496093, - "step": 1060 - }, - { - "epoch": 1.2174779390834045, - "grad_norm": 6.698533535003662, - "learning_rate": 0.00019420878704155155, - "loss": 14.518492126464844, - "step": 1070 - }, - { - "epoch": 1.228864218616567, - "grad_norm": 6.782904148101807, - "learning_rate": 0.00019415461292594398, - "loss": 14.52556915283203, - "step": 1080 - }, - { - "epoch": 1.2402504981497295, - "grad_norm": 6.522706508636475, - "learning_rate": 0.00019410043881033643, - "loss": 14.64172821044922, - "step": 1090 - }, - { - "epoch": 1.251636777682892, - "grad_norm": 6.942655563354492, - "learning_rate": 0.00019404626469472885, - "loss": 14.770219421386718, - "step": 1100 - }, - { - "epoch": 1.2630230572160546, - "grad_norm": 6.034182071685791, - "learning_rate": 0.0001939920905791213, - "loss": 14.227700805664062, - "step": 1110 - }, - { - "epoch": 1.2744093367492173, - "grad_norm": 6.776942253112793, - "learning_rate": 0.00019393791646351373, - "loss": 14.423974609375, - "step": 1120 - }, - { - "epoch": 1.2857956162823796, - "grad_norm": 6.075601100921631, - "learning_rate": 0.00019388374234790618, - "loss": 14.799586486816406, - "step": 1130 - }, - { - "epoch": 1.2971818958155423, - "grad_norm": 6.346046447753906, - "learning_rate": 0.0001938295682322986, - "loss": 14.888410949707032, - "step": 1140 - }, - { - "epoch": 1.3085681753487048, - "grad_norm": 6.099461078643799, - "learning_rate": 0.00019377539411669106, - "loss": 14.434004211425782, - "step": 1150 - }, - { - "epoch": 1.3199544548818674, - "grad_norm": 6.728332042694092, - "learning_rate": 0.00019372122000108348, - "loss": 14.108644104003906, - "step": 1160 - }, - { - "epoch": 1.3313407344150299, - "grad_norm": 6.106889247894287, - "learning_rate": 0.00019366704588547593, - "loss": 13.984442138671875, - "step": 1170 - }, - { - "epoch": 1.3427270139481924, - "grad_norm": 6.419578552246094, - "learning_rate": 0.00019361287176986839, - "loss": 14.819793701171875, - "step": 1180 - }, - { - "epoch": 1.354113293481355, - "grad_norm": 6.706035614013672, - "learning_rate": 0.0001935586976542608, - "loss": 14.234246826171875, - "step": 1190 - }, - { - "epoch": 1.3654995730145174, - "grad_norm": 5.745364665985107, - "learning_rate": 0.00019350452353865324, - "loss": 14.144187927246094, - "step": 1200 - }, - { - "epoch": 1.3768858525476801, - "grad_norm": 6.465015411376953, - "learning_rate": 0.0001934503494230457, - "loss": 14.569776916503907, - "step": 1210 - }, - { - "epoch": 1.3882721320808427, - "grad_norm": 5.789831638336182, - "learning_rate": 0.0001933961753074381, - "loss": 14.204434204101563, - "step": 1220 - }, - { - "epoch": 1.3996584116140052, - "grad_norm": 6.589231014251709, - "learning_rate": 0.00019334200119183054, - "loss": 13.60784912109375, - "step": 1230 - }, - { - "epoch": 1.4110446911471677, - "grad_norm": 6.706899166107178, - "learning_rate": 0.000193287827076223, - "loss": 14.148931884765625, - "step": 1240 - }, - { - "epoch": 1.4224309706803302, - "grad_norm": 6.473396301269531, - "learning_rate": 0.0001932336529606154, - "loss": 14.05177001953125, - "step": 1250 - }, - { - "epoch": 1.4338172502134927, - "grad_norm": 6.002316474914551, - "learning_rate": 0.00019317947884500786, - "loss": 13.77730712890625, - "step": 1260 - }, - { - "epoch": 1.4452035297466552, - "grad_norm": 5.781302452087402, - "learning_rate": 0.0001931253047294003, - "loss": 14.126388549804688, - "step": 1270 - }, - { - "epoch": 1.4565898092798177, - "grad_norm": 6.653365612030029, - "learning_rate": 0.00019307113061379274, - "loss": 14.278581237792968, - "step": 1280 - }, - { - "epoch": 1.4679760888129803, - "grad_norm": 6.22074556350708, - "learning_rate": 0.0001930169564981852, - "loss": 13.808645629882813, - "step": 1290 - }, - { - "epoch": 1.479362368346143, - "grad_norm": 6.5593647956848145, - "learning_rate": 0.00019296278238257762, - "loss": 13.794139099121093, - "step": 1300 - }, - { - "epoch": 1.4907486478793055, - "grad_norm": 5.953279972076416, - "learning_rate": 0.00019290860826697007, - "loss": 13.860592651367188, - "step": 1310 - }, - { - "epoch": 1.502134927412468, - "grad_norm": 5.616598606109619, - "learning_rate": 0.0001928544341513625, - "loss": 13.759190368652344, - "step": 1320 - }, - { - "epoch": 1.5135212069456305, - "grad_norm": 5.759921073913574, - "learning_rate": 0.00019280026003575494, - "loss": 13.61251220703125, - "step": 1330 - }, - { - "epoch": 1.524907486478793, - "grad_norm": 5.853238105773926, - "learning_rate": 0.00019274608592014737, - "loss": 13.766014099121094, - "step": 1340 - }, - { - "epoch": 1.5362937660119556, - "grad_norm": 6.38026762008667, - "learning_rate": 0.0001926919118045398, - "loss": 13.8654296875, - "step": 1350 - }, - { - "epoch": 1.547680045545118, - "grad_norm": 6.176024436950684, - "learning_rate": 0.00019263773768893222, - "loss": 13.856707763671874, - "step": 1360 - }, - { - "epoch": 1.5590663250782808, - "grad_norm": 5.767195701599121, - "learning_rate": 0.00019258356357332467, - "loss": 13.602825927734376, - "step": 1370 - }, - { - "epoch": 1.570452604611443, - "grad_norm": 6.558640480041504, - "learning_rate": 0.0001925293894577171, - "loss": 13.450588989257813, - "step": 1380 - }, - { - "epoch": 1.5818388841446058, - "grad_norm": 5.716969966888428, - "learning_rate": 0.00019247521534210954, - "loss": 13.9936279296875, - "step": 1390 - }, - { - "epoch": 1.5932251636777683, - "grad_norm": 6.217525005340576, - "learning_rate": 0.000192421041226502, - "loss": 13.740388488769531, - "step": 1400 - }, - { - "epoch": 1.6046114432109309, - "grad_norm": 6.646007061004639, - "learning_rate": 0.00019236686711089442, - "loss": 13.809278869628907, - "step": 1410 - }, - { - "epoch": 1.6159977227440934, - "grad_norm": 5.91151237487793, - "learning_rate": 0.00019231269299528687, - "loss": 13.775947570800781, - "step": 1420 - }, - { - "epoch": 1.6273840022772559, - "grad_norm": 5.8763957023620605, - "learning_rate": 0.0001922585188796793, - "loss": 13.332572937011719, - "step": 1430 - }, - { - "epoch": 1.6387702818104184, - "grad_norm": 5.781065940856934, - "learning_rate": 0.00019220434476407175, - "loss": 13.051596069335938, - "step": 1440 - }, - { - "epoch": 1.650156561343581, - "grad_norm": 5.9461445808410645, - "learning_rate": 0.00019215017064846417, - "loss": 13.740011596679688, - "step": 1450 - }, - { - "epoch": 1.6615428408767436, - "grad_norm": 5.9059224128723145, - "learning_rate": 0.00019209599653285662, - "loss": 13.289398193359375, - "step": 1460 - }, - { - "epoch": 1.672929120409906, - "grad_norm": 6.076962947845459, - "learning_rate": 0.00019204182241724905, - "loss": 13.65223388671875, - "step": 1470 - }, - { - "epoch": 1.6843153999430687, - "grad_norm": 6.304300308227539, - "learning_rate": 0.0001919876483016415, - "loss": 13.400830078125, - "step": 1480 - }, - { - "epoch": 1.6957016794762312, - "grad_norm": 6.031702041625977, - "learning_rate": 0.00019193347418603392, - "loss": 13.493438720703125, - "step": 1490 - }, - { - "epoch": 1.7070879590093937, - "grad_norm": 6.418713092803955, - "learning_rate": 0.00019187930007042635, - "loss": 13.696240234375, - "step": 1500 - }, - { - "epoch": 1.7184742385425562, - "grad_norm": 5.954122543334961, - "learning_rate": 0.0001918251259548188, - "loss": 13.905665588378906, - "step": 1510 - }, - { - "epoch": 1.7298605180757187, - "grad_norm": 6.412307262420654, - "learning_rate": 0.00019177095183921122, - "loss": 13.017045593261718, - "step": 1520 - }, - { - "epoch": 1.7412467976088815, - "grad_norm": 6.024705410003662, - "learning_rate": 0.00019171677772360368, - "loss": 13.734344482421875, - "step": 1530 - }, - { - "epoch": 1.7526330771420437, - "grad_norm": 6.235986709594727, - "learning_rate": 0.0001916626036079961, - "loss": 13.35974578857422, - "step": 1540 - }, - { - "epoch": 1.7640193566752065, - "grad_norm": 6.963014602661133, - "learning_rate": 0.00019160842949238855, - "loss": 13.176609802246094, - "step": 1550 - }, - { - "epoch": 1.7754056362083688, - "grad_norm": 5.929773807525635, - "learning_rate": 0.00019155425537678098, - "loss": 13.372627258300781, - "step": 1560 - }, - { - "epoch": 1.7867919157415315, - "grad_norm": 6.559890270233154, - "learning_rate": 0.00019150008126117343, - "loss": 13.397409057617187, - "step": 1570 - }, - { - "epoch": 1.798178195274694, - "grad_norm": 6.073424816131592, - "learning_rate": 0.00019144590714556585, - "loss": 13.51752166748047, - "step": 1580 - }, - { - "epoch": 1.8095644748078565, - "grad_norm": 6.313562393188477, - "learning_rate": 0.0001913917330299583, - "loss": 13.768846130371093, - "step": 1590 - }, - { - "epoch": 1.820950754341019, - "grad_norm": 6.578205108642578, - "learning_rate": 0.00019133755891435073, - "loss": 13.342623901367187, - "step": 1600 - }, - { - "epoch": 1.8323370338741816, - "grad_norm": 6.062254428863525, - "learning_rate": 0.00019128338479874318, - "loss": 13.149093627929688, - "step": 1610 - }, - { - "epoch": 1.8437233134073443, - "grad_norm": 6.041051864624023, - "learning_rate": 0.0001912292106831356, - "loss": 12.984222412109375, - "step": 1620 - }, - { - "epoch": 1.8551095929405066, - "grad_norm": 6.353360652923584, - "learning_rate": 0.00019117503656752803, - "loss": 13.421148681640625, - "step": 1630 - }, - { - "epoch": 1.8664958724736693, - "grad_norm": 6.004988670349121, - "learning_rate": 0.00019112086245192048, - "loss": 13.050341796875, - "step": 1640 - }, - { - "epoch": 1.8778821520068316, - "grad_norm": 5.7523932456970215, - "learning_rate": 0.0001910666883363129, - "loss": 12.99239501953125, - "step": 1650 - }, - { - "epoch": 1.8892684315399944, - "grad_norm": 6.328149795532227, - "learning_rate": 0.00019101251422070536, - "loss": 12.906375122070312, - "step": 1660 - }, - { - "epoch": 1.9006547110731569, - "grad_norm": 5.817428112030029, - "learning_rate": 0.00019095834010509778, - "loss": 12.926182556152344, - "step": 1670 - }, - { - "epoch": 1.9120409906063194, - "grad_norm": 6.4034247398376465, - "learning_rate": 0.00019090416598949023, - "loss": 12.87743682861328, - "step": 1680 - }, - { - "epoch": 1.923427270139482, - "grad_norm": 5.860933303833008, - "learning_rate": 0.00019084999187388266, - "loss": 13.04234619140625, - "step": 1690 - }, - { - "epoch": 1.9348135496726444, - "grad_norm": 5.991933345794678, - "learning_rate": 0.0001907958177582751, - "loss": 12.981626892089844, - "step": 1700 - }, - { - "epoch": 1.9461998292058071, - "grad_norm": 5.810291767120361, - "learning_rate": 0.00019074164364266753, - "loss": 13.029983520507812, - "step": 1710 - }, - { - "epoch": 1.9575861087389694, - "grad_norm": 6.07528829574585, - "learning_rate": 0.00019068746952705999, - "loss": 13.375914001464844, - "step": 1720 - }, - { - "epoch": 1.9689723882721322, - "grad_norm": 6.209125995635986, - "learning_rate": 0.0001906332954114524, - "loss": 13.055149841308594, - "step": 1730 - }, - { - "epoch": 1.9803586678052945, - "grad_norm": 5.829214572906494, - "learning_rate": 0.00019057912129584486, - "loss": 13.251846313476562, - "step": 1740 - }, - { - "epoch": 1.9917449473384572, - "grad_norm": 5.681432723999023, - "learning_rate": 0.0001905249471802373, - "loss": 13.179347229003906, - "step": 1750 - }, - { - "epoch": 2.0022772559066326, - "grad_norm": 6.714110374450684, - "learning_rate": 0.00019047077306462974, - "loss": 12.078095245361329, - "step": 1760 - }, - { - "epoch": 2.013663535439795, - "grad_norm": 5.950106620788574, - "learning_rate": 0.00019041659894902216, - "loss": 12.684372711181641, - "step": 1770 - }, - { - "epoch": 2.0250498149729577, - "grad_norm": 6.075564861297607, - "learning_rate": 0.00019036242483341459, - "loss": 12.492652893066406, - "step": 1780 - }, - { - "epoch": 2.03643609450612, - "grad_norm": 5.315488338470459, - "learning_rate": 0.00019030825071780704, - "loss": 12.558822631835938, - "step": 1790 - }, - { - "epoch": 2.0478223740392827, - "grad_norm": 6.444157600402832, - "learning_rate": 0.00019025407660219946, - "loss": 12.013819122314453, - "step": 1800 - }, - { - "epoch": 2.059208653572445, - "grad_norm": 7.443634510040283, - "learning_rate": 0.00019019990248659191, - "loss": 12.401322174072266, - "step": 1810 - }, - { - "epoch": 2.0705949331056077, - "grad_norm": 6.039581298828125, - "learning_rate": 0.00019014572837098434, - "loss": 11.99999542236328, - "step": 1820 - }, - { - "epoch": 2.0819812126387705, - "grad_norm": 6.211398601531982, - "learning_rate": 0.0001900915542553768, - "loss": 12.471622467041016, - "step": 1830 - }, - { - "epoch": 2.0933674921719327, - "grad_norm": 5.827359199523926, - "learning_rate": 0.00019003738013976921, - "loss": 12.675922393798828, - "step": 1840 - }, - { - "epoch": 2.1047537717050955, - "grad_norm": 6.171777248382568, - "learning_rate": 0.00018998320602416167, - "loss": 12.37955093383789, - "step": 1850 - }, - { - "epoch": 2.1161400512382578, - "grad_norm": 5.889030456542969, - "learning_rate": 0.00018992903190855412, - "loss": 12.620635986328125, - "step": 1860 - }, - { - "epoch": 2.1275263307714205, - "grad_norm": 5.960679531097412, - "learning_rate": 0.00018987485779294654, - "loss": 12.005726623535157, - "step": 1870 - }, - { - "epoch": 2.138912610304583, - "grad_norm": 6.680525302886963, - "learning_rate": 0.000189820683677339, - "loss": 12.5527099609375, - "step": 1880 - }, - { - "epoch": 2.1502988898377455, - "grad_norm": 6.306058883666992, - "learning_rate": 0.00018976650956173142, - "loss": 12.299282836914063, - "step": 1890 - }, - { - "epoch": 2.161685169370908, - "grad_norm": 6.001065254211426, - "learning_rate": 0.00018971233544612387, - "loss": 12.561842346191407, - "step": 1900 - }, - { - "epoch": 2.1730714489040706, - "grad_norm": 5.667927265167236, - "learning_rate": 0.0001896581613305163, - "loss": 12.342430877685548, - "step": 1910 - }, - { - "epoch": 2.1844577284372333, - "grad_norm": 5.749220848083496, - "learning_rate": 0.00018960398721490872, - "loss": 11.777164459228516, - "step": 1920 - }, - { - "epoch": 2.1958440079703956, - "grad_norm": 5.756120681762695, - "learning_rate": 0.00018954981309930114, - "loss": 12.499293518066406, - "step": 1930 - }, - { - "epoch": 2.2072302875035583, - "grad_norm": 5.476713180541992, - "learning_rate": 0.0001894956389836936, - "loss": 12.119635009765625, - "step": 1940 - }, - { - "epoch": 2.2186165670367206, - "grad_norm": 6.148690700531006, - "learning_rate": 0.00018944146486808602, - "loss": 12.262448120117188, - "step": 1950 - }, - { - "epoch": 2.2300028465698833, - "grad_norm": 6.188674449920654, - "learning_rate": 0.00018938729075247847, - "loss": 11.836429595947266, - "step": 1960 - }, - { - "epoch": 2.2413891261030456, - "grad_norm": 5.806178569793701, - "learning_rate": 0.00018933311663687092, - "loss": 12.008864593505859, - "step": 1970 - }, - { - "epoch": 2.2527754056362084, - "grad_norm": 6.355106830596924, - "learning_rate": 0.00018927894252126335, - "loss": 12.07802505493164, - "step": 1980 - }, - { - "epoch": 2.2641616851693707, - "grad_norm": 5.5258612632751465, - "learning_rate": 0.0001892247684056558, - "loss": 12.283776092529298, - "step": 1990 - }, - { - "epoch": 2.2755479647025334, - "grad_norm": 5.87457275390625, - "learning_rate": 0.00018917059429004822, - "loss": 12.212881469726563, - "step": 2000 - }, - { - "epoch": 2.286934244235696, - "grad_norm": 5.784579277038574, - "learning_rate": 0.00018911642017444067, - "loss": 12.175121307373047, - "step": 2010 - }, - { - "epoch": 2.2983205237688584, - "grad_norm": 5.964444637298584, - "learning_rate": 0.00018906766347039387, - "loss": 12.410752868652343, - "step": 2020 - }, - { - "epoch": 2.309706803302021, - "grad_norm": 5.863014221191406, - "learning_rate": 0.00018901890676634707, - "loss": 12.259366607666015, - "step": 2030 - }, - { - "epoch": 2.3210930828351835, - "grad_norm": 6.4362382888793945, - "learning_rate": 0.0001889647326507395, - "loss": 12.253427124023437, - "step": 2040 - }, - { - "epoch": 2.332479362368346, - "grad_norm": 5.864607810974121, - "learning_rate": 0.00018891055853513194, - "loss": 12.033533477783203, - "step": 2050 - }, - { - "epoch": 2.343865641901509, - "grad_norm": 6.0086822509765625, - "learning_rate": 0.00018885638441952437, - "loss": 12.2366943359375, - "step": 2060 - }, - { - "epoch": 2.355251921434671, - "grad_norm": 5.782958507537842, - "learning_rate": 0.00018880221030391682, - "loss": 12.448239135742188, - "step": 2070 - }, - { - "epoch": 2.366638200967834, - "grad_norm": 5.685044288635254, - "learning_rate": 0.00018874803618830924, - "loss": 11.95639419555664, - "step": 2080 - }, - { - "epoch": 2.3780244805009962, - "grad_norm": 5.853184700012207, - "learning_rate": 0.00018869386207270167, - "loss": 12.157659912109375, - "step": 2090 - }, - { - "epoch": 2.389410760034159, - "grad_norm": 5.814517974853516, - "learning_rate": 0.0001886396879570941, - "loss": 11.77392349243164, - "step": 2100 - }, - { - "epoch": 2.4007970395673213, - "grad_norm": 6.793750286102295, - "learning_rate": 0.00018858551384148654, - "loss": 12.14357681274414, - "step": 2110 - }, - { - "epoch": 2.412183319100484, - "grad_norm": 5.957283020019531, - "learning_rate": 0.00018853133972587897, - "loss": 11.823822021484375, - "step": 2120 - }, - { - "epoch": 2.4235695986336463, - "grad_norm": 6.891757488250732, - "learning_rate": 0.00018847716561027142, - "loss": 12.402496337890625, - "step": 2130 - }, - { - "epoch": 2.434955878166809, - "grad_norm": 5.9682135581970215, - "learning_rate": 0.00018842299149466387, - "loss": 12.376535797119141, - "step": 2140 - }, - { - "epoch": 2.4463421576999718, - "grad_norm": 6.419319152832031, - "learning_rate": 0.0001883688173790563, - "loss": 11.78542709350586, - "step": 2150 - }, - { - "epoch": 2.457728437233134, - "grad_norm": 5.599404335021973, - "learning_rate": 0.00018831464326344875, - "loss": 11.65877685546875, - "step": 2160 - }, - { - "epoch": 2.469114716766297, - "grad_norm": 6.27384090423584, - "learning_rate": 0.00018826046914784117, - "loss": 12.384040832519531, - "step": 2170 - }, - { - "epoch": 2.480500996299459, - "grad_norm": 6.210123062133789, - "learning_rate": 0.00018820629503223362, - "loss": 11.9489990234375, - "step": 2180 - }, - { - "epoch": 2.491887275832622, - "grad_norm": 6.209619998931885, - "learning_rate": 0.00018815212091662605, - "loss": 12.142713928222657, - "step": 2190 - }, - { - "epoch": 2.503273555365784, - "grad_norm": 5.776041030883789, - "learning_rate": 0.0001880979468010185, - "loss": 12.368475341796875, - "step": 2200 - }, - { - "epoch": 2.514659834898947, - "grad_norm": 5.5831217765808105, - "learning_rate": 0.00018804377268541092, - "loss": 12.049860382080078, - "step": 2210 - }, - { - "epoch": 2.526046114432109, - "grad_norm": 6.643979072570801, - "learning_rate": 0.00018798959856980335, - "loss": 12.57002944946289, - "step": 2220 - }, - { - "epoch": 2.537432393965272, - "grad_norm": 5.437014102935791, - "learning_rate": 0.00018793542445419577, - "loss": 12.165242767333984, - "step": 2230 - }, - { - "epoch": 2.5488186734984346, - "grad_norm": 6.001914024353027, - "learning_rate": 0.00018788125033858822, - "loss": 11.841060638427734, - "step": 2240 - }, - { - "epoch": 2.560204953031597, - "grad_norm": 6.4120259284973145, - "learning_rate": 0.00018782707622298065, - "loss": 11.942637634277343, - "step": 2250 - }, - { - "epoch": 2.571591232564759, - "grad_norm": 6.355876922607422, - "learning_rate": 0.0001877729021073731, - "loss": 11.278862762451173, - "step": 2260 - }, - { - "epoch": 2.582977512097922, - "grad_norm": 5.9582600593566895, - "learning_rate": 0.00018771872799176555, - "loss": 12.05511932373047, - "step": 2270 - }, - { - "epoch": 2.5943637916310847, - "grad_norm": 6.194489479064941, - "learning_rate": 0.00018766455387615798, - "loss": 11.693316650390624, - "step": 2280 - }, - { - "epoch": 2.605750071164247, - "grad_norm": 6.129605770111084, - "learning_rate": 0.00018761037976055043, - "loss": 11.912431335449218, - "step": 2290 - }, - { - "epoch": 2.6171363506974097, - "grad_norm": 5.7922444343566895, - "learning_rate": 0.00018755620564494285, - "loss": 11.996822357177734, - "step": 2300 - }, - { - "epoch": 2.628522630230572, - "grad_norm": 5.562745094299316, - "learning_rate": 0.0001875020315293353, - "loss": 12.01687240600586, - "step": 2310 - }, - { - "epoch": 2.6399089097637347, - "grad_norm": 5.735344886779785, - "learning_rate": 0.00018744785741372773, - "loss": 12.01505355834961, - "step": 2320 - }, - { - "epoch": 2.6512951892968974, - "grad_norm": 5.924304485321045, - "learning_rate": 0.00018739368329812018, - "loss": 11.80968017578125, - "step": 2330 - }, - { - "epoch": 2.6626814688300597, - "grad_norm": 5.843868732452393, - "learning_rate": 0.0001873395091825126, - "loss": 12.081328582763671, - "step": 2340 - }, - { - "epoch": 2.6740677483632225, - "grad_norm": 5.781383514404297, - "learning_rate": 0.00018728533506690506, - "loss": 12.280255889892578, - "step": 2350 - }, - { - "epoch": 2.6854540278963848, - "grad_norm": 5.876210689544678, - "learning_rate": 0.00018723116095129748, - "loss": 11.941732788085938, - "step": 2360 - }, - { - "epoch": 2.6968403074295475, - "grad_norm": 5.828925609588623, - "learning_rate": 0.0001871769868356899, - "loss": 11.539690399169922, - "step": 2370 - }, - { - "epoch": 2.70822658696271, - "grad_norm": 6.443966388702393, - "learning_rate": 0.00018712281272008236, - "loss": 11.438835144042969, - "step": 2380 - }, - { - "epoch": 2.7196128664958725, - "grad_norm": 5.669212818145752, - "learning_rate": 0.00018706863860447478, - "loss": 12.036208343505859, - "step": 2390 - }, - { - "epoch": 2.730999146029035, - "grad_norm": 5.616700649261475, - "learning_rate": 0.00018701446448886723, - "loss": 11.880667114257813, - "step": 2400 - }, - { - "epoch": 2.7423854255621976, - "grad_norm": 5.890092372894287, - "learning_rate": 0.00018696029037325966, - "loss": 11.707859802246094, - "step": 2410 - }, - { - "epoch": 2.7537717050953603, - "grad_norm": 5.797362804412842, - "learning_rate": 0.0001869061162576521, - "loss": 11.643144989013672, - "step": 2420 - }, - { - "epoch": 2.7651579846285226, - "grad_norm": 5.619494438171387, - "learning_rate": 0.00018685194214204453, - "loss": 11.876329803466797, - "step": 2430 - }, - { - "epoch": 2.7765442641616853, - "grad_norm": 5.690425395965576, - "learning_rate": 0.00018679776802643698, - "loss": 11.896712493896484, - "step": 2440 - }, - { - "epoch": 2.7879305436948476, - "grad_norm": 5.649746894836426, - "learning_rate": 0.0001867435939108294, - "loss": 11.640943908691407, - "step": 2450 - }, - { - "epoch": 2.7993168232280103, - "grad_norm": 5.736121654510498, - "learning_rate": 0.00018668941979522186, - "loss": 11.80575942993164, - "step": 2460 - }, - { - "epoch": 2.8107031027611726, - "grad_norm": 5.586820125579834, - "learning_rate": 0.00018663524567961428, - "loss": 11.54848861694336, - "step": 2470 - }, - { - "epoch": 2.8220893822943354, - "grad_norm": 5.552623271942139, - "learning_rate": 0.00018658107156400674, - "loss": 11.699449157714843, - "step": 2480 - }, - { - "epoch": 2.8334756618274977, - "grad_norm": 5.449943542480469, - "learning_rate": 0.00018652689744839916, - "loss": 12.021916961669922, - "step": 2490 - }, - { - "epoch": 2.8448619413606604, - "grad_norm": 6.037112712860107, - "learning_rate": 0.0001864727233327916, - "loss": 11.778411865234375, - "step": 2500 - }, - { - "epoch": 2.856248220893823, - "grad_norm": 5.5578813552856445, - "learning_rate": 0.00018641854921718404, - "loss": 11.259034729003906, - "step": 2510 - }, - { - "epoch": 2.8676345004269854, - "grad_norm": 5.7229719161987305, - "learning_rate": 0.00018636437510157646, - "loss": 11.734458923339844, - "step": 2520 - }, - { - "epoch": 2.879020779960148, - "grad_norm": 5.95925235748291, - "learning_rate": 0.0001863102009859689, - "loss": 11.601738739013673, - "step": 2530 - }, - { - "epoch": 2.8904070594933104, - "grad_norm": 5.8496479988098145, - "learning_rate": 0.00018625602687036134, - "loss": 11.096221923828125, - "step": 2540 - }, - { - "epoch": 2.901793339026473, - "grad_norm": 6.491213321685791, - "learning_rate": 0.0001862018527547538, - "loss": 11.531390380859374, - "step": 2550 - }, - { - "epoch": 2.9131796185596355, - "grad_norm": 5.992333889007568, - "learning_rate": 0.0001861476786391462, - "loss": 11.45031509399414, - "step": 2560 - }, - { - "epoch": 2.924565898092798, - "grad_norm": 5.646914005279541, - "learning_rate": 0.00018609350452353866, - "loss": 11.133451080322265, - "step": 2570 - }, - { - "epoch": 2.9359521776259605, - "grad_norm": 5.3638176918029785, - "learning_rate": 0.0001860393304079311, - "loss": 11.416927337646484, - "step": 2580 - }, - { - "epoch": 2.9473384571591232, - "grad_norm": 5.817018985748291, - "learning_rate": 0.00018598515629232354, - "loss": 11.489157104492188, - "step": 2590 - }, - { - "epoch": 2.958724736692286, - "grad_norm": 6.094698905944824, - "learning_rate": 0.000185930982176716, - "loss": 11.472473907470704, - "step": 2600 - }, - { - "epoch": 2.9701110162254483, - "grad_norm": 6.355233669281006, - "learning_rate": 0.00018587680806110842, - "loss": 11.384600067138672, - "step": 2610 - }, - { - "epoch": 2.981497295758611, - "grad_norm": 5.65998649597168, - "learning_rate": 0.00018582263394550087, - "loss": 11.349325561523438, - "step": 2620 - }, - { - "epoch": 2.9928835752917733, - "grad_norm": 6.039316654205322, - "learning_rate": 0.0001857684598298933, - "loss": 11.65872802734375, - "step": 2630 - }, - { - "epoch": 3.0034158838599487, - "grad_norm": 5.6023993492126465, - "learning_rate": 0.00018571428571428572, - "loss": 10.584882354736328, - "step": 2640 - }, - { - "epoch": 3.0148021633931115, - "grad_norm": 6.057605743408203, - "learning_rate": 0.00018566011159867814, - "loss": 11.056887817382812, - "step": 2650 - }, - { - "epoch": 3.0261884429262738, - "grad_norm": 5.620913028717041, - "learning_rate": 0.0001856059374830706, - "loss": 11.04276351928711, - "step": 2660 - }, - { - "epoch": 3.0375747224594365, - "grad_norm": 5.724585056304932, - "learning_rate": 0.00018555176336746302, - "loss": 10.584162139892578, - "step": 2670 - }, - { - "epoch": 3.048961001992599, - "grad_norm": 6.663942337036133, - "learning_rate": 0.00018549758925185547, - "loss": 10.474478149414063, - "step": 2680 - }, - { - "epoch": 3.0603472815257615, - "grad_norm": 5.8548688888549805, - "learning_rate": 0.0001854434151362479, - "loss": 10.782363891601562, - "step": 2690 - }, - { - "epoch": 3.071733561058924, - "grad_norm": 5.4828314781188965, - "learning_rate": 0.00018538924102064035, - "loss": 10.86609115600586, - "step": 2700 - }, - { - "epoch": 3.0831198405920865, - "grad_norm": 5.855891704559326, - "learning_rate": 0.0001853350669050328, - "loss": 10.718875885009766, - "step": 2710 - }, - { - "epoch": 3.0945061201252493, - "grad_norm": 5.583263874053955, - "learning_rate": 0.00018528089278942522, - "loss": 10.526648712158202, - "step": 2720 - }, - { - "epoch": 3.1058923996584116, - "grad_norm": 5.6130690574646, - "learning_rate": 0.00018522671867381767, - "loss": 10.729558563232422, - "step": 2730 - }, - { - "epoch": 3.1172786791915743, - "grad_norm": 6.017326354980469, - "learning_rate": 0.0001851725445582101, - "loss": 10.696547698974609, - "step": 2740 - }, - { - "epoch": 3.1286649587247366, - "grad_norm": 5.918975353240967, - "learning_rate": 0.00018511837044260255, - "loss": 10.813334655761718, - "step": 2750 - }, - { - "epoch": 3.1400512382578993, - "grad_norm": 5.893662929534912, - "learning_rate": 0.00018506419632699497, - "loss": 10.956719970703125, - "step": 2760 - }, - { - "epoch": 3.1514375177910616, - "grad_norm": 6.190542221069336, - "learning_rate": 0.00018501002221138743, - "loss": 10.913722229003906, - "step": 2770 - }, - { - "epoch": 3.1628237973242244, - "grad_norm": 6.2601237297058105, - "learning_rate": 0.00018495584809577985, - "loss": 10.506605529785157, - "step": 2780 - }, - { - "epoch": 3.1742100768573867, - "grad_norm": 5.85802698135376, - "learning_rate": 0.00018490167398017227, - "loss": 11.01781005859375, - "step": 2790 - }, - { - "epoch": 3.1855963563905494, - "grad_norm": 5.59824800491333, - "learning_rate": 0.0001848474998645647, - "loss": 10.554005432128907, - "step": 2800 - }, - { - "epoch": 3.196982635923712, - "grad_norm": 5.9762282371521, - "learning_rate": 0.00018479332574895715, - "loss": 11.067266082763672, - "step": 2810 - }, - { - "epoch": 3.2083689154568744, - "grad_norm": 5.892134189605713, - "learning_rate": 0.0001847391516333496, - "loss": 10.800299072265625, - "step": 2820 - }, - { - "epoch": 3.219755194990037, - "grad_norm": 6.411027431488037, - "learning_rate": 0.00018468497751774203, - "loss": 10.963347625732421, - "step": 2830 - }, - { - "epoch": 3.2311414745231994, - "grad_norm": 5.7685394287109375, - "learning_rate": 0.00018463080340213448, - "loss": 10.73892822265625, - "step": 2840 - }, - { - "epoch": 3.242527754056362, - "grad_norm": 5.961293697357178, - "learning_rate": 0.0001845766292865269, - "loss": 10.619090270996093, - "step": 2850 - }, - { - "epoch": 3.2539140335895245, - "grad_norm": 7.078546524047852, - "learning_rate": 0.00018452245517091935, - "loss": 10.756937408447266, - "step": 2860 - }, - { - "epoch": 3.265300313122687, - "grad_norm": 6.000650405883789, - "learning_rate": 0.00018446828105531178, - "loss": 11.32503662109375, - "step": 2870 - }, - { - "epoch": 3.2766865926558495, - "grad_norm": 5.751965045928955, - "learning_rate": 0.00018441410693970423, - "loss": 11.059173583984375, - "step": 2880 - }, - { - "epoch": 3.2880728721890122, - "grad_norm": 6.229584693908691, - "learning_rate": 0.00018435993282409665, - "loss": 10.77396469116211, - "step": 2890 - }, - { - "epoch": 3.299459151722175, - "grad_norm": 5.8026123046875, - "learning_rate": 0.0001843057587084891, - "loss": 10.68900146484375, - "step": 2900 - }, - { - "epoch": 3.3108454312553373, - "grad_norm": 5.8648529052734375, - "learning_rate": 0.00018425158459288153, - "loss": 10.619082641601562, - "step": 2910 - }, - { - "epoch": 3.3222317107885, - "grad_norm": 6.061291217803955, - "learning_rate": 0.00018419741047727395, - "loss": 11.001821899414063, - "step": 2920 - }, - { - "epoch": 3.3336179903216623, - "grad_norm": 6.154610633850098, - "learning_rate": 0.0001841432363616664, - "loss": 10.811206817626953, - "step": 2930 - }, - { - "epoch": 3.345004269854825, - "grad_norm": 5.7560133934021, - "learning_rate": 0.00018408906224605883, - "loss": 11.004631805419923, - "step": 2940 - }, - { - "epoch": 3.3563905493879873, - "grad_norm": 5.845217227935791, - "learning_rate": 0.00018403488813045128, - "loss": 10.858744049072266, - "step": 2950 - }, - { - "epoch": 3.36777682892115, - "grad_norm": 5.666683197021484, - "learning_rate": 0.0001839807140148437, - "loss": 11.276985931396485, - "step": 2960 - }, - { - "epoch": 3.3791631084543123, - "grad_norm": 6.077807426452637, - "learning_rate": 0.00018392653989923616, - "loss": 10.676035308837891, - "step": 2970 - }, - { - "epoch": 3.390549387987475, - "grad_norm": 5.791286468505859, - "learning_rate": 0.00018387236578362858, - "loss": 10.966339874267579, - "step": 2980 - }, - { - "epoch": 3.401935667520638, - "grad_norm": 6.040390968322754, - "learning_rate": 0.00018381819166802103, - "loss": 10.893710327148437, - "step": 2990 - }, - { - "epoch": 3.4133219470538, - "grad_norm": 5.881897926330566, - "learning_rate": 0.00018376401755241346, - "loss": 10.969651031494141, - "step": 3000 - }, - { - "epoch": 3.424708226586963, - "grad_norm": 6.096680641174316, - "learning_rate": 0.0001837098434368059, - "loss": 10.903501892089844, - "step": 3010 - }, - { - "epoch": 3.436094506120125, - "grad_norm": 6.454861164093018, - "learning_rate": 0.00018365566932119834, - "loss": 10.728018188476563, - "step": 3020 - }, - { - "epoch": 3.447480785653288, - "grad_norm": 5.956232070922852, - "learning_rate": 0.0001836014952055908, - "loss": 10.590874481201173, - "step": 3030 - }, - { - "epoch": 3.45886706518645, - "grad_norm": 5.892461776733398, - "learning_rate": 0.0001835473210899832, - "loss": 10.788093566894531, - "step": 3040 - }, - { - "epoch": 3.470253344719613, - "grad_norm": 5.8851423263549805, - "learning_rate": 0.00018349314697437566, - "loss": 10.701885223388672, - "step": 3050 - }, - { - "epoch": 3.481639624252775, - "grad_norm": 6.0876078605651855, - "learning_rate": 0.0001834389728587681, - "loss": 10.101341247558594, - "step": 3060 - }, - { - "epoch": 3.493025903785938, - "grad_norm": 6.961325168609619, - "learning_rate": 0.0001833847987431605, - "loss": 10.46026153564453, - "step": 3070 - }, - { - "epoch": 3.5044121833191006, - "grad_norm": 6.137760639190674, - "learning_rate": 0.00018333062462755296, - "loss": 10.805878448486329, - "step": 3080 - }, - { - "epoch": 3.515798462852263, - "grad_norm": 5.97675895690918, - "learning_rate": 0.0001832764505119454, - "loss": 11.289669799804688, - "step": 3090 - }, - { - "epoch": 3.5271847423854257, - "grad_norm": 6.564485549926758, - "learning_rate": 0.00018322227639633784, - "loss": 10.845318603515626, - "step": 3100 - }, - { - "epoch": 3.538571021918588, - "grad_norm": 6.3509979248046875, - "learning_rate": 0.00018316810228073026, - "loss": 10.126212310791015, - "step": 3110 - }, - { - "epoch": 3.5499573014517507, - "grad_norm": 5.984918117523193, - "learning_rate": 0.00018311392816512272, - "loss": 10.31814956665039, - "step": 3120 - }, - { - "epoch": 3.5613435809849134, - "grad_norm": 5.558696746826172, - "learning_rate": 0.00018305975404951514, - "loss": 10.726148223876953, - "step": 3130 - }, - { - "epoch": 3.5727298605180757, - "grad_norm": 6.394120216369629, - "learning_rate": 0.0001830055799339076, - "loss": 10.786263275146485, - "step": 3140 - }, - { - "epoch": 3.584116140051238, - "grad_norm": 5.847959041595459, - "learning_rate": 0.00018295140581830002, - "loss": 10.560782623291015, - "step": 3150 - }, - { - "epoch": 3.5955024195844008, - "grad_norm": 5.2607879638671875, - "learning_rate": 0.00018289723170269247, - "loss": 10.637435913085938, - "step": 3160 - }, - { - "epoch": 3.6068886991175635, - "grad_norm": 6.327804088592529, - "learning_rate": 0.00018284305758708492, - "loss": 11.141863250732422, - "step": 3170 - }, - { - "epoch": 3.618274978650726, - "grad_norm": 5.784237861633301, - "learning_rate": 0.00018278888347147734, - "loss": 10.513392639160156, - "step": 3180 - }, - { - "epoch": 3.6296612581838885, - "grad_norm": 5.775552272796631, - "learning_rate": 0.0001827347093558698, - "loss": 10.784326934814453, - "step": 3190 - }, - { - "epoch": 3.641047537717051, - "grad_norm": 5.3834075927734375, - "learning_rate": 0.00018268053524026222, - "loss": 11.03073959350586, - "step": 3200 - }, - { - "epoch": 3.6524338172502135, - "grad_norm": 5.9715800285339355, - "learning_rate": 0.00018262636112465464, - "loss": 11.21976318359375, - "step": 3210 - }, - { - "epoch": 3.6638200967833763, - "grad_norm": 5.637823104858398, - "learning_rate": 0.00018257218700904707, - "loss": 10.567466735839844, - "step": 3220 - }, - { - "epoch": 3.6752063763165386, - "grad_norm": 6.525228500366211, - "learning_rate": 0.00018251801289343952, - "loss": 10.669526672363281, - "step": 3230 - }, - { - "epoch": 3.686592655849701, - "grad_norm": 5.675905704498291, - "learning_rate": 0.00018246383877783194, - "loss": 10.238113403320312, - "step": 3240 - }, - { - "epoch": 3.6979789353828636, - "grad_norm": 6.053264617919922, - "learning_rate": 0.0001824096646622244, - "loss": 10.952618408203126, - "step": 3250 - }, - { - "epoch": 3.7093652149160263, - "grad_norm": 6.067233562469482, - "learning_rate": 0.00018235549054661682, - "loss": 10.320521545410156, - "step": 3260 - }, - { - "epoch": 3.7207514944491886, - "grad_norm": 6.502700328826904, - "learning_rate": 0.00018230131643100927, - "loss": 10.383914947509766, - "step": 3270 - }, - { - "epoch": 3.7321377739823514, - "grad_norm": 5.968015670776367, - "learning_rate": 0.00018224714231540172, - "loss": 10.5248779296875, - "step": 3280 - }, - { - "epoch": 3.7435240535155136, - "grad_norm": 5.4662628173828125, - "learning_rate": 0.00018219296819979415, - "loss": 10.531697082519532, - "step": 3290 - }, - { - "epoch": 3.7549103330486764, - "grad_norm": 5.9595255851745605, - "learning_rate": 0.0001821387940841866, - "loss": 10.61429443359375, - "step": 3300 - }, - { - "epoch": 3.766296612581839, - "grad_norm": 6.310218334197998, - "learning_rate": 0.00018208461996857902, - "loss": 10.1953125, - "step": 3310 - }, - { - "epoch": 3.7776828921150014, - "grad_norm": 5.952932834625244, - "learning_rate": 0.00018203044585297148, - "loss": 10.674753570556641, - "step": 3320 - }, - { - "epoch": 3.7890691716481637, - "grad_norm": 6.101319313049316, - "learning_rate": 0.0001819762717373639, - "loss": 10.45705337524414, - "step": 3330 - }, - { - "epoch": 3.8004554511813264, - "grad_norm": 5.611819744110107, - "learning_rate": 0.00018192209762175632, - "loss": 10.72674560546875, - "step": 3340 - }, - { - "epoch": 3.811841730714489, - "grad_norm": 5.927121162414551, - "learning_rate": 0.00018186792350614878, - "loss": 10.702400970458985, - "step": 3350 - }, - { - "epoch": 3.8232280102476515, - "grad_norm": 6.374614715576172, - "learning_rate": 0.0001818137493905412, - "loss": 10.934248352050782, - "step": 3360 - }, - { - "epoch": 3.834614289780814, - "grad_norm": 6.032021522521973, - "learning_rate": 0.00018175957527493363, - "loss": 10.430912017822266, - "step": 3370 - }, - { - "epoch": 3.8460005693139765, - "grad_norm": 5.958329677581787, - "learning_rate": 0.00018170540115932608, - "loss": 10.657380676269531, - "step": 3380 - }, - { - "epoch": 3.8573868488471392, - "grad_norm": 5.539936542510986, - "learning_rate": 0.00018165122704371853, - "loss": 10.864521026611328, - "step": 3390 - }, - { - "epoch": 3.868773128380302, - "grad_norm": 5.501955986022949, - "learning_rate": 0.00018159705292811095, - "loss": 10.671641540527343, - "step": 3400 - }, - { - "epoch": 3.8801594079134643, - "grad_norm": 6.021368503570557, - "learning_rate": 0.0001815428788125034, - "loss": 10.651559448242187, - "step": 3410 - }, - { - "epoch": 3.8915456874466265, - "grad_norm": 5.438447952270508, - "learning_rate": 0.00018148870469689583, - "loss": 10.541700744628907, - "step": 3420 - }, - { - "epoch": 3.9029319669797893, - "grad_norm": 5.7153215408325195, - "learning_rate": 0.00018143453058128828, - "loss": 10.635433959960938, - "step": 3430 - }, - { - "epoch": 3.914318246512952, - "grad_norm": 5.716714382171631, - "learning_rate": 0.0001813803564656807, - "loss": 10.716056060791015, - "step": 3440 - }, - { - "epoch": 3.9257045260461143, - "grad_norm": 5.737730979919434, - "learning_rate": 0.00018132618235007316, - "loss": 10.449008178710937, - "step": 3450 - }, - { - "epoch": 3.937090805579277, - "grad_norm": 5.603618621826172, - "learning_rate": 0.00018127200823446558, - "loss": 10.800203704833985, - "step": 3460 - }, - { - "epoch": 3.9484770851124393, - "grad_norm": 5.745213508605957, - "learning_rate": 0.00018121783411885803, - "loss": 10.808299255371093, - "step": 3470 - }, - { - "epoch": 3.959863364645602, - "grad_norm": 5.7340850830078125, - "learning_rate": 0.00018116366000325046, - "loss": 10.769782257080077, - "step": 3480 - }, - { - "epoch": 3.971249644178765, - "grad_norm": 6.192749500274658, - "learning_rate": 0.00018110948588764288, - "loss": 10.350373840332031, - "step": 3490 - }, - { - "epoch": 3.982635923711927, - "grad_norm": 5.680601119995117, - "learning_rate": 0.00018105531177203533, - "loss": 10.832538604736328, - "step": 3500 - }, - { - "epoch": 3.99402220324509, - "grad_norm": 5.494757652282715, - "learning_rate": 0.00018100113765642776, - "loss": 10.541521453857422, - "step": 3510 - }, - { - "epoch": 4.004554511813265, - "grad_norm": 5.503895282745361, - "learning_rate": 0.0001809469635408202, - "loss": 9.482205200195313, - "step": 3520 - }, - { - "epoch": 4.015940791346427, - "grad_norm": 5.427241802215576, - "learning_rate": 0.00018089278942521263, - "loss": 9.544134521484375, - "step": 3530 - }, - { - "epoch": 4.02732707087959, - "grad_norm": 6.108213424682617, - "learning_rate": 0.00018083861530960509, - "loss": 10.208637237548828, - "step": 3540 - }, - { - "epoch": 4.038713350412753, - "grad_norm": 5.874690055847168, - "learning_rate": 0.0001807844411939975, - "loss": 9.760848236083984, - "step": 3550 - }, - { - "epoch": 4.050099629945915, - "grad_norm": 5.867772102355957, - "learning_rate": 0.00018073026707838996, - "loss": 9.961907958984375, - "step": 3560 - }, - { - "epoch": 4.061485909479078, - "grad_norm": 5.899345874786377, - "learning_rate": 0.00018067609296278239, - "loss": 9.273954010009765, - "step": 3570 - }, - { - "epoch": 4.07287218901224, - "grad_norm": 6.068745136260986, - "learning_rate": 0.00018062191884717484, - "loss": 10.06835708618164, - "step": 3580 - }, - { - "epoch": 4.084258468545403, - "grad_norm": 6.152563095092773, - "learning_rate": 0.00018056774473156726, - "loss": 9.612554931640625, - "step": 3590 - }, - { - "epoch": 4.095644748078565, - "grad_norm": 6.043203830718994, - "learning_rate": 0.0001805135706159597, - "loss": 9.750363159179688, - "step": 3600 - }, - { - "epoch": 4.107031027611728, - "grad_norm": 5.493103504180908, - "learning_rate": 0.00018045939650035214, - "loss": 9.853363800048829, - "step": 3610 - }, - { - "epoch": 4.11841730714489, - "grad_norm": 6.1369242668151855, - "learning_rate": 0.0001804052223847446, - "loss": 10.104590606689452, - "step": 3620 - }, - { - "epoch": 4.129803586678053, - "grad_norm": 6.410492897033691, - "learning_rate": 0.00018035104826913701, - "loss": 10.213319396972656, - "step": 3630 - }, - { - "epoch": 4.141189866211215, - "grad_norm": 5.654990196228027, - "learning_rate": 0.00018029687415352944, - "loss": 9.714891052246093, - "step": 3640 - }, - { - "epoch": 4.152576145744378, - "grad_norm": 6.079676151275635, - "learning_rate": 0.0001802427000379219, - "loss": 9.392752838134765, - "step": 3650 - }, - { - "epoch": 4.163962425277541, - "grad_norm": 6.427825927734375, - "learning_rate": 0.00018018852592231431, - "loss": 9.643639373779298, - "step": 3660 - }, - { - "epoch": 4.175348704810703, - "grad_norm": 5.963044166564941, - "learning_rate": 0.00018013435180670677, - "loss": 9.924317932128906, - "step": 3670 - }, - { - "epoch": 4.1867349843438655, - "grad_norm": 5.854789733886719, - "learning_rate": 0.0001800801776910992, - "loss": 9.63187255859375, - "step": 3680 - }, - { - "epoch": 4.198121263877028, - "grad_norm": 5.785211086273193, - "learning_rate": 0.00018002600357549164, - "loss": 9.579486846923828, - "step": 3690 - }, - { - "epoch": 4.209507543410191, - "grad_norm": 5.718067169189453, - "learning_rate": 0.00017997182945988407, - "loss": 9.896450805664063, - "step": 3700 - }, - { - "epoch": 4.220893822943353, - "grad_norm": 6.051468849182129, - "learning_rate": 0.00017991765534427652, - "loss": 9.21618194580078, - "step": 3710 - }, - { - "epoch": 4.2322801024765155, - "grad_norm": 5.758811950683594, - "learning_rate": 0.00017986348122866894, - "loss": 9.783828735351562, - "step": 3720 - }, - { - "epoch": 4.243666382009678, - "grad_norm": 6.194498062133789, - "learning_rate": 0.0001798093071130614, - "loss": 9.940241241455078, - "step": 3730 - }, - { - "epoch": 4.255052661542841, - "grad_norm": 5.6434125900268555, - "learning_rate": 0.00017975513299745385, - "loss": 9.683363342285157, - "step": 3740 - }, - { - "epoch": 4.266438941076004, - "grad_norm": 5.981060028076172, - "learning_rate": 0.00017970095888184627, - "loss": 10.015727996826172, - "step": 3750 - }, - { - "epoch": 4.277825220609166, - "grad_norm": 5.95534610748291, - "learning_rate": 0.0001796467847662387, - "loss": 10.008995819091798, - "step": 3760 - }, - { - "epoch": 4.289211500142328, - "grad_norm": 6.140223503112793, - "learning_rate": 0.00017959261065063112, - "loss": 9.99697265625, - "step": 3770 - }, - { - "epoch": 4.300597779675491, - "grad_norm": 5.725556373596191, - "learning_rate": 0.00017953843653502357, - "loss": 9.58441390991211, - "step": 3780 - }, - { - "epoch": 4.311984059208654, - "grad_norm": 6.234734058380127, - "learning_rate": 0.000179484262419416, - "loss": 10.283954620361328, - "step": 3790 - }, - { - "epoch": 4.323370338741816, - "grad_norm": 5.869581699371338, - "learning_rate": 0.00017943008830380845, - "loss": 9.939014434814453, - "step": 3800 - }, - { - "epoch": 4.334756618274978, - "grad_norm": 6.069183826446533, - "learning_rate": 0.00017937591418820087, - "loss": 9.780485534667969, - "step": 3810 - }, - { - "epoch": 4.346142897808141, - "grad_norm": 6.241194725036621, - "learning_rate": 0.00017932174007259332, - "loss": 9.680929565429688, - "step": 3820 - }, - { - "epoch": 4.357529177341304, - "grad_norm": 5.660580635070801, - "learning_rate": 0.00017926756595698575, - "loss": 9.670439147949219, - "step": 3830 - }, - { - "epoch": 4.368915456874467, - "grad_norm": 6.770995140075684, - "learning_rate": 0.0001792133918413782, - "loss": 10.093508911132812, - "step": 3840 - }, - { - "epoch": 4.380301736407628, - "grad_norm": 6.052023887634277, - "learning_rate": 0.00017915921772577065, - "loss": 10.153280639648438, - "step": 3850 - }, - { - "epoch": 4.391688015940791, - "grad_norm": 5.781612396240234, - "learning_rate": 0.00017910504361016308, - "loss": 9.563631439208985, - "step": 3860 - }, - { - "epoch": 4.403074295473954, - "grad_norm": 6.453577995300293, - "learning_rate": 0.00017905086949455553, - "loss": 10.253567504882813, - "step": 3870 - }, - { - "epoch": 4.414460575007117, - "grad_norm": 5.5569539070129395, - "learning_rate": 0.00017899669537894795, - "loss": 9.466841888427734, - "step": 3880 - }, - { - "epoch": 4.425846854540279, - "grad_norm": 6.755964756011963, - "learning_rate": 0.0001789425212633404, - "loss": 10.00699234008789, - "step": 3890 - }, - { - "epoch": 4.437233134073441, - "grad_norm": 5.5527472496032715, - "learning_rate": 0.00017888834714773283, - "loss": 9.851504516601562, - "step": 3900 - }, - { - "epoch": 4.448619413606604, - "grad_norm": 6.0654096603393555, - "learning_rate": 0.00017883417303212525, - "loss": 9.837632751464843, - "step": 3910 - }, - { - "epoch": 4.460005693139767, - "grad_norm": 5.762067794799805, - "learning_rate": 0.00017877999891651768, - "loss": 9.580419158935547, - "step": 3920 - }, - { - "epoch": 4.471391972672929, - "grad_norm": 6.3049139976501465, - "learning_rate": 0.00017872582480091013, - "loss": 10.155924987792968, - "step": 3930 - }, - { - "epoch": 4.482778252206091, - "grad_norm": 6.560263633728027, - "learning_rate": 0.00017867165068530255, - "loss": 9.540550231933594, - "step": 3940 - }, - { - "epoch": 4.494164531739254, - "grad_norm": 6.04788064956665, - "learning_rate": 0.000178617476569695, - "loss": 9.617048645019532, - "step": 3950 - }, - { - "epoch": 4.505550811272417, - "grad_norm": 6.14173698425293, - "learning_rate": 0.00017856330245408746, - "loss": 9.965501403808593, - "step": 3960 - }, - { - "epoch": 4.5169370908055795, - "grad_norm": 6.081863880157471, - "learning_rate": 0.00017850912833847988, - "loss": 10.01316146850586, - "step": 3970 - }, - { - "epoch": 4.528323370338741, - "grad_norm": 5.400860786437988, - "learning_rate": 0.00017845495422287233, - "loss": 10.162345886230469, - "step": 3980 - }, - { - "epoch": 4.539709649871904, - "grad_norm": 5.537269592285156, - "learning_rate": 0.00017840078010726476, - "loss": 9.989215850830078, - "step": 3990 - }, - { - "epoch": 4.551095929405067, - "grad_norm": 5.948838710784912, - "learning_rate": 0.0001783466059916572, - "loss": 10.042298126220704, - "step": 4000 - }, - { - "epoch": 4.5624822089382295, - "grad_norm": 5.892082691192627, - "learning_rate": 0.00017829243187604963, - "loss": 10.084636688232422, - "step": 4010 - }, - { - "epoch": 4.573868488471392, - "grad_norm": 6.188459396362305, - "learning_rate": 0.00017823825776044208, - "loss": 9.930858612060547, - "step": 4020 - }, - { - "epoch": 4.585254768004555, - "grad_norm": 6.035775661468506, - "learning_rate": 0.0001781840836448345, - "loss": 9.804845428466797, - "step": 4030 - }, - { - "epoch": 4.596641047537717, - "grad_norm": 6.26588249206543, - "learning_rate": 0.00017812990952922696, - "loss": 9.714436340332032, - "step": 4040 - }, - { - "epoch": 4.60802732707088, - "grad_norm": 5.697126865386963, - "learning_rate": 0.00017807573541361938, - "loss": 9.831003570556641, - "step": 4050 - }, - { - "epoch": 4.619413606604042, - "grad_norm": 5.572251796722412, - "learning_rate": 0.0001780215612980118, - "loss": 10.07955551147461, - "step": 4060 - }, - { - "epoch": 4.630799886137204, - "grad_norm": 5.784109592437744, - "learning_rate": 0.00017796738718240426, - "loss": 9.46136016845703, - "step": 4070 - }, - { - "epoch": 4.642186165670367, - "grad_norm": 6.043442726135254, - "learning_rate": 0.00017791321306679668, - "loss": 10.091639709472656, - "step": 4080 - }, - { - "epoch": 4.65357244520353, - "grad_norm": 5.721872329711914, - "learning_rate": 0.00017785903895118914, - "loss": 10.043199157714843, - "step": 4090 - }, - { - "epoch": 4.664958724736692, - "grad_norm": 5.753310203552246, - "learning_rate": 0.00017780486483558156, - "loss": 9.644969940185547, - "step": 4100 - }, - { - "epoch": 4.676345004269855, - "grad_norm": 6.087526321411133, - "learning_rate": 0.000177750690719974, - "loss": 9.953328704833984, - "step": 4110 - }, - { - "epoch": 4.687731283803018, - "grad_norm": 5.621133327484131, - "learning_rate": 0.00017769651660436644, - "loss": 9.69726104736328, - "step": 4120 - }, - { - "epoch": 4.69911756333618, - "grad_norm": 6.229346752166748, - "learning_rate": 0.0001776423424887589, - "loss": 9.628369140625, - "step": 4130 - }, - { - "epoch": 4.710503842869342, - "grad_norm": 5.986842155456543, - "learning_rate": 0.0001775881683731513, - "loss": 9.742918395996094, - "step": 4140 - }, - { - "epoch": 4.721890122402505, - "grad_norm": 5.907449245452881, - "learning_rate": 0.00017753399425754376, - "loss": 9.967567443847656, - "step": 4150 - }, - { - "epoch": 4.733276401935668, - "grad_norm": 6.126130104064941, - "learning_rate": 0.0001774798201419362, - "loss": 9.773100280761719, - "step": 4160 - }, - { - "epoch": 4.74466268146883, - "grad_norm": 5.871231555938721, - "learning_rate": 0.00017742564602632864, - "loss": 9.900487518310547, - "step": 4170 - }, - { - "epoch": 4.7560489610019925, - "grad_norm": 5.7376275062561035, - "learning_rate": 0.00017737147191072106, - "loss": 9.520377349853515, - "step": 4180 - }, - { - "epoch": 4.767435240535155, - "grad_norm": 5.596216201782227, - "learning_rate": 0.0001773172977951135, - "loss": 10.175640869140626, - "step": 4190 - }, - { - "epoch": 4.778821520068318, - "grad_norm": 6.0666351318359375, - "learning_rate": 0.00017726312367950594, - "loss": 9.647950744628906, - "step": 4200 - }, - { - "epoch": 4.790207799601481, - "grad_norm": 6.583392143249512, - "learning_rate": 0.00017720894956389837, - "loss": 9.790153503417969, - "step": 4210 - }, - { - "epoch": 4.8015940791346425, - "grad_norm": 5.613272190093994, - "learning_rate": 0.00017715477544829082, - "loss": 9.466513061523438, - "step": 4220 - }, - { - "epoch": 4.812980358667805, - "grad_norm": 5.842050552368164, - "learning_rate": 0.00017710060133268324, - "loss": 9.622956085205079, - "step": 4230 - }, - { - "epoch": 4.824366638200968, - "grad_norm": 5.759915828704834, - "learning_rate": 0.0001770464272170757, - "loss": 9.80495834350586, - "step": 4240 - }, - { - "epoch": 4.835752917734131, - "grad_norm": 5.608836650848389, - "learning_rate": 0.00017699225310146812, - "loss": 9.477723693847656, - "step": 4250 - }, - { - "epoch": 4.847139197267293, - "grad_norm": 6.463353157043457, - "learning_rate": 0.00017693807898586057, - "loss": 9.470064544677735, - "step": 4260 - }, - { - "epoch": 4.858525476800455, - "grad_norm": 5.962118148803711, - "learning_rate": 0.000176883904870253, - "loss": 9.367919921875, - "step": 4270 - }, - { - "epoch": 4.869911756333618, - "grad_norm": 5.565269470214844, - "learning_rate": 0.00017682973075464544, - "loss": 9.758961486816407, - "step": 4280 - }, - { - "epoch": 4.881298035866781, - "grad_norm": 5.9193010330200195, - "learning_rate": 0.0001767755566390379, - "loss": 9.685740661621093, - "step": 4290 - }, - { - "epoch": 4.8926843153999435, - "grad_norm": 5.730422019958496, - "learning_rate": 0.00017672138252343032, - "loss": 9.70977554321289, - "step": 4300 - }, - { - "epoch": 4.904070594933105, - "grad_norm": 5.72115421295166, - "learning_rate": 0.00017666720840782277, - "loss": 9.60604476928711, - "step": 4310 - }, - { - "epoch": 4.915456874466268, - "grad_norm": 6.489387035369873, - "learning_rate": 0.0001766130342922152, - "loss": 9.322960662841798, - "step": 4320 - }, - { - "epoch": 4.926843153999431, - "grad_norm": 5.830432415008545, - "learning_rate": 0.00017655886017660762, - "loss": 9.564017486572265, - "step": 4330 - }, - { - "epoch": 4.938229433532594, - "grad_norm": 6.302420616149902, - "learning_rate": 0.00017650468606100005, - "loss": 10.048651123046875, - "step": 4340 - }, - { - "epoch": 4.949615713065755, - "grad_norm": 6.4496660232543945, - "learning_rate": 0.0001764505119453925, - "loss": 9.775814819335938, - "step": 4350 - }, - { - "epoch": 4.961001992598918, - "grad_norm": 6.146448612213135, - "learning_rate": 0.00017639633782978492, - "loss": 9.664933013916016, - "step": 4360 - }, - { - "epoch": 4.972388272132081, - "grad_norm": 6.442460060119629, - "learning_rate": 0.00017634216371417737, - "loss": 9.787579345703126, - "step": 4370 - }, - { - "epoch": 4.983774551665244, - "grad_norm": 5.952507019042969, - "learning_rate": 0.0001762879895985698, - "loss": 9.962568664550782, - "step": 4380 - }, - { - "epoch": 4.995160831198406, - "grad_norm": 5.775949954986572, - "learning_rate": 0.00017623381548296225, - "loss": 9.72928466796875, - "step": 4390 - }, - { - "epoch": 5.005693139766581, - "grad_norm": 6.083469390869141, - "learning_rate": 0.00017617964136735467, - "loss": 8.628085327148437, - "step": 4400 - }, - { - "epoch": 5.017079419299744, - "grad_norm": 6.120087623596191, - "learning_rate": 0.00017612546725174713, - "loss": 8.888311004638672, - "step": 4410 - }, - { - "epoch": 5.028465698832906, - "grad_norm": 6.082287311553955, - "learning_rate": 0.00017607129313613958, - "loss": 9.107136535644532, - "step": 4420 - }, - { - "epoch": 5.039851978366069, - "grad_norm": 5.743897438049316, - "learning_rate": 0.000176017119020532, - "loss": 8.61147232055664, - "step": 4430 - }, - { - "epoch": 5.051238257899231, - "grad_norm": 5.427106857299805, - "learning_rate": 0.00017596294490492445, - "loss": 8.694246673583985, - "step": 4440 - }, - { - "epoch": 5.062624537432394, - "grad_norm": 6.282980442047119, - "learning_rate": 0.00017590877078931688, - "loss": 9.144784545898437, - "step": 4450 - }, - { - "epoch": 5.074010816965557, - "grad_norm": 6.395759105682373, - "learning_rate": 0.0001758545966737093, - "loss": 8.932855224609375, - "step": 4460 - }, - { - "epoch": 5.085397096498719, - "grad_norm": 6.1782917976379395, - "learning_rate": 0.00017580042255810175, - "loss": 9.115586853027343, - "step": 4470 - }, - { - "epoch": 5.0967833760318815, - "grad_norm": 6.45112943649292, - "learning_rate": 0.00017574624844249418, - "loss": 8.773223876953125, - "step": 4480 - }, - { - "epoch": 5.108169655565044, - "grad_norm": 5.9199371337890625, - "learning_rate": 0.0001756920743268866, - "loss": 8.678252410888671, - "step": 4490 - }, - { - "epoch": 5.119555935098207, - "grad_norm": 5.901211738586426, - "learning_rate": 0.00017563790021127905, - "loss": 9.167632293701171, - "step": 4500 - }, - { - "epoch": 5.130942214631369, - "grad_norm": 6.0059661865234375, - "learning_rate": 0.00017558372609567148, - "loss": 8.844408416748047, - "step": 4510 - }, - { - "epoch": 5.1423284941645315, - "grad_norm": 6.615947246551514, - "learning_rate": 0.00017552955198006393, - "loss": 8.853307342529297, - "step": 4520 - }, - { - "epoch": 5.153714773697694, - "grad_norm": 5.707233428955078, - "learning_rate": 0.00017547537786445638, - "loss": 8.580878448486327, - "step": 4530 - }, - { - "epoch": 5.165101053230857, - "grad_norm": 6.256187915802002, - "learning_rate": 0.0001754212037488488, - "loss": 8.889945983886719, - "step": 4540 - }, - { - "epoch": 5.17648733276402, - "grad_norm": 5.724560260772705, - "learning_rate": 0.00017536702963324126, - "loss": 9.283840942382813, - "step": 4550 - }, - { - "epoch": 5.187873612297182, - "grad_norm": 6.063653469085693, - "learning_rate": 0.00017531285551763368, - "loss": 9.45126953125, - "step": 4560 - }, - { - "epoch": 5.199259891830344, - "grad_norm": 6.490650177001953, - "learning_rate": 0.00017525868140202613, - "loss": 9.213851928710938, - "step": 4570 - }, - { - "epoch": 5.210646171363507, - "grad_norm": 6.077616214752197, - "learning_rate": 0.00017520450728641856, - "loss": 8.94876937866211, - "step": 4580 - }, - { - "epoch": 5.22203245089667, - "grad_norm": 6.140579700469971, - "learning_rate": 0.000175150333170811, - "loss": 9.061854553222656, - "step": 4590 - }, - { - "epoch": 5.233418730429832, - "grad_norm": 6.086933135986328, - "learning_rate": 0.00017509615905520343, - "loss": 8.97676773071289, - "step": 4600 - }, - { - "epoch": 5.244805009962994, - "grad_norm": 6.013683319091797, - "learning_rate": 0.00017504198493959586, - "loss": 9.112580871582031, - "step": 4610 - }, - { - "epoch": 5.256191289496157, - "grad_norm": 6.05435037612915, - "learning_rate": 0.00017498781082398828, - "loss": 8.846112823486328, - "step": 4620 - }, - { - "epoch": 5.26757756902932, - "grad_norm": 6.0196919441223145, - "learning_rate": 0.00017493363670838074, - "loss": 9.190632629394532, - "step": 4630 - }, - { - "epoch": 5.278963848562483, - "grad_norm": 5.778266906738281, - "learning_rate": 0.0001748794625927732, - "loss": 9.008673858642577, - "step": 4640 - }, - { - "epoch": 5.290350128095644, - "grad_norm": 6.087759971618652, - "learning_rate": 0.0001748252884771656, - "loss": 9.25833969116211, - "step": 4650 - }, - { - "epoch": 5.301736407628807, - "grad_norm": 5.978583812713623, - "learning_rate": 0.00017477111436155806, - "loss": 8.707977294921875, - "step": 4660 - }, - { - "epoch": 5.31312268716197, - "grad_norm": 5.761394500732422, - "learning_rate": 0.0001747169402459505, - "loss": 9.27506103515625, - "step": 4670 - }, - { - "epoch": 5.324508966695133, - "grad_norm": 6.357019901275635, - "learning_rate": 0.00017466276613034294, - "loss": 9.029557037353516, - "step": 4680 - }, - { - "epoch": 5.335895246228295, - "grad_norm": 6.016201496124268, - "learning_rate": 0.00017460859201473536, - "loss": 8.777602386474609, - "step": 4690 - }, - { - "epoch": 5.347281525761457, - "grad_norm": 6.20839786529541, - "learning_rate": 0.00017455441789912781, - "loss": 8.966375732421875, - "step": 4700 - }, - { - "epoch": 5.35866780529462, - "grad_norm": 6.455466270446777, - "learning_rate": 0.00017450024378352024, - "loss": 9.284576416015625, - "step": 4710 - }, - { - "epoch": 5.370054084827783, - "grad_norm": 6.2462663650512695, - "learning_rate": 0.0001744460696679127, - "loss": 8.876305389404298, - "step": 4720 - }, - { - "epoch": 5.381440364360945, - "grad_norm": 6.3350396156311035, - "learning_rate": 0.00017439189555230512, - "loss": 9.187533569335937, - "step": 4730 - }, - { - "epoch": 5.392826643894107, - "grad_norm": 6.246124267578125, - "learning_rate": 0.00017433772143669757, - "loss": 8.913845825195313, - "step": 4740 - }, - { - "epoch": 5.40421292342727, - "grad_norm": 5.948974609375, - "learning_rate": 0.00017428354732109, - "loss": 9.092901611328125, - "step": 4750 - }, - { - "epoch": 5.415599202960433, - "grad_norm": 6.238656520843506, - "learning_rate": 0.00017422937320548242, - "loss": 9.446102905273438, - "step": 4760 - }, - { - "epoch": 5.4269854824935955, - "grad_norm": 6.002922058105469, - "learning_rate": 0.00017417519908987487, - "loss": 9.022492980957031, - "step": 4770 - }, - { - "epoch": 5.438371762026758, - "grad_norm": 5.8916707038879395, - "learning_rate": 0.0001741210249742673, - "loss": 8.732567596435548, - "step": 4780 - }, - { - "epoch": 5.44975804155992, - "grad_norm": 6.253623008728027, - "learning_rate": 0.00017406685085865974, - "loss": 8.986526489257812, - "step": 4790 - }, - { - "epoch": 5.461144321093083, - "grad_norm": 6.794173717498779, - "learning_rate": 0.00017401267674305217, - "loss": 8.910897827148437, - "step": 4800 - }, - { - "epoch": 5.4725306006262455, - "grad_norm": 6.226437568664551, - "learning_rate": 0.00017395850262744462, - "loss": 9.053468322753906, - "step": 4810 - }, - { - "epoch": 5.483916880159408, - "grad_norm": 5.871084213256836, - "learning_rate": 0.00017390432851183704, - "loss": 8.758150482177735, - "step": 4820 - }, - { - "epoch": 5.49530315969257, - "grad_norm": 6.246805667877197, - "learning_rate": 0.0001738501543962295, - "loss": 9.195549774169923, - "step": 4830 - }, - { - "epoch": 5.506689439225733, - "grad_norm": 6.047657489776611, - "learning_rate": 0.00017379598028062192, - "loss": 9.35385513305664, - "step": 4840 - }, - { - "epoch": 5.518075718758896, - "grad_norm": 5.7672529220581055, - "learning_rate": 0.00017374180616501437, - "loss": 9.131436157226563, - "step": 4850 - }, - { - "epoch": 5.529461998292058, - "grad_norm": 6.151646614074707, - "learning_rate": 0.00017368763204940682, - "loss": 9.32352523803711, - "step": 4860 - }, - { - "epoch": 5.540848277825221, - "grad_norm": 6.154118537902832, - "learning_rate": 0.00017363345793379925, - "loss": 9.332242584228515, - "step": 4870 - }, - { - "epoch": 5.552234557358383, - "grad_norm": 5.870425224304199, - "learning_rate": 0.00017357928381819167, - "loss": 8.844430541992187, - "step": 4880 - }, - { - "epoch": 5.563620836891546, - "grad_norm": 6.215554237365723, - "learning_rate": 0.0001735251097025841, - "loss": 9.3614013671875, - "step": 4890 - }, - { - "epoch": 5.575007116424708, - "grad_norm": 6.865769386291504, - "learning_rate": 0.00017347093558697655, - "loss": 8.7501708984375, - "step": 4900 - }, - { - "epoch": 5.586393395957871, - "grad_norm": 5.617834091186523, - "learning_rate": 0.00017341676147136897, - "loss": 9.023835754394531, - "step": 4910 - }, - { - "epoch": 5.597779675491033, - "grad_norm": 6.217780590057373, - "learning_rate": 0.00017336258735576142, - "loss": 8.83472900390625, - "step": 4920 - }, - { - "epoch": 5.609165955024196, - "grad_norm": 6.248758316040039, - "learning_rate": 0.00017330841324015385, - "loss": 9.115097808837891, - "step": 4930 - }, - { - "epoch": 5.620552234557358, - "grad_norm": 5.923741817474365, - "learning_rate": 0.0001732542391245463, - "loss": 9.077340698242187, - "step": 4940 - }, - { - "epoch": 5.631938514090521, - "grad_norm": 6.346497058868408, - "learning_rate": 0.00017320006500893872, - "loss": 9.040455627441407, - "step": 4950 - }, - { - "epoch": 5.643324793623684, - "grad_norm": 6.081203937530518, - "learning_rate": 0.00017314589089333118, - "loss": 9.156749725341797, - "step": 4960 - }, - { - "epoch": 5.654711073156846, - "grad_norm": 5.836169719696045, - "learning_rate": 0.00017309171677772363, - "loss": 9.09630126953125, - "step": 4970 - }, - { - "epoch": 5.6660973526900085, - "grad_norm": 6.374985218048096, - "learning_rate": 0.00017303754266211605, - "loss": 9.138162231445312, - "step": 4980 - }, - { - "epoch": 5.677483632223171, - "grad_norm": 6.967176914215088, - "learning_rate": 0.0001729833685465085, - "loss": 9.05536346435547, - "step": 4990 - }, - { - "epoch": 5.688869911756334, - "grad_norm": 6.718258857727051, - "learning_rate": 0.00017292919443090093, - "loss": 8.888587951660156, - "step": 5000 - }, - { - "epoch": 5.700256191289496, - "grad_norm": 6.106222629547119, - "learning_rate": 0.00017287502031529338, - "loss": 9.413323974609375, - "step": 5010 - }, - { - "epoch": 5.7116424708226585, - "grad_norm": 5.6303229331970215, - "learning_rate": 0.0001728208461996858, - "loss": 9.086915588378906, - "step": 5020 - }, - { - "epoch": 5.723028750355821, - "grad_norm": 6.345915794372559, - "learning_rate": 0.00017276667208407823, - "loss": 8.981614685058593, - "step": 5030 - }, - { - "epoch": 5.734415029888984, - "grad_norm": 5.963151454925537, - "learning_rate": 0.00017271249796847065, - "loss": 8.846481323242188, - "step": 5040 - }, - { - "epoch": 5.745801309422147, - "grad_norm": 6.332173824310303, - "learning_rate": 0.0001726583238528631, - "loss": 9.01428451538086, - "step": 5050 - }, - { - "epoch": 5.757187588955309, - "grad_norm": 5.805280685424805, - "learning_rate": 0.00017260414973725553, - "loss": 8.98281021118164, - "step": 5060 - }, - { - "epoch": 5.768573868488471, - "grad_norm": 6.478013038635254, - "learning_rate": 0.00017254997562164798, - "loss": 9.026184844970704, - "step": 5070 - }, - { - "epoch": 5.779960148021634, - "grad_norm": 6.193087100982666, - "learning_rate": 0.00017249580150604043, - "loss": 8.499151611328125, - "step": 5080 - }, - { - "epoch": 5.791346427554797, - "grad_norm": 6.035495758056641, - "learning_rate": 0.00017244162739043286, - "loss": 9.09105224609375, - "step": 5090 - }, - { - "epoch": 5.802732707087959, - "grad_norm": 6.009443759918213, - "learning_rate": 0.0001723874532748253, - "loss": 9.080800628662109, - "step": 5100 - }, - { - "epoch": 5.814118986621121, - "grad_norm": 6.147922992706299, - "learning_rate": 0.00017233327915921773, - "loss": 9.082097625732422, - "step": 5110 - }, - { - "epoch": 5.825505266154284, - "grad_norm": 6.0678019523620605, - "learning_rate": 0.00017227910504361018, - "loss": 8.975556182861329, - "step": 5120 - }, - { - "epoch": 5.836891545687447, - "grad_norm": 5.851400852203369, - "learning_rate": 0.0001722249309280026, - "loss": 8.932150268554688, - "step": 5130 - }, - { - "epoch": 5.84827782522061, - "grad_norm": 5.831391334533691, - "learning_rate": 0.00017217075681239506, - "loss": 8.861358642578125, - "step": 5140 - }, - { - "epoch": 5.859664104753771, - "grad_norm": 6.788999557495117, - "learning_rate": 0.00017211658269678749, - "loss": 9.362552642822266, - "step": 5150 - }, - { - "epoch": 5.871050384286934, - "grad_norm": 6.117767333984375, - "learning_rate": 0.00017206240858117994, - "loss": 9.051403045654297, - "step": 5160 - }, - { - "epoch": 5.882436663820097, - "grad_norm": 5.815280914306641, - "learning_rate": 0.00017200823446557236, - "loss": 8.855480194091797, - "step": 5170 - }, - { - "epoch": 5.89382294335326, - "grad_norm": 6.476868152618408, - "learning_rate": 0.00017195406034996479, - "loss": 9.67321319580078, - "step": 5180 - }, - { - "epoch": 5.9052092228864215, - "grad_norm": 6.074749946594238, - "learning_rate": 0.0001718998862343572, - "loss": 8.859770965576171, - "step": 5190 - }, - { - "epoch": 5.916595502419584, - "grad_norm": 5.674811840057373, - "learning_rate": 0.00017184571211874966, - "loss": 9.397520446777344, - "step": 5200 - }, - { - "epoch": 5.927981781952747, - "grad_norm": 5.898608684539795, - "learning_rate": 0.0001717915380031421, - "loss": 9.098382568359375, - "step": 5210 - }, - { - "epoch": 5.93936806148591, - "grad_norm": 6.260279178619385, - "learning_rate": 0.00017173736388753454, - "loss": 9.085452270507812, - "step": 5220 - }, - { - "epoch": 5.950754341019072, - "grad_norm": 6.2707719802856445, - "learning_rate": 0.000171683189771927, - "loss": 8.759799194335937, - "step": 5230 - }, - { - "epoch": 5.962140620552234, - "grad_norm": 6.20477294921875, - "learning_rate": 0.00017162901565631941, - "loss": 9.199006652832031, - "step": 5240 - }, - { - "epoch": 5.973526900085397, - "grad_norm": 5.900763511657715, - "learning_rate": 0.00017157484154071187, - "loss": 9.182913970947265, - "step": 5250 - }, - { - "epoch": 5.98491317961856, - "grad_norm": 6.241118431091309, - "learning_rate": 0.0001715206674251043, - "loss": 8.96974868774414, - "step": 5260 - }, - { - "epoch": 5.9962994591517225, - "grad_norm": 5.996070384979248, - "learning_rate": 0.00017146649330949674, - "loss": 9.107511138916015, - "step": 5270 - }, - { - "epoch": 6.0068317677198975, - "grad_norm": 6.609316349029541, - "learning_rate": 0.00017141231919388917, - "loss": 7.848031616210937, - "step": 5280 - }, - { - "epoch": 6.01821804725306, - "grad_norm": 6.0844197273254395, - "learning_rate": 0.00017135814507828162, - "loss": 8.242960357666016, - "step": 5290 - }, - { - "epoch": 6.029604326786223, - "grad_norm": 5.959528923034668, - "learning_rate": 0.00017130397096267404, - "loss": 8.17882080078125, - "step": 5300 - }, - { - "epoch": 6.040990606319385, - "grad_norm": 5.812763690948486, - "learning_rate": 0.00017124979684706647, - "loss": 8.189881134033204, - "step": 5310 - }, - { - "epoch": 6.0523768858525475, - "grad_norm": 6.203407287597656, - "learning_rate": 0.00017119562273145892, - "loss": 7.9870758056640625, - "step": 5320 - }, - { - "epoch": 6.06376316538571, - "grad_norm": 6.108156681060791, - "learning_rate": 0.00017114144861585134, - "loss": 8.187033081054688, - "step": 5330 - }, - { - "epoch": 6.075149444918873, - "grad_norm": 6.818889617919922, - "learning_rate": 0.0001710872745002438, - "loss": 8.802757263183594, - "step": 5340 - }, - { - "epoch": 6.086535724452036, - "grad_norm": 6.080479145050049, - "learning_rate": 0.00017103310038463622, - "loss": 8.3635986328125, - "step": 5350 - }, - { - "epoch": 6.097922003985198, - "grad_norm": 6.009644508361816, - "learning_rate": 0.00017097892626902867, - "loss": 8.316793060302734, - "step": 5360 - }, - { - "epoch": 6.10930828351836, - "grad_norm": 5.928283214569092, - "learning_rate": 0.0001709247521534211, - "loss": 8.05916976928711, - "step": 5370 - }, - { - "epoch": 6.120694563051523, - "grad_norm": 6.18734073638916, - "learning_rate": 0.00017087057803781355, - "loss": 8.414893341064452, - "step": 5380 - }, - { - "epoch": 6.132080842584686, - "grad_norm": 5.8552021980285645, - "learning_rate": 0.00017081640392220597, - "loss": 8.328546905517578, - "step": 5390 - }, - { - "epoch": 6.143467122117848, - "grad_norm": 6.283035755157471, - "learning_rate": 0.00017076222980659842, - "loss": 8.294504547119141, - "step": 5400 - }, - { - "epoch": 6.15485340165101, - "grad_norm": 6.527463436126709, - "learning_rate": 0.00017070805569099085, - "loss": 8.205842590332031, - "step": 5410 - }, - { - "epoch": 6.166239681184173, - "grad_norm": 6.081474304199219, - "learning_rate": 0.0001706538815753833, - "loss": 8.215932464599609, - "step": 5420 - }, - { - "epoch": 6.177625960717336, - "grad_norm": 6.173142433166504, - "learning_rate": 0.00017059970745977575, - "loss": 8.617554473876954, - "step": 5430 - }, - { - "epoch": 6.189012240250499, - "grad_norm": 6.146962642669678, - "learning_rate": 0.00017054553334416817, - "loss": 8.21309814453125, - "step": 5440 - }, - { - "epoch": 6.20039851978366, - "grad_norm": 6.535096645355225, - "learning_rate": 0.0001704913592285606, - "loss": 8.393982696533204, - "step": 5450 - }, - { - "epoch": 6.211784799316823, - "grad_norm": 5.731675624847412, - "learning_rate": 0.00017043718511295302, - "loss": 8.488709259033204, - "step": 5460 - }, - { - "epoch": 6.223171078849986, - "grad_norm": 6.357123374938965, - "learning_rate": 0.00017038301099734547, - "loss": 8.502271270751953, - "step": 5470 - }, - { - "epoch": 6.234557358383149, - "grad_norm": 6.319135665893555, - "learning_rate": 0.0001703288368817379, - "loss": 8.702040100097657, - "step": 5480 - }, - { - "epoch": 6.2459436379163105, - "grad_norm": 6.496546268463135, - "learning_rate": 0.00017027466276613035, - "loss": 8.020443725585938, - "step": 5490 - }, - { - "epoch": 6.257329917449473, - "grad_norm": 6.284101963043213, - "learning_rate": 0.00017022048865052278, - "loss": 8.432199096679687, - "step": 5500 - }, - { - "epoch": 6.268716196982636, - "grad_norm": 6.48223876953125, - "learning_rate": 0.00017016631453491523, - "loss": 8.392076873779297, - "step": 5510 - }, - { - "epoch": 6.280102476515799, - "grad_norm": 6.497861862182617, - "learning_rate": 0.00017011214041930765, - "loss": 8.759424591064453, - "step": 5520 - }, - { - "epoch": 6.291488756048961, - "grad_norm": 6.510660648345947, - "learning_rate": 0.0001700579663037001, - "loss": 8.286143493652343, - "step": 5530 - }, - { - "epoch": 6.302875035582123, - "grad_norm": 5.99029541015625, - "learning_rate": 0.00017000379218809255, - "loss": 8.367719268798828, - "step": 5540 - }, - { - "epoch": 6.314261315115286, - "grad_norm": 6.750374794006348, - "learning_rate": 0.00016994961807248498, - "loss": 8.649713134765625, - "step": 5550 - }, - { - "epoch": 6.325647594648449, - "grad_norm": 5.9475202560424805, - "learning_rate": 0.00016989544395687743, - "loss": 8.837234497070312, - "step": 5560 - }, - { - "epoch": 6.3370338741816115, - "grad_norm": 6.30899715423584, - "learning_rate": 0.00016984126984126986, - "loss": 8.32588882446289, - "step": 5570 - }, - { - "epoch": 6.348420153714773, - "grad_norm": 6.342610836029053, - "learning_rate": 0.00016978709572566228, - "loss": 8.394316101074219, - "step": 5580 - }, - { - "epoch": 6.359806433247936, - "grad_norm": 6.095706462860107, - "learning_rate": 0.00016973292161005473, - "loss": 8.327243041992187, - "step": 5590 - }, - { - "epoch": 6.371192712781099, - "grad_norm": 6.3162336349487305, - "learning_rate": 0.00016967874749444716, - "loss": 8.443352508544923, - "step": 5600 - }, - { - "epoch": 6.3825789923142615, - "grad_norm": 6.393646240234375, - "learning_rate": 0.00016962457337883958, - "loss": 8.368497467041015, - "step": 5610 - }, - { - "epoch": 6.393965271847424, - "grad_norm": 6.893944263458252, - "learning_rate": 0.00016957039926323203, - "loss": 8.11514892578125, - "step": 5620 - }, - { - "epoch": 6.405351551380586, - "grad_norm": 6.172192573547363, - "learning_rate": 0.00016951622514762446, - "loss": 8.440129852294922, - "step": 5630 - }, - { - "epoch": 6.416737830913749, - "grad_norm": 6.662540435791016, - "learning_rate": 0.0001694620510320169, - "loss": 8.530294799804688, - "step": 5640 - }, - { - "epoch": 6.428124110446912, - "grad_norm": 6.44663143157959, - "learning_rate": 0.00016940787691640936, - "loss": 8.647301483154298, - "step": 5650 - }, - { - "epoch": 6.439510389980074, - "grad_norm": 7.118195056915283, - "learning_rate": 0.00016935370280080178, - "loss": 8.56711654663086, - "step": 5660 - }, - { - "epoch": 6.450896669513236, - "grad_norm": 5.5173540115356445, - "learning_rate": 0.00016929952868519424, - "loss": 8.647555541992187, - "step": 5670 - }, - { - "epoch": 6.462282949046399, - "grad_norm": 5.75939416885376, - "learning_rate": 0.00016924535456958666, - "loss": 8.223889923095703, - "step": 5680 - }, - { - "epoch": 6.473669228579562, - "grad_norm": 6.170014381408691, - "learning_rate": 0.0001691911804539791, - "loss": 7.9549812316894535, - "step": 5690 - }, - { - "epoch": 6.485055508112724, - "grad_norm": 6.4585490226745605, - "learning_rate": 0.00016913700633837154, - "loss": 8.623312377929688, - "step": 5700 - }, - { - "epoch": 6.496441787645887, - "grad_norm": 6.330554485321045, - "learning_rate": 0.000169082832222764, - "loss": 8.541600036621094, - "step": 5710 - }, - { - "epoch": 6.507828067179049, - "grad_norm": 6.698922634124756, - "learning_rate": 0.0001690286581071564, - "loss": 8.024588012695313, - "step": 5720 - }, - { - "epoch": 6.519214346712212, - "grad_norm": 6.28864049911499, - "learning_rate": 0.00016897448399154884, - "loss": 8.494007110595703, - "step": 5730 - }, - { - "epoch": 6.530600626245374, - "grad_norm": 6.495082378387451, - "learning_rate": 0.00016892030987594126, - "loss": 8.174886322021484, - "step": 5740 - }, - { - "epoch": 6.541986905778537, - "grad_norm": 6.506287097930908, - "learning_rate": 0.0001688661357603337, - "loss": 8.86504898071289, - "step": 5750 - }, - { - "epoch": 6.553373185311699, - "grad_norm": 6.550364971160889, - "learning_rate": 0.00016881196164472616, - "loss": 8.222732543945312, - "step": 5760 - }, - { - "epoch": 6.564759464844862, - "grad_norm": 6.110886096954346, - "learning_rate": 0.0001687577875291186, - "loss": 7.874899291992188, - "step": 5770 - }, - { - "epoch": 6.5761457443780245, - "grad_norm": 5.996333122253418, - "learning_rate": 0.00016870361341351104, - "loss": 8.523206329345703, - "step": 5780 - }, - { - "epoch": 6.587532023911187, - "grad_norm": 6.117859840393066, - "learning_rate": 0.00016864943929790346, - "loss": 8.307976531982423, - "step": 5790 - }, - { - "epoch": 6.59891830344435, - "grad_norm": 6.8809404373168945, - "learning_rate": 0.00016859526518229592, - "loss": 8.540397644042969, - "step": 5800 - }, - { - "epoch": 6.610304582977512, - "grad_norm": 6.469377040863037, - "learning_rate": 0.00016854109106668834, - "loss": 8.485488128662109, - "step": 5810 - }, - { - "epoch": 6.6216908625106745, - "grad_norm": 6.278227806091309, - "learning_rate": 0.0001684869169510808, - "loss": 8.363449096679688, - "step": 5820 - }, - { - "epoch": 6.633077142043837, - "grad_norm": 6.4080586433410645, - "learning_rate": 0.00016843274283547322, - "loss": 8.468556213378907, - "step": 5830 - }, - { - "epoch": 6.644463421577, - "grad_norm": 5.922115802764893, - "learning_rate": 0.00016837856871986567, - "loss": 8.429467010498048, - "step": 5840 - }, - { - "epoch": 6.655849701110162, - "grad_norm": 6.8086018562316895, - "learning_rate": 0.0001683243946042581, - "loss": 8.133071136474609, - "step": 5850 - }, - { - "epoch": 6.667235980643325, - "grad_norm": 6.547855377197266, - "learning_rate": 0.00016827022048865054, - "loss": 7.84344253540039, - "step": 5860 - }, - { - "epoch": 6.678622260176487, - "grad_norm": 6.792558193206787, - "learning_rate": 0.00016821604637304297, - "loss": 8.842105102539062, - "step": 5870 - }, - { - "epoch": 6.69000853970965, - "grad_norm": 6.492033004760742, - "learning_rate": 0.0001681618722574354, - "loss": 8.356997680664062, - "step": 5880 - }, - { - "epoch": 6.701394819242813, - "grad_norm": 6.224930763244629, - "learning_rate": 0.00016810769814182784, - "loss": 8.31610565185547, - "step": 5890 - }, - { - "epoch": 6.712781098775975, - "grad_norm": 6.407049655914307, - "learning_rate": 0.00016805352402622027, - "loss": 8.490274047851562, - "step": 5900 - }, - { - "epoch": 6.724167378309137, - "grad_norm": 6.361656665802002, - "learning_rate": 0.00016799934991061272, - "loss": 8.183870697021485, - "step": 5910 - }, - { - "epoch": 6.7355536578423, - "grad_norm": 6.558957576751709, - "learning_rate": 0.00016794517579500515, - "loss": 8.488135528564452, - "step": 5920 - }, - { - "epoch": 6.746939937375463, - "grad_norm": 7.5453925132751465, - "learning_rate": 0.0001678910016793976, - "loss": 8.479955291748047, - "step": 5930 - }, - { - "epoch": 6.758326216908625, - "grad_norm": 6.576931476593018, - "learning_rate": 0.00016783682756379002, - "loss": 8.354127502441406, - "step": 5940 - }, - { - "epoch": 6.769712496441787, - "grad_norm": 6.558088302612305, - "learning_rate": 0.00016778265344818247, - "loss": 8.517078399658203, - "step": 5950 - }, - { - "epoch": 6.78109877597495, - "grad_norm": 6.338796615600586, - "learning_rate": 0.0001677284793325749, - "loss": 8.582713317871093, - "step": 5960 - }, - { - "epoch": 6.792485055508113, - "grad_norm": 5.990917205810547, - "learning_rate": 0.00016767430521696735, - "loss": 8.49090347290039, - "step": 5970 - }, - { - "epoch": 6.803871335041276, - "grad_norm": 5.543584823608398, - "learning_rate": 0.00016762013110135977, - "loss": 8.585533905029298, - "step": 5980 - }, - { - "epoch": 6.8152576145744375, - "grad_norm": 6.125088214874268, - "learning_rate": 0.00016756595698575223, - "loss": 8.365689086914063, - "step": 5990 - }, - { - "epoch": 6.8266438941076, - "grad_norm": 6.520383358001709, - "learning_rate": 0.00016751178287014465, - "loss": 7.997092437744141, - "step": 6000 - }, - { - "epoch": 6.838030173640763, - "grad_norm": 6.5187530517578125, - "learning_rate": 0.00016745760875453707, - "loss": 8.529048919677734, - "step": 6010 - }, - { - "epoch": 6.849416453173926, - "grad_norm": 6.216691493988037, - "learning_rate": 0.00016740343463892953, - "loss": 8.508572387695313, - "step": 6020 - }, - { - "epoch": 6.8608027327070875, - "grad_norm": 6.541543960571289, - "learning_rate": 0.00016734926052332195, - "loss": 8.271363830566406, - "step": 6030 - }, - { - "epoch": 6.87218901224025, - "grad_norm": 6.519371509552002, - "learning_rate": 0.0001672950864077144, - "loss": 8.481680297851563, - "step": 6040 - }, - { - "epoch": 6.883575291773413, - "grad_norm": 6.069828510284424, - "learning_rate": 0.00016724091229210683, - "loss": 8.080921936035157, - "step": 6050 - }, - { - "epoch": 6.894961571306576, - "grad_norm": 6.756743907928467, - "learning_rate": 0.00016718673817649928, - "loss": 8.416082763671875, - "step": 6060 - }, - { - "epoch": 6.9063478508397385, - "grad_norm": 6.148792266845703, - "learning_rate": 0.0001671325640608917, - "loss": 8.236578369140625, - "step": 6070 - }, - { - "epoch": 6.9177341303729, - "grad_norm": 6.631039619445801, - "learning_rate": 0.00016707838994528415, - "loss": 8.468033599853516, - "step": 6080 - }, - { - "epoch": 6.929120409906063, - "grad_norm": 7.002105236053467, - "learning_rate": 0.00016702421582967658, - "loss": 8.596262359619141, - "step": 6090 - }, - { - "epoch": 6.940506689439226, - "grad_norm": 6.974485397338867, - "learning_rate": 0.00016697004171406903, - "loss": 8.201548767089843, - "step": 6100 - }, - { - "epoch": 6.9518929689723885, - "grad_norm": 6.358666896820068, - "learning_rate": 0.00016691586759846148, - "loss": 8.327249908447266, - "step": 6110 - }, - { - "epoch": 6.96327924850555, - "grad_norm": 6.620755195617676, - "learning_rate": 0.00016686711089441465, - "loss": 8.569712829589843, - "step": 6120 - }, - { - "epoch": 6.974665528038713, - "grad_norm": 6.0767059326171875, - "learning_rate": 0.0001668129367788071, - "loss": 8.352509307861329, - "step": 6130 - }, - { - "epoch": 6.986051807571876, - "grad_norm": 6.384547233581543, - "learning_rate": 0.00016675876266319955, - "loss": 8.438029479980468, - "step": 6140 - }, - { - "epoch": 6.997438087105039, - "grad_norm": 5.849452972412109, - "learning_rate": 0.00016670458854759198, - "loss": 8.576494598388672, - "step": 6150 - }, - { - "epoch": 7.007970395673214, - "grad_norm": 6.016312122344971, - "learning_rate": 0.0001666504144319844, - "loss": 7.190988922119141, - "step": 6160 - }, - { - "epoch": 7.019356675206376, - "grad_norm": 7.674123764038086, - "learning_rate": 0.00016659624031637683, - "loss": 7.777459716796875, - "step": 6170 - }, - { - "epoch": 7.030742954739539, - "grad_norm": 5.954489707946777, - "learning_rate": 0.00016654206620076928, - "loss": 8.038485717773437, - "step": 6180 - }, - { - "epoch": 7.042129234272702, - "grad_norm": 6.098645210266113, - "learning_rate": 0.0001664878920851617, - "loss": 7.921611022949219, - "step": 6190 - }, - { - "epoch": 7.053515513805864, - "grad_norm": 6.347640514373779, - "learning_rate": 0.00016643371796955415, - "loss": 7.475108337402344, - "step": 6200 - }, - { - "epoch": 7.064901793339026, - "grad_norm": 6.295144081115723, - "learning_rate": 0.00016637954385394658, - "loss": 7.842532348632813, - "step": 6210 - }, - { - "epoch": 7.076288072872189, - "grad_norm": 6.518668174743652, - "learning_rate": 0.00016632536973833903, - "loss": 8.195176696777343, - "step": 6220 - }, - { - "epoch": 7.087674352405352, - "grad_norm": 6.452752590179443, - "learning_rate": 0.00016627119562273145, - "loss": 7.876513671875, - "step": 6230 - }, - { - "epoch": 7.099060631938514, - "grad_norm": 6.639153003692627, - "learning_rate": 0.0001662170215071239, - "loss": 7.704139709472656, - "step": 6240 - }, - { - "epoch": 7.110446911471676, - "grad_norm": 6.341755390167236, - "learning_rate": 0.00016616284739151636, - "loss": 7.5064857482910154, - "step": 6250 - }, - { - "epoch": 7.121833191004839, - "grad_norm": 6.495119094848633, - "learning_rate": 0.00016610867327590878, - "loss": 7.757982635498047, - "step": 6260 - }, - { - "epoch": 7.133219470538002, - "grad_norm": 5.995544910430908, - "learning_rate": 0.00016605449916030123, - "loss": 7.433665466308594, - "step": 6270 - }, - { - "epoch": 7.144605750071165, - "grad_norm": 5.881967067718506, - "learning_rate": 0.00016600032504469366, - "loss": 7.579911804199218, - "step": 6280 - }, - { - "epoch": 7.1559920296043265, - "grad_norm": 6.570091724395752, - "learning_rate": 0.0001659461509290861, - "loss": 7.659120178222656, - "step": 6290 - }, - { - "epoch": 7.167378309137489, - "grad_norm": 6.527503490447998, - "learning_rate": 0.00016589197681347853, - "loss": 7.960893249511718, - "step": 6300 - }, - { - "epoch": 7.178764588670652, - "grad_norm": 6.29287576675415, - "learning_rate": 0.00016583780269787096, - "loss": 7.749033355712891, - "step": 6310 - }, - { - "epoch": 7.190150868203815, - "grad_norm": 6.531010627746582, - "learning_rate": 0.00016578362858226338, - "loss": 7.7865547180175785, - "step": 6320 - }, - { - "epoch": 7.2015371477369765, - "grad_norm": 6.445150852203369, - "learning_rate": 0.00016572945446665583, - "loss": 7.650438690185547, - "step": 6330 - }, - { - "epoch": 7.212923427270139, - "grad_norm": 6.2929816246032715, - "learning_rate": 0.00016567528035104826, - "loss": 7.579685974121094, - "step": 6340 - }, - { - "epoch": 7.224309706803302, - "grad_norm": 7.012646675109863, - "learning_rate": 0.0001656211062354407, - "loss": 7.769477844238281, - "step": 6350 - }, - { - "epoch": 7.235695986336465, - "grad_norm": 6.118347644805908, - "learning_rate": 0.00016556693211983316, - "loss": 7.643257904052734, - "step": 6360 - }, - { - "epoch": 7.2470822658696274, - "grad_norm": 5.888443946838379, - "learning_rate": 0.0001655127580042256, - "loss": 7.476705169677734, - "step": 6370 - }, - { - "epoch": 7.258468545402789, - "grad_norm": 6.040239334106445, - "learning_rate": 0.00016545858388861804, - "loss": 7.228121948242188, - "step": 6380 - }, - { - "epoch": 7.269854824935952, - "grad_norm": 6.494739055633545, - "learning_rate": 0.00016540440977301046, - "loss": 7.939952087402344, - "step": 6390 - }, - { - "epoch": 7.281241104469115, - "grad_norm": 6.026639461517334, - "learning_rate": 0.00016535023565740291, - "loss": 7.657688903808594, - "step": 6400 - }, - { - "epoch": 7.2926273840022775, - "grad_norm": 6.798668384552002, - "learning_rate": 0.00016529606154179534, - "loss": 8.28334732055664, - "step": 6410 - }, - { - "epoch": 7.304013663535439, - "grad_norm": 6.844184398651123, - "learning_rate": 0.0001652418874261878, - "loss": 7.631706237792969, - "step": 6420 - }, - { - "epoch": 7.315399943068602, - "grad_norm": 5.855307579040527, - "learning_rate": 0.00016518771331058022, - "loss": 7.990458679199219, - "step": 6430 - }, - { - "epoch": 7.326786222601765, - "grad_norm": 6.2944159507751465, - "learning_rate": 0.00016513353919497264, - "loss": 7.7971031188964846, - "step": 6440 - }, - { - "epoch": 7.3381725021349276, - "grad_norm": 5.8472900390625, - "learning_rate": 0.0001650793650793651, - "loss": 7.843349456787109, - "step": 6450 - }, - { - "epoch": 7.34955878166809, - "grad_norm": 6.461435317993164, - "learning_rate": 0.00016502519096375752, - "loss": 7.716208648681641, - "step": 6460 - }, - { - "epoch": 7.360945061201252, - "grad_norm": 7.011518955230713, - "learning_rate": 0.00016497101684814997, - "loss": 7.972675323486328, - "step": 6470 - }, - { - "epoch": 7.372331340734415, - "grad_norm": 6.545440196990967, - "learning_rate": 0.0001649168427325424, - "loss": 8.097943878173828, - "step": 6480 - }, - { - "epoch": 7.383717620267578, - "grad_norm": 6.5945820808410645, - "learning_rate": 0.00016486266861693484, - "loss": 8.236820220947266, - "step": 6490 - }, - { - "epoch": 7.39510389980074, - "grad_norm": 5.519364833831787, - "learning_rate": 0.00016480849450132727, - "loss": 7.836220550537109, - "step": 6500 - }, - { - "epoch": 7.406490179333902, - "grad_norm": 6.1039910316467285, - "learning_rate": 0.00016475432038571972, - "loss": 7.595920562744141, - "step": 6510 - }, - { - "epoch": 7.417876458867065, - "grad_norm": 6.2389020919799805, - "learning_rate": 0.00016470014627011214, - "loss": 7.607307434082031, - "step": 6520 - }, - { - "epoch": 7.429262738400228, - "grad_norm": 6.249361038208008, - "learning_rate": 0.0001646459721545046, - "loss": 7.690441131591797, - "step": 6530 - }, - { - "epoch": 7.44064901793339, - "grad_norm": 6.085043907165527, - "learning_rate": 0.00016459179803889702, - "loss": 7.78430404663086, - "step": 6540 - }, - { - "epoch": 7.452035297466553, - "grad_norm": 6.791484355926514, - "learning_rate": 0.00016453762392328947, - "loss": 8.073173522949219, - "step": 6550 - }, - { - "epoch": 7.463421576999715, - "grad_norm": 6.072941303253174, - "learning_rate": 0.0001644834498076819, - "loss": 7.5049797058105465, - "step": 6560 - }, - { - "epoch": 7.474807856532878, - "grad_norm": 6.538127422332764, - "learning_rate": 0.00016442927569207435, - "loss": 8.0398193359375, - "step": 6570 - }, - { - "epoch": 7.4861941360660404, - "grad_norm": 6.887174129486084, - "learning_rate": 0.00016437510157646677, - "loss": 7.953376007080078, - "step": 6580 - }, - { - "epoch": 7.497580415599203, - "grad_norm": 6.870800495147705, - "learning_rate": 0.0001643209274608592, - "loss": 7.675375366210938, - "step": 6590 - }, - { - "epoch": 7.508966695132365, - "grad_norm": 6.101712703704834, - "learning_rate": 0.00016426675334525165, - "loss": 7.8550971984863285, - "step": 6600 - }, - { - "epoch": 7.520352974665528, - "grad_norm": 6.480758190155029, - "learning_rate": 0.00016421257922964407, - "loss": 7.9089508056640625, - "step": 6610 - }, - { - "epoch": 7.5317392541986905, - "grad_norm": 6.663425445556641, - "learning_rate": 0.00016415840511403652, - "loss": 7.739738464355469, - "step": 6620 - }, - { - "epoch": 7.543125533731853, - "grad_norm": 6.832794666290283, - "learning_rate": 0.00016410423099842895, - "loss": 7.970989990234375, - "step": 6630 - }, - { - "epoch": 7.554511813265016, - "grad_norm": 6.2909770011901855, - "learning_rate": 0.0001640500568828214, - "loss": 8.1257080078125, - "step": 6640 - }, - { - "epoch": 7.565898092798178, - "grad_norm": 6.659548759460449, - "learning_rate": 0.00016399588276721382, - "loss": 7.914602661132813, - "step": 6650 - }, - { - "epoch": 7.577284372331341, - "grad_norm": 6.749256610870361, - "learning_rate": 0.00016394170865160628, - "loss": 7.901271057128906, - "step": 6660 - }, - { - "epoch": 7.588670651864503, - "grad_norm": 6.320638656616211, - "learning_rate": 0.0001638875345359987, - "loss": 7.549610137939453, - "step": 6670 - }, - { - "epoch": 7.600056931397666, - "grad_norm": 6.600435256958008, - "learning_rate": 0.00016383336042039115, - "loss": 7.900602722167969, - "step": 6680 - }, - { - "epoch": 7.611443210930828, - "grad_norm": 7.011569976806641, - "learning_rate": 0.00016377918630478358, - "loss": 7.672644805908203, - "step": 6690 - }, - { - "epoch": 7.622829490463991, - "grad_norm": 8.137212753295898, - "learning_rate": 0.00016372501218917603, - "loss": 7.771617126464844, - "step": 6700 - }, - { - "epoch": 7.634215769997153, - "grad_norm": 6.862710952758789, - "learning_rate": 0.00016367083807356848, - "loss": 8.049059295654297, - "step": 6710 - }, - { - "epoch": 7.645602049530316, - "grad_norm": 6.581108093261719, - "learning_rate": 0.0001636166639579609, - "loss": 7.8606727600097654, - "step": 6720 - }, - { - "epoch": 7.656988329063479, - "grad_norm": 6.7956647872924805, - "learning_rate": 0.00016356248984235333, - "loss": 8.168240356445313, - "step": 6730 - }, - { - "epoch": 7.668374608596641, - "grad_norm": 6.123491287231445, - "learning_rate": 0.00016350831572674575, - "loss": 7.896241760253906, - "step": 6740 - }, - { - "epoch": 7.679760888129803, - "grad_norm": 6.9219136238098145, - "learning_rate": 0.0001634541416111382, - "loss": 8.173204040527343, - "step": 6750 - }, - { - "epoch": 7.691147167662966, - "grad_norm": 6.664566993713379, - "learning_rate": 0.00016339996749553063, - "loss": 8.276904296875, - "step": 6760 - }, - { - "epoch": 7.702533447196129, - "grad_norm": 7.015839576721191, - "learning_rate": 0.00016334579337992308, - "loss": 7.637835693359375, - "step": 6770 - }, - { - "epoch": 7.713919726729291, - "grad_norm": 7.610071659088135, - "learning_rate": 0.0001632916192643155, - "loss": 7.590660095214844, - "step": 6780 - }, - { - "epoch": 7.7253060062624535, - "grad_norm": 6.450679779052734, - "learning_rate": 0.00016323744514870796, - "loss": 7.862370300292969, - "step": 6790 - }, - { - "epoch": 7.736692285795616, - "grad_norm": 6.770766735076904, - "learning_rate": 0.00016318327103310038, - "loss": 7.959809112548828, - "step": 6800 - }, - { - "epoch": 7.748078565328779, - "grad_norm": 6.543522357940674, - "learning_rate": 0.00016312909691749283, - "loss": 7.892201232910156, - "step": 6810 - }, - { - "epoch": 7.759464844861942, - "grad_norm": 6.228302955627441, - "learning_rate": 0.00016307492280188528, - "loss": 7.535732269287109, - "step": 6820 - }, - { - "epoch": 7.770851124395104, - "grad_norm": 6.860931396484375, - "learning_rate": 0.0001630207486862777, - "loss": 8.195712280273437, - "step": 6830 - }, - { - "epoch": 7.782237403928266, - "grad_norm": 6.871212482452393, - "learning_rate": 0.00016296657457067016, - "loss": 7.680415344238281, - "step": 6840 - }, - { - "epoch": 7.793623683461429, - "grad_norm": 6.185060977935791, - "learning_rate": 0.00016291240045506259, - "loss": 7.795463562011719, - "step": 6850 - }, - { - "epoch": 7.805009962994592, - "grad_norm": 6.6304826736450195, - "learning_rate": 0.000162858226339455, - "loss": 7.885947418212891, - "step": 6860 - }, - { - "epoch": 7.816396242527754, - "grad_norm": 5.791022777557373, - "learning_rate": 0.00016280405222384743, - "loss": 7.429241943359375, - "step": 6870 - }, - { - "epoch": 7.827782522060916, - "grad_norm": 6.679376602172852, - "learning_rate": 0.00016274987810823989, - "loss": 7.767063140869141, - "step": 6880 - }, - { - "epoch": 7.839168801594079, - "grad_norm": 6.607264518737793, - "learning_rate": 0.0001626957039926323, - "loss": 8.051114654541015, - "step": 6890 - }, - { - "epoch": 7.850555081127242, - "grad_norm": 6.548991680145264, - "learning_rate": 0.00016264152987702476, - "loss": 7.440624237060547, - "step": 6900 - }, - { - "epoch": 7.8619413606604045, - "grad_norm": 6.445249557495117, - "learning_rate": 0.00016258735576141719, - "loss": 7.984051513671875, - "step": 6910 - }, - { - "epoch": 7.873327640193567, - "grad_norm": 6.627695560455322, - "learning_rate": 0.00016253318164580964, - "loss": 7.8278968811035154, - "step": 6920 - }, - { - "epoch": 7.884713919726729, - "grad_norm": 6.76314640045166, - "learning_rate": 0.0001624790075302021, - "loss": 7.394937133789062, - "step": 6930 - }, - { - "epoch": 7.896100199259892, - "grad_norm": 6.663532257080078, - "learning_rate": 0.00016242483341459451, - "loss": 8.056930541992188, - "step": 6940 - }, - { - "epoch": 7.9074864787930546, - "grad_norm": 7.132768630981445, - "learning_rate": 0.00016237065929898697, - "loss": 7.636945343017578, - "step": 6950 - }, - { - "epoch": 7.918872758326217, - "grad_norm": 6.137714385986328, - "learning_rate": 0.0001623164851833794, - "loss": 8.028910827636718, - "step": 6960 - }, - { - "epoch": 7.930259037859379, - "grad_norm": 6.413361072540283, - "learning_rate": 0.00016226231106777184, - "loss": 7.650550842285156, - "step": 6970 - }, - { - "epoch": 7.941645317392542, - "grad_norm": 5.806910514831543, - "learning_rate": 0.00016220813695216427, - "loss": 7.590312194824219, - "step": 6980 - }, - { - "epoch": 7.953031596925705, - "grad_norm": 6.411592483520508, - "learning_rate": 0.00016215396283655672, - "loss": 8.053181457519532, - "step": 6990 - }, - { - "epoch": 7.964417876458867, - "grad_norm": 6.627279758453369, - "learning_rate": 0.00016209978872094914, - "loss": 7.878144073486328, - "step": 7000 - }, - { - "epoch": 7.97580415599203, - "grad_norm": 7.155358791351318, - "learning_rate": 0.00016204561460534157, - "loss": 7.987397003173828, - "step": 7010 - }, - { - "epoch": 7.987190435525192, - "grad_norm": 6.340551376342773, - "learning_rate": 0.000161991440489734, - "loss": 7.976909637451172, - "step": 7020 - }, - { - "epoch": 7.998576715058355, - "grad_norm": 6.796611309051514, - "learning_rate": 0.00016193726637412644, - "loss": 7.7033638000488285, - "step": 7030 - }, - { - "epoch": 8.00910902362653, - "grad_norm": 6.260311126708984, - "learning_rate": 0.0001618830922585189, - "loss": 6.682421112060547, - "step": 7040 - }, - { - "epoch": 8.020495303159693, - "grad_norm": 7.427417278289795, - "learning_rate": 0.00016182891814291132, - "loss": 7.5024055480957035, - "step": 7050 - }, - { - "epoch": 8.031881582692854, - "grad_norm": 6.3494486808776855, - "learning_rate": 0.00016177474402730377, - "loss": 7.2604820251464846, - "step": 7060 - }, - { - "epoch": 8.043267862226017, - "grad_norm": 6.655555725097656, - "learning_rate": 0.0001617205699116962, - "loss": 7.514097595214844, - "step": 7070 - }, - { - "epoch": 8.05465414175918, - "grad_norm": 6.684828758239746, - "learning_rate": 0.00016166639579608865, - "loss": 7.446005249023438, - "step": 7080 - }, - { - "epoch": 8.066040421292342, - "grad_norm": 6.890992164611816, - "learning_rate": 0.00016161222168048107, - "loss": 7.443931579589844, - "step": 7090 - }, - { - "epoch": 8.077426700825505, - "grad_norm": 6.673741340637207, - "learning_rate": 0.00016155804756487352, - "loss": 7.16052017211914, - "step": 7100 - }, - { - "epoch": 8.088812980358668, - "grad_norm": 6.923252105712891, - "learning_rate": 0.00016150387344926595, - "loss": 6.938526153564453, - "step": 7110 - }, - { - "epoch": 8.10019925989183, - "grad_norm": 6.302939414978027, - "learning_rate": 0.0001614496993336584, - "loss": 7.243843841552734, - "step": 7120 - }, - { - "epoch": 8.111585539424993, - "grad_norm": 6.899267673492432, - "learning_rate": 0.00016139552521805082, - "loss": 7.297703552246094, - "step": 7130 - }, - { - "epoch": 8.122971818958156, - "grad_norm": 7.268035888671875, - "learning_rate": 0.00016134135110244327, - "loss": 7.230997467041016, - "step": 7140 - }, - { - "epoch": 8.134358098491319, - "grad_norm": 7.285719871520996, - "learning_rate": 0.00016129259439839647, - "loss": 7.024531555175781, - "step": 7150 - }, - { - "epoch": 8.14574437802448, - "grad_norm": 6.868885040283203, - "learning_rate": 0.0001612384202827889, - "loss": 7.714209747314453, - "step": 7160 - }, - { - "epoch": 8.157130657557643, - "grad_norm": 6.319274425506592, - "learning_rate": 0.00016118424616718132, - "loss": 6.995166015625, - "step": 7170 - }, - { - "epoch": 8.168516937090805, - "grad_norm": 6.736001491546631, - "learning_rate": 0.00016113007205157374, - "loss": 7.084954833984375, - "step": 7180 - }, - { - "epoch": 8.179903216623968, - "grad_norm": 6.393606185913086, - "learning_rate": 0.0001610758979359662, - "loss": 7.56659164428711, - "step": 7190 - }, - { - "epoch": 8.19128949615713, - "grad_norm": 6.33527135848999, - "learning_rate": 0.00016102172382035865, - "loss": 7.253793334960937, - "step": 7200 - }, - { - "epoch": 8.202675775690293, - "grad_norm": 7.905284404754639, - "learning_rate": 0.00016096754970475107, - "loss": 7.590048217773438, - "step": 7210 - }, - { - "epoch": 8.214062055223456, - "grad_norm": 6.864686965942383, - "learning_rate": 0.00016091337558914352, - "loss": 7.1065208435058596, - "step": 7220 - }, - { - "epoch": 8.225448334756619, - "grad_norm": 6.355857849121094, - "learning_rate": 0.00016085920147353595, - "loss": 7.114351654052735, - "step": 7230 - }, - { - "epoch": 8.23683461428978, - "grad_norm": 7.217507839202881, - "learning_rate": 0.0001608050273579284, - "loss": 7.072686767578125, - "step": 7240 - }, - { - "epoch": 8.248220893822943, - "grad_norm": 6.846201419830322, - "learning_rate": 0.00016075085324232082, - "loss": 7.156787109375, - "step": 7250 - }, - { - "epoch": 8.259607173356105, - "grad_norm": 7.219080448150635, - "learning_rate": 0.00016069667912671327, - "loss": 7.439443969726563, - "step": 7260 - }, - { - "epoch": 8.270993452889268, - "grad_norm": 6.303773880004883, - "learning_rate": 0.0001606425050111057, - "loss": 7.361647033691407, - "step": 7270 - }, - { - "epoch": 8.28237973242243, - "grad_norm": 7.611039161682129, - "learning_rate": 0.00016058833089549815, - "loss": 7.37061767578125, - "step": 7280 - }, - { - "epoch": 8.293766011955594, - "grad_norm": 6.5654497146606445, - "learning_rate": 0.00016053415677989057, - "loss": 7.721343994140625, - "step": 7290 - }, - { - "epoch": 8.305152291488756, - "grad_norm": 5.921697616577148, - "learning_rate": 0.000160479982664283, - "loss": 7.14508285522461, - "step": 7300 - }, - { - "epoch": 8.316538571021919, - "grad_norm": 6.788980484008789, - "learning_rate": 0.00016042580854867545, - "loss": 7.477375793457031, - "step": 7310 - }, - { - "epoch": 8.327924850555082, - "grad_norm": 7.130054950714111, - "learning_rate": 0.00016037163443306788, - "loss": 6.889068603515625, - "step": 7320 - }, - { - "epoch": 8.339311130088245, - "grad_norm": 6.373105049133301, - "learning_rate": 0.00016031746031746033, - "loss": 7.234850311279297, - "step": 7330 - }, - { - "epoch": 8.350697409621405, - "grad_norm": 7.498371124267578, - "learning_rate": 0.00016026328620185275, - "loss": 7.064684295654297, - "step": 7340 - }, - { - "epoch": 8.362083689154568, - "grad_norm": 6.624664783477783, - "learning_rate": 0.0001602091120862452, - "loss": 7.807039642333985, - "step": 7350 - }, - { - "epoch": 8.373469968687731, - "grad_norm": 6.387814044952393, - "learning_rate": 0.00016015493797063763, - "loss": 7.460430908203125, - "step": 7360 - }, - { - "epoch": 8.384856248220894, - "grad_norm": 7.011965274810791, - "learning_rate": 0.00016010076385503008, - "loss": 7.333013916015625, - "step": 7370 - }, - { - "epoch": 8.396242527754056, - "grad_norm": 6.425861835479736, - "learning_rate": 0.0001600465897394225, - "loss": 7.393313598632813, - "step": 7380 - }, - { - "epoch": 8.40762880728722, - "grad_norm": 6.637088775634766, - "learning_rate": 0.00015999241562381496, - "loss": 7.426857757568359, - "step": 7390 - }, - { - "epoch": 8.419015086820382, - "grad_norm": 7.036196708679199, - "learning_rate": 0.00015993824150820738, - "loss": 7.142207336425781, - "step": 7400 - }, - { - "epoch": 8.430401366353545, - "grad_norm": 6.357840061187744, - "learning_rate": 0.00015988406739259983, - "loss": 7.445525360107422, - "step": 7410 - }, - { - "epoch": 8.441787645886706, - "grad_norm": 6.880340099334717, - "learning_rate": 0.00015982989327699228, - "loss": 7.163670349121094, - "step": 7420 - }, - { - "epoch": 8.453173925419868, - "grad_norm": 6.362428188323975, - "learning_rate": 0.0001597757191613847, - "loss": 7.434315490722656, - "step": 7430 - }, - { - "epoch": 8.464560204953031, - "grad_norm": 6.664401531219482, - "learning_rate": 0.00015972154504577713, - "loss": 7.144511413574219, - "step": 7440 - }, - { - "epoch": 8.475946484486194, - "grad_norm": 6.501242637634277, - "learning_rate": 0.00015966737093016956, - "loss": 7.108064270019531, - "step": 7450 - }, - { - "epoch": 8.487332764019357, - "grad_norm": 6.9143476486206055, - "learning_rate": 0.000159613196814562, - "loss": 7.1984611511230465, - "step": 7460 - }, - { - "epoch": 8.49871904355252, - "grad_norm": 6.557959079742432, - "learning_rate": 0.00015955902269895443, - "loss": 7.022095489501953, - "step": 7470 - }, - { - "epoch": 8.510105323085682, - "grad_norm": 6.557929992675781, - "learning_rate": 0.00015950484858334688, - "loss": 7.145917510986328, - "step": 7480 - }, - { - "epoch": 8.521491602618845, - "grad_norm": 6.603511810302734, - "learning_rate": 0.0001594506744677393, - "loss": 7.341641998291015, - "step": 7490 - }, - { - "epoch": 8.532877882152007, - "grad_norm": 7.191463470458984, - "learning_rate": 0.00015939650035213176, - "loss": 6.9570457458496096, - "step": 7500 - }, - { - "epoch": 8.54426416168517, - "grad_norm": 6.919197082519531, - "learning_rate": 0.00015934232623652418, - "loss": 7.235923004150391, - "step": 7510 - }, - { - "epoch": 8.555650441218331, - "grad_norm": 6.883152008056641, - "learning_rate": 0.00015928815212091664, - "loss": 7.444821166992187, - "step": 7520 - }, - { - "epoch": 8.567036720751494, - "grad_norm": 6.3207244873046875, - "learning_rate": 0.0001592339780053091, - "loss": 7.598611450195312, - "step": 7530 - }, - { - "epoch": 8.578423000284657, - "grad_norm": 6.418648719787598, - "learning_rate": 0.0001591798038897015, - "loss": 7.209278869628906, - "step": 7540 - }, - { - "epoch": 8.58980927981782, - "grad_norm": 6.839104175567627, - "learning_rate": 0.00015912562977409396, - "loss": 7.241474914550781, - "step": 7550 - }, - { - "epoch": 8.601195559350982, - "grad_norm": 6.884853363037109, - "learning_rate": 0.0001590714556584864, - "loss": 7.021162414550782, - "step": 7560 - }, - { - "epoch": 8.612581838884145, - "grad_norm": 6.1609787940979, - "learning_rate": 0.00015901728154287884, - "loss": 7.474005126953125, - "step": 7570 - }, - { - "epoch": 8.623968118417308, - "grad_norm": 6.195271968841553, - "learning_rate": 0.00015896310742727126, - "loss": 6.983114624023438, - "step": 7580 - }, - { - "epoch": 8.63535439795047, - "grad_norm": 6.70726203918457, - "learning_rate": 0.0001589089333116637, - "loss": 7.420458984375, - "step": 7590 - }, - { - "epoch": 8.646740677483631, - "grad_norm": 7.531630516052246, - "learning_rate": 0.0001588547591960561, - "loss": 7.718193817138672, - "step": 7600 - }, - { - "epoch": 8.658126957016794, - "grad_norm": 6.9179534912109375, - "learning_rate": 0.00015880058508044856, - "loss": 7.357187652587891, - "step": 7610 - }, - { - "epoch": 8.669513236549957, - "grad_norm": 6.675024509429932, - "learning_rate": 0.000158746410964841, - "loss": 7.010201263427734, - "step": 7620 - }, - { - "epoch": 8.68089951608312, - "grad_norm": 6.622598648071289, - "learning_rate": 0.00015869223684923344, - "loss": 7.162730407714844, - "step": 7630 - }, - { - "epoch": 8.692285795616282, - "grad_norm": 6.835996627807617, - "learning_rate": 0.0001586380627336259, - "loss": 7.303845977783203, - "step": 7640 - }, - { - "epoch": 8.703672075149445, - "grad_norm": 6.367660045623779, - "learning_rate": 0.00015858388861801832, - "loss": 7.625505065917968, - "step": 7650 - }, - { - "epoch": 8.715058354682608, - "grad_norm": 6.169827938079834, - "learning_rate": 0.00015852971450241077, - "loss": 7.09228744506836, - "step": 7660 - }, - { - "epoch": 8.72644463421577, - "grad_norm": 6.215500354766846, - "learning_rate": 0.0001584755403868032, - "loss": 7.060032653808594, - "step": 7670 - }, - { - "epoch": 8.737830913748933, - "grad_norm": 6.325623035430908, - "learning_rate": 0.00015842136627119564, - "loss": 7.31326904296875, - "step": 7680 - }, - { - "epoch": 8.749217193282096, - "grad_norm": 6.559711933135986, - "learning_rate": 0.00015836719215558807, - "loss": 7.409132385253907, - "step": 7690 - }, - { - "epoch": 8.760603472815257, - "grad_norm": 6.366078853607178, - "learning_rate": 0.00015831301803998052, - "loss": 6.915621948242188, - "step": 7700 - }, - { - "epoch": 8.77198975234842, - "grad_norm": 6.734594345092773, - "learning_rate": 0.00015825884392437294, - "loss": 7.271147155761719, - "step": 7710 - }, - { - "epoch": 8.783376031881582, - "grad_norm": 7.609946250915527, - "learning_rate": 0.00015820466980876537, - "loss": 7.408287048339844, - "step": 7720 - }, - { - "epoch": 8.794762311414745, - "grad_norm": 6.976548671722412, - "learning_rate": 0.00015815049569315782, - "loss": 7.269252777099609, - "step": 7730 - }, - { - "epoch": 8.806148590947908, - "grad_norm": 7.052664279937744, - "learning_rate": 0.00015809632157755025, - "loss": 7.457789611816406, - "step": 7740 - }, - { - "epoch": 8.81753487048107, - "grad_norm": 6.683445453643799, - "learning_rate": 0.0001580421474619427, - "loss": 7.239376831054687, - "step": 7750 - }, - { - "epoch": 8.828921150014233, - "grad_norm": 6.740503311157227, - "learning_rate": 0.00015798797334633512, - "loss": 7.014515686035156, - "step": 7760 - }, - { - "epoch": 8.840307429547396, - "grad_norm": 7.012024879455566, - "learning_rate": 0.00015793379923072757, - "loss": 7.423218536376953, - "step": 7770 - }, - { - "epoch": 8.851693709080559, - "grad_norm": 6.79911994934082, - "learning_rate": 0.00015787962511512, - "loss": 7.527141571044922, - "step": 7780 - }, - { - "epoch": 8.86307998861372, - "grad_norm": 6.133242130279541, - "learning_rate": 0.00015782545099951245, - "loss": 7.559020233154297, - "step": 7790 - }, - { - "epoch": 8.874466268146882, - "grad_norm": 6.954835891723633, - "learning_rate": 0.00015777127688390487, - "loss": 7.530062103271485, - "step": 7800 - }, - { - "epoch": 8.885852547680045, - "grad_norm": 6.71362829208374, - "learning_rate": 0.00015771710276829733, - "loss": 7.113396453857422, - "step": 7810 - }, - { - "epoch": 8.897238827213208, - "grad_norm": 6.9403862953186035, - "learning_rate": 0.00015766292865268975, - "loss": 7.578011322021484, - "step": 7820 - }, - { - "epoch": 8.90862510674637, - "grad_norm": 6.492265701293945, - "learning_rate": 0.0001576087545370822, - "loss": 7.283029937744141, - "step": 7830 - }, - { - "epoch": 8.920011386279533, - "grad_norm": 5.762649059295654, - "learning_rate": 0.00015755458042147463, - "loss": 7.412893676757813, - "step": 7840 - }, - { - "epoch": 8.931397665812696, - "grad_norm": 6.935257911682129, - "learning_rate": 0.00015750040630586708, - "loss": 7.5289451599121096, - "step": 7850 - }, - { - "epoch": 8.942783945345859, - "grad_norm": 6.323666095733643, - "learning_rate": 0.0001574462321902595, - "loss": 7.003956604003906, - "step": 7860 - }, - { - "epoch": 8.954170224879022, - "grad_norm": 5.6248779296875, - "learning_rate": 0.00015739205807465193, - "loss": 7.441354370117187, - "step": 7870 - }, - { - "epoch": 8.965556504412183, - "grad_norm": 6.482755661010742, - "learning_rate": 0.00015733788395904438, - "loss": 7.2617744445800785, - "step": 7880 - }, - { - "epoch": 8.976942783945345, - "grad_norm": 6.673778533935547, - "learning_rate": 0.0001572837098434368, - "loss": 7.212700653076172, - "step": 7890 - }, - { - "epoch": 8.988329063478508, - "grad_norm": 6.261040210723877, - "learning_rate": 0.00015722953572782925, - "loss": 7.402249145507812, - "step": 7900 - }, - { - "epoch": 8.99971534301167, - "grad_norm": 6.743113994598389, - "learning_rate": 0.00015717536161222168, - "loss": 7.82423095703125, - "step": 7910 - }, - { - "epoch": 9.010247651579846, - "grad_norm": 6.245288372039795, - "learning_rate": 0.00015712118749661413, - "loss": 6.120718765258789, - "step": 7920 - }, - { - "epoch": 9.021633931113008, - "grad_norm": 6.373956203460693, - "learning_rate": 0.00015706701338100655, - "loss": 6.5073394775390625, - "step": 7930 - }, - { - "epoch": 9.033020210646171, - "grad_norm": 6.782715320587158, - "learning_rate": 0.000157012839265399, - "loss": 6.719621276855468, - "step": 7940 - }, - { - "epoch": 9.044406490179334, - "grad_norm": 6.204227924346924, - "learning_rate": 0.00015695866514979143, - "loss": 6.846994781494141, - "step": 7950 - }, - { - "epoch": 9.055792769712497, - "grad_norm": 5.706500053405762, - "learning_rate": 0.00015690449103418388, - "loss": 6.545850372314453, - "step": 7960 - }, - { - "epoch": 9.06717904924566, - "grad_norm": 6.953368186950684, - "learning_rate": 0.0001568503169185763, - "loss": 6.754498291015625, - "step": 7970 - }, - { - "epoch": 9.078565328778822, - "grad_norm": 7.3118743896484375, - "learning_rate": 0.00015679614280296876, - "loss": 7.14571533203125, - "step": 7980 - }, - { - "epoch": 9.089951608311985, - "grad_norm": 7.408244609832764, - "learning_rate": 0.00015674196868736118, - "loss": 6.966554260253906, - "step": 7990 - }, - { - "epoch": 9.101337887845146, - "grad_norm": 6.550361156463623, - "learning_rate": 0.00015668779457175363, - "loss": 6.683185577392578, - "step": 8000 - }, - { - "epoch": 9.112724167378309, - "grad_norm": 7.23964262008667, - "learning_rate": 0.00015663362045614606, - "loss": 6.666042327880859, - "step": 8010 - }, - { - "epoch": 9.124110446911471, - "grad_norm": 6.447325706481934, - "learning_rate": 0.00015657944634053848, - "loss": 6.605396270751953, - "step": 8020 - }, - { - "epoch": 9.135496726444634, - "grad_norm": 6.252988338470459, - "learning_rate": 0.00015652527222493093, - "loss": 6.713479614257812, - "step": 8030 - }, - { - "epoch": 9.146883005977797, - "grad_norm": 7.02787971496582, - "learning_rate": 0.00015647109810932336, - "loss": 6.7254997253417965, - "step": 8040 - }, - { - "epoch": 9.15826928551096, - "grad_norm": 6.618356704711914, - "learning_rate": 0.0001564169239937158, - "loss": 6.731692504882813, - "step": 8050 - }, - { - "epoch": 9.169655565044122, - "grad_norm": 6.890060901641846, - "learning_rate": 0.00015636274987810824, - "loss": 6.741816711425781, - "step": 8060 - }, - { - "epoch": 9.181041844577285, - "grad_norm": 7.1875901222229, - "learning_rate": 0.0001563085757625007, - "loss": 6.789379119873047, - "step": 8070 - }, - { - "epoch": 9.192428124110448, - "grad_norm": 7.181351184844971, - "learning_rate": 0.0001562544016468931, - "loss": 6.818449401855469, - "step": 8080 - }, - { - "epoch": 9.203814403643609, - "grad_norm": 7.285808086395264, - "learning_rate": 0.00015620022753128556, - "loss": 6.539177703857422, - "step": 8090 - }, - { - "epoch": 9.215200683176771, - "grad_norm": 6.714638710021973, - "learning_rate": 0.00015614605341567801, - "loss": 6.656063842773437, - "step": 8100 - }, - { - "epoch": 9.226586962709934, - "grad_norm": 7.055619716644287, - "learning_rate": 0.00015609187930007044, - "loss": 6.620281982421875, - "step": 8110 - }, - { - "epoch": 9.237973242243097, - "grad_norm": 6.574080467224121, - "learning_rate": 0.0001560377051844629, - "loss": 6.747283172607422, - "step": 8120 - }, - { - "epoch": 9.24935952177626, - "grad_norm": 6.460296630859375, - "learning_rate": 0.00015598353106885531, - "loss": 6.993648529052734, - "step": 8130 - }, - { - "epoch": 9.260745801309422, - "grad_norm": 7.454887866973877, - "learning_rate": 0.00015592935695324774, - "loss": 6.948415374755859, - "step": 8140 - }, - { - "epoch": 9.272132080842585, - "grad_norm": 6.729802131652832, - "learning_rate": 0.00015587518283764016, - "loss": 6.89251937866211, - "step": 8150 - }, - { - "epoch": 9.283518360375748, - "grad_norm": 6.335530757904053, - "learning_rate": 0.00015582100872203262, - "loss": 6.703284454345703, - "step": 8160 - }, - { - "epoch": 9.29490463990891, - "grad_norm": 7.840336322784424, - "learning_rate": 0.00015576683460642504, - "loss": 7.09771957397461, - "step": 8170 - }, - { - "epoch": 9.306290919442072, - "grad_norm": 6.923723220825195, - "learning_rate": 0.0001557126604908175, - "loss": 6.888484954833984, - "step": 8180 - }, - { - "epoch": 9.317677198975234, - "grad_norm": 6.690946578979492, - "learning_rate": 0.00015565848637520992, - "loss": 7.289754486083984, - "step": 8190 - }, - { - "epoch": 9.329063478508397, - "grad_norm": 6.629517555236816, - "learning_rate": 0.00015560431225960237, - "loss": 7.009606170654297, - "step": 8200 - }, - { - "epoch": 9.34044975804156, - "grad_norm": 7.02864933013916, - "learning_rate": 0.00015555013814399482, - "loss": 6.7587532043457035, - "step": 8210 - }, - { - "epoch": 9.351836037574722, - "grad_norm": 6.852596282958984, - "learning_rate": 0.00015549596402838724, - "loss": 6.829173278808594, - "step": 8220 - }, - { - "epoch": 9.363222317107885, - "grad_norm": 6.9407243728637695, - "learning_rate": 0.0001554417899127797, - "loss": 6.788723754882812, - "step": 8230 - }, - { - "epoch": 9.374608596641048, - "grad_norm": 6.7164812088012695, - "learning_rate": 0.00015538761579717212, - "loss": 6.483773040771484, - "step": 8240 - }, - { - "epoch": 9.38599487617421, - "grad_norm": 6.983746528625488, - "learning_rate": 0.00015533344168156457, - "loss": 6.634527587890625, - "step": 8250 - }, - { - "epoch": 9.397381155707373, - "grad_norm": 7.202597618103027, - "learning_rate": 0.000155279267565957, - "loss": 6.860771942138672, - "step": 8260 - }, - { - "epoch": 9.408767435240534, - "grad_norm": 6.925684452056885, - "learning_rate": 0.00015522509345034945, - "loss": 7.078357696533203, - "step": 8270 - }, - { - "epoch": 9.420153714773697, - "grad_norm": 7.451114177703857, - "learning_rate": 0.00015517091933474187, - "loss": 6.643927764892578, - "step": 8280 - }, - { - "epoch": 9.43153999430686, - "grad_norm": 6.825454235076904, - "learning_rate": 0.0001551167452191343, - "loss": 6.680137634277344, - "step": 8290 - }, - { - "epoch": 9.442926273840023, - "grad_norm": 6.374398708343506, - "learning_rate": 0.00015506257110352672, - "loss": 6.5555778503417965, - "step": 8300 - }, - { - "epoch": 9.454312553373185, - "grad_norm": 7.115841388702393, - "learning_rate": 0.00015500839698791917, - "loss": 7.261045837402344, - "step": 8310 - }, - { - "epoch": 9.465698832906348, - "grad_norm": 6.24588680267334, - "learning_rate": 0.00015495422287231162, - "loss": 6.373281860351563, - "step": 8320 - }, - { - "epoch": 9.47708511243951, - "grad_norm": 6.433933258056641, - "learning_rate": 0.00015490004875670405, - "loss": 7.186137390136719, - "step": 8330 - }, - { - "epoch": 9.488471391972674, - "grad_norm": 6.373920440673828, - "learning_rate": 0.0001548458746410965, - "loss": 6.6659690856933596, - "step": 8340 - }, - { - "epoch": 9.499857671505836, - "grad_norm": 6.638841152191162, - "learning_rate": 0.00015479170052548892, - "loss": 6.709719848632813, - "step": 8350 - }, - { - "epoch": 9.511243951038997, - "grad_norm": 7.437965393066406, - "learning_rate": 0.00015473752640988138, - "loss": 6.974462890625, - "step": 8360 - }, - { - "epoch": 9.52263023057216, - "grad_norm": 7.176238059997559, - "learning_rate": 0.0001546833522942738, - "loss": 7.053614807128906, - "step": 8370 - }, - { - "epoch": 9.534016510105323, - "grad_norm": 6.885675430297852, - "learning_rate": 0.00015462917817866625, - "loss": 6.691897583007813, - "step": 8380 - }, - { - "epoch": 9.545402789638485, - "grad_norm": 5.81322717666626, - "learning_rate": 0.00015457500406305868, - "loss": 6.6090576171875, - "step": 8390 - }, - { - "epoch": 9.556789069171648, - "grad_norm": 6.862832069396973, - "learning_rate": 0.00015452082994745113, - "loss": 7.214694976806641, - "step": 8400 - }, - { - "epoch": 9.568175348704811, - "grad_norm": 7.299694538116455, - "learning_rate": 0.00015446665583184355, - "loss": 6.833685302734375, - "step": 8410 - }, - { - "epoch": 9.579561628237974, - "grad_norm": 6.393362522125244, - "learning_rate": 0.000154412481716236, - "loss": 7.033847045898438, - "step": 8420 - }, - { - "epoch": 9.590947907771136, - "grad_norm": 7.435060977935791, - "learning_rate": 0.00015435830760062843, - "loss": 6.689753723144531, - "step": 8430 - }, - { - "epoch": 9.602334187304299, - "grad_norm": 7.483737468719482, - "learning_rate": 0.00015430413348502085, - "loss": 7.141274261474609, - "step": 8440 - }, - { - "epoch": 9.61372046683746, - "grad_norm": 6.771705150604248, - "learning_rate": 0.0001542499593694133, - "loss": 6.743639373779297, - "step": 8450 - }, - { - "epoch": 9.625106746370623, - "grad_norm": 6.806461334228516, - "learning_rate": 0.00015419578525380573, - "loss": 6.662245178222657, - "step": 8460 - }, - { - "epoch": 9.636493025903786, - "grad_norm": 6.6654372215271, - "learning_rate": 0.00015414161113819818, - "loss": 6.932222747802735, - "step": 8470 - }, - { - "epoch": 9.647879305436948, - "grad_norm": 6.507349491119385, - "learning_rate": 0.0001540874370225906, - "loss": 7.092259216308594, - "step": 8480 - }, - { - "epoch": 9.659265584970111, - "grad_norm": 7.4099202156066895, - "learning_rate": 0.00015403326290698306, - "loss": 6.74954833984375, - "step": 8490 - }, - { - "epoch": 9.670651864503274, - "grad_norm": 6.947366714477539, - "learning_rate": 0.00015397908879137548, - "loss": 6.710967254638672, - "step": 8500 - }, - { - "epoch": 9.682038144036436, - "grad_norm": 7.391485214233398, - "learning_rate": 0.00015392491467576793, - "loss": 6.349737167358398, - "step": 8510 - }, - { - "epoch": 9.6934244235696, - "grad_norm": 6.834575653076172, - "learning_rate": 0.00015387074056016036, - "loss": 6.986430358886719, - "step": 8520 - }, - { - "epoch": 9.704810703102762, - "grad_norm": 6.834963321685791, - "learning_rate": 0.0001538165664445528, - "loss": 6.768232727050782, - "step": 8530 - }, - { - "epoch": 9.716196982635923, - "grad_norm": 6.545167446136475, - "learning_rate": 0.00015376239232894526, - "loss": 7.306761932373047, - "step": 8540 - }, - { - "epoch": 9.727583262169086, - "grad_norm": 6.159801959991455, - "learning_rate": 0.00015370821821333768, - "loss": 6.77575454711914, - "step": 8550 - }, - { - "epoch": 9.738969541702248, - "grad_norm": 6.571531772613525, - "learning_rate": 0.0001536540440977301, - "loss": 6.770185089111328, - "step": 8560 - }, - { - "epoch": 9.750355821235411, - "grad_norm": 7.542463779449463, - "learning_rate": 0.00015359986998212253, - "loss": 7.183005523681641, - "step": 8570 - }, - { - "epoch": 9.761742100768574, - "grad_norm": 6.521090984344482, - "learning_rate": 0.00015354569586651499, - "loss": 6.722921752929688, - "step": 8580 - }, - { - "epoch": 9.773128380301737, - "grad_norm": 6.779551029205322, - "learning_rate": 0.0001534915217509074, - "loss": 6.963375854492187, - "step": 8590 - }, - { - "epoch": 9.7845146598349, - "grad_norm": 7.098196029663086, - "learning_rate": 0.00015343734763529986, - "loss": 7.068116760253906, - "step": 8600 - }, - { - "epoch": 9.795900939368062, - "grad_norm": 6.990663528442383, - "learning_rate": 0.00015338317351969229, - "loss": 6.641743469238281, - "step": 8610 - }, - { - "epoch": 9.807287218901225, - "grad_norm": 6.753698348999023, - "learning_rate": 0.00015332899940408474, - "loss": 6.880230712890625, - "step": 8620 - }, - { - "epoch": 9.818673498434386, - "grad_norm": 7.067351341247559, - "learning_rate": 0.00015327482528847716, - "loss": 6.862619781494141, - "step": 8630 - }, - { - "epoch": 9.830059777967548, - "grad_norm": 7.574794292449951, - "learning_rate": 0.0001532206511728696, - "loss": 6.817996978759766, - "step": 8640 - }, - { - "epoch": 9.841446057500711, - "grad_norm": 6.906538963317871, - "learning_rate": 0.00015316647705726204, - "loss": 6.774374389648438, - "step": 8650 - }, - { - "epoch": 9.852832337033874, - "grad_norm": 6.665681838989258, - "learning_rate": 0.0001531123029416545, - "loss": 6.8718719482421875, - "step": 8660 - }, - { - "epoch": 9.864218616567037, - "grad_norm": 6.376792907714844, - "learning_rate": 0.00015305812882604694, - "loss": 6.672955322265625, - "step": 8670 - }, - { - "epoch": 9.8756048961002, - "grad_norm": 7.075804710388184, - "learning_rate": 0.00015300395471043937, - "loss": 7.0490264892578125, - "step": 8680 - }, - { - "epoch": 9.886991175633362, - "grad_norm": 6.787551403045654, - "learning_rate": 0.00015294978059483182, - "loss": 7.223753356933594, - "step": 8690 - }, - { - "epoch": 9.898377455166525, - "grad_norm": 6.694747447967529, - "learning_rate": 0.00015289560647922424, - "loss": 6.798531341552734, - "step": 8700 - }, - { - "epoch": 9.909763734699688, - "grad_norm": 6.917640686035156, - "learning_rate": 0.00015284143236361667, - "loss": 7.0906829833984375, - "step": 8710 - }, - { - "epoch": 9.921150014232849, - "grad_norm": 7.117720127105713, - "learning_rate": 0.0001527872582480091, - "loss": 7.004290771484375, - "step": 8720 - }, - { - "epoch": 9.932536293766011, - "grad_norm": 7.377773761749268, - "learning_rate": 0.00015273308413240154, - "loss": 6.975653839111328, - "step": 8730 - }, - { - "epoch": 9.943922573299174, - "grad_norm": 6.890756130218506, - "learning_rate": 0.00015267891001679397, - "loss": 6.900422668457031, - "step": 8740 - }, - { - "epoch": 9.955308852832337, - "grad_norm": 7.174078941345215, - "learning_rate": 0.00015262473590118642, - "loss": 7.064826202392578, - "step": 8750 - }, - { - "epoch": 9.9666951323655, - "grad_norm": 6.936395168304443, - "learning_rate": 0.00015257056178557884, - "loss": 6.7657325744628904, - "step": 8760 - }, - { - "epoch": 9.978081411898662, - "grad_norm": 6.216085433959961, - "learning_rate": 0.0001525163876699713, - "loss": 6.81783447265625, - "step": 8770 - }, - { - "epoch": 9.989467691431825, - "grad_norm": 6.8290839195251465, - "learning_rate": 0.00015246221355436375, - "loss": 6.718660736083985, - "step": 8780 - }, - { - "epoch": 10.0, - "grad_norm": 5.303323745727539, - "learning_rate": 0.00015240803943875617, - "loss": 6.455754089355469, - "step": 8790 - }, - { - "epoch": 10.011386279533163, - "grad_norm": 6.515944480895996, - "learning_rate": 0.00015235386532314862, - "loss": 6.108221435546875, - "step": 8800 - }, - { - "epoch": 10.022772559066325, - "grad_norm": 6.763042449951172, - "learning_rate": 0.00015229969120754105, - "loss": 6.451548004150391, - "step": 8810 - }, - { - "epoch": 10.034158838599488, - "grad_norm": 7.289494037628174, - "learning_rate": 0.0001522455170919335, - "loss": 6.422400665283203, - "step": 8820 - }, - { - "epoch": 10.045545118132651, - "grad_norm": 6.849950790405273, - "learning_rate": 0.00015219134297632592, - "loss": 6.369514083862304, - "step": 8830 - }, - { - "epoch": 10.056931397665812, - "grad_norm": 6.839559555053711, - "learning_rate": 0.00015213716886071835, - "loss": 6.199007415771485, - "step": 8840 - }, - { - "epoch": 10.068317677198975, - "grad_norm": 7.444436550140381, - "learning_rate": 0.0001520829947451108, - "loss": 6.206901931762696, - "step": 8850 - }, - { - "epoch": 10.079703956732137, - "grad_norm": 7.2634782791137695, - "learning_rate": 0.00015202882062950322, - "loss": 6.411515808105468, - "step": 8860 - }, - { - "epoch": 10.0910902362653, - "grad_norm": 7.075826168060303, - "learning_rate": 0.00015197464651389565, - "loss": 6.321298980712891, - "step": 8870 - }, - { - "epoch": 10.102476515798463, - "grad_norm": 8.332730293273926, - "learning_rate": 0.0001519204723982881, - "loss": 6.658344268798828, - "step": 8880 - }, - { - "epoch": 10.113862795331626, - "grad_norm": 6.364902019500732, - "learning_rate": 0.00015186629828268055, - "loss": 6.109456634521484, - "step": 8890 - }, - { - "epoch": 10.125249074864788, - "grad_norm": 7.3898138999938965, - "learning_rate": 0.00015181212416707297, - "loss": 6.706474304199219, - "step": 8900 - }, - { - "epoch": 10.136635354397951, - "grad_norm": 7.5353264808654785, - "learning_rate": 0.00015175795005146543, - "loss": 6.191032791137696, - "step": 8910 - }, - { - "epoch": 10.148021633931114, - "grad_norm": 6.525182723999023, - "learning_rate": 0.00015170377593585785, - "loss": 6.050387191772461, - "step": 8920 - }, - { - "epoch": 10.159407913464275, - "grad_norm": 6.768884658813477, - "learning_rate": 0.0001516496018202503, - "loss": 6.448866271972657, - "step": 8930 - }, - { - "epoch": 10.170794192997437, - "grad_norm": 6.508273601531982, - "learning_rate": 0.00015159542770464273, - "loss": 6.556841278076172, - "step": 8940 - }, - { - "epoch": 10.1821804725306, - "grad_norm": 7.144713401794434, - "learning_rate": 0.00015154125358903518, - "loss": 6.7161407470703125, - "step": 8950 - }, - { - "epoch": 10.193566752063763, - "grad_norm": 6.919196128845215, - "learning_rate": 0.0001514870794734276, - "loss": 6.426815032958984, - "step": 8960 - }, - { - "epoch": 10.204953031596926, - "grad_norm": 6.691727161407471, - "learning_rate": 0.00015143290535782005, - "loss": 6.2255096435546875, - "step": 8970 - }, - { - "epoch": 10.216339311130088, - "grad_norm": 6.866335391998291, - "learning_rate": 0.00015137873124221248, - "loss": 6.1784015655517575, - "step": 8980 - }, - { - "epoch": 10.227725590663251, - "grad_norm": 6.891946792602539, - "learning_rate": 0.0001513245571266049, - "loss": 6.152831268310547, - "step": 8990 - }, - { - "epoch": 10.239111870196414, - "grad_norm": 6.802209854125977, - "learning_rate": 0.00015127038301099736, - "loss": 6.270240783691406, - "step": 9000 - }, - { - "epoch": 10.250498149729577, - "grad_norm": 7.624642372131348, - "learning_rate": 0.00015121620889538978, - "loss": 6.247557067871094, - "step": 9010 - }, - { - "epoch": 10.261884429262738, - "grad_norm": 7.880180835723877, - "learning_rate": 0.00015116203477978223, - "loss": 6.371080017089843, - "step": 9020 - }, - { - "epoch": 10.2732707087959, - "grad_norm": 6.9289116859436035, - "learning_rate": 0.00015110786066417466, - "loss": 6.2116645812988285, - "step": 9030 - }, - { - "epoch": 10.284656988329063, - "grad_norm": 7.059987545013428, - "learning_rate": 0.0001510536865485671, - "loss": 6.152032852172852, - "step": 9040 - }, - { - "epoch": 10.296043267862226, - "grad_norm": 7.274111270904541, - "learning_rate": 0.00015099951243295953, - "loss": 6.412078857421875, - "step": 9050 - }, - { - "epoch": 10.307429547395389, - "grad_norm": 6.609608173370361, - "learning_rate": 0.00015094533831735198, - "loss": 6.340757369995117, - "step": 9060 - }, - { - "epoch": 10.318815826928551, - "grad_norm": 6.39749002456665, - "learning_rate": 0.0001508911642017444, - "loss": 6.413553619384766, - "step": 9070 - }, - { - "epoch": 10.330202106461714, - "grad_norm": 6.884727478027344, - "learning_rate": 0.00015083699008613686, - "loss": 6.569889068603516, - "step": 9080 - }, - { - "epoch": 10.341588385994877, - "grad_norm": 7.041878700256348, - "learning_rate": 0.00015078281597052928, - "loss": 6.179785537719726, - "step": 9090 - }, - { - "epoch": 10.35297466552804, - "grad_norm": 6.370310306549072, - "learning_rate": 0.00015072864185492174, - "loss": 6.136286926269531, - "step": 9100 - }, - { - "epoch": 10.3643609450612, - "grad_norm": 7.227022647857666, - "learning_rate": 0.0001506744677393142, - "loss": 6.1931709289550785, - "step": 9110 - }, - { - "epoch": 10.375747224594363, - "grad_norm": 6.813343048095703, - "learning_rate": 0.0001506202936237066, - "loss": 6.568285369873047, - "step": 9120 - }, - { - "epoch": 10.387133504127526, - "grad_norm": 6.486794948577881, - "learning_rate": 0.00015056611950809904, - "loss": 6.357745361328125, - "step": 9130 - }, - { - "epoch": 10.398519783660689, - "grad_norm": 7.465872764587402, - "learning_rate": 0.00015051194539249146, - "loss": 6.26553955078125, - "step": 9140 - }, - { - "epoch": 10.409906063193851, - "grad_norm": 7.438695907592773, - "learning_rate": 0.0001504577712768839, - "loss": 6.602199554443359, - "step": 9150 - }, - { - "epoch": 10.421292342727014, - "grad_norm": 8.035082817077637, - "learning_rate": 0.00015040359716127634, - "loss": 6.462114715576172, - "step": 9160 - }, - { - "epoch": 10.432678622260177, - "grad_norm": 6.581029891967773, - "learning_rate": 0.0001503494230456688, - "loss": 6.531999206542968, - "step": 9170 - }, - { - "epoch": 10.44406490179334, - "grad_norm": 7.6987175941467285, - "learning_rate": 0.0001502952489300612, - "loss": 6.53165283203125, - "step": 9180 - }, - { - "epoch": 10.455451181326502, - "grad_norm": 5.949412822723389, - "learning_rate": 0.00015024107481445366, - "loss": 5.95707893371582, - "step": 9190 - }, - { - "epoch": 10.466837460859663, - "grad_norm": 7.387345314025879, - "learning_rate": 0.0001501869006988461, - "loss": 6.544392395019531, - "step": 9200 - }, - { - "epoch": 10.478223740392826, - "grad_norm": 7.590228080749512, - "learning_rate": 0.00015013272658323854, - "loss": 6.59503173828125, - "step": 9210 - }, - { - "epoch": 10.489610019925989, - "grad_norm": 6.827988147735596, - "learning_rate": 0.000150078552467631, - "loss": 6.5579277038574215, - "step": 9220 - }, - { - "epoch": 10.500996299459151, - "grad_norm": 6.989532470703125, - "learning_rate": 0.00015002437835202342, - "loss": 6.056536102294922, - "step": 9230 - }, - { - "epoch": 10.512382578992314, - "grad_norm": 6.390324592590332, - "learning_rate": 0.00014997020423641587, - "loss": 6.437796783447266, - "step": 9240 - }, - { - "epoch": 10.523768858525477, - "grad_norm": 7.057545185089111, - "learning_rate": 0.0001499160301208083, - "loss": 6.424283599853515, - "step": 9250 - }, - { - "epoch": 10.53515513805864, - "grad_norm": 7.236074924468994, - "learning_rate": 0.00014986185600520072, - "loss": 6.465802001953125, - "step": 9260 - }, - { - "epoch": 10.546541417591802, - "grad_norm": 7.380622863769531, - "learning_rate": 0.00014980768188959314, - "loss": 6.5994873046875, - "step": 9270 - }, - { - "epoch": 10.557927697124965, - "grad_norm": 6.613060474395752, - "learning_rate": 0.0001497535077739856, - "loss": 6.399759292602539, - "step": 9280 - }, - { - "epoch": 10.569313976658126, - "grad_norm": 7.060276508331299, - "learning_rate": 0.00014969933365837802, - "loss": 6.557682800292969, - "step": 9290 - }, - { - "epoch": 10.580700256191289, - "grad_norm": 6.692377090454102, - "learning_rate": 0.00014964515954277047, - "loss": 6.2362712860107425, - "step": 9300 - }, - { - "epoch": 10.592086535724452, - "grad_norm": 7.573443412780762, - "learning_rate": 0.0001495909854271629, - "loss": 6.369208908081054, - "step": 9310 - }, - { - "epoch": 10.603472815257614, - "grad_norm": 7.296620845794678, - "learning_rate": 0.00014953681131155534, - "loss": 6.499317932128906, - "step": 9320 - }, - { - "epoch": 10.614859094790777, - "grad_norm": 7.194809436798096, - "learning_rate": 0.00014948263719594777, - "loss": 6.53308334350586, - "step": 9330 - }, - { - "epoch": 10.62624537432394, - "grad_norm": 7.089186191558838, - "learning_rate": 0.00014942846308034022, - "loss": 6.357146835327148, - "step": 9340 - }, - { - "epoch": 10.637631653857103, - "grad_norm": 6.138644695281982, - "learning_rate": 0.00014937428896473267, - "loss": 6.987527465820312, - "step": 9350 - }, - { - "epoch": 10.649017933390265, - "grad_norm": 6.458044528961182, - "learning_rate": 0.0001493201148491251, - "loss": 6.447171783447265, - "step": 9360 - }, - { - "epoch": 10.660404212923428, - "grad_norm": 6.982960224151611, - "learning_rate": 0.00014926594073351755, - "loss": 6.367984390258789, - "step": 9370 - }, - { - "epoch": 10.67179049245659, - "grad_norm": 7.014771461486816, - "learning_rate": 0.00014921176661790997, - "loss": 6.597218322753906, - "step": 9380 - }, - { - "epoch": 10.683176771989752, - "grad_norm": 7.192128658294678, - "learning_rate": 0.00014915759250230242, - "loss": 6.468855285644532, - "step": 9390 - }, - { - "epoch": 10.694563051522914, - "grad_norm": 6.968006134033203, - "learning_rate": 0.00014910341838669485, - "loss": 6.514220428466797, - "step": 9400 - }, - { - "epoch": 10.705949331056077, - "grad_norm": 7.2908220291137695, - "learning_rate": 0.00014904924427108727, - "loss": 5.984500885009766, - "step": 9410 - }, - { - "epoch": 10.71733561058924, - "grad_norm": 7.383593559265137, - "learning_rate": 0.0001489950701554797, - "loss": 6.390668869018555, - "step": 9420 - }, - { - "epoch": 10.728721890122403, - "grad_norm": 7.428356170654297, - "learning_rate": 0.00014894089603987215, - "loss": 6.504000854492188, - "step": 9430 - }, - { - "epoch": 10.740108169655565, - "grad_norm": 6.935097694396973, - "learning_rate": 0.00014888672192426457, - "loss": 6.668525695800781, - "step": 9440 - }, - { - "epoch": 10.751494449188728, - "grad_norm": 7.8939595222473145, - "learning_rate": 0.00014883254780865703, - "loss": 6.7964630126953125, - "step": 9450 - }, - { - "epoch": 10.76288072872189, - "grad_norm": 7.261744976043701, - "learning_rate": 0.00014877837369304948, - "loss": 6.494899749755859, - "step": 9460 - }, - { - "epoch": 10.774267008255052, - "grad_norm": 7.5321044921875, - "learning_rate": 0.0001487241995774419, - "loss": 6.319461441040039, - "step": 9470 - }, - { - "epoch": 10.785653287788215, - "grad_norm": 7.152090072631836, - "learning_rate": 0.00014867002546183435, - "loss": 6.386759948730469, - "step": 9480 - }, - { - "epoch": 10.797039567321377, - "grad_norm": 6.318842887878418, - "learning_rate": 0.00014861585134622678, - "loss": 6.789165496826172, - "step": 9490 - }, - { - "epoch": 10.80842584685454, - "grad_norm": 7.488574981689453, - "learning_rate": 0.00014856167723061923, - "loss": 6.679385375976563, - "step": 9500 - }, - { - "epoch": 10.819812126387703, - "grad_norm": 7.495085716247559, - "learning_rate": 0.00014851292052657242, - "loss": 6.523779296875, - "step": 9510 - }, - { - "epoch": 10.831198405920865, - "grad_norm": 6.9265522956848145, - "learning_rate": 0.00014845874641096485, - "loss": 6.4424797058105465, - "step": 9520 - }, - { - "epoch": 10.842584685454028, - "grad_norm": 6.2375359535217285, - "learning_rate": 0.0001484045722953573, - "loss": 6.325106811523438, - "step": 9530 - }, - { - "epoch": 10.853970964987191, - "grad_norm": 7.461876392364502, - "learning_rate": 0.00014835039817974973, - "loss": 6.4818115234375, - "step": 9540 - }, - { - "epoch": 10.865357244520354, - "grad_norm": 6.406781196594238, - "learning_rate": 0.00014829622406414218, - "loss": 6.454816436767578, - "step": 9550 - }, - { - "epoch": 10.876743524053516, - "grad_norm": 6.492645263671875, - "learning_rate": 0.0001482420499485346, - "loss": 6.191008377075195, - "step": 9560 - }, - { - "epoch": 10.888129803586677, - "grad_norm": 7.3028645515441895, - "learning_rate": 0.00014818787583292703, - "loss": 6.245122909545898, - "step": 9570 - }, - { - "epoch": 10.89951608311984, - "grad_norm": 6.861218452453613, - "learning_rate": 0.00014813370171731945, - "loss": 6.476416778564453, - "step": 9580 - }, - { - "epoch": 10.910902362653003, - "grad_norm": 7.525650501251221, - "learning_rate": 0.0001480795276017119, - "loss": 6.472120666503907, - "step": 9590 - }, - { - "epoch": 10.922288642186166, - "grad_norm": 7.155680179595947, - "learning_rate": 0.00014802535348610435, - "loss": 6.589835357666016, - "step": 9600 - }, - { - "epoch": 10.933674921719328, - "grad_norm": 7.079594612121582, - "learning_rate": 0.00014797117937049678, - "loss": 6.457518005371094, - "step": 9610 - }, - { - "epoch": 10.945061201252491, - "grad_norm": 7.258676052093506, - "learning_rate": 0.00014791700525488923, - "loss": 6.361143112182617, - "step": 9620 - }, - { - "epoch": 10.956447480785654, - "grad_norm": 7.055770397186279, - "learning_rate": 0.00014786283113928165, - "loss": 6.50369873046875, - "step": 9630 - }, - { - "epoch": 10.967833760318817, - "grad_norm": 6.666393756866455, - "learning_rate": 0.0001478086570236741, - "loss": 6.464437866210938, - "step": 9640 - }, - { - "epoch": 10.979220039851977, - "grad_norm": 6.598408222198486, - "learning_rate": 0.00014775448290806653, - "loss": 6.123414993286133, - "step": 9650 - }, - { - "epoch": 10.99060631938514, - "grad_norm": 6.270415306091309, - "learning_rate": 0.00014770030879245898, - "loss": 6.181151580810547, - "step": 9660 - }, - { - "epoch": 11.001138627953317, - "grad_norm": 6.535286903381348, - "learning_rate": 0.0001476461346768514, - "loss": 5.540151977539063, - "step": 9670 - }, - { - "epoch": 11.012524907486478, - "grad_norm": 6.144158363342285, - "learning_rate": 0.00014759196056124386, - "loss": 5.872829818725586, - "step": 9680 - }, - { - "epoch": 11.02391118701964, - "grad_norm": 7.178324222564697, - "learning_rate": 0.00014753778644563628, - "loss": 6.154612350463867, - "step": 9690 - }, - { - "epoch": 11.035297466552803, - "grad_norm": 6.866833209991455, - "learning_rate": 0.0001474836123300287, - "loss": 5.634689712524414, - "step": 9700 - }, - { - "epoch": 11.046683746085966, - "grad_norm": 7.015516757965088, - "learning_rate": 0.00014742943821442116, - "loss": 5.930116271972656, - "step": 9710 - }, - { - "epoch": 11.058070025619129, - "grad_norm": 7.600584983825684, - "learning_rate": 0.00014737526409881358, - "loss": 5.9960792541503904, - "step": 9720 - }, - { - "epoch": 11.069456305152292, - "grad_norm": 7.366182804107666, - "learning_rate": 0.00014732108998320603, - "loss": 5.631900787353516, - "step": 9730 - }, - { - "epoch": 11.080842584685454, - "grad_norm": 7.046829700469971, - "learning_rate": 0.00014726691586759846, - "loss": 6.252207183837891, - "step": 9740 - }, - { - "epoch": 11.092228864218617, - "grad_norm": 6.92673397064209, - "learning_rate": 0.0001472127417519909, - "loss": 6.086066818237304, - "step": 9750 - }, - { - "epoch": 11.10361514375178, - "grad_norm": 9.307673454284668, - "learning_rate": 0.00014715856763638333, - "loss": 6.022590255737304, - "step": 9760 - }, - { - "epoch": 11.11500142328494, - "grad_norm": 7.187988758087158, - "learning_rate": 0.00014710439352077579, - "loss": 5.973920440673828, - "step": 9770 - }, - { - "epoch": 11.126387702818104, - "grad_norm": 7.109073638916016, - "learning_rate": 0.0001470502194051682, - "loss": 5.864291000366211, - "step": 9780 - }, - { - "epoch": 11.137773982351266, - "grad_norm": 7.3360276222229, - "learning_rate": 0.00014699604528956066, - "loss": 6.124382781982422, - "step": 9790 - }, - { - "epoch": 11.149160261884429, - "grad_norm": 7.7672343254089355, - "learning_rate": 0.0001469418711739531, - "loss": 6.148440170288086, - "step": 9800 - }, - { - "epoch": 11.160546541417592, - "grad_norm": 6.8055100440979, - "learning_rate": 0.00014688769705834554, - "loss": 5.931351852416992, - "step": 9810 - }, - { - "epoch": 11.171932820950754, - "grad_norm": 7.583737373352051, - "learning_rate": 0.000146833522942738, - "loss": 5.668278503417969, - "step": 9820 - }, - { - "epoch": 11.183319100483917, - "grad_norm": 7.5180463790893555, - "learning_rate": 0.00014677934882713041, - "loss": 6.020656585693359, - "step": 9830 - }, - { - "epoch": 11.19470538001708, - "grad_norm": 7.665243625640869, - "learning_rate": 0.00014672517471152284, - "loss": 5.651744842529297, - "step": 9840 - }, - { - "epoch": 11.206091659550243, - "grad_norm": 7.284232139587402, - "learning_rate": 0.00014667100059591526, - "loss": 5.927288818359375, - "step": 9850 - }, - { - "epoch": 11.217477939083404, - "grad_norm": 6.677280426025391, - "learning_rate": 0.00014661682648030772, - "loss": 5.800758743286133, - "step": 9860 - }, - { - "epoch": 11.228864218616566, - "grad_norm": 6.858931064605713, - "learning_rate": 0.00014656265236470014, - "loss": 5.876351928710937, - "step": 9870 - }, - { - "epoch": 11.240250498149729, - "grad_norm": 6.993981838226318, - "learning_rate": 0.0001465084782490926, - "loss": 5.805530166625976, - "step": 9880 - }, - { - "epoch": 11.251636777682892, - "grad_norm": 7.795031547546387, - "learning_rate": 0.00014645430413348502, - "loss": 6.2553356170654295, - "step": 9890 - }, - { - "epoch": 11.263023057216055, - "grad_norm": 7.493925094604492, - "learning_rate": 0.00014640013001787747, - "loss": 6.046601867675781, - "step": 9900 - }, - { - "epoch": 11.274409336749217, - "grad_norm": 7.555092811584473, - "learning_rate": 0.0001463459559022699, - "loss": 5.973406982421875, - "step": 9910 - }, - { - "epoch": 11.28579561628238, - "grad_norm": 6.385543346405029, - "learning_rate": 0.00014629178178666234, - "loss": 6.176846694946289, - "step": 9920 - }, - { - "epoch": 11.297181895815543, - "grad_norm": 7.017746448516846, - "learning_rate": 0.00014623760767105477, - "loss": 6.001755142211914, - "step": 9930 - }, - { - "epoch": 11.308568175348706, - "grad_norm": 7.1694416999816895, - "learning_rate": 0.00014618343355544722, - "loss": 6.172482299804687, - "step": 9940 - }, - { - "epoch": 11.319954454881866, - "grad_norm": 7.279458999633789, - "learning_rate": 0.00014612925943983967, - "loss": 6.369392013549804, - "step": 9950 - }, - { - "epoch": 11.33134073441503, - "grad_norm": 6.9396772384643555, - "learning_rate": 0.0001460750853242321, - "loss": 5.813999176025391, - "step": 9960 - }, - { - "epoch": 11.342727013948192, - "grad_norm": 7.31268310546875, - "learning_rate": 0.00014602091120862455, - "loss": 5.91674575805664, - "step": 9970 - }, - { - "epoch": 11.354113293481355, - "grad_norm": 6.88545560836792, - "learning_rate": 0.00014596673709301697, - "loss": 5.994848251342773, - "step": 9980 - }, - { - "epoch": 11.365499573014517, - "grad_norm": 7.708160400390625, - "learning_rate": 0.0001459125629774094, - "loss": 5.773896408081055, - "step": 9990 - }, - { - "epoch": 11.37688585254768, - "grad_norm": 7.004419803619385, - "learning_rate": 0.00014585838886180182, - "loss": 5.685453796386719, - "step": 10000 - }, - { - "epoch": 11.388272132080843, - "grad_norm": 6.840615749359131, - "learning_rate": 0.00014580421474619427, - "loss": 5.64319953918457, - "step": 10010 - }, - { - "epoch": 11.399658411614006, - "grad_norm": 7.056737899780273, - "learning_rate": 0.0001457500406305867, - "loss": 5.996237945556641, - "step": 10020 - }, - { - "epoch": 11.411044691147168, - "grad_norm": 7.051031589508057, - "learning_rate": 0.00014569586651497915, - "loss": 6.065059661865234, - "step": 10030 - }, - { - "epoch": 11.422430970680331, - "grad_norm": 6.999020576477051, - "learning_rate": 0.00014564169239937157, - "loss": 5.551327896118164, - "step": 10040 - }, - { - "epoch": 11.433817250213492, - "grad_norm": 7.3235650062561035, - "learning_rate": 0.00014558751828376402, - "loss": 5.727095031738282, - "step": 10050 - }, - { - "epoch": 11.445203529746655, - "grad_norm": 6.848404884338379, - "learning_rate": 0.00014553334416815648, - "loss": 6.206461334228516, - "step": 10060 - }, - { - "epoch": 11.456589809279818, - "grad_norm": 6.96482515335083, - "learning_rate": 0.0001454791700525489, - "loss": 6.19579963684082, - "step": 10070 - }, - { - "epoch": 11.46797608881298, - "grad_norm": 7.321053981781006, - "learning_rate": 0.00014542499593694135, - "loss": 6.439669799804688, - "step": 10080 - }, - { - "epoch": 11.479362368346143, - "grad_norm": 6.996761798858643, - "learning_rate": 0.00014537082182133378, - "loss": 5.96948356628418, - "step": 10090 - }, - { - "epoch": 11.490748647879306, - "grad_norm": 6.9859466552734375, - "learning_rate": 0.00014531664770572623, - "loss": 5.9932861328125, - "step": 10100 - }, - { - "epoch": 11.502134927412468, - "grad_norm": 6.946524620056152, - "learning_rate": 0.00014526247359011865, - "loss": 5.791952514648438, - "step": 10110 - }, - { - "epoch": 11.513521206945631, - "grad_norm": 7.48444938659668, - "learning_rate": 0.00014520829947451108, - "loss": 6.16157112121582, - "step": 10120 - }, - { - "epoch": 11.524907486478792, - "grad_norm": 7.355890274047852, - "learning_rate": 0.0001451541253589035, - "loss": 5.91258659362793, - "step": 10130 - }, - { - "epoch": 11.536293766011955, - "grad_norm": 7.450939178466797, - "learning_rate": 0.00014509995124329595, - "loss": 6.787471008300781, - "step": 10140 - }, - { - "epoch": 11.547680045545118, - "grad_norm": 7.768852710723877, - "learning_rate": 0.00014504577712768838, - "loss": 6.156349563598633, - "step": 10150 - }, - { - "epoch": 11.55906632507828, - "grad_norm": 7.6208953857421875, - "learning_rate": 0.00014499160301208083, - "loss": 6.0792900085449215, - "step": 10160 - }, - { - "epoch": 11.570452604611443, - "grad_norm": 6.2815632820129395, - "learning_rate": 0.00014493742889647328, - "loss": 5.962456130981446, - "step": 10170 - }, - { - "epoch": 11.581838884144606, - "grad_norm": 7.529908180236816, - "learning_rate": 0.0001448832547808657, - "loss": 6.132223510742188, - "step": 10180 - }, - { - "epoch": 11.593225163677769, - "grad_norm": 7.08761739730835, - "learning_rate": 0.00014482908066525816, - "loss": 6.127777099609375, - "step": 10190 - }, - { - "epoch": 11.604611443210931, - "grad_norm": 8.239002227783203, - "learning_rate": 0.00014477490654965058, - "loss": 5.923508834838867, - "step": 10200 - }, - { - "epoch": 11.615997722744094, - "grad_norm": 7.077022552490234, - "learning_rate": 0.00014472073243404303, - "loss": 5.75263786315918, - "step": 10210 - }, - { - "epoch": 11.627384002277257, - "grad_norm": 7.765398979187012, - "learning_rate": 0.00014466655831843546, - "loss": 6.193927001953125, - "step": 10220 - }, - { - "epoch": 11.638770281810418, - "grad_norm": 7.009711265563965, - "learning_rate": 0.0001446123842028279, - "loss": 5.800412368774414, - "step": 10230 - }, - { - "epoch": 11.65015656134358, - "grad_norm": 7.33494234085083, - "learning_rate": 0.00014455821008722033, - "loss": 5.910943222045899, - "step": 10240 - }, - { - "epoch": 11.661542840876743, - "grad_norm": 7.698012351989746, - "learning_rate": 0.00014450403597161278, - "loss": 6.299586486816406, - "step": 10250 - }, - { - "epoch": 11.672929120409906, - "grad_norm": 7.655066967010498, - "learning_rate": 0.0001444498618560052, - "loss": 6.288556671142578, - "step": 10260 - }, - { - "epoch": 11.684315399943069, - "grad_norm": 7.222828388214111, - "learning_rate": 0.00014439568774039763, - "loss": 5.758666610717773, - "step": 10270 - }, - { - "epoch": 11.695701679476231, - "grad_norm": 7.702454566955566, - "learning_rate": 0.00014434151362479008, - "loss": 6.061179733276367, - "step": 10280 - }, - { - "epoch": 11.707087959009394, - "grad_norm": 8.399511337280273, - "learning_rate": 0.0001442873395091825, - "loss": 6.088422012329102, - "step": 10290 - }, - { - "epoch": 11.718474238542557, - "grad_norm": 6.886438846588135, - "learning_rate": 0.00014423316539357496, - "loss": 6.215003585815429, - "step": 10300 - }, - { - "epoch": 11.729860518075718, - "grad_norm": 6.97210168838501, - "learning_rate": 0.00014417899127796739, - "loss": 5.770806121826172, - "step": 10310 - }, - { - "epoch": 11.74124679760888, - "grad_norm": 6.872792720794678, - "learning_rate": 0.00014412481716235984, - "loss": 6.1828559875488285, - "step": 10320 - }, - { - "epoch": 11.752633077142043, - "grad_norm": 7.779046535491943, - "learning_rate": 0.00014407064304675226, - "loss": 5.819805908203125, - "step": 10330 - }, - { - "epoch": 11.764019356675206, - "grad_norm": 6.890455722808838, - "learning_rate": 0.0001440164689311447, - "loss": 5.905824279785156, - "step": 10340 - }, - { - "epoch": 11.775405636208369, - "grad_norm": 7.692152976989746, - "learning_rate": 0.00014396229481553714, - "loss": 5.950515365600586, - "step": 10350 - }, - { - "epoch": 11.786791915741532, - "grad_norm": 7.703096866607666, - "learning_rate": 0.0001439081206999296, - "loss": 6.3083740234375, - "step": 10360 - }, - { - "epoch": 11.798178195274694, - "grad_norm": 7.19696044921875, - "learning_rate": 0.00014385394658432201, - "loss": 6.3550971984863285, - "step": 10370 - }, - { - "epoch": 11.809564474807857, - "grad_norm": 6.914282321929932, - "learning_rate": 0.00014379977246871447, - "loss": 5.792062759399414, - "step": 10380 - }, - { - "epoch": 11.82095075434102, - "grad_norm": 7.224524974822998, - "learning_rate": 0.0001437455983531069, - "loss": 5.989566802978516, - "step": 10390 - }, - { - "epoch": 11.832337033874182, - "grad_norm": 7.418369293212891, - "learning_rate": 0.00014369142423749934, - "loss": 5.995355224609375, - "step": 10400 - }, - { - "epoch": 11.843723313407343, - "grad_norm": 6.645310878753662, - "learning_rate": 0.00014363725012189177, - "loss": 5.791942596435547, - "step": 10410 - }, - { - "epoch": 11.855109592940506, - "grad_norm": 7.1590375900268555, - "learning_rate": 0.0001435830760062842, - "loss": 5.956428146362304, - "step": 10420 - }, - { - "epoch": 11.866495872473669, - "grad_norm": 7.662755489349365, - "learning_rate": 0.00014352890189067664, - "loss": 6.0646930694580075, - "step": 10430 - }, - { - "epoch": 11.877882152006832, - "grad_norm": 7.7084550857543945, - "learning_rate": 0.00014347472777506907, - "loss": 6.142111206054688, - "step": 10440 - }, - { - "epoch": 11.889268431539994, - "grad_norm": 6.89467191696167, - "learning_rate": 0.00014342055365946152, - "loss": 5.906317138671875, - "step": 10450 - }, - { - "epoch": 11.900654711073157, - "grad_norm": 7.120398044586182, - "learning_rate": 0.00014336637954385394, - "loss": 6.146725463867187, - "step": 10460 - }, - { - "epoch": 11.91204099060632, - "grad_norm": 7.415362358093262, - "learning_rate": 0.0001433122054282464, - "loss": 6.037253570556641, - "step": 10470 - }, - { - "epoch": 11.923427270139483, - "grad_norm": 7.9902262687683105, - "learning_rate": 0.00014325803131263882, - "loss": 6.28980598449707, - "step": 10480 - }, - { - "epoch": 11.934813549672645, - "grad_norm": 6.856546878814697, - "learning_rate": 0.00014320385719703127, - "loss": 6.058418273925781, - "step": 10490 - }, - { - "epoch": 11.946199829205806, - "grad_norm": 7.198825836181641, - "learning_rate": 0.00014314968308142372, - "loss": 6.076106262207031, - "step": 10500 - }, - { - "epoch": 11.957586108738969, - "grad_norm": 7.608924388885498, - "learning_rate": 0.00014309550896581615, - "loss": 5.793802261352539, - "step": 10510 - }, - { - "epoch": 11.968972388272132, - "grad_norm": 7.0075225830078125, - "learning_rate": 0.0001430413348502086, - "loss": 6.062651443481445, - "step": 10520 - }, - { - "epoch": 11.980358667805294, - "grad_norm": 6.545762062072754, - "learning_rate": 0.00014298716073460102, - "loss": 5.8560432434082035, - "step": 10530 - }, - { - "epoch": 11.991744947338457, - "grad_norm": 7.019240379333496, - "learning_rate": 0.00014293298661899345, - "loss": 6.309774398803711, - "step": 10540 - }, - { - "epoch": 12.002277255906632, - "grad_norm": 6.560851097106934, - "learning_rate": 0.00014287881250338587, - "loss": 5.244910812377929, - "step": 10550 - }, - { - "epoch": 12.013663535439795, - "grad_norm": 6.521633625030518, - "learning_rate": 0.00014282463838777832, - "loss": 5.584713363647461, - "step": 10560 - }, - { - "epoch": 12.025049814972958, - "grad_norm": 7.09952974319458, - "learning_rate": 0.00014277046427217075, - "loss": 5.414521789550781, - "step": 10570 - }, - { - "epoch": 12.03643609450612, - "grad_norm": 7.587541103363037, - "learning_rate": 0.0001427162901565632, - "loss": 5.422273635864258, - "step": 10580 - }, - { - "epoch": 12.047822374039283, - "grad_norm": 8.005366325378418, - "learning_rate": 0.00014266211604095562, - "loss": 5.438925552368164, - "step": 10590 - }, - { - "epoch": 12.059208653572446, - "grad_norm": 7.477734565734863, - "learning_rate": 0.00014260794192534807, - "loss": 5.222858810424805, - "step": 10600 - }, - { - "epoch": 12.070594933105609, - "grad_norm": 7.249375343322754, - "learning_rate": 0.00014255376780974053, - "loss": 5.38745231628418, - "step": 10610 - }, - { - "epoch": 12.08198121263877, - "grad_norm": 7.713613510131836, - "learning_rate": 0.00014249959369413295, - "loss": 5.812319946289063, - "step": 10620 - }, - { - "epoch": 12.093367492171932, - "grad_norm": 7.597423553466797, - "learning_rate": 0.0001424454195785254, - "loss": 5.536786270141602, - "step": 10630 - }, - { - "epoch": 12.104753771705095, - "grad_norm": 6.994418621063232, - "learning_rate": 0.00014239124546291783, - "loss": 5.595279312133789, - "step": 10640 - }, - { - "epoch": 12.116140051238258, - "grad_norm": 7.918735027313232, - "learning_rate": 0.00014233707134731028, - "loss": 5.551082611083984, - "step": 10650 - }, - { - "epoch": 12.12752633077142, - "grad_norm": 7.9564666748046875, - "learning_rate": 0.0001422828972317027, - "loss": 5.564613342285156, - "step": 10660 - }, - { - "epoch": 12.138912610304583, - "grad_norm": 6.786741733551025, - "learning_rate": 0.00014222872311609515, - "loss": 5.819846725463867, - "step": 10670 - }, - { - "epoch": 12.150298889837746, - "grad_norm": 6.291092872619629, - "learning_rate": 0.00014217454900048758, - "loss": 5.597542190551758, - "step": 10680 - }, - { - "epoch": 12.161685169370909, - "grad_norm": 8.643335342407227, - "learning_rate": 0.00014212037488488, - "loss": 5.279590225219726, - "step": 10690 - }, - { - "epoch": 12.173071448904071, - "grad_norm": 8.687835693359375, - "learning_rate": 0.00014206620076927243, - "loss": 5.638618087768554, - "step": 10700 - }, - { - "epoch": 12.184457728437232, - "grad_norm": 7.45721960067749, - "learning_rate": 0.00014201202665366488, - "loss": 5.268462371826172, - "step": 10710 - }, - { - "epoch": 12.195844007970395, - "grad_norm": 7.256382942199707, - "learning_rate": 0.0001419578525380573, - "loss": 5.795388412475586, - "step": 10720 - }, - { - "epoch": 12.207230287503558, - "grad_norm": 7.054190635681152, - "learning_rate": 0.00014190367842244976, - "loss": 5.79822006225586, - "step": 10730 - }, - { - "epoch": 12.21861656703672, - "grad_norm": 6.79879093170166, - "learning_rate": 0.0001418495043068422, - "loss": 5.52197036743164, - "step": 10740 - }, - { - "epoch": 12.230002846569883, - "grad_norm": 7.19872522354126, - "learning_rate": 0.00014179533019123463, - "loss": 5.513645935058594, - "step": 10750 - }, - { - "epoch": 12.241389126103046, - "grad_norm": 6.726789951324463, - "learning_rate": 0.00014174115607562708, - "loss": 5.6524711608886715, - "step": 10760 - }, - { - "epoch": 12.252775405636209, - "grad_norm": 7.368878364562988, - "learning_rate": 0.0001416869819600195, - "loss": 5.634532928466797, - "step": 10770 - }, - { - "epoch": 12.264161685169372, - "grad_norm": 7.050151348114014, - "learning_rate": 0.00014163280784441196, - "loss": 5.614359283447266, - "step": 10780 - }, - { - "epoch": 12.275547964702534, - "grad_norm": 7.243103981018066, - "learning_rate": 0.00014157863372880438, - "loss": 5.495789337158203, - "step": 10790 - }, - { - "epoch": 12.286934244235695, - "grad_norm": 7.616795063018799, - "learning_rate": 0.00014152445961319684, - "loss": 5.778979110717773, - "step": 10800 - }, - { - "epoch": 12.298320523768858, - "grad_norm": 7.822340488433838, - "learning_rate": 0.00014147028549758926, - "loss": 5.528537750244141, - "step": 10810 - }, - { - "epoch": 12.30970680330202, - "grad_norm": 7.852270603179932, - "learning_rate": 0.00014141611138198168, - "loss": 5.403163909912109, - "step": 10820 - }, - { - "epoch": 12.321093082835183, - "grad_norm": 7.205753326416016, - "learning_rate": 0.00014136193726637414, - "loss": 5.604093551635742, - "step": 10830 - }, - { - "epoch": 12.332479362368346, - "grad_norm": 6.92218017578125, - "learning_rate": 0.00014130776315076656, - "loss": 5.867033004760742, - "step": 10840 - }, - { - "epoch": 12.343865641901509, - "grad_norm": 7.30786657333374, - "learning_rate": 0.000141253589035159, - "loss": 5.671744537353516, - "step": 10850 - }, - { - "epoch": 12.355251921434672, - "grad_norm": 7.897021293640137, - "learning_rate": 0.00014119941491955144, - "loss": 5.346432495117187, - "step": 10860 - }, - { - "epoch": 12.366638200967834, - "grad_norm": 7.8339738845825195, - "learning_rate": 0.0001411452408039439, - "loss": 5.636806488037109, - "step": 10870 - }, - { - "epoch": 12.378024480500997, - "grad_norm": 7.503673553466797, - "learning_rate": 0.0001410910666883363, - "loss": 5.6904136657714846, - "step": 10880 - }, - { - "epoch": 12.389410760034158, - "grad_norm": 7.782256603240967, - "learning_rate": 0.00014103689257272876, - "loss": 5.786883544921875, - "step": 10890 - }, - { - "epoch": 12.40079703956732, - "grad_norm": 7.115951061248779, - "learning_rate": 0.0001409827184571212, - "loss": 5.459210968017578, - "step": 10900 - }, - { - "epoch": 12.412183319100484, - "grad_norm": 7.4705281257629395, - "learning_rate": 0.00014092854434151364, - "loss": 5.9279014587402346, - "step": 10910 - }, - { - "epoch": 12.423569598633646, - "grad_norm": 7.553955554962158, - "learning_rate": 0.00014087437022590606, - "loss": 5.823044204711914, - "step": 10920 - }, - { - "epoch": 12.434955878166809, - "grad_norm": 7.552109241485596, - "learning_rate": 0.00014082019611029852, - "loss": 5.539102172851562, - "step": 10930 - }, - { - "epoch": 12.446342157699972, - "grad_norm": 6.597175598144531, - "learning_rate": 0.00014076602199469094, - "loss": 6.030828857421875, - "step": 10940 - }, - { - "epoch": 12.457728437233134, - "grad_norm": 6.793281078338623, - "learning_rate": 0.0001407118478790834, - "loss": 5.719361114501953, - "step": 10950 - }, - { - "epoch": 12.469114716766297, - "grad_norm": 7.471744537353516, - "learning_rate": 0.00014065767376347582, - "loss": 5.419223403930664, - "step": 10960 - }, - { - "epoch": 12.48050099629946, - "grad_norm": 6.892999649047852, - "learning_rate": 0.00014060349964786824, - "loss": 5.7255298614501955, - "step": 10970 - }, - { - "epoch": 12.491887275832621, - "grad_norm": 7.7696533203125, - "learning_rate": 0.0001405493255322607, - "loss": 6.058211898803711, - "step": 10980 - }, - { - "epoch": 12.503273555365784, - "grad_norm": 7.769250869750977, - "learning_rate": 0.00014049515141665312, - "loss": 5.425541687011719, - "step": 10990 - }, - { - "epoch": 12.514659834898946, - "grad_norm": 7.765056610107422, - "learning_rate": 0.00014044097730104557, - "loss": 5.620434951782227, - "step": 11000 - }, - { - "epoch": 12.52604611443211, - "grad_norm": 7.338669300079346, - "learning_rate": 0.000140386803185438, - "loss": 5.756977081298828, - "step": 11010 - }, - { - "epoch": 12.537432393965272, - "grad_norm": 7.880843162536621, - "learning_rate": 0.00014033262906983044, - "loss": 5.7971046447753904, - "step": 11020 - }, - { - "epoch": 12.548818673498435, - "grad_norm": 7.511135101318359, - "learning_rate": 0.00014027845495422287, - "loss": 5.453466415405273, - "step": 11030 - }, - { - "epoch": 12.560204953031597, - "grad_norm": 7.005084037780762, - "learning_rate": 0.00014022428083861532, - "loss": 5.595069503784179, - "step": 11040 - }, - { - "epoch": 12.57159123256476, - "grad_norm": 7.802360534667969, - "learning_rate": 0.00014017010672300775, - "loss": 6.111663436889648, - "step": 11050 - }, - { - "epoch": 12.582977512097923, - "grad_norm": 7.821834564208984, - "learning_rate": 0.0001401159326074002, - "loss": 5.398255920410156, - "step": 11060 - }, - { - "epoch": 12.594363791631084, - "grad_norm": 7.565220832824707, - "learning_rate": 0.00014006175849179265, - "loss": 5.964350891113281, - "step": 11070 - }, - { - "epoch": 12.605750071164247, - "grad_norm": 7.540068626403809, - "learning_rate": 0.00014000758437618507, - "loss": 5.82092399597168, - "step": 11080 - }, - { - "epoch": 12.61713635069741, - "grad_norm": 7.710811138153076, - "learning_rate": 0.00013995341026057752, - "loss": 5.725724411010742, - "step": 11090 - }, - { - "epoch": 12.628522630230572, - "grad_norm": 6.454217433929443, - "learning_rate": 0.00013989923614496995, - "loss": 5.528153991699218, - "step": 11100 - }, - { - "epoch": 12.639908909763735, - "grad_norm": 6.6477837562561035, - "learning_rate": 0.00013984506202936237, - "loss": 5.676776885986328, - "step": 11110 - }, - { - "epoch": 12.651295189296897, - "grad_norm": 6.51104736328125, - "learning_rate": 0.0001397908879137548, - "loss": 5.7555488586425785, - "step": 11120 - }, - { - "epoch": 12.66268146883006, - "grad_norm": 7.661293029785156, - "learning_rate": 0.00013973671379814725, - "loss": 5.601126861572266, - "step": 11130 - }, - { - "epoch": 12.674067748363223, - "grad_norm": 7.333502292633057, - "learning_rate": 0.00013968253968253967, - "loss": 5.845965957641601, - "step": 11140 - }, - { - "epoch": 12.685454027896386, - "grad_norm": 8.663681030273438, - "learning_rate": 0.00013962836556693213, - "loss": 5.837159729003906, - "step": 11150 - }, - { - "epoch": 12.696840307429547, - "grad_norm": 6.259514808654785, - "learning_rate": 0.00013957419145132455, - "loss": 5.721532821655273, - "step": 11160 - }, - { - "epoch": 12.70822658696271, - "grad_norm": 6.932978630065918, - "learning_rate": 0.000139520017335717, - "loss": 5.9294178009033205, - "step": 11170 - }, - { - "epoch": 12.719612866495872, - "grad_norm": 7.781189441680908, - "learning_rate": 0.00013946584322010945, - "loss": 5.403998184204101, - "step": 11180 - }, - { - "epoch": 12.730999146029035, - "grad_norm": 7.4265336990356445, - "learning_rate": 0.00013941166910450188, - "loss": 5.511170196533203, - "step": 11190 - }, - { - "epoch": 12.742385425562198, - "grad_norm": 8.001691818237305, - "learning_rate": 0.00013935749498889433, - "loss": 5.863291931152344, - "step": 11200 - }, - { - "epoch": 12.75377170509536, - "grad_norm": 6.850292682647705, - "learning_rate": 0.00013930332087328675, - "loss": 5.665401458740234, - "step": 11210 - }, - { - "epoch": 12.765157984628523, - "grad_norm": 8.020200729370117, - "learning_rate": 0.0001392491467576792, - "loss": 5.8318031311035154, - "step": 11220 - }, - { - "epoch": 12.776544264161686, - "grad_norm": 7.251684188842773, - "learning_rate": 0.00013919497264207163, - "loss": 5.751366806030274, - "step": 11230 - }, - { - "epoch": 12.787930543694848, - "grad_norm": 8.099172592163086, - "learning_rate": 0.00013914079852646405, - "loss": 5.816202545166016, - "step": 11240 - }, - { - "epoch": 12.79931682322801, - "grad_norm": 7.1152024269104, - "learning_rate": 0.00013908662441085648, - "loss": 5.4952960968017575, - "step": 11250 - }, - { - "epoch": 12.810703102761172, - "grad_norm": 7.486706256866455, - "learning_rate": 0.00013903245029524893, - "loss": 5.637163162231445, - "step": 11260 - }, - { - "epoch": 12.822089382294335, - "grad_norm": 7.652496337890625, - "learning_rate": 0.00013897827617964135, - "loss": 5.713247680664063, - "step": 11270 - }, - { - "epoch": 12.833475661827498, - "grad_norm": 7.462140083312988, - "learning_rate": 0.0001389241020640338, - "loss": 5.825135040283203, - "step": 11280 - }, - { - "epoch": 12.84486194136066, - "grad_norm": 7.004937648773193, - "learning_rate": 0.00013886992794842626, - "loss": 5.827632904052734, - "step": 11290 - }, - { - "epoch": 12.856248220893823, - "grad_norm": 7.434920310974121, - "learning_rate": 0.00013881575383281868, - "loss": 5.4522960662841795, - "step": 11300 - }, - { - "epoch": 12.867634500426986, - "grad_norm": 6.466522693634033, - "learning_rate": 0.00013876157971721113, - "loss": 5.210875701904297, - "step": 11310 - }, - { - "epoch": 12.879020779960149, - "grad_norm": 7.1903510093688965, - "learning_rate": 0.00013870740560160356, - "loss": 5.339904022216797, - "step": 11320 - }, - { - "epoch": 12.890407059493311, - "grad_norm": 7.564988613128662, - "learning_rate": 0.000138653231485996, - "loss": 5.6416679382324215, - "step": 11330 - }, - { - "epoch": 12.901793339026472, - "grad_norm": 8.242656707763672, - "learning_rate": 0.00013859905737038843, - "loss": 5.6682594299316404, - "step": 11340 - }, - { - "epoch": 12.913179618559635, - "grad_norm": 7.32057523727417, - "learning_rate": 0.00013854488325478089, - "loss": 5.620843124389649, - "step": 11350 - }, - { - "epoch": 12.924565898092798, - "grad_norm": 7.37946891784668, - "learning_rate": 0.0001384907091391733, - "loss": 5.613275527954102, - "step": 11360 - }, - { - "epoch": 12.93595217762596, - "grad_norm": 7.773562431335449, - "learning_rate": 0.00013843653502356576, - "loss": 5.346442794799804, - "step": 11370 - }, - { - "epoch": 12.947338457159123, - "grad_norm": 7.526298999786377, - "learning_rate": 0.00013838236090795819, - "loss": 5.606759643554687, - "step": 11380 - }, - { - "epoch": 12.958724736692286, - "grad_norm": 7.332315921783447, - "learning_rate": 0.0001383281867923506, - "loss": 5.419976425170899, - "step": 11390 - }, - { - "epoch": 12.970111016225449, - "grad_norm": 7.382599830627441, - "learning_rate": 0.00013827401267674304, - "loss": 5.998534393310547, - "step": 11400 - }, - { - "epoch": 12.981497295758611, - "grad_norm": 7.097250461578369, - "learning_rate": 0.0001382198385611355, - "loss": 5.930072021484375, - "step": 11410 - }, - { - "epoch": 12.992883575291774, - "grad_norm": 6.900557041168213, - "learning_rate": 0.00013816566444552794, - "loss": 5.5991252899169925, - "step": 11420 - }, - { - "epoch": 13.00341588385995, - "grad_norm": 6.776562213897705, - "learning_rate": 0.00013811149032992036, - "loss": 4.849653244018555, - "step": 11430 - }, - { - "epoch": 13.014802163393112, - "grad_norm": 7.419433116912842, - "learning_rate": 0.00013805731621431281, - "loss": 5.020238876342773, - "step": 11440 - }, - { - "epoch": 13.026188442926275, - "grad_norm": 6.931766986846924, - "learning_rate": 0.00013800314209870524, - "loss": 5.281552124023437, - "step": 11450 - }, - { - "epoch": 13.037574722459436, - "grad_norm": 7.03234338760376, - "learning_rate": 0.0001379489679830977, - "loss": 5.1979625701904295, - "step": 11460 - }, - { - "epoch": 13.048961001992598, - "grad_norm": 7.978625774383545, - "learning_rate": 0.00013789479386749011, - "loss": 5.348671722412109, - "step": 11470 - }, - { - "epoch": 13.060347281525761, - "grad_norm": 7.255367755889893, - "learning_rate": 0.00013784061975188257, - "loss": 5.117027282714844, - "step": 11480 - }, - { - "epoch": 13.071733561058924, - "grad_norm": 7.438676834106445, - "learning_rate": 0.000137786445636275, - "loss": 5.207336044311523, - "step": 11490 - }, - { - "epoch": 13.083119840592087, - "grad_norm": 6.7722578048706055, - "learning_rate": 0.00013773227152066744, - "loss": 4.885826110839844, - "step": 11500 - }, - { - "epoch": 13.09450612012525, - "grad_norm": 6.830091953277588, - "learning_rate": 0.00013767809740505987, - "loss": 4.998377990722656, - "step": 11510 - }, - { - "epoch": 13.105892399658412, - "grad_norm": 7.623549461364746, - "learning_rate": 0.00013762392328945232, - "loss": 5.187086486816407, - "step": 11520 - }, - { - "epoch": 13.117278679191575, - "grad_norm": 8.049860954284668, - "learning_rate": 0.00013756974917384474, - "loss": 4.911079788208008, - "step": 11530 - }, - { - "epoch": 13.128664958724737, - "grad_norm": 7.040311336517334, - "learning_rate": 0.00013751557505823717, - "loss": 5.190642547607422, - "step": 11540 - }, - { - "epoch": 13.140051238257898, - "grad_norm": 7.5759806632995605, - "learning_rate": 0.00013746140094262962, - "loss": 5.331892776489258, - "step": 11550 - }, - { - "epoch": 13.151437517791061, - "grad_norm": 7.5590362548828125, - "learning_rate": 0.00013740722682702204, - "loss": 5.309208679199219, - "step": 11560 - }, - { - "epoch": 13.162823797324224, - "grad_norm": 7.6602559089660645, - "learning_rate": 0.0001373530527114145, - "loss": 5.0000556945800785, - "step": 11570 - }, - { - "epoch": 13.174210076857387, - "grad_norm": 6.687933921813965, - "learning_rate": 0.00013729887859580692, - "loss": 5.226931381225586, - "step": 11580 - }, - { - "epoch": 13.18559635639055, - "grad_norm": 7.443079471588135, - "learning_rate": 0.00013724470448019937, - "loss": 5.208231735229492, - "step": 11590 - }, - { - "epoch": 13.196982635923712, - "grad_norm": 7.559839725494385, - "learning_rate": 0.0001371905303645918, - "loss": 5.439892578125, - "step": 11600 - }, - { - "epoch": 13.208368915456875, - "grad_norm": 7.104702472686768, - "learning_rate": 0.00013713635624898425, - "loss": 5.115517807006836, - "step": 11610 - }, - { - "epoch": 13.219755194990038, - "grad_norm": NaN, - "learning_rate": 0.00013708218213337667, - "loss": 4.99559440612793, - "step": 11620 - }, - { - "epoch": 13.2311414745232, - "grad_norm": 7.759593486785889, - "learning_rate": 0.00013703342542932987, - "loss": 5.390767288208008, - "step": 11630 - }, - { - "epoch": 13.242527754056361, - "grad_norm": 7.8164286613464355, - "learning_rate": 0.00013697925131372232, - "loss": 5.311045455932617, - "step": 11640 - }, - { - "epoch": 13.253914033589524, - "grad_norm": 6.780811309814453, - "learning_rate": 0.00013692507719811474, - "loss": 5.088868713378906, - "step": 11650 - }, - { - "epoch": 13.265300313122687, - "grad_norm": 7.4553022384643555, - "learning_rate": 0.0001368709030825072, - "loss": 5.636979675292968, - "step": 11660 - }, - { - "epoch": 13.27668659265585, - "grad_norm": 7.379472255706787, - "learning_rate": 0.00013681672896689962, - "loss": 5.3572509765625, - "step": 11670 - }, - { - "epoch": 13.288072872189012, - "grad_norm": 7.313671588897705, - "learning_rate": 0.00013676255485129204, - "loss": 5.362029647827148, - "step": 11680 - }, - { - "epoch": 13.299459151722175, - "grad_norm": 7.911259174346924, - "learning_rate": 0.0001367083807356845, - "loss": 5.288186645507812, - "step": 11690 - }, - { - "epoch": 13.310845431255338, - "grad_norm": 7.032965183258057, - "learning_rate": 0.00013665420662007692, - "loss": 5.303681182861328, - "step": 11700 - }, - { - "epoch": 13.3222317107885, - "grad_norm": 8.510554313659668, - "learning_rate": 0.00013660003250446937, - "loss": 5.175273513793945, - "step": 11710 - }, - { - "epoch": 13.333617990321663, - "grad_norm": 7.989207744598389, - "learning_rate": 0.0001365458583888618, - "loss": 4.8652587890625, - "step": 11720 - }, - { - "epoch": 13.345004269854824, - "grad_norm": 8.093276977539062, - "learning_rate": 0.00013649168427325425, - "loss": 5.2994426727294925, - "step": 11730 - }, - { - "epoch": 13.356390549387987, - "grad_norm": 6.790765285491943, - "learning_rate": 0.00013643751015764667, - "loss": 5.194287109375, - "step": 11740 - }, - { - "epoch": 13.36777682892115, - "grad_norm": 7.429324626922607, - "learning_rate": 0.00013638333604203912, - "loss": 5.5465232849121096, - "step": 11750 - }, - { - "epoch": 13.379163108454312, - "grad_norm": 7.065948486328125, - "learning_rate": 0.00013632916192643155, - "loss": 5.191884613037109, - "step": 11760 - }, - { - "epoch": 13.390549387987475, - "grad_norm": 7.3118510246276855, - "learning_rate": 0.000136274987810824, - "loss": 5.531578826904297, - "step": 11770 - }, - { - "epoch": 13.401935667520638, - "grad_norm": 7.449267864227295, - "learning_rate": 0.00013622081369521645, - "loss": 5.3741718292236325, - "step": 11780 - }, - { - "epoch": 13.4133219470538, - "grad_norm": 7.919931411743164, - "learning_rate": 0.00013616663957960888, - "loss": 5.327036666870117, - "step": 11790 - }, - { - "epoch": 13.424708226586963, - "grad_norm": 7.673199653625488, - "learning_rate": 0.00013611246546400133, - "loss": 5.344607925415039, - "step": 11800 - }, - { - "epoch": 13.436094506120126, - "grad_norm": 7.837534427642822, - "learning_rate": 0.00013605829134839375, - "loss": 5.430044174194336, - "step": 11810 - }, - { - "epoch": 13.447480785653287, - "grad_norm": 7.384720325469971, - "learning_rate": 0.00013600411723278618, - "loss": 5.121067810058594, - "step": 11820 - }, - { - "epoch": 13.45886706518645, - "grad_norm": 7.407566070556641, - "learning_rate": 0.0001359499431171786, - "loss": 5.254208755493164, - "step": 11830 - }, - { - "epoch": 13.470253344719612, - "grad_norm": 7.422298431396484, - "learning_rate": 0.00013589576900157105, - "loss": 5.210581207275391, - "step": 11840 - }, - { - "epoch": 13.481639624252775, - "grad_norm": 7.2973737716674805, - "learning_rate": 0.00013584159488596348, - "loss": 5.4987037658691404, - "step": 11850 - }, - { - "epoch": 13.493025903785938, - "grad_norm": 7.023255348205566, - "learning_rate": 0.00013578742077035593, - "loss": 5.042602920532227, - "step": 11860 - }, - { - "epoch": 13.5044121833191, - "grad_norm": 7.912652492523193, - "learning_rate": 0.00013573324665474835, - "loss": 5.281571578979492, - "step": 11870 - }, - { - "epoch": 13.515798462852263, - "grad_norm": 7.739786148071289, - "learning_rate": 0.0001356790725391408, - "loss": 5.43682861328125, - "step": 11880 - }, - { - "epoch": 13.527184742385426, - "grad_norm": 7.652828216552734, - "learning_rate": 0.00013562489842353326, - "loss": 5.211148071289062, - "step": 11890 - }, - { - "epoch": 13.538571021918589, - "grad_norm": 8.158793449401855, - "learning_rate": 0.00013557072430792568, - "loss": 5.4678295135498045, - "step": 11900 - }, - { - "epoch": 13.54995730145175, - "grad_norm": 7.335878372192383, - "learning_rate": 0.00013551655019231813, - "loss": 5.306196975708008, - "step": 11910 - }, - { - "epoch": 13.561343580984913, - "grad_norm": 7.801717758178711, - "learning_rate": 0.00013546237607671056, - "loss": 5.475741195678711, - "step": 11920 - }, - { - "epoch": 13.572729860518075, - "grad_norm": 8.175219535827637, - "learning_rate": 0.000135408201961103, - "loss": 5.457925796508789, - "step": 11930 - }, - { - "epoch": 13.584116140051238, - "grad_norm": 6.884897232055664, - "learning_rate": 0.00013535402784549543, - "loss": 5.128502655029297, - "step": 11940 - }, - { - "epoch": 13.5955024195844, - "grad_norm": 6.6634521484375, - "learning_rate": 0.00013529985372988788, - "loss": 5.319187164306641, - "step": 11950 - }, - { - "epoch": 13.606888699117563, - "grad_norm": 8.205178260803223, - "learning_rate": 0.0001352456796142803, - "loss": 5.198386001586914, - "step": 11960 - }, - { - "epoch": 13.618274978650726, - "grad_norm": 6.997448444366455, - "learning_rate": 0.00013519150549867273, - "loss": 5.356737899780273, - "step": 11970 - }, - { - "epoch": 13.629661258183889, - "grad_norm": 7.976472854614258, - "learning_rate": 0.00013513733138306516, - "loss": 5.224060440063477, - "step": 11980 - }, - { - "epoch": 13.641047537717052, - "grad_norm": 7.6008477210998535, - "learning_rate": 0.0001350831572674576, - "loss": 5.205660629272461, - "step": 11990 - }, - { - "epoch": 13.652433817250213, - "grad_norm": 8.134635925292969, - "learning_rate": 0.00013502898315185003, - "loss": 5.284549331665039, - "step": 12000 - }, - { - "epoch": 13.663820096783375, - "grad_norm": 7.697743892669678, - "learning_rate": 0.00013497480903624249, - "loss": 5.365811157226562, - "step": 12010 - }, - { - "epoch": 13.675206376316538, - "grad_norm": 6.987123966217041, - "learning_rate": 0.00013492063492063494, - "loss": 5.420026779174805, - "step": 12020 - }, - { - "epoch": 13.6865926558497, - "grad_norm": 7.670924663543701, - "learning_rate": 0.00013486646080502736, - "loss": 5.44122543334961, - "step": 12030 - }, - { - "epoch": 13.697978935382864, - "grad_norm": 7.6146955490112305, - "learning_rate": 0.0001348122866894198, - "loss": 5.368587493896484, - "step": 12040 - }, - { - "epoch": 13.709365214916026, - "grad_norm": 7.290314674377441, - "learning_rate": 0.00013475811257381224, - "loss": 4.899549102783203, - "step": 12050 - }, - { - "epoch": 13.720751494449189, - "grad_norm": 7.6227569580078125, - "learning_rate": 0.0001347039384582047, - "loss": 5.226024627685547, - "step": 12060 - }, - { - "epoch": 13.732137773982352, - "grad_norm": 8.122727394104004, - "learning_rate": 0.0001346497643425971, - "loss": 5.342987060546875, - "step": 12070 - }, - { - "epoch": 13.743524053515515, - "grad_norm": 7.191249847412109, - "learning_rate": 0.00013459559022698956, - "loss": 4.999538421630859, - "step": 12080 - }, - { - "epoch": 13.754910333048677, - "grad_norm": 7.330814361572266, - "learning_rate": 0.000134541416111382, - "loss": 5.499901199340821, - "step": 12090 - }, - { - "epoch": 13.766296612581838, - "grad_norm": 7.275107383728027, - "learning_rate": 0.00013448724199577441, - "loss": 5.174909591674805, - "step": 12100 - }, - { - "epoch": 13.777682892115001, - "grad_norm": 7.071986675262451, - "learning_rate": 0.00013443306788016687, - "loss": 5.219038391113282, - "step": 12110 - }, - { - "epoch": 13.789069171648164, - "grad_norm": 6.817813396453857, - "learning_rate": 0.0001343788937645593, - "loss": 5.07507553100586, - "step": 12120 - }, - { - "epoch": 13.800455451181326, - "grad_norm": 7.419076442718506, - "learning_rate": 0.00013432471964895174, - "loss": 5.351544952392578, - "step": 12130 - }, - { - "epoch": 13.81184173071449, - "grad_norm": 7.717519283294678, - "learning_rate": 0.00013427054553334417, - "loss": 5.706594085693359, - "step": 12140 - }, - { - "epoch": 13.823228010247652, - "grad_norm": 7.404618263244629, - "learning_rate": 0.00013421637141773662, - "loss": 5.376433563232422, - "step": 12150 - }, - { - "epoch": 13.834614289780815, - "grad_norm": 7.158414363861084, - "learning_rate": 0.00013416219730212904, - "loss": 5.363436508178711, - "step": 12160 - }, - { - "epoch": 13.846000569313977, - "grad_norm": 7.699437618255615, - "learning_rate": 0.0001341080231865215, - "loss": 5.6122314453125, - "step": 12170 - }, - { - "epoch": 13.857386848847138, - "grad_norm": 7.237250804901123, - "learning_rate": 0.00013405384907091392, - "loss": 5.395546340942383, - "step": 12180 - }, - { - "epoch": 13.868773128380301, - "grad_norm": 7.333144187927246, - "learning_rate": 0.00013399967495530637, - "loss": 5.211500549316407, - "step": 12190 - }, - { - "epoch": 13.880159407913464, - "grad_norm": 6.954226493835449, - "learning_rate": 0.0001339455008396988, - "loss": 5.263496017456054, - "step": 12200 - }, - { - "epoch": 13.891545687446627, - "grad_norm": 7.646538734436035, - "learning_rate": 0.00013389132672409125, - "loss": 5.6256969451904295, - "step": 12210 - }, - { - "epoch": 13.90293196697979, - "grad_norm": 7.714359760284424, - "learning_rate": 0.00013383715260848367, - "loss": 5.669702529907227, - "step": 12220 - }, - { - "epoch": 13.914318246512952, - "grad_norm": 7.628199577331543, - "learning_rate": 0.00013378297849287612, - "loss": 5.399072265625, - "step": 12230 - }, - { - "epoch": 13.925704526046115, - "grad_norm": 8.523447036743164, - "learning_rate": 0.00013372880437726855, - "loss": 5.366733169555664, - "step": 12240 - }, - { - "epoch": 13.937090805579277, - "grad_norm": 7.897779941558838, - "learning_rate": 0.00013367463026166097, - "loss": 5.20464973449707, - "step": 12250 - }, - { - "epoch": 13.94847708511244, - "grad_norm": 7.602762699127197, - "learning_rate": 0.00013362045614605342, - "loss": 5.602539443969727, - "step": 12260 - }, - { - "epoch": 13.959863364645603, - "grad_norm": 7.468645095825195, - "learning_rate": 0.00013356628203044585, - "loss": 5.513700485229492, - "step": 12270 - }, - { - "epoch": 13.971249644178764, - "grad_norm": 7.841520309448242, - "learning_rate": 0.0001335121079148383, - "loss": 5.51887321472168, - "step": 12280 - }, - { - "epoch": 13.982635923711927, - "grad_norm": 7.047943115234375, - "learning_rate": 0.00013345793379923072, - "loss": 5.111991500854492, - "step": 12290 - }, - { - "epoch": 13.99402220324509, - "grad_norm": 6.829543113708496, - "learning_rate": 0.00013340375968362317, - "loss": 5.058186340332031, - "step": 12300 - }, - { - "epoch": 14.004554511813264, - "grad_norm": 7.048587799072266, - "learning_rate": 0.0001333495855680156, - "loss": 4.963529586791992, - "step": 12310 - }, - { - "epoch": 14.015940791346427, - "grad_norm": 6.852132797241211, - "learning_rate": 0.00013329541145240805, - "loss": 4.623985290527344, - "step": 12320 - }, - { - "epoch": 14.02732707087959, - "grad_norm": 7.333976745605469, - "learning_rate": 0.00013324123733680047, - "loss": 5.1697029113769535, - "step": 12330 - }, - { - "epoch": 14.038713350412753, - "grad_norm": 8.014049530029297, - "learning_rate": 0.00013318706322119293, - "loss": 4.859414672851562, - "step": 12340 - }, - { - "epoch": 14.050099629945915, - "grad_norm": 7.355411529541016, - "learning_rate": 0.00013313288910558538, - "loss": 4.995968627929687, - "step": 12350 - }, - { - "epoch": 14.061485909479078, - "grad_norm": 7.705524921417236, - "learning_rate": 0.0001330787149899778, - "loss": 5.119239807128906, - "step": 12360 - }, - { - "epoch": 14.07287218901224, - "grad_norm": 7.468197822570801, - "learning_rate": 0.00013302454087437023, - "loss": 5.107388305664062, - "step": 12370 - }, - { - "epoch": 14.084258468545404, - "grad_norm": 6.574582099914551, - "learning_rate": 0.00013297036675876268, - "loss": 4.7201496124267575, - "step": 12380 - }, - { - "epoch": 14.095644748078564, - "grad_norm": 6.222922325134277, - "learning_rate": 0.0001329161926431551, - "loss": 4.612395477294922, - "step": 12390 - }, - { - "epoch": 14.107031027611727, - "grad_norm": 7.6845011711120605, - "learning_rate": 0.00013286201852754753, - "loss": 5.049299621582032, - "step": 12400 - }, - { - "epoch": 14.11841730714489, - "grad_norm": 7.420602798461914, - "learning_rate": 0.00013280784441193998, - "loss": 4.884346389770508, - "step": 12410 - }, - { - "epoch": 14.129803586678053, - "grad_norm": 7.716628074645996, - "learning_rate": 0.0001327536702963324, - "loss": 4.882117080688476, - "step": 12420 - }, - { - "epoch": 14.141189866211215, - "grad_norm": 7.8805646896362305, - "learning_rate": 0.00013269949618072486, - "loss": 4.686184692382812, - "step": 12430 - }, - { - "epoch": 14.152576145744378, - "grad_norm": 7.1464948654174805, - "learning_rate": 0.00013264532206511728, - "loss": 4.810683822631836, - "step": 12440 - }, - { - "epoch": 14.163962425277541, - "grad_norm": 7.473194599151611, - "learning_rate": 0.00013259114794950973, - "loss": 4.700112533569336, - "step": 12450 - }, - { - "epoch": 14.175348704810704, - "grad_norm": 6.588601589202881, - "learning_rate": 0.00013253697383390218, - "loss": 4.817057037353516, - "step": 12460 - }, - { - "epoch": 14.186734984343866, - "grad_norm": 7.659865856170654, - "learning_rate": 0.0001324827997182946, - "loss": 4.8424022674560545, - "step": 12470 - }, - { - "epoch": 14.198121263877027, - "grad_norm": 7.2345194816589355, - "learning_rate": 0.00013242862560268706, - "loss": 4.720790100097656, - "step": 12480 - }, - { - "epoch": 14.20950754341019, - "grad_norm": 7.407914161682129, - "learning_rate": 0.00013237445148707948, - "loss": 4.918620681762695, - "step": 12490 - }, - { - "epoch": 14.220893822943353, - "grad_norm": 8.086915969848633, - "learning_rate": 0.00013232027737147193, - "loss": 5.120816421508789, - "step": 12500 - }, - { - "epoch": 14.232280102476516, - "grad_norm": 7.739943981170654, - "learning_rate": 0.00013226610325586436, - "loss": 4.804103851318359, - "step": 12510 - }, - { - "epoch": 14.243666382009678, - "grad_norm": 6.809656620025635, - "learning_rate": 0.00013221192914025678, - "loss": 4.88025016784668, - "step": 12520 - }, - { - "epoch": 14.255052661542841, - "grad_norm": 7.113979816436768, - "learning_rate": 0.0001321577550246492, - "loss": 5.060388565063477, - "step": 12530 - }, - { - "epoch": 14.266438941076004, - "grad_norm": 7.600307941436768, - "learning_rate": 0.00013210358090904166, - "loss": 4.81654167175293, - "step": 12540 - }, - { - "epoch": 14.277825220609166, - "grad_norm": 7.201952934265137, - "learning_rate": 0.00013204940679343408, - "loss": 4.638751220703125, - "step": 12550 - }, - { - "epoch": 14.28921150014233, - "grad_norm": 7.577877521514893, - "learning_rate": 0.00013199523267782654, - "loss": 5.024491500854492, - "step": 12560 - }, - { - "epoch": 14.30059777967549, - "grad_norm": 7.727479934692383, - "learning_rate": 0.000131941058562219, - "loss": 5.108154296875, - "step": 12570 - }, - { - "epoch": 14.311984059208653, - "grad_norm": 7.286412239074707, - "learning_rate": 0.0001318868844466114, - "loss": 5.149254608154297, - "step": 12580 - }, - { - "epoch": 14.323370338741816, - "grad_norm": 7.6460723876953125, - "learning_rate": 0.00013183271033100386, - "loss": 4.752541351318359, - "step": 12590 - }, - { - "epoch": 14.334756618274978, - "grad_norm": 7.025350093841553, - "learning_rate": 0.0001317785362153963, - "loss": 4.839694213867188, - "step": 12600 - }, - { - "epoch": 14.346142897808141, - "grad_norm": 7.519998073577881, - "learning_rate": 0.00013172436209978874, - "loss": 4.982858276367187, - "step": 12610 - }, - { - "epoch": 14.357529177341304, - "grad_norm": 7.484885215759277, - "learning_rate": 0.00013167018798418116, - "loss": 4.672926330566407, - "step": 12620 - }, - { - "epoch": 14.368915456874467, - "grad_norm": 8.382508277893066, - "learning_rate": 0.00013161601386857362, - "loss": 5.045550537109375, - "step": 12630 - }, - { - "epoch": 14.38030173640763, - "grad_norm": 7.656764030456543, - "learning_rate": 0.00013156183975296604, - "loss": 5.17280158996582, - "step": 12640 - }, - { - "epoch": 14.391688015940792, - "grad_norm": 7.431979179382324, - "learning_rate": 0.0001315076656373585, - "loss": 4.8266761779785154, - "step": 12650 - }, - { - "epoch": 14.403074295473953, - "grad_norm": 7.853142738342285, - "learning_rate": 0.00013145349152175092, - "loss": 4.80140266418457, - "step": 12660 - }, - { - "epoch": 14.414460575007116, - "grad_norm": 7.3754987716674805, - "learning_rate": 0.00013139931740614334, - "loss": 4.92738037109375, - "step": 12670 - }, - { - "epoch": 14.425846854540278, - "grad_norm": 6.4924116134643555, - "learning_rate": 0.0001313451432905358, - "loss": 4.55872802734375, - "step": 12680 - }, - { - "epoch": 14.437233134073441, - "grad_norm": 8.214619636535645, - "learning_rate": 0.00013129096917492822, - "loss": 4.9627235412597654, - "step": 12690 - }, - { - "epoch": 14.448619413606604, - "grad_norm": 7.3021559715271, - "learning_rate": 0.00013123679505932067, - "loss": 5.223368072509766, - "step": 12700 - }, - { - "epoch": 14.460005693139767, - "grad_norm": 7.899365425109863, - "learning_rate": 0.0001311826209437131, - "loss": 5.1207012176513675, - "step": 12710 - }, - { - "epoch": 14.47139197267293, - "grad_norm": 7.248983860015869, - "learning_rate": 0.00013112844682810554, - "loss": 4.907715606689453, - "step": 12720 - }, - { - "epoch": 14.482778252206092, - "grad_norm": 7.069028377532959, - "learning_rate": 0.00013107427271249797, - "loss": 4.999393463134766, - "step": 12730 - }, - { - "epoch": 14.494164531739255, - "grad_norm": 7.262919902801514, - "learning_rate": 0.00013102009859689042, - "loss": 4.917198562622071, - "step": 12740 - }, - { - "epoch": 14.505550811272418, - "grad_norm": 7.535858154296875, - "learning_rate": 0.00013096592448128284, - "loss": 4.810692596435547, - "step": 12750 - }, - { - "epoch": 14.516937090805579, - "grad_norm": 7.142795562744141, - "learning_rate": 0.0001309117503656753, - "loss": 5.230551910400391, - "step": 12760 - }, - { - "epoch": 14.528323370338741, - "grad_norm": 7.323062419891357, - "learning_rate": 0.00013085757625006772, - "loss": 5.168463134765625, - "step": 12770 - }, - { - "epoch": 14.539709649871904, - "grad_norm": 8.29926872253418, - "learning_rate": 0.00013080340213446017, - "loss": 5.1340171813964846, - "step": 12780 - }, - { - "epoch": 14.551095929405067, - "grad_norm": 8.317549705505371, - "learning_rate": 0.0001307492280188526, - "loss": 4.967045593261719, - "step": 12790 - }, - { - "epoch": 14.56248220893823, - "grad_norm": 7.742959499359131, - "learning_rate": 0.00013069505390324505, - "loss": 4.707772827148437, - "step": 12800 - }, - { - "epoch": 14.573868488471392, - "grad_norm": 7.057898044586182, - "learning_rate": 0.00013064087978763747, - "loss": 4.755387496948242, - "step": 12810 - }, - { - "epoch": 14.585254768004555, - "grad_norm": 7.152231216430664, - "learning_rate": 0.0001305867056720299, - "loss": 5.241105270385742, - "step": 12820 - }, - { - "epoch": 14.596641047537718, - "grad_norm": 8.085304260253906, - "learning_rate": 0.00013053253155642235, - "loss": 4.788356781005859, - "step": 12830 - }, - { - "epoch": 14.608027327070879, - "grad_norm": 7.510035037994385, - "learning_rate": 0.00013047835744081477, - "loss": 5.091434478759766, - "step": 12840 - }, - { - "epoch": 14.619413606604041, - "grad_norm": 7.749240875244141, - "learning_rate": 0.00013042418332520723, - "loss": 4.968235778808594, - "step": 12850 - }, - { - "epoch": 14.630799886137204, - "grad_norm": 6.937013626098633, - "learning_rate": 0.00013037000920959965, - "loss": 5.242551040649414, - "step": 12860 - }, - { - "epoch": 14.642186165670367, - "grad_norm": 7.75348424911499, - "learning_rate": 0.0001303158350939921, - "loss": 4.967795944213867, - "step": 12870 - }, - { - "epoch": 14.65357244520353, - "grad_norm": 8.130202293395996, - "learning_rate": 0.00013026166097838453, - "loss": 5.108961868286133, - "step": 12880 - }, - { - "epoch": 14.664958724736692, - "grad_norm": 6.920219898223877, - "learning_rate": 0.00013020748686277698, - "loss": 4.6893165588378904, - "step": 12890 - }, - { - "epoch": 14.676345004269855, - "grad_norm": 7.73007869720459, - "learning_rate": 0.0001301533127471694, - "loss": 5.106839370727539, - "step": 12900 - }, - { - "epoch": 14.687731283803018, - "grad_norm": 7.3374924659729, - "learning_rate": 0.00013009913863156185, - "loss": 5.362062072753906, - "step": 12910 - }, - { - "epoch": 14.69911756333618, - "grad_norm": 7.493285655975342, - "learning_rate": 0.0001300449645159543, - "loss": 5.187427139282226, - "step": 12920 - }, - { - "epoch": 14.710503842869343, - "grad_norm": 7.747722625732422, - "learning_rate": 0.00012999079040034673, - "loss": 4.937312316894531, - "step": 12930 - }, - { - "epoch": 14.721890122402504, - "grad_norm": 7.39769983291626, - "learning_rate": 0.00012993661628473915, - "loss": 5.141265869140625, - "step": 12940 - }, - { - "epoch": 14.733276401935667, - "grad_norm": 6.812027931213379, - "learning_rate": 0.00012988244216913158, - "loss": 5.5300537109375, - "step": 12950 - }, - { - "epoch": 14.74466268146883, - "grad_norm": 7.199182987213135, - "learning_rate": 0.00012982826805352403, - "loss": 4.976782608032226, - "step": 12960 - }, - { - "epoch": 14.756048961001992, - "grad_norm": 7.161105155944824, - "learning_rate": 0.00012977409393791645, - "loss": 4.928173828125, - "step": 12970 - }, - { - "epoch": 14.767435240535155, - "grad_norm": 8.261829376220703, - "learning_rate": 0.0001297199198223089, - "loss": 4.858361053466797, - "step": 12980 - }, - { - "epoch": 14.778821520068318, - "grad_norm": 6.941257953643799, - "learning_rate": 0.00012966574570670133, - "loss": 4.880601119995117, - "step": 12990 - }, - { - "epoch": 14.79020779960148, - "grad_norm": 7.993049144744873, - "learning_rate": 0.00012961157159109378, - "loss": 5.036775970458985, - "step": 13000 - }, - { - "epoch": 14.801594079134643, - "grad_norm": 8.901403427124023, - "learning_rate": 0.0001295573974754862, - "loss": 5.106240844726562, - "step": 13010 - }, - { - "epoch": 14.812980358667804, - "grad_norm": 8.168084144592285, - "learning_rate": 0.00012950322335987866, - "loss": 5.213864135742187, - "step": 13020 - }, - { - "epoch": 14.824366638200967, - "grad_norm": 7.145936965942383, - "learning_rate": 0.0001294490492442711, - "loss": 5.022368621826172, - "step": 13030 - }, - { - "epoch": 14.83575291773413, - "grad_norm": 8.161090850830078, - "learning_rate": 0.00012939487512866353, - "loss": 5.109722518920899, - "step": 13040 - }, - { - "epoch": 14.847139197267293, - "grad_norm": 7.26900053024292, - "learning_rate": 0.00012934070101305599, - "loss": 5.046876907348633, - "step": 13050 - }, - { - "epoch": 14.858525476800455, - "grad_norm": 7.036923408508301, - "learning_rate": 0.0001292865268974484, - "loss": 4.823116683959961, - "step": 13060 - }, - { - "epoch": 14.869911756333618, - "grad_norm": 7.964908599853516, - "learning_rate": 0.00012923235278184086, - "loss": 5.159635925292969, - "step": 13070 - }, - { - "epoch": 14.88129803586678, - "grad_norm": 7.919378757476807, - "learning_rate": 0.00012917817866623329, - "loss": 4.9494384765625, - "step": 13080 - }, - { - "epoch": 14.892684315399944, - "grad_norm": 7.289670944213867, - "learning_rate": 0.0001291240045506257, - "loss": 5.084150695800782, - "step": 13090 - }, - { - "epoch": 14.904070594933106, - "grad_norm": 7.601792335510254, - "learning_rate": 0.00012906983043501813, - "loss": 4.942096328735351, - "step": 13100 - }, - { - "epoch": 14.915456874466269, - "grad_norm": 8.224839210510254, - "learning_rate": 0.0001290156563194106, - "loss": 4.804985046386719, - "step": 13110 - }, - { - "epoch": 14.92684315399943, - "grad_norm": 7.499617099761963, - "learning_rate": 0.000128961482203803, - "loss": 5.03905029296875, - "step": 13120 - }, - { - "epoch": 14.938229433532593, - "grad_norm": 8.021928787231445, - "learning_rate": 0.00012890730808819546, - "loss": 5.192089080810547, - "step": 13130 - }, - { - "epoch": 14.949615713065755, - "grad_norm": 7.354033470153809, - "learning_rate": 0.00012885313397258791, - "loss": 5.053718948364258, - "step": 13140 - }, - { - "epoch": 14.961001992598918, - "grad_norm": 7.821739673614502, - "learning_rate": 0.00012879895985698034, - "loss": 5.054132080078125, - "step": 13150 - }, - { - "epoch": 14.972388272132081, - "grad_norm": 7.435361862182617, - "learning_rate": 0.0001287447857413728, - "loss": 5.183145904541016, - "step": 13160 - }, - { - "epoch": 14.983774551665244, - "grad_norm": 7.934272289276123, - "learning_rate": 0.00012869061162576521, - "loss": 4.926422882080078, - "step": 13170 - }, - { - "epoch": 14.995160831198406, - "grad_norm": 8.532721519470215, - "learning_rate": 0.00012863643751015767, - "loss": 5.203876113891601, - "step": 13180 - }, - { - "epoch": 15.005693139766581, - "grad_norm": 7.0397629737854, - "learning_rate": 0.0001285822633945501, - "loss": 4.161993408203125, - "step": 13190 - }, - { - "epoch": 15.017079419299744, - "grad_norm": 8.11047649383545, - "learning_rate": 0.00012852808927894254, - "loss": 4.52406120300293, - "step": 13200 - }, - { - "epoch": 15.028465698832907, - "grad_norm": 7.387153148651123, - "learning_rate": 0.00012847391516333497, - "loss": 4.291901397705078, - "step": 13210 - }, - { - "epoch": 15.03985197836607, - "grad_norm": 7.610694885253906, - "learning_rate": 0.0001284197410477274, - "loss": 4.618151473999023, - "step": 13220 - }, - { - "epoch": 15.051238257899232, - "grad_norm": 8.041857719421387, - "learning_rate": 0.00012836556693211984, - "loss": 4.415428543090821, - "step": 13230 - }, - { - "epoch": 15.062624537432393, - "grad_norm": 8.153491020202637, - "learning_rate": 0.00012831139281651227, - "loss": 4.578215026855469, - "step": 13240 - }, - { - "epoch": 15.074010816965556, - "grad_norm": 7.969481468200684, - "learning_rate": 0.00012825721870090472, - "loss": 4.715361785888672, - "step": 13250 - }, - { - "epoch": 15.085397096498719, - "grad_norm": 6.474404811859131, - "learning_rate": 0.00012820304458529714, - "loss": 4.324704360961914, - "step": 13260 - }, - { - "epoch": 15.096783376031881, - "grad_norm": 7.767828464508057, - "learning_rate": 0.0001281488704696896, - "loss": 5.102642440795899, - "step": 13270 - }, - { - "epoch": 15.108169655565044, - "grad_norm": 6.66921854019165, - "learning_rate": 0.00012809469635408202, - "loss": 4.494071960449219, - "step": 13280 - }, - { - "epoch": 15.119555935098207, - "grad_norm": 7.946902275085449, - "learning_rate": 0.00012804052223847447, - "loss": 4.48153076171875, - "step": 13290 - }, - { - "epoch": 15.13094221463137, - "grad_norm": 7.2356486320495605, - "learning_rate": 0.0001279863481228669, - "loss": 4.334786224365234, - "step": 13300 - }, - { - "epoch": 15.142328494164532, - "grad_norm": 8.00400161743164, - "learning_rate": 0.00012793217400725935, - "loss": 4.645303344726562, - "step": 13310 - }, - { - "epoch": 15.153714773697695, - "grad_norm": 7.499370098114014, - "learning_rate": 0.00012787799989165177, - "loss": 4.6021240234375, - "step": 13320 - }, - { - "epoch": 15.165101053230856, - "grad_norm": 7.6751580238342285, - "learning_rate": 0.00012782382577604422, - "loss": 4.559888076782227, - "step": 13330 - }, - { - "epoch": 15.176487332764019, - "grad_norm": 7.785442352294922, - "learning_rate": 0.00012776965166043665, - "loss": 4.634941864013672, - "step": 13340 - }, - { - "epoch": 15.187873612297182, - "grad_norm": 7.800275802612305, - "learning_rate": 0.0001277154775448291, - "loss": 4.987696838378906, - "step": 13350 - }, - { - "epoch": 15.199259891830344, - "grad_norm": 7.645476341247559, - "learning_rate": 0.00012766130342922152, - "loss": 4.864948272705078, - "step": 13360 - }, - { - "epoch": 15.210646171363507, - "grad_norm": 7.993826866149902, - "learning_rate": 0.00012760712931361395, - "loss": 4.6918384552001955, - "step": 13370 - }, - { - "epoch": 15.22203245089667, - "grad_norm": 7.791032791137695, - "learning_rate": 0.0001275529551980064, - "loss": 4.629275131225586, - "step": 13380 - }, - { - "epoch": 15.233418730429833, - "grad_norm": 7.296651363372803, - "learning_rate": 0.00012749878108239882, - "loss": 4.688332748413086, - "step": 13390 - }, - { - "epoch": 15.244805009962995, - "grad_norm": 6.546830654144287, - "learning_rate": 0.00012744460696679128, - "loss": 4.458778381347656, - "step": 13400 - }, - { - "epoch": 15.256191289496158, - "grad_norm": 6.901986598968506, - "learning_rate": 0.0001273904328511837, - "loss": 4.439383316040039, - "step": 13410 - }, - { - "epoch": 15.267577569029319, - "grad_norm": 7.9615559577941895, - "learning_rate": 0.00012733625873557615, - "loss": 4.456288528442383, - "step": 13420 - }, - { - "epoch": 15.278963848562482, - "grad_norm": 7.0012593269348145, - "learning_rate": 0.00012728208461996858, - "loss": 4.7562213897705075, - "step": 13430 - }, - { - "epoch": 15.290350128095644, - "grad_norm": 8.112042427062988, - "learning_rate": 0.00012722791050436103, - "loss": 4.588046646118164, - "step": 13440 - }, - { - "epoch": 15.301736407628807, - "grad_norm": 8.545401573181152, - "learning_rate": 0.00012717373638875345, - "loss": 4.876891708374023, - "step": 13450 - }, - { - "epoch": 15.31312268716197, - "grad_norm": 7.401144981384277, - "learning_rate": 0.0001271195622731459, - "loss": 4.7802989959716795, - "step": 13460 - }, - { - "epoch": 15.324508966695133, - "grad_norm": 9.017389297485352, - "learning_rate": 0.00012706538815753836, - "loss": 4.951491165161133, - "step": 13470 - }, - { - "epoch": 15.335895246228295, - "grad_norm": 7.437997817993164, - "learning_rate": 0.00012701121404193078, - "loss": 4.486794662475586, - "step": 13480 - }, - { - "epoch": 15.347281525761458, - "grad_norm": 7.787178039550781, - "learning_rate": 0.00012695703992632323, - "loss": 4.632095718383789, - "step": 13490 - }, - { - "epoch": 15.35866780529462, - "grad_norm": 7.672351360321045, - "learning_rate": 0.00012690286581071566, - "loss": 4.595958709716797, - "step": 13500 - }, - { - "epoch": 15.370054084827782, - "grad_norm": 9.20431900024414, - "learning_rate": 0.00012684869169510808, - "loss": 4.731208038330078, - "step": 13510 - }, - { - "epoch": 15.381440364360945, - "grad_norm": 7.591139793395996, - "learning_rate": 0.0001267945175795005, - "loss": 4.76176643371582, - "step": 13520 - }, - { - "epoch": 15.392826643894107, - "grad_norm": 7.419577121734619, - "learning_rate": 0.00012674034346389296, - "loss": 4.38856201171875, - "step": 13530 - }, - { - "epoch": 15.40421292342727, - "grad_norm": 7.263993740081787, - "learning_rate": 0.00012668616934828538, - "loss": 4.695225524902344, - "step": 13540 - }, - { - "epoch": 15.415599202960433, - "grad_norm": 7.4275126457214355, - "learning_rate": 0.00012663199523267783, - "loss": 4.705525970458984, - "step": 13550 - }, - { - "epoch": 15.426985482493595, - "grad_norm": 8.560776710510254, - "learning_rate": 0.00012657782111707026, - "loss": 4.849109268188476, - "step": 13560 - }, - { - "epoch": 15.438371762026758, - "grad_norm": 8.176137924194336, - "learning_rate": 0.0001265236470014627, - "loss": 4.634634780883789, - "step": 13570 - }, - { - "epoch": 15.449758041559921, - "grad_norm": 7.90976095199585, - "learning_rate": 0.00012646947288585513, - "loss": 4.362587356567383, - "step": 13580 - }, - { - "epoch": 15.461144321093084, - "grad_norm": 8.50546646118164, - "learning_rate": 0.00012641529877024758, - "loss": 4.5653434753417965, - "step": 13590 - }, - { - "epoch": 15.472530600626245, - "grad_norm": 7.390838146209717, - "learning_rate": 0.00012636112465464004, - "loss": 4.620360565185547, - "step": 13600 - }, - { - "epoch": 15.483916880159407, - "grad_norm": 7.908941745758057, - "learning_rate": 0.00012630695053903246, - "loss": 4.444548416137695, - "step": 13610 - }, - { - "epoch": 15.49530315969257, - "grad_norm": 7.579942226409912, - "learning_rate": 0.0001262527764234249, - "loss": 4.685836410522461, - "step": 13620 - }, - { - "epoch": 15.506689439225733, - "grad_norm": 7.692448616027832, - "learning_rate": 0.00012619860230781734, - "loss": 4.817267227172851, - "step": 13630 - }, - { - "epoch": 15.518075718758896, - "grad_norm": 7.596404552459717, - "learning_rate": 0.00012614442819220976, - "loss": 4.794780349731445, - "step": 13640 - }, - { - "epoch": 15.529461998292058, - "grad_norm": 8.101449966430664, - "learning_rate": 0.00012609025407660219, - "loss": 4.803411483764648, - "step": 13650 - }, - { - "epoch": 15.540848277825221, - "grad_norm": 7.359433650970459, - "learning_rate": 0.00012603607996099464, - "loss": 4.751076889038086, - "step": 13660 - }, - { - "epoch": 15.552234557358384, - "grad_norm": 6.875908374786377, - "learning_rate": 0.00012598190584538706, - "loss": 4.687049484252929, - "step": 13670 - }, - { - "epoch": 15.563620836891547, - "grad_norm": 7.818774700164795, - "learning_rate": 0.0001259277317297795, - "loss": 4.953191757202148, - "step": 13680 - }, - { - "epoch": 15.575007116424707, - "grad_norm": 7.3973917961120605, - "learning_rate": 0.00012587355761417194, - "loss": 4.424762725830078, - "step": 13690 - }, - { - "epoch": 15.58639339595787, - "grad_norm": 8.235298156738281, - "learning_rate": 0.0001258193834985644, - "loss": 5.169184875488281, - "step": 13700 - }, - { - "epoch": 15.597779675491033, - "grad_norm": 7.2172532081604, - "learning_rate": 0.00012576520938295684, - "loss": 4.731855392456055, - "step": 13710 - }, - { - "epoch": 15.609165955024196, - "grad_norm": 7.717671871185303, - "learning_rate": 0.00012571103526734927, - "loss": 4.4836174011230465, - "step": 13720 - }, - { - "epoch": 15.620552234557358, - "grad_norm": 8.239593505859375, - "learning_rate": 0.00012565686115174172, - "loss": 4.932541275024414, - "step": 13730 - }, - { - "epoch": 15.631938514090521, - "grad_norm": 7.339993476867676, - "learning_rate": 0.00012560268703613414, - "loss": 4.306424331665039, - "step": 13740 - }, - { - "epoch": 15.643324793623684, - "grad_norm": 7.40126371383667, - "learning_rate": 0.0001255485129205266, - "loss": 4.77293815612793, - "step": 13750 - }, - { - "epoch": 15.654711073156847, - "grad_norm": 6.878884792327881, - "learning_rate": 0.00012549433880491902, - "loss": 4.673749923706055, - "step": 13760 - }, - { - "epoch": 15.66609735269001, - "grad_norm": 8.765450477600098, - "learning_rate": 0.00012544016468931147, - "loss": 4.610214614868164, - "step": 13770 - }, - { - "epoch": 15.67748363222317, - "grad_norm": 8.095568656921387, - "learning_rate": 0.0001253859905737039, - "loss": 4.4134258270263675, - "step": 13780 - }, - { - "epoch": 15.688869911756333, - "grad_norm": 8.046052932739258, - "learning_rate": 0.00012533181645809632, - "loss": 4.598631286621094, - "step": 13790 - }, - { - "epoch": 15.700256191289496, - "grad_norm": 7.999307155609131, - "learning_rate": 0.00012527764234248874, - "loss": 4.934955596923828, - "step": 13800 - }, - { - "epoch": 15.711642470822659, - "grad_norm": 6.909728050231934, - "learning_rate": 0.0001252234682268812, - "loss": 4.704043960571289, - "step": 13810 - }, - { - "epoch": 15.723028750355821, - "grad_norm": 7.83714485168457, - "learning_rate": 0.00012516929411127365, - "loss": 4.726215362548828, - "step": 13820 - }, - { - "epoch": 15.734415029888984, - "grad_norm": 7.958460330963135, - "learning_rate": 0.00012511511999566607, - "loss": 4.580567932128906, - "step": 13830 - }, - { - "epoch": 15.745801309422147, - "grad_norm": 8.28144359588623, - "learning_rate": 0.00012506094588005852, - "loss": 4.877122116088867, - "step": 13840 - }, - { - "epoch": 15.75718758895531, - "grad_norm": 7.191257953643799, - "learning_rate": 0.00012500677176445095, - "loss": 4.728401184082031, - "step": 13850 - }, - { - "epoch": 15.768573868488472, - "grad_norm": 8.373869895935059, - "learning_rate": 0.0001249525976488434, - "loss": 4.692030715942383, - "step": 13860 - }, - { - "epoch": 15.779960148021633, - "grad_norm": 7.541042804718018, - "learning_rate": 0.00012489842353323582, - "loss": 4.673214721679687, - "step": 13870 - }, - { - "epoch": 15.791346427554796, - "grad_norm": 7.176820278167725, - "learning_rate": 0.00012484424941762827, - "loss": 4.742586135864258, - "step": 13880 - }, - { - "epoch": 15.802732707087959, - "grad_norm": 7.285589694976807, - "learning_rate": 0.0001247900753020207, - "loss": 4.660622406005859, - "step": 13890 - }, - { - "epoch": 15.814118986621121, - "grad_norm": 7.229033946990967, - "learning_rate": 0.0001247413185979739, - "loss": 4.864823532104492, - "step": 13900 - }, - { - "epoch": 15.825505266154284, - "grad_norm": 8.045304298400879, - "learning_rate": 0.00012468714448236635, - "loss": 4.963446807861328, - "step": 13910 - }, - { - "epoch": 15.836891545687447, - "grad_norm": 8.248867988586426, - "learning_rate": 0.00012463297036675877, - "loss": 4.45933837890625, - "step": 13920 - }, - { - "epoch": 15.84827782522061, - "grad_norm": 7.996612071990967, - "learning_rate": 0.00012457879625115122, - "loss": 4.713712310791015, - "step": 13930 - }, - { - "epoch": 15.859664104753772, - "grad_norm": 7.0257568359375, - "learning_rate": 0.00012452462213554365, - "loss": 4.95684814453125, - "step": 13940 - }, - { - "epoch": 15.871050384286935, - "grad_norm": 7.490429401397705, - "learning_rate": 0.00012447044801993607, - "loss": 5.097381210327148, - "step": 13950 - }, - { - "epoch": 15.882436663820096, - "grad_norm": 8.141972541809082, - "learning_rate": 0.00012441627390432852, - "loss": 4.907619476318359, - "step": 13960 - }, - { - "epoch": 15.893822943353259, - "grad_norm": 7.669741153717041, - "learning_rate": 0.00012436209978872095, - "loss": 4.52638053894043, - "step": 13970 - }, - { - "epoch": 15.905209222886421, - "grad_norm": 7.623498439788818, - "learning_rate": 0.0001243079256731134, - "loss": 4.6605175018310545, - "step": 13980 - }, - { - "epoch": 15.916595502419584, - "grad_norm": 7.301693916320801, - "learning_rate": 0.00012425375155750582, - "loss": 4.762905883789062, - "step": 13990 - }, - { - "epoch": 15.927981781952747, - "grad_norm": 7.472468376159668, - "learning_rate": 0.00012419957744189827, - "loss": 4.756846237182617, - "step": 14000 - }, - { - "epoch": 15.93936806148591, - "grad_norm": 7.766934871673584, - "learning_rate": 0.0001241454033262907, - "loss": 4.748290252685547, - "step": 14010 - }, - { - "epoch": 15.950754341019072, - "grad_norm": 6.7247796058654785, - "learning_rate": 0.00012409122921068315, - "loss": 4.70723876953125, - "step": 14020 - }, - { - "epoch": 15.962140620552235, - "grad_norm": 8.007941246032715, - "learning_rate": 0.00012403705509507557, - "loss": 4.603389739990234, - "step": 14030 - }, - { - "epoch": 15.973526900085398, - "grad_norm": 7.72274923324585, - "learning_rate": 0.00012398288097946803, - "loss": 4.74395751953125, - "step": 14040 - }, - { - "epoch": 15.984913179618559, - "grad_norm": 7.55423641204834, - "learning_rate": 0.00012392870686386045, - "loss": 4.812894439697265, - "step": 14050 - }, - { - "epoch": 15.996299459151722, - "grad_norm": 7.276206016540527, - "learning_rate": 0.0001238745327482529, - "loss": 4.808458709716797, - "step": 14060 - }, - { - "epoch": 16.0068317677199, - "grad_norm": 7.715972900390625, - "learning_rate": 0.00012382035863264533, - "loss": 4.121777725219727, - "step": 14070 - }, - { - "epoch": 16.01821804725306, - "grad_norm": 7.861795425415039, - "learning_rate": 0.00012376618451703775, - "loss": 4.087036895751953, - "step": 14080 - }, - { - "epoch": 16.029604326786224, - "grad_norm": 7.377951622009277, - "learning_rate": 0.0001237120104014302, - "loss": 4.159128570556641, - "step": 14090 - }, - { - "epoch": 16.040990606319387, - "grad_norm": 6.9755730628967285, - "learning_rate": 0.00012365783628582263, - "loss": 4.118164443969727, - "step": 14100 - }, - { - "epoch": 16.05237688585255, - "grad_norm": 7.6896467208862305, - "learning_rate": 0.00012360366217021508, - "loss": 4.3465576171875, - "step": 14110 - }, - { - "epoch": 16.06376316538571, - "grad_norm": 7.475165367126465, - "learning_rate": 0.0001235494880546075, - "loss": 4.084910202026367, - "step": 14120 - }, - { - "epoch": 16.07514944491887, - "grad_norm": 7.571929931640625, - "learning_rate": 0.00012349531393899995, - "loss": 4.203293228149414, - "step": 14130 - }, - { - "epoch": 16.086535724452034, - "grad_norm": 7.941225051879883, - "learning_rate": 0.00012344113982339238, - "loss": 4.56109733581543, - "step": 14140 - }, - { - "epoch": 16.097922003985197, - "grad_norm": 7.033837795257568, - "learning_rate": 0.00012338696570778483, - "loss": 4.267443466186523, - "step": 14150 - }, - { - "epoch": 16.10930828351836, - "grad_norm": 7.488673210144043, - "learning_rate": 0.00012333279159217726, - "loss": 4.451668167114258, - "step": 14160 - }, - { - "epoch": 16.120694563051522, - "grad_norm": 7.623510837554932, - "learning_rate": 0.0001232786174765697, - "loss": 4.3589630126953125, - "step": 14170 - }, - { - "epoch": 16.132080842584685, - "grad_norm": 7.737654685974121, - "learning_rate": 0.00012322444336096213, - "loss": 4.383876037597656, - "step": 14180 - }, - { - "epoch": 16.143467122117848, - "grad_norm": 8.437525749206543, - "learning_rate": 0.00012317026924535458, - "loss": 4.350270843505859, - "step": 14190 - }, - { - "epoch": 16.15485340165101, - "grad_norm": 7.971991539001465, - "learning_rate": 0.00012311609512974703, - "loss": 4.457265472412109, - "step": 14200 - }, - { - "epoch": 16.166239681184173, - "grad_norm": 7.322634220123291, - "learning_rate": 0.00012306192101413946, - "loss": 4.124166488647461, - "step": 14210 - }, - { - "epoch": 16.177625960717336, - "grad_norm": 6.804745674133301, - "learning_rate": 0.00012300774689853188, - "loss": 4.338391876220703, - "step": 14220 - }, - { - "epoch": 16.1890122402505, - "grad_norm": 7.241665840148926, - "learning_rate": 0.0001229535727829243, - "loss": 3.9033172607421873, - "step": 14230 - }, - { - "epoch": 16.20039851978366, - "grad_norm": 7.835842609405518, - "learning_rate": 0.00012289939866731676, - "loss": 4.350611877441406, - "step": 14240 - }, - { - "epoch": 16.211784799316824, - "grad_norm": 7.062921047210693, - "learning_rate": 0.00012284522455170918, - "loss": 4.4512939453125, - "step": 14250 - }, - { - "epoch": 16.223171078849987, - "grad_norm": 7.936232566833496, - "learning_rate": 0.00012279105043610164, - "loss": 4.246225738525391, - "step": 14260 - }, - { - "epoch": 16.23455735838315, - "grad_norm": 8.720023155212402, - "learning_rate": 0.00012273687632049406, - "loss": 4.419322967529297, - "step": 14270 - }, - { - "epoch": 16.245943637916312, - "grad_norm": 7.396350383758545, - "learning_rate": 0.0001226827022048865, - "loss": 4.686096954345703, - "step": 14280 - }, - { - "epoch": 16.257329917449475, - "grad_norm": 7.3385515213012695, - "learning_rate": 0.00012262852808927894, - "loss": 4.519886016845703, - "step": 14290 - }, - { - "epoch": 16.268716196982638, - "grad_norm": 7.164921283721924, - "learning_rate": 0.0001225743539736714, - "loss": 4.170539474487304, - "step": 14300 - }, - { - "epoch": 16.280102476515797, - "grad_norm": 6.846629619598389, - "learning_rate": 0.00012252017985806384, - "loss": 4.29273796081543, - "step": 14310 - }, - { - "epoch": 16.29148875604896, - "grad_norm": 8.011646270751953, - "learning_rate": 0.00012246600574245626, - "loss": 4.424296951293945, - "step": 14320 - }, - { - "epoch": 16.302875035582122, - "grad_norm": 7.801056861877441, - "learning_rate": 0.00012241183162684872, - "loss": 4.3867851257324215, - "step": 14330 - }, - { - "epoch": 16.314261315115285, - "grad_norm": 7.563156604766846, - "learning_rate": 0.00012235765751124114, - "loss": 4.142354583740234, - "step": 14340 - }, - { - "epoch": 16.325647594648448, - "grad_norm": 7.874721527099609, - "learning_rate": 0.0001223034833956336, - "loss": 4.672331619262695, - "step": 14350 - }, - { - "epoch": 16.33703387418161, - "grad_norm": 8.173314094543457, - "learning_rate": 0.00012224930928002602, - "loss": 4.456505584716797, - "step": 14360 - }, - { - "epoch": 16.348420153714773, - "grad_norm": 7.8878092765808105, - "learning_rate": 0.00012219513516441844, - "loss": 4.445765686035156, - "step": 14370 - }, - { - "epoch": 16.359806433247936, - "grad_norm": 8.385254859924316, - "learning_rate": 0.00012214096104881086, - "loss": 4.569265747070313, - "step": 14380 - }, - { - "epoch": 16.3711927127811, - "grad_norm": 8.698989868164062, - "learning_rate": 0.00012208678693320332, - "loss": 4.381620788574219, - "step": 14390 - }, - { - "epoch": 16.38257899231426, - "grad_norm": 6.675107479095459, - "learning_rate": 0.00012203261281759575, - "loss": 4.386390686035156, - "step": 14400 - }, - { - "epoch": 16.393965271847424, - "grad_norm": 7.738358020782471, - "learning_rate": 0.00012197843870198819, - "loss": 3.9419265747070313, - "step": 14410 - }, - { - "epoch": 16.405351551380587, - "grad_norm": 7.995990753173828, - "learning_rate": 0.00012192426458638064, - "loss": 4.322775268554688, - "step": 14420 - }, - { - "epoch": 16.41673783091375, - "grad_norm": 7.4500813484191895, - "learning_rate": 0.00012187009047077307, - "loss": 4.260313415527344, - "step": 14430 - }, - { - "epoch": 16.428124110446912, - "grad_norm": 8.59277057647705, - "learning_rate": 0.00012181591635516552, - "loss": 4.660084533691406, - "step": 14440 - }, - { - "epoch": 16.439510389980075, - "grad_norm": 7.587380409240723, - "learning_rate": 0.00012176174223955794, - "loss": 4.50414924621582, - "step": 14450 - }, - { - "epoch": 16.450896669513238, - "grad_norm": 7.656953811645508, - "learning_rate": 0.00012170756812395038, - "loss": 4.5958251953125, - "step": 14460 - }, - { - "epoch": 16.4622829490464, - "grad_norm": 7.589241981506348, - "learning_rate": 0.00012165339400834281, - "loss": 4.327259063720703, - "step": 14470 - }, - { - "epoch": 16.47366922857956, - "grad_norm": 7.885225296020508, - "learning_rate": 0.00012159921989273526, - "loss": 4.439897537231445, - "step": 14480 - }, - { - "epoch": 16.485055508112723, - "grad_norm": 7.397141456604004, - "learning_rate": 0.00012154504577712768, - "loss": 4.272943115234375, - "step": 14490 - }, - { - "epoch": 16.496441787645885, - "grad_norm": 6.983608245849609, - "learning_rate": 0.00012149087166152013, - "loss": 4.209513092041016, - "step": 14500 - }, - { - "epoch": 16.507828067179048, - "grad_norm": 8.37636947631836, - "learning_rate": 0.00012143669754591256, - "loss": 4.410722732543945, - "step": 14510 - }, - { - "epoch": 16.51921434671221, - "grad_norm": 8.623056411743164, - "learning_rate": 0.00012138252343030501, - "loss": 4.736907577514648, - "step": 14520 - }, - { - "epoch": 16.530600626245374, - "grad_norm": 7.334944725036621, - "learning_rate": 0.00012132834931469745, - "loss": 4.085528945922851, - "step": 14530 - }, - { - "epoch": 16.541986905778536, - "grad_norm": 7.641763210296631, - "learning_rate": 0.00012127417519908987, - "loss": 4.196076202392578, - "step": 14540 - }, - { - "epoch": 16.5533731853117, - "grad_norm": 7.118811130523682, - "learning_rate": 0.00012122000108348232, - "loss": 4.219401931762695, - "step": 14550 - }, - { - "epoch": 16.56475946484486, - "grad_norm": 7.666287422180176, - "learning_rate": 0.00012116582696787475, - "loss": 4.500454330444336, - "step": 14560 - }, - { - "epoch": 16.576145744378024, - "grad_norm": 8.155136108398438, - "learning_rate": 0.0001211116528522672, - "loss": 4.450511932373047, - "step": 14570 - }, - { - "epoch": 16.587532023911187, - "grad_norm": 7.149616241455078, - "learning_rate": 0.00012105747873665963, - "loss": 4.229332733154297, - "step": 14580 - }, - { - "epoch": 16.59891830344435, - "grad_norm": 8.195096969604492, - "learning_rate": 0.00012100330462105208, - "loss": 4.491680908203125, - "step": 14590 - }, - { - "epoch": 16.610304582977513, - "grad_norm": 6.949581146240234, - "learning_rate": 0.0001209491305054445, - "loss": 4.443110656738281, - "step": 14600 - }, - { - "epoch": 16.621690862510675, - "grad_norm": 7.297284126281738, - "learning_rate": 0.00012089495638983694, - "loss": 4.731317520141602, - "step": 14610 - }, - { - "epoch": 16.633077142043838, - "grad_norm": 6.88108491897583, - "learning_rate": 0.00012084078227422936, - "loss": 4.465324783325196, - "step": 14620 - }, - { - "epoch": 16.644463421577, - "grad_norm": 8.48362922668457, - "learning_rate": 0.00012078660815862182, - "loss": 4.320807266235351, - "step": 14630 - }, - { - "epoch": 16.655849701110164, - "grad_norm": 8.934884071350098, - "learning_rate": 0.00012073243404301427, - "loss": 4.569874954223633, - "step": 14640 - }, - { - "epoch": 16.667235980643326, - "grad_norm": 7.942513942718506, - "learning_rate": 0.00012067825992740669, - "loss": 4.774484634399414, - "step": 14650 - }, - { - "epoch": 16.67862226017649, - "grad_norm": 7.406705379486084, - "learning_rate": 0.00012062408581179914, - "loss": 4.290068817138672, - "step": 14660 - }, - { - "epoch": 16.69000853970965, - "grad_norm": 7.856991291046143, - "learning_rate": 0.00012056991169619157, - "loss": 4.307246398925781, - "step": 14670 - }, - { - "epoch": 16.70139481924281, - "grad_norm": 8.263503074645996, - "learning_rate": 0.000120515737580584, - "loss": 4.2724559783935545, - "step": 14680 - }, - { - "epoch": 16.712781098775974, - "grad_norm": 8.12948226928711, - "learning_rate": 0.00012046156346497643, - "loss": 4.421898651123047, - "step": 14690 - }, - { - "epoch": 16.724167378309136, - "grad_norm": 8.351273536682129, - "learning_rate": 0.00012040738934936888, - "loss": 4.521099090576172, - "step": 14700 - }, - { - "epoch": 16.7355536578423, - "grad_norm": 7.658185958862305, - "learning_rate": 0.0001203532152337613, - "loss": 4.358420562744141, - "step": 14710 - }, - { - "epoch": 16.746939937375462, - "grad_norm": 7.626881122589111, - "learning_rate": 0.00012029904111815376, - "loss": 4.407142257690429, - "step": 14720 - }, - { - "epoch": 16.758326216908625, - "grad_norm": 7.826634407043457, - "learning_rate": 0.00012024486700254618, - "loss": 4.7284095764160154, - "step": 14730 - }, - { - "epoch": 16.769712496441787, - "grad_norm": 7.835376262664795, - "learning_rate": 0.00012019069288693863, - "loss": 4.649514007568359, - "step": 14740 - }, - { - "epoch": 16.78109877597495, - "grad_norm": 7.345791816711426, - "learning_rate": 0.00012013651877133107, - "loss": 4.5487213134765625, - "step": 14750 - }, - { - "epoch": 16.792485055508113, - "grad_norm": 7.653363227844238, - "learning_rate": 0.0001200823446557235, - "loss": 4.464413833618164, - "step": 14760 - }, - { - "epoch": 16.803871335041276, - "grad_norm": 7.4951043128967285, - "learning_rate": 0.00012002817054011595, - "loss": 4.552109146118164, - "step": 14770 - }, - { - "epoch": 16.81525761457444, - "grad_norm": 6.739938259124756, - "learning_rate": 0.00011997399642450837, - "loss": 4.435204315185547, - "step": 14780 - }, - { - "epoch": 16.8266438941076, - "grad_norm": 7.946920871734619, - "learning_rate": 0.00011991982230890082, - "loss": 4.394256591796875, - "step": 14790 - }, - { - "epoch": 16.838030173640764, - "grad_norm": 7.2037224769592285, - "learning_rate": 0.00011986564819329325, - "loss": 4.656013107299804, - "step": 14800 - }, - { - "epoch": 16.849416453173927, - "grad_norm": 7.150942325592041, - "learning_rate": 0.00011981147407768569, - "loss": 4.620296478271484, - "step": 14810 - }, - { - "epoch": 16.86080273270709, - "grad_norm": 8.865056037902832, - "learning_rate": 0.00011975729996207812, - "loss": 4.58060302734375, - "step": 14820 - }, - { - "epoch": 16.872189012240252, - "grad_norm": 7.587070941925049, - "learning_rate": 0.00011970312584647056, - "loss": 4.879767990112304, - "step": 14830 - }, - { - "epoch": 16.88357529177341, - "grad_norm": 8.456056594848633, - "learning_rate": 0.00011964895173086299, - "loss": 4.5918418884277346, - "step": 14840 - }, - { - "epoch": 16.894961571306574, - "grad_norm": 7.7380900382995605, - "learning_rate": 0.00011959477761525544, - "loss": 4.7449501037597654, - "step": 14850 - }, - { - "epoch": 16.906347850839737, - "grad_norm": 7.467341423034668, - "learning_rate": 0.00011954060349964786, - "loss": 4.479496765136719, - "step": 14860 - }, - { - "epoch": 16.9177341303729, - "grad_norm": 8.6979398727417, - "learning_rate": 0.00011948642938404031, - "loss": 4.7719371795654295, - "step": 14870 - }, - { - "epoch": 16.929120409906062, - "grad_norm": 7.696811676025391, - "learning_rate": 0.00011943225526843275, - "loss": 4.6034080505371096, - "step": 14880 - }, - { - "epoch": 16.940506689439225, - "grad_norm": 6.743338584899902, - "learning_rate": 0.00011937808115282518, - "loss": 4.397494888305664, - "step": 14890 - }, - { - "epoch": 16.951892968972388, - "grad_norm": 6.509140491485596, - "learning_rate": 0.00011932390703721763, - "loss": 4.695284271240235, - "step": 14900 - }, - { - "epoch": 16.96327924850555, - "grad_norm": 7.908071994781494, - "learning_rate": 0.00011926973292161005, - "loss": 4.142197799682617, - "step": 14910 - }, - { - "epoch": 16.974665528038713, - "grad_norm": 8.120683670043945, - "learning_rate": 0.0001192155588060025, - "loss": 4.365460968017578, - "step": 14920 - }, - { - "epoch": 16.986051807571876, - "grad_norm": 7.481232643127441, - "learning_rate": 0.00011916138469039493, - "loss": 4.682232284545899, - "step": 14930 - }, - { - "epoch": 16.99743808710504, - "grad_norm": 7.541874408721924, - "learning_rate": 0.00011910721057478738, - "loss": 4.875133895874024, - "step": 14940 - }, - { - "epoch": 17.007970395673215, - "grad_norm": 6.461215019226074, - "learning_rate": 0.0001190530364591798, - "loss": 3.5713672637939453, - "step": 14950 - }, - { - "epoch": 17.019356675206378, - "grad_norm": 8.02785873413086, - "learning_rate": 0.00011899886234357224, - "loss": 3.8977893829345702, - "step": 14960 - }, - { - "epoch": 17.030742954739537, - "grad_norm": 7.833250522613525, - "learning_rate": 0.00011894468822796467, - "loss": 4.051911926269531, - "step": 14970 - }, - { - "epoch": 17.0421292342727, - "grad_norm": 7.147907257080078, - "learning_rate": 0.00011889051411235712, - "loss": 4.067853546142578, - "step": 14980 - }, - { - "epoch": 17.053515513805863, - "grad_norm": 8.514575958251953, - "learning_rate": 0.00011883633999674957, - "loss": 4.16202392578125, - "step": 14990 - }, - { - "epoch": 17.064901793339025, - "grad_norm": 6.706639766693115, - "learning_rate": 0.000118782165881142, - "loss": 3.879001998901367, - "step": 15000 - }, - { - "epoch": 17.076288072872188, - "grad_norm": 7.3274827003479, - "learning_rate": 0.00011872799176553445, - "loss": 3.8930648803710937, - "step": 15010 - }, - { - "epoch": 17.08767435240535, - "grad_norm": 8.865731239318848, - "learning_rate": 0.00011867381764992687, - "loss": 4.000261688232422, - "step": 15020 - }, - { - "epoch": 17.099060631938514, - "grad_norm": 8.527647018432617, - "learning_rate": 0.00011861964353431931, - "loss": 4.380818557739258, - "step": 15030 - }, - { - "epoch": 17.110446911471676, - "grad_norm": 6.795366287231445, - "learning_rate": 0.00011856546941871173, - "loss": 4.000965118408203, - "step": 15040 - }, - { - "epoch": 17.12183319100484, - "grad_norm": 8.024996757507324, - "learning_rate": 0.00011851129530310419, - "loss": 3.8776901245117186, - "step": 15050 - }, - { - "epoch": 17.133219470538002, - "grad_norm": 7.76874303817749, - "learning_rate": 0.00011845712118749661, - "loss": 3.907266616821289, - "step": 15060 - }, - { - "epoch": 17.144605750071165, - "grad_norm": 7.170513153076172, - "learning_rate": 0.00011840294707188906, - "loss": 4.122880172729492, - "step": 15070 - }, - { - "epoch": 17.155992029604327, - "grad_norm": 7.131034851074219, - "learning_rate": 0.00011834877295628149, - "loss": 4.134088134765625, - "step": 15080 - }, - { - "epoch": 17.16737830913749, - "grad_norm": 9.287558555603027, - "learning_rate": 0.00011829459884067394, - "loss": 4.137316131591797, - "step": 15090 - }, - { - "epoch": 17.178764588670653, - "grad_norm": 6.616999626159668, - "learning_rate": 0.00011824042472506638, - "loss": 4.016845321655273, - "step": 15100 - }, - { - "epoch": 17.190150868203816, - "grad_norm": 7.829199314117432, - "learning_rate": 0.0001181862506094588, - "loss": 4.34521369934082, - "step": 15110 - }, - { - "epoch": 17.20153714773698, - "grad_norm": 8.117554664611816, - "learning_rate": 0.00011813207649385125, - "loss": 3.9818496704101562, - "step": 15120 - }, - { - "epoch": 17.21292342727014, - "grad_norm": 8.301651954650879, - "learning_rate": 0.00011807790237824368, - "loss": 4.036884307861328, - "step": 15130 - }, - { - "epoch": 17.224309706803304, - "grad_norm": 8.472175598144531, - "learning_rate": 0.00011802372826263613, - "loss": 4.04235725402832, - "step": 15140 - }, - { - "epoch": 17.235695986336463, - "grad_norm": 8.00710391998291, - "learning_rate": 0.00011796955414702855, - "loss": 4.428823089599609, - "step": 15150 - }, - { - "epoch": 17.247082265869626, - "grad_norm": 7.678086757659912, - "learning_rate": 0.00011791538003142099, - "loss": 4.123222351074219, - "step": 15160 - }, - { - "epoch": 17.25846854540279, - "grad_norm": 7.292410850524902, - "learning_rate": 0.00011786120591581343, - "loss": 4.134667205810547, - "step": 15170 - }, - { - "epoch": 17.26985482493595, - "grad_norm": 7.336828231811523, - "learning_rate": 0.00011780703180020587, - "loss": 4.142428970336914, - "step": 15180 - }, - { - "epoch": 17.281241104469114, - "grad_norm": 6.939968109130859, - "learning_rate": 0.00011775285768459829, - "loss": 4.088996505737304, - "step": 15190 - }, - { - "epoch": 17.292627384002277, - "grad_norm": 6.900147438049316, - "learning_rate": 0.00011769868356899074, - "loss": 4.092143630981445, - "step": 15200 - }, - { - "epoch": 17.30401366353544, - "grad_norm": 8.642024993896484, - "learning_rate": 0.0001176445094533832, - "loss": 4.337299728393555, - "step": 15210 - }, - { - "epoch": 17.315399943068602, - "grad_norm": 7.520287036895752, - "learning_rate": 0.00011759033533777562, - "loss": 3.809254837036133, - "step": 15220 - }, - { - "epoch": 17.326786222601765, - "grad_norm": 8.518157958984375, - "learning_rate": 0.00011753616122216806, - "loss": 4.095516967773437, - "step": 15230 - }, - { - "epoch": 17.338172502134928, - "grad_norm": 8.106553077697754, - "learning_rate": 0.00011748198710656048, - "loss": 4.244467163085938, - "step": 15240 - }, - { - "epoch": 17.34955878166809, - "grad_norm": 7.119726181030273, - "learning_rate": 0.00011742781299095293, - "loss": 4.04039192199707, - "step": 15250 - }, - { - "epoch": 17.360945061201253, - "grad_norm": 8.118093490600586, - "learning_rate": 0.00011737363887534536, - "loss": 4.112255859375, - "step": 15260 - }, - { - "epoch": 17.372331340734416, - "grad_norm": 7.7591729164123535, - "learning_rate": 0.00011731946475973781, - "loss": 3.9870590209960937, - "step": 15270 - }, - { - "epoch": 17.38371762026758, - "grad_norm": 6.926348686218262, - "learning_rate": 0.00011726529064413023, - "loss": 4.05236701965332, - "step": 15280 - }, - { - "epoch": 17.39510389980074, - "grad_norm": 7.686641216278076, - "learning_rate": 0.00011721111652852268, - "loss": 3.9766483306884766, - "step": 15290 - }, - { - "epoch": 17.406490179333904, - "grad_norm": 7.820587635040283, - "learning_rate": 0.00011715694241291511, - "loss": 4.117971801757813, - "step": 15300 - }, - { - "epoch": 17.417876458867067, - "grad_norm": 9.149397850036621, - "learning_rate": 0.00011710276829730755, - "loss": 4.0301368713378904, - "step": 15310 - }, - { - "epoch": 17.42926273840023, - "grad_norm": 7.755941390991211, - "learning_rate": 0.0001170485941817, - "loss": 4.266557312011718, - "step": 15320 - }, - { - "epoch": 17.44064901793339, - "grad_norm": 7.495479106903076, - "learning_rate": 0.00011699442006609242, - "loss": 4.452923202514649, - "step": 15330 - }, - { - "epoch": 17.45203529746655, - "grad_norm": 8.05996322631836, - "learning_rate": 0.00011694024595048487, - "loss": 4.292583847045899, - "step": 15340 - }, - { - "epoch": 17.463421576999714, - "grad_norm": 9.973164558410645, - "learning_rate": 0.0001168860718348773, - "loss": 4.290217590332031, - "step": 15350 - }, - { - "epoch": 17.474807856532877, - "grad_norm": 8.033830642700195, - "learning_rate": 0.00011683189771926975, - "loss": 4.155846405029297, - "step": 15360 - }, - { - "epoch": 17.48619413606604, - "grad_norm": 7.986049652099609, - "learning_rate": 0.00011677772360366217, - "loss": 4.047249603271484, - "step": 15370 - }, - { - "epoch": 17.497580415599202, - "grad_norm": 7.257813930511475, - "learning_rate": 0.00011672354948805461, - "loss": 4.1319633483886715, - "step": 15380 - }, - { - "epoch": 17.508966695132365, - "grad_norm": 8.351729393005371, - "learning_rate": 0.00011666937537244704, - "loss": 4.184175872802735, - "step": 15390 - }, - { - "epoch": 17.520352974665528, - "grad_norm": 7.465641021728516, - "learning_rate": 0.00011661520125683949, - "loss": 4.227563858032227, - "step": 15400 - }, - { - "epoch": 17.53173925419869, - "grad_norm": 7.468196392059326, - "learning_rate": 0.00011656102714123191, - "loss": 4.250960922241211, - "step": 15410 - }, - { - "epoch": 17.543125533731853, - "grad_norm": 7.476719856262207, - "learning_rate": 0.00011650685302562437, - "loss": 4.10565414428711, - "step": 15420 - }, - { - "epoch": 17.554511813265016, - "grad_norm": 7.9970269203186035, - "learning_rate": 0.00011645267891001682, - "loss": 4.217912292480468, - "step": 15430 - }, - { - "epoch": 17.56589809279818, - "grad_norm": 7.878209114074707, - "learning_rate": 0.00011639850479440924, - "loss": 4.038910293579102, - "step": 15440 - }, - { - "epoch": 17.57728437233134, - "grad_norm": 7.499887466430664, - "learning_rate": 0.00011634433067880168, - "loss": 4.3691764831542965, - "step": 15450 - }, - { - "epoch": 17.588670651864504, - "grad_norm": 7.418756484985352, - "learning_rate": 0.0001162901565631941, - "loss": 4.344051361083984, - "step": 15460 - }, - { - "epoch": 17.600056931397667, - "grad_norm": 8.542085647583008, - "learning_rate": 0.00011623598244758656, - "loss": 4.26641731262207, - "step": 15470 - }, - { - "epoch": 17.61144321093083, - "grad_norm": 8.016523361206055, - "learning_rate": 0.00011618180833197898, - "loss": 4.250433349609375, - "step": 15480 - }, - { - "epoch": 17.622829490463992, - "grad_norm": 7.668478488922119, - "learning_rate": 0.00011612763421637143, - "loss": 4.095648956298828, - "step": 15490 - }, - { - "epoch": 17.634215769997155, - "grad_norm": 7.5998101234436035, - "learning_rate": 0.00011607346010076386, - "loss": 4.096474075317383, - "step": 15500 - }, - { - "epoch": 17.645602049530314, - "grad_norm": 8.259299278259277, - "learning_rate": 0.00011601928598515631, - "loss": 4.329125213623047, - "step": 15510 - }, - { - "epoch": 17.656988329063477, - "grad_norm": 7.811594009399414, - "learning_rate": 0.00011596511186954873, - "loss": 4.401547622680664, - "step": 15520 - }, - { - "epoch": 17.66837460859664, - "grad_norm": 7.514590740203857, - "learning_rate": 0.00011591093775394117, - "loss": 4.361583709716797, - "step": 15530 - }, - { - "epoch": 17.679760888129803, - "grad_norm": 7.787680625915527, - "learning_rate": 0.00011585676363833362, - "loss": 4.375646591186523, - "step": 15540 - }, - { - "epoch": 17.691147167662965, - "grad_norm": 6.984362602233887, - "learning_rate": 0.00011580258952272605, - "loss": 4.231984329223633, - "step": 15550 - }, - { - "epoch": 17.702533447196128, - "grad_norm": 8.019062042236328, - "learning_rate": 0.0001157484154071185, - "loss": 4.141991806030274, - "step": 15560 - }, - { - "epoch": 17.71391972672929, - "grad_norm": 8.658064842224121, - "learning_rate": 0.00011569424129151092, - "loss": 4.37116813659668, - "step": 15570 - }, - { - "epoch": 17.725306006262453, - "grad_norm": 8.997333526611328, - "learning_rate": 0.00011564006717590336, - "loss": 4.313310241699218, - "step": 15580 - }, - { - "epoch": 17.736692285795616, - "grad_norm": 8.168388366699219, - "learning_rate": 0.0001155858930602958, - "loss": 4.10748291015625, - "step": 15590 - }, - { - "epoch": 17.74807856532878, - "grad_norm": 8.176225662231445, - "learning_rate": 0.00011553171894468824, - "loss": 4.249481964111328, - "step": 15600 - }, - { - "epoch": 17.75946484486194, - "grad_norm": 8.127287864685059, - "learning_rate": 0.00011547754482908066, - "loss": 4.257102203369141, - "step": 15610 - }, - { - "epoch": 17.770851124395104, - "grad_norm": 7.9209794998168945, - "learning_rate": 0.00011542337071347311, - "loss": 4.099342727661133, - "step": 15620 - }, - { - "epoch": 17.782237403928267, - "grad_norm": 7.507110118865967, - "learning_rate": 0.00011536919659786554, - "loss": 4.057368469238281, - "step": 15630 - }, - { - "epoch": 17.79362368346143, - "grad_norm": 7.258909702301025, - "learning_rate": 0.00011531502248225799, - "loss": 4.278286743164062, - "step": 15640 - }, - { - "epoch": 17.805009962994593, - "grad_norm": 8.5214204788208, - "learning_rate": 0.00011526084836665041, - "loss": 4.387587356567383, - "step": 15650 - }, - { - "epoch": 17.816396242527755, - "grad_norm": 6.49737548828125, - "learning_rate": 0.00011520667425104285, - "loss": 3.87442626953125, - "step": 15660 - }, - { - "epoch": 17.827782522060918, - "grad_norm": 8.327667236328125, - "learning_rate": 0.0001151525001354353, - "loss": 4.007180404663086, - "step": 15670 - }, - { - "epoch": 17.83916880159408, - "grad_norm": 8.513700485229492, - "learning_rate": 0.00011509832601982773, - "loss": 4.101554870605469, - "step": 15680 - }, - { - "epoch": 17.85055508112724, - "grad_norm": 8.8079833984375, - "learning_rate": 0.00011504415190422018, - "loss": 4.265046310424805, - "step": 15690 - }, - { - "epoch": 17.861941360660403, - "grad_norm": 7.73423957824707, - "learning_rate": 0.0001149899777886126, - "loss": 4.407987976074219, - "step": 15700 - }, - { - "epoch": 17.873327640193565, - "grad_norm": 7.101474285125732, - "learning_rate": 0.00011493580367300505, - "loss": 4.4353492736816404, - "step": 15710 - }, - { - "epoch": 17.884713919726728, - "grad_norm": 7.298971652984619, - "learning_rate": 0.00011488162955739748, - "loss": 4.033404159545898, - "step": 15720 - }, - { - "epoch": 17.89610019925989, - "grad_norm": 7.350695610046387, - "learning_rate": 0.00011482745544178992, - "loss": 4.468685150146484, - "step": 15730 - }, - { - "epoch": 17.907486478793054, - "grad_norm": 7.839898109436035, - "learning_rate": 0.00011477328132618234, - "loss": 4.322669601440429, - "step": 15740 - }, - { - "epoch": 17.918872758326216, - "grad_norm": 7.159393787384033, - "learning_rate": 0.00011471910721057479, - "loss": 4.0323749542236325, - "step": 15750 - }, - { - "epoch": 17.93025903785938, - "grad_norm": 7.601138591766357, - "learning_rate": 0.00011466493309496722, - "loss": 4.170376586914062, - "step": 15760 - }, - { - "epoch": 17.941645317392542, - "grad_norm": 7.914937496185303, - "learning_rate": 0.00011461075897935967, - "loss": 4.421757507324219, - "step": 15770 - }, - { - "epoch": 17.953031596925705, - "grad_norm": 7.36163854598999, - "learning_rate": 0.00011455658486375212, - "loss": 4.248828506469726, - "step": 15780 - }, - { - "epoch": 17.964417876458867, - "grad_norm": 7.326484680175781, - "learning_rate": 0.00011450241074814454, - "loss": 4.062700271606445, - "step": 15790 - }, - { - "epoch": 17.97580415599203, - "grad_norm": 7.293338775634766, - "learning_rate": 0.00011444823663253698, - "loss": 4.451213455200195, - "step": 15800 - }, - { - "epoch": 17.987190435525193, - "grad_norm": 7.929836273193359, - "learning_rate": 0.00011439406251692941, - "loss": 4.008718109130859, - "step": 15810 - }, - { - "epoch": 17.998576715058356, - "grad_norm": 7.278785705566406, - "learning_rate": 0.00011433988840132186, - "loss": 4.249869537353516, - "step": 15820 - }, - { - "epoch": 18.00910902362653, - "grad_norm": 7.38397216796875, - "learning_rate": 0.00011428571428571428, - "loss": 3.487931823730469, - "step": 15830 - }, - { - "epoch": 18.02049530315969, - "grad_norm": 7.779951572418213, - "learning_rate": 0.00011423154017010674, - "loss": 3.6420997619628905, - "step": 15840 - }, - { - "epoch": 18.031881582692854, - "grad_norm": 7.421573162078857, - "learning_rate": 0.00011417736605449916, - "loss": 3.697202682495117, - "step": 15850 - }, - { - "epoch": 18.043267862226017, - "grad_norm": 7.718578815460205, - "learning_rate": 0.00011412319193889161, - "loss": 3.6515987396240233, - "step": 15860 - }, - { - "epoch": 18.05465414175918, - "grad_norm": 8.21097183227539, - "learning_rate": 0.0001140744352348448, - "loss": 4.043758392333984, - "step": 15870 - }, - { - "epoch": 18.066040421292342, - "grad_norm": 8.3043794631958, - "learning_rate": 0.00011402026111923723, - "loss": 3.8113327026367188, - "step": 15880 - }, - { - "epoch": 18.077426700825505, - "grad_norm": 7.186902046203613, - "learning_rate": 0.00011396608700362967, - "loss": 3.746054458618164, - "step": 15890 - }, - { - "epoch": 18.088812980358668, - "grad_norm": 7.123048305511475, - "learning_rate": 0.0001139119128880221, - "loss": 3.691455078125, - "step": 15900 - }, - { - "epoch": 18.10019925989183, - "grad_norm": 7.646660804748535, - "learning_rate": 0.00011385773877241455, - "loss": 3.715811538696289, - "step": 15910 - }, - { - "epoch": 18.111585539424993, - "grad_norm": 7.243655204772949, - "learning_rate": 0.000113803564656807, - "loss": 3.995361328125, - "step": 15920 - }, - { - "epoch": 18.122971818958156, - "grad_norm": 7.36957311630249, - "learning_rate": 0.00011374939054119942, - "loss": 3.660219192504883, - "step": 15930 - }, - { - "epoch": 18.13435809849132, - "grad_norm": 8.030665397644043, - "learning_rate": 0.00011369521642559187, - "loss": 3.9243896484375, - "step": 15940 - }, - { - "epoch": 18.14574437802448, - "grad_norm": 7.206502914428711, - "learning_rate": 0.0001136410423099843, - "loss": 3.791769027709961, - "step": 15950 - }, - { - "epoch": 18.157130657557644, - "grad_norm": 8.390379905700684, - "learning_rate": 0.00011358686819437674, - "loss": 3.6325794219970704, - "step": 15960 - }, - { - "epoch": 18.168516937090807, - "grad_norm": 7.630048751831055, - "learning_rate": 0.00011353269407876916, - "loss": 3.683578872680664, - "step": 15970 - }, - { - "epoch": 18.17990321662397, - "grad_norm": 7.336859703063965, - "learning_rate": 0.00011347851996316161, - "loss": 3.736345672607422, - "step": 15980 - }, - { - "epoch": 18.19128949615713, - "grad_norm": 6.535525798797607, - "learning_rate": 0.00011342434584755404, - "loss": 3.869864654541016, - "step": 15990 - }, - { - "epoch": 18.20267577569029, - "grad_norm": 7.941544532775879, - "learning_rate": 0.00011337017173194649, - "loss": 3.8768684387207033, - "step": 16000 - }, - { - "epoch": 18.214062055223454, - "grad_norm": 8.93309497833252, - "learning_rate": 0.00011331599761633891, - "loss": 4.1240489959716795, - "step": 16010 - }, - { - "epoch": 18.225448334756617, - "grad_norm": 8.11670970916748, - "learning_rate": 0.00011326182350073135, - "loss": 3.7689064025878904, - "step": 16020 - }, - { - "epoch": 18.23683461428978, - "grad_norm": 8.06661319732666, - "learning_rate": 0.0001132076493851238, - "loss": 3.8555408477783204, - "step": 16030 - }, - { - "epoch": 18.248220893822943, - "grad_norm": 7.596058368682861, - "learning_rate": 0.00011315347526951623, - "loss": 3.899448013305664, - "step": 16040 - }, - { - "epoch": 18.259607173356105, - "grad_norm": 7.202267646789551, - "learning_rate": 0.00011309930115390868, - "loss": 3.954229736328125, - "step": 16050 - }, - { - "epoch": 18.270993452889268, - "grad_norm": 6.913619518280029, - "learning_rate": 0.0001130451270383011, - "loss": 4.055387496948242, - "step": 16060 - }, - { - "epoch": 18.28237973242243, - "grad_norm": 7.013467311859131, - "learning_rate": 0.00011299095292269355, - "loss": 3.91961784362793, - "step": 16070 - }, - { - "epoch": 18.293766011955594, - "grad_norm": 8.460749626159668, - "learning_rate": 0.00011293677880708598, - "loss": 3.9316688537597657, - "step": 16080 - }, - { - "epoch": 18.305152291488756, - "grad_norm": 6.905110836029053, - "learning_rate": 0.00011288260469147842, - "loss": 4.052811813354492, - "step": 16090 - }, - { - "epoch": 18.31653857102192, - "grad_norm": 6.9299116134643555, - "learning_rate": 0.00011282843057587084, - "loss": 3.9361194610595702, - "step": 16100 - }, - { - "epoch": 18.327924850555082, - "grad_norm": 6.814231872558594, - "learning_rate": 0.00011277425646026329, - "loss": 3.73162841796875, - "step": 16110 - }, - { - "epoch": 18.339311130088245, - "grad_norm": 7.320927619934082, - "learning_rate": 0.00011272008234465572, - "loss": 3.8358139038085937, - "step": 16120 - }, - { - "epoch": 18.350697409621407, - "grad_norm": 7.525396823883057, - "learning_rate": 0.00011266590822904817, - "loss": 3.948038864135742, - "step": 16130 - }, - { - "epoch": 18.36208368915457, - "grad_norm": 8.188949584960938, - "learning_rate": 0.00011261173411344062, - "loss": 4.153988647460937, - "step": 16140 - }, - { - "epoch": 18.373469968687733, - "grad_norm": 7.659078121185303, - "learning_rate": 0.00011255755999783304, - "loss": 3.8572532653808596, - "step": 16150 - }, - { - "epoch": 18.384856248220895, - "grad_norm": 7.888333320617676, - "learning_rate": 0.00011250338588222548, - "loss": 4.0505119323730465, - "step": 16160 - }, - { - "epoch": 18.396242527754055, - "grad_norm": 9.681838989257812, - "learning_rate": 0.0001124492117666179, - "loss": 3.6657783508300783, - "step": 16170 - }, - { - "epoch": 18.407628807287217, - "grad_norm": 7.816950798034668, - "learning_rate": 0.00011239503765101036, - "loss": 4.182400894165039, - "step": 16180 - }, - { - "epoch": 18.41901508682038, - "grad_norm": 8.955124855041504, - "learning_rate": 0.00011234086353540278, - "loss": 4.1284526824951175, - "step": 16190 - }, - { - "epoch": 18.430401366353543, - "grad_norm": 7.874047756195068, - "learning_rate": 0.00011228668941979523, - "loss": 4.089582824707032, - "step": 16200 - }, - { - "epoch": 18.441787645886706, - "grad_norm": 7.964369297027588, - "learning_rate": 0.00011223251530418766, - "loss": 4.226484298706055, - "step": 16210 - }, - { - "epoch": 18.45317392541987, - "grad_norm": 7.5684614181518555, - "learning_rate": 0.00011217834118858011, - "loss": 3.7357833862304686, - "step": 16220 - }, - { - "epoch": 18.46456020495303, - "grad_norm": 8.19275188446045, - "learning_rate": 0.00011212416707297253, - "loss": 3.658647918701172, - "step": 16230 - }, - { - "epoch": 18.475946484486194, - "grad_norm": 7.265321731567383, - "learning_rate": 0.00011206999295736497, - "loss": 4.110098266601563, - "step": 16240 - }, - { - "epoch": 18.487332764019357, - "grad_norm": 8.811004638671875, - "learning_rate": 0.0001120158188417574, - "loss": 4.014685821533203, - "step": 16250 - }, - { - "epoch": 18.49871904355252, - "grad_norm": 8.132495880126953, - "learning_rate": 0.00011196164472614985, - "loss": 3.972939682006836, - "step": 16260 - }, - { - "epoch": 18.510105323085682, - "grad_norm": 7.8982672691345215, - "learning_rate": 0.0001119074706105423, - "loss": 3.900859069824219, - "step": 16270 - }, - { - "epoch": 18.521491602618845, - "grad_norm": 8.57484245300293, - "learning_rate": 0.00011185329649493472, - "loss": 3.8809505462646485, - "step": 16280 - }, - { - "epoch": 18.532877882152007, - "grad_norm": 8.151063919067383, - "learning_rate": 0.00011179912237932718, - "loss": 3.9721370697021485, - "step": 16290 - }, - { - "epoch": 18.54426416168517, - "grad_norm": 7.32830810546875, - "learning_rate": 0.0001117449482637196, - "loss": 4.091455078125, - "step": 16300 - }, - { - "epoch": 18.555650441218333, - "grad_norm": 7.877411842346191, - "learning_rate": 0.00011169077414811204, - "loss": 3.9833248138427733, - "step": 16310 - }, - { - "epoch": 18.567036720751496, - "grad_norm": 7.7397379875183105, - "learning_rate": 0.00011163660003250446, - "loss": 3.792256164550781, - "step": 16320 - }, - { - "epoch": 18.57842300028466, - "grad_norm": 7.100308418273926, - "learning_rate": 0.00011158242591689692, - "loss": 3.683748245239258, - "step": 16330 - }, - { - "epoch": 18.58980927981782, - "grad_norm": 8.094539642333984, - "learning_rate": 0.00011152825180128934, - "loss": 4.14416732788086, - "step": 16340 - }, - { - "epoch": 18.60119555935098, - "grad_norm": 7.870161056518555, - "learning_rate": 0.00011147407768568179, - "loss": 3.89129753112793, - "step": 16350 - }, - { - "epoch": 18.612581838884143, - "grad_norm": 7.73281192779541, - "learning_rate": 0.00011141990357007422, - "loss": 4.153730773925782, - "step": 16360 - }, - { - "epoch": 18.623968118417306, - "grad_norm": 7.23974084854126, - "learning_rate": 0.00011136572945446667, - "loss": 3.8958515167236327, - "step": 16370 - }, - { - "epoch": 18.63535439795047, - "grad_norm": 11.619794845581055, - "learning_rate": 0.0001113115553388591, - "loss": 4.148723220825195, - "step": 16380 - }, - { - "epoch": 18.64674067748363, - "grad_norm": 7.968571186065674, - "learning_rate": 0.00011125738122325153, - "loss": 4.051975250244141, - "step": 16390 - }, - { - "epoch": 18.658126957016794, - "grad_norm": 8.169166564941406, - "learning_rate": 0.00011120320710764398, - "loss": 3.804905319213867, - "step": 16400 - }, - { - "epoch": 18.669513236549957, - "grad_norm": 7.773144721984863, - "learning_rate": 0.0001111490329920364, - "loss": 3.9082794189453125, - "step": 16410 - }, - { - "epoch": 18.68089951608312, - "grad_norm": 8.190699577331543, - "learning_rate": 0.00011109485887642886, - "loss": 4.108190536499023, - "step": 16420 - }, - { - "epoch": 18.692285795616282, - "grad_norm": 7.462385177612305, - "learning_rate": 0.00011104068476082128, - "loss": 4.018275451660156, - "step": 16430 - }, - { - "epoch": 18.703672075149445, - "grad_norm": 8.5626859664917, - "learning_rate": 0.00011098651064521372, - "loss": 3.7356529235839844, - "step": 16440 - }, - { - "epoch": 18.715058354682608, - "grad_norm": 8.161300659179688, - "learning_rate": 0.00011093233652960616, - "loss": 4.018674850463867, - "step": 16450 - }, - { - "epoch": 18.72644463421577, - "grad_norm": 7.462896347045898, - "learning_rate": 0.0001108781624139986, - "loss": 3.7080970764160157, - "step": 16460 - }, - { - "epoch": 18.737830913748933, - "grad_norm": 7.781260967254639, - "learning_rate": 0.00011082398829839102, - "loss": 4.161582183837891, - "step": 16470 - }, - { - "epoch": 18.749217193282096, - "grad_norm": 7.212603569030762, - "learning_rate": 0.00011076981418278347, - "loss": 3.770726776123047, - "step": 16480 - }, - { - "epoch": 18.76060347281526, - "grad_norm": 8.329229354858398, - "learning_rate": 0.00011071564006717592, - "loss": 4.128889083862305, - "step": 16490 - }, - { - "epoch": 18.77198975234842, - "grad_norm": 7.012000560760498, - "learning_rate": 0.00011066146595156835, - "loss": 4.022500991821289, - "step": 16500 - }, - { - "epoch": 18.783376031881584, - "grad_norm": 7.675992965698242, - "learning_rate": 0.00011060729183596079, - "loss": 3.725032424926758, - "step": 16510 - }, - { - "epoch": 18.794762311414747, - "grad_norm": 7.538510322570801, - "learning_rate": 0.00011055311772035321, - "loss": 3.657960891723633, - "step": 16520 - }, - { - "epoch": 18.806148590947906, - "grad_norm": 6.402365207672119, - "learning_rate": 0.00011049894360474566, - "loss": 3.9226734161376955, - "step": 16530 - }, - { - "epoch": 18.81753487048107, - "grad_norm": 6.849089622497559, - "learning_rate": 0.00011044476948913809, - "loss": 3.683644104003906, - "step": 16540 - }, - { - "epoch": 18.82892115001423, - "grad_norm": 7.539620399475098, - "learning_rate": 0.00011039059537353054, - "loss": 3.788827896118164, - "step": 16550 - }, - { - "epoch": 18.840307429547394, - "grad_norm": 8.003284454345703, - "learning_rate": 0.00011033642125792296, - "loss": 4.023321533203125, - "step": 16560 - }, - { - "epoch": 18.851693709080557, - "grad_norm": 8.320795059204102, - "learning_rate": 0.00011028224714231541, - "loss": 3.955525588989258, - "step": 16570 - }, - { - "epoch": 18.86307998861372, - "grad_norm": 8.324459075927734, - "learning_rate": 0.00011022807302670784, - "loss": 3.9589923858642577, - "step": 16580 - }, - { - "epoch": 18.874466268146882, - "grad_norm": 8.919875144958496, - "learning_rate": 0.00011017389891110028, - "loss": 4.231829833984375, - "step": 16590 - }, - { - "epoch": 18.885852547680045, - "grad_norm": 7.729628562927246, - "learning_rate": 0.00011011972479549273, - "loss": 3.9778141021728515, - "step": 16600 - }, - { - "epoch": 18.897238827213208, - "grad_norm": 7.793543815612793, - "learning_rate": 0.00011006555067988515, - "loss": 4.103470230102539, - "step": 16610 - }, - { - "epoch": 18.90862510674637, - "grad_norm": 7.603933334350586, - "learning_rate": 0.0001100113765642776, - "loss": 3.918026351928711, - "step": 16620 - }, - { - "epoch": 18.920011386279533, - "grad_norm": 9.164023399353027, - "learning_rate": 0.00010995720244867003, - "loss": 4.239971542358399, - "step": 16630 - }, - { - "epoch": 18.931397665812696, - "grad_norm": 8.179018020629883, - "learning_rate": 0.00010990302833306248, - "loss": 4.0833740234375, - "step": 16640 - }, - { - "epoch": 18.94278394534586, - "grad_norm": 8.107283592224121, - "learning_rate": 0.0001098488542174549, - "loss": 3.886615753173828, - "step": 16650 - }, - { - "epoch": 18.95417022487902, - "grad_norm": 8.483085632324219, - "learning_rate": 0.00010979468010184734, - "loss": 4.466776275634766, - "step": 16660 - }, - { - "epoch": 18.965556504412184, - "grad_norm": 7.803193092346191, - "learning_rate": 0.00010974050598623977, - "loss": 3.9009479522705077, - "step": 16670 - }, - { - "epoch": 18.976942783945347, - "grad_norm": 8.053387641906738, - "learning_rate": 0.00010968633187063222, - "loss": 4.092340469360352, - "step": 16680 - }, - { - "epoch": 18.98832906347851, - "grad_norm": 7.657060623168945, - "learning_rate": 0.00010963215775502464, - "loss": 4.109097290039062, - "step": 16690 - }, - { - "epoch": 18.999715343011673, - "grad_norm": 7.931739807128906, - "learning_rate": 0.0001095779836394171, - "loss": 3.970943069458008, - "step": 16700 - }, - { - "epoch": 19.010247651579846, - "grad_norm": 8.304778099060059, - "learning_rate": 0.00010952380952380953, - "loss": 3.4373973846435546, - "step": 16710 - }, - { - "epoch": 19.02163393111301, - "grad_norm": 8.502395629882812, - "learning_rate": 0.00010946963540820197, - "loss": 3.6647174835205076, - "step": 16720 - }, - { - "epoch": 19.03302021064617, - "grad_norm": 8.930685043334961, - "learning_rate": 0.00010941546129259441, - "loss": 3.4595081329345705, - "step": 16730 - }, - { - "epoch": 19.044406490179334, - "grad_norm": 7.8097920417785645, - "learning_rate": 0.00010936128717698683, - "loss": 3.548003005981445, - "step": 16740 - }, - { - "epoch": 19.055792769712497, - "grad_norm": 6.608278751373291, - "learning_rate": 0.00010930711306137929, - "loss": 3.3133895874023436, - "step": 16750 - }, - { - "epoch": 19.06717904924566, - "grad_norm": 7.428323745727539, - "learning_rate": 0.00010925293894577171, - "loss": 3.576006317138672, - "step": 16760 - }, - { - "epoch": 19.078565328778822, - "grad_norm": 7.900843620300293, - "learning_rate": 0.00010919876483016416, - "loss": 3.676096725463867, - "step": 16770 - }, - { - "epoch": 19.089951608311985, - "grad_norm": 7.658603191375732, - "learning_rate": 0.00010914459071455659, - "loss": 3.7328716278076173, - "step": 16780 - }, - { - "epoch": 19.101337887845148, - "grad_norm": 7.360223293304443, - "learning_rate": 0.00010909041659894902, - "loss": 3.652063751220703, - "step": 16790 - }, - { - "epoch": 19.11272416737831, - "grad_norm": 7.122159957885742, - "learning_rate": 0.00010903624248334146, - "loss": 3.690329360961914, - "step": 16800 - }, - { - "epoch": 19.124110446911473, - "grad_norm": 6.8670501708984375, - "learning_rate": 0.0001089820683677339, - "loss": 3.667049026489258, - "step": 16810 - }, - { - "epoch": 19.135496726444636, - "grad_norm": 7.987428665161133, - "learning_rate": 0.00010892789425212635, - "loss": 3.607272720336914, - "step": 16820 - }, - { - "epoch": 19.146883005977795, - "grad_norm": 7.282992839813232, - "learning_rate": 0.00010887372013651878, - "loss": 3.4856422424316404, - "step": 16830 - }, - { - "epoch": 19.158269285510958, - "grad_norm": 8.548340797424316, - "learning_rate": 0.00010881954602091123, - "loss": 3.8047679901123046, - "step": 16840 - }, - { - "epoch": 19.16965556504412, - "grad_norm": 7.989595413208008, - "learning_rate": 0.00010876537190530365, - "loss": 3.664881134033203, - "step": 16850 - }, - { - "epoch": 19.181041844577283, - "grad_norm": 7.799252033233643, - "learning_rate": 0.00010871119778969609, - "loss": 4.083931350708008, - "step": 16860 - }, - { - "epoch": 19.192428124110446, - "grad_norm": 7.790616512298584, - "learning_rate": 0.00010865702367408851, - "loss": 3.279020309448242, - "step": 16870 - }, - { - "epoch": 19.20381440364361, - "grad_norm": 7.22285795211792, - "learning_rate": 0.00010860284955848097, - "loss": 3.5854366302490233, - "step": 16880 - }, - { - "epoch": 19.21520068317677, - "grad_norm": 8.332810401916504, - "learning_rate": 0.00010854867544287339, - "loss": 3.9076087951660154, - "step": 16890 - }, - { - "epoch": 19.226586962709934, - "grad_norm": 6.827261924743652, - "learning_rate": 0.00010849450132726584, - "loss": 3.7439952850341798, - "step": 16900 - }, - { - "epoch": 19.237973242243097, - "grad_norm": 7.577035427093506, - "learning_rate": 0.00010844032721165827, - "loss": 3.5914188385009767, - "step": 16910 - }, - { - "epoch": 19.24935952177626, - "grad_norm": 8.372159957885742, - "learning_rate": 0.00010838615309605072, - "loss": 3.650664520263672, - "step": 16920 - }, - { - "epoch": 19.260745801309422, - "grad_norm": 7.719248294830322, - "learning_rate": 0.00010833197898044314, - "loss": 3.8901805877685547, - "step": 16930 - }, - { - "epoch": 19.272132080842585, - "grad_norm": 6.905912399291992, - "learning_rate": 0.00010827780486483558, - "loss": 3.5376930236816406, - "step": 16940 - }, - { - "epoch": 19.283518360375748, - "grad_norm": 6.742889404296875, - "learning_rate": 0.00010822363074922803, - "loss": 3.475457763671875, - "step": 16950 - }, - { - "epoch": 19.29490463990891, - "grad_norm": 7.5838446617126465, - "learning_rate": 0.00010816945663362046, - "loss": 3.4266204833984375, - "step": 16960 - }, - { - "epoch": 19.306290919442073, - "grad_norm": 7.7189836502075195, - "learning_rate": 0.00010811528251801291, - "loss": 3.5107288360595703, - "step": 16970 - }, - { - "epoch": 19.317677198975236, - "grad_norm": 7.462057590484619, - "learning_rate": 0.00010806110840240533, - "loss": 3.7074642181396484, - "step": 16980 - }, - { - "epoch": 19.3290634785084, - "grad_norm": 8.049522399902344, - "learning_rate": 0.00010800693428679778, - "loss": 3.521027374267578, - "step": 16990 - }, - { - "epoch": 19.34044975804156, - "grad_norm": 7.91304349899292, - "learning_rate": 0.00010795276017119021, - "loss": 3.9731681823730467, - "step": 17000 - }, - { - "epoch": 19.35183603757472, - "grad_norm": 8.217313766479492, - "learning_rate": 0.00010789858605558265, - "loss": 3.8252967834472655, - "step": 17010 - }, - { - "epoch": 19.363222317107883, - "grad_norm": 8.448884963989258, - "learning_rate": 0.00010784441193997507, - "loss": 3.677239990234375, - "step": 17020 - }, - { - "epoch": 19.374608596641046, - "grad_norm": 7.586091041564941, - "learning_rate": 0.00010779023782436752, - "loss": 3.6121559143066406, - "step": 17030 - }, - { - "epoch": 19.38599487617421, - "grad_norm": 7.399051189422607, - "learning_rate": 0.00010773606370875995, - "loss": 3.6527149200439455, - "step": 17040 - }, - { - "epoch": 19.39738115570737, - "grad_norm": 8.452932357788086, - "learning_rate": 0.0001076818895931524, - "loss": 3.5067722320556642, - "step": 17050 - }, - { - "epoch": 19.408767435240534, - "grad_norm": 7.3610382080078125, - "learning_rate": 0.00010762771547754485, - "loss": 3.557135009765625, - "step": 17060 - }, - { - "epoch": 19.420153714773697, - "grad_norm": 7.684902191162109, - "learning_rate": 0.00010757354136193727, - "loss": 3.7063770294189453, - "step": 17070 - }, - { - "epoch": 19.43153999430686, - "grad_norm": 7.2472825050354, - "learning_rate": 0.00010751936724632971, - "loss": 3.5020095825195314, - "step": 17080 - }, - { - "epoch": 19.442926273840023, - "grad_norm": 8.710956573486328, - "learning_rate": 0.00010746519313072214, - "loss": 3.7144115447998045, - "step": 17090 - }, - { - "epoch": 19.454312553373185, - "grad_norm": 7.865204811096191, - "learning_rate": 0.00010741101901511459, - "loss": 3.860599136352539, - "step": 17100 - }, - { - "epoch": 19.465698832906348, - "grad_norm": 7.315329551696777, - "learning_rate": 0.00010735684489950701, - "loss": 3.8806697845458986, - "step": 17110 - }, - { - "epoch": 19.47708511243951, - "grad_norm": 8.162956237792969, - "learning_rate": 0.00010730267078389946, - "loss": 3.6206798553466797, - "step": 17120 - }, - { - "epoch": 19.488471391972674, - "grad_norm": 7.473461627960205, - "learning_rate": 0.00010724849666829189, - "loss": 3.7630165100097654, - "step": 17130 - }, - { - "epoch": 19.499857671505836, - "grad_norm": 7.086675643920898, - "learning_rate": 0.00010719432255268434, - "loss": 3.6255001068115233, - "step": 17140 - }, - { - "epoch": 19.511243951039, - "grad_norm": 7.986152172088623, - "learning_rate": 0.00010714014843707677, - "loss": 3.654983139038086, - "step": 17150 - }, - { - "epoch": 19.52263023057216, - "grad_norm": 6.950078010559082, - "learning_rate": 0.0001070859743214692, - "loss": 3.5265995025634767, - "step": 17160 - }, - { - "epoch": 19.534016510105324, - "grad_norm": 8.496960639953613, - "learning_rate": 0.00010703180020586165, - "loss": 3.8141246795654298, - "step": 17170 - }, - { - "epoch": 19.545402789638487, - "grad_norm": 7.757366180419922, - "learning_rate": 0.00010697762609025408, - "loss": 3.748949432373047, - "step": 17180 - }, - { - "epoch": 19.55678906917165, - "grad_norm": 8.865738868713379, - "learning_rate": 0.00010692345197464653, - "loss": 3.841624069213867, - "step": 17190 - }, - { - "epoch": 19.56817534870481, - "grad_norm": 8.705485343933105, - "learning_rate": 0.00010686927785903896, - "loss": 3.5993213653564453, - "step": 17200 - }, - { - "epoch": 19.579561628237972, - "grad_norm": 8.318339347839355, - "learning_rate": 0.0001068151037434314, - "loss": 3.897611618041992, - "step": 17210 - }, - { - "epoch": 19.590947907771135, - "grad_norm": 8.347416877746582, - "learning_rate": 0.00010676092962782382, - "loss": 3.714302825927734, - "step": 17220 - }, - { - "epoch": 19.602334187304297, - "grad_norm": 6.775099754333496, - "learning_rate": 0.00010670675551221627, - "loss": 3.8760570526123046, - "step": 17230 - }, - { - "epoch": 19.61372046683746, - "grad_norm": 8.375359535217285, - "learning_rate": 0.0001066525813966087, - "loss": 3.7001274108886717, - "step": 17240 - }, - { - "epoch": 19.625106746370623, - "grad_norm": 8.114470481872559, - "learning_rate": 0.00010659840728100115, - "loss": 3.857496643066406, - "step": 17250 - }, - { - "epoch": 19.636493025903786, - "grad_norm": 8.257019996643066, - "learning_rate": 0.00010654423316539357, - "loss": 3.6363399505615233, - "step": 17260 - }, - { - "epoch": 19.64787930543695, - "grad_norm": 7.418999671936035, - "learning_rate": 0.00010649005904978602, - "loss": 3.6371334075927733, - "step": 17270 - }, - { - "epoch": 19.65926558497011, - "grad_norm": 8.225639343261719, - "learning_rate": 0.00010643588493417846, - "loss": 3.6147201538085936, - "step": 17280 - }, - { - "epoch": 19.670651864503274, - "grad_norm": 7.634995937347412, - "learning_rate": 0.00010638171081857088, - "loss": 3.783678436279297, - "step": 17290 - }, - { - "epoch": 19.682038144036436, - "grad_norm": 7.631542682647705, - "learning_rate": 0.00010632753670296334, - "loss": 3.7224945068359374, - "step": 17300 - }, - { - "epoch": 19.6934244235696, - "grad_norm": 8.868590354919434, - "learning_rate": 0.00010627336258735576, - "loss": 3.8564884185791017, - "step": 17310 - }, - { - "epoch": 19.704810703102762, - "grad_norm": 7.66297721862793, - "learning_rate": 0.00010621918847174821, - "loss": 3.8662670135498045, - "step": 17320 - }, - { - "epoch": 19.716196982635925, - "grad_norm": 7.4999589920043945, - "learning_rate": 0.00010616501435614064, - "loss": 3.648085021972656, - "step": 17330 - }, - { - "epoch": 19.727583262169087, - "grad_norm": 7.6754984855651855, - "learning_rate": 0.00010611084024053309, - "loss": 3.6652565002441406, - "step": 17340 - }, - { - "epoch": 19.73896954170225, - "grad_norm": 7.409824371337891, - "learning_rate": 0.00010605666612492551, - "loss": 3.8480754852294923, - "step": 17350 - }, - { - "epoch": 19.750355821235413, - "grad_norm": 7.804997444152832, - "learning_rate": 0.00010600249200931795, - "loss": 3.830272674560547, - "step": 17360 - }, - { - "epoch": 19.761742100768572, - "grad_norm": 9.533950805664062, - "learning_rate": 0.00010594831789371037, - "loss": 4.024043273925781, - "step": 17370 - }, - { - "epoch": 19.773128380301735, - "grad_norm": 7.675684452056885, - "learning_rate": 0.00010589414377810283, - "loss": 3.693775939941406, - "step": 17380 - }, - { - "epoch": 19.784514659834898, - "grad_norm": 8.96830940246582, - "learning_rate": 0.00010583996966249528, - "loss": 3.858016586303711, - "step": 17390 - }, - { - "epoch": 19.79590093936806, - "grad_norm": 6.618140697479248, - "learning_rate": 0.0001057857955468877, - "loss": 3.8667743682861326, - "step": 17400 - }, - { - "epoch": 19.807287218901223, - "grad_norm": 8.536417961120605, - "learning_rate": 0.00010573162143128015, - "loss": 3.633957290649414, - "step": 17410 - }, - { - "epoch": 19.818673498434386, - "grad_norm": 8.237014770507812, - "learning_rate": 0.00010567744731567258, - "loss": 3.944471740722656, - "step": 17420 - }, - { - "epoch": 19.83005977796755, - "grad_norm": 7.2452545166015625, - "learning_rate": 0.00010562327320006502, - "loss": 3.8378509521484374, - "step": 17430 - }, - { - "epoch": 19.84144605750071, - "grad_norm": 6.854305744171143, - "learning_rate": 0.00010556909908445744, - "loss": 3.6727760314941404, - "step": 17440 - }, - { - "epoch": 19.852832337033874, - "grad_norm": 7.254054546356201, - "learning_rate": 0.00010551492496884989, - "loss": 3.783167266845703, - "step": 17450 - }, - { - "epoch": 19.864218616567037, - "grad_norm": 7.683404445648193, - "learning_rate": 0.00010546075085324232, - "loss": 3.615367889404297, - "step": 17460 - }, - { - "epoch": 19.8756048961002, - "grad_norm": 7.9448065757751465, - "learning_rate": 0.00010540657673763477, - "loss": 3.5793502807617186, - "step": 17470 - }, - { - "epoch": 19.886991175633362, - "grad_norm": 7.222136974334717, - "learning_rate": 0.00010535240262202719, - "loss": 3.8391284942626953, - "step": 17480 - }, - { - "epoch": 19.898377455166525, - "grad_norm": 8.161956787109375, - "learning_rate": 0.00010529822850641964, - "loss": 3.8968780517578123, - "step": 17490 - }, - { - "epoch": 19.909763734699688, - "grad_norm": 7.755995750427246, - "learning_rate": 0.00010524405439081208, - "loss": 3.5014904022216795, - "step": 17500 - }, - { - "epoch": 19.92115001423285, - "grad_norm": 7.664623737335205, - "learning_rate": 0.00010518988027520451, - "loss": 3.861225128173828, - "step": 17510 - }, - { - "epoch": 19.932536293766013, - "grad_norm": 8.745182037353516, - "learning_rate": 0.00010513570615959696, - "loss": 3.8261661529541016, - "step": 17520 - }, - { - "epoch": 19.943922573299176, - "grad_norm": 7.4443206787109375, - "learning_rate": 0.00010508153204398938, - "loss": 3.686481475830078, - "step": 17530 - }, - { - "epoch": 19.95530885283234, - "grad_norm": 8.966758728027344, - "learning_rate": 0.00010502735792838183, - "loss": 3.7222663879394533, - "step": 17540 - }, - { - "epoch": 19.9666951323655, - "grad_norm": 9.107354164123535, - "learning_rate": 0.00010497318381277426, - "loss": 3.9835369110107424, - "step": 17550 - }, - { - "epoch": 19.97808141189866, - "grad_norm": 7.190637111663818, - "learning_rate": 0.0001049190096971667, - "loss": 3.5804458618164063, - "step": 17560 - }, - { - "epoch": 19.989467691431823, - "grad_norm": 8.058660507202148, - "learning_rate": 0.00010486483558155914, - "loss": 3.7318145751953127, - "step": 17570 - }, - { - "epoch": 20.0, - "grad_norm": 5.256486892700195, - "learning_rate": 0.00010481066146595157, - "loss": 3.4294113159179687, - "step": 17580 - }, - { - "epoch": 20.011386279533163, - "grad_norm": 6.450402736663818, - "learning_rate": 0.000104756487350344, - "loss": 3.1654720306396484, - "step": 17590 - }, - { - "epoch": 20.022772559066325, - "grad_norm": 8.31795597076416, - "learning_rate": 0.00010470231323473645, - "loss": 3.4220176696777345, - "step": 17600 - }, - { - "epoch": 20.034158838599488, - "grad_norm": 7.338985443115234, - "learning_rate": 0.0001046481391191289, - "loss": 3.2845233917236327, - "step": 17610 - }, - { - "epoch": 20.04554511813265, - "grad_norm": 7.033785820007324, - "learning_rate": 0.00010459396500352133, - "loss": 3.3925506591796877, - "step": 17620 - }, - { - "epoch": 20.056931397665814, - "grad_norm": 8.403705596923828, - "learning_rate": 0.00010453979088791376, - "loss": 3.3477813720703127, - "step": 17630 - }, - { - "epoch": 20.068317677198976, - "grad_norm": 8.051742553710938, - "learning_rate": 0.00010448561677230619, - "loss": 3.4361392974853517, - "step": 17640 - }, - { - "epoch": 20.07970395673214, - "grad_norm": 8.155322074890137, - "learning_rate": 0.00010443144265669864, - "loss": 3.4342941284179687, - "step": 17650 - }, - { - "epoch": 20.091090236265302, - "grad_norm": 7.207828998565674, - "learning_rate": 0.00010437726854109106, - "loss": 3.397259521484375, - "step": 17660 - }, - { - "epoch": 20.102476515798465, - "grad_norm": 8.539883613586426, - "learning_rate": 0.00010432309442548352, - "loss": 3.2583744049072267, - "step": 17670 - }, - { - "epoch": 20.113862795331624, - "grad_norm": 7.494978904724121, - "learning_rate": 0.00010426892030987594, - "loss": 3.2995159149169924, - "step": 17680 - }, - { - "epoch": 20.125249074864787, - "grad_norm": 8.173662185668945, - "learning_rate": 0.00010421474619426839, - "loss": 3.5137447357177733, - "step": 17690 - }, - { - "epoch": 20.13663535439795, - "grad_norm": 7.584948539733887, - "learning_rate": 0.00010416057207866082, - "loss": 3.537697601318359, - "step": 17700 - }, - { - "epoch": 20.148021633931112, - "grad_norm": 7.191871643066406, - "learning_rate": 0.00010410639796305325, - "loss": 3.5673038482666017, - "step": 17710 - }, - { - "epoch": 20.159407913464275, - "grad_norm": 7.3911356925964355, - "learning_rate": 0.00010405222384744568, - "loss": 3.557468032836914, - "step": 17720 - }, - { - "epoch": 20.170794192997437, - "grad_norm": 6.331779479980469, - "learning_rate": 0.00010399804973183813, - "loss": 3.5360912322998046, - "step": 17730 - }, - { - "epoch": 20.1821804725306, - "grad_norm": 7.614666938781738, - "learning_rate": 0.00010394387561623058, - "loss": 3.3655654907226564, - "step": 17740 - }, - { - "epoch": 20.193566752063763, - "grad_norm": 7.935511112213135, - "learning_rate": 0.000103889701500623, - "loss": 3.340676498413086, - "step": 17750 - }, - { - "epoch": 20.204953031596926, - "grad_norm": 8.085060119628906, - "learning_rate": 0.00010383552738501546, - "loss": 3.363076400756836, - "step": 17760 - }, - { - "epoch": 20.21633931113009, - "grad_norm": 7.689126491546631, - "learning_rate": 0.00010378135326940788, - "loss": 3.4896373748779297, - "step": 17770 - }, - { - "epoch": 20.22772559066325, - "grad_norm": 8.10032844543457, - "learning_rate": 0.00010372717915380032, - "loss": 3.325590133666992, - "step": 17780 - }, - { - "epoch": 20.239111870196414, - "grad_norm": 8.31187629699707, - "learning_rate": 0.00010367300503819274, - "loss": 3.2680404663085936, - "step": 17790 - }, - { - "epoch": 20.250498149729577, - "grad_norm": 7.616562366485596, - "learning_rate": 0.0001036188309225852, - "loss": 3.3835872650146483, - "step": 17800 - }, - { - "epoch": 20.26188442926274, - "grad_norm": 7.7585344314575195, - "learning_rate": 0.00010356465680697762, - "loss": 3.3168540954589845, - "step": 17810 - }, - { - "epoch": 20.273270708795902, - "grad_norm": 7.622613430023193, - "learning_rate": 0.00010351048269137007, - "loss": 3.4925868988037108, - "step": 17820 - }, - { - "epoch": 20.284656988329065, - "grad_norm": 7.538808345794678, - "learning_rate": 0.0001034563085757625, - "loss": 3.5138877868652343, - "step": 17830 - }, - { - "epoch": 20.296043267862228, - "grad_norm": 7.818309783935547, - "learning_rate": 0.00010340213446015495, - "loss": 3.594626617431641, - "step": 17840 - }, - { - "epoch": 20.30742954739539, - "grad_norm": 7.420721530914307, - "learning_rate": 0.00010334796034454739, - "loss": 3.3362926483154296, - "step": 17850 - }, - { - "epoch": 20.31881582692855, - "grad_norm": 8.078766822814941, - "learning_rate": 0.00010329378622893981, - "loss": 3.376148986816406, - "step": 17860 - }, - { - "epoch": 20.330202106461712, - "grad_norm": 8.338269233703613, - "learning_rate": 0.00010323961211333226, - "loss": 3.398274230957031, - "step": 17870 - }, - { - "epoch": 20.341588385994875, - "grad_norm": 8.21082592010498, - "learning_rate": 0.00010318543799772469, - "loss": 3.543021011352539, - "step": 17880 - }, - { - "epoch": 20.352974665528038, - "grad_norm": 7.475930213928223, - "learning_rate": 0.00010313126388211714, - "loss": 3.3988021850585937, - "step": 17890 - }, - { - "epoch": 20.3643609450612, - "grad_norm": 8.236494064331055, - "learning_rate": 0.00010307708976650956, - "loss": 3.582375717163086, - "step": 17900 - }, - { - "epoch": 20.375747224594363, - "grad_norm": 6.666841506958008, - "learning_rate": 0.000103022915650902, - "loss": 3.476043701171875, - "step": 17910 - }, - { - "epoch": 20.387133504127526, - "grad_norm": 8.686334609985352, - "learning_rate": 0.00010296874153529444, - "loss": 3.4657127380371096, - "step": 17920 - }, - { - "epoch": 20.39851978366069, - "grad_norm": 7.781078338623047, - "learning_rate": 0.00010291456741968688, - "loss": 3.4596488952636717, - "step": 17930 - }, - { - "epoch": 20.40990606319385, - "grad_norm": 6.893237590789795, - "learning_rate": 0.0001028603933040793, - "loss": 3.6022926330566407, - "step": 17940 - }, - { - "epoch": 20.421292342727014, - "grad_norm": 8.505749702453613, - "learning_rate": 0.00010280621918847175, - "loss": 3.511888122558594, - "step": 17950 - }, - { - "epoch": 20.432678622260177, - "grad_norm": 7.29093074798584, - "learning_rate": 0.0001027520450728642, - "loss": 3.803249740600586, - "step": 17960 - }, - { - "epoch": 20.44406490179334, - "grad_norm": 7.3153462409973145, - "learning_rate": 0.00010269787095725663, - "loss": 3.3840301513671873, - "step": 17970 - }, - { - "epoch": 20.455451181326502, - "grad_norm": 7.5833306312561035, - "learning_rate": 0.00010264369684164907, - "loss": 3.48956413269043, - "step": 17980 - }, - { - "epoch": 20.466837460859665, - "grad_norm": 8.305935859680176, - "learning_rate": 0.00010258952272604149, - "loss": 3.399882507324219, - "step": 17990 - }, - { - "epoch": 20.478223740392828, - "grad_norm": 8.22060775756836, - "learning_rate": 0.00010253534861043394, - "loss": 3.2761825561523437, - "step": 18000 - }, - { - "epoch": 20.48961001992599, - "grad_norm": 8.05759334564209, - "learning_rate": 0.00010248117449482637, - "loss": 3.5770782470703124, - "step": 18010 - }, - { - "epoch": 20.500996299459153, - "grad_norm": 8.089456558227539, - "learning_rate": 0.00010242700037921882, - "loss": 3.424999237060547, - "step": 18020 - }, - { - "epoch": 20.512382578992316, - "grad_norm": 7.183095932006836, - "learning_rate": 0.00010237282626361124, - "loss": 3.1551158905029295, - "step": 18030 - }, - { - "epoch": 20.523768858525475, - "grad_norm": 7.149391174316406, - "learning_rate": 0.0001023186521480037, - "loss": 3.384687805175781, - "step": 18040 - }, - { - "epoch": 20.535155138058638, - "grad_norm": 6.47637939453125, - "learning_rate": 0.00010226447803239612, - "loss": 3.601530075073242, - "step": 18050 - }, - { - "epoch": 20.5465414175918, - "grad_norm": 8.601676940917969, - "learning_rate": 0.00010221030391678856, - "loss": 3.6373306274414063, - "step": 18060 - }, - { - "epoch": 20.557927697124963, - "grad_norm": 8.294757843017578, - "learning_rate": 0.00010215612980118101, - "loss": 3.5030460357666016, - "step": 18070 - }, - { - "epoch": 20.569313976658126, - "grad_norm": 7.412970066070557, - "learning_rate": 0.00010210195568557343, - "loss": 3.206818389892578, - "step": 18080 - }, - { - "epoch": 20.58070025619129, - "grad_norm": 7.663300514221191, - "learning_rate": 0.00010204778156996589, - "loss": 3.5179752349853515, - "step": 18090 - }, - { - "epoch": 20.59208653572445, - "grad_norm": 7.107182025909424, - "learning_rate": 0.00010199360745435831, - "loss": 3.446649932861328, - "step": 18100 - }, - { - "epoch": 20.603472815257614, - "grad_norm": 7.1297688484191895, - "learning_rate": 0.00010193943333875076, - "loss": 3.4354694366455076, - "step": 18110 - }, - { - "epoch": 20.614859094790777, - "grad_norm": 7.961850643157959, - "learning_rate": 0.00010188525922314319, - "loss": 3.3658203125, - "step": 18120 - }, - { - "epoch": 20.62624537432394, - "grad_norm": 7.603863716125488, - "learning_rate": 0.00010183108510753562, - "loss": 3.439925765991211, - "step": 18130 - }, - { - "epoch": 20.637631653857103, - "grad_norm": 7.988809108734131, - "learning_rate": 0.00010177691099192805, - "loss": 3.531351089477539, - "step": 18140 - }, - { - "epoch": 20.649017933390265, - "grad_norm": 8.041953086853027, - "learning_rate": 0.0001017227368763205, - "loss": 3.673780822753906, - "step": 18150 - }, - { - "epoch": 20.660404212923428, - "grad_norm": 8.37690258026123, - "learning_rate": 0.00010166856276071292, - "loss": 3.794148254394531, - "step": 18160 - }, - { - "epoch": 20.67179049245659, - "grad_norm": 8.250043869018555, - "learning_rate": 0.00010161438864510538, - "loss": 3.5557697296142576, - "step": 18170 - }, - { - "epoch": 20.683176771989753, - "grad_norm": 8.75919246673584, - "learning_rate": 0.00010156021452949783, - "loss": 3.7775032043457033, - "step": 18180 - }, - { - "epoch": 20.694563051522916, - "grad_norm": 8.309146881103516, - "learning_rate": 0.00010150604041389025, - "loss": 3.4888229370117188, - "step": 18190 - }, - { - "epoch": 20.70594933105608, - "grad_norm": 8.30295467376709, - "learning_rate": 0.00010145186629828269, - "loss": 3.7718467712402344, - "step": 18200 - }, - { - "epoch": 20.71733561058924, - "grad_norm": 7.974371910095215, - "learning_rate": 0.00010139769218267511, - "loss": 3.401205062866211, - "step": 18210 - }, - { - "epoch": 20.7287218901224, - "grad_norm": 8.394783973693848, - "learning_rate": 0.00010134351806706757, - "loss": 3.713974380493164, - "step": 18220 - }, - { - "epoch": 20.740108169655564, - "grad_norm": 8.151832580566406, - "learning_rate": 0.00010128934395145999, - "loss": 3.8104171752929688, - "step": 18230 - }, - { - "epoch": 20.751494449188726, - "grad_norm": 7.931938171386719, - "learning_rate": 0.00010123516983585244, - "loss": 3.417683410644531, - "step": 18240 - }, - { - "epoch": 20.76288072872189, - "grad_norm": 7.705905914306641, - "learning_rate": 0.00010118099572024487, - "loss": 3.6185550689697266, - "step": 18250 - }, - { - "epoch": 20.774267008255052, - "grad_norm": 9.284891128540039, - "learning_rate": 0.00010112682160463732, - "loss": 3.7930877685546873, - "step": 18260 - }, - { - "epoch": 20.785653287788215, - "grad_norm": 7.982856750488281, - "learning_rate": 0.00010107264748902974, - "loss": 3.4403953552246094, - "step": 18270 - }, - { - "epoch": 20.797039567321377, - "grad_norm": 8.39303207397461, - "learning_rate": 0.00010101847337342218, - "loss": 3.756923294067383, - "step": 18280 - }, - { - "epoch": 20.80842584685454, - "grad_norm": 7.579355239868164, - "learning_rate": 0.00010096429925781463, - "loss": 3.770177459716797, - "step": 18290 - }, - { - "epoch": 20.819812126387703, - "grad_norm": 7.803186416625977, - "learning_rate": 0.00010091012514220706, - "loss": 3.6111522674560548, - "step": 18300 - }, - { - "epoch": 20.831198405920865, - "grad_norm": 7.868539810180664, - "learning_rate": 0.00010085595102659951, - "loss": 3.5871612548828127, - "step": 18310 - }, - { - "epoch": 20.842584685454028, - "grad_norm": 8.147734642028809, - "learning_rate": 0.00010080177691099193, - "loss": 3.5940319061279298, - "step": 18320 - }, - { - "epoch": 20.85397096498719, - "grad_norm": 7.550434589385986, - "learning_rate": 0.00010074760279538437, - "loss": 3.7070236206054688, - "step": 18330 - }, - { - "epoch": 20.865357244520354, - "grad_norm": 7.593775272369385, - "learning_rate": 0.00010069342867977681, - "loss": 3.5908023834228517, - "step": 18340 - }, - { - "epoch": 20.876743524053516, - "grad_norm": 9.302807807922363, - "learning_rate": 0.00010063925456416925, - "loss": 3.5747802734375, - "step": 18350 - }, - { - "epoch": 20.88812980358668, - "grad_norm": 8.078301429748535, - "learning_rate": 0.00010058508044856167, - "loss": 3.404593658447266, - "step": 18360 - }, - { - "epoch": 20.899516083119842, - "grad_norm": 7.820472240447998, - "learning_rate": 0.00010053090633295412, - "loss": 3.6110614776611327, - "step": 18370 - }, - { - "epoch": 20.910902362653005, - "grad_norm": 7.314359664916992, - "learning_rate": 0.00010047673221734655, - "loss": 3.4265087127685545, - "step": 18380 - }, - { - "epoch": 20.922288642186167, - "grad_norm": 7.681244373321533, - "learning_rate": 0.000100422558101739, - "loss": 3.6056175231933594, - "step": 18390 - }, - { - "epoch": 20.933674921719327, - "grad_norm": 8.012112617492676, - "learning_rate": 0.00010036838398613142, - "loss": 3.682481384277344, - "step": 18400 - }, - { - "epoch": 20.94506120125249, - "grad_norm": 7.457361221313477, - "learning_rate": 0.00010031420987052386, - "loss": 3.843639373779297, - "step": 18410 - }, - { - "epoch": 20.956447480785652, - "grad_norm": 8.518708229064941, - "learning_rate": 0.00010026003575491631, - "loss": 3.5751590728759766, - "step": 18420 - }, - { - "epoch": 20.967833760318815, - "grad_norm": 7.371086597442627, - "learning_rate": 0.00010020586163930874, - "loss": 3.6571311950683594, - "step": 18430 - }, - { - "epoch": 20.979220039851977, - "grad_norm": 7.497313976287842, - "learning_rate": 0.00010015168752370119, - "loss": 3.5427356719970704, - "step": 18440 - }, - { - "epoch": 20.99060631938514, - "grad_norm": 8.249246597290039, - "learning_rate": 0.00010009751340809361, - "loss": 3.492108154296875, - "step": 18450 - }, - { - "epoch": 21.001138627953317, - "grad_norm": 7.12626314163208, - "learning_rate": 0.00010004333929248607, - "loss": 3.4620594024658202, - "step": 18460 - }, - { - "epoch": 21.01252490748648, - "grad_norm": 7.223207473754883, - "learning_rate": 9.99891651768785e-05, - "loss": 2.993937301635742, - "step": 18470 - }, - { - "epoch": 21.023911187019642, - "grad_norm": 8.60847282409668, - "learning_rate": 9.993499106127093e-05, - "loss": 3.044596481323242, - "step": 18480 - }, - { - "epoch": 21.035297466552805, - "grad_norm": 7.056385040283203, - "learning_rate": 9.988081694566337e-05, - "loss": 3.329801559448242, - "step": 18490 - }, - { - "epoch": 21.046683746085968, - "grad_norm": 8.232537269592285, - "learning_rate": 9.98266428300558e-05, - "loss": 3.359906005859375, - "step": 18500 - }, - { - "epoch": 21.05807002561913, - "grad_norm": 6.871865272521973, - "learning_rate": 9.977246871444824e-05, - "loss": 3.1155139923095705, - "step": 18510 - }, - { - "epoch": 21.06945630515229, - "grad_norm": 8.1388578414917, - "learning_rate": 9.971829459884068e-05, - "loss": 3.1933937072753906, - "step": 18520 - }, - { - "epoch": 21.080842584685453, - "grad_norm": 7.495057106018066, - "learning_rate": 9.966412048323312e-05, - "loss": 3.4371055603027343, - "step": 18530 - }, - { - "epoch": 21.092228864218615, - "grad_norm": 6.833533763885498, - "learning_rate": 9.960994636762556e-05, - "loss": 3.0802495956420897, - "step": 18540 - }, - { - "epoch": 21.103615143751778, - "grad_norm": 6.975308895111084, - "learning_rate": 9.955577225201798e-05, - "loss": 3.318703842163086, - "step": 18550 - }, - { - "epoch": 21.11500142328494, - "grad_norm": 8.011495590209961, - "learning_rate": 9.950159813641042e-05, - "loss": 3.2149578094482423, - "step": 18560 - }, - { - "epoch": 21.126387702818104, - "grad_norm": 7.448869705200195, - "learning_rate": 9.944742402080287e-05, - "loss": 3.2515499114990236, - "step": 18570 - }, - { - "epoch": 21.137773982351266, - "grad_norm": 7.280409812927246, - "learning_rate": 9.939324990519531e-05, - "loss": 3.5192329406738283, - "step": 18580 - }, - { - "epoch": 21.14916026188443, - "grad_norm": 7.909364700317383, - "learning_rate": 9.933907578958775e-05, - "loss": 3.397074890136719, - "step": 18590 - }, - { - "epoch": 21.16054654141759, - "grad_norm": 7.611769676208496, - "learning_rate": 9.928490167398018e-05, - "loss": 3.0764091491699217, - "step": 18600 - }, - { - "epoch": 21.171932820950754, - "grad_norm": 7.476166248321533, - "learning_rate": 9.923072755837262e-05, - "loss": 3.295338821411133, - "step": 18610 - }, - { - "epoch": 21.183319100483917, - "grad_norm": 6.955434799194336, - "learning_rate": 9.917655344276505e-05, - "loss": 3.357422637939453, - "step": 18620 - }, - { - "epoch": 21.19470538001708, - "grad_norm": 8.147130966186523, - "learning_rate": 9.912237932715748e-05, - "loss": 3.1608652114868163, - "step": 18630 - }, - { - "epoch": 21.206091659550243, - "grad_norm": 8.146050453186035, - "learning_rate": 9.906820521154992e-05, - "loss": 3.1724903106689455, - "step": 18640 - }, - { - "epoch": 21.217477939083405, - "grad_norm": 7.68185567855835, - "learning_rate": 9.901403109594236e-05, - "loss": 3.063700485229492, - "step": 18650 - }, - { - "epoch": 21.228864218616568, - "grad_norm": 7.399899005889893, - "learning_rate": 9.89598569803348e-05, - "loss": 3.2527538299560548, - "step": 18660 - }, - { - "epoch": 21.24025049814973, - "grad_norm": 8.725862503051758, - "learning_rate": 9.890568286472724e-05, - "loss": 3.3397960662841797, - "step": 18670 - }, - { - "epoch": 21.251636777682894, - "grad_norm": 8.439692497253418, - "learning_rate": 9.885150874911967e-05, - "loss": 3.127163314819336, - "step": 18680 - }, - { - "epoch": 21.263023057216056, - "grad_norm": 7.275774955749512, - "learning_rate": 9.879733463351211e-05, - "loss": 3.3399112701416014, - "step": 18690 - }, - { - "epoch": 21.274409336749216, - "grad_norm": 8.231805801391602, - "learning_rate": 9.874316051790455e-05, - "loss": 3.3167404174804687, - "step": 18700 - }, - { - "epoch": 21.28579561628238, - "grad_norm": 8.807353019714355, - "learning_rate": 9.868898640229699e-05, - "loss": 3.2518096923828126, - "step": 18710 - }, - { - "epoch": 21.29718189581554, - "grad_norm": 7.745279788970947, - "learning_rate": 9.863481228668943e-05, - "loss": 2.9932077407836912, - "step": 18720 - }, - { - "epoch": 21.308568175348704, - "grad_norm": 8.119501113891602, - "learning_rate": 9.858063817108186e-05, - "loss": 3.360261917114258, - "step": 18730 - }, - { - "epoch": 21.319954454881866, - "grad_norm": 7.655581951141357, - "learning_rate": 9.85264640554743e-05, - "loss": 3.311513900756836, - "step": 18740 - }, - { - "epoch": 21.33134073441503, - "grad_norm": 7.7340240478515625, - "learning_rate": 9.847228993986674e-05, - "loss": 3.241596221923828, - "step": 18750 - }, - { - "epoch": 21.342727013948192, - "grad_norm": 7.133695602416992, - "learning_rate": 9.841811582425917e-05, - "loss": 3.315245819091797, - "step": 18760 - }, - { - "epoch": 21.354113293481355, - "grad_norm": 7.775753021240234, - "learning_rate": 9.83639417086516e-05, - "loss": 3.2109920501708986, - "step": 18770 - }, - { - "epoch": 21.365499573014517, - "grad_norm": 7.912374019622803, - "learning_rate": 9.830976759304404e-05, - "loss": 3.256867218017578, - "step": 18780 - }, - { - "epoch": 21.37688585254768, - "grad_norm": 7.180131435394287, - "learning_rate": 9.825559347743648e-05, - "loss": 3.2446563720703123, - "step": 18790 - }, - { - "epoch": 21.388272132080843, - "grad_norm": 8.159407615661621, - "learning_rate": 9.820141936182893e-05, - "loss": 3.262491226196289, - "step": 18800 - }, - { - "epoch": 21.399658411614006, - "grad_norm": 7.895713806152344, - "learning_rate": 9.814724524622137e-05, - "loss": 3.2164894104003907, - "step": 18810 - }, - { - "epoch": 21.41104469114717, - "grad_norm": 8.5209321975708, - "learning_rate": 9.809307113061381e-05, - "loss": 3.3610679626464846, - "step": 18820 - }, - { - "epoch": 21.42243097068033, - "grad_norm": 6.682577610015869, - "learning_rate": 9.803889701500623e-05, - "loss": 3.2081188201904296, - "step": 18830 - }, - { - "epoch": 21.433817250213494, - "grad_norm": 7.8088812828063965, - "learning_rate": 9.798472289939867e-05, - "loss": 3.5212165832519533, - "step": 18840 - }, - { - "epoch": 21.445203529746657, - "grad_norm": 7.557262897491455, - "learning_rate": 9.793054878379111e-05, - "loss": 3.161390495300293, - "step": 18850 - }, - { - "epoch": 21.45658980927982, - "grad_norm": 6.671349048614502, - "learning_rate": 9.787637466818355e-05, - "loss": 3.4181884765625, - "step": 18860 - }, - { - "epoch": 21.467976088812982, - "grad_norm": 8.085090637207031, - "learning_rate": 9.782220055257598e-05, - "loss": 3.2536998748779298, - "step": 18870 - }, - { - "epoch": 21.47936236834614, - "grad_norm": 7.863136291503906, - "learning_rate": 9.776802643696842e-05, - "loss": 3.254865264892578, - "step": 18880 - }, - { - "epoch": 21.490748647879304, - "grad_norm": 8.205495834350586, - "learning_rate": 9.771385232136086e-05, - "loss": 3.396427536010742, - "step": 18890 - }, - { - "epoch": 21.502134927412467, - "grad_norm": 7.5775861740112305, - "learning_rate": 9.76596782057533e-05, - "loss": 3.1754524230957033, - "step": 18900 - }, - { - "epoch": 21.51352120694563, - "grad_norm": 7.762941837310791, - "learning_rate": 9.760550409014574e-05, - "loss": 3.3684154510498048, - "step": 18910 - }, - { - "epoch": 21.524907486478792, - "grad_norm": 7.523013114929199, - "learning_rate": 9.755132997453817e-05, - "loss": 3.414249801635742, - "step": 18920 - }, - { - "epoch": 21.536293766011955, - "grad_norm": 8.015236854553223, - "learning_rate": 9.749715585893061e-05, - "loss": 3.2509552001953126, - "step": 18930 - }, - { - "epoch": 21.547680045545118, - "grad_norm": 9.543829917907715, - "learning_rate": 9.744298174332305e-05, - "loss": 3.6638416290283202, - "step": 18940 - }, - { - "epoch": 21.55906632507828, - "grad_norm": 8.284276008605957, - "learning_rate": 9.738880762771549e-05, - "loss": 3.4558319091796874, - "step": 18950 - }, - { - "epoch": 21.570452604611443, - "grad_norm": 8.424079895019531, - "learning_rate": 9.733463351210793e-05, - "loss": 3.319378662109375, - "step": 18960 - }, - { - "epoch": 21.581838884144606, - "grad_norm": 7.417494773864746, - "learning_rate": 9.728045939650035e-05, - "loss": 3.0912622451782226, - "step": 18970 - }, - { - "epoch": 21.59322516367777, - "grad_norm": 8.443395614624023, - "learning_rate": 9.722628528089279e-05, - "loss": 3.599252700805664, - "step": 18980 - }, - { - "epoch": 21.60461144321093, - "grad_norm": 7.053211688995361, - "learning_rate": 9.717211116528523e-05, - "loss": 3.4184902191162108, - "step": 18990 - }, - { - "epoch": 21.615997722744094, - "grad_norm": 7.322805881500244, - "learning_rate": 9.711793704967766e-05, - "loss": 3.3825252532958983, - "step": 19000 - }, - { - "epoch": 21.627384002277257, - "grad_norm": 7.776912212371826, - "learning_rate": 9.70637629340701e-05, - "loss": 3.413040542602539, - "step": 19010 - }, - { - "epoch": 21.63877028181042, - "grad_norm": 7.955855846405029, - "learning_rate": 9.700958881846254e-05, - "loss": 3.3618423461914064, - "step": 19020 - }, - { - "epoch": 21.650156561343582, - "grad_norm": 7.364816188812256, - "learning_rate": 9.695541470285499e-05, - "loss": 3.470820999145508, - "step": 19030 - }, - { - "epoch": 21.661542840876745, - "grad_norm": 7.257011413574219, - "learning_rate": 9.690124058724742e-05, - "loss": 3.3685569763183594, - "step": 19040 - }, - { - "epoch": 21.672929120409908, - "grad_norm": 7.3103485107421875, - "learning_rate": 9.684706647163985e-05, - "loss": 3.5275413513183596, - "step": 19050 - }, - { - "epoch": 21.684315399943067, - "grad_norm": 8.958032608032227, - "learning_rate": 9.679289235603229e-05, - "loss": 3.6240314483642577, - "step": 19060 - }, - { - "epoch": 21.69570167947623, - "grad_norm": 7.291479110717773, - "learning_rate": 9.673871824042473e-05, - "loss": 3.4977294921875, - "step": 19070 - }, - { - "epoch": 21.707087959009392, - "grad_norm": 7.926621437072754, - "learning_rate": 9.668454412481717e-05, - "loss": 3.3956787109375, - "step": 19080 - }, - { - "epoch": 21.718474238542555, - "grad_norm": 7.5746588706970215, - "learning_rate": 9.66303700092096e-05, - "loss": 3.4186546325683596, - "step": 19090 - }, - { - "epoch": 21.729860518075718, - "grad_norm": 8.237885475158691, - "learning_rate": 9.657619589360204e-05, - "loss": 3.315296173095703, - "step": 19100 - }, - { - "epoch": 21.74124679760888, - "grad_norm": 7.103343963623047, - "learning_rate": 9.652202177799447e-05, - "loss": 3.2063358306884764, - "step": 19110 - }, - { - "epoch": 21.752633077142043, - "grad_norm": 7.7268500328063965, - "learning_rate": 9.646784766238691e-05, - "loss": 3.576313781738281, - "step": 19120 - }, - { - "epoch": 21.764019356675206, - "grad_norm": 8.264037132263184, - "learning_rate": 9.641367354677935e-05, - "loss": 3.452584075927734, - "step": 19130 - }, - { - "epoch": 21.77540563620837, - "grad_norm": 7.947970390319824, - "learning_rate": 9.63594994311718e-05, - "loss": 3.2328662872314453, - "step": 19140 - }, - { - "epoch": 21.78679191574153, - "grad_norm": 8.009827613830566, - "learning_rate": 9.630532531556423e-05, - "loss": 3.3568878173828125, - "step": 19150 - }, - { - "epoch": 21.798178195274694, - "grad_norm": 7.520967483520508, - "learning_rate": 9.625115119995667e-05, - "loss": 3.2189865112304688, - "step": 19160 - }, - { - "epoch": 21.809564474807857, - "grad_norm": 6.894745349884033, - "learning_rate": 9.619697708434911e-05, - "loss": 3.3676876068115233, - "step": 19170 - }, - { - "epoch": 21.82095075434102, - "grad_norm": 7.391615867614746, - "learning_rate": 9.614280296874154e-05, - "loss": 3.1733936309814452, - "step": 19180 - }, - { - "epoch": 21.832337033874182, - "grad_norm": 8.549541473388672, - "learning_rate": 9.608862885313397e-05, - "loss": 3.4626365661621095, - "step": 19190 - }, - { - "epoch": 21.843723313407345, - "grad_norm": 7.7757182121276855, - "learning_rate": 9.603445473752641e-05, - "loss": 3.3878768920898437, - "step": 19200 - }, - { - "epoch": 21.855109592940508, - "grad_norm": 9.261902809143066, - "learning_rate": 9.598028062191885e-05, - "loss": 3.0820999145507812, - "step": 19210 - }, - { - "epoch": 21.86649587247367, - "grad_norm": 7.974034309387207, - "learning_rate": 9.592610650631129e-05, - "loss": 3.532638931274414, - "step": 19220 - }, - { - "epoch": 21.877882152006833, - "grad_norm": 7.275153160095215, - "learning_rate": 9.587193239070373e-05, - "loss": 3.409342956542969, - "step": 19230 - }, - { - "epoch": 21.889268431539996, - "grad_norm": 7.821238040924072, - "learning_rate": 9.581775827509616e-05, - "loss": 3.2864406585693358, - "step": 19240 - }, - { - "epoch": 21.900654711073155, - "grad_norm": 6.836928844451904, - "learning_rate": 9.57635841594886e-05, - "loss": 3.399283218383789, - "step": 19250 - }, - { - "epoch": 21.912040990606318, - "grad_norm": 8.900579452514648, - "learning_rate": 9.570941004388104e-05, - "loss": 3.277374267578125, - "step": 19260 - }, - { - "epoch": 21.92342727013948, - "grad_norm": 7.689421653747559, - "learning_rate": 9.565523592827348e-05, - "loss": 3.502179718017578, - "step": 19270 - }, - { - "epoch": 21.934813549672644, - "grad_norm": 7.474170684814453, - "learning_rate": 9.560106181266592e-05, - "loss": 3.3328819274902344, - "step": 19280 - }, - { - "epoch": 21.946199829205806, - "grad_norm": 7.40029764175415, - "learning_rate": 9.554688769705835e-05, - "loss": 3.339038848876953, - "step": 19290 - }, - { - "epoch": 21.95758610873897, - "grad_norm": 8.339094161987305, - "learning_rate": 9.549271358145079e-05, - "loss": 3.642498779296875, - "step": 19300 - }, - { - "epoch": 21.96897238827213, - "grad_norm": 8.725251197814941, - "learning_rate": 9.543853946584323e-05, - "loss": 3.3834274291992186, - "step": 19310 - }, - { - "epoch": 21.980358667805294, - "grad_norm": 7.735589504241943, - "learning_rate": 9.538436535023565e-05, - "loss": 3.362496185302734, - "step": 19320 - }, - { - "epoch": 21.991744947338457, - "grad_norm": 8.0684175491333, - "learning_rate": 9.533019123462809e-05, - "loss": 3.6285259246826174, - "step": 19330 - }, - { - "epoch": 22.002277255906634, - "grad_norm": 7.213688850402832, - "learning_rate": 9.527601711902053e-05, - "loss": 3.0752532958984373, - "step": 19340 - }, - { - "epoch": 22.013663535439797, - "grad_norm": 7.230661392211914, - "learning_rate": 9.522184300341297e-05, - "loss": 3.045562171936035, - "step": 19350 - }, - { - "epoch": 22.025049814972956, - "grad_norm": 7.4309587478637695, - "learning_rate": 9.51676688878054e-05, - "loss": 3.1042001724243162, - "step": 19360 - }, - { - "epoch": 22.03643609450612, - "grad_norm": 7.176165580749512, - "learning_rate": 9.511349477219786e-05, - "loss": 2.9163785934448243, - "step": 19370 - }, - { - "epoch": 22.04782237403928, - "grad_norm": 7.511384963989258, - "learning_rate": 9.50593206565903e-05, - "loss": 2.967784118652344, - "step": 19380 - }, - { - "epoch": 22.059208653572444, - "grad_norm": 7.659293174743652, - "learning_rate": 9.500514654098272e-05, - "loss": 3.1063777923583986, - "step": 19390 - }, - { - "epoch": 22.070594933105607, - "grad_norm": 7.430224895477295, - "learning_rate": 9.495097242537516e-05, - "loss": 3.028996467590332, - "step": 19400 - }, - { - "epoch": 22.08198121263877, - "grad_norm": 7.423468589782715, - "learning_rate": 9.48967983097676e-05, - "loss": 3.157891273498535, - "step": 19410 - }, - { - "epoch": 22.093367492171932, - "grad_norm": 7.423767566680908, - "learning_rate": 9.484262419416003e-05, - "loss": 2.9452770233154295, - "step": 19420 - }, - { - "epoch": 22.104753771705095, - "grad_norm": 7.975661754608154, - "learning_rate": 9.478845007855247e-05, - "loss": 3.0032615661621094, - "step": 19430 - }, - { - "epoch": 22.116140051238258, - "grad_norm": 7.372896194458008, - "learning_rate": 9.473427596294491e-05, - "loss": 3.0240869522094727, - "step": 19440 - }, - { - "epoch": 22.12752633077142, - "grad_norm": 8.073140144348145, - "learning_rate": 9.468010184733735e-05, - "loss": 3.047717475891113, - "step": 19450 - }, - { - "epoch": 22.138912610304583, - "grad_norm": 7.785470008850098, - "learning_rate": 9.462592773172979e-05, - "loss": 3.0755859375, - "step": 19460 - }, - { - "epoch": 22.150298889837746, - "grad_norm": 8.51516342163086, - "learning_rate": 9.457175361612221e-05, - "loss": 3.0527263641357423, - "step": 19470 - }, - { - "epoch": 22.16168516937091, - "grad_norm": 7.476900577545166, - "learning_rate": 9.451757950051466e-05, - "loss": 3.286362075805664, - "step": 19480 - }, - { - "epoch": 22.17307144890407, - "grad_norm": 9.160499572753906, - "learning_rate": 9.44634053849071e-05, - "loss": 3.2243637084960937, - "step": 19490 - }, - { - "epoch": 22.184457728437234, - "grad_norm": 7.854064464569092, - "learning_rate": 9.440923126929954e-05, - "loss": 2.984610748291016, - "step": 19500 - }, - { - "epoch": 22.195844007970397, - "grad_norm": 7.314716815948486, - "learning_rate": 9.435505715369198e-05, - "loss": 3.109171676635742, - "step": 19510 - }, - { - "epoch": 22.20723028750356, - "grad_norm": 6.959289073944092, - "learning_rate": 9.430088303808441e-05, - "loss": 3.1544565200805663, - "step": 19520 - }, - { - "epoch": 22.218616567036722, - "grad_norm": 7.609602928161621, - "learning_rate": 9.424670892247684e-05, - "loss": 3.071408271789551, - "step": 19530 - }, - { - "epoch": 22.23000284656988, - "grad_norm": 7.927331924438477, - "learning_rate": 9.419253480686928e-05, - "loss": 3.055502510070801, - "step": 19540 - }, - { - "epoch": 22.241389126103044, - "grad_norm": 8.021471977233887, - "learning_rate": 9.413836069126171e-05, - "loss": 2.963591194152832, - "step": 19550 - }, - { - "epoch": 22.252775405636207, - "grad_norm": 7.8159260749816895, - "learning_rate": 9.408418657565415e-05, - "loss": 3.1434553146362303, - "step": 19560 - }, - { - "epoch": 22.26416168516937, - "grad_norm": 7.591141223907471, - "learning_rate": 9.403001246004659e-05, - "loss": 3.1236942291259764, - "step": 19570 - }, - { - "epoch": 22.275547964702533, - "grad_norm": 7.809198379516602, - "learning_rate": 9.397583834443903e-05, - "loss": 2.9383840560913086, - "step": 19580 - }, - { - "epoch": 22.286934244235695, - "grad_norm": 7.057039737701416, - "learning_rate": 9.392166422883148e-05, - "loss": 2.704611396789551, - "step": 19590 - }, - { - "epoch": 22.298320523768858, - "grad_norm": 8.17033863067627, - "learning_rate": 9.38674901132239e-05, - "loss": 3.0170907974243164, - "step": 19600 - }, - { - "epoch": 22.30970680330202, - "grad_norm": 8.437378883361816, - "learning_rate": 9.381331599761634e-05, - "loss": 3.192144012451172, - "step": 19610 - }, - { - "epoch": 22.321093082835183, - "grad_norm": 8.427281379699707, - "learning_rate": 9.375914188200878e-05, - "loss": 3.265237808227539, - "step": 19620 - }, - { - "epoch": 22.332479362368346, - "grad_norm": 7.595215797424316, - "learning_rate": 9.370496776640122e-05, - "loss": 3.1780752182006835, - "step": 19630 - }, - { - "epoch": 22.34386564190151, - "grad_norm": 7.446441173553467, - "learning_rate": 9.365079365079366e-05, - "loss": 3.126137542724609, - "step": 19640 - }, - { - "epoch": 22.35525192143467, - "grad_norm": 8.297301292419434, - "learning_rate": 9.35966195351861e-05, - "loss": 3.281117630004883, - "step": 19650 - }, - { - "epoch": 22.366638200967834, - "grad_norm": 7.048451900482178, - "learning_rate": 9.354244541957853e-05, - "loss": 3.1563640594482423, - "step": 19660 - }, - { - "epoch": 22.378024480500997, - "grad_norm": 7.006303787231445, - "learning_rate": 9.348827130397097e-05, - "loss": 2.991481971740723, - "step": 19670 - }, - { - "epoch": 22.38941076003416, - "grad_norm": 6.830575942993164, - "learning_rate": 9.34340971883634e-05, - "loss": 3.372071075439453, - "step": 19680 - }, - { - "epoch": 22.400797039567323, - "grad_norm": 8.474934577941895, - "learning_rate": 9.337992307275583e-05, - "loss": 3.1545629501342773, - "step": 19690 - }, - { - "epoch": 22.412183319100485, - "grad_norm": 8.20909595489502, - "learning_rate": 9.332574895714827e-05, - "loss": 3.1375335693359374, - "step": 19700 - }, - { - "epoch": 22.423569598633648, - "grad_norm": 7.151069164276123, - "learning_rate": 9.327157484154072e-05, - "loss": 3.147255706787109, - "step": 19710 - }, - { - "epoch": 22.434955878166807, - "grad_norm": 8.133870124816895, - "learning_rate": 9.321740072593316e-05, - "loss": 3.096915435791016, - "step": 19720 - }, - { - "epoch": 22.44634215769997, - "grad_norm": 7.726839065551758, - "learning_rate": 9.31632266103256e-05, - "loss": 3.2699676513671876, - "step": 19730 - }, - { - "epoch": 22.457728437233133, - "grad_norm": 7.36300802230835, - "learning_rate": 9.310905249471802e-05, - "loss": 3.1540500640869142, - "step": 19740 - }, - { - "epoch": 22.469114716766295, - "grad_norm": 8.004473686218262, - "learning_rate": 9.305487837911046e-05, - "loss": 3.1127681732177734, - "step": 19750 - }, - { - "epoch": 22.480500996299458, - "grad_norm": 7.39958381652832, - "learning_rate": 9.30007042635029e-05, - "loss": 2.9369550704956056, - "step": 19760 - }, - { - "epoch": 22.49188727583262, - "grad_norm": 7.182417869567871, - "learning_rate": 9.294653014789534e-05, - "loss": 3.0352306365966797, - "step": 19770 - }, - { - "epoch": 22.503273555365784, - "grad_norm": 7.487427711486816, - "learning_rate": 9.289235603228778e-05, - "loss": 3.1564273834228516, - "step": 19780 - }, - { - "epoch": 22.514659834898946, - "grad_norm": 8.101351737976074, - "learning_rate": 9.283818191668021e-05, - "loss": 3.135332489013672, - "step": 19790 - }, - { - "epoch": 22.52604611443211, - "grad_norm": 7.65117883682251, - "learning_rate": 9.278400780107265e-05, - "loss": 3.1288909912109375, - "step": 19800 - }, - { - "epoch": 22.537432393965272, - "grad_norm": 10.03753662109375, - "learning_rate": 9.272983368546509e-05, - "loss": 2.8136066436767577, - "step": 19810 - }, - { - "epoch": 22.548818673498435, - "grad_norm": 7.884417533874512, - "learning_rate": 9.267565956985753e-05, - "loss": 3.3287643432617187, - "step": 19820 - }, - { - "epoch": 22.560204953031597, - "grad_norm": 7.343411922454834, - "learning_rate": 9.262148545424997e-05, - "loss": 3.0845478057861326, - "step": 19830 - }, - { - "epoch": 22.57159123256476, - "grad_norm": 7.07990026473999, - "learning_rate": 9.25673113386424e-05, - "loss": 3.0407432556152343, - "step": 19840 - }, - { - "epoch": 22.582977512097923, - "grad_norm": 7.047083377838135, - "learning_rate": 9.251313722303484e-05, - "loss": 2.975852394104004, - "step": 19850 - }, - { - "epoch": 22.594363791631086, - "grad_norm": 7.662022590637207, - "learning_rate": 9.245896310742728e-05, - "loss": 3.1719154357910155, - "step": 19860 - }, - { - "epoch": 22.60575007116425, - "grad_norm": 7.884767532348633, - "learning_rate": 9.240478899181972e-05, - "loss": 3.1837724685668944, - "step": 19870 - }, - { - "epoch": 22.61713635069741, - "grad_norm": 7.272804260253906, - "learning_rate": 9.235061487621214e-05, - "loss": 2.9902538299560546, - "step": 19880 - }, - { - "epoch": 22.628522630230574, - "grad_norm": 8.392932891845703, - "learning_rate": 9.229644076060458e-05, - "loss": 3.203661346435547, - "step": 19890 - }, - { - "epoch": 22.639908909763733, - "grad_norm": 7.606863975524902, - "learning_rate": 9.224226664499702e-05, - "loss": 3.245991516113281, - "step": 19900 - }, - { - "epoch": 22.651295189296896, - "grad_norm": 8.281023979187012, - "learning_rate": 9.218809252938946e-05, - "loss": 3.256304168701172, - "step": 19910 - }, - { - "epoch": 22.66268146883006, - "grad_norm": 9.497676849365234, - "learning_rate": 9.21339184137819e-05, - "loss": 3.2775390625, - "step": 19920 - }, - { - "epoch": 22.67406774836322, - "grad_norm": 8.350468635559082, - "learning_rate": 9.207974429817435e-05, - "loss": 3.1510446548461912, - "step": 19930 - }, - { - "epoch": 22.685454027896384, - "grad_norm": 8.590188980102539, - "learning_rate": 9.202557018256678e-05, - "loss": 3.364548110961914, - "step": 19940 - }, - { - "epoch": 22.696840307429547, - "grad_norm": 8.386192321777344, - "learning_rate": 9.197139606695921e-05, - "loss": 3.446294403076172, - "step": 19950 - }, - { - "epoch": 22.70822658696271, - "grad_norm": 8.23085880279541, - "learning_rate": 9.191722195135165e-05, - "loss": 3.0785924911499025, - "step": 19960 - }, - { - "epoch": 22.719612866495872, - "grad_norm": 7.4072771072387695, - "learning_rate": 9.186304783574408e-05, - "loss": 3.299879455566406, - "step": 19970 - }, - { - "epoch": 22.730999146029035, - "grad_norm": 6.338870048522949, - "learning_rate": 9.180887372013652e-05, - "loss": 3.1906932830810546, - "step": 19980 - }, - { - "epoch": 22.742385425562198, - "grad_norm": 7.546908378601074, - "learning_rate": 9.175469960452896e-05, - "loss": 3.0080894470214843, - "step": 19990 - }, - { - "epoch": 22.75377170509536, - "grad_norm": 8.439484596252441, - "learning_rate": 9.17005254889214e-05, - "loss": 3.1106647491455077, - "step": 20000 - }, - { - "epoch": 22.765157984628523, - "grad_norm": 8.297350883483887, - "learning_rate": 9.164635137331384e-05, - "loss": 2.9686201095581053, - "step": 20010 - }, - { - "epoch": 22.776544264161686, - "grad_norm": 7.963090896606445, - "learning_rate": 9.159217725770628e-05, - "loss": 3.098881721496582, - "step": 20020 - }, - { - "epoch": 22.78793054369485, - "grad_norm": 8.618762016296387, - "learning_rate": 9.15380031420987e-05, - "loss": 3.041916084289551, - "step": 20030 - }, - { - "epoch": 22.79931682322801, - "grad_norm": 7.110545635223389, - "learning_rate": 9.148382902649115e-05, - "loss": 3.047221565246582, - "step": 20040 - }, - { - "epoch": 22.810703102761174, - "grad_norm": 8.43532657623291, - "learning_rate": 9.142965491088359e-05, - "loss": 3.025491142272949, - "step": 20050 - }, - { - "epoch": 22.822089382294337, - "grad_norm": 7.121822834014893, - "learning_rate": 9.137548079527603e-05, - "loss": 3.299955368041992, - "step": 20060 - }, - { - "epoch": 22.8334756618275, - "grad_norm": 7.654300212860107, - "learning_rate": 9.132130667966847e-05, - "loss": 3.023772430419922, - "step": 20070 - }, - { - "epoch": 22.844861941360662, - "grad_norm": 7.399035930633545, - "learning_rate": 9.12671325640609e-05, - "loss": 3.1401979446411135, - "step": 20080 - }, - { - "epoch": 22.85624822089382, - "grad_norm": 8.279058456420898, - "learning_rate": 9.121295844845333e-05, - "loss": 3.2731346130371093, - "step": 20090 - }, - { - "epoch": 22.867634500426984, - "grad_norm": 8.097005844116211, - "learning_rate": 9.115878433284577e-05, - "loss": 3.2051586151123046, - "step": 20100 - }, - { - "epoch": 22.879020779960147, - "grad_norm": 7.273082256317139, - "learning_rate": 9.11046102172382e-05, - "loss": 3.2332443237304687, - "step": 20110 - }, - { - "epoch": 22.89040705949331, - "grad_norm": 8.696020126342773, - "learning_rate": 9.105043610163064e-05, - "loss": 3.323190689086914, - "step": 20120 - }, - { - "epoch": 22.901793339026472, - "grad_norm": 7.149501323699951, - "learning_rate": 9.099626198602308e-05, - "loss": 3.2843936920166015, - "step": 20130 - }, - { - "epoch": 22.913179618559635, - "grad_norm": 8.37787914276123, - "learning_rate": 9.094208787041552e-05, - "loss": 3.404499053955078, - "step": 20140 - }, - { - "epoch": 22.924565898092798, - "grad_norm": 8.074170112609863, - "learning_rate": 9.088791375480796e-05, - "loss": 3.290465545654297, - "step": 20150 - }, - { - "epoch": 22.93595217762596, - "grad_norm": 7.932847023010254, - "learning_rate": 9.08337396392004e-05, - "loss": 3.2561920166015623, - "step": 20160 - }, - { - "epoch": 22.947338457159123, - "grad_norm": 11.540017127990723, - "learning_rate": 9.077956552359283e-05, - "loss": 3.1390569686889647, - "step": 20170 - }, - { - "epoch": 22.958724736692286, - "grad_norm": 7.136811256408691, - "learning_rate": 9.072539140798527e-05, - "loss": 3.2981540679931642, - "step": 20180 - }, - { - "epoch": 22.97011101622545, - "grad_norm": 7.825453758239746, - "learning_rate": 9.067121729237771e-05, - "loss": 3.0614255905151366, - "step": 20190 - }, - { - "epoch": 22.98149729575861, - "grad_norm": 7.4651007652282715, - "learning_rate": 9.061704317677015e-05, - "loss": 3.2027751922607424, - "step": 20200 - }, - { - "epoch": 22.992883575291774, - "grad_norm": 7.570321559906006, - "learning_rate": 9.056286906116258e-05, - "loss": 3.1534257888793946, - "step": 20210 - }, - { - "epoch": 23.003415883859947, - "grad_norm": 6.957921028137207, - "learning_rate": 9.050869494555502e-05, - "loss": 2.89981746673584, - "step": 20220 - }, - { - "epoch": 23.01480216339311, - "grad_norm": 7.340804576873779, - "learning_rate": 9.045452082994746e-05, - "loss": 2.898666000366211, - "step": 20230 - }, - { - "epoch": 23.026188442926273, - "grad_norm": 7.689297676086426, - "learning_rate": 9.040034671433988e-05, - "loss": 3.076976776123047, - "step": 20240 - }, - { - "epoch": 23.037574722459436, - "grad_norm": 7.607706069946289, - "learning_rate": 9.034617259873232e-05, - "loss": 3.11181697845459, - "step": 20250 - }, - { - "epoch": 23.0489610019926, - "grad_norm": 7.3909502029418945, - "learning_rate": 9.029199848312476e-05, - "loss": 3.0006891250610352, - "step": 20260 - }, - { - "epoch": 23.06034728152576, - "grad_norm": 6.984787464141846, - "learning_rate": 9.023782436751721e-05, - "loss": 2.905653381347656, - "step": 20270 - }, - { - "epoch": 23.071733561058924, - "grad_norm": 8.02444839477539, - "learning_rate": 9.018365025190965e-05, - "loss": 2.8838172912597657, - "step": 20280 - }, - { - "epoch": 23.083119840592087, - "grad_norm": 7.629673480987549, - "learning_rate": 9.012947613630209e-05, - "loss": 3.2186203002929688, - "step": 20290 - }, - { - "epoch": 23.09450612012525, - "grad_norm": 7.7079877853393555, - "learning_rate": 9.007530202069451e-05, - "loss": 2.9591360092163086, - "step": 20300 - }, - { - "epoch": 23.105892399658412, - "grad_norm": 6.760339736938477, - "learning_rate": 9.002112790508695e-05, - "loss": 2.7239349365234373, - "step": 20310 - }, - { - "epoch": 23.117278679191575, - "grad_norm": 9.964285850524902, - "learning_rate": 8.996695378947939e-05, - "loss": 2.7104719161987303, - "step": 20320 - }, - { - "epoch": 23.128664958724737, - "grad_norm": 7.464658737182617, - "learning_rate": 8.991277967387183e-05, - "loss": 2.8070682525634765, - "step": 20330 - }, - { - "epoch": 23.1400512382579, - "grad_norm": 7.5616960525512695, - "learning_rate": 8.985860555826426e-05, - "loss": 2.708431434631348, - "step": 20340 - }, - { - "epoch": 23.151437517791063, - "grad_norm": 7.403007984161377, - "learning_rate": 8.98044314426567e-05, - "loss": 2.8072072982788088, - "step": 20350 - }, - { - "epoch": 23.162823797324226, - "grad_norm": 8.316858291625977, - "learning_rate": 8.975025732704914e-05, - "loss": 3.012776565551758, - "step": 20360 - }, - { - "epoch": 23.17421007685739, - "grad_norm": 7.397007942199707, - "learning_rate": 8.969608321144158e-05, - "loss": 3.178554344177246, - "step": 20370 - }, - { - "epoch": 23.185596356390548, - "grad_norm": 7.289053916931152, - "learning_rate": 8.964190909583402e-05, - "loss": 2.6939666748046873, - "step": 20380 - }, - { - "epoch": 23.19698263592371, - "grad_norm": 7.612484455108643, - "learning_rate": 8.958773498022645e-05, - "loss": 3.1522052764892576, - "step": 20390 - }, - { - "epoch": 23.208368915456873, - "grad_norm": 7.743655681610107, - "learning_rate": 8.953356086461889e-05, - "loss": 2.716688346862793, - "step": 20400 - }, - { - "epoch": 23.219755194990036, - "grad_norm": 10.625493049621582, - "learning_rate": 8.947938674901133e-05, - "loss": 3.075759506225586, - "step": 20410 - }, - { - "epoch": 23.2311414745232, - "grad_norm": 7.2747883796691895, - "learning_rate": 8.942521263340377e-05, - "loss": 2.8906108856201174, - "step": 20420 - }, - { - "epoch": 23.24252775405636, - "grad_norm": 8.012664794921875, - "learning_rate": 8.937103851779621e-05, - "loss": 2.891446113586426, - "step": 20430 - }, - { - "epoch": 23.253914033589524, - "grad_norm": 6.6297430992126465, - "learning_rate": 8.931686440218863e-05, - "loss": 2.9877086639404298, - "step": 20440 - }, - { - "epoch": 23.265300313122687, - "grad_norm": 7.2411274909973145, - "learning_rate": 8.926269028658107e-05, - "loss": 2.8750194549560546, - "step": 20450 - }, - { - "epoch": 23.27668659265585, - "grad_norm": 7.526220321655273, - "learning_rate": 8.920851617097351e-05, - "loss": 2.7831699371337892, - "step": 20460 - }, - { - "epoch": 23.288072872189012, - "grad_norm": 7.155262470245361, - "learning_rate": 8.915434205536595e-05, - "loss": 2.864755630493164, - "step": 20470 - }, - { - "epoch": 23.299459151722175, - "grad_norm": 8.7533540725708, - "learning_rate": 8.910016793975838e-05, - "loss": 3.0703664779663087, - "step": 20480 - }, - { - "epoch": 23.310845431255338, - "grad_norm": 8.419981956481934, - "learning_rate": 8.904599382415082e-05, - "loss": 3.047661781311035, - "step": 20490 - }, - { - "epoch": 23.3222317107885, - "grad_norm": 7.7640204429626465, - "learning_rate": 8.899181970854327e-05, - "loss": 2.887657356262207, - "step": 20500 - }, - { - "epoch": 23.333617990321663, - "grad_norm": 7.903047561645508, - "learning_rate": 8.89376455929357e-05, - "loss": 2.9257579803466798, - "step": 20510 - }, - { - "epoch": 23.345004269854826, - "grad_norm": 7.807440757751465, - "learning_rate": 8.888347147732814e-05, - "loss": 2.984356689453125, - "step": 20520 - }, - { - "epoch": 23.35639054938799, - "grad_norm": 7.732547760009766, - "learning_rate": 8.882929736172057e-05, - "loss": 3.2712615966796874, - "step": 20530 - }, - { - "epoch": 23.36777682892115, - "grad_norm": 9.053909301757812, - "learning_rate": 8.877512324611301e-05, - "loss": 2.9967681884765627, - "step": 20540 - }, - { - "epoch": 23.379163108454314, - "grad_norm": 7.440546035766602, - "learning_rate": 8.872094913050545e-05, - "loss": 3.0605968475341796, - "step": 20550 - }, - { - "epoch": 23.390549387987477, - "grad_norm": 6.801850318908691, - "learning_rate": 8.866677501489789e-05, - "loss": 3.1235517501831054, - "step": 20560 - }, - { - "epoch": 23.401935667520636, - "grad_norm": 8.020711898803711, - "learning_rate": 8.861260089929033e-05, - "loss": 2.8574342727661133, - "step": 20570 - }, - { - "epoch": 23.4133219470538, - "grad_norm": 7.850024223327637, - "learning_rate": 8.855842678368276e-05, - "loss": 3.050294303894043, - "step": 20580 - }, - { - "epoch": 23.42470822658696, - "grad_norm": 7.764718532562256, - "learning_rate": 8.850425266807519e-05, - "loss": 3.0289308547973635, - "step": 20590 - }, - { - "epoch": 23.436094506120124, - "grad_norm": 6.614105701446533, - "learning_rate": 8.845007855246763e-05, - "loss": 2.7201135635375975, - "step": 20600 - }, - { - "epoch": 23.447480785653287, - "grad_norm": 7.509239673614502, - "learning_rate": 8.839590443686008e-05, - "loss": 3.0174678802490233, - "step": 20610 - }, - { - "epoch": 23.45886706518645, - "grad_norm": 7.190005302429199, - "learning_rate": 8.834173032125252e-05, - "loss": 2.841200828552246, - "step": 20620 - }, - { - "epoch": 23.470253344719612, - "grad_norm": 8.263400077819824, - "learning_rate": 8.828755620564495e-05, - "loss": 2.7141368865966795, - "step": 20630 - }, - { - "epoch": 23.481639624252775, - "grad_norm": 6.695593357086182, - "learning_rate": 8.823338209003739e-05, - "loss": 2.7828269958496095, - "step": 20640 - }, - { - "epoch": 23.493025903785938, - "grad_norm": 8.470293998718262, - "learning_rate": 8.817920797442982e-05, - "loss": 3.0112863540649415, - "step": 20650 - }, - { - "epoch": 23.5044121833191, - "grad_norm": 7.583410263061523, - "learning_rate": 8.812503385882225e-05, - "loss": 2.9472209930419924, - "step": 20660 - }, - { - "epoch": 23.515798462852263, - "grad_norm": 7.364217758178711, - "learning_rate": 8.807085974321469e-05, - "loss": 2.9927459716796876, - "step": 20670 - }, - { - "epoch": 23.527184742385426, - "grad_norm": 8.428694725036621, - "learning_rate": 8.801668562760713e-05, - "loss": 3.037489128112793, - "step": 20680 - }, - { - "epoch": 23.53857102191859, - "grad_norm": 8.37234115600586, - "learning_rate": 8.796251151199957e-05, - "loss": 2.8356271743774415, - "step": 20690 - }, - { - "epoch": 23.54995730145175, - "grad_norm": 7.239820957183838, - "learning_rate": 8.7908337396392e-05, - "loss": 2.9129606246948243, - "step": 20700 - }, - { - "epoch": 23.561343580984914, - "grad_norm": 7.503726959228516, - "learning_rate": 8.785416328078444e-05, - "loss": 2.9780012130737306, - "step": 20710 - }, - { - "epoch": 23.572729860518077, - "grad_norm": 6.911001205444336, - "learning_rate": 8.779998916517688e-05, - "loss": 2.829633331298828, - "step": 20720 - }, - { - "epoch": 23.58411614005124, - "grad_norm": 8.57630729675293, - "learning_rate": 8.774581504956932e-05, - "loss": 3.0809423446655275, - "step": 20730 - }, - { - "epoch": 23.5955024195844, - "grad_norm": 7.118391990661621, - "learning_rate": 8.769164093396176e-05, - "loss": 2.9418434143066405, - "step": 20740 - }, - { - "epoch": 23.60688869911756, - "grad_norm": 8.470577239990234, - "learning_rate": 8.76374668183542e-05, - "loss": 3.073319435119629, - "step": 20750 - }, - { - "epoch": 23.618274978650724, - "grad_norm": 7.737377643585205, - "learning_rate": 8.758329270274663e-05, - "loss": 2.973114013671875, - "step": 20760 - }, - { - "epoch": 23.629661258183887, - "grad_norm": 7.212495803833008, - "learning_rate": 8.752911858713907e-05, - "loss": 3.256830596923828, - "step": 20770 - }, - { - "epoch": 23.64104753771705, - "grad_norm": 8.123977661132812, - "learning_rate": 8.747494447153151e-05, - "loss": 2.8294878005981445, - "step": 20780 - }, - { - "epoch": 23.652433817250213, - "grad_norm": 8.355793952941895, - "learning_rate": 8.742077035592395e-05, - "loss": 2.869054412841797, - "step": 20790 - }, - { - "epoch": 23.663820096783375, - "grad_norm": 8.310219764709473, - "learning_rate": 8.736659624031637e-05, - "loss": 3.0746349334716796, - "step": 20800 - }, - { - "epoch": 23.675206376316538, - "grad_norm": 7.146482944488525, - "learning_rate": 8.731242212470881e-05, - "loss": 2.7864452362060548, - "step": 20810 - }, - { - "epoch": 23.6865926558497, - "grad_norm": 7.662703990936279, - "learning_rate": 8.725824800910125e-05, - "loss": 2.792169952392578, - "step": 20820 - }, - { - "epoch": 23.697978935382864, - "grad_norm": 7.96389627456665, - "learning_rate": 8.720407389349369e-05, - "loss": 3.1457258224487306, - "step": 20830 - }, - { - "epoch": 23.709365214916026, - "grad_norm": 6.582839012145996, - "learning_rate": 8.714989977788614e-05, - "loss": 2.910396385192871, - "step": 20840 - }, - { - "epoch": 23.72075149444919, - "grad_norm": 7.634960651397705, - "learning_rate": 8.709572566227858e-05, - "loss": 3.0249223709106445, - "step": 20850 - }, - { - "epoch": 23.732137773982352, - "grad_norm": 8.20224380493164, - "learning_rate": 8.7041551546671e-05, - "loss": 2.8246204376220705, - "step": 20860 - }, - { - "epoch": 23.743524053515515, - "grad_norm": 7.890283584594727, - "learning_rate": 8.698737743106344e-05, - "loss": 3.098486137390137, - "step": 20870 - }, - { - "epoch": 23.754910333048677, - "grad_norm": 7.693148612976074, - "learning_rate": 8.693320331545588e-05, - "loss": 2.9983917236328126, - "step": 20880 - }, - { - "epoch": 23.76629661258184, - "grad_norm": 7.275303363800049, - "learning_rate": 8.687902919984832e-05, - "loss": 3.028023529052734, - "step": 20890 - }, - { - "epoch": 23.777682892115003, - "grad_norm": 9.735662460327148, - "learning_rate": 8.682485508424075e-05, - "loss": 2.907429313659668, - "step": 20900 - }, - { - "epoch": 23.789069171648165, - "grad_norm": 7.711111545562744, - "learning_rate": 8.677068096863319e-05, - "loss": 2.9936874389648436, - "step": 20910 - }, - { - "epoch": 23.800455451181328, - "grad_norm": 7.945051193237305, - "learning_rate": 8.671650685302563e-05, - "loss": 3.211922836303711, - "step": 20920 - }, - { - "epoch": 23.811841730714487, - "grad_norm": 7.160228729248047, - "learning_rate": 8.666233273741807e-05, - "loss": 2.911302375793457, - "step": 20930 - }, - { - "epoch": 23.82322801024765, - "grad_norm": 7.800594329833984, - "learning_rate": 8.660815862181049e-05, - "loss": 3.017009735107422, - "step": 20940 - }, - { - "epoch": 23.834614289780813, - "grad_norm": 7.576048851013184, - "learning_rate": 8.655398450620294e-05, - "loss": 3.0092130661010743, - "step": 20950 - }, - { - "epoch": 23.846000569313976, - "grad_norm": 7.396543979644775, - "learning_rate": 8.649981039059538e-05, - "loss": 3.004311180114746, - "step": 20960 - }, - { - "epoch": 23.85738684884714, - "grad_norm": 7.172977447509766, - "learning_rate": 8.644563627498782e-05, - "loss": 2.650033187866211, - "step": 20970 - }, - { - "epoch": 23.8687731283803, - "grad_norm": 7.44117546081543, - "learning_rate": 8.639146215938026e-05, - "loss": 2.9172443389892577, - "step": 20980 - }, - { - "epoch": 23.880159407913464, - "grad_norm": 8.184832572937012, - "learning_rate": 8.63372880437727e-05, - "loss": 3.129201316833496, - "step": 20990 - }, - { - "epoch": 23.891545687446627, - "grad_norm": 7.225976467132568, - "learning_rate": 8.628311392816512e-05, - "loss": 2.9618593215942384, - "step": 21000 - }, - { - "epoch": 23.90293196697979, - "grad_norm": 7.948321342468262, - "learning_rate": 8.622893981255756e-05, - "loss": 3.0456144332885744, - "step": 21010 - }, - { - "epoch": 23.914318246512952, - "grad_norm": 8.000722885131836, - "learning_rate": 8.617476569695e-05, - "loss": 3.007844924926758, - "step": 21020 - }, - { - "epoch": 23.925704526046115, - "grad_norm": 7.9927215576171875, - "learning_rate": 8.612059158134243e-05, - "loss": 2.997426223754883, - "step": 21030 - }, - { - "epoch": 23.937090805579277, - "grad_norm": 9.083395957946777, - "learning_rate": 8.606641746573487e-05, - "loss": 3.4572025299072267, - "step": 21040 - }, - { - "epoch": 23.94847708511244, - "grad_norm": 7.858376502990723, - "learning_rate": 8.601224335012731e-05, - "loss": 2.95123291015625, - "step": 21050 - }, - { - "epoch": 23.959863364645603, - "grad_norm": 6.826962471008301, - "learning_rate": 8.595806923451976e-05, - "loss": 3.145720672607422, - "step": 21060 - }, - { - "epoch": 23.971249644178766, - "grad_norm": 8.091140747070312, - "learning_rate": 8.590389511891219e-05, - "loss": 2.7883310317993164, - "step": 21070 - }, - { - "epoch": 23.98263592371193, - "grad_norm": 7.429564952850342, - "learning_rate": 8.584972100330462e-05, - "loss": 3.2133480072021485, - "step": 21080 - }, - { - "epoch": 23.99402220324509, - "grad_norm": 6.603694438934326, - "learning_rate": 8.579554688769706e-05, - "loss": 2.7781740188598634, - "step": 21090 - } - ], - "logging_steps": 10, - "max_steps": 36918, - "num_input_tokens_seen": 0, - "num_train_epochs": 42, - "save_steps": 500, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 5.6981989206528e+16, - "train_batch_size": 8, - "trial_name": null, - "trial_params": null -}